e21-e26: no improvement

czzhangheng 2025-04-21 20:31:09 +08:00
parent 0b006087ea
commit e851eb21d6
17 changed files with 2936 additions and 53 deletions

File diff suppressed because it is too large

@@ -27,7 +27,7 @@ train:
epochs: 300
lr_init: 0.003
weight_decay: 0
lr_decay: True
lr_decay: False
lr_decay_rate: 0.5
lr_decay_step: "5,20,40,65"
early_stop: True

@@ -14,18 +14,10 @@ data:
days_per_week: 7
model:
batch_size: 64
input_dim: 1
output_dim: 1
embed_dim: 12
rnn_units: 64
num_layers: 1
cheb_order: 2
use_day: True
use_week: True
graph_size: 30
expert_nums: 8
top_k: 2
hidden_dim: 64
in_len: 12
train:
loss_func: mae

config/STID/PEMSD4.yaml (new file)

@@ -0,0 +1,58 @@
data:
num_nodes: 307
lag: 12
horizon: 12
val_ratio: 0.2
test_ratio: 0.2
tod: False
normalizer: std
column_wise: False
default_graph: True
add_time_in_day: True
add_day_in_week: True
steps_per_day: 288
days_per_week: 7
model:
input_dim: 3
output_dim: 1
history: 12
horizon: 12
num_nodes: 307
input_len: 12
embed_dim": 32
output_len: 12
num_layer: 3
if_node: True
node_dim: 32
if_T_i_D: True
if_D_i_W: True
temp_dim_tid: 32
temp_dim_diw: 32
time_of_day_size: 288
day_of_week_size: 7
train:
loss_func: mae
seed: 1
batch_size: 64
epochs: 300
lr_init: 0.002
weight_decay: 0.0001
lr_decay: False
lr_decay_rate: 0.3
lr_decay_step: "1,50,80"
early_stop: True
early_stop_patience: 15
grad_norm: False
max_grad_norm: 5
real_value: True
test:
mae_thresh: null
mape_thresh: 0.0
log:
log_step: 200
plot: False
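
For reference, a minimal sketch of consuming this new config (assumes PyYAML is installed; the repo's training scripts may wire the sections differently):

import yaml
from model.STID.STID import STID

with open("config/STID/PEMSD4.yaml") as f:
    cfg = yaml.safe_load(f)

# the model section carries every key STID.__init__ reads
model = STID(cfg["model"])
print(sum(p.numel() for p in model.parameters()), "parameters")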

@@ -12,6 +12,8 @@ def init_model(args, device):
nn.init.xavier_uniform_(p)
else:
nn.init.uniform_(p)
total_params = sum(p.numel() for p in model.parameters())
print(f"Model has {total_params} parameters")
return model
def init_optimizer(model, args):

@@ -21,7 +21,7 @@ class PositionalEncoding(nn.Module):
return x + self.pe[:T].unsqueeze(1) # (T,1,d_model) broadcasts to (T,B,d_model)
class TemporalTransformerForecast(nn.Module):
class EXP(nn.Module):
"""
Transformer-based multi-step forecasting
- uses only x[...,0] as the input channel

@@ -4,7 +4,7 @@ import torch.nn.functional as F
"""
Replace the input/output proj layers with MLPs
Add time embedding
"""
class DynamicGraphConstructor(nn.Module):
@@ -104,6 +104,7 @@ class EXP(nn.Module):
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection now still only takes the flow history
self.input_proj = MLP(
in_dim = self.seq_len,

@@ -2,11 +2,10 @@ import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Add spatial embedding
"""
"""
Replace the input/output proj layers with MLPs
and add an explicit spatial embedding (Spatial Embedding) to the EXP model
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
@@ -114,6 +113,7 @@ class EXP(nn.Module):
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
self.node_emb = nn.Parameter(torch.empty(self.num_nodes, self.embed_dim))
# ==== spatial embedding ====
# one learnable vector per node
@@ -167,9 +167,11 @@ class EXP(nn.Module):
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) compute the spatial embedding and expand it to the batch size
node_idx = torch.arange(N, device=x.device) # (N,)
spatial_emb = self.spatial_embedding[node_idx] # (N, hidden_dim)
spatial_emb = spatial_emb.unsqueeze(0).expand(B, -1, -1) # (B, N, hidden_dim)
# node_emb = []
# node_emb.append(self.node_emb.unsqueeze(0).expand(
# B, -1, -1).transpose(1, 2).unsqueeze(-1))
# spatial_emb = torch.stack(node_emb)
spatial_emb = self.spatial_embedding.unsqueeze(0).expand(B, N, self.hidden_dim) # -> (B, N, hidden_dim)
# 4) add the three embeddings to h0
h0 = h0 + time_emb + day_emb + spatial_emb

model/EXP/EXP23.py (new file)

@@ -0,0 +1,159 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Add time embedding + graph construction from a learnable adjacency matrix
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num):
super().__init__()
# represent the adjacency directly with an N×N learnable parameter matrix
self.adj_param = nn.Parameter(torch.randn(node_num, node_num), requires_grad=True)
def forward(self):
# non-linear truncation to drop negative edges
adj = F.relu(self.adj_param)
# row-wise normalization
adj = F.softmax(adj, dim=-1)
return adj
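# Note: this full adjacency has node_num * node_num learnable entries
# (e.g. 307 * 307 ≈ 94k for PEMSD4), versus about 2 * node_num * embed_dim
# (2 * 307 * 16 ≈ 9.8k) for the factorized nodevec variant used in the other EXP files.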
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C)
res = x
# multiply adjacency by node features
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, C)
h1 = self.manba1(h)
adj = self.graph_constructor() # (N, N)
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
# ==== discrete time embeddings ====
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# projection of the flow history
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3:
x[...,0] = flow,
x[...,1] = time_in_day (0-1),
x[...,2] = day_in_week (0-6)
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len
# 1) project the flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# 2) discrete time indices
t_idx = (x_time[:, -1, :,] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :,].long() # (B, N)
time_emb = self.time_embedding(t_idx)
day_emb = self.day_embedding(d_idx)
# 3) inject the time embeddings
h0 = h0 + time_emb + day_emb
# 4) Sandwich blocks + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 5) output projection
out = self.out_proj(h2) # (B, N, horizon*output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
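
A minimal shape check for EXP23 (illustrative; the sizes below are assumptions, not the experiment settings):

import torch
args = {'horizon': 12, 'output_dim': 1, 'in_len': 12,
        'hidden_dim': 64, 'num_nodes': 170}
model = EXP(args)
x = torch.rand(2, 12, 170, 3)                           # (B, T, N, D): flow, time_in_day, day_in_week
x[..., 2] = torch.randint(0, 7, (2, 12, 170)).float()   # day index in 0..6
print(model(x).shape)                                   # torch.Size([2, 12, 170, 1])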

model/EXP/EXP24.py (new file)

@@ -0,0 +1,168 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Add time embedding + triple residual
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3 # no residual here; it is applied uniformly in the parent EXP
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== discrete time embeddings ====
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# projection of the flow history
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3:
x[...,0] = flow,
x[...,1] = time_in_day (0-1),
x[...,2] = day_in_week (0-6)
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len
# 1) project the flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# 2) discrete time indices
t_idx = (x_time[:, -1, :,] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :,].long() # (B, N)
time_emb = self.time_embedding(t_idx)
day_emb = self.day_embedding(d_idx)
# 3) inject the time embeddings
h0 = h0 + time_emb + day_emb
# ==== triple residual ====
# first: Sandwich1 + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
# second: Sandwich2 + residual
h2 = self.sandwich2(h1)
h2 = h2 + h1
# third: global residual (connected directly back to the initial h0)
h3 = h2 + h0
# 5) output projection
out = self.out_proj(h3) # (B, N, horizon*output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out

model/EXP/EXP25.py (new file)

@@ -0,0 +1,196 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class DynamicTanh(nn.Module):
"""
Dynamic tanh activation with learnable scaling (alpha) and affine transformation (weight, bias).
"""
def __init__(self, normalized_shape, channels_last=True, alpha_init_value=0.5):
super().__init__()
self.normalized_shape = normalized_shape
self.alpha_init_value = alpha_init_value
self.channels_last = channels_last
# learnable scale for tanh
self.alpha = nn.Parameter(torch.full((1,), alpha_init_value))
# affine parameters
self.weight = nn.Parameter(torch.ones(normalized_shape))
self.bias = nn.Parameter(torch.zeros(normalized_shape))
def forward(self, x):
# scaled tanh
x = torch.tanh(self.alpha * x)
# affine transform
if self.channels_last:
x = x * self.weight + self.bias
else:
# channels_first: assume shape (B, C, H, W)
x = x * self.weight[:, None, None] + self.bias[:, None, None]
return x
def extra_repr(self):
return f"normalized_shape={self.normalized_shape}, alpha_init_value={self.alpha_init_value}, channels_last={self.channels_last}"
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
"""
Multi-head attention + feed-forward network with DynamicTanh replacing LayerNorm.
"""
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
# replace LayerNorm with DynamicTanh
self.norm1 = DynamicTanh(normalized_shape=input_dim, channels_last=True)
self.norm2 = DynamicTanh(normalized_shape=input_dim, channels_last=True)
def forward(self, x):
# self-attention
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
# feed-forward
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers.append(nn.Linear(dims[i], dims[i+1]))
layers.append(activation())
layers.append(nn.Linear(dims[-2], dims[-1]))
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# discrete time embeddings
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection for flow history
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two Sandwich blocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total) where
x[...,0]=flow, x[...,1]=time_in_day (scaled), x[...,2]=day_in_week
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len, "Input sequence length mismatch"
# project flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# time embeddings at last step
t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long()
d_idx = x_day[:, -1, :].long()
time_emb = self.time_embedding(t_idx)
day_emb = self.day_embedding(d_idx)
# inject time features
h0 = h0 + time_emb + day_emb
# Sandwich + residuals
h1 = self.sandwich1(h0) + h0
h2 = self.sandwich2(h1)
# output
out = self.out_proj(h2)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3)
return out
# Example usage:
# args = {'horizon':12, 'output_dim':1, 'num_nodes':170}
# model = EXP(args)
# print(model)
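
Worth noting: the MANBA blocks above swap nn.LayerNorm for DynamicTanh, which is shape-preserving, so the rest of the block is unchanged. A tiny check with assumed sizes:

import torch
dyt = DynamicTanh(normalized_shape=64, channels_last=True)
x = torch.randn(2, 30, 64)       # (B, N, hidden_dim)
print(dyt(x).shape)              # torch.Size([2, 30, 64]), same shape nn.LayerNorm(64) would return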

model/EXP/EXP26.py (new file)

@@ -0,0 +1,195 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Add time embedding + introduce a graph attention network (GAT)
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
# the original GCN block is kept as a spare
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
# ★★ GAT part: adapted from LeronQ/GCN_predict-Pytorch ★★
class GraphAttentionLayer(nn.Module):
def __init__(self, in_c, out_c):
super().__init__()
self.W = nn.Linear(in_c, out_c, bias=False)
self.b = nn.Parameter(torch.Tensor(out_c))
nn.init.xavier_uniform_(self.W.weight)
nn.init.zeros_(self.b)
def forward(self, h, adj):
# h: [B, N, C_in], adj: [N, N]
Wh = self.W(h) # [B, N, C_out]
# compute attention scores
score = torch.bmm(Wh, Wh.transpose(1, 2)) * adj.unsqueeze(0) # [B, N, N]
score = score.masked_fill(score == 0, -1e16)
alpha = F.softmax(score, dim=-1) # [B, N, N]
# weighted sum plus bias
out = torch.bmm(alpha, Wh) + self.b # [B, N, C_out]
return F.relu(out)
class GraphAttentionBlock(nn.Module):
def __init__(self, input_dim, output_dim, n_heads=4):
super().__init__()
# multi-head attention
self.heads = nn.ModuleList([GraphAttentionLayer(input_dim, output_dim) for _ in range(n_heads)])
# after merging the heads, apply one more mapping layer
self.out_att = GraphAttentionLayer(output_dim * n_heads, output_dim)
self.act = nn.ReLU()
def forward(self, x, adj):
# x: [B, N, C], adj: [N, N]
# run the heads in parallel, then concatenate
h_cat = torch.cat([head(x, adj) for head in self.heads], dim=-1) # [B, N, output_dim * n_heads]
h_out = self.out_att(h_cat, adj) # [B, N, output_dim]
return self.act(h_out)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
# ★★ replaced with the GAT block ★★
self.gc = GraphAttentionBlock(hidden_dim, hidden_dim, n_heads=4)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== new: discrete time embeddings ====
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection (flow only)
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two SandwichBlocks (graph conv already replaced with GAT)
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3, x[...,0]=flow, x[...,1]=time_in_day, x[...,2]=day_in_week
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len
# 1) project the flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# 2) take the time indices of the last step and embed them
t_idx = (x_time[:, -1, :,] * (self.time_slots - 1)).long()
d_idx = x_day[:, -1, :,].long()
time_emb = self.time_embedding(t_idx)
day_emb = self.day_embedding(d_idx)
# 3) inject the time information
h0 = h0 + time_emb + day_emb
# 4) Sandwich blocks + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 5) output
out = self.out_proj(h2)
out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
return out
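
A quick shape check for the GAT-based spatial block (illustrative sizes):

import torch
adj = DynamicGraphConstructor(node_num=30, embed_dim=16)()   # (30, 30), rows sum to 1
gat = GraphAttentionBlock(input_dim=64, output_dim=64, n_heads=4)
x = torch.randn(2, 30, 64)                                   # (B, N, hidden_dim)
print(gat(x, adj).shape)                                     # torch.Size([2, 30, 64])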

model/EXP/EXP27.py (new file)

@@ -0,0 +1,170 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, input_dim)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class ExpertBlock(nn.Module):
"""
Mixture-of-Experts block: routes each node's representation to a selected expert or a shared expert.
"""
def __init__(self, hidden_dim, num_experts):
super().__init__()
self.num_experts = num_experts
# gating network projects to num_experts + 1 (extra shared expert)
self.gate = nn.Linear(hidden_dim, num_experts + 1)
# per-expert FFNs
self.experts = nn.ModuleList([
nn.Sequential(
nn.Linear(hidden_dim, hidden_dim * 2),
nn.ReLU(),
nn.Linear(hidden_dim * 2, hidden_dim)
) for _ in range(num_experts)
])
# shared expert
self.shared_expert = nn.Sequential(
nn.Linear(hidden_dim, hidden_dim * 2),
nn.ReLU(),
nn.Linear(hidden_dim * 2, hidden_dim)
)
def forward(self, x):
# x: (B, N, hidden_dim)
B, N, D = x.shape
# flatten to (B*N, D)
flat = x.view(B * N, D)
# compute gating scores and select expert per node
scores = F.softmax(self.gate(flat), dim=-1) # (B*N, num_experts+1)
idx = scores.argmax(dim=-1) # (B*N,)
out_flat = torch.zeros_like(flat)
# apply each expert
for e in range(self.num_experts):
mask = (idx == e)
if mask.any():
out_flat[mask] = self.experts[e](flat[mask])
# apply shared expert for last index
shared_mask = (idx == self.num_experts)
if shared_mask.any():
out_flat[shared_mask] = self.shared_expert(flat[shared_mask])
# reshape back to (B, N, D)
return out_flat.view(B, N, D)
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim, num_experts):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.expert_block = ExpertBlock(hidden_dim, num_experts)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
h2 = self.expert_block(h1)
h3 = self.manba2(h2)
return h3
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
self.num_experts = args.get('num_experts', 8) # number of private experts
# discrete time embeddings
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two Sandwich blocks with MoE
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim, self.num_experts)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim, self.num_experts)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
x[...,0]= flow, x[...,1]=time_in_day, x[...,2]=day_in_week
"""
x_flow = x[..., 0]
x_time = x[..., 1]
x_day = x[..., 2]
B, T, N = x_flow.shape
assert T == self.seq_len
# project flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# time & day embeddings at last step
t_idx = (x_time[:, -1, :,] * (self.time_slots - 1)).long()
d_idx = x_day[:, -1, :,].long()
time_emb = self.time_embedding(t_idx)
day_emb = self.day_embedding(d_idx)
h0 = h0 + time_emb + day_emb
# two MoE Sandwich blocks + residuals
h1 = self.sandwich1(h0) + h0
h2 = self.sandwich2(h1) + h1
# output
out = self.out_proj(h2)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3)
return out
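
The MoE variant can be smoke-tested the same way; ExpertBlock's hard argmax routing sends each node either to one of the private experts or to the shared expert. Illustrative sizes:

import torch
args = {'horizon': 12, 'output_dim': 1, 'num_nodes': 170, 'num_experts': 8}
model = EXP(args)
x = torch.rand(2, 12, 170, 3)                           # (B, T, N, D)
x[..., 2] = torch.randint(0, 7, (2, 12, 170)).float()   # day index in 0..6
print(model(x).shape)                                   # torch.Size([2, 12, 170, 1])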

model/EXP/EXP8b.py (new file)

@@ -0,0 +1,133 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
Version with residual connections
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# (N, D) @ (D, N) -> (N, N)
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = input_dim == output_dim
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C) / adj: (N, N)
res = x
x = torch.matmul(adj, x) # (B, N, C)
x = self.theta(x)
# residual connection
if self.residual:
x = x + res
else:
x = x + self.res_proj(res)
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, T, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# dynamic graph construction
self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)
# input projection layer
self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
# graph convolution
self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)
# MANBA block
self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)
# output projection
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
x = x[..., 0] # use only the main channel (B, T, N)
B, T, N = x.shape
assert T == self.seq_len
# input projection (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
x = x.permute(0, 2, 1).reshape(B * N, T)
h = self.input_proj(x) # (B*N, hidden_dim)
h = h.view(B, N, self.hidden_dim)
t_idx = (x_time[:, -1, :,] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :,].long() # (B, N)
time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) inject them into the initial hidden state
h = h + time_emb + day_emb
# build the dynamic graph
adj = self.graph() # (N, N)
# spatial modeling: graph convolution
h = self.gc(h, adj) # (B, N, hidden_dim)
# temporal modeling: MANBA
h = self.manba(h) # (B, N, hidden_dim)
# output projection
out = self.out_proj(h) # (B, N, horizon * output_dim)
out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
return out # (B, horizon, N, output_dim)

model/STID/MLP.py (new file)

@@ -0,0 +1,29 @@
import torch
from torch import nn
class MultiLayerPerceptron(nn.Module):
"""Multi-Layer Perceptron with residual links."""
def __init__(self, input_dim, hidden_dim) -> None:
super().__init__()
self.fc1 = nn.Conv2d(
in_channels=input_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
self.fc2 = nn.Conv2d(
in_channels=hidden_dim, out_channels=hidden_dim, kernel_size=(1, 1), bias=True)
self.act = nn.ReLU()
self.drop = nn.Dropout(p=0.15)
def forward(self, input_data: torch.Tensor) -> torch.Tensor:
"""Feed forward of MLP.
Args:
input_data (torch.Tensor): input data with shape [B, D, N, 1]
Returns:
torch.Tensor: latent repr
"""
hidden = self.fc2(self.drop(self.act(self.fc1(input_data)))) # MLP
hidden = hidden + input_data # residual
return hidden

model/STID/STID.py (new file)

@@ -0,0 +1,117 @@
import torch
from torch import nn
from model.STID.MLP import MultiLayerPerceptron
class STID(nn.Module):
"""
Paper: Spatial-Temporal Identity: A Simple yet Effective Baseline for Multivariate Time Series Forecasting
Link: https://arxiv.org/abs/2208.05233
Official Code: https://github.com/zezhishao/STID
"""
def __init__(self, model_args):
super().__init__()
# attributes
self.num_nodes = model_args["num_nodes"]
self.node_dim = model_args["node_dim"]
self.input_len = model_args["input_len"]
self.input_dim = model_args["input_dim"]
self.embed_dim = model_args["embed_dim"]
self.output_len = model_args["output_len"]
self.num_layer = model_args["num_layer"]
self.temp_dim_tid = model_args["temp_dim_tid"]
self.temp_dim_diw = model_args["temp_dim_diw"]
self.time_of_day_size = model_args["time_of_day_size"]
self.day_of_week_size = model_args["day_of_week_size"]
self.if_time_in_day = model_args["if_T_i_D"]
self.if_day_in_week = model_args["if_D_i_W"]
self.if_spatial = model_args["if_node"]
# spatial embeddings
if self.if_spatial:
self.node_emb = nn.Parameter(torch.empty(self.num_nodes, self.node_dim))
nn.init.xavier_uniform_(self.node_emb)
# temporal embeddings
if self.if_time_in_day:
self.time_in_day_emb = nn.Parameter(
torch.empty(self.time_of_day_size, self.temp_dim_tid))
nn.init.xavier_uniform_(self.time_in_day_emb)
if self.if_day_in_week:
self.day_in_week_emb = nn.Parameter(
torch.empty(self.day_of_week_size, self.temp_dim_diw))
nn.init.xavier_uniform_(self.day_in_week_emb)
# embedding layer
self.time_series_emb_layer = nn.Conv2d(
in_channels=self.input_dim * self.input_len, out_channels=self.embed_dim, kernel_size=(1, 1), bias=True)
# encoding
self.hidden_dim = self.embed_dim+self.node_dim * \
int(self.if_spatial)+self.temp_dim_tid*int(self.if_day_in_week) + \
self.temp_dim_diw*int(self.if_time_in_day)
self.encoder = nn.Sequential(
*[MultiLayerPerceptron(self.hidden_dim, self.hidden_dim) for _ in range(self.num_layer)])
# regression
self.regression_layer = nn.Conv2d(
in_channels=self.hidden_dim, out_channels=self.output_len, kernel_size=(1, 1), bias=True)
def forward(self, history_data: torch.Tensor) -> torch.Tensor:
"""Feed forward of STID.
Args:
history_data (torch.Tensor): history data with shape [B, L, N, C]
Returns:
torch.Tensor: prediction with shape [B, L, N, C]
"""
# prepare data
input_data = history_data[..., range(self.input_dim)]
# input_data = history_data[..., 0:1]
if self.if_time_in_day:
t_i_d_data = history_data[..., 1]
# In the datasets used in STID, the time_of_day feature is normalized to [0, 1]. We multiply it by 288 to get the index.
# If you use other datasets, you may need to change this line.
time_in_day_emb = self.time_in_day_emb[(t_i_d_data[:, -1, :] * self.time_of_day_size).type(torch.LongTensor)]
else:
time_in_day_emb = None
if self.if_day_in_week:
d_i_w_data = history_data[..., 2]
day_in_week_emb = self.day_in_week_emb[(d_i_w_data[:, -1, :] * self.day_of_week_size).type(torch.LongTensor)]
else:
day_in_week_emb = None
# time series embedding
batch_size, _, num_nodes, _ = input_data.shape
input_data = input_data.transpose(1, 2).contiguous()
input_data = input_data.view(
batch_size, num_nodes, -1).transpose(1, 2).unsqueeze(-1)
time_series_emb = self.time_series_emb_layer(input_data)
node_emb = []
if self.if_spatial:
# expand node embeddings
node_emb.append(self.node_emb.unsqueeze(0).expand(
batch_size, -1, -1).transpose(1, 2).unsqueeze(-1))
# temporal embeddings
tem_emb = []
if time_in_day_emb is not None:
tem_emb.append(time_in_day_emb.transpose(1, 2).unsqueeze(-1))
if day_in_week_emb is not None:
tem_emb.append(day_in_week_emb.transpose(1, 2).unsqueeze(-1))
# concate all embeddings
hidden = torch.cat([time_series_emb] + node_emb + tem_emb, dim=1)
# encoding
hidden = self.encoder(hidden)
# regression
prediction = self.regression_layer(hidden)
return prediction
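
A minimal forward-shape check using the values from config/STID/PEMSD4.yaml above (random input, so this only verifies shapes):

import torch
margs = {"num_nodes": 307, "node_dim": 32, "input_len": 12, "input_dim": 3,
         "embed_dim": 32, "output_len": 12, "num_layer": 3,
         "temp_dim_tid": 32, "temp_dim_diw": 32,
         "time_of_day_size": 288, "day_of_week_size": 7,
         "if_T_i_D": True, "if_D_i_W": True, "if_node": True}
model = STID(margs)
history = torch.rand(4, 12, 307, 3)    # [B, L, N, C]: flow, time_in_day, day_in_week
print(model(history).shape)            # torch.Size([4, 12, 307, 1])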

@@ -13,7 +13,8 @@ from model.STFGNN.STFGNN import STFGNN
from model.STSGCN.STSGCN import STSGCN
from model.STGODE.STGODE import ODEGCN
from model.PDG2SEQ.PDG2Seq import PDG2Seq
from model.EXP.EXP21 import EXP as EXP
from model.STID.STID import STID
from model.EXP.EXP26 import EXP as EXP
def model_selector(model):
match model['type']:
@@ -32,5 +33,6 @@ def model_selector(model):
case 'STSGCN': return STSGCN(model)
case 'STGODE': return ODEGCN(model)
case 'PDG2SEQ': return PDG2Seq(model)
case 'STID': return STID(model)
case 'EXP': return EXP(model)