e19: add time embeddings

This commit is contained in:
czzhangheng 2025-04-18 14:59:47 +08:00
parent bd94d3fdd3
commit 0b006087ea
10 changed files with 975 additions and 2485 deletions

File diff suppressed because it is too large.


@@ -27,9 +27,9 @@ train:
 epochs: 300
 lr_init: 0.003
 weight_decay: 0
-lr_decay: False
-lr_decay_rate: 0.3
-lr_decay_step: "5,20,40,70"
+lr_decay: True
+lr_decay_rate: 0.5
+lr_decay_step: "5,20,40,65"
 early_stop: True
 early_stop_patience: 15
 grad_norm: False
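Note (not part of the commit): lr_decay is now enabled, halving the rate at epochs 5/20/40/65. A minimal sketch of how such settings are typically wired into a MultiStepLR scheduler, assuming the trainer parses lr_decay_step into integer milestones (the actual trainer code is not shown in this diff):

import torch
from torch.optim.lr_scheduler import MultiStepLR

# Hypothetical parsing of the config values above; names mirror the YAML keys.
lr_init, lr_decay_rate = 0.003, 0.5
milestones = [int(s) for s in "5,20,40,65".split(",")]

model = torch.nn.Linear(4, 4)  # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=lr_init, weight_decay=0)
scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=lr_decay_rate)

for epoch in range(300):
    # ... one training epoch ...
    scheduler.step()  # LR drops to 0.0015 after epoch 5, 0.00075 after epoch 20, etc.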


@@ -7,7 +7,7 @@ import torch.nn.functional as F
 Layer 1: temporal -> spatial -> temporal
 Residual connection: layer output + layer input
 Layer 2: same sandwich structure -> final output
-No small residuals
+No small residuals (ineffective)
 """
 class DynamicGraphConstructor(nn.Module):

model/EXP/EXP17.py (new file, 161 lines)

@ -0,0 +1,161 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
基于傅里叶变换优化的双层三明治结构模型
新增TemporalFourierBlock 用于全局捕捉时序频域特征提升预测精度
第一层Fourier 时域 -> 空间 -> 时间
残差连接层输出 + 层输入
第二层同样三明治结构 -> 最终输出
"""
class TemporalFourierBlock(nn.Module):
"""
时序傅里叶变换块
输入 x (B, T, N)
输出时域重构 (B, T, N)
"""
def __init__(self, seq_len):
super().__init__()
# 频域系数学习:对每个频率分量应用可学习缩放
# rfft 输出频率数 = seq_len//2 + 1
freq_len = seq_len // 2 + 1
self.scale = nn.Parameter(torch.randn(freq_len), requires_grad=True)
self.seq_len = seq_len
def forward(self, x):
# x: (B, T, N)
# FFT into the frequency domain
Xf = torch.fft.rfft(x, dim=1) # (B, F, N), complex
# learnable scaling: real and imaginary parts are scaled together
scale = self.scale.view(1, -1, 1)
Xf = Xf * scale
# inverse FFT back to the time domain
x_rec = torch.fft.irfft(Xf, n=self.seq_len, dim=1) # (B, T, N)
return x_rec
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C); adj: (N, N)
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C); the node dimension N is treated as the sequence length
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
"""
时间-空间-时间 三明治结构
输入/输出 (B, N, hidden_dim)
"""
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# temporal Fourier block
self.fourier_block = TemporalFourierBlock(self.seq_len)
# input projection: (B*N, T) -> hidden_dim
self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
# two sandwich blocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# temporal Fourier transform + residual
x_freq = self.fourier_block(x_main) # (B, T, N)
x_main = x_main + x_freq
# input projection: (B, T, N) -> (B*N, T) -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# first sandwich layer + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
# second sandwich layer
h2 = self.sandwich2(h1)
# output projection
out = self.out_proj(h2) # (B, N, H*D_out)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
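A minimal smoke test for EXP17 (not part of the commit); the args keys mirror what __init__ reads above, while the batch size and node count are placeholder values:

import torch
from model.EXP.EXP17 import EXP, TemporalFourierBlock

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12,
        'hidden_dim': 64, 'num_nodes': 207, 'embed_dim': 16}
model = EXP(args)

x = torch.randn(8, 12, 207, 2)            # (B, T, N, D_total); only channel 0 is used
y = model(x)
print(y.shape)                            # torch.Size([8, 12, 207, 1])

# The Fourier block keeps the temporal shape: (B, T, N) in, (B, T, N) out.
fb = TemporalFourierBlock(seq_len=12)
print(fb(torch.randn(8, 12, 207)).shape)  # torch.Size([8, 12, 207])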

model/EXP/EXP18.py (new file, 134 lines)

@ -0,0 +1,134 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
频域处理版双层三明治结构模型
1. 先做傅里叶变换 -> 频域中做三明治结构时间-空间-时间
2. 处理完成后回到时域 -> 输出预测
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C)
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
"""
时间-空间-时间结构
输入/输出 (B, N, C)
"""
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
self.freq_len = self.seq_len // 2 + 1 # number of frequency bins produced by rfft
# projection to the frequency-domain hidden dimension
self.freq_proj = nn.Linear(self.freq_len * 2, self.hidden_dim)
# sandwich structure applied in the frequency domain
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# map back out: frequency-domain features -> time-domain output
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# Fourier transform: apply rfft to each node's time series
Xf = torch.fft.rfft(x_main, dim=1) # (B, F, N), complex
# split real and imaginary parts and concatenate them as two channels
real = Xf.real.permute(0, 2, 1) # (B, N, F)
imag = Xf.imag.permute(0, 2, 1) # (B, N, F)
freq_input = torch.cat([real, imag], dim=-1) # (B, N, 2F)
# project to the hidden dimension
h = self.freq_proj(freq_input) # (B, N, hidden_dim)
# run the sandwich structure in the frequency domain
h1 = self.sandwich1(h)
h1 = h1 + h # residual connection
h2 = self.sandwich2(h1)
# output projection (horizon * output_dim values per node)
out_freq = self.out_proj(h2) # (B, N, H*D)
out_freq = out_freq.view(B, N, self.horizon, self.output_dim)
# the frequency-domain prediction is used directly as the time-domain result
out = out_freq.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
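For reference (not part of the commit): the frequency features fed to freq_proj have width 2 * (T//2 + 1), which is why freq_proj is built with in_features = freq_len * 2. A quick shape check with placeholder batch and node sizes:

import torch

T = 12
x_main = torch.randn(8, T, 207)               # (B, T, N)
Xf = torch.fft.rfft(x_main, dim=1)            # (B, T//2 + 1, N), complex
real = Xf.real.permute(0, 2, 1)               # (B, N, 7)
imag = Xf.imag.permute(0, 2, 1)               # (B, N, 7)
freq_input = torch.cat([real, imag], dim=-1)  # (B, N, 14) == (B, N, 2 * (T//2 + 1))
print(freq_input.shape)                       # torch.Size([8, 207, 14])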

model/EXP/EXP19.py (new file, 128 lines)

@ -0,0 +1,128 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的proj层
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# replaced by an MLP: input_proj (seq_len -> hidden_dim -> hidden_dim)
self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# replaced by an MLP: out_proj (hidden_dim -> 2*hidden_dim -> horizon*output_dim)
self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# (B, T, N) -> (B, N, T) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# MLP output -> (B, N, H*D_out)
out = self.out_proj(h2)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out

model/EXP/EXP20.py (new file, 173 lines)

@ -0,0 +1,173 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的 proj 并将图卷积替换为图注意力网络GAT
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# build the learnable adjacency matrix
adj = torch.matmul(self.nodevec1, self.nodevec2.T) # (N, N)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GATConvBlock(nn.Module):
"""
简易版 GAT 实现
- 先对每个节点特征做线性变换
- 计算每对节点间的注意力得分
- 掩码掉非边adj == 0softmax 后做加权求和
- 加上残差并经过非线性
"""
def __init__(self, input_dim, output_dim, alpha=0.2):
super().__init__()
self.fc = nn.Linear(input_dim, output_dim, bias=False)
self.attn_fc = nn.Linear(2 * output_dim, 1, bias=False)
self.leakyrelu = nn.LeakyReLU(alpha)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_fc = nn.Linear(input_dim, output_dim, bias=False)
def forward(self, x, adj):
"""
x: (B, N, F_in)
adj: (N, N), 动态学习得到的邻接矩阵
返回 h_prime: (B, N, F_out)
"""
B, N, _ = x.shape
h = self.fc(x) # (B, N, F_out)
# compute the attention score for every node pair
h_i = h.unsqueeze(2).expand(-1, -1, N, -1) # (B, N, N, F_out)
h_j = h.unsqueeze(1).expand(-1, N, -1, -1) # (B, N, N, F_out)
e = self.attn_fc(torch.cat([h_i, h_j], dim=-1)).squeeze(-1) # (B, N, N)
e = self.leakyrelu(e)
# mask: keep attention only where adj > 0, set the rest to -inf
mask = adj.unsqueeze(0).expand(B, -1, -1) > 0
e = e.masked_fill(~mask, float('-inf'))
# normalize the attention weights
alpha = F.softmax(e, dim=-1) # (B, N, N)
# aggregate neighbors
h_prime = torch.matmul(alpha, h) # (B, N, F_out)
# residual connection
if self.residual:
h_prime = h_prime + x
else:
h_prime = h_prime + self.res_fc(x)
return F.elu(h_prime)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, input_dim); the node sequence is treated as a time series
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gat = GATConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h) # self-attention + FFN
adj = self.graph_constructor() # dynamic adjacency (N, N)
h2 = self.gat(h1, adj) # GAT aggregation
h3 = self.manba2(h2) # another round of self-attention + FFN
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
# supports any input shape: Linear acts on the last dimension
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# the original input projection is replaced by an MLP
self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# the original output projection is replaced by an MLP
self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
def forward(self, x):
"""
x: (B, T, N, D_total)
Assumes D_total >= 1; only feature channel 0 is used for prediction.
Returns:
out: (B, horizon, N, output_dim)
"""
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len, f"Expected seq_len={self.seq_len}, got {T}"
# (B, T, N) -> (B, N, T) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# two sandwich layers + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# output projection
out = self.out_proj(h2) # (B, N, horizon * output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out

model/EXP/EXP21.py (new file, 164 lines)

@ -0,0 +1,164 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的proj层
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== NEW: discrete time embeddings ====
# number of slots in a day (e.g. 24h * 60m / time_slot_minutes)
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection now still only takes the flow history
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two Sandwich blocks remain unchanged
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection unchanged
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3 where
x[...,0] = flow,
x[...,1] = time_in_day (in [0, 1], to be scaled to 0..time_slots-1),
x[...,2] = day_in_week (0-6)
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len
# 1) project the flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# 2) lookup discrete time indexes at the last time step
# scale time_in_day ∈ [0, 1] → slot_idx ∈ {0, ..., time_slots-1}
t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :].long() # (B, N)
time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) inject them into the initial hidden state
h0 = h0 + time_emb + day_emb
# 4) the usual Sandwich + residuals
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 5) output projection
out = self.out_proj(h2) # (B, N, horizon*output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
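A minimal usage sketch for the time-embedding variant (not part of the commit). Channel 1 carries the normalized time of day and channel 2 the day-of-week index, as documented in forward; the config values are placeholders:

import torch
from model.EXP.EXP21 import EXP

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12, 'hidden_dim': 64,
        'num_nodes': 207, 'embed_dim': 16, 'time_slot': 5}  # 5-minute slots -> 288 per day
model = EXP(args)

B, T, N = 8, 12, 207
flow = torch.randn(B, T, N)
time_in_day = torch.rand(B, T, N)                      # already normalized to [0, 1]
day_in_week = torch.randint(0, 7, (B, T, N)).float()   # 0..6
x = torch.stack([flow, time_in_day, day_in_week], dim=-1)  # (B, T, N, 3)

out = model(x)
print(out.shape)  # torch.Size([8, 12, 207, 1])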

model/EXP/EXP22.py (new file, 186 lines)

@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的 proj
并在 EXP 模型中添加显式的空间嵌入Spatial Embedding
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
# adaptive adjacency parameters
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# build the dynamic adjacency matrix
adj = torch.matmul(self.nodevec1, self.nodevec2.T) # (N, N)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
# linear transform + optional residual projection
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, F_in), adj: (N, N)
res = x
x = torch.matmul(adj, x) # multiply features by the adjacency matrix
x = self.theta(x) # linear transform
# residual connection
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
# spatial self-attention + feed-forward network
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
# multilayer perceptron
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
# linear map over the last dimension
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
# 训练 & 输出参数
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== 时间嵌入 ====
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# ==== 空间嵌入 ====
# 每个节点一个可学习的向量
self.spatial_embedding = nn.Parameter(
torch.randn(self.num_nodes, self.hidden_dim),
requires_grad=True
)
# input projection: an MLP over the flow sequence only
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3其中
x[...,0] = 流量 (flow)
x[...,1] = 当天时间比 (time_in_day归一化到 [0,1])
x[...,2] = 星期几 (day_in_week06)
"""
# 拆分三条序列
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len, f"Expected sequence length {self.seq_len}, got {T}"
# 1) MLP projection of the flow history -> initial node features h0
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T) # (B*N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim) # (B, N, hidden_dim)
# 2) compute the discrete time embeddings
t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :].long() # (B, N)
time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) compute the spatial embedding and expand it to the batch size
node_idx = torch.arange(N, device=x.device) # (N,)
spatial_emb = self.spatial_embedding[node_idx] # (N, hidden_dim)
spatial_emb = spatial_emb.unsqueeze(0).expand(B, -1, -1) # (B, N, hidden_dim)
# 4) add the three embeddings to h0
h0 = h0 + time_emb + day_emb + spatial_emb
# 5) two sandwich layers + residual connection
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 6) output projection -> (B, horizon, N, output_dim)
out = self.out_proj(h2) # (B, N, horizon*out_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out


@@ -13,7 +13,7 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP16 import EXP as EXP
+from model.EXP.EXP21 import EXP as EXP
 def model_selector(model):
     match model['type']: