e19添加时间嵌入

2025-04-18 14:59:47 +08:00 · 2025-04-18 14:59:47 +08:00 · 0b006087ea
parent bd94d3fdd3
commit 0b006087ea
10 changed files with 975 additions and 2485 deletions
--- a/baseline.ipynb
+++ b/baseline.ipynb
--- a/config/EXP/PEMSD4.yaml
+++ b/config/EXP/PEMSD4.yaml
@ -27,9 +27,9 @@ train:
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
-  lr_decay: False
-  lr_decay_rate: 0.3
-  lr_decay_step: "5,20,40,70"
+  lr_decay: True
+  lr_decay_rate: 0.5
+  lr_decay_step: "5,20,40,65"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
--- a/model/EXP/EXP16.py
+++ b/model/EXP/EXP16.py
@ -7,7 +7,7 @@ import torch.nn.functional as F
 第一层：时间 -> 空间 -> 时间
 残差连接：层输出 + 层输入
 第二层：同样三明治结构 -> 最终输出
-无小残差
+无小残差 无效
 """

 class DynamicGraphConstructor(nn.Module):
--- a/model/EXP/EXP17.py
+++ b/model/EXP/EXP17.py
@ -0,0 +1,161 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+"""
+基于傅里叶变换优化的双层三明治结构模型
+新增：TemporalFourierBlock 用于全局捕捉时序频域特征，提升预测精度
+第一层：Fourier 时域 -> 空间 -> 时间
+残差连接：层输出 + 层输入
+第二层：同样三明治结构 -> 最终输出
+"""
+
+class TemporalFourierBlock(nn.Module):
+    """Rescale every temporal frequency bin of the input with a learnable real gain.
+
+    Input:  x of shape (B, T, N), transformed along dimension 1.
+    Output: time-domain reconstruction of shape (B, seq_len, N).
+    """
+
+    def __init__(self, seq_len):
+        super().__init__()
+        # rfft of a real series of length seq_len yields seq_len // 2 + 1 bins;
+        # one gain is learned per bin (nn.Parameter tracks gradients by default).
+        self.scale = nn.Parameter(torch.randn(seq_len // 2 + 1))
+        self.seq_len = seq_len
+
+    def forward(self, x):
+        spectrum = torch.fft.rfft(x, dim=1)            # (B, F, N), complex
+        # A real gain multiplies the real and imaginary parts alike.
+        gain = self.scale.view(1, -1, 1)
+        # Back to the time domain with an explicit output length.
+        return torch.fft.irfft(spectrum * gain, n=self.seq_len, dim=1)
+
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Pairwise similarity between the two embedding tables.
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GraphConvBlock(nn.Module):
+    """One graph-convolution step: aggregate with ``adj``, project, add skip, ReLU.
+
+    The skip path is the identity when ``input_dim == output_dim``; otherwise the
+    input passes through an extra linear layer (``res_proj``) so the shapes match.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        # x: (B, N, C); adj: (N, N), broadcast over the batch dimension by matmul.
+        if self.residual:
+            skip = x
+        else:
+            skip = self.res_proj(x)
+        aggregated = self.theta(torch.matmul(adj, x))
+        return F.relu(aggregated + skip)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, C); with batch_first=True the N dimension is the attended sequence.
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph convolution -> attention stack.
+
+    Input and output are both (B, N, hidden_dim). The block owns its own
+    ``DynamicGraphConstructor``, so every instance learns a separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        # h: (B, N, hidden_dim)
+        h = self.manba1(h)
+        # The adjacency is rebuilt from the learnable embeddings on every call.
+        h = self.gc(h, self.graph_constructor())
+        return self.manba2(h)
+
+
+class EXP(nn.Module):
+    """Two stacked SandwichBlocks on a Fourier-augmented copy of feature channel 0.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64) and 'embed_dim' (16) are optional.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.horizon = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes = args['num_nodes']
+        self.embed_dim = args.get('embed_dim', 16)
+
+        # Learnable per-frequency filter along the time axis.
+        self.fourier_block = TemporalFourierBlock(self.seq_len)
+
+        # Maps each node's length-seq_len history to one hidden vector.
+        self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
+
+        # Two sandwich blocks, each with its own learned adjacency.
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Emits all horizon steps for every node at once.
+        self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
+
+    def forward(self, x):
+        # x: (B, T, N, D_total); only feature channel 0 is used.
+        series = x[..., 0]                                   # (B, T, N)
+        B, T, N = series.shape
+        assert T == self.seq_len
+
+        # Add the frequency-filtered signal back onto the raw one.
+        series = series + self.fourier_block(series)
+
+        # (B, T, N) -> (B*N, T) -> (B, N, hidden_dim)
+        per_node = series.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(per_node).view(B, N, self.hidden_dim)
+
+        # Skip connection around the first block only.
+        h1 = self.sandwich1(h0) + h0
+        h2 = self.sandwich2(h1)
+
+        # (B, N, H*D_out) -> (B, N, horizon, output_dim) -> (B, horizon, N, output_dim)
+        out = self.out_proj(h2).view(B, N, self.horizon, self.output_dim)
+        return out.permute(0, 2, 1, 3)
--- a/model/EXP/EXP18.py
+++ b/model/EXP/EXP18.py
@ -0,0 +1,134 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
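+"""
+Frequency-domain variant of the two-layer sandwich model:
+1. rfft each node's input series along time, concatenate the real and imaginary
+   parts, and run the sandwich structure (temporal-spatial-temporal) on that
+   frequency representation.
+2. A linear output layer then maps the hidden state directly to the horizon
+   predictions; no inverse FFT is applied.
+"""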
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Pairwise similarity between the two embedding tables.
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GraphConvBlock(nn.Module):
+    """One graph-convolution step: aggregate with ``adj``, project, add skip, ReLU.
+
+    The skip path is the identity when ``input_dim == output_dim``; otherwise the
+    input passes through an extra linear layer (``res_proj``) so the shapes match.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        # x: (B, N, C)
+        if self.residual:
+            skip = x
+        else:
+            skip = self.res_proj(x)
+        aggregated = self.theta(torch.matmul(adj, x))
+        return F.relu(aggregated + skip)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, C)
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph convolution -> attention stack.
+
+    Input and output are both (B, N, C) with C == hidden_dim. The block owns its
+    own ``DynamicGraphConstructor``, so every instance learns a separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        h = self.manba1(h)
+        # The adjacency is rebuilt from the learnable embeddings on every call.
+        h = self.gc(h, self.graph_constructor())
+        return self.manba2(h)
+
+
+class EXP(nn.Module):
+    """Two stacked SandwichBlocks applied to the rfft spectrum of feature channel 0.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64) and 'embed_dim' (16) are optional.
+    The output head is a plain linear layer; no inverse FFT is performed.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.horizon = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes = args['num_nodes']
+        self.embed_dim = args.get('embed_dim', 16)
+
+        # Number of rfft bins for a real series of length seq_len.
+        self.freq_len = self.seq_len // 2 + 1
+
+        # Real and imaginary parts are concatenated, hence 2 * freq_len inputs.
+        self.freq_proj = nn.Linear(self.freq_len * 2, self.hidden_dim)
+
+        # Sandwich blocks operating on the projected spectrum.
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Linear head from hidden state to all horizon steps.
+        self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
+
+    def forward(self, x):
+        # x: (B, T, N, D_total); only feature channel 0 is used.
+        series = x[..., 0]                                      # (B, T, N)
+        B, T, N = series.shape
+        assert T == self.seq_len
+
+        # Per-node rfft along time, then put nodes before frequency bins.
+        spectrum = torch.fft.rfft(series, dim=1).permute(0, 2, 1)   # (B, N, F), complex
+
+        # Real and imaginary parts side by side on the feature axis.
+        features = torch.cat([spectrum.real, spectrum.imag], dim=-1)  # (B, N, 2F)
+
+        h0 = self.freq_proj(features)                           # (B, N, hidden_dim)
+
+        # Skip connection around the first block only.
+        h1 = self.sandwich1(h0) + h0
+        h2 = self.sandwich2(h1)
+
+        # The head output is used directly as the time-domain prediction.
+        out = self.out_proj(h2).view(B, N, self.horizon, self.output_dim)
+        return out.permute(0, 2, 1, 3)                          # (B, horizon, N, output_dim)
--- a/model/EXP/EXP19.py
+++ b/model/EXP/EXP19.py
@ -0,0 +1,128 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+"""
+使用多层感知机替换输入输出的proj层
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Pairwise similarity between the two embedding tables.
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GraphConvBlock(nn.Module):
+    """One graph-convolution step: aggregate with ``adj``, project, add skip, ReLU.
+
+    The skip path is the identity when ``input_dim == output_dim``; otherwise the
+    input passes through an extra linear layer (``res_proj``) so the shapes match.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        if self.residual:
+            skip = x
+        else:
+            skip = self.res_proj(x)
+        # Neighbourhood aggregation followed by the linear transform.
+        aggregated = self.theta(torch.matmul(adj, x))
+        return F.relu(aggregated + skip)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # Self-attention sub-layer with residual + LayerNorm.
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        # Feed-forward sub-layer with residual + LayerNorm.
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph convolution -> attention stack on hidden_dim-wide features.
+
+    The block owns its own ``DynamicGraphConstructor``, so every instance learns a
+    separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        h = self.manba1(h)
+        # The adjacency is rebuilt from the learnable embeddings on every call.
+        h = self.gc(h, self.graph_constructor())
+        return self.manba2(h)
+
+
+class MLP(nn.Module):
+    """Plain multi-layer perceptron acting on the last dimension of its input.
+
+    Args:
+        in_dim: size of the input features.
+        hidden_dims: iterable of hidden-layer widths (list, tuple, ...); may be
+            empty, in which case the network is a single linear layer.
+        out_dim: size of the output features.
+        activation: zero-argument callable returning an activation module; one
+            instance is inserted after every hidden layer, none after the last.
+    """
+
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        # Unpacking accepts any iterable, whereas list + tuple raises TypeError.
+        dims = [in_dim, *hidden_dims, out_dim]
+        layers = []
+        # Hidden layers: consecutive width pairs, each followed by an activation.
+        for d_in, d_out in zip(dims[:-2], dims[1:-1]):
+            layers += [nn.Linear(d_in, d_out), activation()]
+        # Output layer carries no activation.
+        layers.append(nn.Linear(dims[-2], dims[-1]))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    """Two stacked SandwichBlocks with MLP input and output projections.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64) and 'embed_dim' (16) are optional.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.horizon = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes = args['num_nodes']
+        self.embed_dim = args.get('embed_dim', 16)
+
+        # Input MLP: seq_len -> hidden_dim -> hidden_dim.
+        self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
+
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Output MLP: hidden_dim -> 2*hidden_dim -> horizon*output_dim.
+        self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
+
+    def forward(self, x):
+        # x: (B, T, N, D_total); only feature channel 0 is used.
+        series = x[..., 0]                                   # (B, T, N)
+        B, T, N = series.shape
+        assert T == self.seq_len
+
+        # (B, T, N) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
+        per_node = series.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(per_node).view(B, N, self.hidden_dim)
+
+        # Skip connection around the first block only.
+        h1 = self.sandwich1(h0) + h0
+        h2 = self.sandwich2(h1)
+
+        # (B, N, H*D_out) -> (B, N, horizon, output_dim) -> (B, horizon, N, output_dim)
+        out = self.out_proj(h2).view(B, N, self.horizon, self.output_dim)
+        return out.permute(0, 2, 1, 3)
--- a/model/EXP/EXP20.py
+++ b/model/EXP/EXP20.py
@ -0,0 +1,173 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+"""
+使用多层感知机替换输入输出的 proj 层，并将图卷积替换为图注意力网络（GAT）
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Learnable pairwise scores, shape (N, N).
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GATConvBlock(nn.Module):
+    """
+    Minimal single-head graph-attention layer:
+      - linearly transform every node feature,
+      - score every ordered node pair,
+      - mask out non-edges (adj <= 0) and softmax the scores per row,
+      - aggregate neighbours, add a residual and apply ELU.
+
+    NOTE(review): an adjacency that has been softmax-normalised is positive
+    everywhere, so with such an input the mask keeps every pair.
+    """
+    def __init__(self, input_dim, output_dim, alpha=0.2):
+        super().__init__()
+        self.fc = nn.Linear(input_dim, output_dim, bias=False)
+        self.attn_fc = nn.Linear(2 * output_dim, 1, bias=False)
+        self.leakyrelu = nn.LeakyReLU(alpha)
+        self.residual = (input_dim == output_dim)
+        if not self.residual:
+            self.res_fc = nn.Linear(input_dim, output_dim, bias=False)
+
+    def forward(self, x, adj):
+        """
+        x: (B, N, F_in)
+        adj: (N, N) adjacency; entries > 0 are treated as edges
+        returns h_prime: (B, N, F_out)
+        """
+        h = self.fc(x)  # (B, N, F_out)
+        f_out = h.size(-1)
+
+        # attn_fc is linear without bias, so w . [h_i ; h_j] = w_src . h_i + w_dst . h_j.
+        # Scoring the two halves separately avoids building a (B, N, N, 2*F_out) tensor.
+        w = self.attn_fc.weight[0]                      # (2 * F_out,)
+        src_score = torch.matmul(h, w[:f_out])          # (B, N), contribution of node i
+        dst_score = torch.matmul(h, w[f_out:])          # (B, N), contribution of node j
+        e = self.leakyrelu(src_score.unsqueeze(2) + dst_score.unsqueeze(1))  # (B, N, N)
+
+        # Keep attention only where adj > 0; the mask broadcasts over the batch.
+        mask = (adj > 0).unsqueeze(0)                   # (1, N, N)
+        e = e.masked_fill(~mask, float('-inf'))
+
+        # A row without any edge would softmax all -inf into NaN. Neutralise such
+        # rows before the softmax and zero their attention afterwards, so these
+        # nodes only keep the residual path.
+        isolated = ~mask.any(dim=-1, keepdim=True)      # (1, N, 1)
+        e = e.masked_fill(isolated, 0.0)
+        alpha = F.softmax(e, dim=-1)                    # (B, N, N)
+        alpha = alpha.masked_fill(isolated, 0.0)
+
+        # Weighted sum over neighbours.
+        h_prime = torch.matmul(alpha, h)                # (B, N, F_out)
+
+        # Residual connection (projected when the widths differ).
+        if self.residual:
+            h_prime = h_prime + x
+        else:
+            h_prime = h_prime + self.res_fc(x)
+
+        return F.elu(h_prime)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, input_dim); the node axis is treated as the attended sequence.
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph attention (GAT) -> attention stack on (B, N, hidden_dim).
+
+    The block owns its own ``DynamicGraphConstructor``, so every instance learns a
+    separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gat = GATConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        # h: (B, N, hidden_dim)
+        h = self.manba1(h)                          # self-attention + FFN
+        # (N, N) adjacency rebuilt from the learnable embeddings on every call.
+        h = self.gat(h, self.graph_constructor())   # GAT aggregation
+        return self.manba2(h)                       # second self-attention + FFN
+
+
+class MLP(nn.Module):
+    """Plain multi-layer perceptron acting on the last dimension of its input.
+
+    Args:
+        in_dim: size of the input features.
+        hidden_dims: iterable of hidden-layer widths (list, tuple, ...); may be
+            empty, in which case the network is a single linear layer.
+        out_dim: size of the output features.
+        activation: zero-argument callable returning an activation module; one
+            instance is inserted after every hidden layer, none after the last.
+    """
+
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        # Unpacking accepts any iterable, whereas list + tuple raises TypeError.
+        dims = [in_dim, *hidden_dims, out_dim]
+        layers = []
+        # Hidden layers: consecutive width pairs, each followed by an activation.
+        for d_in, d_out in zip(dims[:-2], dims[1:-1]):
+            layers += [nn.Linear(d_in, d_out), activation()]
+        # Output layer carries no activation.
+        layers.append(nn.Linear(dims[-2], dims[-1]))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        # Any leading shape is supported; nn.Linear acts on the last dimension.
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    """Two stacked GAT-based SandwichBlocks with MLP input and output projections.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64) and 'embed_dim' (16) are optional.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.horizon = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes = args['num_nodes']
+        self.embed_dim = args.get('embed_dim', 16)
+
+        # Input MLP: seq_len -> hidden_dim -> hidden_dim.
+        self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
+
+        # Two sandwich blocks, each with its own learned adjacency.
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Output MLP: hidden_dim -> 2*hidden_dim -> horizon*output_dim.
+        self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total); only feature channel 0 is used for prediction.
+        Returns:
+           out: (B, horizon, N, output_dim)
+        """
+        series = x[..., 0]                                   # (B, T, N)
+        B, T, N = series.shape
+        assert T == self.seq_len, f"Expected seq_len={self.seq_len}, got {T}"
+
+        # (B, T, N) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
+        per_node = series.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(per_node).view(B, N, self.hidden_dim)
+
+        # Skip connection around the first block only.
+        h1 = self.sandwich1(h0) + h0
+        h2 = self.sandwich2(h1)
+
+        # (B, N, horizon*output_dim) -> (B, N, horizon, output_dim) -> (B, horizon, N, output_dim)
+        out = self.out_proj(h2).view(B, N, self.horizon, self.output_dim)
+        return out.permute(0, 2, 1, 3)
--- a/model/EXP/EXP21.py
+++ b/model/EXP/EXP21.py
@ -0,0 +1,164 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
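+"""
+Uses MLPs for the input and output projection layers and adds discrete
+time-of-day and day-of-week embeddings to the initial node features.
+"""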
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Pairwise similarity between the two embedding tables.
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GraphConvBlock(nn.Module):
+    """One graph-convolution step: aggregate with ``adj``, project, add skip, ReLU.
+
+    The skip path is the identity when ``input_dim == output_dim``; otherwise the
+    input passes through an extra linear layer (``res_proj``) so the shapes match.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        if self.residual:
+            skip = x
+        else:
+            skip = self.res_proj(x)
+        # Neighbourhood aggregation followed by the linear transform.
+        aggregated = self.theta(torch.matmul(adj, x))
+        return F.relu(aggregated + skip)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # Self-attention sub-layer with residual + LayerNorm.
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        # Feed-forward sub-layer with residual + LayerNorm.
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph convolution -> attention stack on hidden_dim-wide features.
+
+    The block owns its own ``DynamicGraphConstructor``, so every instance learns a
+    separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        h = self.manba1(h)
+        # The adjacency is rebuilt from the learnable embeddings on every call.
+        h = self.gc(h, self.graph_constructor())
+        return self.manba2(h)
+
+
+class MLP(nn.Module):
+    """Plain multi-layer perceptron acting on the last dimension of its input.
+
+    Args:
+        in_dim: size of the input features.
+        hidden_dims: iterable of hidden-layer widths (list, tuple, ...); may be
+            empty, in which case the network is a single linear layer.
+        out_dim: size of the output features.
+        activation: zero-argument callable returning an activation module; one
+            instance is inserted after every hidden layer, none after the last.
+    """
+
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        # Unpacking accepts any iterable, whereas list + tuple raises TypeError.
+        dims = [in_dim, *hidden_dims, out_dim]
+        layers = []
+        # Hidden layers: consecutive width pairs, each followed by an activation.
+        for d_in, d_out in zip(dims[:-2], dims[1:-1]):
+            layers += [nn.Linear(d_in, d_out), activation()]
+        # Output layer carries no activation.
+        layers.append(nn.Linear(dims[-2], dims[-1]))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    """Sandwich model with MLP projections plus time-of-day and day-of-week embeddings.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64), 'embed_dim' (16), 'time_slots'
+    and 'time_slot' (minutes per slot, default 5) are optional.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        self.horizon     = args['horizon']
+        self.output_dim  = args['output_dim']
+        self.seq_len     = args.get('in_len', 12)
+        self.hidden_dim  = args.get('hidden_dim', 64)
+        self.num_nodes   = args['num_nodes']
+        self.embed_dim   = args.get('embed_dim', 16)
+
+        # ==== discrete time embeddings ====
+        # Slots per day: explicit 'time_slots', else 24h * 60m / minutes-per-slot.
+        self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding  = nn.Embedding(7, self.hidden_dim)
+
+        # The input projection only sees the flow history.
+        self.input_proj = MLP(
+            in_dim      = self.seq_len,
+            hidden_dims = [self.hidden_dim],
+            out_dim     = self.hidden_dim
+        )
+
+        # Two sandwich blocks, each with its own learned adjacency.
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Output projection to all horizon steps at once.
+        self.out_proj = MLP(
+            in_dim      = self.hidden_dim,
+            hidden_dims = [2 * self.hidden_dim],
+            out_dim     = self.horizon * self.output_dim
+        )
+
+    @staticmethod
+    def _slot_index(time_in_day, time_slots):
+        """Map a fractional time-of-day in [0, 1] to an integer slot in [0, time_slots - 1]."""
+        # Round instead of truncating: float error can put a value meant for
+        # slot k just below k, which truncation would turn into k - 1.
+        idx = torch.round(time_in_day * (time_slots - 1)).long()
+        # Clamp so values slightly outside [0, 1] cannot index outside the table.
+        return idx.clamp(0, time_slots - 1)
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total)
+           D_total >= 3 where
+             x[...,0] = flow,
+             x[...,1] = time_in_day (0 … 1, scaled here to 0 … time_slots-1),
+             x[...,2] = day_in_week (0–6)
+        Returns:
+           out: (B, horizon, N, output_dim)
+        """
+        x_flow = x[..., 0]  # (B, T, N)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day  = x[..., 2]  # (B, T, N)
+
+        B, T, N = x_flow.shape
+        assert T == self.seq_len, f"Expected seq_len={self.seq_len}, got {T}"
+
+        # 1) project the flow history: (B, T, N) -> (B*N, T) -> (B, N, hidden_dim)
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
+
+        # 2) discrete time indexes taken from the last input step
+        t_idx = self._slot_index(x_time[:, -1, :], self.time_slots)  # (B, N)
+        # Not clamped: an out-of-range weekday still fails in the embedding lookup.
+        d_idx = x_day[:, -1, :].long()                               # (B, N)
+
+        time_emb = self.time_embedding(t_idx)   # (B, N, hidden_dim)
+        day_emb  = self.day_embedding(d_idx)    # (B, N, hidden_dim)
+
+        # 3) inject both embeddings into the initial hidden state
+        h0 = h0 + time_emb + day_emb
+
+        # 4) sandwich blocks, with a skip connection around the first one
+        h1 = self.sandwich1(h0)
+        h1 = h1 + h0
+        h2 = self.sandwich2(h1)
+
+        # 5) output projection
+        out = self.out_proj(h2)                 # (B, N, horizon*output_dim)
+        out = out.view(B, N, self.horizon, self.output_dim)
+        out = out.permute(0, 2, 1, 3)           # (B, horizon, N, output_dim)
+        return out
--- a/model/EXP/EXP22.py
+++ b/model/EXP/EXP22.py
@ -0,0 +1,186 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+"""
+使用多层感知机替换输入输出的 proj 层，
+并在 EXP 模型中添加显式的空间嵌入（Spatial Embedding）。
+"""
+
+class DynamicGraphConstructor(nn.Module):
+    """Learnable dense adjacency built from two node-embedding tables.
+
+    ``forward()`` takes no input and returns a ``(node_num, node_num)`` matrix
+    whose rows are softmax-normalised, so each row sums to 1.
+    """
+
+    def __init__(self, node_num, embed_dim):
+        super().__init__()
+        # Adaptive adjacency parameters; nn.Parameter tracks gradients by default.
+        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim))
+        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim))
+
+    def forward(self):
+        # Learnable pairwise scores, shape (N, N).
+        scores = self.nodevec1 @ self.nodevec2.T
+        # Negative scores are clamped to 0 before the row-wise softmax.
+        return F.softmax(F.relu(scores), dim=-1)
+
+
+class GraphConvBlock(nn.Module):
+    """One graph-convolution step: aggregate with ``adj``, project, add skip, ReLU.
+
+    The skip path is the identity when ``input_dim == output_dim``; otherwise the
+    input passes through an extra linear layer (``res_proj``) so the shapes match.
+    """
+
+    def __init__(self, input_dim, output_dim):
+        super().__init__()
+        # Linear transform plus an optional projection for the residual path.
+        self.theta = nn.Linear(input_dim, output_dim)
+        self.residual = input_dim == output_dim
+        if not self.residual:
+            self.res_proj = nn.Linear(input_dim, output_dim)
+
+    def forward(self, x, adj):
+        # x: (B, N, F_in), adj: (N, N)
+        if self.residual:
+            skip = x
+        else:
+            skip = self.res_proj(x)
+        # Adjacency times features, then the linear transform.
+        aggregated = self.theta(torch.matmul(adj, x))
+        return F.relu(aggregated + skip)
+
+
+class MANBA_Block(nn.Module):
+    """Post-norm self-attention layer followed by a two-layer feed-forward net.
+
+    Attention uses 4 heads, so ``input_dim`` must be divisible by 4.
+    ``hidden_dim`` is the width of the feed-forward hidden layer.
+    """
+
+    def __init__(self, input_dim, hidden_dim):
+        super().__init__()
+        # Self-attention over the node axis plus a feed-forward network.
+        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
+        self.ffn = nn.Sequential(
+            nn.Linear(input_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Linear(hidden_dim, input_dim),
+        )
+        self.norm1 = nn.LayerNorm(input_dim)
+        self.norm2 = nn.LayerNorm(input_dim)
+
+    def forward(self, x):
+        # x: (B, N, C)
+        attended, _ = self.attn(x, x, x)
+        y = self.norm1(x + attended)
+        return self.norm2(y + self.ffn(y))
+
+
+class SandwichBlock(nn.Module):
+    """Attention -> graph convolution -> attention stack on (B, N, hidden_dim).
+
+    The block owns its own ``DynamicGraphConstructor``, so every instance learns a
+    separate adjacency.
+    """
+
+    def __init__(self, num_nodes, embed_dim, hidden_dim):
+        super().__init__()
+        ffn_width = hidden_dim * 2
+        self.manba1 = MANBA_Block(hidden_dim, ffn_width)
+        self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
+        self.gc = GraphConvBlock(hidden_dim, hidden_dim)
+        self.manba2 = MANBA_Block(hidden_dim, ffn_width)
+
+    def forward(self, h):
+        # h: (B, N, hidden_dim)
+        h = self.manba1(h)
+        # The adjacency is rebuilt from the learnable embeddings on every call.
+        h = self.gc(h, self.graph_constructor())
+        return self.manba2(h)
+
+
+class MLP(nn.Module):
+    """Plain multi-layer perceptron acting on the last dimension of its input.
+
+    Args:
+        in_dim: size of the input features.
+        hidden_dims: iterable of hidden-layer widths (list, tuple, ...); may be
+            empty, in which case the network is a single linear layer.
+        out_dim: size of the output features.
+        activation: zero-argument callable returning an activation module; one
+            instance is inserted after every hidden layer, none after the last.
+    """
+
+    def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
+        super().__init__()
+        # Unpacking accepts any iterable, whereas list + tuple raises TypeError.
+        dims = [in_dim, *hidden_dims, out_dim]
+        layers = []
+        # Hidden layers: consecutive width pairs, each followed by an activation.
+        for d_in, d_out in zip(dims[:-2], dims[1:-1]):
+            layers += [nn.Linear(d_in, d_out), activation()]
+        # Output layer carries no activation.
+        layers.append(nn.Linear(dims[-2], dims[-1]))
+        self.net = nn.Sequential(*layers)
+
+    def forward(self, x):
+        # The linear layers act on the last dimension.
+        return self.net(x)
+
+
+class EXP(nn.Module):
+    """Sandwich model with MLP projections plus time, weekday and per-node embeddings.
+
+    ``args`` is a dict: 'horizon', 'output_dim' and 'num_nodes' are required;
+    'in_len' (default 12), 'hidden_dim' (64), 'embed_dim' (16), 'time_slots'
+    and 'time_slot' (minutes per slot, default 5) are optional.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+        # Training and output parameters.
+        self.horizon    = args['horizon']
+        self.output_dim = args['output_dim']
+        self.seq_len    = args.get('in_len', 12)
+        self.hidden_dim = args.get('hidden_dim', 64)
+        self.num_nodes  = args['num_nodes']
+        self.embed_dim  = args.get('embed_dim', 16)
+
+        # ==== time embeddings ====
+        # Slots per day: explicit 'time_slots', else 24h * 60m / minutes-per-slot.
+        self.time_slots     = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
+        self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
+        self.day_embedding  = nn.Embedding(7, self.hidden_dim)
+
+        # ==== spatial embedding ====
+        # One learnable vector per node.
+        self.spatial_embedding = nn.Parameter(
+            torch.randn(self.num_nodes, self.hidden_dim),
+            requires_grad=True
+        )
+
+        # Input projection: an MLP over the flow history only.
+        self.input_proj = MLP(
+            in_dim      = self.seq_len,
+            hidden_dims = [self.hidden_dim],
+            out_dim     = self.hidden_dim
+        )
+
+        # Two sandwich blocks, each with its own learned adjacency.
+        self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+        self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
+
+        # Output projection to all horizon steps at once.
+        self.out_proj = MLP(
+            in_dim      = self.hidden_dim,
+            hidden_dims = [2 * self.hidden_dim],
+            out_dim     = self.horizon * self.output_dim
+        )
+
+    @staticmethod
+    def _slot_index(time_in_day, time_slots):
+        """Map a fractional time-of-day in [0, 1] to an integer slot in [0, time_slots - 1]."""
+        # Round instead of truncating: float error can put a value meant for
+        # slot k just below k, which truncation would turn into k - 1.
+        idx = torch.round(time_in_day * (time_slots - 1)).long()
+        # Clamp so values slightly outside [0, 1] cannot index outside the table.
+        return idx.clamp(0, time_slots - 1)
+
+    def forward(self, x):
+        """
+        x: (B, T, N, D_total)
+           D_total >= 3, where
+             x[...,0] = flow
+             x[...,1] = time_in_day, normalised to [0, 1]
+             x[...,2] = day_in_week (0–6)
+        Returns:
+           out: (B, horizon, N, output_dim)
+        """
+        # Split the three input channels.
+        x_flow = x[..., 0]  # (B, T, N)
+        x_time = x[..., 1]  # (B, T, N)
+        x_day  = x[..., 2]  # (B, T, N)
+
+        B, T, N = x_flow.shape
+        assert T == self.seq_len, f"序列长度应为 {self.seq_len}，但收到 {T}"
+
+        # 1) MLP over the flow history -> initial node features h0
+        x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)        # (B*N, T)
+        h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)  # (B, N, hidden_dim)
+
+        # 2) discrete time embeddings taken from the last input step
+        t_idx = self._slot_index(x_time[:, -1, :], self.time_slots)  # (B, N)
+        # Not clamped: an out-of-range weekday still fails in the embedding lookup.
+        d_idx = x_day[:, -1, :].long()                               # (B, N)
+        time_emb = self.time_embedding(t_idx)                        # (B, N, hidden_dim)
+        day_emb  = self.day_embedding(d_idx)                         # (B, N, hidden_dim)
+
+        # 3) per-node spatial embedding; the leading 1 broadcasts over the batch
+        spatial_emb = self.spatial_embedding[:N].unsqueeze(0)        # (1, N, hidden_dim)
+
+        # 4) add the three embeddings to h0
+        h0 = h0 + time_emb + day_emb + spatial_emb
+
+        # 5) sandwich blocks, with a skip connection around the first one
+        h1 = self.sandwich1(h0)
+        h1 = h1 + h0
+        h2 = self.sandwich2(h1)
+
+        # 6) output projection -> (B, horizon, N, output_dim)
+        out = self.out_proj(h2)                   # (B, N, horizon*out_dim)
+        out = out.view(B, N, self.horizon, self.output_dim)
+        out = out.permute(0, 2, 1, 3)             # (B, horizon, N, output_dim)
+        return out
--- a/model/model_selector.py
+++ b/model/model_selector.py
@ -13,7 +13,7 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP16 import EXP as EXP
+from model.EXP.EXP21 import EXP as EXP

 def model_selector(model):
    match model['type']: