e19: add time embeddings

This commit is contained in:
czzhangheng 2025-04-18 14:59:47 +08:00
parent bd94d3fdd3
commit 0b006087ea
10 changed files with 975 additions and 2485 deletions

File diff suppressed because it is too large.


@@ -27,9 +27,9 @@ train:
 epochs: 300
 lr_init: 0.003
 weight_decay: 0
-lr_decay: False
-lr_decay_rate: 0.3
-lr_decay_step: "5,20,40,70"
+lr_decay: True
+lr_decay_rate: 0.5
+lr_decay_step: "5,20,40,65"
 early_stop: True
 early_stop_patience: 15
 grad_norm: False
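Note (not part of the commit): lr_decay is now enabled, halving the rate at epochs 5/20/40/65. A minimal sketch of how such settings are typically wired into a MultiStepLR scheduler, assuming the trainer parses lr_decay_step into integer milestones (the actual trainer code is not shown in this diff):

import torch
from torch.optim.lr_scheduler import MultiStepLR

# Hypothetical parsing of the config values above; names mirror the YAML keys.
lr_init, lr_decay_rate = 0.003, 0.5
milestones = [int(s) for s in "5,20,40,65".split(",")]

model = torch.nn.Linear(4, 4)  # placeholder model
optimizer = torch.optim.Adam(model.parameters(), lr=lr_init, weight_decay=0)
scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=lr_decay_rate)

for epoch in range(300):
    # ... one training epoch ...
    scheduler.step()  # LR drops to 0.0015 after epoch 5, 0.00075 after epoch 20, etc.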


@@ -7,7 +7,7 @@ import torch.nn.functional as F
 Layer 1: temporal -> spatial -> temporal
 Residual connection: layer output + layer input
 Layer 2: same sandwich structure -> final output
-No small residuals
+No small residuals (ineffective)
 """
 class DynamicGraphConstructor(nn.Module):

model/EXP/EXP17.py (new file, 161 lines)

@ -0,0 +1,161 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
基于傅里叶变换优化的双层三明治结构模型
新增TemporalFourierBlock 用于全局捕捉时序频域特征提升预测精度
第一层Fourier 时域 -> 空间 -> 时间
残差连接层输出 + 层输入
第二层同样三明治结构 -> 最终输出
"""
class TemporalFourierBlock(nn.Module):
"""
时序傅里叶变换块
输入 x (B, T, N)
输出时域重构 (B, T, N)
"""
def __init__(self, seq_len):
super().__init__()
# 频域系数学习:对每个频率分量应用可学习缩放
# rfft 输出频率数 = seq_len//2 + 1
freq_len = seq_len // 2 + 1
self.scale = nn.Parameter(torch.randn(freq_len), requires_grad=True)
self.seq_len = seq_len
def forward(self, x):
# x: (B, T, N)
# FFT into the frequency domain
Xf = torch.fft.rfft(x, dim=1) # (B, F, N), complex
# learnable scaling: real and imaginary parts are scaled together
scale = self.scale.view(1, -1, 1)
Xf = Xf * scale
# inverse FFT back to the time domain
x_rec = torch.fft.irfft(Xf, n=self.seq_len, dim=1) # (B, T, N)
return x_rec
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C); adj: (N, N)
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C); the node dimension N is treated as the sequence length
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
"""
时间-空间-时间 三明治结构
输入/输出 (B, N, hidden_dim)
"""
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# temporal Fourier block
self.fourier_block = TemporalFourierBlock(self.seq_len)
# input projection: (B*N, T) -> hidden_dim
self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
# two sandwich blocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# temporal Fourier transform + residual
x_freq = self.fourier_block(x_main) # (B, T, N)
x_main = x_main + x_freq
# input projection: (B, T, N) -> (B*N, T) -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# first sandwich layer + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
# second sandwich layer
h2 = self.sandwich2(h1)
# output projection
out = self.out_proj(h2) # (B, N, H*D_out)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
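A minimal smoke test for EXP17 (not part of the commit); the args keys mirror what __init__ reads above, while the batch size and node count are placeholder values:

import torch
from model.EXP.EXP17 import EXP, TemporalFourierBlock

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12,
        'hidden_dim': 64, 'num_nodes': 207, 'embed_dim': 16}
model = EXP(args)

x = torch.randn(8, 12, 207, 2)            # (B, T, N, D_total); only channel 0 is used
y = model(x)
print(y.shape)                            # torch.Size([8, 12, 207, 1])

# The Fourier block keeps the temporal shape: (B, T, N) in, (B, T, N) out.
fb = TemporalFourierBlock(seq_len=12)
print(fb(torch.randn(8, 12, 207)).shape)  # torch.Size([8, 12, 207])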

model/EXP/EXP18.py (new file, 134 lines)

@ -0,0 +1,134 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
频域处理版双层三明治结构模型
1. 先做傅里叶变换 -> 频域中做三明治结构时间-空间-时间
2. 处理完成后回到时域 -> 输出预测
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C)
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
"""
时间-空间-时间结构
输入/输出 (B, N, C)
"""
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
self.freq_len = self.seq_len // 2 + 1 # number of frequency bins produced by rfft
# projection to the frequency-domain hidden dimension
self.freq_proj = nn.Linear(self.freq_len * 2, self.hidden_dim)
# sandwich structure applied in the frequency domain
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# map back out: frequency-domain features -> time-domain output
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# Fourier transform: apply rfft to each node's time series
Xf = torch.fft.rfft(x_main, dim=1) # (B, F, N), complex
# split real and imaginary parts and concatenate them as two channels
real = Xf.real.permute(0, 2, 1) # (B, N, F)
imag = Xf.imag.permute(0, 2, 1) # (B, N, F)
freq_input = torch.cat([real, imag], dim=-1) # (B, N, 2F)
# project to the hidden dimension
h = self.freq_proj(freq_input) # (B, N, hidden_dim)
# run the sandwich structure in the frequency domain
h1 = self.sandwich1(h)
h1 = h1 + h # residual connection
h2 = self.sandwich2(h1)
# output projection (horizon * output_dim values per node)
out_freq = self.out_proj(h2) # (B, N, H*D)
out_freq = out_freq.view(B, N, self.horizon, self.output_dim)
# the frequency-domain prediction is used directly as the time-domain result
out = out_freq.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
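For reference (not part of the commit): the frequency features fed to freq_proj have width 2 * (T//2 + 1), which is why freq_proj is built with in_features = freq_len * 2. A quick shape check with placeholder batch and node sizes:

import torch

T = 12
x_main = torch.randn(8, T, 207)               # (B, T, N)
Xf = torch.fft.rfft(x_main, dim=1)            # (B, T//2 + 1, N), complex
real = Xf.real.permute(0, 2, 1)               # (B, N, 7)
imag = Xf.imag.permute(0, 2, 1)               # (B, N, 7)
freq_input = torch.cat([real, imag], dim=-1)  # (B, N, 14) == (B, N, 2 * (T//2 + 1))
print(freq_input.shape)                       # torch.Size([8, 207, 14])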

model/EXP/EXP19.py (new file, 128 lines)

@ -0,0 +1,128 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的proj层
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# replaced by an MLP: input_proj (seq_len -> hidden_dim -> hidden_dim)
self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# replaced by an MLP: out_proj (hidden_dim -> 2*hidden_dim -> horizon*output_dim)
self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len
# (B, T, N) -> (B, N, T) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# MLP output -> (B, N, H*D_out)
out = self.out_proj(h2)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out

model/EXP/EXP20.py (new file, 173 lines)

@ -0,0 +1,173 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的 proj 并将图卷积替换为图注意力网络GAT
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# build the learnable adjacency matrix
adj = torch.matmul(self.nodevec1, self.nodevec2.T) # (N, N)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GATConvBlock(nn.Module):
"""
简易版 GAT 实现
- 先对每个节点特征做线性变换
- 计算每对节点间的注意力得分
- 掩码掉非边adj == 0softmax 后做加权求和
- 加上残差并经过非线性
"""
def __init__(self, input_dim, output_dim, alpha=0.2):
super().__init__()
self.fc = nn.Linear(input_dim, output_dim, bias=False)
self.attn_fc = nn.Linear(2 * output_dim, 1, bias=False)
self.leakyrelu = nn.LeakyReLU(alpha)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_fc = nn.Linear(input_dim, output_dim, bias=False)
def forward(self, x, adj):
"""
x: (B, N, F_in)
adj: (N, N), 动态学习得到的邻接矩阵
返回 h_prime: (B, N, F_out)
"""
B, N, _ = x.shape
h = self.fc(x) # (B, N, F_out)
# compute the attention score for every node pair
h_i = h.unsqueeze(2).expand(-1, -1, N, -1) # (B, N, N, F_out)
h_j = h.unsqueeze(1).expand(-1, N, -1, -1) # (B, N, N, F_out)
e = self.attn_fc(torch.cat([h_i, h_j], dim=-1)).squeeze(-1) # (B, N, N)
e = self.leakyrelu(e)
# mask: keep attention only where adj > 0, set the rest to -inf
mask = adj.unsqueeze(0).expand(B, -1, -1) > 0
e = e.masked_fill(~mask, float('-inf'))
# normalize the attention weights
alpha = F.softmax(e, dim=-1) # (B, N, N)
# aggregate neighbors
h_prime = torch.matmul(alpha, h) # (B, N, F_out)
# residual connection
if self.residual:
h_prime = h_prime + x
else:
h_prime = h_prime + self.res_fc(x)
return F.elu(h_prime)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, input_dim); the node sequence is treated as a time series
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gat = GATConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h) # self-attention + FFN
adj = self.graph_constructor() # dynamic adjacency (N, N)
h2 = self.gat(h1, adj) # GAT aggregation
h3 = self.manba2(h2) # another round of self-attention + FFN
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i + 1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
# supports any input shape: Linear acts on the last dimension
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# the original input projection is replaced by an MLP
self.input_proj = MLP(self.seq_len, [self.hidden_dim], self.hidden_dim)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# the original output projection is replaced by an MLP
self.out_proj = MLP(self.hidden_dim, [2 * self.hidden_dim], self.horizon * self.output_dim)
def forward(self, x):
"""
x: (B, T, N, D_total)
Assumes D_total >= 1; only feature channel 0 is used for prediction.
Returns:
out: (B, horizon, N, output_dim)
"""
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len, f"Expected seq_len={self.seq_len}, got {T}"
# (B, T, N) -> (B, N, T) -> (B*N, T) -> MLP -> (B, N, hidden_dim)
x_flat = x_main.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# two sandwich layers + residual
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# output projection
out = self.out_proj(h2) # (B, N, horizon * output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out

model/EXP/EXP21.py (new file, 164 lines)

@ -0,0 +1,164 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的proj层
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
res = x
x = torch.matmul(adj, x)
x = self.theta(x)
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims)-2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== NEW: discrete time embeddings ====
# number of slots in a day (e.g. 24h * 60m / time_slot_minutes)
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# input projection now still only takes the flow history
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two Sandwich blocks remain unchanged
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection unchanged
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3 where
x[...,0] = flow,
x[...,1] = time_in_day (in [0, 1], to be scaled to 0..time_slots-1),
x[...,2] = day_in_week (0-6)
"""
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len
# 1) project the flow history
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim)
# 2) lookup discrete time indexes at the last time step
# scale time_in_day ∈ [0, 1] → slot_idx ∈ {0, ..., time_slots-1}
t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :].long() # (B, N)
time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) inject them into the initial hidden state
h0 = h0 + time_emb + day_emb
# 4) the usual Sandwich + residuals
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 5) output projection
out = self.out_proj(h2) # (B, N, horizon*output_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out
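A minimal usage sketch for the time-embedding variant (not part of the commit). Channel 1 carries the normalized time of day and channel 2 the day-of-week index, as documented in forward; the config values are placeholders:

import torch
from model.EXP.EXP21 import EXP

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12, 'hidden_dim': 64,
        'num_nodes': 207, 'embed_dim': 16, 'time_slot': 5}  # 5-minute slots -> 288 per day
model = EXP(args)

B, T, N = 8, 12, 207
flow = torch.randn(B, T, N)
time_in_day = torch.rand(B, T, N)                      # already normalized to [0, 1]
day_in_week = torch.randint(0, 7, (B, T, N)).float()   # 0..6
x = torch.stack([flow, time_in_day, day_in_week], dim=-1)  # (B, T, N, 3)

out = model(x)
print(out.shape)  # torch.Size([8, 12, 207, 1])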

model/EXP/EXP22.py (new file, 186 lines)

@ -0,0 +1,186 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
"""
使用多层感知机替换输入输出的 proj
并在 EXP 模型中添加显式的空间嵌入Spatial Embedding
"""
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
# adaptive adjacency parameters
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# build the dynamic adjacency matrix
adj = torch.matmul(self.nodevec1, self.nodevec2.T) # (N, N)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
# linear transform + optional residual projection
self.theta = nn.Linear(input_dim, output_dim)
self.residual = (input_dim == output_dim)
if not self.residual:
self.res_proj = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, F_in), adj: (N, N)
res = x
x = torch.matmul(adj, x) # multiply features by the adjacency matrix
x = self.theta(x) # linear transform
# residual connection
x = x + (res if self.residual else self.res_proj(res))
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
# spatial self-attention + feed-forward network
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, N, C)
res = x
x_attn, _ = self.attn(x, x, x)
x = self.norm1(res + x_attn)
res2 = x
x_ffn = self.ffn(x)
x = self.norm2(res2 + x_ffn)
return x
class SandwichBlock(nn.Module):
def __init__(self, num_nodes, embed_dim, hidden_dim):
super().__init__()
self.manba1 = MANBA_Block(hidden_dim, hidden_dim * 2)
self.graph_constructor = DynamicGraphConstructor(num_nodes, embed_dim)
self.gc = GraphConvBlock(hidden_dim, hidden_dim)
self.manba2 = MANBA_Block(hidden_dim, hidden_dim * 2)
def forward(self, h):
# h: (B, N, hidden_dim)
h1 = self.manba1(h)
adj = self.graph_constructor()
h2 = self.gc(h1, adj)
h3 = self.manba2(h2)
return h3
class MLP(nn.Module):
def __init__(self, in_dim, hidden_dims, out_dim, activation=nn.ReLU):
super().__init__()
# multilayer perceptron
dims = [in_dim] + hidden_dims + [out_dim]
layers = []
for i in range(len(dims) - 2):
layers += [nn.Linear(dims[i], dims[i+1]), activation()]
layers += [nn.Linear(dims[-2], dims[-1])]
self.net = nn.Sequential(*layers)
def forward(self, x):
# linear map over the last dimension
return self.net(x)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
# 训练 & 输出参数
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
self.embed_dim = args.get('embed_dim', 16)
# ==== 时间嵌入 ====
self.time_slots = args.get('time_slots', 24 * 60 // args.get('time_slot', 5))
self.time_embedding = nn.Embedding(self.time_slots, self.hidden_dim)
self.day_embedding = nn.Embedding(7, self.hidden_dim)
# ==== 空间嵌入 ====
# 每个节点一个可学习的向量
self.spatial_embedding = nn.Parameter(
torch.randn(self.num_nodes, self.hidden_dim),
requires_grad=True
)
# input projection: an MLP over the flow sequence only
self.input_proj = MLP(
in_dim = self.seq_len,
hidden_dims = [self.hidden_dim],
out_dim = self.hidden_dim
)
# two SandwichBlocks
self.sandwich1 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
self.sandwich2 = SandwichBlock(self.num_nodes, self.embed_dim, self.hidden_dim)
# output projection
self.out_proj = MLP(
in_dim = self.hidden_dim,
hidden_dims = [2 * self.hidden_dim],
out_dim = self.horizon * self.output_dim
)
def forward(self, x):
"""
x: (B, T, N, D_total)
D_total >= 3其中
x[...,0] = 流量 (flow)
x[...,1] = 当天时间比 (time_in_day归一化到 [0,1])
x[...,2] = 星期几 (day_in_week06)
"""
# 拆分三条序列
x_flow = x[..., 0] # (B, T, N)
x_time = x[..., 1] # (B, T, N)
x_day = x[..., 2] # (B, T, N)
B, T, N = x_flow.shape
assert T == self.seq_len, f"Expected sequence length {self.seq_len}, got {T}"
# 1) MLP projection of the flow history -> initial node features h0
x_flat = x_flow.permute(0, 2, 1).reshape(B * N, T) # (B*N, T)
h0 = self.input_proj(x_flat).view(B, N, self.hidden_dim) # (B, N, hidden_dim)
# 2) compute the discrete time embeddings
t_idx = (x_time[:, -1, :] * (self.time_slots - 1)).long() # (B, N)
d_idx = x_day[:, -1, :].long() # (B, N)
time_emb = self.time_embedding(t_idx) # (B, N, hidden_dim)
day_emb = self.day_embedding(d_idx) # (B, N, hidden_dim)
# 3) compute the spatial embedding and expand it to the batch size
node_idx = torch.arange(N, device=x.device) # (N,)
spatial_emb = self.spatial_embedding[node_idx] # (N, hidden_dim)
spatial_emb = spatial_emb.unsqueeze(0).expand(B, -1, -1) # (B, N, hidden_dim)
# 4) add the three embeddings to h0
h0 = h0 + time_emb + day_emb + spatial_emb
# 5) two sandwich layers + residual connection
h1 = self.sandwich1(h0)
h1 = h1 + h0
h2 = self.sandwich2(h1)
# 6) output projection -> (B, horizon, N, output_dim)
out = self.out_proj(h2) # (B, N, horizon*out_dim)
out = out.view(B, N, self.horizon, self.output_dim)
out = out.permute(0, 2, 1, 3) # (B, horizon, N, output_dim)
return out


@@ -13,7 +13,7 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP16 import EXP as EXP
+from model.EXP.EXP21 import EXP as EXP
 def model_selector(model):
     match model['type']: