Experiments on the dev branch

czzhangheng 2025-04-17 15:57:39 +08:00
parent 229b6320b9
commit c9a5a54d90
20 changed files with 23651 additions and 2361 deletions

File diff suppressed because it is too large

16544
baseline1.ipynb Normal file

File diff suppressed because it is too large

51
config/EXP/PEMSD4.yaml Normal file

@@ -0,0 +1,51 @@
data:
  num_nodes: 307
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7
model:
  input_dim: 1
  output_dim: 1
  embed_dim: 10
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  use_day: True
  use_week: True
  graph_size: 30
  expert_nums: 8
  top_k: 2
train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True
test:
  mae_thresh: null
  mape_thresh: 0.0
log:
  log_step: 200
  plot: False

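The EXP models read these values from a single flat dict (args['num_nodes'], args['rnn_units'], args['horizon'], ...), so the YAML sections presumably get merged somewhere in the pipeline. A minimal loading sketch with PyYAML; the merge step and the relative path are assumptions, not code from this commit:

import yaml

with open("config/EXP/PEMSD4.yaml") as f:
    cfg = yaml.safe_load(f)

# Merge the sections into the flat dict the models index (assumed wiring).
model_args = {**cfg["data"], **cfg["model"], **cfg["train"]}
print(model_args["num_nodes"], model_args["rnn_units"], model_args["horizon"])  # 307 64 12
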
52
config/EXP/PEMSD8.yaml Normal file

@@ -0,0 +1,52 @@
data:
  num_nodes: 170
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7
model:
  input_dim: 1
  output_dim: 1
  embed_dim: 12
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  use_day: True
  use_week: True
  graph_size: 30
  expert_nums: 8
  top_k: 2
  hidden_dim: 64
train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True
test:
  mae_thresh: null
  mape_thresh: 0.0
log:
  log_step: 200
  plot: False

49
config/EXPB/PEMSD4.yaml Normal file

@@ -0,0 +1,49 @@
data:
  num_nodes: 307
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7
model:
  input_dim: 1
  output_dim: 1
  embed_dim: 10
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  patch_size: 3
  use_day: True
  use_week: True
train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True
test:
  mae_thresh: null
  mape_thresh: 0.0
log:
  log_step: 200
  plot: False

@@ -18,7 +18,7 @@ log:
   plot: false
 model:
   cheb_order: 2
-  embed_dim: 5
+  embed_dim: 12
   input_dim: 1
   num_layers: 1
   output_dim: 1
@@ -29,10 +29,10 @@ test:
   mae_thresh: None
   mape_thresh: 0.001
 train:
-  batch_size: 64
+  batch_size: 12
   early_stop: true
-  early_stop_patience: 15
-  epochs: 100
+  early_stop_patience: 30
+  epochs: 200
   grad_norm: false
   loss_func: mae
   lr_decay: true
@@ -41,5 +41,5 @@ train:
   lr_init: 0.003
   max_grad_norm: 5
   real_value: true
-  seed: 12
+  seed: 3407
   weight_decay: 0

123
model/EXP/EXP0.py Normal file

@@ -0,0 +1,123 @@
import torch, torch.nn as nn, torch.nn.functional as F
from collections import OrderedDict
class DGCRM(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim, num_layers=1):
super().__init__()
self.node_num, self.input_dim, self.num_layers = node_num, dim_in, num_layers
self.cells = nn.ModuleList([
DDGCRNCell(node_num, dim_in if i == 0 else dim_out, dim_out, cheb_k, embed_dim)
for i in range(num_layers)
])
def forward(self, x, init_state, node_embeddings):
# x: (B, T, N, D)
assert x.shape[2] == self.node_num and x.shape[3] == self.input_dim
for i in range(self.num_layers):
state, inner = init_state[i].to(x.device), []
for t in range(x.shape[1]):
state = self.cells[i](x[:, t, :, :], state, [node_embeddings[0][:, t, :, :], node_embeddings[1]])
inner.append(state)
init_state[i] = state
x = torch.stack(inner, dim=1)
return x, init_state
def init_hidden(self, bs):
return torch.stack([cell.init_hidden_state(bs) for cell in self.cells], dim=0)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.num_node, self.input_dim, self.hidden_dim = args['num_nodes'], args['input_dim'], args['rnn_units']
self.output_dim, self.horizon, self.num_layers = args['output_dim'], args['horizon'], args['num_layers']
self.use_day, self.use_week = args['use_day'], args['use_week']
self.node_embeddings1 = nn.Parameter(torch.randn(self.num_node, args['embed_dim']))
# the second set of node embeddings is no longer used, which reduces the parameter count
self.T_i_D_emb = nn.Parameter(torch.empty(288, args['embed_dim']))
self.D_i_W_emb = nn.Parameter(torch.empty(7, args['embed_dim']))
self.drop = nn.Dropout(0.1)
# a single encoder is used, saving one forward pass
self.encoder = DGCRM(self.num_node, self.input_dim, self.hidden_dim,
args['cheb_order'], args['embed_dim'], self.num_layers)
# main prediction head: base forecast
self.base_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim))
# residual prediction head: refines the forecast using the most recent input frame; input channels are hidden_dim + 1
self.res_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim + 1))
def forward(self, source):
# source: (B, T, N, D_total); channel 0 is the main observation, channels 1 and 2 are time encodings
node_embed = self.node_embeddings1
if self.use_day:
node_embed = node_embed * self.T_i_D_emb[(source[..., 1] * 288).long()]
if self.use_week:
node_embed = node_embed * self.D_i_W_emb[source[..., 2].long()]
node_embeddings = [node_embed, self.node_embeddings1]
inp = source[..., 0].unsqueeze(-1) # (B, T, N, 1)
init = self.encoder.init_hidden(inp.shape[0])
enc_out, _ = self.encoder(inp, init, node_embeddings)
# take the last step's hidden state as the representation, shape: (B, 1, N, hidden_dim)
rep = self.drop(enc_out[:, -1:, :, :])
# base forecast
base = self.base_conv(rep)
# for the residual branch, concatenate the most recent raw input frame as compensation information, widening the channels
res_in = torch.cat([rep, inp[:, -1:, :, :]], dim=-1) # (B, 1, N, hidden_dim+1)
res = self.res_conv(res_in)
return base + res
class DDGCRNCell(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim):
super().__init__()
self.node_num, self.hidden_dim = node_num, dim_out
self.gate = DGCN(dim_in + dim_out, 2 * dim_out, cheb_k, embed_dim, node_num)
self.update = DGCN(dim_in + dim_out, dim_out, cheb_k, embed_dim, node_num)
self.ln = nn.LayerNorm(dim_out)
def forward(self, x, state, node_embeddings):
inp = torch.cat((x, state), -1)
z_r = torch.sigmoid(self.gate(inp, node_embeddings))
z, r = torch.split(z_r, self.hidden_dim, -1)
hc = torch.tanh(self.update(torch.cat((x, z * state), -1), node_embeddings))
out = r * state + (1 - r) * hc
return self.ln(out)
def init_hidden_state(self, bs):
return torch.zeros(bs, self.node_num, self.hidden_dim)
class DGCN(nn.Module):
def __init__(self, dim_in, dim_out, cheb_k, embed_dim, num_nodes):
super().__init__()
self.cheb_k, self.embed_dim = cheb_k, embed_dim
self.weights_pool = nn.Parameter(torch.FloatTensor(embed_dim, cheb_k, dim_in, dim_out))
self.weights = nn.Parameter(torch.FloatTensor(cheb_k, dim_in, dim_out))
self.bias_pool = nn.Parameter(torch.FloatTensor(embed_dim, dim_out))
self.bias = nn.Parameter(torch.FloatTensor(dim_out))
self.fc = nn.Sequential(OrderedDict([
('fc1', nn.Linear(dim_in, 16)),
('sigmoid1', nn.Sigmoid()),
('fc2', nn.Linear(16, 2)),
('sigmoid2', nn.Sigmoid()),
('fc3', nn.Linear(2, embed_dim))
]))
# register the identity matrix once as a buffer instead of rebuilding it on every call
self.register_buffer('eye', torch.eye(num_nodes))
def forward(self, x, node_embeddings):
supp1 = self.eye.to(node_embeddings[0].device)
filt = self.fc(x)
nodevec = torch.tanh(node_embeddings[0] * filt)
supp2 = self.get_laplacian(F.relu(torch.matmul(nodevec, nodevec.transpose(2, 1))), supp1)
x_g = torch.stack([torch.einsum("nm,bmc->bnc", supp1, x),
torch.einsum("bnm,bmc->bnc", supp2, x)], dim=1)
weights = torch.einsum('nd,dkio->nkio', node_embeddings[1], self.weights_pool)
bias = torch.matmul(node_embeddings[1], self.bias_pool)
return torch.einsum('bnki,nkio->bno', x_g.permute(0, 2, 1, 3), weights) + bias
@staticmethod
def get_laplacian(graph, I, normalize=True):
D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))
return torch.matmul(torch.matmul(D_inv, graph), D_inv) if normalize else torch.matmul(
torch.matmul(D_inv, graph + I), D_inv)

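The embedding tables T_i_D_emb / D_i_W_emb and the DGCN weight/bias pools above are allocated with torch.empty / torch.FloatTensor and never initialized inside the module, so parameter initialization presumably happens elsewhere in the training pipeline. A hedged sketch of such an initialization, in the style commonly used with AGCRN/DDGCRN-type models (an assumption, not code from this commit):

import torch.nn as nn

def init_exp_parameters(model):
    # Xavier-init every matrix/tensor parameter, zero-init every vector (bias) parameter.
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
        else:
            nn.init.zeros_(p)
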
122
model/EXP/EXP1.py Normal file

@@ -0,0 +1,122 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class SimpleExpert(nn.Module):
"""
专家内部只做
1. 上采样到固定 graph_size
2. 对每个时间步每个节点用 Linear(input_dim -> hidden_dim)
3. 下采样回原始 selected_node 数量
"""
def __init__(self, input_dim, hidden_dim, graph_size):
super().__init__()
self.graph_size = graph_size
self.linear = nn.Linear(input_dim, hidden_dim)
def up_sample(self, x, target_size):
# x: (B, T, N_sel, D)
B, T, N, D = x.shape
# 1) merge B and T to get (B*T, N_sel, D)
x2 = x.reshape(B * T, N, D)
# 2) transpose to (B*T, D, N_sel) for 1D linear interpolation
x2 = x2.permute(0, 2, 1) # (B*T, D, N_sel)
# 3) interpolate to graph_size
x2 = F.interpolate(x2, size=target_size, mode='linear', align_corners=True) # (B*T, D, graph_size)
# 4) restore to (B*T, graph_size, D)
x2 = x2.permute(0, 2, 1) # (B*T, graph_size, D)
# 5) split back into (B, T, graph_size, D)
x_up = x2.reshape(B, T, target_size, D)
return x_up
def down_sample(self, x, target_size):
# x: (B, T, graph_size, H)
B, T, G, H = x.shape
# 1) merge B and T
x2 = x.reshape(B * T, G, H) # (B*T, graph_size, H)
# 2) transpose to (B*T, H, graph_size)
x2 = x2.permute(0, 2, 1) # (B*T, H, graph_size)
# 3) interpolate to target_size
x2 = F.interpolate(x2, size=target_size, mode='linear', align_corners=True) # (B*T, H, target_size)
# 4) restore to (B*T, target_size, H)
x2 = x2.permute(0, 2, 1) # (B*T, target_size, H)
# 5) split back into (B, T, target_size, H)
x_down = x2.reshape(B, T, target_size, H)
return x_down
def forward(self, x):
# x: (B, T, N_sel, D)
x_up = self.up_sample(x, self.graph_size) # (B, T, graph_size, D)
out = self.linear(x_up) # (B, T, graph_size, hidden_dim)
out_down = self.down_sample(out, x.shape[2]) # (B, T, N_sel, hidden_dim)
return out_down
class DGCRM_MOE(nn.Module):
"""
去掉 DGCRM SimpleExpert 作为专家输出 (B, T, N, output_dim)
- gate: last step -> top_k 专家
- 每个专家上采样->linear->下采样
- 累加所有专家输出 -> (B, T, N, hidden_dim)
- Linear(hidden_dim -> output_dim) -> (B, T, N, output_dim)
- 返回 balance_loss 用于正则化
"""
def __init__(self, args):
super().__init__()
self.graph_size = args['graph_size']
self.expert_nums = args['expert_nums']
self.top_k = args['top_k']
self.input_dim = args['input_dim']
self.hidden_dim = args['hidden_dim']
self.output_dim = args['output_dim']
self.num_node = args['num_nodes']
# gating network
self.gate_proj = nn.Linear(self.input_dim, self.hidden_dim)
self.gate = nn.Linear(self.hidden_dim, self.expert_nums)
# list of SimpleExpert modules
self.experts = nn.ModuleList([
SimpleExpert(self.input_dim, self.hidden_dim, self.graph_size)
for _ in range(self.expert_nums)
])
# final multi-step prediction head: hidden_dim -> output_dim
self.pred = nn.Linear(self.hidden_dim, self.output_dim)
def forward(self, x, **kwargs):
"""
x: (B, T, N, D_total)只取第0通道作为主观测
returns:
out: (B, T, N, output_dim)
balance_loss: 标量
"""
x = x[..., 0:1] # (B, T, N, 1)
B, T, N, D = x.shape
# 1. routing
last = x[:, -1, :, :] # (B, N, 1)
g = F.relu(self.gate_proj(last)) # (B, N, hidden_dim)
logits = self.gate(g) # (B, N, expert_nums)
rw = F.softmax(logits, dim=-1) # (B, N, expert_nums)
topk_w, topk_idx = torch.topk(rw, self.top_k, -1) # (B, N, top_k); topk_w could optionally be used as mixing weights
# 2. expert processing
expert_out = torch.zeros(B, T, N, self.hidden_dim, device=x.device)
balance_loss = 0.0
for i, expert in enumerate(self.experts):
mask = (topk_idx == i) # (B, N, top_k)
if not mask.any(): continue
# penalize the gap between the mean routing probability and the uniform target 1/expert_nums
balance_loss += (rw[..., i].mean() - 1.0/self.expert_nums)**2
for b in range(B):
sel = torch.nonzero(mask[b].any(-1)).squeeze(-1)
if sel.numel()==0: continue
seq = x[b:b+1, :, sel, :] # (1, T, sel, 1)
out_seq = expert(seq) # (1, T, sel, hidden_dim)
expert_out[b:b+1, :, sel, :] += out_seq
# 3. prediction head
out = self.pred(expert_out) # (B, T, N, output_dim)
return out, balance_loss

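Unlike the other EXP variants, DGCRM_MOE returns a (prediction, balance_loss) tuple, so a trainer has to unpack it and add the balance term to the objective. A smoke-test sketch with made-up sizes (the dict keys are the ones read in __init__; the values, the dummy target and the 0.01 weight are assumptions):

import torch
import torch.nn.functional as F

args = {'graph_size': 30, 'expert_nums': 8, 'top_k': 2,
        'input_dim': 1, 'hidden_dim': 64, 'output_dim': 1, 'num_nodes': 170}
model = DGCRM_MOE(args)
x = torch.randn(2, 12, 170, 3)                 # (B, T, N, D_total)
out, balance_loss = model(x)                   # out: (2, 12, 170, 1)
loss = F.l1_loss(out, torch.zeros_like(out)) + 0.01 * balance_loss  # MAE on a dummy target plus the balance term
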
84
model/EXP/EXP2.py Normal file

@@ -0,0 +1,84 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class PositionalEncoding(nn.Module):
"""标准的位置编码,用于给 Transformer 输入添加位置信息"""
def __init__(self, d_model, max_len=500):
super().__init__()
pe = torch.zeros(max_len, d_model) # (max_len, d_model)
position = torch.arange(0, max_len).unsqueeze(1).float() # (max_len,1)
div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
pe[:, 0::2] = torch.sin(position * div_term) # even dimensions
pe[:, 1::2] = torch.cos(position * div_term) # odd dimensions
self.register_buffer('pe', pe) # not a trainable parameter
def forward(self, x):
# x: (T, B, d_model)
T = x.size(0)
return x + self.pe[:T].unsqueeze(1) # (T, 1, d_model) broadcast to (T, B, d_model)
class TemporalTransformerForecast(nn.Module):
"""
Transformer-based 多步预测
- 只使用 x[...,0] 作为输入通道
- 对每个节点的长度-T 序列并行应用 Transformer Encoder
- 取最后时间步的输出通过一个 Linear 映射到 horizon * output_dim
- 重塑为 (B, horizon, N, output_dim)
"""
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
assert self.seq_len is not None, "please set in_len (input sequence length) in args"
d_model = args.get('d_model', 64)
nhead = args.get('nhead', 4)
num_layers = args.get('num_layers', 2)
dim_ff = args.get('dim_feedforward', d_model * 4)
dropout = args.get('dropout', 0.1)
# project the single input channel to d_model
self.input_proj = nn.Linear(1, d_model)
self.pos_encoder = PositionalEncoding(d_model, max_len=self.seq_len)
encoder_layer = nn.TransformerEncoderLayer(
d_model=d_model, nhead=nhead, dim_feedforward=dim_ff, dropout=dropout,
batch_first=False # the (T, B, D) layout is used
)
self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
# map the last step's output to the multi-step forecast
self.decoder = nn.Linear(d_model, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len, f"actual sequence length {T} != configured in_len {self.seq_len}"
# rearrange: each node's sequence is treated as an independent sample
# (B, T, N) -> (B*N, T, 1)
seq = x_main.permute(0, 2, 1).reshape(B * N, T, 1)
# projection & positional encoding
emb = self.input_proj(seq) # (B*N, T, d_model)
emb = emb.permute(1, 0, 2) # -> (T, B*N, d_model)
emb = self.pos_encoder(emb) # add positional information
# Transformer Encoder
out = self.transformer(emb) # (T, B*N, d_model)
# take the hidden vector at the last time step
last = out[-1, :, :] # (B*N, d_model)
# decode into the multi-step forecast
pred_flat = self.decoder(last) # (B*N, horizon * output_dim)
# reshape to (B, N, horizon, output_dim) -> (B, horizon, N, output_dim)
pred = pred_flat.view(B, N, self.horizon, self.output_dim) \
.permute(0, 2, 1, 3)
return pred

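A quick shape check for TemporalTransformerForecast (batch size, node count and channel count are made up; d_model, nhead and num_layers fall back to the args.get defaults above):

import torch

model = TemporalTransformerForecast({'horizon': 12, 'output_dim': 1, 'in_len': 12})
x = torch.randn(4, 12, 307, 3)        # (B, T, N, D_total)
y = model(x)
print(y.shape)                        # torch.Size([4, 12, 307, 1])
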
47
model/EXP/EXP3.py Normal file

@@ -0,0 +1,47 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class EXP(nn.Module):
"""
高效的多步预测模型
- 输入 x: (B, T, N, D_total)只使用主观测通道 x[...,0]
- 对每个节点的序列 x[b,:,n] (长度 T) 通过 shared MLP 编码
- 最后映射到 horizon * output_dim并重塑为 (B, horizon, N, output_dim)
"""
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
# hidden dimension, tunable
hidden_dim = args.get('hidden_dim', 128)
T = 12
self.encoder = nn.Sequential(
nn.Linear(in_features=T, out_features=hidden_dim),
nn.ReLU(),
nn.Dropout(0.1),
)
# the decoder maps hidden_dim -> horizon * output_dim
self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
# 1) take only the main observation channel
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
# 2) rearrange and flatten: each node's sequence becomes one sample
# (B, T, N) -> (B, N, T) -> (B*N, T)
h_in = x_main.permute(0, 2, 1).reshape(B * N, T)
# 3) shared MLP encoding
h = self.encoder(h_in) # (B*N, hidden_dim)
# 4) decode to predictions for all horizon steps
out_flat = self.decoder(h) # (B*N, horizon * output_dim)
# 5) reshape to (B, horizon, N, output_dim)
out = out_flat.view(B, N, self.horizon, self.output_dim) \
.permute(0, 2, 1, 3)
return out

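The per-node shared-MLP idea above can be smoke-tested in a few lines (the sizes are assumptions; EXP here is the class defined in model/EXP/EXP3.py):

import torch

model = EXP({'horizon': 12, 'output_dim': 1, 'hidden_dim': 128})
x = torch.randn(8, 12, 170, 3)        # (B, T, N, D_total)
y = model(x)
assert y.shape == (8, 12, 170, 1)     # (B, horizon, N, output_dim)
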
27
model/EXP/EXP3_easy.py Normal file

@@ -0,0 +1,27 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
hidden_dim = args.get('hidden_dim', 128)
self.encoder = nn.Sequential(
nn.Linear(in_features=12, out_features=hidden_dim),
nn.ReLU(),
nn.Dropout(0.1),
)
self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
h_in = x_main.permute(0, 2, 1).reshape(B * N, T)
h = self.encoder(h_in) # (B*N, hidden_dim)
out_flat = self.decoder(h) # (B*N, horizon * output_dim)
out = out_flat.view(B, N, self.horizon, self.output_dim) \
.permute(0, 2, 1, 3)
return out

81
model/EXP/EXP4.py Normal file

@@ -0,0 +1,81 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class ResidualMLPBlock(nn.Module):
"""
一个隐藏维度下的残差块
x -> Linear(hidden->hidden) -> ReLU -> Dropout
-> Linear(hidden->hidden) -> Dropout
+ 残差跳连 -> LayerNorm
"""
def __init__(self, hidden_dim, dropout=0.1):
super().__init__()
self.fc1 = nn.Linear(hidden_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, hidden_dim)
self.drop = nn.Dropout(dropout)
self.norm = nn.LayerNorm(hidden_dim)
def forward(self, x):
resid = x
x = F.relu(self.fc1(x))
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return self.norm(x + resid)
class EXP(nn.Module):
"""
带残差连接的多层 MLP 预测模型
- 输入 x: (B, T, N, D_total)使用 x[...,0]
- seq_len=T 的序列先投影到 hidden_dim
再经过 num_blocks ResidualMLPBlock
- 最后投影到 horizon * output_dim重塑为 (B, horizon, N, output_dim)
"""
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12) # sequence length T, default 12
hidden_dim = args.get('hidden_dim', 64)
num_blocks = args.get('num_mlp_layers', 2)
dropout = args.get('dropout', 0.1)
# 1) input projection: T -> hidden_dim
self.input_proj = nn.Linear(self.seq_len, hidden_dim)
self.input_drop = nn.Dropout(dropout)
# 2) residual MLP blocks
self.blocks = nn.ModuleList([
ResidualMLPBlock(hidden_dim, dropout=dropout)
for _ in range(num_blocks)
])
# 3) output projection: hidden_dim -> horizon * output_dim
self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x_main = x[..., 0] # (B, T, N)
B, T, N = x_main.shape
assert T == self.seq_len, f"expected sequence length {self.seq_len}, got {T}"
# each node's length-T sequence is treated as an independent sample
h_in = x_main.permute(0, 2, 1).reshape(B * N, T) # (B*N, T)
# 1) input projection + dropout
h = F.relu(self.input_proj(h_in)) # (B*N, hidden_dim)
h = self.input_drop(h)
# 2) stacked residual blocks
for block in self.blocks:
h = block(h) # (B*N, hidden_dim)
# 3) decode to horizon * output_dim
out_flat = self.decoder(h) # (B*N, horizon * output_dim)
# 4) reshape to (B, horizon, N, output_dim)
out = out_flat.view(B, N, self.horizon, self.output_dim) \
.permute(0, 2, 1, 3)
return out

137
model/EXP/EXP5.py Normal file

@@ -0,0 +1,137 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class TemporalBlock(nn.Module):
"""
TCN 中的因果残差块对每个节点的时间序列进行因果卷积
保证输出长度与输入一致
"""
def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout=0.1):
super().__init__()
self.kernel_size = kernel_size
self.dilation = dilation
# padding length = (kernel_size - 1) * dilation
self.padding = (kernel_size - 1) * dilation
# causal convolution: padding is done manually in forward(); no padding argument is passed here
self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
padding=0, dilation=dilation)
self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
padding=0, dilation=dilation)
# if the channel count changes, use a 1x1 conv on the residual branch; otherwise use the identity
self.downsample = (nn.Conv1d(in_channels, out_channels, 1)
if in_channels != out_channels else None)
self.dropout = nn.Dropout(dropout)
self.relu = nn.ReLU()
self.norm = nn.LayerNorm(out_channels)
def forward(self, x):
# x: (B*N, C_in, T)
# 1) causal padding: pad on the left side of the time dimension
x_padded = F.pad(x, (self.padding, 0)) # pad=(left, right)
# 2) first convolution
out = self.conv1(x_padded) # (B*N, C_out, T)
out = self.relu(out)
out = self.dropout(out)
# 3) second convolution, padded the same way
out = F.pad(out, (self.padding, 0))
out = self.conv2(out) # (B*N, C_out, T)
out = self.dropout(out)
# 4) residual branch
res = x if self.downsample is None else self.downsample(x) # (B*N, C_out, T)
# 5) keep only the last T time steps so the shapes match the residual
out = out[..., -x.size(2):] # now out.shape == res.shape
# 6) add the residual + LayerNorm + ReLU
return self.relu(self.norm((out + res).permute(0, 2, 1))).permute(0, 2, 1)
class EXP(nn.Module):
"""
时空混合模型
1. 对每个节点的长度-T 序列 TCN 提取时间特征
2. TCN 最后时刻的隐藏重组为 (B, N, hidden_dim)
3. Spatial SelfAttention 在节点维度上捕捉空间依赖
4. 最后一个 Linear 将每个节点的特征映射到 horizon 步预测
"""
def __init__(self, args):
super().__init__()
self.seq_len = args.get('in_len', 12) # input sequence length T
self.horizon = args['horizon']
self.output_dim = args['output_dim']
hidden_dim = args.get('hidden_dim', 64)
tcn_layers = args.get('tcn_layers', 3)
kernel_size = args.get('kernel_size', 3)
dropout = args.get('dropout', 0.1)
nhead = args.get('nhead', 4)
# ----- Temporal Convolutional Network -----
tcn_blocks = []
in_ch = 1 # only the main observation channel is used
for i in range(tcn_layers):
dilation = 2 ** i
out_ch = hidden_dim
tcn_blocks.append(
TemporalBlock(in_ch, out_ch, kernel_size, dilation, dropout)
)
in_ch = out_ch
self.tcn = nn.Sequential(*tcn_blocks)
# ----- Spatial Self-Attention -----
# nodes are treated as tokens with feature dimension hidden_dim
# MultiheadAttention expects input (S, B, E); here S = N
self.spatial_attn = nn.MultiheadAttention(embed_dim=hidden_dim,
num_heads=nhead,
dropout=dropout,
batch_first=False)
# optional LayerNorm
self.norm_spatial = nn.LayerNorm(hidden_dim)
# ----- Decoder -----
# hidden_dim -> horizon * output_dim
self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
"""
x: (B, T, N, D_total)只用第0通道
returns: (B, horizon, N, output_dim)
"""
B, T, N, D_total = x.shape
assert T == self.seq_len, f"Expected T={self.seq_len}, got {T}"
# 1) take the main observation channel and rearrange it for the TCN
x_main = x[..., 0] # (B, T, N)
x_tcn = x_main.reshape(B * N, 1, T) # (B*N, 1, T)
# 2) the TCN extracts temporal features
tcn_out = self.tcn(x_tcn) # (B*N, hidden_dim, T)
# 3) take the features at the last time step
last = tcn_out[:, :, -1] # (B*N, hidden_dim)
h = last.view(B, N, -1) # (B, N, hidden_dim)
# 4) spatial attention
# rearrange to (N, B, E) for MultiheadAttention
h2 = h.permute(1, 0, 2) # (N, B, hidden_dim)
attn_out, _ = self.spatial_attn(h2, h2, h2) # (N, B, hidden_dim)
attn_out = attn_out.permute(1, 0, 2) # (B, N, hidden_dim)
h_spatial = self.norm_spatial(attn_out + h) # residual + LayerNorm
# 5) decoder: map each node to horizon * output_dim
flat = h_spatial.reshape(B * N, -1) # (B*N, hidden_dim)
out_flat = self.decoder(flat) # (B*N, horizon*output_dim)
# 6) reshape to (B, horizon, N, output_dim)
out = out_flat.view(B, N, self.horizon, self.output_dim) \
.permute(0, 2, 1, 3)
return out

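Because each convolution pads only on the left by (kernel_size - 1) * dilation and the result is cropped back to the input length, TemporalBlock is causal and length-preserving. A quick check of that property (the sizes are arbitrary):

import torch

block = TemporalBlock(in_channels=1, out_channels=64, kernel_size=3, dilation=2)
x = torch.randn(10, 1, 12)            # (B*N, C_in, T)
y = block(x)
print(y.shape)                        # torch.Size([10, 64, 12]), same T as the input
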
101
model/EXP/EXP6.py Normal file

@@ -0,0 +1,101 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# (N, D) @ (D, N) -> (N, N)
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C) / adj: (N, N)
x = torch.matmul(adj, x) # (B, N, C)
x = self.theta(x)
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, T, C)
x_attn, _ = self.attn(x, x, x)
x = self.norm1(x + x_attn)
x_ffn = self.ffn(x)
return self.norm2(x + x_ffn)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
# dynamic graph construction
self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)
# input projection layer
self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
# graph convolution
self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)
# MANBA block
self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)
# output projection
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x = x[..., 0] # only the main channel, (B, T, N)
B, T, N = x.shape
assert T == self.seq_len
# input projection: (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
x = x.permute(0, 2, 1).reshape(B * N, T)
h = self.input_proj(x) # (B*N, hidden_dim)
h = h.view(B, N, self.hidden_dim)
# build the dynamic graph
adj = self.graph() # (N, N)
# spatial modeling: graph convolution
h = self.gc(h, adj) # (B, N, hidden_dim)
# "temporal" modeling with MANBA (note: the attention here actually runs over the node dimension)
h = h.permute(0, 2, 1) # (B, hidden_dim, N)
h = h.reshape(B, self.hidden_dim, N).permute(0, 2, 1) # (B, N, hidden_dim); these two permutes cancel each other out
h = self.manba(h) # (B, N, hidden_dim)
# output projection
out = self.out_proj(h) # (B, N, horizon * output_dim)
out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
return out # (B, horizon, N, output_dim)

101
model/EXP/EXP7.py Normal file

@@ -0,0 +1,101 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class DynamicGraphConstructor(nn.Module):
def __init__(self, node_num, embed_dim):
super().__init__()
self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
def forward(self):
# (N, D) @ (D, N) -> (N, N)
adj = torch.matmul(self.nodevec1, self.nodevec2.T)
adj = F.relu(adj)
adj = F.softmax(adj, dim=-1)
return adj
class GraphConvBlock(nn.Module):
def __init__(self, input_dim, output_dim):
super().__init__()
self.theta = nn.Linear(input_dim, output_dim)
def forward(self, x, adj):
# x: (B, N, C) / adj: (N, N)
x = torch.matmul(adj, x) # (B, N, C)
x = self.theta(x)
return F.relu(x)
class MANBA_Block(nn.Module):
def __init__(self, input_dim, hidden_dim):
super().__init__()
self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
self.ffn = nn.Sequential(
nn.Linear(input_dim, hidden_dim),
nn.ReLU(),
nn.Linear(hidden_dim, input_dim)
)
self.norm1 = nn.LayerNorm(input_dim)
self.norm2 = nn.LayerNorm(input_dim)
def forward(self, x):
# x: (B, T, C)
x_attn, _ = self.attn(x, x, x)
x = self.norm1(x + x_attn)
x_ffn = self.ffn(x)
return self.norm2(x + x_ffn)
class EXP(nn.Module):
def __init__(self, args):
super().__init__()
self.horizon = args['horizon']
self.output_dim = args['output_dim']
self.seq_len = args.get('in_len', 12)
self.hidden_dim = args.get('hidden_dim', 64)
self.num_nodes = args['num_nodes']
# dynamic graph construction
self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)
# input projection layer
self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)
# graph convolution
self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)
# MANBA block
self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)
# output projection
self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)
def forward(self, x):
# x: (B, T, N, D_total)
x = x.sum(dim=-1) # (B, T, N); sums all input channels (EXP6 instead takes channel 0 only)
B, T, N = x.shape
assert T == self.seq_len
# input projection: (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
x = x.permute(0, 2, 1).reshape(B * N, T)
h = self.input_proj(x) # (B*N, hidden_dim)
h = h.view(B, N, self.hidden_dim)
# build the dynamic graph
adj = self.graph() # (N, N)
# spatial modeling: graph convolution
h = self.gc(h, adj) # (B, N, hidden_dim)
# "temporal" modeling with MANBA (note: the attention here actually runs over the node dimension)
h = h.permute(0, 2, 1) # (B, hidden_dim, N)
h = h.reshape(B, self.hidden_dim, N).permute(0, 2, 1) # (B, N, hidden_dim); these two permutes cancel each other out
h = self.manba(h) # (B, N, hidden_dim)
# output projection
out = self.out_proj(h) # (B, N, horizon * output_dim)
out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
return out # (B, horizon, N, output_dim)

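EXP7 is the variant that model_selector registers under 'EXP' (see the selector diff further down); it differs from EXP6 only in summing all input channels instead of slicing channel 0. A smoke-test sketch with assumed sizes (in_len and hidden_dim fall back to the defaults read via args.get):

import torch

model = EXP({'horizon': 12, 'output_dim': 1, 'num_nodes': 307})
x = torch.randn(4, 12, 307, 3)        # (B, T, N, D_total); channels are summed inside forward()
y = model(x)
print(y.shape)                        # torch.Size([4, 12, 307, 1])
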
128
model/EXPB/EXP_b.py Normal file

@@ -0,0 +1,128 @@
import torch, torch.nn as nn, torch.nn.functional as F
from collections import OrderedDict
class DGCRM(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim, num_layers=1):
super().__init__()
self.node_num, self.input_dim, self.num_layers = node_num, dim_in, num_layers
self.cells = nn.ModuleList([
DDGCRNCell(node_num, dim_in if i == 0 else dim_out, dim_out, cheb_k, embed_dim)
for i in range(num_layers)
])
def forward(self, x, init_state, node_embeddings):
assert x.shape[2] == self.node_num and x.shape[3] == self.input_dim
for i in range(self.num_layers):
state, inner = init_state[i].to(x.device), []
for t in range(x.shape[1]):
state = self.cells[i](x[:, t, :, :], state, [node_embeddings[0][:, t, :, :], node_embeddings[1]])
inner.append(state)
init_state[i] = state
x = torch.stack(inner, dim=1)
return x, init_state
def init_hidden(self, bs):
return torch.stack([cell.init_hidden_state(bs) for cell in self.cells], dim=0)
class EXPB(nn.Module):
def __init__(self, args):
super().__init__()
self.patch_size = args.get('patch_size', 1)
self.num_node, self.input_dim, self.hidden_dim = args['num_nodes'], args['input_dim'], args['rnn_units']
self.output_dim, self.horizon, self.num_layers = args['output_dim'], args['horizon'], args['num_layers']
self.use_day, self.use_week = args['use_day'], args['use_week']
self.node_embeddings1 = nn.Parameter(torch.randn(self.num_node, args['embed_dim']))
self.T_i_D_emb = nn.Parameter(torch.empty(288, args['embed_dim']))
self.D_i_W_emb = nn.Parameter(torch.empty(7, args['embed_dim']))
self.drop = nn.Dropout(0.1)
self.encoder = DGCRM(self.num_node, self.input_dim, self.hidden_dim,
args['cheb_order'], args['embed_dim'], self.num_layers)
self.base_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim))
self.res_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim + 1))
def forward(self, source):
# source: (B, T, N, D_total); channel 0 is the main observation, channels 1 and 2 are time encodings
B, T, N, D_total = source.shape
p = self.patch_size
num_patches = T // p
source = source[:, :num_patches * p, :, :].view(B, num_patches, p, N, D_total)
# average the main observation channel over each patch and transpose to (B, num_patches, N, 1)
inp = source[..., 0].mean(dim=2, keepdim=True).permute(0, 1, 3, 2)
# time encodings taken at the last time step of each patch
time_day = source[:, :, -1, :, 1] # (B, num_patches, N)
time_week = source[:, :, -1, :, 2] # (B, num_patches, N)
patched_source = torch.cat([inp, time_day.unsqueeze(-1), time_week.unsqueeze(-1)], dim=-1)
node_embed = self.node_embeddings1
if self.use_day:
node_embed = node_embed * self.T_i_D_emb[(patched_source[..., 1] * 288).long()]
if self.use_week:
node_embed = node_embed * self.D_i_W_emb[patched_source[..., 2].long()]
node_embeddings = [node_embed, self.node_embeddings1]
init = self.encoder.init_hidden(B)
enc_out, _ = self.encoder(inp, init, node_embeddings)
rep = self.drop(enc_out[:, -1:, :, :])
base = self.base_conv(rep)
res_in = torch.cat([rep, inp[:, -1:, :, :]], dim=-1)
res = self.res_conv(res_in)
out = base + res
out = out.squeeze(-1).view(B, self.horizon, self.output_dim, N).permute(0, 1, 3, 2)
return out
class DDGCRNCell(nn.Module):
def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim):
super().__init__()
self.node_num, self.hidden_dim = node_num, dim_out
self.gate = DGCN(dim_in + dim_out, 2 * dim_out, cheb_k, embed_dim, node_num)
self.update = DGCN(dim_in + dim_out, dim_out, cheb_k, embed_dim, node_num)
self.ln = nn.LayerNorm(dim_out)
def forward(self, x, state, node_embeddings):
inp = torch.cat((x, state), -1)
z_r = torch.sigmoid(self.gate(inp, node_embeddings))
z, r = torch.split(z_r, self.hidden_dim, -1)
hc = torch.tanh(self.update(torch.cat((x, z * state), -1), node_embeddings))
out = r * state + (1 - r) * hc
return self.ln(out)
def init_hidden_state(self, bs):
return torch.zeros(bs, self.node_num, self.hidden_dim)
class DGCN(nn.Module):
def __init__(self, dim_in, dim_out, cheb_k, embed_dim, num_nodes):
super().__init__()
self.cheb_k, self.embed_dim = cheb_k, embed_dim
self.weights_pool = nn.Parameter(torch.FloatTensor(embed_dim, cheb_k, dim_in, dim_out))
self.weights = nn.Parameter(torch.FloatTensor(cheb_k, dim_in, dim_out))
self.bias_pool = nn.Parameter(torch.FloatTensor(embed_dim, dim_out))
self.bias = nn.Parameter(torch.FloatTensor(dim_out))
self.fc = nn.Sequential(OrderedDict([
('fc1', nn.Linear(dim_in, 16)),
('sigmoid1', nn.Sigmoid()),
('fc2', nn.Linear(16, 2)),
('sigmoid2', nn.Sigmoid()),
('fc3', nn.Linear(2, embed_dim))
]))
self.register_buffer('eye', torch.eye(num_nodes))
def forward(self, x, node_embeddings):
supp1 = self.eye.to(node_embeddings[0].device)
filt = self.fc(x)
nodevec = torch.tanh(node_embeddings[0] * filt)
supp2 = self.get_laplacian(F.relu(torch.matmul(nodevec, nodevec.transpose(2, 1))), supp1)
x_g = torch.stack([
torch.einsum("nm,bmc->bnc", supp1, x),
torch.einsum("bnm,bmc->bnc", supp2, x)
], dim=1)
weights = torch.einsum('nd,dkio->nkio', node_embeddings[1], self.weights_pool)
bias = torch.matmul(node_embeddings[1], self.bias_pool)
return torch.einsum('bnki,nkio->bno', x_g.permute(0, 2, 1, 3), weights) + bias
@staticmethod
def get_laplacian(graph, I, normalize=True):
D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))
return torch.matmul(torch.matmul(D_inv, graph), D_inv) if normalize else torch.matmul(
torch.matmul(D_inv, graph + I), D_inv)

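The patching step in EXPB.forward turns the length-T input into T // patch_size patches, averaging the observation channel inside each patch and keeping only the last step's time encodings. The reshape itself can be illustrated with plain tensor ops (p = 3 as in config/EXPB/PEMSD4.yaml; the other sizes are made up):

import torch

B, T, N, D, p = 2, 12, 307, 3, 3
source = torch.randn(B, T, N, D)
num_patches = T // p                                                   # 4
patches = source[:, :num_patches * p].view(B, num_patches, p, N, D)
obs = patches[..., 0].mean(dim=2, keepdim=True).permute(0, 1, 3, 2)    # (B, 4, N, 1) mean of channel 0 per patch
tod = patches[:, :, -1, :, 1]                                          # (B, 4, N) time-of-day at each patch end
dow = patches[:, :, -1, :, 2]                                          # (B, 4, N) day-of-week at each patch end
print(obs.shape, tod.shape, dow.shape)
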

@@ -0,0 +1,217 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import sys
class nconv(nn.Module):
def __init__(self):
super(nconv, self).__init__()
def forward(self, x, A):
x = torch.einsum('ncvl,vw->ncwl', (x, A))
return x.contiguous()
class linear(nn.Module):
def __init__(self, c_in, c_out):
super(linear, self).__init__()
self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0, 0), stride=(1, 1), bias=True)
def forward(self, x):
return self.mlp(x)
class gcn(nn.Module):
def __init__(self, c_in, c_out, dropout, support_len=3, order=2):
super(gcn, self).__init__()
self.nconv = nconv()
c_in = (order * support_len + 1) * c_in
self.mlp = linear(c_in, c_out)
self.dropout = dropout
self.order = order
def forward(self, x, support):
out = [x]
for a in support:
x1 = self.nconv(x, a)
out.append(x1)
for k in range(2, self.order + 1):
x2 = self.nconv(x1, a)
out.append(x2)
x1 = x2
h = torch.cat(out, dim=1)
h = self.mlp(h)
h = F.dropout(h, self.dropout, training=self.training)
return h
class gwnet(nn.Module):
def __init__(self, args):
super(gwnet, self).__init__()
self.dropout = args['dropout']
self.blocks = args['blocks']
self.layers = args['layers']
self.gcn_bool = args['gcn_bool']
self.addaptadj = args['addaptadj']
self.filter_convs = nn.ModuleList()
self.gate_convs = nn.ModuleList()
self.residual_convs = nn.ModuleList()
self.skip_convs = nn.ModuleList()
self.bn = nn.ModuleList()
self.gconv = nn.ModuleList()
self.start_conv = nn.Conv2d(in_channels=args['in_dim'],
out_channels=args['residual_channels'],
kernel_size=(1, 1))
self.supports = args.get('supports', None)
receptive_field = 1
self.supports_len = 0
if self.supports is not None:
self.supports_len += len(self.supports)
if self.gcn_bool and self.addaptadj:
aptinit = args.get('aptinit', None)
if aptinit is None:
if self.supports is None:
self.supports = []
self.nodevec1 = nn.Parameter(torch.randn(args['num_nodes'], 10).to(args['device']),
requires_grad=True).to(args['device'])
self.nodevec2 = nn.Parameter(torch.randn(10, args['num_nodes']).to(args['device']),
requires_grad=True).to(args['device'])
self.supports_len += 1
else:
if self.supports is None:
self.supports = []
m, p, n = torch.svd(aptinit)
initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5))
initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t())
self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to(args['device'])
self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to(args['device'])
self.supports_len += 1
kernel_size = args['kernel_size']
residual_channels = args['residual_channels']
dilation_channels = args['dilation_channels']
kernel_size = args['kernel_size']
skip_channels = args['skip_channels']
end_channels = args['end_channels']
out_dim = args['out_dim']
dropout = args['dropout']
for b in range(self.blocks):
additional_scope = kernel_size - 1
new_dilation = 1
for i in range(self.layers):
# dilated convolutions
self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
out_channels=dilation_channels,
kernel_size=(1, kernel_size), dilation=new_dilation))
self.gate_convs.append(nn.Conv2d(in_channels=residual_channels,
out_channels=dilation_channels,
kernel_size=(1, kernel_size), dilation=new_dilation))
# 1x1 convolution for residual connection
self.residual_convs.append(nn.Conv2d(in_channels=dilation_channels,
out_channels=residual_channels,
kernel_size=(1, 1)))
# 1x1 convolution for skip connection
self.skip_convs.append(nn.Conv2d(in_channels=dilation_channels,
out_channels=skip_channels,
kernel_size=(1, 1)))
self.bn.append(nn.BatchNorm2d(residual_channels))
new_dilation *= 2
receptive_field += additional_scope
additional_scope *= 2
if self.gcn_bool:
self.gconv.append(gcn(dilation_channels, residual_channels, dropout, support_len=self.supports_len))
self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
out_channels=end_channels,
kernel_size=(1, 1),
bias=True)
self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
out_channels=out_dim,
kernel_size=(1, 1),
bias=True)
self.receptive_field = receptive_field
def forward(self, input):
input = input[..., 0:2]
input = input.transpose(1,3)
input = nn.functional.pad(input,(1,0,0,0))
in_len = input.size(3)
if in_len < self.receptive_field:
x = nn.functional.pad(input, (self.receptive_field - in_len, 0, 0, 0))
else:
x = input
x = self.start_conv(x)
skip = 0
# calculate the current adaptive adj matrix once per iteration
new_supports = None
if self.gcn_bool and self.addaptadj and self.supports is not None:
adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
new_supports = self.supports + [adp]
# WaveNet layers
for i in range(self.blocks * self.layers):
# |----------------------------------------| *residual*
# | |
# | |-- conv -- tanh --| |
# -> dilate -|----| * ----|-- 1x1 -- + --> *input*
# |-- conv -- sigm --| |
# 1x1
# |
# ---------------------------------------> + -------------> *skip*
# (dilation, init_dilation) = self.dilations[i]
# residual = dilation_func(x, dilation, init_dilation, i)
residual = x
# dilated convolution
filter = self.filter_convs[i](residual)
filter = torch.tanh(filter)
gate = self.gate_convs[i](residual)
gate = torch.sigmoid(gate)
x = filter * gate
# parametrized skip connection
s = x
s = self.skip_convs[i](s)
try:
skip = skip[:, :, :, -s.size(3):]
except:
skip = 0
skip = s + skip
if self.gcn_bool and self.supports is not None:
if self.addaptadj:
x = self.gconv[i](x, new_supports)
else:
x = self.gconv[i](x, self.supports)
else:
x = self.residual_convs[i](x)
y = residual[:, :, :, -x.size(3):] # keep the last x.size(3) time steps of the residual
x = x + y
x = self.bn[i](x)
x = F.relu(skip)
x = F.relu(self.end_conv_1(x))
x = self.end_conv_2(x)
return x

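gwnet (a Graph WaveNet variant) pulls every hyperparameter from a flat args dict. The keys below are the ones the constructor above reads; the values are the usual defaults from the original Graph WaveNet code, and the CPU device is chosen only to keep the sketch self-contained. All of these values are assumptions, not part of this commit:

gwnet_args = {
    'num_nodes': 307, 'device': 'cpu', 'in_dim': 2, 'out_dim': 12,
    'dropout': 0.3, 'blocks': 4, 'layers': 2, 'kernel_size': 2,
    'residual_channels': 32, 'dilation_channels': 32,
    'skip_channels': 256, 'end_channels': 512,
    'gcn_bool': True, 'addaptadj': True,
    'supports': None, 'aptinit': None,    # optional; read via args.get
}
model = gwnet(gwnet_args)
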
@@ -13,8 +13,7 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP import EXP
-from model.EXPB.EXP_b import EXPB
+from model.EXP.EXP7 import EXP as EXP
 def model_selector(model):
     match model['type']:
@@ -34,5 +33,4 @@ def model_selector(model):
         case 'STGODE': return ODEGCN(model)
         case 'PDG2SEQ': return PDG2Seq(model)
         case 'EXP': return EXP(model)
-        case 'EXPB': return EXPB(model)

176
trainer/EXP_trainer.py Normal file

@@ -0,0 +1,176 @@
import math
import os
import time
import copy
from tqdm import tqdm
import torch
from lib.logger import get_logger
from lib.loss_function import all_metrics
class Trainer:
def __init__(self, model, loss, optimizer, train_loader, val_loader, test_loader,
scaler, args, lr_scheduler=None):
self.model = model
self.loss = loss
self.optimizer = optimizer
self.train_loader = train_loader
self.val_loader = val_loader
self.test_loader = test_loader
self.scaler = scaler
self.args = args
self.lr_scheduler = lr_scheduler
self.train_per_epoch = len(train_loader)
self.val_per_epoch = len(val_loader) if val_loader else 0
# Paths for saving models and logs
self.best_path = os.path.join(args['log_dir'], 'best_model.pth')
self.best_test_path = os.path.join(args['log_dir'], 'best_test_model.pth')
self.loss_figure_path = os.path.join(args['log_dir'], 'loss.png')
# Initialize logger
if not os.path.isdir(args['log_dir']) and not args['debug']:
os.makedirs(args['log_dir'], exist_ok=True)
self.logger = get_logger(args['log_dir'], name=self.model.__class__.__name__, debug=args['debug'])
self.logger.info(f"Experiment log path in: {args['log_dir']}")
def _run_epoch(self, epoch, dataloader, mode):
if mode == 'train':
self.model.train()
optimizer_step = True
else:
self.model.eval()
optimizer_step = False
total_loss = 0
epoch_time = time.time()
with torch.set_grad_enabled(optimizer_step):
with tqdm(total=len(dataloader), desc=f'{mode.capitalize()} Epoch {epoch}') as pbar:
for batch_idx, (data, target) in enumerate(dataloader):
label = target[..., :self.args['output_dim']]
output = self.model(data).to(self.args['device'])
if self.args['real_value']:
output = self.scaler.inverse_transform(output)
loss = self.loss(output, label)
if optimizer_step and self.optimizer is not None:
self.optimizer.zero_grad()
loss.backward()
if self.args['grad_norm']:
torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args['max_grad_norm'])
self.optimizer.step()
total_loss += loss.item()
if mode == 'train' and (batch_idx + 1) % self.args['log_step'] == 0:
self.logger.info(
f'Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}')
# update the tqdm progress bar
pbar.update(1)
pbar.set_postfix(loss=loss.item())
avg_loss = total_loss / len(dataloader)
self.logger.info(
f'{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s')
return avg_loss
def train_epoch(self, epoch):
return self._run_epoch(epoch, self.train_loader, 'train')
def val_epoch(self, epoch):
return self._run_epoch(epoch, self.val_loader or self.test_loader, 'val')
def test_epoch(self, epoch):
return self._run_epoch(epoch, self.test_loader, 'test')
def train(self):
best_model, best_test_model = None, None
best_loss, best_test_loss = float('inf'), float('inf')
not_improved_count = 0
self.logger.info("Training process started")
for epoch in range(1, self.args['epochs'] + 1):
train_epoch_loss = self.train_epoch(epoch)
val_epoch_loss = self.val_epoch(epoch)
test_epoch_loss = self.test_epoch(epoch)
if train_epoch_loss > 1e6:
self.logger.warning('Gradient explosion detected. Ending...')
break
if val_epoch_loss < best_loss:
best_loss = val_epoch_loss
not_improved_count = 0
best_model = copy.deepcopy(self.model.state_dict())
self.logger.info('Best validation model saved!')
else:
not_improved_count += 1
if self.args['early_stop'] and not_improved_count == self.args['early_stop_patience']:
self.logger.info(
f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops.")
break
if test_epoch_loss < best_test_loss:
best_test_loss = test_epoch_loss
best_test_model = copy.deepcopy(self.model.state_dict())
if not self.args['debug']:
torch.save(best_model, self.best_path)
torch.save(best_test_model, self.best_test_path)
self.logger.info(f"Best models saved at {self.best_path} and {self.best_test_path}")
self._finalize_training(best_model, best_test_model)
def _finalize_training(self, best_model, best_test_model):
self.model.load_state_dict(best_model)
self.logger.info("Testing on best validation model")
self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
self.model.load_state_dict(best_test_model)
self.logger.info("Testing on best test model")
self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
@staticmethod
def test(model, args, data_loader, scaler, logger, path=None):
if path:
checkpoint = torch.load(path)
model.load_state_dict(checkpoint['state_dict'])
model.to(args['device'])
model.eval()
y_pred, y_true = [], []
with torch.no_grad():
for data, target in data_loader:
label = target[..., :args['output_dim']]
output = model(data)
y_pred.append(output)
y_true.append(label)
if args['real_value']:
y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
else:
y_pred = torch.cat(y_pred, dim=0)
y_true = torch.cat(y_true, dim=0)
# save y_pred and y_true here if they are needed for later analysis, e.g.:
# torch.save(y_pred, "./test/PEMS07/y_pred_D.pt") # [3566,12,170,1]
# torch.save(y_true, "./test/PEMS08/y_true.pt") # [3566,12,170,1]
for t in range(y_true.shape[1]):
mae, rmse, mape = all_metrics(y_pred[:, t, ...], y_true[:, t, ...],
args['mae_thresh'], args['mape_thresh'])
logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
mae, rmse, mape = all_metrics(y_pred, y_true, args['mae_thresh'], args['mape_thresh'])
logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")
@staticmethod
def _compute_sampling_threshold(global_step, k):
return k / (k + math.exp(global_step / k))

@@ -2,6 +2,7 @@ from trainer.Trainer import Trainer
 from trainer.cdeTrainer.cdetrainer import Trainer as cdeTrainer
 from trainer.DCRNN_Trainer import Trainer as DCRNN_Trainer
 from trainer.PDG2SEQ_Trainer import Trainer as PDG2SEQ_Trainer
+from trainer.EXP_trainer import Trainer as EXP_Trainer
 def select_trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args,
@@ -13,5 +14,7 @@ def select_trainer(model, loss, optimizer, train_loader, val_loader, test_loader
                              lr_scheduler)
         case 'PDG2SEQ': return PDG2SEQ_Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
                              lr_scheduler)
+        case 'EXP': return EXP_Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
+                             lr_scheduler)
         case _: return Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
                              lr_scheduler)