Experiments on the dev branch
This commit is contained in:
parent
229b6320b9
commit
c9a5a54d90
baseline.ipynb (7955 changed lines)
File diff suppressed because it is too large
@@ -0,0 +1,51 @@
data:
  num_nodes: 307
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7

model:
  input_dim: 1
  output_dim: 1
  embed_dim: 10
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  use_day: True
  use_week: True
  graph_size: 30
  expert_nums: 8
  top_k: 2

train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True

test:
  mae_thresh: null
  mape_thresh: 0.0

log:
  log_step: 200
  plot: False
@@ -0,0 +1,52 @@
data:
  num_nodes: 170
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7

model:
  input_dim: 1
  output_dim: 1
  embed_dim: 12
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  use_day: True
  use_week: True
  graph_size: 30
  expert_nums: 8
  top_k: 2
  hidden_dim: 64

train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True

test:
  mae_thresh: null
  mape_thresh: 0.0

log:
  log_step: 200
  plot: False
@@ -0,0 +1,49 @@
data:
  num_nodes: 307
  lag: 12
  horizon: 12
  val_ratio: 0.2
  test_ratio: 0.2
  tod: False
  normalizer: std
  column_wise: False
  default_graph: True
  add_time_in_day: True
  add_day_in_week: True
  steps_per_day: 288
  days_per_week: 7

model:
  input_dim: 1
  output_dim: 1
  embed_dim: 10
  rnn_units: 64
  num_layers: 1
  cheb_order: 2
  patch_size: 3
  use_day: True
  use_week: True

train:
  loss_func: mae
  seed: 10
  batch_size: 64
  epochs: 300
  lr_init: 0.003
  weight_decay: 0
  lr_decay: False
  lr_decay_rate: 0.3
  lr_decay_step: "5,20,40,70"
  early_stop: True
  early_stop_patience: 15
  grad_norm: False
  max_grad_norm: 5
  real_value: True

test:
  mae_thresh: null
  mape_thresh: 0.0

log:
  log_step: 200
  plot: False
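Illustrative sketch (not part of the commit): the new experiment configs above are plain YAML, so they can be loaded and flattened into the args dict that the model code below indexes; the file name PEMS04.yaml and the flattening step are assumptions.

import yaml

with open('PEMS04.yaml') as f:                 # hypothetical path to one of the configs above
    cfg = yaml.safe_load(f)

# the models read keys such as args['num_nodes'] and args['rnn_units'] from one flat dict
model_args = {**cfg['data'], **cfg['model']}
print(model_args['num_nodes'], model_args['expert_nums'], model_args['top_k'])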
@@ -18,7 +18,7 @@ log:
   plot: false
 model:
   cheb_order: 2
-  embed_dim: 5
+  embed_dim: 12
   input_dim: 1
   num_layers: 1
   output_dim: 1
@@ -29,10 +29,10 @@ test:
   mae_thresh: None
   mape_thresh: 0.001
 train:
-  batch_size: 64
+  batch_size: 12
   early_stop: true
-  early_stop_patience: 15
-  epochs: 100
+  early_stop_patience: 30
+  epochs: 200
   grad_norm: false
   loss_func: mae
   lr_decay: true
@@ -41,5 +41,5 @@ train:
   lr_init: 0.003
   max_grad_norm: 5
   real_value: true
-  seed: 12
+  seed: 3407
   weight_decay: 0
@@ -0,0 +1,123 @@
import torch, torch.nn as nn, torch.nn.functional as F
from collections import OrderedDict


class DGCRM(nn.Module):
    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim, num_layers=1):
        super().__init__()
        self.node_num, self.input_dim, self.num_layers = node_num, dim_in, num_layers
        self.cells = nn.ModuleList([
            DDGCRNCell(node_num, dim_in if i == 0 else dim_out, dim_out, cheb_k, embed_dim)
            for i in range(num_layers)
        ])

    def forward(self, x, init_state, node_embeddings):
        # x: (B, T, N, D)
        assert x.shape[2] == self.node_num and x.shape[3] == self.input_dim
        for i in range(self.num_layers):
            state, inner = init_state[i].to(x.device), []
            for t in range(x.shape[1]):
                state = self.cells[i](x[:, t, :, :], state, [node_embeddings[0][:, t, :, :], node_embeddings[1]])
                inner.append(state)
            init_state[i] = state
            x = torch.stack(inner, dim=1)
        return x, init_state

    def init_hidden(self, bs):
        return torch.stack([cell.init_hidden_state(bs) for cell in self.cells], dim=0)


class EXP(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.num_node, self.input_dim, self.hidden_dim = args['num_nodes'], args['input_dim'], args['rnn_units']
        self.output_dim, self.horizon, self.num_layers = args['output_dim'], args['horizon'], args['num_layers']
        self.use_day, self.use_week = args['use_day'], args['use_week']
        self.node_embeddings1 = nn.Parameter(torch.randn(self.num_node, args['embed_dim']))
        # The second set of node embeddings is no longer used, which reduces parameters
        self.T_i_D_emb = nn.Parameter(torch.empty(288, args['embed_dim']))
        self.D_i_W_emb = nn.Parameter(torch.empty(7, args['embed_dim']))
        self.drop = nn.Dropout(0.1)
        # A single encoder is used, saving one forward pass
        self.encoder = DGCRM(self.num_node, self.input_dim, self.hidden_dim,
                             args['cheb_order'], args['embed_dim'], self.num_layers)
        # Main prediction head: base prediction
        self.base_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim))
        # Residual prediction head: corrects the base prediction with the most recent input; input width is hidden_dim + 1
        self.res_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim + 1))

    def forward(self, source):
        # source: (B, T, N, D_total); channel 0 is the main observation, channels 1 and 2 are time encodings
        node_embed = self.node_embeddings1
        if self.use_day:
            node_embed = node_embed * self.T_i_D_emb[(source[..., 1] * 288).long()]
        if self.use_week:
            node_embed = node_embed * self.D_i_W_emb[source[..., 2].long()]
        node_embeddings = [node_embed, self.node_embeddings1]
        inp = source[..., 0].unsqueeze(-1)  # (B, T, N, 1)
        init = self.encoder.init_hidden(inp.shape[0])
        enc_out, _ = self.encoder(inp, init, node_embeddings)
        # Use the hidden state at the last time step as the representation, shape: (B, 1, N, hidden_dim)
        rep = self.drop(enc_out[:, -1:, :, :])
        # Base prediction
        base = self.base_conv(rep)
        # For the correction branch, concatenate the latest raw input frame as residual information, enlarging the channel width
        res_in = torch.cat([rep, inp[:, -1:, :, :]], dim=-1)  # (B, 1, N, hidden_dim+1)
        res = self.res_conv(res_in)
        return base + res


class DDGCRNCell(nn.Module):
    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim):
        super().__init__()
        self.node_num, self.hidden_dim = node_num, dim_out
        self.gate = DGCN(dim_in + dim_out, 2 * dim_out, cheb_k, embed_dim, node_num)
        self.update = DGCN(dim_in + dim_out, dim_out, cheb_k, embed_dim, node_num)
        self.ln = nn.LayerNorm(dim_out)

    def forward(self, x, state, node_embeddings):
        inp = torch.cat((x, state), -1)
        z_r = torch.sigmoid(self.gate(inp, node_embeddings))
        z, r = torch.split(z_r, self.hidden_dim, -1)
        hc = torch.tanh(self.update(torch.cat((x, z * state), -1), node_embeddings))
        out = r * state + (1 - r) * hc
        return self.ln(out)

    def init_hidden_state(self, bs):
        return torch.zeros(bs, self.node_num, self.hidden_dim)


class DGCN(nn.Module):
    def __init__(self, dim_in, dim_out, cheb_k, embed_dim, num_nodes):
        super().__init__()
        self.cheb_k, self.embed_dim = cheb_k, embed_dim
        self.weights_pool = nn.Parameter(torch.FloatTensor(embed_dim, cheb_k, dim_in, dim_out))
        self.weights = nn.Parameter(torch.FloatTensor(cheb_k, dim_in, dim_out))
        self.bias_pool = nn.Parameter(torch.FloatTensor(embed_dim, dim_out))
        self.bias = nn.Parameter(torch.FloatTensor(dim_out))
        self.fc = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(dim_in, 16)),
            ('sigmoid1', nn.Sigmoid()),
            ('fc2', nn.Linear(16, 2)),
            ('sigmoid2', nn.Sigmoid()),
            ('fc3', nn.Linear(2, embed_dim))
        ]))
        # Register the identity matrix once instead of rebuilding it every forward pass
        self.register_buffer('eye', torch.eye(num_nodes))

    def forward(self, x, node_embeddings):
        supp1 = self.eye.to(node_embeddings[0].device)
        filt = self.fc(x)
        nodevec = torch.tanh(node_embeddings[0] * filt)
        supp2 = self.get_laplacian(F.relu(torch.matmul(nodevec, nodevec.transpose(2, 1))), supp1)
        x_g = torch.stack([torch.einsum("nm,bmc->bnc", supp1, x),
                           torch.einsum("bnm,bmc->bnc", supp2, x)], dim=1)
        weights = torch.einsum('nd,dkio->nkio', node_embeddings[1], self.weights_pool)
        bias = torch.matmul(node_embeddings[1], self.bias_pool)
        return torch.einsum('bnki,nkio->bno', x_g.permute(0, 2, 1, 3), weights) + bias

    @staticmethod
    def get_laplacian(graph, I, normalize=True):
        D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))
        return torch.matmul(torch.matmul(D_inv, graph), D_inv) if normalize else torch.matmul(
            torch.matmul(D_inv, graph + I), D_inv)
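Illustrative sketch (not part of the commit): a dummy forward pass through the EXP model above with PEMS04-style sizes, to make the expected tensor shapes explicit. The args dict is hand-built, and since several tables are created uninitialized (torch.empty / torch.FloatTensor), everything is given a simple init here; the training pipeline presumably handles that elsewhere.

import torch

args = {'num_nodes': 307, 'input_dim': 1, 'rnn_units': 64, 'output_dim': 1,
        'horizon': 12, 'num_layers': 1, 'use_day': True, 'use_week': True,
        'embed_dim': 10, 'cheb_order': 2}
model = EXP(args)
for p in model.parameters():
    torch.nn.init.uniform_(p, -0.1, 0.1)   # crude init only for this shape check

x = torch.rand(8, 12, 307, 3)   # (B, T, N, D_total): value, time-of-day in [0, 1), day-of-week index
y = model(x)
print(y.shape)                  # torch.Size([8, 12, 307, 1]) from the Conv2d prediction heads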
@@ -0,0 +1,122 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class SimpleExpert(nn.Module):
    """
    Inside the expert only three things happen:
    1. Upsample to a fixed graph_size
    2. Apply Linear(input_dim -> hidden_dim) to every time step of every node
    3. Downsample back to the original number of selected nodes
    """
    def __init__(self, input_dim, hidden_dim, graph_size):
        super().__init__()
        self.graph_size = graph_size
        self.linear = nn.Linear(input_dim, hidden_dim)

    def up_sample(self, x, target_size):
        # x: (B, T, N_sel, D)
        B, T, N, D = x.shape
        # 1) Merge B and T to get (B*T, N_sel, D)
        x2 = x.reshape(B * T, N, D)
        # 2) Move to (B*T, D, N_sel) for 1D linear interpolation
        x2 = x2.permute(0, 2, 1)  # (B*T, D, N_sel)
        # 3) Interpolate to graph_size
        x2 = F.interpolate(x2, size=target_size, mode='linear', align_corners=True)  # (B*T, D, graph_size)
        # 4) Restore to (B*T, graph_size, D)
        x2 = x2.permute(0, 2, 1)  # (B*T, graph_size, D)
        # 5) Split back into (B, T, graph_size, D)
        x_up = x2.reshape(B, T, target_size, D)
        return x_up

    def down_sample(self, x, target_size):
        # x: (B, T, graph_size, H)
        B, T, G, H = x.shape
        # 1) Merge B and T
        x2 = x.reshape(B * T, G, H)  # (B*T, graph_size, H)
        # 2) Move to (B*T, H, graph_size)
        x2 = x2.permute(0, 2, 1)  # (B*T, H, graph_size)
        # 3) Interpolate to target_size
        x2 = F.interpolate(x2, size=target_size, mode='linear', align_corners=True)  # (B*T, H, target_size)
        # 4) Restore to (B*T, target_size, H)
        x2 = x2.permute(0, 2, 1)  # (B*T, target_size, H)
        # 5) Split back into (B, T, target_size, H)
        x_down = x2.reshape(B, T, target_size, H)
        return x_down

    def forward(self, x):
        # x: (B, T, N_sel, D)
        x_up = self.up_sample(x, self.graph_size)     # (B, T, graph_size, D)
        out = self.linear(x_up)                       # (B, T, graph_size, hidden_dim)
        out_down = self.down_sample(out, x.shape[2])  # (B, T, N_sel, hidden_dim)
        return out_down


class DGCRM_MOE(nn.Module):
    """
    DGCRM is dropped; SimpleExpert serves as the expert. Output is (B, T, N, output_dim):
    - gate: last step -> top_k experts
    - each expert: upsample -> linear -> downsample
    - sum all expert outputs -> (B, T, N, hidden_dim)
    - Linear(hidden_dim -> output_dim) -> (B, T, N, output_dim)
    - also returns balance_loss for regularization
    """
    def __init__(self, args):
        super().__init__()
        self.graph_size = args['graph_size']
        self.expert_nums = args['expert_nums']
        self.top_k = args['top_k']
        self.input_dim = args['input_dim']
        self.hidden_dim = args['hidden_dim']
        self.output_dim = args['output_dim']
        self.num_node = args['num_nodes']

        # Gating network
        self.gate_proj = nn.Linear(self.input_dim, self.hidden_dim)
        self.gate = nn.Linear(self.hidden_dim, self.expert_nums)

        # List of SimpleExpert modules
        self.experts = nn.ModuleList([
            SimpleExpert(self.input_dim, self.hidden_dim, self.graph_size)
            for _ in range(self.expert_nums)
        ])

        # Final multi-step prediction head: hidden_dim -> output_dim
        self.pred = nn.Linear(self.hidden_dim, self.output_dim)

    def forward(self, x, **kwargs):
        """
        x: (B, T, N, D_total); only channel 0 is used as the main observation
        returns:
            out: (B, T, N, output_dim)
            balance_loss: scalar
        """
        x = x[..., 0:1]  # (B, T, N, 1)
        B, T, N, D = x.shape

        # 1. Routing
        last = x[:, -1, :, :]                                # (B, N, 1)
        g = F.relu(self.gate_proj(last))                     # (B, N, hidden_dim)
        logits = self.gate(g)                                # (B, N, expert_nums)
        rw = F.softmax(logits, dim=-1)                       # (B, N, expert_nums)
        topk_w, topk_idx = torch.topk(rw, self.top_k, -1)    # (B, N, top_k); topk_w could optionally weight the experts

        # 2. Expert processing
        expert_out = torch.zeros(B, T, N, self.hidden_dim, device=x.device)
        balance_loss = 0.0
        for i, expert in enumerate(self.experts):
            mask = (topk_idx == i)  # (B, N, top_k)
            if not mask.any():
                continue
            # Mean routing probability
            balance_loss += (rw[..., i].mean() - 1.0 / self.expert_nums) ** 2

            for b in range(B):
                sel = torch.nonzero(mask[b].any(-1)).squeeze(-1)
                if sel.numel() == 0:
                    continue
                seq = x[b:b+1, :, sel, :]      # (1, T, sel, 1)
                out_seq = expert(seq)          # (1, T, sel, hidden_dim)
                expert_out[b:b+1, :, sel, :] += out_seq

        # 3. Prediction head
        out = self.pred(expert_out)  # (B, T, N, output_dim)
        return out, balance_loss
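Illustrative sketch (not part of the commit): DGCRM_MOE returns a (prediction, balance_loss) pair, so a training step has to combine both terms; the weight lambda_balance and the dummy batch sizes are assumptions.

import torch

args = {'graph_size': 30, 'expert_nums': 8, 'top_k': 2, 'input_dim': 1,
        'hidden_dim': 64, 'output_dim': 1, 'num_nodes': 307}
moe = DGCRM_MOE(args)
batch_x = torch.rand(4, 12, 307, 3)                 # (B, T, N, D_total)
batch_y = torch.rand(4, 12, 307, 1)                 # (B, T, N, output_dim)

criterion = torch.nn.L1Loss()                       # MAE, matching loss_func: mae in the configs
pred, balance_loss = moe(batch_x)
lambda_balance = 0.01                               # hypothetical weight for the load-balancing term
loss = criterion(pred, batch_y) + lambda_balance * balance_loss
loss.backward()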
@@ -0,0 +1,84 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class PositionalEncoding(nn.Module):
    """Standard sinusoidal positional encoding, adds position information to the Transformer input."""
    def __init__(self, d_model, max_len=500):
        super().__init__()
        pe = torch.zeros(max_len, d_model)  # (max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()  # (max_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)  # even dimensions
        pe[:, 1::2] = torch.cos(position * div_term)  # odd dimensions
        self.register_buffer('pe', pe)  # not trained

    def forward(self, x):
        # x: (T, B, d_model)
        T = x.size(0)
        return x + self.pe[:T].unsqueeze(1)  # (T, 1, d_model) broadcasts to (T, B, d_model)


class TemporalTransformerForecast(nn.Module):
    """
    Transformer-based multi-step forecasting:
    - only x[..., 0] is used as the input channel
    - a Transformer encoder is applied in parallel to each node's length-T sequence
    - the output at the last time step is mapped by a Linear layer to horizon * output_dim
    - the result is reshaped to (B, horizon, N, output_dim)
    """
    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        self.seq_len = args.get('in_len', 12)
        assert self.seq_len is not None, "Please set in_len (input sequence length) in args"
        d_model = args.get('d_model', 64)
        nhead = args.get('nhead', 4)
        num_layers = args.get('num_layers', 2)
        dim_ff = args.get('dim_feedforward', d_model * 4)
        dropout = args.get('dropout', 0.1)

        # Project the single channel to d_model
        self.input_proj = nn.Linear(1, d_model)
        self.pos_encoder = PositionalEncoding(d_model, max_len=self.seq_len)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_ff, dropout=dropout,
            batch_first=False  # the (T, B, D) layout is used
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Map the last step to the multi-step prediction
        self.decoder = nn.Linear(d_model, self.horizon * self.output_dim)

    def forward(self, x):
        # x: (B, T, N, D_total)
        x_main = x[..., 0]  # (B, T, N)
        B, T, N = x_main.shape
        assert T == self.seq_len, f"actual sequence length {T} != configured in_len {self.seq_len}"

        # Rearrange: each node's sequence is an independent sample
        # (B, T, N) -> (B*N, T, 1)
        seq = x_main.permute(0, 2, 1).reshape(B * N, T, 1)

        # Projection & positional encoding
        emb = self.input_proj(seq)       # (B*N, T, d_model)
        emb = emb.permute(1, 0, 2)       # -> (T, B*N, d_model)
        emb = self.pos_encoder(emb)      # add position information

        # Transformer encoder
        out = self.transformer(emb)      # (T, B*N, d_model)

        # Hidden vector at the last time step
        last = out[-1, :, :]             # (B*N, d_model)

        # Decode into the multi-step prediction
        pred_flat = self.decoder(last)   # (B*N, horizon * output_dim)

        # Reshape to (B, N, horizon, output_dim) -> (B, horizon, N, output_dim)
        pred = pred_flat.view(B, N, self.horizon, self.output_dim) \
                        .permute(0, 2, 1, 3)
        return pred
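Illustrative sketch (not part of the commit): TemporalTransformerForecast reads its hyper-parameters through args.get, so a minimal dict is enough; in_len must match the lag of 12 used in the configs.

import torch

args = {'horizon': 12, 'output_dim': 1, 'in_len': 12, 'd_model': 64, 'nhead': 4, 'num_layers': 2}
net = TemporalTransformerForecast(args)
x = torch.rand(2, 12, 170, 3)   # (B, T, N, D_total)
y = net(x)
print(y.shape)                  # torch.Size([2, 12, 170, 1]) = (B, horizon, N, output_dim)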
@@ -0,0 +1,47 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class EXP(nn.Module):
    """
    An efficient multi-step forecasting model:
    - input x: (B, T, N, D_total); only the main observation channel x[..., 0] is used
    - each node's length-T sequence x[b, :, n] is encoded by a shared MLP
    - the result is mapped to horizon * output_dim and reshaped to (B, horizon, N, output_dim)
    """
    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        # Hidden width, adjustable
        hidden_dim = args.get('hidden_dim', 128)
        T = 12
        self.encoder = nn.Sequential(
            nn.Linear(in_features=T, out_features=hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
        )
        # The decoder maps hidden_dim -> horizon * output_dim
        self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        # x: (B, T, N, D_total)
        # 1) Keep only the main observation channel
        x_main = x[..., 0]  # (B, T, N)
        B, T, N = x_main.shape

        # 2) Rearrange and flatten: each node's sequence becomes one sample
        # (B, T, N) -> (B, N, T) -> (B*N, T)
        h_in = x_main.permute(0, 2, 1).reshape(B * N, T)

        # 3) Shared MLP encoder
        h = self.encoder(h_in)  # (B*N, hidden_dim)

        # 4) Decode to all prediction steps
        out_flat = self.decoder(h)  # (B*N, horizon * output_dim)

        # 5) Reshape to (B, horizon, N, output_dim)
        out = out_flat.view(B, N, self.horizon, self.output_dim) \
                      .permute(0, 2, 1, 3)

        return out
@@ -0,0 +1,27 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class EXP(nn.Module):

    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        hidden_dim = args.get('hidden_dim', 128)
        self.encoder = nn.Sequential(
            nn.Linear(in_features=12, out_features=hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.1),
        )
        self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        x_main = x[..., 0]  # (B, T, N)
        B, T, N = x_main.shape
        h_in = x_main.permute(0, 2, 1).reshape(B * N, T)
        h = self.encoder(h_in)          # (B*N, hidden_dim)
        out_flat = self.decoder(h)      # (B*N, horizon * output_dim)
        out = out_flat.view(B, N, self.horizon, self.output_dim) \
                      .permute(0, 2, 1, 3)
        return out
@@ -0,0 +1,81 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class ResidualMLPBlock(nn.Module):
    """
    A residual block at a fixed hidden width:
    x -> Linear(hidden->hidden) -> ReLU -> Dropout
      -> Linear(hidden->hidden) -> Dropout
      + skip connection -> LayerNorm
    """
    def __init__(self, hidden_dim, dropout=0.1):
        super().__init__()
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.drop = nn.Dropout(dropout)
        self.norm = nn.LayerNorm(hidden_dim)

    def forward(self, x):
        resid = x
        x = F.relu(self.fc1(x))
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return self.norm(x + resid)


class EXP(nn.Module):
    """
    Multi-layer MLP forecasting model with residual connections:
    - input x: (B, T, N, D_total); x[..., 0] is used
    - the length-T sequence is first projected to hidden_dim,
      then passed through num_blocks ResidualMLPBlock modules
    - finally projected to horizon * output_dim and reshaped to (B, horizon, N, output_dim)
    """
    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        self.seq_len = args.get('in_len', 12)  # sequence length T, default 12
        hidden_dim = args.get('hidden_dim', 64)
        num_blocks = args.get('num_mlp_layers', 2)
        dropout = args.get('dropout', 0.1)

        # 1) Input projection: T -> hidden_dim
        self.input_proj = nn.Linear(self.seq_len, hidden_dim)
        self.input_drop = nn.Dropout(dropout)

        # 2) Residual MLP blocks
        self.blocks = nn.ModuleList([
            ResidualMLPBlock(hidden_dim, dropout=dropout)
            for _ in range(num_blocks)
        ])

        # 3) Output projection: hidden_dim -> horizon * output_dim
        self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        # x: (B, T, N, D_total)
        x_main = x[..., 0]  # (B, T, N)
        B, T, N = x_main.shape
        assert T == self.seq_len, f"expected sequence length {self.seq_len}, got {T}"

        # Each node's length-T sequence is treated as an independent sample
        h_in = x_main.permute(0, 2, 1).reshape(B * N, T)  # (B*N, T)

        # 1) Input projection + dropout
        h = F.relu(self.input_proj(h_in))  # (B*N, hidden_dim)
        h = self.input_drop(h)

        # 2) Stack of residual blocks
        for block in self.blocks:
            h = block(h)  # (B*N, hidden_dim)

        # 3) Decode to horizon * output_dim
        out_flat = self.decoder(h)  # (B*N, horizon * output_dim)

        # 4) Reshape to (B, horizon, N, output_dim)
        out = out_flat.view(B, N, self.horizon, self.output_dim) \
                      .permute(0, 2, 1, 3)
        return out
@@ -0,0 +1,137 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class TemporalBlock(nn.Module):
    """
    Causal residual block of a TCN. It applies causal convolutions to each node's
    time series while keeping the output length equal to the input length.
    """
    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout=0.1):
        super().__init__()
        self.kernel_size = kernel_size
        self.dilation = dilation
        # Padding length = (kernel_size - 1) * dilation
        self.padding = (kernel_size - 1) * dilation

        # Causal convolutions: padding is done manually in forward, so no padding argument here
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               padding=0, dilation=dilation)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               padding=0, dilation=dilation)

        # If the channel count changes, use a 1x1 convolution on the skip path; otherwise pass the identity
        self.downsample = (nn.Conv1d(in_channels, out_channels, 1)
                           if in_channels != out_channels else None)

        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.norm = nn.LayerNorm(out_channels)

    def forward(self, x):
        # x: (B*N, C_in, T)
        # 1) Causal padding: pad on the left of the time dimension
        x_padded = F.pad(x, (self.padding, 0))  # pad=(left, right)

        # 2) First convolution
        out = self.conv1(x_padded)  # (B*N, C_out, T + padding)
        out = self.relu(out)
        out = self.dropout(out)

        # 3) Second convolution, also padded first
        out = F.pad(out, (self.padding, 0))
        out = self.conv2(out)  # (B*N, C_out, T + padding)
        out = self.dropout(out)

        # 4) Residual branch
        res = x if self.downsample is None else self.downsample(x)  # (B*N, C_out, T)

        # 5) Trim the extra leading padding, keeping the last T time steps
        out = out[..., -x.size(2):]  # now out.shape == res.shape

        # 6) Residual sum + LayerNorm + ReLU
        return self.relu(self.norm((out + res).permute(0, 2, 1))).permute(0, 2, 1)


class EXP(nn.Module):
    """
    Spatio-temporal hybrid model:
    1. For each node's length-T sequence, a TCN extracts temporal features;
    2. The TCN hidden state at the last step is regrouped into (B, N, hidden_dim);
    3. Spatial self-attention over the node dimension captures spatial dependencies;
    4. A final Linear maps each node's features to the horizon-step prediction.
    """
    def __init__(self, args):
        super().__init__()
        self.seq_len = args.get('in_len', 12)  # input sequence length T
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        hidden_dim = args.get('hidden_dim', 64)
        tcn_layers = args.get('tcn_layers', 3)
        kernel_size = args.get('kernel_size', 3)
        dropout = args.get('dropout', 0.1)
        nhead = args.get('nhead', 4)

        # ----- Temporal Convolutional Network -----
        tcn_blocks = []
        in_ch = 1  # only the main observation channel is used
        for i in range(tcn_layers):
            dilation = 2 ** i
            out_ch = hidden_dim
            tcn_blocks.append(
                TemporalBlock(in_ch, out_ch, kernel_size, dilation, dropout)
            )
            in_ch = out_ch
        self.tcn = nn.Sequential(*tcn_blocks)

        # ----- Spatial Self-Attention -----
        # Nodes are treated as tokens with feature dimension hidden_dim.
        # MultiheadAttention expects input of shape (S, B, E); here S = N
        self.spatial_attn = nn.MultiheadAttention(embed_dim=hidden_dim,
                                                  num_heads=nhead,
                                                  dropout=dropout,
                                                  batch_first=False)

        # Optional LayerNorm
        self.norm_spatial = nn.LayerNorm(hidden_dim)

        # ----- Decoder -----
        # hidden_dim -> horizon * output_dim
        self.decoder = nn.Linear(hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        """
        x: (B, T, N, D_total); only channel 0 is used
        returns: (B, horizon, N, output_dim)
        """
        B, T, N, D_total = x.shape
        assert T == self.seq_len, f"Expected T={self.seq_len}, got {T}"

        # 1) Take the main observation and rearrange for the TCN
        x_main = x[..., 0]                                     # (B, T, N)
        x_tcn = x_main.permute(0, 2, 1).reshape(B * N, 1, T)   # (B*N, 1, T), one sequence per node

        # 2) TCN temporal feature extraction
        tcn_out = self.tcn(x_tcn)               # (B*N, hidden_dim, T)

        # 3) Features at the last time step
        last = tcn_out[:, :, -1]                # (B*N, hidden_dim)
        h = last.view(B, N, -1)                 # (B, N, hidden_dim)

        # 4) Spatial attention
        # rearrange to (N, B, E) for MultiheadAttention
        h2 = h.permute(1, 0, 2)                         # (N, B, hidden_dim)
        attn_out, _ = self.spatial_attn(h2, h2, h2)     # (N, B, hidden_dim)
        attn_out = attn_out.permute(1, 0, 2)            # (B, N, hidden_dim)
        h_spatial = self.norm_spatial(attn_out + h)     # residual + LayerNorm

        # 5) Decoder: map each node to horizon * output_dim
        flat = h_spatial.reshape(B * N, -1)     # (B*N, hidden_dim)
        out_flat = self.decoder(flat)           # (B*N, horizon*output_dim)

        # 6) Reshape to (B, horizon, N, output_dim)
        out = out_flat.view(B, N, self.horizon, self.output_dim) \
                      .permute(0, 2, 1, 3)
        return out
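Illustrative sketch (not part of the commit): with the defaults above (kernel_size 3, three blocks with dilations 1/2/4, two causal convolutions per block), the receptive field of the TCN already covers the 12-step input window, which is why taking only the last TCN time step is reasonable.

def tcn_receptive_field(kernel_size=3, tcn_layers=3, convs_per_block=2):
    # each causal convolution with dilation d sees (kernel_size - 1) * d extra past steps
    rf = 1
    for i in range(tcn_layers):
        rf += convs_per_block * (kernel_size - 1) * (2 ** i)
    return rf

print(tcn_receptive_field())    # 29 >= 12 (the lag used in the configs)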
@@ -0,0 +1,101 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DynamicGraphConstructor(nn.Module):
    def __init__(self, node_num, embed_dim):
        super().__init__()
        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)

    def forward(self):
        # (N, D) @ (D, N) -> (N, N)
        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
        adj = F.relu(adj)
        adj = F.softmax(adj, dim=-1)
        return adj


class GraphConvBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.theta = nn.Linear(input_dim, output_dim)

    def forward(self, x, adj):
        # x: (B, N, C) / adj: (N, N)
        x = torch.matmul(adj, x)  # (B, N, C)
        x = self.theta(x)
        return F.relu(x)


class MANBA_Block(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
        self.ffn = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )
        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)

    def forward(self, x):
        # x: (B, T, C)
        x_attn, _ = self.attn(x, x, x)
        x = self.norm1(x + x_attn)
        x_ffn = self.ffn(x)
        return self.norm2(x + x_ffn)


class EXP(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        self.seq_len = args.get('in_len', 12)
        self.hidden_dim = args.get('hidden_dim', 64)
        self.num_nodes = args['num_nodes']

        # Dynamic graph construction
        self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)

        # Input projection
        self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)

        # Graph convolution
        self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)

        # MANBA block
        self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)

        # Output projection
        self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        # x: (B, T, N, D_total)
        x = x[..., 0]  # main channel only, (B, T, N)
        B, T, N = x.shape
        assert T == self.seq_len

        # Input projection: (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
        x = x.permute(0, 2, 1).reshape(B * N, T)
        h = self.input_proj(x)  # (B*N, hidden_dim)
        h = h.view(B, N, self.hidden_dim)

        # Dynamic graph construction
        adj = self.graph()  # (N, N)

        # Spatial modelling: graph convolution
        h = self.gc(h, adj)  # (B, N, hidden_dim)

        # Temporal modelling: MANBA
        h = h.permute(0, 2, 1)  # (B, hidden_dim, N)
        h = h.reshape(B, self.hidden_dim, N).permute(0, 2, 1)  # (B, N, hidden_dim)
        h = self.manba(h)  # (B, N, hidden_dim)

        # Output projection
        out = self.out_proj(h)  # (B, N, horizon * output_dim)
        out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
        return out  # (B, horizon, N, output_dim)
@@ -0,0 +1,101 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DynamicGraphConstructor(nn.Module):
    def __init__(self, node_num, embed_dim):
        super().__init__()
        self.nodevec1 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)
        self.nodevec2 = nn.Parameter(torch.randn(node_num, embed_dim), requires_grad=True)

    def forward(self):
        # (N, D) @ (D, N) -> (N, N)
        adj = torch.matmul(self.nodevec1, self.nodevec2.T)
        adj = F.relu(adj)
        adj = F.softmax(adj, dim=-1)
        return adj


class GraphConvBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.theta = nn.Linear(input_dim, output_dim)

    def forward(self, x, adj):
        # x: (B, N, C) / adj: (N, N)
        x = torch.matmul(adj, x)  # (B, N, C)
        x = self.theta(x)
        return F.relu(x)


class MANBA_Block(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.attn = nn.MultiheadAttention(embed_dim=input_dim, num_heads=4, batch_first=True)
        self.ffn = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim)
        )
        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)

    def forward(self, x):
        # x: (B, T, C)
        x_attn, _ = self.attn(x, x, x)
        x = self.norm1(x + x_attn)
        x_ffn = self.ffn(x)
        return self.norm2(x + x_ffn)


class EXP(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.horizon = args['horizon']
        self.output_dim = args['output_dim']
        self.seq_len = args.get('in_len', 12)
        self.hidden_dim = args.get('hidden_dim', 64)
        self.num_nodes = args['num_nodes']

        # Dynamic graph construction
        self.graph = DynamicGraphConstructor(self.num_nodes, embed_dim=16)

        # Input projection
        self.input_proj = nn.Linear(self.seq_len, self.hidden_dim)

        # Graph convolution
        self.gc = GraphConvBlock(self.hidden_dim, self.hidden_dim)

        # MANBA block
        self.manba = MANBA_Block(self.hidden_dim, self.hidden_dim * 2)

        # Output projection
        self.out_proj = nn.Linear(self.hidden_dim, self.horizon * self.output_dim)

    def forward(self, x):
        # x: (B, T, N, D_total)
        x = x.sum(dim=-1)  # sum over all input channels -> (B, T, N)
        B, T, N = x.shape
        assert T == self.seq_len

        # Input projection: (B, T, N) -> (B, N, T) -> (B*N, T) -> (B*N, H)
        x = x.permute(0, 2, 1).reshape(B * N, T)
        h = self.input_proj(x)  # (B*N, hidden_dim)
        h = h.view(B, N, self.hidden_dim)

        # Dynamic graph construction
        adj = self.graph()  # (N, N)

        # Spatial modelling: graph convolution
        h = self.gc(h, adj)  # (B, N, hidden_dim)

        # Temporal modelling: MANBA
        h = h.permute(0, 2, 1)  # (B, hidden_dim, N)
        h = h.reshape(B, self.hidden_dim, N).permute(0, 2, 1)  # (B, N, hidden_dim)
        h = self.manba(h)  # (B, N, hidden_dim)

        # Output projection
        out = self.out_proj(h)  # (B, N, horizon * output_dim)
        out = out.view(B, N, self.horizon, self.output_dim).permute(0, 2, 1, 3)
        return out  # (B, horizon, N, output_dim)
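Illustrative sketch (not part of the commit): the softmax in DynamicGraphConstructor makes every row of the learned adjacency a probability distribution over neighbours, so the graph convolution averages neighbour features; this row-stochastic property can be checked directly.

import torch

g = DynamicGraphConstructor(node_num=5, embed_dim=16)
adj = g()                                               # (5, 5)
print(torch.allclose(adj.sum(dim=-1), torch.ones(5)))   # True: rows sum to 1 after the softmax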
@@ -0,0 +1,128 @@
import torch, torch.nn as nn, torch.nn.functional as F
from collections import OrderedDict


class DGCRM(nn.Module):
    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim, num_layers=1):
        super().__init__()
        self.node_num, self.input_dim, self.num_layers = node_num, dim_in, num_layers
        self.cells = nn.ModuleList([
            DDGCRNCell(node_num, dim_in if i == 0 else dim_out, dim_out, cheb_k, embed_dim)
            for i in range(num_layers)
        ])

    def forward(self, x, init_state, node_embeddings):
        assert x.shape[2] == self.node_num and x.shape[3] == self.input_dim
        for i in range(self.num_layers):
            state, inner = init_state[i].to(x.device), []
            for t in range(x.shape[1]):
                state = self.cells[i](x[:, t, :, :], state, [node_embeddings[0][:, t, :, :], node_embeddings[1]])
                inner.append(state)
            init_state[i] = state
            x = torch.stack(inner, dim=1)
        return x, init_state

    def init_hidden(self, bs):
        return torch.stack([cell.init_hidden_state(bs) for cell in self.cells], dim=0)


class EXPB(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.patch_size = args.get('patch_size', 1)
        self.num_node, self.input_dim, self.hidden_dim = args['num_nodes'], args['input_dim'], args['rnn_units']
        self.output_dim, self.horizon, self.num_layers = args['output_dim'], args['horizon'], args['num_layers']
        self.use_day, self.use_week = args['use_day'], args['use_week']
        self.node_embeddings1 = nn.Parameter(torch.randn(self.num_node, args['embed_dim']))
        self.T_i_D_emb = nn.Parameter(torch.empty(288, args['embed_dim']))
        self.D_i_W_emb = nn.Parameter(torch.empty(7, args['embed_dim']))
        self.drop = nn.Dropout(0.1)
        self.encoder = DGCRM(self.num_node, self.input_dim, self.hidden_dim,
                             args['cheb_order'], args['embed_dim'], self.num_layers)
        self.base_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim))
        self.res_conv = nn.Conv2d(1, self.horizon * self.output_dim, (1, self.hidden_dim + 1))

    def forward(self, source):
        # source: (B, T, N, D_total); channel 0 is the main observation, channels 1 and 2 are time encodings
        B, T, N, D_total = source.shape
        p = self.patch_size
        num_patches = T // p
        source = source[:, :num_patches * p, :, :].view(B, num_patches, p, N, D_total)
        # Average the main observation channel within each patch and transpose to (B, num_patches, N, 1)
        inp = source[..., 0].mean(dim=2, keepdim=True).permute(0, 1, 3, 2)
        # Time encodings at the last step of each patch
        time_day = source[:, :, -1, :, 1]   # (B, num_patches, N)
        time_week = source[:, :, -1, :, 2]  # (B, num_patches, N)
        patched_source = torch.cat([inp, time_day.unsqueeze(-1), time_week.unsqueeze(-1)], dim=-1)
        node_embed = self.node_embeddings1
        if self.use_day:
            node_embed = node_embed * self.T_i_D_emb[(patched_source[..., 1] * 288).long()]
        if self.use_week:
            node_embed = node_embed * self.D_i_W_emb[patched_source[..., 2].long()]
        node_embeddings = [node_embed, self.node_embeddings1]
        init = self.encoder.init_hidden(B)
        enc_out, _ = self.encoder(inp, init, node_embeddings)
        rep = self.drop(enc_out[:, -1:, :, :])
        base = self.base_conv(rep)
        res_in = torch.cat([rep, inp[:, -1:, :, :]], dim=-1)
        res = self.res_conv(res_in)
        out = base + res
        out = out.squeeze(-1).view(B, self.horizon, self.output_dim, N).permute(0, 1, 3, 2)
        return out


class DDGCRNCell(nn.Module):
    def __init__(self, node_num, dim_in, dim_out, cheb_k, embed_dim):
        super().__init__()
        self.node_num, self.hidden_dim = node_num, dim_out
        self.gate = DGCN(dim_in + dim_out, 2 * dim_out, cheb_k, embed_dim, node_num)
        self.update = DGCN(dim_in + dim_out, dim_out, cheb_k, embed_dim, node_num)
        self.ln = nn.LayerNorm(dim_out)

    def forward(self, x, state, node_embeddings):
        inp = torch.cat((x, state), -1)
        z_r = torch.sigmoid(self.gate(inp, node_embeddings))
        z, r = torch.split(z_r, self.hidden_dim, -1)
        hc = torch.tanh(self.update(torch.cat((x, z * state), -1), node_embeddings))
        out = r * state + (1 - r) * hc
        return self.ln(out)

    def init_hidden_state(self, bs):
        return torch.zeros(bs, self.node_num, self.hidden_dim)


class DGCN(nn.Module):
    def __init__(self, dim_in, dim_out, cheb_k, embed_dim, num_nodes):
        super().__init__()
        self.cheb_k, self.embed_dim = cheb_k, embed_dim
        self.weights_pool = nn.Parameter(torch.FloatTensor(embed_dim, cheb_k, dim_in, dim_out))
        self.weights = nn.Parameter(torch.FloatTensor(cheb_k, dim_in, dim_out))
        self.bias_pool = nn.Parameter(torch.FloatTensor(embed_dim, dim_out))
        self.bias = nn.Parameter(torch.FloatTensor(dim_out))
        self.fc = nn.Sequential(OrderedDict([
            ('fc1', nn.Linear(dim_in, 16)),
            ('sigmoid1', nn.Sigmoid()),
            ('fc2', nn.Linear(16, 2)),
            ('sigmoid2', nn.Sigmoid()),
            ('fc3', nn.Linear(2, embed_dim))
        ]))
        self.register_buffer('eye', torch.eye(num_nodes))

    def forward(self, x, node_embeddings):
        supp1 = self.eye.to(node_embeddings[0].device)
        filt = self.fc(x)
        nodevec = torch.tanh(node_embeddings[0] * filt)
        supp2 = self.get_laplacian(F.relu(torch.matmul(nodevec, nodevec.transpose(2, 1))), supp1)
        x_g = torch.stack([
            torch.einsum("nm,bmc->bnc", supp1, x),
            torch.einsum("bnm,bmc->bnc", supp2, x)
        ], dim=1)
        weights = torch.einsum('nd,dkio->nkio', node_embeddings[1], self.weights_pool)
        bias = torch.matmul(node_embeddings[1], self.bias_pool)
        return torch.einsum('bnki,nkio->bno', x_g.permute(0, 2, 1, 3), weights) + bias

    @staticmethod
    def get_laplacian(graph, I, normalize=True):
        D_inv = torch.diag_embed(torch.sum(graph, -1) ** (-0.5))
        return torch.matmul(torch.matmul(D_inv, graph), D_inv) if normalize else torch.matmul(
            torch.matmul(D_inv, graph + I), D_inv)
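Illustrative sketch (not part of the commit): how the patching in EXPB.forward shortens the sequence the recurrent encoder unrolls; with lag 12 and patch_size 3 the DGCRM runs over 4 averaged steps instead of 12.

import torch

B, T, N, D = 2, 12, 307, 3
p = 3
source = torch.rand(B, T, N, D)
num_patches = T // p                                            # 4
patched = source[:, :num_patches * p].view(B, num_patches, p, N, D)
inp = patched[..., 0].mean(dim=2, keepdim=True).permute(0, 1, 3, 2)
print(inp.shape)                                                # torch.Size([2, 4, 307, 1])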
@@ -0,0 +1,217 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import sys


class nconv(nn.Module):
    def __init__(self):
        super(nconv, self).__init__()

    def forward(self, x, A):
        x = torch.einsum('ncvl,vw->ncwl', (x, A))
        return x.contiguous()


class linear(nn.Module):
    def __init__(self, c_in, c_out):
        super(linear, self).__init__()
        self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0, 0), stride=(1, 1), bias=True)

    def forward(self, x):
        return self.mlp(x)


class gcn(nn.Module):
    def __init__(self, c_in, c_out, dropout, support_len=3, order=2):
        super(gcn, self).__init__()
        self.nconv = nconv()
        c_in = (order * support_len + 1) * c_in
        self.mlp = linear(c_in, c_out)
        self.dropout = dropout
        self.order = order

    def forward(self, x, support):
        out = [x]
        for a in support:
            x1 = self.nconv(x, a)
            out.append(x1)
            for k in range(2, self.order + 1):
                x2 = self.nconv(x1, a)
                out.append(x2)
                x1 = x2

        h = torch.cat(out, dim=1)
        h = self.mlp(h)
        h = F.dropout(h, self.dropout, training=self.training)
        return h


class gwnet(nn.Module):
    def __init__(self, args):
        super(gwnet, self).__init__()
        self.dropout = args['dropout']
        self.blocks = args['blocks']
        self.layers = args['layers']
        self.gcn_bool = args['gcn_bool']
        self.addaptadj = args['addaptadj']

        self.filter_convs = nn.ModuleList()
        self.gate_convs = nn.ModuleList()
        self.residual_convs = nn.ModuleList()
        self.skip_convs = nn.ModuleList()
        self.bn = nn.ModuleList()
        self.gconv = nn.ModuleList()

        self.start_conv = nn.Conv2d(in_channels=args['in_dim'],
                                    out_channels=args['residual_channels'],
                                    kernel_size=(1, 1))
        self.supports = args.get('supports', None)

        receptive_field = 1

        self.supports_len = 0
        if self.supports is not None:
            self.supports_len += len(self.supports)

        if self.gcn_bool and self.addaptadj:
            aptinit = args.get('aptinit', None)
            if aptinit is None:
                if self.supports is None:
                    self.supports = []
                self.nodevec1 = nn.Parameter(torch.randn(args['num_nodes'], 10).to(args['device']),
                                             requires_grad=True).to(args['device'])
                self.nodevec2 = nn.Parameter(torch.randn(10, args['num_nodes']).to(args['device']),
                                             requires_grad=True).to(args['device'])
                self.supports_len += 1
            else:
                if self.supports is None:
                    self.supports = []
                m, p, n = torch.svd(aptinit)
                initemb1 = torch.mm(m[:, :10], torch.diag(p[:10] ** 0.5))
                initemb2 = torch.mm(torch.diag(p[:10] ** 0.5), n[:, :10].t())
                self.nodevec1 = nn.Parameter(initemb1, requires_grad=True).to(args['device'])
                self.nodevec2 = nn.Parameter(initemb2, requires_grad=True).to(args['device'])
                self.supports_len += 1

        kernel_size = args['kernel_size']
        residual_channels = args['residual_channels']
        dilation_channels = args['dilation_channels']
        skip_channels = args['skip_channels']
        end_channels = args['end_channels']
        out_dim = args['out_dim']
        dropout = args['dropout']

        for b in range(self.blocks):
            additional_scope = kernel_size - 1
            new_dilation = 1
            for i in range(self.layers):
                # dilated convolutions
                self.filter_convs.append(nn.Conv2d(in_channels=residual_channels,
                                                   out_channels=dilation_channels,
                                                   kernel_size=(1, kernel_size), dilation=new_dilation))

                self.gate_convs.append(nn.Conv2d(in_channels=residual_channels,
                                                 out_channels=dilation_channels,
                                                 kernel_size=(1, kernel_size), dilation=new_dilation))

                # 1x1 convolution for residual connection
                self.residual_convs.append(nn.Conv2d(in_channels=dilation_channels,
                                                     out_channels=residual_channels,
                                                     kernel_size=(1, 1)))

                # 1x1 convolution for skip connection
                self.skip_convs.append(nn.Conv2d(in_channels=dilation_channels,
                                                 out_channels=skip_channels,
                                                 kernel_size=(1, 1)))
                self.bn.append(nn.BatchNorm2d(residual_channels))
                new_dilation *= 2
                receptive_field += additional_scope
                additional_scope *= 2
                if self.gcn_bool:
                    self.gconv.append(gcn(dilation_channels, residual_channels, dropout, support_len=self.supports_len))

        self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
                                    out_channels=end_channels,
                                    kernel_size=(1, 1),
                                    bias=True)

        self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
                                    out_channels=out_dim,
                                    kernel_size=(1, 1),
                                    bias=True)

        self.receptive_field = receptive_field

    def forward(self, input):
        input = input[..., 0:2]
        input = input.transpose(1, 3)
        input = nn.functional.pad(input, (1, 0, 0, 0))
        in_len = input.size(3)
        if in_len < self.receptive_field:
            x = nn.functional.pad(input, (self.receptive_field - in_len, 0, 0, 0))
        else:
            x = input
        x = self.start_conv(x)
        skip = 0

        # calculate the current adaptive adj matrix once per iteration
        new_supports = None
        if self.gcn_bool and self.addaptadj and self.supports is not None:
            adp = F.softmax(F.relu(torch.mm(self.nodevec1, self.nodevec2)), dim=1)
            new_supports = self.supports + [adp]

        # WaveNet layers
        for i in range(self.blocks * self.layers):

            #            |----------------------------------------|     *residual*
            #            |                                        |
            #            |    |-- conv -- tanh --|                |
            # -> dilate -|----|                  * ----|-- 1x1 -- + -->  *input*
            #                 |-- conv -- sigm --|     |
            #                                         1x1
            #                                          |
            # ---------------------------------------> + ------------->  *skip*

            # (dilation, init_dilation) = self.dilations[i]

            # residual = dilation_func(x, dilation, init_dilation, i)
            residual = x
            # dilated convolution
            filter = self.filter_convs[i](residual)
            filter = torch.tanh(filter)
            gate = self.gate_convs[i](residual)
            gate = torch.sigmoid(gate)
            x = filter * gate

            # parametrized skip connection
            s = x
            s = self.skip_convs[i](s)
            try:
                skip = skip[:, :, :, -s.size(3):]
            except:
                skip = 0
            skip = s + skip

            if self.gcn_bool and self.supports is not None:
                if self.addaptadj:
                    x = self.gconv[i](x, new_supports)
                else:
                    x = self.gconv[i](x, self.supports)
            else:
                x = self.residual_convs[i](x)

            y = residual[:, :, :, -x.size(3):]  # keep the last x.size(3) time steps of the residual
            x = x + y

            x = self.bn[i](x)

        x = F.relu(skip)
        x = F.relu(self.end_conv_1(x))
        x = self.end_conv_2(x)
        return x
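Illustrative sketch (not part of the commit): gwnet pads its input on the left up to self.receptive_field, so a lag-12 window still works. The helper below mirrors the receptive-field bookkeeping in gwnet.__init__; the values blocks=4, layers=2, kernel_size=2 are the commonly used Graph WaveNet defaults and are an assumption here, since no config for this model is included in the commit.

def gwnet_receptive_field(blocks, layers, kernel_size):
    # mirrors the loop in gwnet.__init__: additional_scope starts at kernel_size - 1
    # and doubles after every layer, resetting at each block
    rf = 1
    for _ in range(blocks):
        additional_scope = kernel_size - 1
        for _ in range(layers):
            rf += additional_scope
            additional_scope *= 2
    return rf

print(gwnet_receptive_field(blocks=4, layers=2, kernel_size=2))   # 13 > 12, so the input gets left-padded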
@@ -13,8 +13,7 @@ from model.STFGNN.STFGNN import STFGNN
 from model.STSGCN.STSGCN import STSGCN
 from model.STGODE.STGODE import ODEGCN
 from model.PDG2SEQ.PDG2Seq import PDG2Seq
-from model.EXP.EXP import EXP
-from model.EXPB.EXP_b import EXPB
+from model.EXP.EXP7 import EXP as EXP

 def model_selector(model):
     match model['type']:
@@ -34,5 +33,4 @@ def model_selector(model):
         case 'STGODE': return ODEGCN(model)
         case 'PDG2SEQ': return PDG2Seq(model)
         case 'EXP': return EXP(model)
-        case 'EXPB': return EXPB(model)
@@ -0,0 +1,176 @@
import math
import os
import time
import copy
from tqdm import tqdm

import torch
from lib.logger import get_logger
from lib.loss_function import all_metrics


class Trainer:
    def __init__(self, model, loss, optimizer, train_loader, val_loader, test_loader,
                 scaler, args, lr_scheduler=None):
        self.model = model
        self.loss = loss
        self.optimizer = optimizer
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        self.scaler = scaler
        self.args = args
        self.lr_scheduler = lr_scheduler
        self.train_per_epoch = len(train_loader)
        self.val_per_epoch = len(val_loader) if val_loader else 0

        # Paths for saving models and logs
        self.best_path = os.path.join(args['log_dir'], 'best_model.pth')
        self.best_test_path = os.path.join(args['log_dir'], 'best_test_model.pth')
        self.loss_figure_path = os.path.join(args['log_dir'], 'loss.png')

        # Initialize logger
        if not os.path.isdir(args['log_dir']) and not args['debug']:
            os.makedirs(args['log_dir'], exist_ok=True)
        self.logger = get_logger(args['log_dir'], name=self.model.__class__.__name__, debug=args['debug'])
        self.logger.info(f"Experiment log path in: {args['log_dir']}")

    def _run_epoch(self, epoch, dataloader, mode):
        if mode == 'train':
            self.model.train()
            optimizer_step = True
        else:
            self.model.eval()
            optimizer_step = False

        total_loss = 0
        epoch_time = time.time()

        with torch.set_grad_enabled(optimizer_step):
            with tqdm(total=len(dataloader), desc=f'{mode.capitalize()} Epoch {epoch}') as pbar:
                for batch_idx, (data, target) in enumerate(dataloader):
                    label = target[..., :self.args['output_dim']]
                    output = self.model(data).to(self.args['device'])

                    if self.args['real_value']:
                        output = self.scaler.inverse_transform(output)

                    loss = self.loss(output, label)
                    if optimizer_step and self.optimizer is not None:
                        self.optimizer.zero_grad()
                        loss.backward()

                        if self.args['grad_norm']:
                            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args['max_grad_norm'])
                        self.optimizer.step()

                    total_loss += loss.item()

                    if mode == 'train' and (batch_idx + 1) % self.args['log_step'] == 0:
                        self.logger.info(
                            f'Train Epoch {epoch}: {batch_idx + 1}/{len(dataloader)} Loss: {loss.item():.6f}')

                    # Update the tqdm progress bar
                    pbar.update(1)
                    pbar.set_postfix(loss=loss.item())

        avg_loss = total_loss / len(dataloader)
        self.logger.info(
            f'{mode.capitalize()} Epoch {epoch}: average Loss: {avg_loss:.6f}, time: {time.time() - epoch_time:.2f} s')
        return avg_loss

    def train_epoch(self, epoch):
        return self._run_epoch(epoch, self.train_loader, 'train')

    def val_epoch(self, epoch):
        return self._run_epoch(epoch, self.val_loader or self.test_loader, 'val')

    def test_epoch(self, epoch):
        return self._run_epoch(epoch, self.test_loader, 'test')

    def train(self):
        best_model, best_test_model = None, None
        best_loss, best_test_loss = float('inf'), float('inf')
        not_improved_count = 0

        self.logger.info("Training process started")
        for epoch in range(1, self.args['epochs'] + 1):
            train_epoch_loss = self.train_epoch(epoch)
            val_epoch_loss = self.val_epoch(epoch)
            test_epoch_loss = self.test_epoch(epoch)

            if train_epoch_loss > 1e6:
                self.logger.warning('Gradient explosion detected. Ending...')
                break

            if val_epoch_loss < best_loss:
                best_loss = val_epoch_loss
                not_improved_count = 0
                best_model = copy.deepcopy(self.model.state_dict())
                self.logger.info('Best validation model saved!')
            else:
                not_improved_count += 1

            if self.args['early_stop'] and not_improved_count == self.args['early_stop_patience']:
                self.logger.info(
                    f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops.")
                break

            if test_epoch_loss < best_test_loss:
                best_test_loss = test_epoch_loss
                best_test_model = copy.deepcopy(self.model.state_dict())

        if not self.args['debug']:
            torch.save(best_model, self.best_path)
            torch.save(best_test_model, self.best_test_path)
            self.logger.info(f"Best models saved at {self.best_path} and {self.best_test_path}")

        self._finalize_training(best_model, best_test_model)

    def _finalize_training(self, best_model, best_test_model):
        self.model.load_state_dict(best_model)
        self.logger.info("Testing on best validation model")
        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)

        self.model.load_state_dict(best_test_model)
        self.logger.info("Testing on best test model")
        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)

    @staticmethod
    def test(model, args, data_loader, scaler, logger, path=None):
        if path:
            checkpoint = torch.load(path)
            model.load_state_dict(checkpoint['state_dict'])
            model.to(args['device'])

        model.eval()
        y_pred, y_true = [], []

        with torch.no_grad():
            for data, target in data_loader:
                label = target[..., :args['output_dim']]
                output = model(data)
                y_pred.append(output)
                y_true.append(label)

        if args['real_value']:
            y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
        else:
            y_pred = torch.cat(y_pred, dim=0)
        y_true = torch.cat(y_true, dim=0)

        # Save y_pred and y_true here when needed
        # torch.save(y_pred, "./test/PEMS07/y_pred_D.pt")  # [3566, 12, 170, 1]
        # torch.save(y_true, "./test/PEMS08/y_true.pt")    # [3566, 12, 170, 1]

        for t in range(y_true.shape[1]):
            mae, rmse, mape = all_metrics(y_pred[:, t, ...], y_true[:, t, ...],
                                          args['mae_thresh'], args['mape_thresh'])
            logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

        mae, rmse, mape = all_metrics(y_pred, y_true, args['mae_thresh'], args['mape_thresh'])
        logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

    @staticmethod
    def _compute_sampling_threshold(global_step, k):
        return k / (k + math.exp(global_step / k))
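Illustrative sketch (not part of the commit): how this Trainer is typically wired up with the selectors; the data loaders, scaler, and extra keys such as device, log_dir and debug are produced elsewhere in the repository, so every name here is an assumption.

import torch

model = model_selector(args['model'])                       # args: parsed YAML config plus runtime keys
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args['train']['lr_init'],
                             weight_decay=args['train']['weight_decay'])
loss_fn = torch.nn.L1Loss()                                 # loss_func: mae
trainer = Trainer(model, loss_fn, optimizer,
                  train_loader, val_loader, test_loader,    # hypothetical DataLoaders
                  scaler, args['train'], lr_scheduler=None)
trainer.train()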
@@ -2,6 +2,7 @@ from trainer.Trainer import Trainer
 from trainer.cdeTrainer.cdetrainer import Trainer as cdeTrainer
 from trainer.DCRNN_Trainer import Trainer as DCRNN_Trainer
 from trainer.PDG2SEQ_Trainer import Trainer as PDG2SEQ_Trainer
+from trainer.EXP_trainer import Trainer as EXP_Trainer


 def select_trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args,
@@ -13,5 +14,7 @@ def select_trainer(model, loss, optimizer, train_loader, val_loader, test_loader
                                              lr_scheduler)
         case 'PDG2SEQ': return PDG2SEQ_Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
                                                lr_scheduler)
+        case 'EXP': return EXP_Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
+                                       lr_scheduler)
         case _: return Trainer(model, loss, optimizer, train_loader, val_loader, test_loader, scaler, args['train'],
                                lr_scheduler)