REPST #3

Merged
czzhangheng merged 42 commits from REPST into main 2025-12-20 16:03:22 +08:00
18 changed files with 1177 additions and 3 deletions
Showing only changes of commit 4ccb029d7e

View File

@ -0,0 +1,54 @@
basic:
dataset: AirQuality
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 6
lag: 24
normalizer: std
num_nodes: 35
steps_per_day: 24
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 6
plot: false
real_value: true
weight_decay: 0
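
Note: these YAML files are consumed as nested dicts keyed by the basic/data/model/train sections (the run-script change later in this diff loads them with yaml.safe_load). A minimal loading sketch; the file path below is an assumption following the ./config/<model>/<dataset>.yaml convention used in that script:

import yaml

# Hypothetical path, mirroring the ./config/{model}/{dataset}.yaml pattern in the run script below.
with open("./config/iTransformer/AirQuality.yaml") as f:
    config = yaml.safe_load(f)

print(config["basic"]["model"])    # "iTransformer"
print(config["train"]["lr_init"])  # 0.0001
# YAML quirk: lr_decay_step and mae_thresh parse as the strings "5,20,40,70" and "None",
# so the trainer is presumably expected to split/interpret them itself.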

View File

@ -0,0 +1,54 @@
basic:
dataset: BJTaxi-InFlow
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 2048
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 1024
steps_per_day: 48
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 2048
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: BJTaxi-OutFlow
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 2048
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 1024
steps_per_day: 48
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 2048
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: METR-LA
device: cuda:1
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 207
steps_per_day: 288
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: NYCBike-InFlow
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 128
steps_per_day: 48
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: NYCBike-OutFlow
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 128
steps_per_day: 48
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
patch_len: 6
stride: 8
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: PEMS-BAY
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
normalizer: std
num_nodes: 325
steps_per_day: 288
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
d_model: 128
patch_len: 6
stride: 8
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -0,0 +1,54 @@
basic:
dataset: SolarEnergy
device: cuda:0
mode: train
model: iTransformer
seed: 2023
data:
batch_size: 256
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 6
lag: 24
normalizer: std
num_nodes: 137
steps_per_day: 24
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
pred_len: 24
d_model: 128
patch_len: 6
stride: 8
d_ff: 2048
dropout: 0.1
e_layers: 2
n_heads: 8
output_attention: False
train:
batch_size: 256
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
grad_norm: false
log_step: 1000
loss_func: mae
lr_decay: true
lr_decay_rate: 0.3
lr_decay_step: 5,20,40,70
lr_init: 0.0001
mae_thresh: None
mape_thresh: 0.001
max_grad_norm: 5
output_dim: 1
plot: false
real_value: true
weight_decay: 0

View File

@ -7,7 +7,7 @@ from dataloader.TSloader import get_dataloader as TS_loader
def get_dataloader(config, normalizer, single):
-    TS_model = ["iTransformer", "HI"]
+    TS_model = ["iTransformer", "HI", "PatchTST"]
    model_name = config["basic"]["model"]
    if model_name in TS_model:
        return TS_loader(config, normalizer, single)

model/MTGNN/MTGNN.py (new file, 134 lines)

@ -0,0 +1,134 @@
import torch.nn as nn
from model.MTGNN.layer import *
class gtnet(nn.Module):
def __init__(self, gcn_true, buildA_true, gcn_depth, num_nodes, device, predefined_A=None, static_feat=None, dropout=0.3, subgraph_size=20, node_dim=40, dilation_exponential=1, conv_channels=32, residual_channels=32, skip_channels=64, end_channels=128, seq_length=12, in_dim=2, out_dim=12, layers=3, propalpha=0.05, tanhalpha=3, layer_norm_affline=True):
super(gtnet, self).__init__()
self.gcn_true = gcn_true
self.buildA_true = buildA_true
self.num_nodes = num_nodes
self.dropout = dropout
self.predefined_A = predefined_A
self.filter_convs = nn.ModuleList()
self.gate_convs = nn.ModuleList()
self.residual_convs = nn.ModuleList()
self.skip_convs = nn.ModuleList()
self.gconv1 = nn.ModuleList()
self.gconv2 = nn.ModuleList()
self.norm = nn.ModuleList()
self.start_conv = nn.Conv2d(in_channels=in_dim,
out_channels=residual_channels,
kernel_size=(1, 1))
self.gc = graph_constructor(num_nodes, subgraph_size, node_dim, device, alpha=tanhalpha, static_feat=static_feat)
self.seq_length = seq_length
kernel_size = 7
if dilation_exponential>1:
self.receptive_field = int(1+(kernel_size-1)*(dilation_exponential**layers-1)/(dilation_exponential-1))
else:
self.receptive_field = layers*(kernel_size-1) + 1
for i in range(1):
if dilation_exponential>1:
rf_size_i = int(1 + i*(kernel_size-1)*(dilation_exponential**layers-1)/(dilation_exponential-1))
else:
rf_size_i = i*layers*(kernel_size-1)+1
new_dilation = 1
for j in range(1,layers+1):
if dilation_exponential > 1:
rf_size_j = int(rf_size_i + (kernel_size-1)*(dilation_exponential**j-1)/(dilation_exponential-1))
else:
rf_size_j = rf_size_i+j*(kernel_size-1)
self.filter_convs.append(dilated_inception(residual_channels, conv_channels, dilation_factor=new_dilation))
self.gate_convs.append(dilated_inception(residual_channels, conv_channels, dilation_factor=new_dilation))
self.residual_convs.append(nn.Conv2d(in_channels=conv_channels,
out_channels=residual_channels,
kernel_size=(1, 1)))
if self.seq_length>self.receptive_field:
self.skip_convs.append(nn.Conv2d(in_channels=conv_channels,
out_channels=skip_channels,
kernel_size=(1, self.seq_length-rf_size_j+1)))
else:
self.skip_convs.append(nn.Conv2d(in_channels=conv_channels,
out_channels=skip_channels,
kernel_size=(1, self.receptive_field-rf_size_j+1)))
if self.gcn_true:
self.gconv1.append(mixprop(conv_channels, residual_channels, gcn_depth, dropout, propalpha))
self.gconv2.append(mixprop(conv_channels, residual_channels, gcn_depth, dropout, propalpha))
if self.seq_length>self.receptive_field:
self.norm.append(LayerNorm((residual_channels, num_nodes, self.seq_length - rf_size_j + 1),elementwise_affine=layer_norm_affline))
else:
self.norm.append(LayerNorm((residual_channels, num_nodes, self.receptive_field - rf_size_j + 1),elementwise_affine=layer_norm_affline))
new_dilation *= dilation_exponential
self.layers = layers
self.end_conv_1 = nn.Conv2d(in_channels=skip_channels,
out_channels=end_channels,
kernel_size=(1,1),
bias=True)
self.end_conv_2 = nn.Conv2d(in_channels=end_channels,
out_channels=out_dim,
kernel_size=(1,1),
bias=True)
if self.seq_length > self.receptive_field:
self.skip0 = nn.Conv2d(in_channels=in_dim, out_channels=skip_channels, kernel_size=(1, self.seq_length), bias=True)
self.skipE = nn.Conv2d(in_channels=residual_channels, out_channels=skip_channels, kernel_size=(1, self.seq_length-self.receptive_field+1), bias=True)
else:
self.skip0 = nn.Conv2d(in_channels=in_dim, out_channels=skip_channels, kernel_size=(1, self.receptive_field), bias=True)
self.skipE = nn.Conv2d(in_channels=residual_channels, out_channels=skip_channels, kernel_size=(1, 1), bias=True)
self.idx = torch.arange(self.num_nodes).to(device)
def forward(self, input, idx=None):
seq_len = input.size(3)
assert seq_len==self.seq_length, 'input sequence length not equal to preset sequence length'
if self.seq_length<self.receptive_field:
input = nn.functional.pad(input,(self.receptive_field-self.seq_length,0,0,0))
if self.gcn_true:
if self.buildA_true:
if idx is None:
adp = self.gc(self.idx)
else:
adp = self.gc(idx)
else:
adp = self.predefined_A
x = self.start_conv(input)
skip = self.skip0(F.dropout(input, self.dropout, training=self.training))
for i in range(self.layers):
residual = x
filter = self.filter_convs[i](x)
filter = torch.tanh(filter)
gate = self.gate_convs[i](x)
gate = torch.sigmoid(gate)
x = filter * gate
x = F.dropout(x, self.dropout, training=self.training)
s = x
s = self.skip_convs[i](s)
skip = s + skip
if self.gcn_true:
x = self.gconv1[i](x, adp)+self.gconv2[i](x, adp.transpose(1,0))
else:
x = self.residual_convs[i](x)
x = x + residual[:, :, :, -x.size(3):]
if idx is None:
x = self.norm[i](x,self.idx)
else:
x = self.norm[i](x,idx)
skip = self.skipE(x) + skip
x = F.relu(skip)
x = F.relu(self.end_conv_1(x))
x = self.end_conv_2(x)
return x
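
For orientation, gtnet consumes a 4-D tensor shaped [batch, in_dim, num_nodes, seq_length] (start_conv is a 1x1 Conv2d over the channel dimension) and emits [batch, out_dim, num_nodes, 1]. A minimal smoke-test sketch, assuming gtnet from the file above is importable; every hyperparameter below is illustrative and not taken from any config in this PR:

import torch

# Illustrative values only; subgraph_size must not exceed num_nodes for the top-k graph construction.
device = torch.device("cpu")
net = gtnet(gcn_true=True, buildA_true=True, gcn_depth=2, num_nodes=10,
            device=device, subgraph_size=5, seq_length=12, in_dim=2, out_dim=12, layers=3)
x = torch.randn(4, 2, 10, 12)   # [batch, in_dim, num_nodes, seq_length]
y = net(x)
print(y.shape)                  # expected: torch.Size([4, 12, 10, 1])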

model/MTGNN/layer.py (new file, 328 lines)

@ -0,0 +1,328 @@
from __future__ import division
import torch
import torch.nn as nn
from torch.nn import init
import numbers
import torch.nn.functional as F
class nconv(nn.Module):
def __init__(self):
super(nconv,self).__init__()
def forward(self,x, A):
x = torch.einsum('ncwl,vw->ncvl',(x,A))
return x.contiguous()
class dy_nconv(nn.Module):
def __init__(self):
super(dy_nconv,self).__init__()
def forward(self,x, A):
x = torch.einsum('ncvl,nvwl->ncwl',(x,A))
return x.contiguous()
class linear(nn.Module):
def __init__(self,c_in,c_out,bias=True):
super(linear,self).__init__()
self.mlp = torch.nn.Conv2d(c_in, c_out, kernel_size=(1, 1), padding=(0,0), stride=(1,1), bias=bias)
def forward(self,x):
return self.mlp(x)
class prop(nn.Module):
def __init__(self,c_in,c_out,gdep,dropout,alpha):
super(prop, self).__init__()
self.nconv = nconv()
self.mlp = linear(c_in,c_out)
self.gdep = gdep
self.dropout = dropout
self.alpha = alpha
def forward(self,x,adj):
adj = adj + torch.eye(adj.size(0)).to(x.device)
d = adj.sum(1)
h = x
dv = d
a = adj / dv.view(-1, 1)
for i in range(self.gdep):
h = self.alpha*x + (1-self.alpha)*self.nconv(h,a)
ho = self.mlp(h)
return ho
class mixprop(nn.Module):
def __init__(self,c_in,c_out,gdep,dropout,alpha):
super(mixprop, self).__init__()
self.nconv = nconv()
self.mlp = linear((gdep+1)*c_in,c_out)
self.gdep = gdep
self.dropout = dropout
self.alpha = alpha
def forward(self,x,adj):
adj = adj + torch.eye(adj.size(0)).to(x.device)
d = adj.sum(1)
h = x
out = [h]
a = adj / d.view(-1, 1)
for i in range(self.gdep):
h = self.alpha*x + (1-self.alpha)*self.nconv(h,a)
out.append(h)
ho = torch.cat(out,dim=1)
ho = self.mlp(ho)
return ho
class dy_mixprop(nn.Module):
def __init__(self,c_in,c_out,gdep,dropout,alpha):
super(dy_mixprop, self).__init__()
self.nconv = dy_nconv()
self.mlp1 = linear((gdep+1)*c_in,c_out)
self.mlp2 = linear((gdep+1)*c_in,c_out)
self.gdep = gdep
self.dropout = dropout
self.alpha = alpha
self.lin1 = linear(c_in,c_in)
self.lin2 = linear(c_in,c_in)
def forward(self,x):
#adj = adj + torch.eye(adj.size(0)).to(x.device)
#d = adj.sum(1)
x1 = torch.tanh(self.lin1(x))
x2 = torch.tanh(self.lin2(x))
adj = self.nconv(x1.transpose(2,1),x2)
adj0 = torch.softmax(adj, dim=2)
adj1 = torch.softmax(adj.transpose(2,1), dim=2)
h = x
out = [h]
for i in range(self.gdep):
h = self.alpha*x + (1-self.alpha)*self.nconv(h,adj0)
out.append(h)
ho = torch.cat(out,dim=1)
ho1 = self.mlp1(ho)
h = x
out = [h]
for i in range(self.gdep):
h = self.alpha * x + (1 - self.alpha) * self.nconv(h, adj1)
out.append(h)
ho = torch.cat(out, dim=1)
ho2 = self.mlp2(ho)
return ho1+ho2
class dilated_1D(nn.Module):
def __init__(self, cin, cout, dilation_factor=2):
super(dilated_1D, self).__init__()
self.tconv = nn.ModuleList()
self.kernel_set = [2,3,6,7]
self.tconv = nn.Conv2d(cin,cout,(1,7),dilation=(1,dilation_factor))
def forward(self,input):
x = self.tconv(input)
return x
class dilated_inception(nn.Module):
def __init__(self, cin, cout, dilation_factor=2):
super(dilated_inception, self).__init__()
self.tconv = nn.ModuleList()
self.kernel_set = [2,3,6,7]
cout = int(cout/len(self.kernel_set))
for kern in self.kernel_set:
self.tconv.append(nn.Conv2d(cin,cout,(1,kern),dilation=(1,dilation_factor)))
def forward(self,input):
x = []
for i in range(len(self.kernel_set)):
x.append(self.tconv[i](input))
for i in range(len(self.kernel_set)):
x[i] = x[i][...,-x[-1].size(3):]
x = torch.cat(x,dim=1)
return x
class graph_constructor(nn.Module):
def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None):
super(graph_constructor, self).__init__()
self.nnodes = nnodes
if static_feat is not None:
xd = static_feat.shape[1]
self.lin1 = nn.Linear(xd, dim)
self.lin2 = nn.Linear(xd, dim)
else:
self.emb1 = nn.Embedding(nnodes, dim)
self.emb2 = nn.Embedding(nnodes, dim)
self.lin1 = nn.Linear(dim,dim)
self.lin2 = nn.Linear(dim,dim)
self.device = device
self.k = k
self.dim = dim
self.alpha = alpha
self.static_feat = static_feat
def forward(self, idx):
if self.static_feat is None:
nodevec1 = self.emb1(idx)
nodevec2 = self.emb2(idx)
else:
nodevec1 = self.static_feat[idx,:]
nodevec2 = nodevec1
nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1))
nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2))
a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0))
adj = F.relu(torch.tanh(self.alpha*a))
mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device)
mask.fill_(float('0'))
s1,t1 = (adj + torch.rand_like(adj)*0.01).topk(self.k,1)
mask.scatter_(1,t1,s1.fill_(1))
adj = adj*mask
return adj
def fullA(self, idx):
if self.static_feat is None:
nodevec1 = self.emb1(idx)
nodevec2 = self.emb2(idx)
else:
nodevec1 = self.static_feat[idx,:]
nodevec2 = nodevec1
nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1))
nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2))
a = torch.mm(nodevec1, nodevec2.transpose(1,0))-torch.mm(nodevec2, nodevec1.transpose(1,0))
adj = F.relu(torch.tanh(self.alpha*a))
return adj
class graph_global(nn.Module):
def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None):
super(graph_global, self).__init__()
self.nnodes = nnodes
self.A = nn.Parameter(torch.randn(nnodes, nnodes).to(device), requires_grad=True).to(device)
def forward(self, idx):
return F.relu(self.A)
class graph_undirected(nn.Module):
def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None):
super(graph_undirected, self).__init__()
self.nnodes = nnodes
if static_feat is not None:
xd = static_feat.shape[1]
self.lin1 = nn.Linear(xd, dim)
else:
self.emb1 = nn.Embedding(nnodes, dim)
self.lin1 = nn.Linear(dim,dim)
self.device = device
self.k = k
self.dim = dim
self.alpha = alpha
self.static_feat = static_feat
def forward(self, idx):
if self.static_feat is None:
nodevec1 = self.emb1(idx)
nodevec2 = self.emb1(idx)
else:
nodevec1 = self.static_feat[idx,:]
nodevec2 = nodevec1
nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1))
nodevec2 = torch.tanh(self.alpha*self.lin1(nodevec2))
a = torch.mm(nodevec1, nodevec2.transpose(1,0))
adj = F.relu(torch.tanh(self.alpha*a))
mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device)
mask.fill_(float('0'))
s1,t1 = adj.topk(self.k,1)
mask.scatter_(1,t1,s1.fill_(1))
adj = adj*mask
return adj
class graph_directed(nn.Module):
def __init__(self, nnodes, k, dim, device, alpha=3, static_feat=None):
super(graph_directed, self).__init__()
self.nnodes = nnodes
if static_feat is not None:
xd = static_feat.shape[1]
self.lin1 = nn.Linear(xd, dim)
self.lin2 = nn.Linear(xd, dim)
else:
self.emb1 = nn.Embedding(nnodes, dim)
self.emb2 = nn.Embedding(nnodes, dim)
self.lin1 = nn.Linear(dim,dim)
self.lin2 = nn.Linear(dim,dim)
self.device = device
self.k = k
self.dim = dim
self.alpha = alpha
self.static_feat = static_feat
def forward(self, idx):
if self.static_feat is None:
nodevec1 = self.emb1(idx)
nodevec2 = self.emb2(idx)
else:
nodevec1 = self.static_feat[idx,:]
nodevec2 = nodevec1
nodevec1 = torch.tanh(self.alpha*self.lin1(nodevec1))
nodevec2 = torch.tanh(self.alpha*self.lin2(nodevec2))
a = torch.mm(nodevec1, nodevec2.transpose(1,0))
adj = F.relu(torch.tanh(self.alpha*a))
mask = torch.zeros(idx.size(0), idx.size(0)).to(self.device)
mask.fill_(float('0'))
s1,t1 = adj.topk(self.k,1)
mask.scatter_(1,t1,s1.fill_(1))
adj = adj*mask
return adj
class LayerNorm(nn.Module):
__constants__ = ['normalized_shape', 'weight', 'bias', 'eps', 'elementwise_affine']
def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
super(LayerNorm, self).__init__()
if isinstance(normalized_shape, numbers.Integral):
normalized_shape = (normalized_shape,)
self.normalized_shape = tuple(normalized_shape)
self.eps = eps
self.elementwise_affine = elementwise_affine
if self.elementwise_affine:
self.weight = nn.Parameter(torch.Tensor(*normalized_shape))
self.bias = nn.Parameter(torch.Tensor(*normalized_shape))
else:
self.register_parameter('weight', None)
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
if self.elementwise_affine:
init.ones_(self.weight)
init.zeros_(self.bias)
def forward(self, input, idx):
if self.elementwise_affine:
return F.layer_norm(input, tuple(input.shape[1:]), self.weight[:,idx,:], self.bias[:,idx,:], self.eps)
else:
return F.layer_norm(input, tuple(input.shape[1:]), self.weight, self.bias, self.eps)
def extra_repr(self):
return '{normalized_shape}, eps={eps}, ' \
'elementwise_affine={elementwise_affine}'.format(**self.__dict__)
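
In equation form (notation loosely following the MTGNN paper, reconstructed from the code above): mixprop runs K steps of mix-hop propagation over a row-normalized, self-looped adjacency and mixes the hops with an MLP,

H^{(0)} = X,\qquad \tilde{A} = D^{-1}(A + I),\qquad H^{(k)} = \alpha X + (1-\alpha)\,\tilde{A}\,H^{(k-1)},\qquad H_{\mathrm{out}} = \mathrm{MLP}\big([\,H^{(0)} \,\|\, H^{(1)} \,\|\, \cdots \,\|\, H^{(K)}\,]\big),

while graph_constructor learns a sparse directed adjacency from two node-embedding banks E_1, E_2:

M_i = \tanh\big(\alpha\,\mathrm{lin}_i(E_i)\big),\qquad A = \mathrm{ReLU}\big(\tanh\big(\alpha\,(M_1 M_2^{\top} - M_2 M_1^{\top})\big)\big),

after which only the k largest entries in each row of A are kept (with a small random tie-break added before topk) and everything else is zeroed by the mask.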

model/PatchTST/PatchTST.py (new file, 109 lines)

@ -0,0 +1,109 @@
import torch
from torch import nn
from model.PatchTST.layers.Transformer import Encoder, EncoderLayer
from model.PatchTST.layers.SelfAttention import FullAttention, AttentionLayer
from model.PatchTST.layers.Embed import PatchEmbedding
class Transpose(nn.Module):
def __init__(self, *dims, contiguous=False):
super().__init__()
self.dims, self.contiguous = dims, contiguous
def forward(self, x):
if self.contiguous: return x.transpose(*self.dims).contiguous()
else: return x.transpose(*self.dims)
class FlattenHead(nn.Module):
def __init__(self, n_vars, nf, target_window, head_dropout=0):
super().__init__()
self.n_vars = n_vars
self.flatten = nn.Flatten(start_dim=-2)
self.linear = nn.Linear(nf, target_window)
self.dropout = nn.Dropout(head_dropout)
def forward(self, x): # x: [bs x nvars x d_model x patch_num]
x = self.flatten(x)
x = self.linear(x)
x = self.dropout(x)
return x
class Model(nn.Module):
"""
Paper link: https://arxiv.org/pdf/2211.14730.pdf
"""
def __init__(self, configs):
"""
patch_len: int, patch len for patch_embedding
stride: int, stride for patch_embedding
"""
super().__init__()
self.seq_len = configs['seq_len']
self.pred_len = configs['pred_len']
self.patch_len = configs['patch_len']
self.stride = configs['stride']
padding = self.stride
# patching and embedding
self.patch_embedding = PatchEmbedding(
configs['d_model'], self.patch_len, self.stride, padding, configs['dropout'])
# Encoder
self.encoder = Encoder(
[
EncoderLayer(
AttentionLayer(
FullAttention(False, attention_dropout=configs['dropout'],
output_attention=False), configs['d_model'], configs['n_heads']),
configs['d_model'],
configs['d_ff'],
dropout=configs['dropout'],
activation=configs['activation']
) for l in range(configs['e_layers'])
],
norm_layer=nn.Sequential(Transpose(1,2), nn.BatchNorm1d(configs['d_model']), Transpose(1,2))
)
# Prediction Head (dict-style config access, consistent with the rest of __init__)
self.head_nf = configs['d_model'] * \
int((configs['seq_len'] - self.patch_len) / self.stride + 2)
self.head = FlattenHead(configs['enc_in'], self.head_nf, configs['pred_len'],
head_dropout=configs['dropout'])
def forecast(self, x_enc):
# Normalization from Non-stationary Transformer
means = x_enc.mean(1, keepdim=True).detach()
x_enc = x_enc - means
stdev = torch.sqrt(
torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
x_enc /= stdev
# do patching and embedding
x_enc = x_enc.permute(0, 2, 1)
# u: [bs * nvars x patch_num x d_model]
enc_out, n_vars = self.patch_embedding(x_enc)
# Encoder
# z: [bs * nvars x patch_num x d_model]
enc_out, attns = self.encoder(enc_out)
# z: [bs x nvars x patch_num x d_model]
enc_out = torch.reshape(
enc_out, (-1, n_vars, enc_out.shape[-2], enc_out.shape[-1]))
# z: [bs x nvars x d_model x patch_num]
enc_out = enc_out.permute(0, 1, 3, 2)
# Decoder
dec_out = self.head(enc_out) # z: [bs x nvars x target_window]
dec_out = dec_out.permute(0, 2, 1)
# De-Normalization from Non-stationary Transformer
dec_out = dec_out * \
(stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
dec_out = dec_out + \
(means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
return dec_out
def forward(self, x_enc):
dec_out = self.forecast(x_enc)
return dec_out[:, -self.pred_len:, :] # [B, L, D]
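
A hedged usage sketch for the class above, assuming the dict-style config used in __init__. The model: sections of the YAML files in this PR do not include enc_in, so it is added by hand here as an assumption (presumably the framework maps it from input_dim):

import torch
from model.PatchTST.PatchTST import Model as PatchTST

# Hypothetical config; keys mirror the model: section of the YAML configs above, plus enc_in (assumed).
cfg = {"seq_len": 24, "pred_len": 24, "patch_len": 6, "stride": 8,
       "d_model": 128, "d_ff": 2048, "dropout": 0.1, "e_layers": 2,
       "n_heads": 8, "activation": "gelu", "enc_in": 6}
model = PatchTST(cfg)
x = torch.randn(32, 24, 6)   # [batch, seq_len, n_vars]
y = model(x)
print(y.shape)               # expected: torch.Size([32, 24, 6])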

View File

@ -0,0 +1,29 @@
import torch
import torch.nn as nn
class PatchEmbedding(nn.Module):
def __init__(self, d_model, patch_len, stride, padding, dropout):
super(PatchEmbedding, self).__init__()
# Patching
self.patch_len = patch_len
self.stride = stride
self.padding_patch_layer = nn.ReplicationPad1d((0, padding))
# Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
# Positional embedding
self.position_embedding = PositionalEmbedding(d_model)
# Residual dropout
self.dropout = nn.Dropout(dropout)
def forward(self, x):
# do patching
n_vars = x.shape[1]
x = self.padding_patch_layer(x)
x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
# Input encoding
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x), n_vars
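
PatchEmbedding references a PositionalEmbedding module that does not appear in the lines of this hunk shown here. For completeness, a standard fixed sinusoidal version in the style of common PatchTST implementations, offered as an assumption rather than as the file's actual contents:

import math
import torch
import torch.nn as nn

class PositionalEmbedding(nn.Module):
    """Assumed sinusoidal positional encoding; sliced to the input's patch count in forward."""
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        pe = torch.zeros(max_len, d_model).float()
        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe.unsqueeze(0))
    def forward(self, x):
        # x: [bs * nvars, patch_num, d_model]; returns [1, patch_num, d_model], broadcast over the batch
        return self.pe[:, :x.size(1)]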

View File

@ -0,0 +1,80 @@
import torch
import torch.nn as nn
import numpy as np
from math import sqrt
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1. / sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
if self.output_attention:
return V.contiguous(), A
else:
return V.contiguous(), None
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads, d_keys=None,
d_values=None):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask,
tau=tau,
delta=delta
)
out = out.view(B, L, -1)
return self.out_projection(out), attn
class TriangularCausalMask:
def __init__(self, B, L, device="cpu"):
mask_shape = [B, 1, L, L]
with torch.no_grad():
self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
@property
def mask(self):
return self._mask
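
FullAttention above is standard scaled dot-product attention. With the default scale 1/\sqrt{E}, where E = d_model / n_heads is the per-head dimension set in AttentionLayer, it computes per head

\mathrm{Attn}(Q, K, V) = \mathrm{softmax}\!\left(\frac{Q K^{\top}}{\sqrt{E}}\right) V,

with an optional upper-triangular causal mask (TriangularCausalMask) and dropout applied to the attention weights. PatchTST instantiates it with mask_flag=False, so the causal-mask path is unused there.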

View File

@ -0,0 +1,57 @@
import torch.nn as nn
import torch.nn.functional as F
class EncoderLayer(nn.Module):
def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4 * d_model
self.attention = attention
self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, attn_mask=None, tau=None, delta=None):
new_x, attn = self.attention(
x, x, x,
attn_mask=attn_mask,
tau=tau, delta=delta
)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
y = self.dropout(self.conv2(y).transpose(-1, 1))
return self.norm2(x + y), attn
class Encoder(nn.Module):
def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
self.norm = norm_layer
def forward(self, x, attn_mask=None, tau=None, delta=None):
# x [B, L, D]
attns = []
if self.conv_layers is not None:
for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
delta = delta if i == 0 else None
x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
x = conv_layer(x)
attns.append(attn)
x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
attns.append(attn)
else:
for attn_layer in self.attn_layers:
x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
attns.append(attn)
if self.norm is not None:
x = self.norm(x)
return x, attns

View File

@ -29,6 +29,7 @@ from model.ASTRA.astrav2 import ASTRA as ASTRAv2
from model.ASTRA.astrav3 import ASTRA as ASTRAv3
from model.iTransformer.iTransformer import iTransformer
from model.HI.HI import HI
+from model.PatchTST.PatchTST import Model as PatchTST
@ -96,3 +97,5 @@ def model_selector(config):
            return iTransformer(model_config)
        case "HI":
            return HI(model_config)
+        case "PatchTST":
+            return PatchTST(model_config)

View File

@ -45,11 +45,13 @@ def run(config):
if __name__ == "__main__":
    # specify the model
-    model_list = ["HI"]
+    model_list = ["PatchTST"]
    # specify the datasets
    dataset_list = ["AirQuality", "SolarEnergy", "PEMS-BAY", "METR-LA", "BJTaxi-Inflow", "BJTaxi-Outflow", "NYCBike-Inflow", "NYCBike-Outflow"]
+    # dataset_list = ["AirQuality"]
    device = "cuda:0"  # specify the device
    seed = 2023  # random seed
+    epochs = 1
    for model in model_list:
        for dataset in dataset_list:
            config_path = f"./config/{model}/{dataset}.yaml"
@ -57,6 +59,7 @@ if __name__ == "__main__":
            config = yaml.safe_load(file)
            config["basic"]["device"] = device
            config["basic"]["seed"] = seed
+            config["train"]["epochs"] = epochs
            print(f"\nRunning {model} on {dataset} with seed {seed} on {device}")
            print(f"config: {config}")
            run(config)

View File

@ -2,7 +2,6 @@ import math
import os
import time
import copy
-import psutil
import torch
from utils.logger import get_logger
from utils.loss_function import all_metrics