update Informer

parent dfc76b8e90
commit 3063489534

@@ -0,0 +1 @@
Subproject commit 29f2a739226a509202a092b464163da81fa74960
@@ -6,50 +6,47 @@ basic:
  seed: 2023

data:
  batch_size: 16
  batch_size: 64
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 207
  steps_per_day: 288
  num_nodes: 137
  steps_per_day: 24
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 24
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  label_len: 12
  out_len: 24
  enc_in: 1
  dec_in: 1
  c_out: 1

  d_model: 64
  d_ff: 512
  dropout: 0.1
  e_layers: 1
  d_layers: 1
  n_heads: 4
  factor: 5
  output_attention: False
  distil: True
  mix: True
  attn: prob
  embed: fixed
  freq: h

train:
  batch_size: 16
  batch_size: 64
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  epochs: 1
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true

@@ -61,6 +58,5 @@ train:
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0
@@ -6,16 +6,15 @@ basic:
  seed: 2023

data:
  batch_size: 16
  batch_size: 64
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 128
  steps_per_day: 48
  num_nodes: 137
  steps_per_day: 24
  test_ratio: 0.2
  val_ratio: 0.2

@@ -23,33 +22,31 @@ model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  out_len: 24
  enc_in: 1
  dec_in: 1
  c_out: 1

  d_model: 64
  d_ff: 512
  dropout: 0.1
  e_layers: 1
  d_layers: 1
  n_heads: 4
  factor: 5
  output_attention: False
  distil: True
  mix: True
  attn: prob
  embed: fixed
  freq: h

train:
  batch_size: 16
  batch_size: 64
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  epochs: 1
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true

@@ -61,6 +58,5 @@ train:
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0
@@ -6,16 +6,15 @@ basic:
  seed: 2023

data:
  batch_size: 16
  batch_size: 64
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 128
  steps_per_day: 48
  num_nodes: 137
  steps_per_day: 24
  test_ratio: 0.2
  val_ratio: 0.2

@@ -23,33 +22,31 @@ model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  out_len: 24
  enc_in: 1
  dec_in: 1
  c_out: 1

  d_model: 64
  d_ff: 512
  dropout: 0.1
  e_layers: 1
  d_layers: 1
  n_heads: 4
  factor: 5
  output_attention: False
  distil: True
  mix: True
  attn: prob
  embed: fixed
  freq: h

train:
  batch_size: 16
  batch_size: 64
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  epochs: 1
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true

@@ -61,6 +58,5 @@ train:
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0
@@ -22,23 +22,25 @@ model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  out_len: 24
  enc_in: 1
  dec_in: 1
  c_out: 1
  d_model: 64
  d_ff: 512
  dropout: 0.1
  e_layers: 1
  d_layers: 1
  n_heads: 4
  factor: 5
  output_attention: False
  distil: True
  mix: True
  attn: prob
  embed: fixed
  freq: h


train:

@@ -48,7 +50,6 @@ train:
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true

@@ -60,6 +61,5 @@ train:
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0
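For orientation, a minimal sketch of how these YAML files are consumed (hedged: the config file paths are not shown in this diff, so the path below is hypothetical; `read_config` in the train.py hunks further below loads them the same way with `yaml.safe_load`):

```python
import yaml

# Hypothetical path; the actual config locations are not visible in this diff.
with open("config/Informer/SolarEnergy.yaml") as f:
    config = yaml.safe_load(f)

# The updated values above: d_model 64, e_layers 1, batch_size 64, epochs 1.
print(config["model"]["d_model"], config["model"]["attn"])
print(config["train"]["batch_size"], config["train"]["epochs"])
```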
@@ -0,0 +1,163 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from math import sqrt
from model.Informer.masking import TriangularCausalMask, ProbMask

class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1./sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)

class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            assert(L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2,1)
        keys = keys.transpose(2,1)
        values = values.transpose(2,1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part<L_K else L_K
        u = u if u<L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1./sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.transpose(2,1).contiguous(), attn


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model//n_heads)
        d_values = d_values or (d_model//n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads
        self.mix = mix

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )
        if self.mix:
            out = out.transpose(2,1).contiguous()
        out = out.view(B, L, -1)

        return self.out_projection(out), attn
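A minimal shape-check sketch for the classes above (assuming the file lands at model/Informer/attn.py, as the imports elsewhere in this commit suggest; values are illustrative and mirror the updated YAML):

```python
import torch
from model.Informer.attn import ProbAttention, AttentionLayer  # path assumed from this commit's imports

# Wrap ProbSparse attention in the multi-head projection layer.
layer = AttentionLayer(
    ProbAttention(mask_flag=False, factor=5, attention_dropout=0.1, output_attention=False),
    d_model=64, n_heads=4)

x = torch.randn(2, 24, 64)                  # [batch, seq_len, d_model]
out, attn = layer(x, x, x, attn_mask=None)  # self-attention; attn is None unless output_attention=True
print(out.shape)                            # torch.Size([2, 24, 64])
```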
@@ -0,0 +1,51 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class DecoderLayer(nn.Module):
    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff or 4*d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = x + self.dropout(self.self_attention(
            x, x, x,
            attn_mask=x_mask
        )[0])
        x = self.norm1(x)

        x = x + self.dropout(self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask
        )[0])

        y = x = self.norm2(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
        y = self.dropout(self.conv2(y).transpose(-1,1))

        return self.norm3(x+y)

class Decoder(nn.Module):
    def __init__(self, layers, norm_layer=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        for layer in self.layers:
            x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is not None:
            x = self.norm(x)

        return x
@@ -0,0 +1,36 @@
# model/InformerOnlyX/embed.py
import torch
import torch.nn as nn
import math


class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # [1, L, D]

    def forward(self, x):
        return self.pe[:, :x.size(1)]


class DataEmbedding(nn.Module):
    """
    Informer-style embedding without time covariates
    """

    def __init__(self, c_in, d_model, dropout):
        super().__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.position_embedding = PositionalEmbedding(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.value_embedding(x) + self.position_embedding(x)
        return self.dropout(x)
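A small usage sketch for this covariate-free embedding (illustrative values only):

```python
import torch

emb = DataEmbedding(c_in=1, d_model=64, dropout=0.1)  # DataEmbedding from the hunk above
x = torch.randn(8, 24, 1)                             # [batch, seq_len, channels]
print(emb(x).shape)                                   # torch.Size([8, 24, 64])
```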
@@ -0,0 +1,98 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvLayer(nn.Module):
    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.downConv = nn.Conv1d(in_channels=c_in,
                                  out_channels=c_in,
                                  kernel_size=3,
                                  padding=padding,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.downConv(x.permute(0, 2, 1))
        x = self.norm(x)
        x = self.activation(x)
        x = self.maxPool(x)
        x = x.transpose(1,2)
        return x

class EncoderLayer(nn.Module):
    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4*d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        # x = x + self.dropout(self.attention(
        #     x, x, x,
        #     attn_mask = attn_mask
        # ))
        new_x, attn = self.attention(
            x, x, x,
            attn_mask = attn_mask
        )
        x = x + self.dropout(new_x)

        y = x = self.norm1(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
        y = self.dropout(self.conv2(y).transpose(-1,1))

        return self.norm2(x+y), attn

class Encoder(nn.Module):
    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is not None:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns

class EncoderStack(nn.Module):
    def __init__(self, encoders, inp_lens):
        super(EncoderStack, self).__init__()
        self.encoders = nn.ModuleList(encoders)
        self.inp_lens = inp_lens

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        x_stack = []; attns = []
        for i_len, encoder in zip(self.inp_lens, self.encoders):
            inp_len = x.shape[1]//(2**i_len)
            x_s, attn = encoder(x[:, -inp_len:, :])
            x_stack.append(x_s); attns.append(attn)
        x_stack = torch.cat(x_stack, -2)

        return x_stack, attns
@@ -0,0 +1,25 @@
# model/Informer/head.py
import torch
import torch.nn as nn


class TemporalProjectionHead(nn.Module):
    """
    Project along temporal dimension
    [B, L, D] -> [B, pred_len, C]
    """

    def __init__(self, d_model, pred_len, c_out):
        super().__init__()
        self.temporal_proj = nn.Linear(1, pred_len)
        self.channel_proj = nn.Linear(d_model, c_out)

    def forward(self, x):
        # x: [B, L, D]
        # Average over the sequence dimension and then project
        x = x.mean(dim=1, keepdim=True)  # [B, 1, D]
        x = x.transpose(1, 2)            # [B, D, 1]
        x = self.temporal_proj(x)        # [B, D, pred_len]
        x = x.transpose(1, 2)            # [B, pred_len, D]
        x = self.channel_proj(x)         # [B, pred_len, C]
        return x
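A shape-check sketch for the head (illustrative values; note that averaging over the sequence dimension means every forecast step is projected from the same pooled vector):

```python
import torch

head = TemporalProjectionHead(d_model=64, pred_len=24, c_out=1)  # class from the hunk above
enc_out = torch.randn(8, 12, 64)                                 # [batch, L, d_model]
print(head(enc_out).shape)                                       # torch.Size([8, 24, 1])
```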
@@ -0,0 +1,24 @@
import torch

class TriangularCausalMask():
    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask

class ProbMask():
    def __init__(self, B, H, L, index, scores, device="cpu"):
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                             torch.arange(H)[None, :, None],
                             index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        return self._mask
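A brief sketch of how the causal mask is laid out (illustrative sizes):

```python
import torch

m = TriangularCausalMask(B=2, L=4)  # class from the hunk above
print(m.mask.shape)                  # torch.Size([2, 1, 4, 4])
print(m.mask[0, 0])                  # True above the diagonal = positions whose scores get set to -inf
```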
@@ -0,0 +1,48 @@
import torch
import torch.nn as nn

from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.embed import DataEmbedding
from model.Informer.head import TemporalProjectionHead


class InformerEncoder(nn.Module):

    def __init__(self, configs):
        super().__init__()

        self.seq_len = configs["seq_len"]
        self.pred_len = configs["pred_len"]

        Attn = ProbAttention if configs["attn"] == "prob" else FullAttention

        # Embedding
        self.embedding = DataEmbedding(configs["enc_in"], configs["d_model"], configs["dropout"])

        # Encoder(Attn-Conv-Norm)
        self.encoder = Encoder(
            [EncoderLayer(
                # Attn
                AttentionLayer(Attn(False, configs["factor"], configs["dropout"], False),
                               configs["d_model"], configs["n_heads"], False),
                configs["d_model"], configs["d_ff"], configs["dropout"], configs["activation"])
             for _ in range(configs["e_layers"])],
            # Conv
            [ConvLayer(configs["d_model"]) for _ in range(configs["e_layers"] - 1)]
            # Norm
            if configs.get("distil") else None, norm_layer=nn.LayerNorm(configs["d_model"])
        )

        # Forecast Head
        self.head = TemporalProjectionHead(
            d_model=configs["d_model"],
            pred_len=configs["pred_len"],
            c_out=configs["c_out"],
        )

    def forward(self, x_enc):
        x = self.embedding(x_enc)
        x, _ = self.encoder(x)
        out = self.head(x)
        return out[:, -self.pred_len:, :]
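A construction sketch for this encoder-only variant, using illustrative values that mirror the updated YAML above. One caveat worth hedging: the positional call `Attn(False, configs["factor"], configs["dropout"], False)` feeds the dropout value into ProbAttention's `scale` argument and `False` into `attention_dropout`, so keyword arguments (as in the rewritten model further below) may be what was intended.

```python
import torch

# Illustrative config values, keys assumed to match the YAML model: section.
configs = {"seq_len": 24, "pred_len": 24, "enc_in": 1, "c_out": 1,
           "d_model": 64, "d_ff": 512, "n_heads": 4, "e_layers": 1,
           "dropout": 0.1, "factor": 5, "attn": "prob",
           "activation": "gelu", "distil": True}

model = InformerEncoder(configs)  # class from the hunk above
x_enc = torch.randn(8, 24, 1)     # [batch, seq_len, enc_in]
print(model(x_enc).shape)         # torch.Size([8, 24, 1])
```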
@@ -1,36 +1,111 @@
# model/InformerOnlyX/embed.py
# model/Informer/embed.py
import torch
import torch.nn as nn
import math

class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular')
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
        return x

class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # [1, L, D]

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]

class FixedEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

class DataEmbedding(nn.Module):
    """
    Informer-style embedding without time covariates
    """
        w = torch.zeros(c_in, d_model).float()
        w.require_grad = False

    def __init__(self, c_in, d_model, dropout):
        super().__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.position_embedding = PositionalEmbedding(d_model)
        self.dropout = nn.Dropout(dropout)
        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()

class TemporalEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        minute_size = 4; hour_size = 24
        weekday_size = 7; day_size = 32; month_size = 13

        Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
        if freq=='t':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        x = x.long()

        minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
        hour_x = self.hour_embed(x[:,:,3])
        weekday_x = self.weekday_embed(x[:,:,2])
        day_x = self.day_embed(x[:,:,1])
        month_x = self.month_embed(x[:,:,0])

        return hour_x + weekday_x + day_x + month_x + minute_x

class TimeFeatureEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
        d_inp = freq_map[freq]
        self.embed = nn.Linear(d_inp, d_model)

    def forward(self, x):
        return self.embed(x)

class DataEmbedding(nn.Module):
    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        if x_mark is None:
            x = self.value_embedding(x) + self.position_embedding(x)
        else:
            x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)

        return self.dropout(x)
@@ -1,48 +1,209 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer
from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
from model.Informer.decoder import Decoder, DecoderLayer
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.embed import DataEmbedding
from model.Informer.head import TemporalProjectionHead


class InformerEncoder(nn.Module):
from model.Informer.masking import TriangularCausalMask, ProbMask

class Informer(nn.Module):
    def __init__(self, configs):
        super().__init__()
        super(Informer, self).__init__()
        # Extract parameters from configs
        self.enc_in = configs.get("enc_in", 1)
        self.dec_in = configs.get("dec_in", 1)
        self.c_out = configs.get("c_out", 1)
        self.seq_len = configs.get("seq_len", 96)
        self.label_len = configs.get("label_len", 48)
        self.out_len = configs.get("out_len", 24)
        self.factor = configs.get("factor", 5)
        self.d_model = configs.get("d_model", 512)
        self.n_heads = configs.get("n_heads", 8)
        self.e_layers = configs.get("e_layers", 3)
        self.d_layers = configs.get("d_layers", 2)
        self.d_ff = configs.get("d_ff", 512)
        self.dropout = configs.get("dropout", 0.0)
        self.attn = configs.get("attn", "prob")
        self.embed = configs.get("embed", "fixed")
        self.freq = configs.get("freq", "h")
        self.activation = configs.get("activation", "gelu")
        self.output_attention = configs.get("output_attention", False)
        self.distil = configs.get("distil", True)
        self.mix = configs.get("mix", True)
        self.device = configs.get("device", torch.device('cuda:0'))

        self.seq_len = configs["seq_len"]
        self.pred_len = configs["pred_len"]
        self.pred_len = self.out_len

        Attn = ProbAttention if configs["attn"] == "prob" else FullAttention
        # Embedding layers
        self.enc_embedding = DataEmbedding(self.enc_in, self.d_model, self.embed, self.freq, self.dropout)
        self.dec_embedding = DataEmbedding(self.dec_in, self.d_model, self.embed, self.freq, self.dropout)

        # Embedding
        self.embedding = DataEmbedding(configs["enc_in"], configs["d_model"], configs["dropout"])
        # Attention type
        Attn = ProbAttention if self.attn == 'prob' else FullAttention

        # Encoder(Attn-Conv-Norm)
        # Encoder
        self.encoder = Encoder(
            [EncoderLayer(
                # Attn
                AttentionLayer(Attn(False, configs["factor"], configs["dropout"], False),
                               configs["d_model"], configs["n_heads"], False),
                configs["d_model"], configs["d_ff"], configs["dropout"], configs["activation"])
             for _ in range(configs["e_layers"])],
            # Conv
            [ConvLayer(configs["d_model"]) for _ in range(configs["e_layers"] - 1)]
            # Norm
            if configs.get("distil") else None, norm_layer=nn.LayerNorm(configs["d_model"])
            [
                EncoderLayer(
                    AttentionLayer(Attn(False, self.factor, attention_dropout=self.dropout, output_attention=self.output_attention),
                                   self.d_model, self.n_heads, mix=False),
                    self.d_model,
                    self.d_ff,
                    dropout=self.dropout,
                    activation=self.activation
                ) for l in range(self.e_layers)
            ],
            [
                ConvLayer(
                    self.d_model
                ) for l in range(self.e_layers - 1)
            ] if self.distil else None,
            norm_layer=torch.nn.LayerNorm(self.d_model)
        )

        # Forecast Head
        self.head = TemporalProjectionHead(
            d_model=configs["d_model"],
            pred_len=configs["pred_len"],
            c_out=configs["c_out"],
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(Attn(True, self.factor, attention_dropout=self.dropout, output_attention=False),
                                   self.d_model, self.n_heads, mix=self.mix),
                    AttentionLayer(FullAttention(False, self.factor, attention_dropout=self.dropout, output_attention=False),
                                   self.d_model, self.n_heads, mix=False),
                    self.d_model,
                    self.d_ff,
                    dropout=self.dropout,
                    activation=self.activation,
                )
                for l in range(self.d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(self.d_model)
        )

    def forward(self, x_enc):
        x = self.embedding(x_enc)
        x, _ = self.encoder(x)
        out = self.head(x)
        return out[:, -self.pred_len:, :]
        # Projection layer
        self.projection = nn.Linear(self.d_model, self.c_out, bias=True)

    def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        # If x_dec and x_mark_dec are not provided, build them from x_enc and label_len
        if x_dec is None:
            x_dec = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_enc[:, :self.pred_len, :])], dim=1)
        if x_mark_dec is None and x_mark_enc is not None:
            x_mark_dec = torch.cat([x_mark_enc[:, -self.label_len:, :], torch.zeros_like(x_mark_enc[:, :self.pred_len, :])], dim=1)

        # Encode
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        # Decode
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        dec_out = self.projection(dec_out)

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]


class InformerStack(nn.Module):
    def __init__(self, configs):
        super(InformerStack, self).__init__()
        # Extract parameters from configs
        self.enc_in = configs.get("enc_in", 1)
        self.dec_in = configs.get("dec_in", 1)
        self.c_out = configs.get("c_out", 1)
        self.seq_len = configs.get("seq_len", 96)
        self.label_len = configs.get("label_len", 48)
        self.out_len = configs.get("out_len", 24)
        self.factor = configs.get("factor", 5)
        self.d_model = configs.get("d_model", 512)
        self.n_heads = configs.get("n_heads", 8)
        self.e_layers = configs.get("e_layers", [3, 2, 1])
        self.d_layers = configs.get("d_layers", 2)
        self.d_ff = configs.get("d_ff", 512)
        self.dropout = configs.get("dropout", 0.0)
        self.attn = configs.get("attn", "prob")
        self.embed = configs.get("embed", "fixed")
        self.freq = configs.get("freq", "h")
        self.activation = configs.get("activation", "gelu")
        self.output_attention = configs.get("output_attention", False)
        self.distil = configs.get("distil", True)
        self.mix = configs.get("mix", True)
        self.device = configs.get("device", torch.device('cuda:0'))

        self.pred_len = self.out_len

        # Embedding layers
        self.enc_embedding = DataEmbedding(self.enc_in, self.d_model, self.embed, self.freq, self.dropout)
        self.dec_embedding = DataEmbedding(self.dec_in, self.d_model, self.embed, self.freq, self.dropout)

        # Attention type
        Attn = ProbAttention if self.attn == 'prob' else FullAttention

        # Encoder stack
        inp_lens = list(range(len(self.e_layers)))  # [0,1,2,...] you can customize here
        encoders = [
            Encoder(
                [
                    EncoderLayer(
                        AttentionLayer(Attn(False, self.factor, attention_dropout=self.dropout, output_attention=self.output_attention),
                                       self.d_model, self.n_heads, mix=False),
                        self.d_model,
                        self.d_ff,
                        dropout=self.dropout,
                        activation=self.activation
                    ) for l in range(el)
                ],
                [
                    ConvLayer(
                        self.d_model
                    ) for l in range(el-1)
                ] if self.distil else None,
                norm_layer=torch.nn.LayerNorm(self.d_model)
            ) for el in self.e_layers]
        self.encoder = EncoderStack(encoders, inp_lens)

        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(Attn(True, self.factor, attention_dropout=self.dropout, output_attention=False),
                                   self.d_model, self.n_heads, mix=self.mix),
                    AttentionLayer(FullAttention(False, self.factor, attention_dropout=self.dropout, output_attention=False),
                                   self.d_model, self.n_heads, mix=False),
                    self.d_model,
                    self.d_ff,
                    dropout=self.dropout,
                    activation=self.activation,
                )
                for l in range(self.d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(self.d_model)
        )

        # Projection layer
        self.projection = nn.Linear(self.d_model, self.c_out, bias=True)

    def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        # If x_dec and x_mark_dec are not provided, build them from x_enc and label_len
        if x_dec is None:
            x_dec = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_enc[:, :self.pred_len, :])], dim=1)
        if x_mark_dec is None and x_mark_enc is not None:
            x_mark_dec = torch.cat([x_mark_enc[:, -self.label_len:, :], torch.zeros_like(x_mark_enc[:, :self.pred_len, :])], dim=1)

        # Encode
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        # Decode
        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        dec_out = self.projection(dec_out)

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
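A hedged end-to-end sketch of the new encoder-decoder entry point (module and class name taken from the registry change just below; config values are illustrative; when x_dec is omitted, the forward pass builds it from the last label_len steps of x_enc plus zero padding, as the code above shows):

```python
import torch
from model.Informer.model import Informer  # module/entry per the registry entry below

cfg = {"enc_in": 1, "dec_in": 1, "c_out": 1, "seq_len": 24, "label_len": 12,
       "out_len": 24, "d_model": 64, "d_ff": 512, "n_heads": 4,
       "e_layers": 1, "d_layers": 1, "dropout": 0.1, "attn": "prob",
       "embed": "fixed", "freq": "h", "device": torch.device("cpu")}

model = Informer(cfg)
x_enc = torch.randn(8, 24, 1)  # [batch, seq_len, enc_in]
y = model(x_enc)               # decoder input is generated internally from label_len + zeros
print(y.shape)                 # torch.Size([8, 24, 1])
```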
@@ -2,6 +2,11 @@
    {
        "name": "Informer",
        "module": "model.Informer.model",
        "entry": "InformerEncoder"
        "entry": "Informer"
    },
    {
        "name": "InformerStack",
        "module": "model.Informer.model",
        "entry": "InformerStack"
    }
]
train.py
@@ -12,9 +12,9 @@ def read_config(config_path):
        config = yaml.safe_load(file)

    # Global settings
    device = "cuda:0"  # target device
    device = "cpu"  # target device
    seed = 2023  # random seed
    epochs = 100  # number of training epochs
    epochs = 1  # number of training epochs

    # Copy these into the config
    config["basic"]["device"] = device

@@ -104,9 +104,14 @@ if __name__ == "__main__":
    # model_list = ["iTransformer", "PatchTST", "HI"]
    model_list = ["Informer"]
    # model_list = ["PatchTST"]
    # dataset_list = ["AirQuality"]
    # dataset_list = ["SolarEnergy"]
    # dataset_list = ["BJTaxi-InFlow", "BJTaxi-OutFlow"]
    dataset_list = ["SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"]
    # dataset_list = ["BJTaxi-OutFlow"]

    air = ["AirQuality"]
    big_dataset = ["BJTaxi-InFlow", "BJTaxi-OutFlow"]
    mid_dataset = ["PEMS-BAY"]
    regular_dataset = ["AirQuality", "SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"]
    test_dataset = ["BJTaxi-InFlow"]

    all_dataset = big_dataset + mid_dataset + regular_dataset

    dataset_list = test_dataset
    main(model_list, dataset_list, debug=False)