update Informer

This commit is contained in:
czzhangheng 2025-12-26 12:17:28 +08:00
parent dfc76b8e90
commit 3063489534
16 changed files with 830 additions and 150 deletions

Informer/Informer2020 Submodule

@@ -0,0 +1 @@
Subproject commit 29f2a739226a509202a092b464163da81fa74960


@@ -6,50 +6,47 @@ basic:
seed: 2023
data:
batch_size: 16
batch_size: 64
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
label_len: 24
normalizer: std
num_nodes: 207
steps_per_day: 288
num_nodes: 137
steps_per_day: 24
test_ratio: 0.2
val_ratio: 0.2
model:
activation: gelu
seq_len: 24
label_len: 24
pred_len: 24
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
d_layers: 1
n_heads: 8
output_attention: False
factor: 5
attn: prob
embed: fixed
freq: h
distil: true
mix: true
label_len: 12
out_len: 24
enc_in: 1
dec_in: 1
c_out: 1
d_model: 64
d_ff: 512
dropout: 0.1
e_layers: 1
d_layers: 1
n_heads: 4
factor: 5
output_attention: False
distil: True
mix: True
attn: prob
embed: fixed
freq: h
train:
batch_size: 16
batch_size: 64
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
epochs: 1
grad_norm: false
label_len: 24
log_step: 1000
loss_func: mae
lr_decay: true
@@ -61,6 +58,5 @@ train:
max_grad_norm: 5
output_dim: 1
plot: false
pred_len: 24
real_value: true
weight_decay: 0
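
For orientation, here is a minimal sketch of how a model: block like the one above can reach the network. The YAML path is a placeholder and passing cfg["model"] is an assumption about the runner; the Informer(configs) constructor itself matches model.py later in this commit.

import yaml
from model.Informer.model import Informer  # entry registered in this commit

# Placeholder path; any config carrying the model keys above will do.
with open("config/Informer/SolarEnergy.yaml") as f:
    cfg = yaml.safe_load(f)

# The constructor reads seq_len, label_len, out_len, enc_in, d_model, ... via configs.get().
model = Informer(cfg["model"])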


@@ -6,16 +6,15 @@ basic:
seed: 2023
data:
batch_size: 16
batch_size: 64
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
label_len: 24
normalizer: std
num_nodes: 128
steps_per_day: 48
num_nodes: 137
steps_per_day: 24
test_ratio: 0.2
val_ratio: 0.2
@@ -23,33 +22,31 @@ model:
activation: gelu
seq_len: 24
label_len: 12
pred_len: 24
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
d_layers: 1
n_heads: 8
output_attention: False
factor: 5
attn: prob
embed: fixed
freq: h
distil: true
mix: true
out_len: 24
enc_in: 1
dec_in: 1
c_out: 1
d_model: 64
d_ff: 512
dropout: 0.1
e_layers: 1
d_layers: 1
n_heads: 4
factor: 5
output_attention: False
distil: True
mix: True
attn: prob
embed: fixed
freq: h
train:
batch_size: 16
batch_size: 64
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
epochs: 1
grad_norm: false
label_len: 24
log_step: 1000
loss_func: mae
lr_decay: true
@@ -61,6 +58,5 @@ train:
max_grad_norm: 5
output_dim: 1
plot: false
pred_len: 24
real_value: true
weight_decay: 0


@@ -6,16 +6,15 @@ basic:
seed: 2023
data:
batch_size: 16
batch_size: 64
column_wise: false
days_per_week: 7
horizon: 24
input_dim: 1
lag: 24
label_len: 24
normalizer: std
num_nodes: 128
steps_per_day: 48
num_nodes: 137
steps_per_day: 24
test_ratio: 0.2
val_ratio: 0.2
@@ -23,33 +22,31 @@ model:
activation: gelu
seq_len: 24
label_len: 12
pred_len: 24
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
d_layers: 1
n_heads: 8
output_attention: False
factor: 5
attn: prob
embed: fixed
freq: h
distil: true
mix: true
out_len: 24
enc_in: 1
dec_in: 1
c_out: 1
d_model: 64
d_ff: 512
dropout: 0.1
e_layers: 1
d_layers: 1
n_heads: 4
factor: 5
output_attention: False
distil: True
mix: True
attn: prob
embed: fixed
freq: h
train:
batch_size: 16
batch_size: 64
debug: false
early_stop: true
early_stop_patience: 15
epochs: 100
epochs: 1
grad_norm: false
label_len: 24
log_step: 1000
loss_func: mae
lr_decay: true
@@ -61,6 +58,5 @@ train:
max_grad_norm: 5
output_dim: 1
plot: false
pred_len: 24
real_value: true
weight_decay: 0


@@ -22,23 +22,25 @@ model:
activation: gelu
seq_len: 24
label_len: 12
pred_len: 24
d_model: 128
d_ff: 2048
dropout: 0.1
e_layers: 2
d_layers: 1
n_heads: 8
output_attention: False
factor: 5
attn: prob
embed: fixed
freq: h
distil: true
mix: true
out_len: 24
enc_in: 1
dec_in: 1
c_out: 1
d_model: 64
d_ff: 512
dropout: 0.1
e_layers: 1
d_layers: 1
n_heads: 4
factor: 5
output_attention: False
distil: True
mix: True
attn: prob
embed: fixed
freq: h
train:
@@ -48,7 +50,6 @@ train:
early_stop_patience: 15
epochs: 100
grad_norm: false
label_len: 24
log_step: 1000
loss_func: mae
lr_decay: true
@@ -60,6 +61,5 @@ train:
max_grad_norm: 5
output_dim: 1
plot: false
pred_len: 24
real_value: true
weight_decay: 0


@@ -0,0 +1,163 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from math import sqrt
from model.Informer.masking import TriangularCausalMask, ProbMask
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1./sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
if self.output_attention:
return (V.contiguous(), A)
else:
return (V.contiguous(), None)
class ProbAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
# Q [B, H, L, D]
B, H, L_K, E = K.shape
_, _, L_Q, _ = Q.shape
# calculate the sampled Q_K
K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)
# find the Top_k queries with the sparsity measurement
M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
Q_reduce = Q[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
M_top, :] # factor*ln(L_q)
Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
return Q_K, M_top
def _get_initial_context(self, V, L_Q):
B, H, L_V, D = V.shape
if not self.mask_flag:
# V_sum = V.sum(dim=-2)
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
else: # use mask
assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
contex = V.cumsum(dim=-2)
return contex
def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
B, H, L_V, D = V.shape
if self.mask_flag:
attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
context_in[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :] = torch.matmul(attn, V).type_as(context_in)
if self.output_attention:
attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
return (context_in, attns)
else:
return (context_in, None)
def forward(self, queries, keys, values, attn_mask):
B, L_Q, H, D = queries.shape
_, L_K, _, _ = keys.shape
queries = queries.transpose(2,1)
keys = keys.transpose(2,1)
values = values.transpose(2,1)
U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
U_part = U_part if U_part<L_K else L_K
u = u if u<L_Q else L_Q
scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
# add scale factor
scale = self.scale or 1./sqrt(D)
if scale is not None:
scores_top = scores_top * scale
# get the context
context = self._get_initial_context(values, L_Q)
# update the context with selected top_k queries
context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)
return context.transpose(2,1).contiguous(), attn
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model//n_heads)
d_values = d_values or (d_model//n_heads)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.mix = mix
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.mix:
out = out.transpose(2,1).contiguous()
out = out.view(B, L, -1)
return self.out_projection(out), attn
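
As a sanity check for the attention module above, a small shape sketch (illustrative only; it assumes this file is importable as model.Informer.attn, as the imports elsewhere in this commit suggest):

import torch
from model.Informer.attn import ProbAttention, AttentionLayer

B, L, d_model, n_heads = 4, 24, 64, 4
layer = AttentionLayer(
    ProbAttention(mask_flag=False, factor=5, attention_dropout=0.1, output_attention=False),
    d_model, n_heads, mix=False,
)
x = torch.randn(B, L, d_model)
out, attn = layer(x, x, x, attn_mask=None)  # unmasked self-attention
print(out.shape)  # torch.Size([4, 24, 64]); attn is None because output_attention=False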


@@ -0,0 +1,51 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class DecoderLayer(nn.Module):
def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
dropout=0.1, activation="relu"):
super(DecoderLayer, self).__init__()
d_ff = d_ff or 4*d_model
self.self_attention = self_attention
self.cross_attention = cross_attention
self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.norm3 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, cross, x_mask=None, cross_mask=None):
x = x + self.dropout(self.self_attention(
x, x, x,
attn_mask=x_mask
)[0])
x = self.norm1(x)
x = x + self.dropout(self.cross_attention(
x, cross, cross,
attn_mask=cross_mask
)[0])
y = x = self.norm2(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
y = self.dropout(self.conv2(y).transpose(-1,1))
return self.norm3(x+y)
class Decoder(nn.Module):
def __init__(self, layers, norm_layer=None):
super(Decoder, self).__init__()
self.layers = nn.ModuleList(layers)
self.norm = norm_layer
def forward(self, x, cross, x_mask=None, cross_mask=None):
for layer in self.layers:
x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
if self.norm is not None:
x = self.norm(x)
return x
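
A shape sketch for one decoder layer (masked ProbSparse self-attention plus full cross-attention), assuming the module paths model.Informer.attn and model.Informer.decoder used elsewhere in this commit:

import torch
import torch.nn as nn
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.decoder import Decoder, DecoderLayer

d_model, n_heads = 64, 4
layer = DecoderLayer(
    AttentionLayer(ProbAttention(True, 5, attention_dropout=0.1), d_model, n_heads, mix=True),    # masked self-attention
    AttentionLayer(FullAttention(False, 5, attention_dropout=0.1), d_model, n_heads, mix=False),  # cross-attention
    d_model, d_ff=512, dropout=0.1, activation="gelu",
)
decoder = Decoder([layer], norm_layer=nn.LayerNorm(d_model))
x_dec = torch.randn(8, 36, d_model)    # label_len + pred_len decoder tokens, already embedded
enc_out = torch.randn(8, 24, d_model)  # encoder memory
print(decoder(x_dec, enc_out).shape)   # torch.Size([8, 36, 64])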


@@ -0,0 +1,36 @@
# model/InformerOnlyX/embed.py
import torch
import torch.nn as nn
import math
class PositionalEmbedding(nn.Module):
def __init__(self, d_model, max_len=5000):
super().__init__()
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len).unsqueeze(1).float()
div_term = torch.exp(
torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
)
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
self.register_buffer("pe", pe.unsqueeze(0)) # [1, L, D]
def forward(self, x):
return self.pe[:, :x.size(1)]
class DataEmbedding(nn.Module):
"""
Informer-style embedding without time covariates
"""
def __init__(self, c_in, d_model, dropout):
super().__init__()
self.value_embedding = nn.Linear(c_in, d_model)
self.position_embedding = PositionalEmbedding(d_model)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.value_embedding(x) + self.position_embedding(x)
return self.dropout(x)
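
A quick shape check for this covariate-free embedding. The import path follows the header comment above and is an assumption; adjust it to wherever this copy actually lives.

import torch
from model.InformerOnlyX.embed import DataEmbedding  # assumed path, per the header comment

emb = DataEmbedding(c_in=1, d_model=64, dropout=0.1)
x = torch.randn(16, 24, 1)   # [B, seq_len, enc_in]
print(emb(x).shape)          # torch.Size([16, 24, 64])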


@@ -0,0 +1,98 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class ConvLayer(nn.Module):
def __init__(self, c_in):
super(ConvLayer, self).__init__()
padding = 1 if torch.__version__>='1.5.0' else 2
self.downConv = nn.Conv1d(in_channels=c_in,
out_channels=c_in,
kernel_size=3,
padding=padding,
padding_mode='circular')
self.norm = nn.BatchNorm1d(c_in)
self.activation = nn.ELU()
self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
def forward(self, x):
x = self.downConv(x.permute(0, 2, 1))
x = self.norm(x)
x = self.activation(x)
x = self.maxPool(x)
x = x.transpose(1,2)
return x
class EncoderLayer(nn.Module):
def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
super(EncoderLayer, self).__init__()
d_ff = d_ff or 4*d_model
self.attention = attention
self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
self.dropout = nn.Dropout(dropout)
self.activation = F.relu if activation == "relu" else F.gelu
def forward(self, x, attn_mask=None):
# x [B, L, D]
# x = x + self.dropout(self.attention(
# x, x, x,
# attn_mask = attn_mask
# ))
new_x, attn = self.attention(
x, x, x,
attn_mask = attn_mask
)
x = x + self.dropout(new_x)
y = x = self.norm1(x)
y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
y = self.dropout(self.conv2(y).transpose(-1,1))
return self.norm2(x+y), attn
class Encoder(nn.Module):
def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
super(Encoder, self).__init__()
self.attn_layers = nn.ModuleList(attn_layers)
self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
self.norm = norm_layer
def forward(self, x, attn_mask=None):
# x [B, L, D]
attns = []
if self.conv_layers is not None:
for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
x, attn = attn_layer(x, attn_mask=attn_mask)
x = conv_layer(x)
attns.append(attn)
x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
attns.append(attn)
else:
for attn_layer in self.attn_layers:
x, attn = attn_layer(x, attn_mask=attn_mask)
attns.append(attn)
if self.norm is not None:
x = self.norm(x)
return x, attns
class EncoderStack(nn.Module):
def __init__(self, encoders, inp_lens):
super(EncoderStack, self).__init__()
self.encoders = nn.ModuleList(encoders)
self.inp_lens = inp_lens
def forward(self, x, attn_mask=None):
# x [B, L, D]
x_stack = []; attns = []
for i_len, encoder in zip(self.inp_lens, self.encoders):
inp_len = x.shape[1]//(2**i_len)
x_s, attn = encoder(x[:, -inp_len:, :])
x_stack.append(x_s); attns.append(attn)
x_stack = torch.cat(x_stack, -2)
return x_stack, attns
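
A small sketch of the distilling encoder above (assumes model.Informer.attn and model.Informer.encoder, as imported by model.py in this commit). The ConvLayer between attention blocks halves the sequence length:

import torch
import torch.nn as nn
from model.Informer.attn import ProbAttention, AttentionLayer
from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer

d_model, n_heads, e_layers = 64, 4, 2
encoder = Encoder(
    [EncoderLayer(
        AttentionLayer(ProbAttention(False, 5, attention_dropout=0.1), d_model, n_heads),
        d_model, d_ff=512, dropout=0.1, activation="gelu")
     for _ in range(e_layers)],
    [ConvLayer(d_model) for _ in range(e_layers - 1)],  # distilling convs between attention layers
    norm_layer=nn.LayerNorm(d_model),
)
x = torch.randn(8, 24, d_model)  # [B, L, D]
y, attns = encoder(x)
print(y.shape)                   # torch.Size([8, 12, 64]); distilling halves L: 24 -> 12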


@@ -0,0 +1,25 @@
# model/Informer/head.py
import torch
import torch.nn as nn
class TemporalProjectionHead(nn.Module):
"""
Pool the encoder output over time, then project to the forecast horizon:
[B, L, D] -> [B, pred_len, C]
"""
def __init__(self, d_model, pred_len, c_out):
super().__init__()
self.temporal_proj = nn.Linear(1, pred_len)
self.channel_proj = nn.Linear(d_model, c_out)
def forward(self, x):
# x: [B, L, D]
# Average over the sequence dimension and then project
x = x.mean(dim=1, keepdim=True) # [B, 1, D]
x = x.transpose(1, 2) # [B, D, 1]
x = self.temporal_proj(x) # [B, D, pred_len]
x = x.transpose(1, 2) # [B, pred_len, D]
x = self.channel_proj(x) # [B, pred_len, C]
return x
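
Shape sketch for TemporalProjectionHead (illustrative; the head averages over the input length before projecting, so any encoder output length works):

import torch
from model.Informer.head import TemporalProjectionHead

head = TemporalProjectionHead(d_model=64, pred_len=24, c_out=1)
enc_out = torch.randn(16, 12, 64)  # [B, L, D], e.g. encoder output after distilling
print(head(enc_out).shape)         # torch.Size([16, 24, 1]) -> [B, pred_len, C]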


@@ -0,0 +1,24 @@
import torch
class TriangularCausalMask():
def __init__(self, B, L, device="cpu"):
mask_shape = [B, 1, L, L]
with torch.no_grad():
self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
@property
def mask(self):
return self._mask
class ProbMask():
def __init__(self, B, H, L, index, scores, device="cpu"):
_mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
_mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
indicator = _mask_ex[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :].to(device)
self._mask = indicator.view(scores.shape).to(device)
@property
def mask(self):
return self._mask
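
A tiny sketch of the causal mask consumed by FullAttention (illustrative; True marks the masked, i.e. future, positions):

import torch
from model.Informer.masking import TriangularCausalMask

m = TriangularCausalMask(B=2, L=4)
print(m.mask.shape)  # torch.Size([2, 1, 4, 4])
print(m.mask[0, 0])  # upper-triangular True entries get filled with -inf in the attention scores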


@@ -0,0 +1,48 @@
import torch
import torch.nn as nn
from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.embed import DataEmbedding
from model.Informer.head import TemporalProjectionHead
class InformerEncoder(nn.Module):
def __init__(self, configs):
super().__init__()
self.seq_len = configs["seq_len"]
self.pred_len = configs["pred_len"]
Attn = ProbAttention if configs["attn"] == "prob" else FullAttention
# Embedding
self.embedding = DataEmbedding(configs["enc_in"], configs["d_model"], configs["dropout"])
# Encoder(Attn-Conv-Norm)
self.encoder = Encoder(
[EncoderLayer(
# Attn
AttentionLayer(Attn(False, configs["factor"], configs["dropout"], False),
configs["d_model"], configs["n_heads"], False),
configs["d_model"], configs["d_ff"], configs["dropout"], configs["activation"])
for _ in range(configs["e_layers"])],
# Conv
[ConvLayer(configs["d_model"]) for _ in range(configs["e_layers"] - 1)]
# Norm
if configs.get("distil") else None, norm_layer=nn.LayerNorm(configs["d_model"])
)
# Forecast Head
self.head = TemporalProjectionHead(
d_model=configs["d_model"],
pred_len=configs["pred_len"],
c_out=configs["c_out"],
)
def forward(self, x_enc):
x = self.embedding(x_enc)
x, _ = self.encoder(x)
out = self.head(x)
return out[:, -self.pred_len :, :]


@@ -1,36 +1,111 @@
# model/InformerOnlyX/embed.py
# model/Informer/embed.py
import torch
import torch.nn as nn
import math
class TokenEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(TokenEmbedding, self).__init__()
padding = 1 if torch.__version__>='1.5.0' else 2
self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
kernel_size=3, padding=padding, padding_mode='circular')
for m in self.modules():
if isinstance(m, nn.Conv1d):
nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu')
def forward(self, x):
x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
return x
class PositionalEmbedding(nn.Module):
def __init__(self, d_model, max_len=5000):
super().__init__()
pe = torch.zeros(max_len, d_model)
position = torch.arange(0, max_len).unsqueeze(1).float()
div_term = torch.exp(
torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
)
super(PositionalEmbedding, self).__init__()
# Compute the positional encodings once in log space.
pe = torch.zeros(max_len, d_model).float()
pe.requires_grad = False
position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
pe[:, 0::2] = torch.sin(position * div_term)
pe[:, 1::2] = torch.cos(position * div_term)
self.register_buffer("pe", pe.unsqueeze(0)) # [1, L, D]
pe = pe.unsqueeze(0)
self.register_buffer('pe', pe)
def forward(self, x):
return self.pe[:, :x.size(1)]
class FixedEmbedding(nn.Module):
def __init__(self, c_in, d_model):
super(FixedEmbedding, self).__init__()
class DataEmbedding(nn.Module):
"""
Informer-style embedding without time covariates
"""
w = torch.zeros(c_in, d_model).float()
w.requires_grad = False
def __init__(self, c_in, d_model, dropout):
super().__init__()
self.value_embedding = nn.Linear(c_in, d_model)
self.position_embedding = PositionalEmbedding(d_model)
self.dropout = nn.Dropout(dropout)
position = torch.arange(0, c_in).float().unsqueeze(1)
div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
w[:, 0::2] = torch.sin(position * div_term)
w[:, 1::2] = torch.cos(position * div_term)
self.emb = nn.Embedding(c_in, d_model)
self.emb.weight = nn.Parameter(w, requires_grad=False)
def forward(self, x):
x = self.value_embedding(x) + self.position_embedding(x)
return self.emb(x).detach()
class TemporalEmbedding(nn.Module):
def __init__(self, d_model, embed_type='fixed', freq='h'):
super(TemporalEmbedding, self).__init__()
minute_size = 4; hour_size = 24
weekday_size = 7; day_size = 32; month_size = 13
Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
if freq=='t':
self.minute_embed = Embed(minute_size, d_model)
self.hour_embed = Embed(hour_size, d_model)
self.weekday_embed = Embed(weekday_size, d_model)
self.day_embed = Embed(day_size, d_model)
self.month_embed = Embed(month_size, d_model)
def forward(self, x):
x = x.long()
minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
hour_x = self.hour_embed(x[:,:,3])
weekday_x = self.weekday_embed(x[:,:,2])
day_x = self.day_embed(x[:,:,1])
month_x = self.month_embed(x[:,:,0])
return hour_x + weekday_x + day_x + month_x + minute_x
class TimeFeatureEmbedding(nn.Module):
def __init__(self, d_model, embed_type='timeF', freq='h'):
super(TimeFeatureEmbedding, self).__init__()
freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
d_inp = freq_map[freq]
self.embed = nn.Linear(d_inp, d_model)
def forward(self, x):
return self.embed(x)
class DataEmbedding(nn.Module):
def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
super(DataEmbedding, self).__init__()
self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
self.position_embedding = PositionalEmbedding(d_model=d_model)
self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
self.dropout = nn.Dropout(p=dropout)
def forward(self, x, x_mark):
if x_mark is None:
x = self.value_embedding(x) + self.position_embedding(x)
else:
x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)
return self.dropout(x)
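
A shape sketch for the restored covariate-aware DataEmbedding. With embed='fixed' and freq='h' the code above indexes x_mark as [month, day, weekday, hour]; the dummy ranges below follow that indexing and are otherwise arbitrary:

import torch
from model.Informer.embed import DataEmbedding

emb = DataEmbedding(c_in=1, d_model=64, embed_type='fixed', freq='h', dropout=0.1)
x = torch.randn(8, 24, 1)               # [B, seq_len, enc_in]
x_mark = torch.stack([                  # [B, seq_len, 4]: month, day, weekday, hour
    torch.randint(1, 13, (8, 24)),
    torch.randint(1, 32, (8, 24)),
    torch.randint(0, 7, (8, 24)),
    torch.randint(0, 24, (8, 24)),
], dim=-1).float()
print(emb(x, x_mark).shape)             # torch.Size([8, 24, 64])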


@@ -1,48 +1,209 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer
from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
from model.Informer.decoder import Decoder, DecoderLayer
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.embed import DataEmbedding
from model.Informer.head import TemporalProjectionHead
class InformerEncoder(nn.Module):
from model.Informer.masking import TriangularCausalMask, ProbMask
class Informer(nn.Module):
def __init__(self, configs):
super().__init__()
super(Informer, self).__init__()
# Extract parameters from configs
self.enc_in = configs.get("enc_in", 1)
self.dec_in = configs.get("dec_in", 1)
self.c_out = configs.get("c_out", 1)
self.seq_len = configs.get("seq_len", 96)
self.label_len = configs.get("label_len", 48)
self.out_len = configs.get("out_len", 24)
self.factor = configs.get("factor", 5)
self.d_model = configs.get("d_model", 512)
self.n_heads = configs.get("n_heads", 8)
self.e_layers = configs.get("e_layers", 3)
self.d_layers = configs.get("d_layers", 2)
self.d_ff = configs.get("d_ff", 512)
self.dropout = configs.get("dropout", 0.0)
self.attn = configs.get("attn", "prob")
self.embed = configs.get("embed", "fixed")
self.freq = configs.get("freq", "h")
self.activation = configs.get("activation", "gelu")
self.output_attention = configs.get("output_attention", False)
self.distil = configs.get("distil", True)
self.mix = configs.get("mix", True)
self.device = configs.get("device", torch.device('cuda:0'))
self.seq_len = configs["seq_len"]
self.pred_len = configs["pred_len"]
self.pred_len = self.out_len
Attn = ProbAttention if configs["attn"] == "prob" else FullAttention
# Embedding layers
self.enc_embedding = DataEmbedding(self.enc_in, self.d_model, self.embed, self.freq, self.dropout)
self.dec_embedding = DataEmbedding(self.dec_in, self.d_model, self.embed, self.freq, self.dropout)
# Embedding
self.embedding = DataEmbedding(configs["enc_in"], configs["d_model"], configs["dropout"])
# Attention type
Attn = ProbAttention if self.attn == 'prob' else FullAttention
# Encoder(Attn-Conv-Norm)
# Encoder
self.encoder = Encoder(
[EncoderLayer(
# Attn
AttentionLayer(Attn(False, configs["factor"], configs["dropout"], False),
configs["d_model"], configs["n_heads"], False),
configs["d_model"], configs["d_ff"], configs["dropout"], configs["activation"])
for _ in range(configs["e_layers"])],
# Conv
[ConvLayer(configs["d_model"]) for _ in range(configs["e_layers"] - 1)]
# Norm
if configs.get("distil") else None, norm_layer=nn.LayerNorm(configs["d_model"])
[
EncoderLayer(
AttentionLayer(Attn(False, self.factor, attention_dropout=self.dropout, output_attention=self.output_attention),
self.d_model, self.n_heads, mix=False),
self.d_model,
self.d_ff,
dropout=self.dropout,
activation=self.activation
) for l in range(self.e_layers)
],
[
ConvLayer(
self.d_model
) for l in range(self.e_layers - 1)
] if self.distil else None,
norm_layer=torch.nn.LayerNorm(self.d_model)
)
# Forecast Head
self.head = TemporalProjectionHead(
d_model=configs["d_model"],
pred_len=configs["pred_len"],
c_out=configs["c_out"],
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AttentionLayer(Attn(True, self.factor, attention_dropout=self.dropout, output_attention=False),
self.d_model, self.n_heads, mix=self.mix),
AttentionLayer(FullAttention(False, self.factor, attention_dropout=self.dropout, output_attention=False),
self.d_model, self.n_heads, mix=False),
self.d_model,
self.d_ff,
dropout=self.dropout,
activation=self.activation,
)
for l in range(self.d_layers)
],
norm_layer=torch.nn.LayerNorm(self.d_model)
)
def forward(self, x_enc):
x = self.embedding(x_enc)
x, _ = self.encoder(x)
out = self.head(x)
return out[:, -self.pred_len :, :]
# Projection layer
self.projection = nn.Linear(self.d_model, self.c_out, bias=True)
def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None,
enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
# If x_dec and x_mark_dec are not provided, build them from x_enc and label_len
if x_dec is None:
x_dec = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_enc[:, :self.pred_len, :])], dim=1)
if x_mark_dec is None and x_mark_enc is not None:
x_mark_dec = torch.cat([x_mark_enc[:, -self.label_len:, :], torch.zeros_like(x_mark_enc[:, :self.pred_len, :])], dim=1)
# Encode
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
# Decode
dec_out = self.dec_embedding(x_dec, x_mark_dec)
dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
dec_out = self.projection(dec_out)
if self.output_attention:
return dec_out[:, -self.pred_len:, :], attns
else:
return dec_out[:, -self.pred_len:, :] # [B, L, D]
class InformerStack(nn.Module):
def __init__(self, configs):
super(InformerStack, self).__init__()
# Extract parameters from configs
self.enc_in = configs.get("enc_in", 1)
self.dec_in = configs.get("dec_in", 1)
self.c_out = configs.get("c_out", 1)
self.seq_len = configs.get("seq_len", 96)
self.label_len = configs.get("label_len", 48)
self.out_len = configs.get("out_len", 24)
self.factor = configs.get("factor", 5)
self.d_model = configs.get("d_model", 512)
self.n_heads = configs.get("n_heads", 8)
self.e_layers = configs.get("e_layers", [3, 2, 1])
self.d_layers = configs.get("d_layers", 2)
self.d_ff = configs.get("d_ff", 512)
self.dropout = configs.get("dropout", 0.0)
self.attn = configs.get("attn", "prob")
self.embed = configs.get("embed", "fixed")
self.freq = configs.get("freq", "h")
self.activation = configs.get("activation", "gelu")
self.output_attention = configs.get("output_attention", False)
self.distil = configs.get("distil", True)
self.mix = configs.get("mix", True)
self.device = configs.get("device", torch.device('cuda:0'))
self.pred_len = self.out_len
# Embedding layers
self.enc_embedding = DataEmbedding(self.enc_in, self.d_model, self.embed, self.freq, self.dropout)
self.dec_embedding = DataEmbedding(self.dec_in, self.d_model, self.embed, self.freq, self.dropout)
# Attention type
Attn = ProbAttention if self.attn == 'prob' else FullAttention
# Encoder stack
inp_lens = list(range(len(self.e_layers))) # [0, 1, 2, ...]; customize the per-encoder input lengths here
encoders = [
Encoder(
[
EncoderLayer(
AttentionLayer(Attn(False, self.factor, attention_dropout=self.dropout, output_attention=self.output_attention),
self.d_model, self.n_heads, mix=False),
self.d_model,
self.d_ff,
dropout=self.dropout,
activation=self.activation
) for l in range(el)
],
[
ConvLayer(
self.d_model
) for l in range(el-1)
] if self.distil else None,
norm_layer=torch.nn.LayerNorm(self.d_model)
) for el in self.e_layers]
self.encoder = EncoderStack(encoders, inp_lens)
# Decoder
self.decoder = Decoder(
[
DecoderLayer(
AttentionLayer(Attn(True, self.factor, attention_dropout=self.dropout, output_attention=False),
self.d_model, self.n_heads, mix=self.mix),
AttentionLayer(FullAttention(False, self.factor, attention_dropout=self.dropout, output_attention=False),
self.d_model, self.n_heads, mix=False),
self.d_model,
self.d_ff,
dropout=self.dropout,
activation=self.activation,
)
for l in range(self.d_layers)
],
norm_layer=torch.nn.LayerNorm(self.d_model)
)
# Projection layer
self.projection = nn.Linear(self.d_model, self.c_out, bias=True)
def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None,
enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
# If x_dec and x_mark_dec are not provided, build them from x_enc and label_len
if x_dec is None:
x_dec = torch.cat([x_enc[:, -self.label_len:, :], torch.zeros_like(x_enc[:, :self.pred_len, :])], dim=1)
if x_mark_dec is None and x_mark_enc is not None:
x_mark_dec = torch.cat([x_mark_enc[:, -self.label_len:, :], torch.zeros_like(x_mark_enc[:, :self.pred_len, :])], dim=1)
# Encode
enc_out = self.enc_embedding(x_enc, x_mark_enc)
enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
# Decode
dec_out = self.dec_embedding(x_dec, x_mark_dec)
dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
dec_out = self.projection(dec_out)
if self.output_attention:
return dec_out[:, -self.pred_len:, :], attns
else:
return dec_out[:, -self.pred_len:, :] # [B, L, D]
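
A minimal forward-pass sketch for the rewritten Informer (CPU, no time marks): x_dec is synthesized internally from the last label_len steps of x_enc, and the config values mirror the YAMLs above.

import torch
from model.Informer.model import Informer

configs = {"enc_in": 1, "dec_in": 1, "c_out": 1,
           "seq_len": 24, "label_len": 12, "out_len": 24,
           "d_model": 64, "d_ff": 512, "n_heads": 4,
           "e_layers": 1, "d_layers": 1, "dropout": 0.1,
           "attn": "prob", "embed": "fixed", "freq": "h",
           "activation": "gelu", "distil": True, "mix": True,
           "device": "cpu"}
model = Informer(configs)
x_enc = torch.randn(8, 24, 1)  # [B, seq_len, enc_in]
y = model(x_enc)               # decoder input is built from the last label_len steps plus zeros
print(y.shape)                 # torch.Size([8, 24, 1]) -> [B, out_len, c_out]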


@@ -2,6 +2,11 @@
{
"name": "Informer",
"module": "model.Informer.model",
"entry": "InformerEncoder"
"entry": "Informer"
},
{
"name": "InformerStack",
"module": "model.Informer.model",
"entry": "InformerStack"
}
]


@@ -12,9 +12,9 @@ def read_config(config_path):
config = yaml.safe_load(file)
# Global configuration
device = "cuda:0" # target device
device = "cpu" # target device (CPU for this run)
seed = 2023 # random seed
epochs = 100 # number of training epochs
epochs = 1 # number of training epochs
# Copy these entries into the config
config["basic"]["device"] = device
@@ -104,9 +104,14 @@ if __name__ == "__main__":
# model_list = ["iTransformer", "PatchTST", "HI"]
model_list = ["Informer"]
# model_list = ["PatchTST"]
# dataset_list = ["AirQuality"]
# dataset_list = ["SolarEnergy"]
# dataset_list = ["BJTaxi-InFlow", "BJTaxi-OutFlow"]
dataset_list = ["SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"]
# dataset_list = ["BJTaxi-OutFlow"]
air = ["AirQuality"]
big_dataset = ["BJTaxi-InFlow", "BJTaxi-OutFlow"]
mid_dataset = ["PEMS-BAY"]
regular_dataset = ["AirQuality", "SolarEnergy", "NYCBike-InFlow", "NYCBike-OutFlow", "METR-LA"]
test_dataset = ["BJTaxi-InFlow"]
all_dataset = big_dataset + mid_dataset + regular_dataset
dataset_list = test_dataset
main(model_list, dataset_list, debug=False)