Compatible with Informer

parent 600420e8df
commit 19fd7622a3
@@ -0,0 +1,66 @@
basic:
  dataset: AirQuality
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 256
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 6
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 35
  steps_per_day: 24
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 24
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 6
  dec_in: 6
  c_out: 6


train:
  batch_size: 256
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 6
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

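These configs are read with yaml.safe_load and split into their basic / data / model / train sections by the framework. A minimal sketch of loading one of them (the relative path is an assumption based on the f"./config/{model}/{dataset}.yaml" pattern used in train.py):

import yaml

# Sketch: load an Informer config and pick out its sections.
with open("./config/Informer/AirQuality.yaml", "r") as f:
    config = yaml.safe_load(f)

print(config["basic"]["model"])     # Informer
print(config["data"]["num_nodes"])  # 35
print(config["model"]["d_model"])   # 128
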
@@ -0,0 +1,66 @@
basic:
  dataset: BJTaxi-InFlow
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 2048
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 1024
  steps_per_day: 48
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 2048
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 6
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: BJTaxi-OutFlow
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 2048
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 1024
  steps_per_day: 48
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 2048
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: METR-LA
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 256
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 207
  steps_per_day: 288
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 256
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: NYCBike-InFlow
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 256
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 128
  steps_per_day: 48
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 256
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: NYCBike-OutFlow
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 256
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 128
  steps_per_day: 48
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 256
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: PEMS-BAY
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 2048
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 1
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 325
  steps_per_day: 288
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 2048
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 6
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,66 @@
basic:
  dataset: SolarEnergy
  device: cuda:0
  mode: train
  model: Informer
  seed: 2023

data:
  batch_size: 1024
  column_wise: false
  days_per_week: 7
  horizon: 24
  input_dim: 6
  lag: 24
  label_len: 24
  normalizer: std
  num_nodes: 137
  steps_per_day: 24
  test_ratio: 0.2
  val_ratio: 0.2

model:
  activation: gelu
  seq_len: 24
  label_len: 12
  pred_len: 24
  d_model: 128
  d_ff: 2048
  dropout: 0.1
  e_layers: 2
  d_layers: 1
  n_heads: 8
  output_attention: False
  factor: 5
  attn: prob
  embed: fixed
  freq: h
  distil: true
  mix: true
  enc_in: 1
  dec_in: 1
  c_out: 1


train:
  batch_size: 1024
  debug: false
  early_stop: true
  early_stop_patience: 15
  epochs: 100
  grad_norm: false
  label_len: 24
  log_step: 1000
  loss_func: mae
  lr_decay: true
  lr_decay_rate: 0.3
  lr_decay_step: 5,20,40,70
  lr_init: 0.0001
  mae_thresh: None
  mape_thresh: 0.001
  max_grad_norm: 5
  output_dim: 1
  plot: false
  pred_len: 24
  real_value: true
  weight_decay: 0

@@ -0,0 +1,179 @@
import numpy as np
import torch
from dataloader.data_selector import load_st_dataset
from utils.normalization import normalize_dataset


# ==============================================================
# MAIN ENTRY
# ==============================================================

def get_dataloader(args, normalizer="std", single=True):
    """
    Return dataloaders with x, y, x_mark, y_mark.
    This version follows Informer/ETSformer official dataloader behavior.
    """
    data = load_st_dataset(args)
    args = args["data"]

    x, y, x_mark, y_mark = _prepare_data_with_windows(data, args)

    # --- split ---
    split_fn = split_data_by_days if args["test_ratio"] > 1 else split_data_by_ratio
    x_train, x_val, x_test = split_fn(x, args["val_ratio"], args["test_ratio"])
    y_train, y_val, y_test = split_fn(y, args["val_ratio"], args["test_ratio"])
    x_mark_train, x_mark_val, x_mark_test = split_fn(x_mark, args["val_ratio"], args["test_ratio"])
    y_mark_train, y_mark_val, y_mark_test = split_fn(y_mark, args["val_ratio"], args["test_ratio"])

    # --- normalization ---
    scaler = _normalize_data(x_train, x_val, x_test, args, normalizer)
    _apply_existing_scaler(y_train, y_val, y_test, scaler, args)

    # reshape [b, t, n, c] -> [b*n, t, c]
    (x_train, x_val, x_test,
     y_train, y_val, y_test,
     x_mark_train, x_mark_val, x_mark_test,
     y_mark_train, y_mark_val, y_mark_test) = _reshape_tensor(
        x_train, x_val, x_test,
        y_train, y_val, y_test,
        x_mark_train, x_mark_val, x_mark_test,
        y_mark_train, y_mark_val, y_mark_test
    )

    # --- dataloaders ---
    return (
        _create_dataloader(x_train, y_train, x_mark_train, y_mark_train,
                           args["batch_size"], True, False),
        _create_dataloader(x_val, y_val, x_mark_val, y_mark_val,
                           args["batch_size"], False, False),
        _create_dataloader(x_test, y_test, x_mark_test, y_mark_test,
                           args["batch_size"], False, False),
        scaler
    )


# ==============================================================
# Informer-style WINDOW GENERATION
# ==============================================================

def _prepare_data_with_windows(data, args):
    """
    Generate x, y, x_mark, y_mark using the Informer slicing rule.

    x: [seq_len]
    y: [label_len + pred_len]
    """
    seq_len = args["lag"]
    label_len = args["label_len"]
    pred_len = args["horizon"]

    L, N, C = data.shape

    # ---------- construct timestamp features ----------
    time_in_day, day_in_week = _generate_time_features(L, args)
    data_mark = np.concatenate([time_in_day, day_in_week], axis=-1)

    xs, ys, x_marks, y_marks = [], [], [], []

    for s_begin in range(L - seq_len - pred_len - 1):
        s_end = s_begin + seq_len
        r_begin = s_end - label_len
        r_end = r_begin + label_len + pred_len

        xs.append(data[s_begin:s_end])
        ys.append(data[r_begin:r_end])

        x_marks.append(data_mark[s_begin:s_end])
        y_marks.append(data_mark[r_begin:r_end])

    return np.array(xs), np.array(ys), np.array(x_marks), np.array(y_marks)


# ==============================================================
# TIME FEATURE
# ==============================================================

def _generate_time_features(L, args):
    N = args["num_nodes"]

    # Time in day
    tid = np.array([i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)])
    tid = np.tile(tid[:, None], (1, N))

    # Day in week
    diw = np.array([(i // args["steps_per_day"]) % args["days_per_week"] for i in range(L)])
    diw = np.tile(diw[:, None], (1, N))

    return tid[..., None], diw[..., None]


# ==============================================================
# NORMALIZATION
# ==============================================================

def _normalize_data(train_data, val_data, test_data, args, normalizer):
    scaler = normalize_dataset(
        train_data[..., :args["input_dim"]],
        normalizer, args["column_wise"]
    )
    for data in [train_data, val_data, test_data]:
        data[..., :args["input_dim"]] = scaler.transform(
            data[..., :args["input_dim"]]
        )
    return scaler


def _apply_existing_scaler(train_data, val_data, test_data, scaler, args):
    for data in [train_data, val_data, test_data]:
        data[..., :args["input_dim"]] = scaler.transform(
            data[..., :args["input_dim"]]
        )


# ==============================================================
# DATALOADER
# ==============================================================

def _create_dataloader(x, y, x_mark, y_mark, batch_size, shuffle, drop_last):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dataset = torch.utils.data.TensorDataset(
        torch.tensor(x, dtype=torch.float32, device=device),
        torch.tensor(y, dtype=torch.float32, device=device),
        torch.tensor(x_mark, dtype=torch.float32, device=device),
        torch.tensor(y_mark, dtype=torch.float32, device=device),
    )
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                       shuffle=shuffle, drop_last=drop_last)


# ==============================================================
# SPLIT
# ==============================================================

def split_data_by_days(data, val_days, test_days, interval=30):
    t = int((24 * 60) / interval)
    test_data = data[-t * int(test_days):]
    val_data = data[-t * int(test_days + val_days):-t * int(test_days)]
    train_data = data[:-t * int(test_days + val_days)]
    return train_data, val_data, test_data


def split_data_by_ratio(data, val_ratio, test_ratio):
    L = len(data)
    test_data = data[-int(L * test_ratio):]
    val_data = data[-int(L * (test_ratio + val_ratio)):-int(L * test_ratio)]
    train_data = data[: -int(L * (test_ratio + val_ratio))]
    return train_data, val_data, test_data


# ==============================================================
# RESHAPE [B,T,N,C] -> [B*N,T,C]
# ==============================================================

def _reshape_tensor(*tensors):
    reshaped = []
    for x in tensors:
        b, t, n, c = x.shape
        x_new = x.transpose(0, 2, 1, 3).reshape(b * n, t, c)
        reshaped.append(x_new)
    return reshaped

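A minimal sketch of driving this loader (assuming `config` is a full config dict loaded with yaml.safe_load as above; the four tensors per batch are the ones consumed by InformerTrainer further down):

from dataloader.Informer_loader import get_dataloader

# Sketch: build the three loaders and peek at one training batch.
train_loader, val_loader, test_loader, scaler = get_dataloader(config, normalizer="std", single=True)
x, y, x_mark, y_mark = next(iter(train_loader))
# x:      [batch, lag, C]                  encoder window
# y:      [batch, label_len + horizon, C]  decoder window (label part + prediction target)
# x_mark: [batch, lag, 2]                  (time-in-day, day-in-week) features
# y_mark: [batch, label_len + horizon, 2]
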
@@ -4,12 +4,15 @@ from dataloader.DCRNNdataloader import get_dataloader as DCRNN_loader
from dataloader.EXPdataloader import get_dataloader as EXP_loader
from dataloader.cde_loader.cdeDataloader import get_dataloader as nrde_loader
from dataloader.TSloader import get_dataloader as TS_loader
from dataloader.Informer_loader import get_dataloader as Informer_loader


def get_dataloader(config, normalizer, single):
    TS_model = ["iTransformer", "HI", "PatchTST"]
    model_name = config["basic"]["model"]
    if model_name in TS_model:
    if model_name == "Informer":
        return Informer_loader(config, normalizer, single)
    elif model_name in TS_model:
        return TS_loader(config, normalizer, single)
    else:
        match model_name:

@@ -0,0 +1,163 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

from math import sqrt
from model.Informer.masking import TriangularCausalMask, ProbMask

class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1./sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)

class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            assert(L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2,1)
        keys = keys.transpose(2,1)
        values = values.transpose(2,1)

        U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part<L_K else L_K
        u = u if u<L_Q else L_Q

        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1./sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)

        return context.transpose(2,1).contiguous(), attn


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model//n_heads)
        d_values = d_values or (d_model//n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads
        self.mix = mix

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )
        if self.mix:
            out = out.transpose(2,1).contiguous()
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

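A small shape-check sketch for the ProbAttention / AttentionLayer pair defined above (random tensors, self-attention without masking; the sizes follow the configs in this commit):

import torch
from model.Informer.attn import ProbAttention, AttentionLayer

B, L, d_model, n_heads = 2, 24, 128, 8
layer = AttentionLayer(ProbAttention(mask_flag=False, factor=5), d_model, n_heads, mix=False)
x = torch.randn(B, L, d_model)
out, attn = layer(x, x, x, attn_mask=None)  # queries = keys = values
print(out.shape)  # torch.Size([2, 24, 128])
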
@@ -0,0 +1,51 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class DecoderLayer(nn.Module):
    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        d_ff = d_ff or 4*d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        x = x + self.dropout(self.self_attention(
            x, x, x,
            attn_mask=x_mask
        )[0])
        x = self.norm1(x)

        x = x + self.dropout(self.cross_attention(
            x, cross, cross,
            attn_mask=cross_mask
        )[0])

        y = x = self.norm2(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
        y = self.dropout(self.conv2(y).transpose(-1,1))

        return self.norm3(x+y)

class Decoder(nn.Module):
    def __init__(self, layers, norm_layer=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        for layer in self.layers:
            x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is not None:
            x = self.norm(x)

        return x

@@ -0,0 +1,129 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import math

class PositionalEmbedding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_model).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]

class TokenEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular')
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
        return x

class FixedEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        w.require_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()

class TemporalEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        minute_size = 4; hour_size = 24
        weekday_size = 7; day_size = 32; month_size = 13

        Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
        if freq=='t':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        x = x.long()

        # Check the size of x's last dimension to avoid index errors
        last_dim = x.shape[-1]

        minute_x = 0.
        hour_x = 0.
        weekday_x = 0.
        day_x = 0.
        month_x = 0.

        # For our generated time features, we have only 2 dimensions: [day_of_week, hour]
        # So we need to map them to the appropriate embedding layers
        if last_dim > 0:
            # Use the first dimension for hour
            # Ensure hour is in the valid range [0, 23]
            hour = torch.clamp(x[:,:,0], 0, 23)
            hour_x = self.hour_embed(hour)

        if last_dim > 1:
            # Use the second dimension for weekday
            # Ensure weekday is in the valid range [0, 6]
            weekday = torch.clamp(x[:,:,1], 0, 6)
            weekday_x = self.weekday_embed(weekday)

        return hour_x + weekday_x + day_x + month_x + minute_x

class TimeFeatureEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
        d_inp = freq_map[freq]
        self.embed = nn.Linear(d_inp, d_model)

    def forward(self, x):
        return self.embed(x)

class DataEmbedding(nn.Module):
    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        a = self.value_embedding(x)
        b = self.position_embedding(x)
        c = self.temporal_embedding(x_mark)
        x = a + b + c

        return self.dropout(x)

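A small sketch of DataEmbedding with the 2-channel marks produced by Informer_loader (fixed temporal embedding, freq='h'; sizes follow the AirQuality config):

import torch
from model.Informer.embed import DataEmbedding

B, T, c_in, d_model = 4, 24, 6, 128
emb = DataEmbedding(c_in=c_in, d_model=d_model, embed_type='fixed', freq='h', dropout=0.1)
x = torch.randn(B, T, c_in)    # value channels
x_mark = torch.zeros(B, T, 2)  # [time-in-day, day-in-week] marks from the loader
print(emb(x, x_mark).shape)    # torch.Size([4, 24, 128])
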
@@ -0,0 +1,98 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvLayer(nn.Module):
    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.downConv = nn.Conv1d(in_channels=c_in,
                                  out_channels=c_in,
                                  kernel_size=3,
                                  padding=padding,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        x = self.downConv(x.permute(0, 2, 1))
        x = self.norm(x)
        x = self.activation(x)
        x = self.maxPool(x)
        x = x.transpose(1,2)
        return x

class EncoderLayer(nn.Module):
    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4*d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        # x = x + self.dropout(self.attention(
        #     x, x, x,
        #     attn_mask = attn_mask
        # ))
        new_x, attn = self.attention(
            x, x, x,
            attn_mask = attn_mask
        )
        x = x + self.dropout(new_x)

        y = x = self.norm1(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
        y = self.dropout(self.conv2(y).transpose(-1,1))

        return self.norm2(x+y), attn

class Encoder(nn.Module):
    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is not None:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns

class EncoderStack(nn.Module):
    def __init__(self, encoders, inp_lens):
        super(EncoderStack, self).__init__()
        self.encoders = nn.ModuleList(encoders)
        self.inp_lens = inp_lens

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        x_stack = []; attns = []
        for i_len, encoder in zip(self.inp_lens, self.encoders):
            inp_len = x.shape[1]//(2**i_len)
            x_s, attn = encoder(x[:, -inp_len:, :])
            x_stack.append(x_s); attns.append(attn)
        x_stack = torch.cat(x_stack, -2)

        return x_stack, attns

@@ -0,0 +1,24 @@
import torch

class TriangularCausalMask():
    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask

class ProbMask():
    def __init__(self, B, H, L, index, scores, device="cpu"):
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                             torch.arange(H)[None, :, None],
                             index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        return self._mask

@@ -0,0 +1,141 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from model.Informer.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
from model.Informer.decoder import Decoder, DecoderLayer
from model.Informer.attn import FullAttention, ProbAttention, AttentionLayer
from model.Informer.embed import DataEmbedding

class Informer(nn.Module):
    def __init__(self, args):
        super(Informer, self).__init__()
        self.pred_len = args['pred_len']
        self.attn = args['attn']
        self.output_attention = args['output_attention']

        # Encoding
        self.enc_embedding = DataEmbedding(args['enc_in'], args['d_model'], args['embed'], args['freq'], args['dropout'])
        self.dec_embedding = DataEmbedding(args['dec_in'], args['d_model'], args['embed'], args['freq'], args['dropout'])
        # Attention
        Attn = ProbAttention if args['attn']=='prob' else FullAttention
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(Attn(False, args['factor'], attention_dropout=args['dropout'], output_attention=args['output_attention']),
                                   args['d_model'], args['n_heads'], mix=False),
                    args['d_model'],
                    args['d_ff'],
                    dropout=args['dropout'],
                    activation=args['activation']
                ) for l in range(args['e_layers'])
            ],
            [
                ConvLayer(
                    args['d_model']
                ) for l in range(args['e_layers']-1)
            ] if args['distil'] else None,
            norm_layer=torch.nn.LayerNorm(args['d_model'])
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(Attn(True, args['factor'], attention_dropout=args['dropout'], output_attention=False),
                                   args['d_model'], args['n_heads'], mix=args['mix']),
                    AttentionLayer(FullAttention(False, args['factor'], attention_dropout=args['dropout'], output_attention=False),
                                   args['d_model'], args['n_heads'], mix=False),
                    args['d_model'],
                    args['d_ff'],
                    dropout=args['dropout'],
                    activation=args['activation'],
                )
                for l in range(args['d_layers'])
            ],
            norm_layer=torch.nn.LayerNorm(args['d_model'])
        )
        self.projection = nn.Linear(args['d_model'], args['c_out'], bias=True)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        dec_out = self.projection(dec_out)

        if self.output_attention:
            return dec_out[:,-self.pred_len:,:], attns
        else:
            return dec_out[:,-self.pred_len:,:]  # [B, L, D]


class InformerStack(nn.Module):
    def __init__(self, args):
        super(InformerStack, self).__init__()
        self.pred_len = args['pred_len']
        self.attn = args['attn']
        self.output_attention = args['output_attention']

        # Encoding
        self.enc_embedding = DataEmbedding(args['enc_in'], args['d_model'], args['embed'], args['freq'], args['dropout'])
        self.dec_embedding = DataEmbedding(args['dec_in'], args['d_model'], args['embed'], args['freq'], args['dropout'])
        # Attention
        Attn = ProbAttention if args['attn']=='prob' else FullAttention
        # Encoder

        inp_lens = list(range(len(args['e_layers'])))  # [0,1,2,...] you can customize here
        encoders = [
            Encoder(
                [
                    EncoderLayer(
                        AttentionLayer(Attn(False, args['factor'], attention_dropout=args['dropout'], output_attention=args['output_attention']),
                                       args['d_model'], args['n_heads'], mix=False),
                        args['d_model'],
                        args['d_ff'],
                        dropout=args['dropout'],
                        activation=args['activation']
                    ) for l in range(el)
                ],
                [
                    ConvLayer(
                        args['d_model']
                    ) for l in range(el-1)
                ] if args['distil'] else None,
                norm_layer=torch.nn.LayerNorm(args['d_model'])
            ) for el in args['e_layers']]
        self.encoder = EncoderStack(encoders, inp_lens)
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(Attn(True, args['factor'], attention_dropout=args['dropout'], output_attention=False),
                                   args['d_model'], args['n_heads'], mix=args['mix']),
                    AttentionLayer(FullAttention(False, args['factor'], attention_dropout=args['dropout'], output_attention=False),
                                   args['d_model'], args['n_heads'], mix=False),
                    args['d_model'],
                    args['d_ff'],
                    dropout=args['dropout'],
                    activation=args['activation'],
                )
                for l in range(args['d_layers'])
            ],
            norm_layer=torch.nn.LayerNorm(args['d_model'])
        )
        self.projection = nn.Linear(args['d_model'], args['c_out'], bias=True)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
        dec_out = self.projection(dec_out)

        if self.output_attention:
            return dec_out[:,-self.pred_len:,:], attns
        else:
            return dec_out[:,-self.pred_len:,:]  # [B, L, D]

@@ -28,6 +28,7 @@ from model.ASTRA.astra import ASTRA as ASTRA
from model.ASTRA.astrav2 import ASTRA as ASTRAv2
from model.ASTRA.astrav3 import ASTRA as ASTRAv3
from model.iTransformer.iTransformer import iTransformer
from model.Informer.model import Informer
from model.HI.HI import HI
from model.PatchTST.PatchTST import Model as PatchTST
from model.MTGNN.MTGNN import gtnet as MTGNN

@@ -96,6 +97,8 @@ def model_selector(config):
            return ASTRAv3(model_config)
        case "iTransformer":
            return iTransformer(model_config)
        case "Informer":
            return Informer(model_config)
        case "HI":
            return HI(model_config)
        case "PatchTST":

@@ -0,0 +1,57 @@
import torch
from model.model_selector import model_selector
import yaml

# Read the config file
with open('/user/czzhangheng/code/TrafficWheel/config/Informer/AirQuality.yaml', 'r') as f:
    config = yaml.safe_load(f)

# Initialize the model
model = model_selector(config)
print('Informer model initialized successfully!')
print(f'Number of model parameters: {sum(p.numel() for p in model.parameters())}')

# Create test data
B, T, C = 2, 24, 6
x_enc = torch.randn(B, T, C)

# Test 1: full arguments
print('\nTest 1: full arguments')
x_mark_enc = torch.randn(B, T, 4)  # assume 4-dim time features
x_dec = torch.randn(B, 12+24, C)   # label_len + pred_len
x_mark_dec = torch.randn(B, 12+24, 4)
try:
    output = model(x_enc, x_mark_enc, x_dec, x_mark_dec)
    print(f'Output shape: {output.shape}')
    print('Test 1 passed!')
except Exception as e:
    print(f'Test 1 failed: {e}')

# Test 2: omit x_mark_enc
print('\nTest 2: omit x_mark_enc')
try:
    output = model(x_enc, x_dec=x_dec, x_mark_dec=x_mark_dec)
    print(f'Output shape: {output.shape}')
    print('Test 2 passed!')
except Exception as e:
    print(f'Test 2 failed: {e}')

# Test 3: omit x_dec and x_mark_dec
print('\nTest 3: omit x_dec and x_mark_dec')
try:
    output = model(x_enc, x_mark_enc=x_mark_enc)
    print(f'Output shape: {output.shape}')
    print('Test 3 passed!')
except Exception as e:
    print(f'Test 3 failed: {e}')

# Test 4: pass x_enc only
print('\nTest 4: pass x_enc only')
try:
    output = model(x_enc)
    print(f'Output shape: {output.shape}')
    print('Test 4 passed!')
except Exception as e:
    print(f'Test 4 failed: {e}')

print('\nAll tests finished!')

train.py
@@ -11,13 +11,14 @@ def read_config(config_path):
        config = yaml.safe_load(file)

    # Global configuration
    device = "cuda:0"  # device to use
    device = "cuda:0"  # set device to cuda:0
    seed = 2023  # random seed
    epochs = 100
    epochs = 1

    # Copy into sub-configs
    config["basic"]["device"] = device
    config["model"]["device"] = device
    config["train"]["device"] = device
    config["basic"]["seed"] = seed
    config["train"]["epochs"] = epochs
    return config

@@ -62,14 +63,15 @@ def run(config):

if __name__ == "__main__":
    # Models to run
    model_list = ["MTGNN"]
    model_list = ["Informer"]
    # Datasets to run
    dataset_list = ["AirQuality", "SolarEnergy", "PEMS-BAY", "METR-LA", "BJTaxi-Inflow", "BJTaxi-Outflow", "NYCBike-Inflow", "NYCBike-Outflow"]
    # dataset_list = ["AirQuality"]
    dataset_list = ["AirQuality", "SolarEnergy", "PEMS-BAY", "METR-LA", "BJTaxi-Inflow", "BJTaxi-Outflow", "NYCBike-Inflow", "NYCBike-Outflow"]
    # dataset_list = ["PEMS-BAY"]

    # My debug switch; set to str(False) when not testing
    os.environ["TRY"] = str(False)

    # os.environ["TRY"] = str(False)
    os.environ["TRY"] = str(True)

    for model in model_list:
        for dataset in dataset_list:
            config_path = f"./config/{model}/{dataset}.yaml"

@@ -81,7 +83,13 @@ if __name__ == "__main__":
            try:
                run(config)
            except Exception as e:
                pass
                import traceback
                import sys, traceback
                tb_lines = traceback.format_exc().splitlines()
                # Print the full traceback only if the error is not an AssertionError
                if not tb_lines[-1].startswith("AssertionError"):
                    traceback.print_exc()
                    print(f"\n===== {model} on {dataset} failed with error: {e} =====\n")
            else:
                run(config)

@@ -0,0 +1,250 @@
import math
import os
import time
import copy
import torch
from utils.logger import get_logger
from utils.loss_function import all_metrics
from tqdm import tqdm

class InformerTrainer:
    """Trainer for the Informer model; manages the full training pipeline and supports multi-input models."""

    def __init__(self, model, loss, optimizer,
                 train_loader, val_loader, test_loader, scaler,
                 args, lr_scheduler=None,):
        # Device and basic arguments
        self.config = args
        self.device = args["basic"]["device"]
        train_args = args["train"]
        # Model and training components
        self.model = model
        self.loss = loss
        self.optimizer = optimizer
        self.lr_scheduler = lr_scheduler
        # Data loaders
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.test_loader = test_loader
        # Data utilities
        self.scaler = scaler
        self.args = train_args
        # Initialize paths, logging and statistics
        self._initialize_paths(train_args)
        self._initialize_logger(train_args)

    def _initialize_paths(self, args):
        """Initialize model checkpoint paths."""
        self.best_path = os.path.join(args["log_dir"], "best_model.pth")
        self.best_test_path = os.path.join(args["log_dir"], "best_test_model.pth")
        self.loss_figure_path = os.path.join(args["log_dir"], "loss.png")

    def _initialize_logger(self, args):
        """Initialize the logger."""
        if not os.path.isdir(args["log_dir"]) and not args["debug"]:
            os.makedirs(args["log_dir"], exist_ok=True)
        self.logger = get_logger(args["log_dir"], name=self.model.__class__.__name__, debug=args["debug"])
        self.logger.info(f"Experiment log path in: {args['log_dir']}")

    def _run_epoch(self, epoch, dataloader, mode):
        """Run one train/val/test epoch; supports multi-input models."""
        # Set model mode and whether to step the optimizer
        if mode == "train": self.model.train(); optimizer_step = True
        else: self.model.eval(); optimizer_step = False

        # Initialize accumulators
        total_loss = 0
        epoch_time = time.time()
        y_pred, y_true = [], []

        # Train/validation loop
        with torch.set_grad_enabled(optimizer_step):
            progress_bar = tqdm(
                enumerate(dataloader),
                total=len(dataloader),
                desc=f"{mode.capitalize()} Epoch {epoch}"
            )
            for _, (x, y, x_mark, y_mark) in progress_bar:
                # Move data to the device
                x = x.to(self.device)
                y = y[:, -self.args['pred_len']:, :self.args["output_dim"]].to(self.device)
                x_mark = x_mark.to(self.device)
                y_mark = y_mark.to(self.device)
                # [256, 24, 6]
                dec_inp = torch.zeros_like(y[:, -self.args['pred_len']:, :]).float()
                # [256, 48(pred+label), 6]
                dec_inp = torch.cat([y[:, :self.args['label_len'], :], dec_inp], dim=1).float().to(self.device)

                # Compute the loss and the de-normalized loss
                output = self.model(x, x_mark, dec_inp, y_mark)
                if os.environ.get("TRY") == "True":
                    print(f"[{'✅' if output.shape == y.shape else '❌'}]: output: {output.shape}, label: {y.shape}")
                    assert False
                loss = self.loss(output, y)
                d_output = self.scaler.inverse_transform(output)
                d_label = self.scaler.inverse_transform(y)
                d_loss = self.loss(d_output, d_label)
                # Accumulate loss and predictions
                total_loss += d_loss.item()
                y_pred.append(d_output.detach().cpu())
                y_true.append(d_label.detach().cpu())
                # Backpropagation and optimization (training mode only)
                if optimizer_step and self.optimizer is not None:
                    self.optimizer.zero_grad()
                    loss.backward()
                    # Gradient clipping (if enabled)
                    if self.args["grad_norm"]:
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args["max_grad_norm"])
                    self.optimizer.step()
                # Update the progress bar
                progress_bar.set_postfix(loss=d_loss.item())

        # Concatenate predictions from all batches
        y_pred = torch.cat(y_pred, dim=0)
        y_true = torch.cat(y_true, dim=0)
        # Compute the average loss and log metrics
        avg_loss = total_loss / len(dataloader)
        mae, rmse, mape = all_metrics(y_pred, y_true, self.args["mae_thresh"], self.args["mape_thresh"])
        self.logger.info(
            f"Epoch #{epoch:02d}: {mode.capitalize():<5} "
            f"MAE:{mae:5.2f} | RMSE:{rmse:5.2f} | MAPE:{mape:7.4f} | Time: {time.time() - epoch_time:.2f} s"
        )
        return avg_loss

    def train_epoch(self, epoch):
        return self._run_epoch(epoch, self.train_loader, "train")

    def val_epoch(self, epoch):
        return self._run_epoch(epoch, self.val_loader or self.test_loader, "val")

    def test_epoch(self, epoch):
        return self._run_epoch(epoch, self.test_loader, "test")

    def train(self):
        # Initialize bookkeeping
        best_model, best_test_model = None, None
        best_loss, best_test_loss = float("inf"), float("inf")
        not_improved_count = 0
        # Start training
        self.logger.info("Training process started")
        # Training loop
        for epoch in range(1, self.args["epochs"] + 1):
            # Train, validate and test for one epoch
            train_epoch_loss = self.train_epoch(epoch)
            val_epoch_loss = self.val_epoch(epoch)
            test_epoch_loss = self.test_epoch(epoch)
            # Check for gradient explosion
            if train_epoch_loss > 1e6:
                self.logger.warning("Gradient explosion detected. Ending...")
                break
            # Update the best validation model
            if val_epoch_loss < best_loss:
                best_loss = val_epoch_loss
                not_improved_count = 0
                best_model = copy.deepcopy(self.model.state_dict())
                self.logger.info("Best validation model saved!")
            else:
                not_improved_count += 1
            # Early stopping
            if self._should_early_stop(not_improved_count):
                break
            # Update the best test model
            if test_epoch_loss < best_test_loss:
                best_test_loss = test_epoch_loss
                best_test_model = copy.deepcopy(self.model.state_dict())
        # Save the best models
        if not self.args["debug"]:
            self._save_best_models(best_model, best_test_model)
        # Final evaluation
        self._finalize_training(best_model, best_test_model)

    def _should_early_stop(self, not_improved_count):
        """Check whether the early-stopping condition is met."""
        if (
            self.args["early_stop"]
            and not_improved_count == self.args["early_stop_patience"]
        ):
            self.logger.info(
                f"Validation performance didn't improve for {self.args['early_stop_patience']} epochs. Training stops."
            )
            return True
        return False

    def _save_best_models(self, best_model, best_test_model):
        """Save the best models to disk."""
        torch.save(best_model, self.best_path)
        torch.save(best_test_model, self.best_test_path)
        self.logger.info(
            f"Best models saved at {self.best_path} and {self.best_test_path}"
        )

    def _log_model_params(self):
        """Log the number of trainable parameters."""
        total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
        self.logger.info(f"Trainable params: {total_params}")


    def _finalize_training(self, best_model, best_test_model):
        self.model.load_state_dict(best_model)
        self.logger.info("Testing on best validation model")
        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)
        self.model.load_state_dict(best_test_model)
        self.logger.info("Testing on best test model")
        self.test(self.model, self.args, self.test_loader, self.scaler, self.logger)

    @staticmethod
    def test(model, args, data_loader, scaler, logger, path=None):
        """Evaluate the model and log performance metrics; supports multi-input models."""
        device = args["device"]

        if path:
            checkpoint = torch.load(path)
            model.load_state_dict(checkpoint["state_dict"])
            model.to(device)

        # Switch to evaluation mode
        model.eval()

        # Collect predictions and ground truth
        y_pred, y_true = [], []
        pred_len = args['pred_len']
        label_len = args['label_len']
        output_dim = args['output_dim']

        # Run predictions without tracking gradients
        with torch.no_grad():
            for _, (x, y, x_mark, y_mark) in enumerate(data_loader):
                # Move data to the device
                x = x.to(device)
                y = y[:, -pred_len:, :output_dim].to(device)
                x_mark = x_mark.to(device)
                y_mark = y_mark.to(device)
                # Build dec_inp
                dec_inp = torch.zeros_like(y[:, -pred_len:, :]).float()
                dec_inp = torch.cat([y[:, :label_len, :], dec_inp], dim=1).float().to(device)
                output = model(x, x_mark, dec_inp, y_mark)
                y_pred.append(output.detach().cpu())
                y_true.append(y.detach().cpu())

        d_y_pred = scaler.inverse_transform(torch.cat(y_pred, dim=0))
        d_y_true = scaler.inverse_transform(torch.cat(y_true, dim=0))
        mae_thresh = args["mae_thresh"]
        mape_thresh = args["mape_thresh"]

        # Compute and log metrics for every horizon step
        for t in range(d_y_true.shape[1]):
            mae, rmse, mape = all_metrics(
                d_y_pred[:, t, ...],
                d_y_true[:, t, ...],
                mae_thresh,
                mape_thresh,
            )
            logger.info(f"Horizon {t + 1:02d}, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

        # Compute and log average metrics
        mae, rmse, mape = all_metrics(d_y_pred, d_y_true, mae_thresh, mape_thresh)
        logger.info(f"Average Horizon, MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}")

    @staticmethod
    def _compute_sampling_threshold(global_step, k):
        return k / (k + math.exp(global_step / k))

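For reference, a standalone sketch of the decoder-input construction used in _run_epoch and test above (sizes follow the AirQuality config; the tensors are random placeholders, not real data):

import torch

pred_len, label_len, output_dim = 24, 24, 6
y = torch.randn(256, pred_len, output_dim)       # target window as sliced in the trainer
dec_inp = torch.zeros_like(y[:, -pred_len:, :])  # zero placeholder for the horizon
dec_inp = torch.cat([y[:, :label_len, :], dec_inp], dim=1)
print(dec_inp.shape)  # torch.Size([256, 48, 6]) -> label_len + pred_len
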
@@ -4,6 +4,7 @@ from trainer.DCRNN_Trainer import Trainer as DCRNN_Trainer
from trainer.PDG2SEQ_Trainer import Trainer as PDG2SEQ_Trainer
from trainer.STMLP_Trainer import Trainer as STMLP_Trainer
from trainer.E32Trainer import Trainer as EXP_Trainer
from trainer.InformerTrainer import InformerTrainer


def select_trainer(

@@ -96,6 +97,18 @@ def select_trainer(
                args,
                lr_scheduler,
            )
        case "Informer":
            return InformerTrainer(
                model,
                loss,
                optimizer,
                train_loader,
                val_loader,
                test_loader,
                scaler,
                args,
                lr_scheduler,
            )
        case _:
            return Trainer(
                model,