diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index d1a09b9..196422d 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -5,14 +5,10 @@
-
-
-
-
@@ -66,7 +62,7 @@
"Python.main.executor": "Run",
"RunOnceActivity.OpenProjectViewOnStart": "true",
"RunOnceActivity.ShowReadmeOnStart": "true",
- "git-widget-placeholder": "STGODE",
+ "git-widget-placeholder": "main",
"last_opened_file_path": "/home/czzhangheng/code/Project-I/main.py",
"node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true",
@@ -93,11 +89,12 @@
+
-
+
@@ -211,6 +208,7 @@
-
+
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 10a6160..e678e6d 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,9 @@
# Project-I
-Secret Projct
\ No newline at end of file
+Secret Project
+
+Setup:
+
+1. `mkdir -p models/gpt2`
+2. Download `config.json` and `pytorch_model.bin` from https://huggingface.co/openai-community/gpt2/tree/main and place them in `models/gpt2` (see the snippet below).
+3. Use PyTorch >= 2.6 to load the model.
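+
+For example, the two files can be fetched and the local copy sanity-checked with a short Python snippet. This is only a sketch; it assumes the `huggingface_hub` and `transformers` packages are installed:
+
+```python
+from huggingface_hub import hf_hub_download
+from transformers import GPT2Model
+
+# Download the config and weights into the directory the configs point at.
+for fname in ("config.json", "pytorch_model.bin"):
+    hf_hub_download(repo_id="openai-community/gpt2", filename=fname, local_dir="models/gpt2")
+
+# Load strictly from disk to confirm the checkpoint is usable offline.
+model = GPT2Model.from_pretrained("models/gpt2", local_files_only=True)
+print(model.config.hidden_size)  # 768 for the base GPT-2 checkpoint
+```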
\ No newline at end of file
diff --git a/configs/STGODE_LLM/PEMS08.yaml b/configs/STGODE_LLM/PEMS08.yaml
new file mode 100644
index 0000000..bc13529
--- /dev/null
+++ b/configs/STGODE_LLM/PEMS08.yaml
@@ -0,0 +1,65 @@
+basic:
+ device: cuda:0
+ dataset: PEMS08
+ model: STGODE-LLM
+ mode: test
+ seed: 2025
+
+data:
+ dataset_dir: data/PEMS08
+ val_batch_size: 32
+ graph_pkl_filename: data/PEMS08/PEMS08_spatial_distance.npy
+ num_nodes: 170
+ batch_size: 64
+ input_dim: 1
+ lag: 12
+ horizon: 12
+ val_ratio: 0.2
+ test_ratio: 0.2
+ tod: False
+ normalizer: std
+ column_wise: False
+ default_graph: True
+ add_time_in_day: True
+ add_day_in_week: True
+ steps_per_day: 24
+ days_per_week: 7
+
+model:
+ input_dim: 1
+ output_dim: 1
+ history: 12
+ horizon: 12
+ num_features: 1
+ rnn_units: 64
+ sigma1: 0.1
+ sigma2: 10
+ thres1: 0.6
+ thres2: 0.5
+ # LLM backbone settings
+ llm_hidden: 128
+ llm_layers: 4
+ llm_heads: 4
+ llm_pretrained: True
+
+train:
+ loss: mae
+ batch_size: 64
+ epochs: 100
+ lr_init: 0.003
+ mape_thresh: 0.001
+ mae_thresh: None
+ debug: False
+ output_dim: 1
+ weight_decay: 0
+ lr_decay: False
+ lr_decay_rate: 0.3
+ lr_decay_step: "5,20,40,70"
+ early_stop: True
+ early_stop_patience: 15
+ grad_norm: False
+ max_grad_norm: 5
+ real_value: True
+ log_step: 3000
+
+
diff --git a/configs/STGODE_LLM_GPT2/PEMS08.yaml b/configs/STGODE_LLM_GPT2/PEMS08.yaml
new file mode 100644
index 0000000..53540ce
--- /dev/null
+++ b/configs/STGODE_LLM_GPT2/PEMS08.yaml
@@ -0,0 +1,66 @@
+basic:
+ device: cuda:0
+ dataset: PEMS08
+ model: STGODE-LLM-GPT2
+ mode: train
+ seed: 2025
+
+data:
+ dataset_dir: data/PEMS08
+ val_batch_size: 16
+ graph_pkl_filename: data/PEMS08/PEMS08_spatial_distance.npy
+ num_nodes: 170
+ batch_size: 32
+ input_dim: 1
+ lag: 12
+ horizon: 12
+ val_ratio: 0.2
+ test_ratio: 0.2
+ tod: False
+ normalizer: std
+ column_wise: False
+ default_graph: True
+ add_time_in_day: True
+ add_day_in_week: True
+ steps_per_day: 24
+ days_per_week: 7
+
+model:
+ input_dim: 1
+ output_dim: 1
+ history: 12
+ horizon: 12
+ num_features: 1
+ rnn_units: 64
+ sigma1: 0.1
+ sigma2: 10
+ thres1: 0.6
+ thres2: 0.5
+ # HF GPT-2 settings
+ gpt2_name: gpt2
+ gpt2_grad_ckpt: True
+ gpt2_freeze: True
+ gpt2_local_dir: ./models/gpt2
+
+train:
+ loss: mae
+ batch_size: 32
+ epochs: 100
+ lr_init: 0.0003
+ mape_thresh: 0.001
+ mae_thresh: None
+ debug: False
+ output_dim: 1
+ weight_decay: 0
+ lr_decay: False
+ lr_decay_rate: 0.3
+ lr_decay_step: "10,30,60,90"
+ early_stop: True
+ early_stop_patience: 15
+ grad_norm: False
+ max_grad_norm: 5
+ real_value: True
+ log_step: 3000
+
+
+
diff --git a/models/STGODE_LLM/STGODE_LLM.py b/models/STGODE_LLM/STGODE_LLM.py
new file mode 100644
index 0000000..817f4e4
--- /dev/null
+++ b/models/STGODE_LLM/STGODE_LLM.py
@@ -0,0 +1,152 @@
+import torch
+import math
+import torch.nn as nn
+import torch.nn.functional as F
+
+from models.STGODE.odegcn import ODEG
+from models.STGODE.adj import get_A_hat
+
+
+class Chomp1d(nn.Module):
+ def __init__(self, chomp_size):
+ super(Chomp1d, self).__init__()
+ self.chomp_size = chomp_size
+
+ def forward(self, x):
+ return x[:, :, :, :-self.chomp_size].contiguous()
+
+
+class TemporalConvNet(nn.Module):
+ def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
+ super(TemporalConvNet, self).__init__()
+ layers = []
+ num_levels = len(num_channels)
+ for i in range(num_levels):
+ dilation_size = 2 ** i
+ in_channels = num_inputs if i == 0 else num_channels[i - 1]
+ out_channels = num_channels[i]
+ padding = (kernel_size - 1) * dilation_size
+ self.conv = nn.Conv2d(in_channels, out_channels, (1, kernel_size), dilation=(1, dilation_size),
+ padding=(0, padding))
+ self.conv.weight.data.normal_(0, 0.01)
+ self.chomp = Chomp1d(padding)
+ self.relu = nn.ReLU()
+ self.dropout = nn.Dropout(dropout)
+ layers += [nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)]
+
+ self.network = nn.Sequential(*layers)
+ self.downsample = nn.Conv2d(num_inputs, num_channels[-1], (1, 1)) if num_inputs != num_channels[-1] else None
+ if self.downsample:
+ self.downsample.weight.data.normal_(0, 0.01)
+
+ def forward(self, x):
+ y = x.permute(0, 3, 1, 2)
+        # Parenthesise the residual so the conditional only selects the shortcut
+        # branch; otherwise the whole TCN is skipped whenever no downsample is needed.
+        y = F.relu(self.network(y) + (self.downsample(y) if self.downsample is not None else y))
+ y = y.permute(0, 2, 3, 1)
+ return y
+
+
+class STGCNBlock(nn.Module):
+ def __init__(self, in_channels, out_channels, num_nodes, A_hat):
+ super(STGCNBlock, self).__init__()
+ self.A_hat = A_hat
+ self.temporal1 = TemporalConvNet(num_inputs=in_channels, num_channels=out_channels)
+ self.odeg = ODEG(out_channels[-1], 12, A_hat, time=6)
+ self.temporal2 = TemporalConvNet(num_inputs=out_channels[-1], num_channels=out_channels)
+ self.batch_norm = nn.BatchNorm2d(num_nodes)
+
+ def forward(self, X):
+ t = self.temporal1(X)
+ t = self.odeg(t)
+ t = self.temporal2(F.relu(t))
+ return self.batch_norm(t)
+
+
+class GPT2Backbone(nn.Module):
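+    """Lightweight GPT-2-style temporal encoder.
+
+    Note: `use_pretrained=True` only builds a randomly initialised `GPT2Model`
+    from a fresh config (no checkpoint is downloaded); if `transformers` cannot
+    be imported, it falls back to a plain `nn.TransformerEncoder`.
+    """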
+ def __init__(self, hidden_size: int, n_layer: int = 4, n_head: int = 4, n_embd: int | None = None, use_pretrained: bool = True):
+ super().__init__()
+ self.hidden_size = hidden_size
+ self.use_transformers = False
+ self.model = None
+ if n_embd is None:
+ n_embd = hidden_size
+ if use_pretrained:
+ try:
+ from transformers import GPT2Model, GPT2Config
+ config = GPT2Config(n_embd=n_embd, n_layer=n_layer, n_head=n_head, n_positions=1024, n_ctx=1024, vocab_size=1)
+ self.model = GPT2Model(config)
+ self.use_transformers = True
+ except Exception:
+ self.use_transformers = False
+ if not self.use_transformers:
+ encoder_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=n_head, batch_first=True)
+ self.model = nn.TransformerEncoder(encoder_layer, num_layers=n_layer)
+
+ def forward(self, inputs_embeds: torch.Tensor) -> torch.Tensor:
+ if self.use_transformers:
+ outputs = self.model(inputs_embeds=inputs_embeds)
+ return outputs.last_hidden_state
+ else:
+ return self.model(inputs_embeds)
+
+
+class ODEGCN_LLM(nn.Module):
+ def __init__(self, config):
+ super(ODEGCN_LLM, self).__init__()
+ args = config['model']
+ num_nodes = config['data']['num_nodes']
+ num_features = args['num_features']
+ num_timesteps_input = args['history']
+ num_timesteps_output = args['horizon']
+ A_sp_hat, A_se_hat = get_A_hat(config)
+
+ self.sp_blocks = nn.ModuleList(
+ [nn.Sequential(
+ STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_sp_hat),
+ STGCNBlock(in_channels=64, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_sp_hat)) for _ in range(3)
+ ])
+
+ self.se_blocks = nn.ModuleList(
+ [nn.Sequential(
+ STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_se_hat),
+ STGCNBlock(in_channels=64, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_se_hat)) for _ in range(3)
+ ])
+
+ self.history = num_timesteps_input
+ self.horizon = num_timesteps_output
+
+ hidden_size = int(args.get('llm_hidden', 128))
+ llm_layers = int(args.get('llm_layers', 4))
+ llm_heads = int(args.get('llm_heads', 4))
+ use_pretrained = bool(args.get('llm_pretrained', True))
+
+ self.to_llm_embed = nn.Linear(64, hidden_size)
+ self.gpt2 = GPT2Backbone(hidden_size=hidden_size, n_layer=llm_layers, n_head=llm_heads, use_pretrained=use_pretrained)
+ self.proj_head = nn.Sequential(
+ nn.Linear(hidden_size, hidden_size),
+ nn.ReLU(),
+ nn.Linear(hidden_size, self.horizon)
+ )
+
+ def forward(self, x):
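+        # x: (B, T, N, C) from the dataloader; keep the first feature and
+        # reorder to (B, N, T, 1) for the spatio-temporal blocks.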
+ x = x[..., 0:1].permute(0, 2, 1, 3)
+ outs = []
+ for blk in self.sp_blocks:
+ outs.append(blk(x))
+ for blk in self.se_blocks:
+ outs.append(blk(x))
+ outs = torch.stack(outs)
+ x = torch.max(outs, dim=0)[0]
+
+ # x: (B, N, T, 64) physical quantities after ODE-based transform
+ B, N, T, C = x.shape
+ x = self.to_llm_embed(x) # (B, N, T, H)
+        x = x.reshape(B * N, T, -1)  # (B*N, T, H), one sequence per node
+
+ llm_hidden = self.gpt2(inputs_embeds=x) # (B*N, T, H)
+ last_state = llm_hidden[:, -1, :] # (B*N, H)
+ y = self.proj_head(last_state) # (B*N, horizon)
+ y = y.view(B, N, self.horizon).permute(0, 2, 1).unsqueeze(-1) # (B, horizon, N, 1)
+ return y
+
+
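+
+if __name__ == "__main__":
+    # Minimal shape check, a sketch rather than part of the training pipeline:
+    # the full ODEGCN_LLM needs get_A_hat (and the PEMS08 distance file), so
+    # only the GPT2Backbone fallback path is exercised here.
+    backbone = GPT2Backbone(hidden_size=128, n_layer=2, n_head=4, use_pretrained=False)
+    dummy = torch.randn(4, 12, 128)  # (batch * nodes, history, llm_hidden)
+    out = backbone(dummy)
+    print(out.shape)  # expected: torch.Size([4, 12, 128])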
diff --git a/models/STGODE_LLM/__init__.py b/models/STGODE_LLM/__init__.py
new file mode 100644
index 0000000..93b8daf
--- /dev/null
+++ b/models/STGODE_LLM/__init__.py
@@ -0,0 +1,4 @@
+from .STGODE_LLM import ODEGCN_LLM
+
+
+
diff --git a/models/STGODE_LLM_GPT2/STGODE_LLM_GPT2.py b/models/STGODE_LLM_GPT2/STGODE_LLM_GPT2.py
new file mode 100644
index 0000000..af65de7
--- /dev/null
+++ b/models/STGODE_LLM_GPT2/STGODE_LLM_GPT2.py
@@ -0,0 +1,145 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from models.STGODE.odegcn import ODEG
+from models.STGODE.adj import get_A_hat
+
+
+class Chomp1d(nn.Module):
+ def __init__(self, chomp_size):
+ super(Chomp1d, self).__init__()
+ self.chomp_size = chomp_size
+
+ def forward(self, x):
+ return x[:, :, :, :-self.chomp_size].contiguous()
+
+
+class TemporalConvNet(nn.Module):
+ def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
+ super(TemporalConvNet, self).__init__()
+ layers = []
+ num_levels = len(num_channels)
+ for i in range(num_levels):
+ dilation_size = 2 ** i
+ in_channels = num_inputs if i == 0 else num_channels[i - 1]
+ out_channels = num_channels[i]
+ padding = (kernel_size - 1) * dilation_size
+ self.conv = nn.Conv2d(in_channels, out_channels, (1, kernel_size), dilation=(1, dilation_size),
+ padding=(0, padding))
+ self.conv.weight.data.normal_(0, 0.01)
+ self.chomp = Chomp1d(padding)
+ self.relu = nn.ReLU()
+ self.dropout = nn.Dropout(dropout)
+ layers += [nn.Sequential(self.conv, self.chomp, self.relu, self.dropout)]
+
+ self.network = nn.Sequential(*layers)
+ self.downsample = nn.Conv2d(num_inputs, num_channels[-1], (1, 1)) if num_inputs != num_channels[-1] else None
+ if self.downsample:
+ self.downsample.weight.data.normal_(0, 0.01)
+
+ def forward(self, x):
+ y = x.permute(0, 3, 1, 2)
+        # Parenthesise the residual so the conditional only selects the shortcut
+        # branch; otherwise the whole TCN is skipped whenever no downsample is needed.
+        y = F.relu(self.network(y) + (self.downsample(y) if self.downsample is not None else y))
+ y = y.permute(0, 2, 3, 1)
+ return y
+
+
+class STGCNBlock(nn.Module):
+ def __init__(self, in_channels, out_channels, num_nodes, A_hat):
+ super(STGCNBlock, self).__init__()
+ self.A_hat = A_hat
+ self.temporal1 = TemporalConvNet(num_inputs=in_channels, num_channels=out_channels)
+ self.odeg = ODEG(out_channels[-1], 12, A_hat, time=6)
+ self.temporal2 = TemporalConvNet(num_inputs=out_channels[-1], num_channels=out_channels)
+ self.batch_norm = nn.BatchNorm2d(num_nodes)
+
+ def forward(self, X):
+ t = self.temporal1(X)
+ t = self.odeg(t)
+ t = self.temporal2(F.relu(t))
+ return self.batch_norm(t)
+
+
+class GPT2BackboneHF(nn.Module):
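+    """Hugging Face GPT-2 backbone.
+
+    Loads real pretrained weights, preferring `local_dir` (offline) over the Hub
+    `model_name`; optionally enables gradient checkpointing and freezes all GPT-2
+    parameters so only the surrounding projection layers are trained.
+    """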
+ def __init__(self, model_name: str | None = None, gradient_checkpointing: bool = False, freeze: bool = False, local_dir: str | None = None):
+ super().__init__()
+ from transformers import GPT2Model
+ if local_dir is not None and len(local_dir) > 0:
+ self.model = GPT2Model.from_pretrained(local_dir, local_files_only=True)
+ else:
+ if model_name is None:
+ model_name = 'gpt2'
+ self.model = GPT2Model.from_pretrained(model_name)
+ if gradient_checkpointing:
+ self.model.gradient_checkpointing_enable()
+ self.hidden_size = self.model.config.hidden_size
+ if freeze:
+ for p in self.model.parameters():
+ p.requires_grad = False
+
+ def forward(self, inputs_embeds: torch.Tensor) -> torch.Tensor:
+ outputs = self.model(inputs_embeds=inputs_embeds)
+ return outputs.last_hidden_state
+
+
+class ODEGCN_LLM_GPT2(nn.Module):
+ def __init__(self, config):
+ super(ODEGCN_LLM_GPT2, self).__init__()
+ args = config['model']
+ num_nodes = config['data']['num_nodes']
+ num_features = args['num_features']
+ self.history = args['history']
+ self.horizon = args['horizon']
+ A_sp_hat, A_se_hat = get_A_hat(config)
+
+ self.sp_blocks = nn.ModuleList(
+ [nn.Sequential(
+ STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_sp_hat),
+ STGCNBlock(in_channels=64, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_sp_hat)) for _ in range(3)
+ ])
+
+ self.se_blocks = nn.ModuleList(
+ [nn.Sequential(
+ STGCNBlock(in_channels=num_features, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_se_hat),
+ STGCNBlock(in_channels=64, out_channels=[64, 32, 64], num_nodes=num_nodes, A_hat=A_se_hat)) for _ in range(3)
+ ])
+
+ # HF GPT-2
+ gpt2_name = args.get('gpt2_name', 'gpt2')
+ grad_ckpt = bool(args.get('gpt2_grad_ckpt', False))
+ gpt2_freeze = bool(args.get('gpt2_freeze', False))
+ gpt2_local_dir = args.get('gpt2_local_dir', None)
+ self.gpt2 = GPT2BackboneHF(gpt2_name, gradient_checkpointing=grad_ckpt, freeze=gpt2_freeze, local_dir=gpt2_local_dir)
+
+ # Project ODE features to GPT-2 hidden size
+ self.to_llm_embed = nn.Linear(64, self.gpt2.hidden_size)
+
+ # Prediction head
+ self.proj_head = nn.Sequential(
+ nn.Linear(self.gpt2.hidden_size, self.gpt2.hidden_size),
+ nn.ReLU(),
+ nn.Linear(self.gpt2.hidden_size, self.horizon)
+ )
+
+ def forward(self, x):
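+        # x: (B, T, N, C) from the dataloader; keep the first feature and
+        # reorder to (B, N, T, 1) for the spatio-temporal blocks.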
+ x = x[..., 0:1].permute(0, 2, 1, 3)
+ outs = []
+ for blk in self.sp_blocks:
+ outs.append(blk(x))
+ for blk in self.se_blocks:
+ outs.append(blk(x))
+ outs = torch.stack(outs)
+ x = torch.max(outs, dim=0)[0] # (B, N, T, 64)
+
+ B, N, T, C = x.shape
+ x = self.to_llm_embed(x).view(B * N, T, -1)
+
+ llm_hidden = self.gpt2(inputs_embeds=x)
+ last_state = llm_hidden[:, -1, :]
+ y = self.proj_head(last_state)
+ y = y.view(B, N, self.horizon).permute(0, 2, 1).unsqueeze(-1)
+ return y
+
+
+
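+
+if __name__ == "__main__":
+    # Rough end-to-end sketch (not called by the trainer). It assumes the
+    # PEMS08 arrays referenced in configs/STGODE_LLM_GPT2/PEMS08.yaml and the
+    # local GPT-2 checkpoint from the README (./models/gpt2) are in place.
+    import yaml
+
+    with open("configs/STGODE_LLM_GPT2/PEMS08.yaml") as f:
+        config = yaml.safe_load(f)
+    model = ODEGCN_LLM_GPT2(config)
+    model.eval()
+    # Dummy batch shaped like the dataloader output: (batch, history, nodes, features).
+    x = torch.randn(2, config["model"]["history"], config["data"]["num_nodes"], 1)
+    with torch.no_grad():
+        print(model(x).shape)  # expected: (2, horizon, num_nodes, 1)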
diff --git a/models/STGODE_LLM_GPT2/__init__.py b/models/STGODE_LLM_GPT2/__init__.py
new file mode 100644
index 0000000..8650578
--- /dev/null
+++ b/models/STGODE_LLM_GPT2/__init__.py
@@ -0,0 +1,4 @@
+from .STGODE_LLM_GPT2 import ODEGCN_LLM_GPT2
+
+
+
diff --git a/test_semantic.npy b/test_semantic.npy
deleted file mode 100644
index e0d1b75..0000000
Binary files a/test_semantic.npy and /dev/null differ
diff --git a/test_spatial.npy b/test_spatial.npy
deleted file mode 100644
index 747906d..0000000
Binary files a/test_spatial.npy and /dev/null differ