diff --git a/.gitignore b/.gitignore
index 681395d..a332757 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,9 @@
 review_lab/
 scripts/
 experiments/
-
+*.csv
+*.npz
+*.pkl
 
 # ---> Python
 # Byte-compiled / optimized / DLL files
diff --git a/dataloader/DCRNNdataloader.py b/dataloader/DCRNNdataloader.py
index 7a250e0..b485a2c 100644
--- a/dataloader/DCRNNdataloader.py
+++ b/dataloader/DCRNNdataloader.py
@@ -94,22 +94,22 @@ def load_st_dataset(dataset, sample):
     # output B, N, D
     match dataset:
         case 'PEMSD3':
-            data_path = os.path.join('./data/PeMS03/PEMS03.npz')
+            data_path = os.path.join('./data/PEMS03/PEMS03.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD4':
-            data_path = os.path.join('./data/PeMS04/PEMS04.npz')
+            data_path = os.path.join('./data/PEMS04/PEMS04.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7':
-            data_path = os.path.join('./data/PeMS07/PEMS07.npz')
+            data_path = os.path.join('./data/PEMS07/PEMS07.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD8':
-            data_path = os.path.join('./data/PeMS08/PeMS08.npz')
+            data_path = os.path.join('./data/PEMS08/PEMS08.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(L)':
-            data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
+            data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(M)':
-            data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
+            data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
             data = np.genfromtxt(data_path, delimiter=',')  # Read CSV directly with numpy
         case 'METR-LA':
             data_path = os.path.join('./data/METR-LA/METR.h5')
diff --git a/dataloader/PeMSDdataloader.py b/dataloader/PeMSDdataloader.py
index ab7294d..925e82a 100644
--- a/dataloader/PeMSDdataloader.py
+++ b/dataloader/PeMSDdataloader.py
@@ -94,22 +94,22 @@ def load_st_dataset(dataset, sample):
     # output B, N, D
     match dataset:
         case 'PEMSD3':
-            data_path = os.path.join('./data/PeMS03/PEMS03.npz')
+            data_path = os.path.join('./data/PEMS03/PEMS03.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD4':
-            data_path = os.path.join('./data/PeMS04/PEMS04.npz')
+            data_path = os.path.join('./data/PEMS04/PEMS04.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7':
-            data_path = os.path.join('./data/PeMS07/PEMS07.npz')
+            data_path = os.path.join('./data/PEMS07/PEMS07.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD8':
-            data_path = os.path.join('./data/PeMS08/PeMS08.npz')
+            data_path = os.path.join('./data/PEMS08/PEMS08.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(L)':
-            data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
+            data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(M)':
-            data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
+            data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
             data = np.genfromtxt(data_path, delimiter=',')  # Read CSV directly with numpy
         case 'METR-LA':
             data_path = os.path.join('./data/METR-LA/METR.h5')
diff --git a/dataloader/PeMSDdataloader_old.py b/dataloader/PeMSDdataloader_old.py
index 83db721..c76d18f 100644
--- a/dataloader/PeMSDdataloader_old.py
+++ b/dataloader/PeMSDdataloader_old.py
@@ -94,22 +94,22 @@ def load_st_dataset(dataset):
     # output B, N, D
     match dataset:
         case 'PEMSD3':
-            data_path = os.path.join('./data/PeMS03/PEMS03.npz')
+            data_path = os.path.join('./data/PEMS03/PEMS03.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD4':
-            data_path = os.path.join('./data/PeMS04/PEMS04.npz')
+            data_path = os.path.join('./data/PEMS04/PEMS04.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7':
-            data_path = os.path.join('./data/PeMS07/PEMS07.npz')
+            data_path = os.path.join('./data/PEMS07/PEMS07.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD8':
-            data_path = os.path.join('./data/PeMS08/PeMS08.npz')
+            data_path = os.path.join('./data/PEMS08/PEMS08.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(L)':
-            data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
+            data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
             data = np.load(data_path)['data'][:, :, 0]  # only the first dimension, traffic flow data
         case 'PEMSD7(M)':
-            data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
+            data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
             data = np.genfromtxt(data_path, delimiter=',')  # Read CSV directly with numpy
         case 'METR-LA':
             data_path = os.path.join('./data/METR-LA/METR.h5')
diff --git a/model/STGODE/adj.py b/model/STGODE/adj.py
index b1083c1..e5da55d 100644
--- a/model/STGODE/adj.py
+++ b/model/STGODE/adj.py
@@ -7,13 +7,13 @@ from tqdm import tqdm
 import torch
 
 files = {
-    358: ['PeMS03/PEMS03.npz', 'PeMS03/PEMS03.csv'],
-    307: ['PeMS04/PEMS04.npz', 'PeMS04/PEMS04.csv'],
-    883: ['PeMS07/PEMS07.npz', 'PeMS07/PEMS07.csv'],
-    170: ['PeMS08/PEMS08.npz', 'PeMS08/PEMS08.csv'],
+    358: ['PEMS03/PEMS03.npz', 'PEMS03/PEMS03.csv'],
+    307: ['PEMS04/PEMS04.npz', 'PEMS04/PEMS04.csv'],
+    883: ['PEMS07/PEMS07.npz', 'PEMS07/PEMS07.csv'],
+    170: ['PEMS08/PEMS08.npz', 'PEMS08/PEMS08.csv'],
     # 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'],
-    # 'pemsD7M': ['PeMSD7M/PeMSD7M.npz', 'PeMSD7M/distance.csv'],
-    # 'pemsD7L': ['PeMSD7L/PeMSD7L.npz', 'PeMSD7L/distance.csv']
+    # 'pemsD7M': ['PEMSD7M/PEMSD7M.npz', 'PEMSD7M/distance.csv'],
+    # 'pemsD7L': ['PEMSD7L/PEMSD7L.npz', 'PEMSD7L/distance.csv']
 }
 
 
@@ -43,7 +43,7 @@ def get_A_hat(args):
     data = (data - mean_value) / std_value
 
     # Compute dtw_distance; read from cache if it already exists
-    if not os.path.exists(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy'):
+    if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy'):
         data_mean = np.mean([data[:, :, 0][24 * 12 * i: 24 * 12 * (i + 1)] for i in range(data.shape[0] // (24 * 12))], axis=0)
         data_mean = data_mean.squeeze().T
 
@@ -54,9 +54,9 @@ def get_A_hat(args):
         for i in range(num_node):
             for j in range(i):
                 dtw_distance[i][j] = dtw_distance[j][i]
-        np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy', dtw_distance)
+        np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy', dtw_distance)
 
-    dist_matrix = np.load(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy')
+    dist_matrix = np.load(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy')
 
     mean = np.mean(dist_matrix)
     std = np.std(dist_matrix)
@@ -67,9 +67,9 @@ def get_A_hat(args):
     dtw_matrix[dist_matrix > args['thres1']] = 1
 
     # Compute spatial_distance; read from cache if it already exists
-    if not os.path.exists(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy'):
+    if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy'):
         if num_node == 358:
-            with open(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}.txt', 'r') as f:
+            with open(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}.txt', 'r') as f:
                 id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))}  # build the ID mapping
             # Read the CSV file with pandas, skipping the header row
             df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
@@ -79,7 +79,7 @@ def get_A_hat(args):
                 end = int(id_dict[row[1]])
                 dist_matrix[start][end] = float(row[2])
                 dist_matrix[end][start] = float(row[2])
-            np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
+            np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
         else:
             # Read the CSV file with pandas, skipping the header row
             df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
@@ -89,7 +89,7 @@ def get_A_hat(args):
                 end = int(row[1])
                 dist_matrix[start][end] = float(row[2])
                 dist_matrix[end][start] = float(row[2])
-            np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
+            np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
     # normalization
     std = np.std(dist_matrix[dist_matrix != float('inf')])
     mean = np.mean(dist_matrix[dist_matrix != float('inf')])
diff --git a/model/STGODE/new_adj.py b/model/STGODE/new_adj.py
deleted file mode 100644
index 4c45af8..0000000
--- a/model/STGODE/new_adj.py
+++ /dev/null
@@ -1,116 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from fastdtw import fastdtw
-from tqdm import tqdm
-import torch
-from joblib import Parallel, delayed
-
-files = {
-    358: ('PeMS03/PEMS03.npz', 'PeMS03/PEMS03.csv'),
-    307: ('PeMS04/PEMS04.npz', 'PEMS04.csv'),
-    883: ('PeMS07/PEMS07.npz', 'PEMS07.csv'),
-    170: ('PeMS08/PEMS08.npz', 'PEMS08.csv')
-}
-
-
-def compute_dtw_pair(i, j, data_mean):
-    return i, j, fastdtw(data_mean[i], data_mean[j], radius=6)[0]
-
-
-def get_A_hat(args):
-    """Optimized version with GPU support and parallel computing"""
-    # Basic configuration
-    device = torch.device(args['device'])
-    data_dir = './data/'
-    num_node = args['num_nodes']
-    file_npz, file_csv = files[num_node]
-    dataset_name = file_npz.split('/')[0]
-    os.makedirs(f'{data_dir}{dataset_name}', exist_ok=True)
-
-    # Load and standardize the data
-    with np.load(f'{data_dir}{file_npz}') as data:
-        arr_data = data['data']
-    arr_data = (arr_data - arr_data.mean((0, 1))) / arr_data.std((0, 1))
-    arr_data = torch.from_numpy(arr_data).float().to(device)
-
-    # Compute the DTW matrix (with caching)
-    dtw_path = f'{data_dir}{dataset_name}/{dataset_name}_dtw_distance.npy'
-    if not os.path.exists(dtw_path):
-        # GPU-accelerated mean computation
-        daily_data = arr_data[..., 0].unfold(0, 288, 288).mean(dim=0).T.cpu().numpy()
-
-        # Compute DTW in parallel
-        print("Computing DTW matrix with parallel optimization...")
-        results = Parallel(n_jobs=-1)(
-            delayed(compute_dtw_pair)(i, j, daily_data)
-            for i in tqdm(range(num_node)) for j in range(i, num_node)
-        )
-
-        dtw_matrix = np.full((num_node, num_node), np.inf)
-        for i, j, d in results:
-            dtw_matrix[i, j] = d
-            dtw_matrix[j, i] = d
-        np.save(dtw_path, dtw_matrix)
-    else:
-        dtw_matrix = np.load(dtw_path)
-
-    # Normalize the DTW matrix (GPU-accelerated)
-    dtw_tensor = torch.from_numpy(dtw_matrix).to(device)
-    dtw_normalized = (dtw_tensor - dtw_tensor.mean()) / dtw_tensor.std()
-    semantic_adj = torch.exp(-dtw_normalized ** 2 / args['sigma1'] ** 2)
-    semantic_adj = (semantic_adj > args['thres1']).float()
-
-    # Compute the spatial matrix (with caching)
-    spatial_path = f'{data_dir}{dataset_name}/{dataset_name}_spatial_distance.npy'
-    if not os.path.exists(spatial_path):
-        # Efficient reading with pandas
-        df = pd.read_csv(f'{data_dir}{file_csv}', header=None)
-        if num_node == 358:  # special handling for the node ID mapping
-            with open(f'{data_dir}{dataset_name}/{dataset_name}.txt') as f:
-                node_ids = [int(line.strip()) for line in f]
-            id_map = {nid: idx for idx, nid in enumerate(node_ids)}
-            df[0] = df[0].map(id_map)
-            df[1] = df[1].map(id_map)
-
-        # Build the sparse matrix
-        spatial_adj = torch.full((num_node, num_node), float('inf'), device=device)
-        for row in df.itertuples():
-            i, j, d = int(row[1]), int(row[2]), float(row[3])
-            spatial_adj[i, j] = spatial_adj[j, i] = d
-        spatial_adj = spatial_adj.cpu().numpy()
-        np.save(spatial_path, spatial_adj)
-    else:
-        spatial_adj = np.load(spatial_path)
-
-    # Normalize the spatial matrix (GPU-accelerated)
-    mask = spatial_adj != float('inf')
-    valid_values = torch.from_numpy(spatial_adj[mask]).to(device)
-    spatial_normalized = (spatial_adj - valid_values.mean().item()) / valid_values.std().item()
-    spatial_adj = torch.exp(-torch.tensor(spatial_normalized) ** 2 / args['sigma2'] ** 2)
-    spatial_adj = (spatial_adj > args['thres2']).float()
-
-    # Adjacency normalization
-    def normalize_adj(adj):
-        D = adj.sum(1)
-        D = torch.clamp(D, min=1e-5)
-        D_inv_sqrt = 1.0 / torch.sqrt(D)
-        return 0.8 * (torch.eye(adj.size(0), device=device) +
-                      0.8 * D_inv_sqrt.view(-1, 1) * adj * D_inv_sqrt.view(1, -1))
-
-    return (normalize_adj(semantic_adj.to(args['device'])), normalize_adj(spatial_adj.to(args['device'])))
-
-
-# Test code
-if __name__ == '__main__':
-    config = {
-        'sigma1': 0.1,
-        'sigma2': 10,
-        'thres1': 0.6,
-        'thres2': 0.5,
-        'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'
-    }
-
-    for nodes in [358, 883, 170]:
-        args = {'num_nodes': nodes, **config}
-        get_A_hat(args)
\ No newline at end of file