mv dir name
This commit is contained in:
parent
26758e761b
commit
be5e810c54
|
|
@ -2,7 +2,9 @@
|
||||||
review_lab/
|
review_lab/
|
||||||
scripts/
|
scripts/
|
||||||
experiments/
|
experiments/
|
||||||
|
*.csv
|
||||||
|
*.npz
|
||||||
|
*.pkl
|
||||||
|
|
||||||
# ---> Python
|
# ---> Python
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
|
|
|
||||||
|
|
@ -94,22 +94,22 @@ def load_st_dataset(dataset, sample):
|
||||||
# output B, N, D
|
# output B, N, D
|
||||||
match dataset:
|
match dataset:
|
||||||
case 'PEMSD3':
|
case 'PEMSD3':
|
||||||
data_path = os.path.join('./data/PeMS03/PEMS03.npz')
|
data_path = os.path.join('./data/PEMS03/PEMS03.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD4':
|
case 'PEMSD4':
|
||||||
data_path = os.path.join('./data/PeMS04/PEMS04.npz')
|
data_path = os.path.join('./data/PEMS04/PEMS04.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7':
|
case 'PEMSD7':
|
||||||
data_path = os.path.join('./data/PeMS07/PEMS07.npz')
|
data_path = os.path.join('./data/PEMS07/PEMS07.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD8':
|
case 'PEMSD8':
|
||||||
data_path = os.path.join('./data/PeMS08/PeMS08.npz')
|
data_path = os.path.join('./data/PEMS08/PEMS08.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(L)':
|
case 'PEMSD7(L)':
|
||||||
data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
|
data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(M)':
|
case 'PEMSD7(M)':
|
||||||
data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
|
data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
|
||||||
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
||||||
case 'METR-LA':
|
case 'METR-LA':
|
||||||
data_path = os.path.join('./data/METR-LA/METR.h5')
|
data_path = os.path.join('./data/METR-LA/METR.h5')
|
||||||
|
|
|
||||||
|
|
@ -94,22 +94,22 @@ def load_st_dataset(dataset, sample):
|
||||||
# output B, N, D
|
# output B, N, D
|
||||||
match dataset:
|
match dataset:
|
||||||
case 'PEMSD3':
|
case 'PEMSD3':
|
||||||
data_path = os.path.join('./data/PeMS03/PEMS03.npz')
|
data_path = os.path.join('./data/PEMS03/PEMS03.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD4':
|
case 'PEMSD4':
|
||||||
data_path = os.path.join('./data/PeMS04/PEMS04.npz')
|
data_path = os.path.join('./data/PEMS04/PEMS04.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7':
|
case 'PEMSD7':
|
||||||
data_path = os.path.join('./data/PeMS07/PEMS07.npz')
|
data_path = os.path.join('./data/PEMS07/PEMS07.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD8':
|
case 'PEMSD8':
|
||||||
data_path = os.path.join('./data/PeMS08/PeMS08.npz')
|
data_path = os.path.join('./data/PEMS08/PEMS08.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(L)':
|
case 'PEMSD7(L)':
|
||||||
data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
|
data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(M)':
|
case 'PEMSD7(M)':
|
||||||
data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
|
data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
|
||||||
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
||||||
case 'METR-LA':
|
case 'METR-LA':
|
||||||
data_path = os.path.join('./data/METR-LA/METR.h5')
|
data_path = os.path.join('./data/METR-LA/METR.h5')
|
||||||
|
|
|
||||||
|
|
@ -94,22 +94,22 @@ def load_st_dataset(dataset):
|
||||||
# output B, N, D
|
# output B, N, D
|
||||||
match dataset:
|
match dataset:
|
||||||
case 'PEMSD3':
|
case 'PEMSD3':
|
||||||
data_path = os.path.join('./data/PeMS03/PEMS03.npz')
|
data_path = os.path.join('./data/PEMS03/PEMS03.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD4':
|
case 'PEMSD4':
|
||||||
data_path = os.path.join('./data/PeMS04/PEMS04.npz')
|
data_path = os.path.join('./data/PEMS04/PEMS04.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7':
|
case 'PEMSD7':
|
||||||
data_path = os.path.join('./data/PeMS07/PEMS07.npz')
|
data_path = os.path.join('./data/PEMS07/PEMS07.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD8':
|
case 'PEMSD8':
|
||||||
data_path = os.path.join('./data/PeMS08/PeMS08.npz')
|
data_path = os.path.join('./data/PEMS08/PEMS08.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(L)':
|
case 'PEMSD7(L)':
|
||||||
data_path = os.path.join('./data/PeMS07(L)/PEMS07L.npz')
|
data_path = os.path.join('./data/PEMS07(L)/PEMS07L.npz')
|
||||||
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
data = np.load(data_path)['data'][:, :, 0] # only the first dimension, traffic flow data
|
||||||
case 'PEMSD7(M)':
|
case 'PEMSD7(M)':
|
||||||
data_path = os.path.join('./data/PeMS07(M)/V_228.csv')
|
data_path = os.path.join('./data/PEMS07(M)/V_228.csv')
|
||||||
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
data = np.genfromtxt(data_path, delimiter=',') # Read CSV directly with numpy
|
||||||
case 'METR-LA':
|
case 'METR-LA':
|
||||||
data_path = os.path.join('./data/METR-LA/METR.h5')
|
data_path = os.path.join('./data/METR-LA/METR.h5')
|
||||||
|
|
|
||||||
|
|
@ -7,13 +7,13 @@ from tqdm import tqdm
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
files = {
|
files = {
|
||||||
358: ['PeMS03/PEMS03.npz', 'PeMS03/PEMS03.csv'],
|
358: ['PEMS03/PEMS03.npz', 'PEMS03/PEMS03.csv'],
|
||||||
307: ['PeMS04/PEMS04.npz', 'PeMS04/PEMS04.csv'],
|
307: ['PEMS04/PEMS04.npz', 'PEMS04/PEMS04.csv'],
|
||||||
883: ['PeMS07/PEMS07.npz', 'PeMS07/PEMS07.csv'],
|
883: ['PEMS07/PEMS07.npz', 'PEMS07/PEMS07.csv'],
|
||||||
170: ['PeMS08/PEMS08.npz', 'PeMS08/PEMS08.csv'],
|
170: ['PEMS08/PEMS08.npz', 'PEMS08/PEMS08.csv'],
|
||||||
# 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'],
|
# 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'],
|
||||||
# 'pemsD7M': ['PeMSD7M/PeMSD7M.npz', 'PeMSD7M/distance.csv'],
|
# 'pemsD7M': ['PEMSD7M/PEMSD7M.npz', 'PEMSD7M/distance.csv'],
|
||||||
# 'pemsD7L': ['PeMSD7L/PeMSD7L.npz', 'PeMSD7L/distance.csv']
|
# 'pemsD7L': ['PEMSD7L/PEMSD7L.npz', 'PEMSD7L/distance.csv']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -43,7 +43,7 @@ def get_A_hat(args):
|
||||||
data = (data - mean_value) / std_value
|
data = (data - mean_value) / std_value
|
||||||
|
|
||||||
# 计算dtw_distance, 如果存在缓存则直接读取缓存
|
# 计算dtw_distance, 如果存在缓存则直接读取缓存
|
||||||
if not os.path.exists(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy'):
|
if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy'):
|
||||||
data_mean = np.mean([data[:, :, 0][24 * 12 * i: 24 * 12 * (i + 1)] for i in range(data.shape[0] // (24 * 12))],
|
data_mean = np.mean([data[:, :, 0][24 * 12 * i: 24 * 12 * (i + 1)] for i in range(data.shape[0] // (24 * 12))],
|
||||||
axis=0)
|
axis=0)
|
||||||
data_mean = data_mean.squeeze().T
|
data_mean = data_mean.squeeze().T
|
||||||
|
|
@ -54,9 +54,9 @@ def get_A_hat(args):
|
||||||
for i in range(num_node):
|
for i in range(num_node):
|
||||||
for j in range(i):
|
for j in range(i):
|
||||||
dtw_distance[i][j] = dtw_distance[j][i]
|
dtw_distance[i][j] = dtw_distance[j][i]
|
||||||
np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy', dtw_distance)
|
np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy', dtw_distance)
|
||||||
|
|
||||||
dist_matrix = np.load(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy')
|
dist_matrix = np.load(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy')
|
||||||
|
|
||||||
mean = np.mean(dist_matrix)
|
mean = np.mean(dist_matrix)
|
||||||
std = np.std(dist_matrix)
|
std = np.std(dist_matrix)
|
||||||
|
|
@ -67,9 +67,9 @@ def get_A_hat(args):
|
||||||
dtw_matrix[dist_matrix > args['thres1']] = 1
|
dtw_matrix[dist_matrix > args['thres1']] = 1
|
||||||
|
|
||||||
# 计算spatial_distance, 如果存在缓存则直接读取缓存
|
# 计算spatial_distance, 如果存在缓存则直接读取缓存
|
||||||
if not os.path.exists(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy'):
|
if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy'):
|
||||||
if num_node == 358:
|
if num_node == 358:
|
||||||
with open(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}.txt', 'r') as f:
|
with open(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}.txt', 'r') as f:
|
||||||
id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))} # 建立映射列表
|
id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))} # 建立映射列表
|
||||||
# 使用 pandas 读取 CSV 文件,跳过标题行
|
# 使用 pandas 读取 CSV 文件,跳过标题行
|
||||||
df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
|
df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
|
||||||
|
|
@ -79,7 +79,7 @@ def get_A_hat(args):
|
||||||
end = int(id_dict[row[1]])
|
end = int(id_dict[row[1]])
|
||||||
dist_matrix[start][end] = float(row[2])
|
dist_matrix[start][end] = float(row[2])
|
||||||
dist_matrix[end][start] = float(row[2])
|
dist_matrix[end][start] = float(row[2])
|
||||||
np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
|
np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
|
||||||
else:
|
else:
|
||||||
# 使用 pandas 读取 CSV 文件,跳过标题行
|
# 使用 pandas 读取 CSV 文件,跳过标题行
|
||||||
df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
|
df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
|
||||||
|
|
@ -89,7 +89,7 @@ def get_A_hat(args):
|
||||||
end = int(row[1])
|
end = int(row[1])
|
||||||
dist_matrix[start][end] = float(row[2])
|
dist_matrix[start][end] = float(row[2])
|
||||||
dist_matrix[end][start] = float(row[2])
|
dist_matrix[end][start] = float(row[2])
|
||||||
np.save(f'data/PeMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
|
np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix)
|
||||||
# normalization
|
# normalization
|
||||||
std = np.std(dist_matrix[dist_matrix != float('inf')])
|
std = np.std(dist_matrix[dist_matrix != float('inf')])
|
||||||
mean = np.mean(dist_matrix[dist_matrix != float('inf')])
|
mean = np.mean(dist_matrix[dist_matrix != float('inf')])
|
||||||
|
|
|
||||||
|
|
@ -1,116 +0,0 @@
|
||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
from fastdtw import fastdtw
|
|
||||||
from tqdm import tqdm
|
|
||||||
import torch
|
|
||||||
from joblib import Parallel, delayed
|
|
||||||
|
|
||||||
files = {
|
|
||||||
358: ('PeMS03/PEMS03.npz', 'PeMS03/PEMS03.csv'),
|
|
||||||
307: ('PeMS04/PEMS04.npz', 'PEMS04.csv'),
|
|
||||||
883: ('PeMS07/PEMS07.npz', 'PEMS07.csv'),
|
|
||||||
170: ('PeMS08/PEMS08.npz', 'PEMS08.csv')
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def compute_dtw_pair(i, j, data_mean):
|
|
||||||
return i, j, fastdtw(data_mean[i], data_mean[j], radius=6)[0]
|
|
||||||
|
|
||||||
|
|
||||||
def get_A_hat(args):
|
|
||||||
"""Optimized version with GPU support and parallel computing"""
|
|
||||||
# 基础配置
|
|
||||||
device = torch.device(args['device'])
|
|
||||||
data_dir = './data/'
|
|
||||||
num_node = args['num_nodes']
|
|
||||||
file_npz, file_csv = files[num_node]
|
|
||||||
dataset_name = file_npz.split('/')[0]
|
|
||||||
os.makedirs(f'{data_dir}{dataset_name}', exist_ok=True)
|
|
||||||
|
|
||||||
# 数据加载与标准化
|
|
||||||
with np.load(f'{data_dir}{file_npz}') as data:
|
|
||||||
arr_data = data['data']
|
|
||||||
arr_data = (arr_data - arr_data.mean((0, 1))) / arr_data.std((0, 1))
|
|
||||||
arr_data = torch.from_numpy(arr_data).float().to(device)
|
|
||||||
|
|
||||||
# DTW矩阵计算(带缓存)
|
|
||||||
dtw_path = f'{data_dir}{dataset_name}/{dataset_name}_dtw_distance.npy'
|
|
||||||
if not os.path.exists(dtw_path):
|
|
||||||
# 使用GPU加速的均值计算
|
|
||||||
daily_data = arr_data[..., 0].unfold(0, 288, 288).mean(dim=0).T.cpu().numpy()
|
|
||||||
|
|
||||||
# 并行计算DTW
|
|
||||||
print("Computing DTW matrix with parallel optimization...")
|
|
||||||
results = Parallel(n_jobs=-1)(
|
|
||||||
delayed(compute_dtw_pair)(i, j, daily_data)
|
|
||||||
for i in tqdm(range(num_node)) for j in range(i, num_node)
|
|
||||||
)
|
|
||||||
|
|
||||||
dtw_matrix = np.full((num_node, num_node), np.inf)
|
|
||||||
for i, j, d in results:
|
|
||||||
dtw_matrix[i, j] = d
|
|
||||||
dtw_matrix[j, i] = d
|
|
||||||
np.save(dtw_path, dtw_matrix)
|
|
||||||
else:
|
|
||||||
dtw_matrix = np.load(dtw_path)
|
|
||||||
|
|
||||||
# DTW矩阵标准化(GPU加速)
|
|
||||||
dtw_tensor = torch.from_numpy(dtw_matrix).to(device)
|
|
||||||
dtw_normalized = (dtw_tensor - dtw_tensor.mean()) / dtw_tensor.std()
|
|
||||||
semantic_adj = torch.exp(-dtw_normalized ** 2 / args['sigma1'] ** 2)
|
|
||||||
semantic_adj = (semantic_adj > args['thres1']).float()
|
|
||||||
|
|
||||||
# 空间矩阵计算(带缓存)
|
|
||||||
spatial_path = f'{data_dir}{dataset_name}/{dataset_name}_spatial_distance.npy'
|
|
||||||
if not os.path.exists(spatial_path):
|
|
||||||
# 使用Pandas高效读取
|
|
||||||
df = pd.read_csv(f'{data_dir}{file_csv}', header=None)
|
|
||||||
if num_node == 358: # 特殊处理节点ID映射
|
|
||||||
with open(f'{data_dir}{dataset_name}/{dataset_name}.txt') as f:
|
|
||||||
node_ids = [int(line.strip()) for line in f]
|
|
||||||
id_map = {nid: idx for idx, nid in enumerate(node_ids)}
|
|
||||||
df[0] = df[0].map(id_map)
|
|
||||||
df[1] = df[1].map(id_map)
|
|
||||||
|
|
||||||
# 构建稀疏矩阵
|
|
||||||
spatial_adj = torch.full((num_node, num_node), float('inf'), device=device)
|
|
||||||
for row in df.itertuples():
|
|
||||||
i, j, d = int(row[1]), int(row[2]), float(row[3])
|
|
||||||
spatial_adj[i, j] = spatial_adj[j, i] = d
|
|
||||||
spatial_adj = spatial_adj.cpu().numpy()
|
|
||||||
np.save(spatial_path, spatial_adj)
|
|
||||||
else:
|
|
||||||
spatial_adj = np.load(spatial_path)
|
|
||||||
|
|
||||||
# 空间矩阵标准化(GPU加速)
|
|
||||||
mask = spatial_adj != float('inf')
|
|
||||||
valid_values = torch.from_numpy(spatial_adj[mask]).to(device)
|
|
||||||
spatial_normalized = (spatial_adj - valid_values.mean().item()) / valid_values.std().item()
|
|
||||||
spatial_adj = torch.exp(-torch.tensor(spatial_normalized) ** 2 / args['sigma2'] ** 2)
|
|
||||||
spatial_adj = (spatial_adj > args['thres2']).float()
|
|
||||||
|
|
||||||
# 归一化处理
|
|
||||||
def normalize_adj(adj):
|
|
||||||
D = adj.sum(1)
|
|
||||||
D = torch.clamp(D, min=1e-5)
|
|
||||||
D_inv_sqrt = 1.0 / torch.sqrt(D)
|
|
||||||
return 0.8 * (torch.eye(adj.size(0), device=device) +
|
|
||||||
0.8 * D_inv_sqrt.view(-1, 1) * adj * D_inv_sqrt.view(1, -1))
|
|
||||||
|
|
||||||
return (normalize_adj(semantic_adj.to(args['device'])), normalize_adj(spatial_adj.to(args['device'])))
|
|
||||||
|
|
||||||
|
|
||||||
# 测试代码
|
|
||||||
if __name__ == '__main__':
|
|
||||||
config = {
|
|
||||||
'sigma1': 0.1,
|
|
||||||
'sigma2': 10,
|
|
||||||
'thres1': 0.6,
|
|
||||||
'thres2': 0.5,
|
|
||||||
'device': 'cuda:0' if torch.cuda.is_available() else 'cpu'
|
|
||||||
}
|
|
||||||
|
|
||||||
for nodes in [358, 883, 170]:
|
|
||||||
args = {'num_nodes': nodes, **config}
|
|
||||||
get_A_hat(args)
|
|
||||||
Loading…
Reference in New Issue