import csv import os import pandas as pd import numpy as np from fastdtw import fastdtw from tqdm import tqdm import torch files = { 358: ['PEMS03/PEMS03.npz', 'PEMS03/PEMS03.csv'], 307: ['PEMS04/PEMS04.npz', 'PEMS04/PEMS04.csv'], 883: ['PEMS07/PEMS07.npz', 'PEMS07/PEMS07.csv'], 170: ['PEMS08/PEMS08.npz', 'PEMS08/PEMS08.csv'], # 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'], # 'pemsD7M': ['PEMSD7M/PEMSD7M.npz', 'PEMSD7M/distance.csv'], # 'pemsD7L': ['PEMSD7L/PEMSD7L.npz', 'PEMSD7L/distance.csv'] } def get_A_hat(config): """read data, generate spatial adjacency matrix and semantic adjacency matrix by dtw Args: sigma1: float, default=0.1, sigma for the semantic matrix sigma2: float, default=10, sigma for the spatial matrix thres1: float, default=0.6, the threshold for the semantic matrix thres2: float, default=0.5, the threshold for the spatial matrix Returns: data: tensor, T * N * 1 dtw_matrix: array, semantic adjacency matrix sp_matrix: array, spatial adjacency matrix """ file_path = config['data']['graph_pkl_filename'] filename = config['basic']['dataset'] dataset_path = config['data']['dataset_dir'] args = config['model'] data = np.load(file_path) data = np.nan_to_num(data, nan=0.0, posinf=0.0, neginf=0.0) num_node = data.shape[1] mean_value = np.mean(data, axis=(0, 1)).reshape(1, 1, -1) std_value = np.std(data, axis=(0, 1)).reshape(1, 1, -1) data = (data - mean_value) / std_value # 计算dtw_distance, 如果存在缓存则直接读取缓存 if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy'): data_mean = np.mean([data[:, :, 0][24 * 12 * i: 24 * 12 * (i + 1)] for i in range(data.shape[0] // (24 * 12))], axis=0) data_mean = data_mean.squeeze().T dtw_distance = np.zeros((num_node, num_node)) for i in tqdm(range(num_node)): for j in range(i, num_node): dtw_distance[i][j] = fastdtw(data_mean[i], data_mean[j], radius=6)[0] for i in range(num_node): for j in range(i): dtw_distance[i][j] = dtw_distance[j][i] np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy', dtw_distance) dist_matrix = np.load(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy') mean = np.mean(dist_matrix) std = np.std(dist_matrix) dist_matrix = (dist_matrix - mean) / std sigma = args['sigma1'] dist_matrix = np.exp(-dist_matrix ** 2 / sigma ** 2) dtw_matrix = np.zeros_like(dist_matrix) dtw_matrix[dist_matrix > args['thres1']] = 1 # 计算spatial_distance, 如果存在缓存则直接读取缓存 if not os.path.exists(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy'): if num_node == 358: with open(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}.txt', 'r') as f: id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))} # 建立映射列表 # 使用 pandas 读取 CSV 文件,跳过标题行 df = pd.read_csv(f'{dataset_path}/{filename}.csv', skiprows=1, header=None) dist_matrix = np.zeros((num_node, num_node)) + float('inf') for _, row in df.iterrows(): start = int(id_dict[row[0]]) end = int(id_dict[row[1]]) dist_matrix[start][end] = float(row[2]) dist_matrix[end][start] = float(row[2]) np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix) else: # 使用 pandas 读取 CSV 文件,跳过标题行 df = pd.read_csv(f'{dataset_path}/{filename}.csv', skiprows=1, header=None) dist_matrix = np.zeros((num_node, num_node)) + float('inf') for _, row in df.iterrows(): start = int(row[0]) end = int(row[1]) dist_matrix[start][end] = float(row[2]) dist_matrix[end][start] = float(row[2]) np.save(f'data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy', dist_matrix) # normalization std = np.std(dist_matrix[dist_matrix != float('inf')]) mean = np.mean(dist_matrix[dist_matrix != float('inf')]) dist_matrix = (dist_matrix - mean) / std sigma = args['sigma2'] sp_matrix = np.exp(- dist_matrix ** 2 / sigma ** 2) sp_matrix[sp_matrix < args['thres2']] = 0 return (get_normalized_adj(dtw_matrix).to(config['basic']['device']), get_normalized_adj(sp_matrix).to(config['basic']['device'])) def get_normalized_adj(A): """ Returns a tensor, the degree normalized adjacency matrix. """ alpha = 0.8 D = np.array(np.sum(A, axis=1)).reshape((-1,)) D[D <= 10e-5] = 10e-5 # Prevent infs diag = np.reciprocal(np.sqrt(D)) A_wave = np.multiply(np.multiply(diag.reshape((-1, 1)), A), diag.reshape((1, -1))) A_reg = alpha / 2 * (np.eye(A.shape[0]) + A_wave) return torch.from_numpy(A_reg.astype(np.float32)) if __name__ == '__main__': config = { 'sigma1': 0.1, 'sigma2': 10, 'thres1': 0.6, 'thres2': 0.5, 'device': 'cuda:0' if torch.cuda.is_available() else 'cpu' } for nodes in [358, 170, 883]: args = {'num_nodes': nodes, **config} get_A_hat(args)