import os

import numpy as np
import pandas as pd
import torch
from fastdtw import fastdtw
from tqdm import tqdm

# Dataset registry: number of nodes -> [flow data file, distance csv file]
files = {
    358: ["PEMS03/PEMS03.npz", "PEMS03/PEMS03.csv"],
    307: ["PEMS04/PEMS04.npz", "PEMS04/PEMS04.csv"],
    883: ["PEMS07/PEMS07.npz", "PEMS07/PEMS07.csv"],
    170: ["PEMS08/PEMS08.npz", "PEMS08/PEMS08.csv"],
    # 'pemsbay': ['PEMSBAY/pems_bay.npz', 'PEMSBAY/distance.csv'],
    # 'pemsD7M': ['PEMSD7M/PEMSD7M.npz', 'PEMSD7M/distance.csv'],
    # 'pemsD7L': ['PEMSD7L/PEMSD7L.npz', 'PEMSD7L/distance.csv']
}


def get_A_hat(args):
    """Read the traffic data and build the spatial adjacency matrix and the
    semantic adjacency matrix (via DTW).

    Args:
        args: dict with the following keys
            num_nodes: int, number of sensors, used to select the dataset
            sigma1: float, default=0.1, Gaussian-kernel width for the semantic matrix
            sigma2: float, default=10, Gaussian-kernel width for the spatial matrix
            thres1: float, default=0.6, threshold for the semantic matrix
            thres2: float, default=0.5, threshold for the spatial matrix
            device: str, device on which the returned tensors are placed

    Returns:
        dtw_matrix: tensor, degree-normalized semantic adjacency matrix
        sp_matrix: tensor, degree-normalized spatial adjacency matrix
    """
    filepath = "./data/"
    num_node = args["num_nodes"]
    file = files[num_node]
    filename = file[0][:6]
    data = np.load(filepath + file[0])["data"]
    num_node = data.shape[1]

    # Standardize the raw data (z-score over all timesteps and nodes, per channel)
    mean_value = np.mean(data, axis=(0, 1)).reshape(1, 1, -1)
    std_value = np.std(data, axis=(0, 1)).reshape(1, 1, -1)
    data = (data - mean_value) / std_value

    # Compute the DTW distance matrix; reuse the cached file if it already exists
    if not os.path.exists(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy"
    ):
        # Average the flow channel over whole days (24 * 12 five-minute slots per day)
        data_mean = np.mean(
            [
                data[:, :, 0][24 * 12 * i : 24 * 12 * (i + 1)]
                for i in range(data.shape[0] // (24 * 12))
            ],
            axis=0,
        )
        data_mean = data_mean.squeeze().T
        dtw_distance = np.zeros((num_node, num_node))
        for i in tqdm(range(num_node)):
            for j in range(i, num_node):
                dtw_distance[i][j] = fastdtw(data_mean[i], data_mean[j], radius=6)[0]
        # Mirror the upper triangle to make the matrix symmetric
        for i in range(num_node):
            for j in range(i):
                dtw_distance[i][j] = dtw_distance[j][i]
        np.save(
            f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy",
            dtw_distance,
        )

    dist_matrix = np.load(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_dtw_distance.npy"
    )

    # Turn DTW distances into the semantic adjacency matrix with a Gaussian kernel
    mean = np.mean(dist_matrix)
    std = np.std(dist_matrix)
    dist_matrix = (dist_matrix - mean) / std
    sigma = args["sigma1"]
    dist_matrix = np.exp(-(dist_matrix**2) / sigma**2)
    dtw_matrix = np.zeros_like(dist_matrix)
    dtw_matrix[dist_matrix > args["thres1"]] = 1

    # Compute the spatial distance matrix; reuse the cached file if it already exists
    if not os.path.exists(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy"
    ):
        if num_node == 358:
            # PEMS03 stores raw sensor ids, so build a sensor-id -> index mapping
            with open(f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}.txt", "r") as f:
                id_dict = {
                    int(i): idx for idx, i in enumerate(f.read().strip().split("\n"))
                }
            # Read the distance CSV with pandas, skipping the header row
            df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
            dist_matrix = np.zeros((num_node, num_node)) + float("inf")
            for _, row in df.iterrows():
                start = int(id_dict[row[0]])
                end = int(id_dict[row[1]])
                dist_matrix[start][end] = float(row[2])
                dist_matrix[end][start] = float(row[2])
            np.save(
                f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy",
                dist_matrix,
            )
        else:
            # Other datasets already use 0-based node indices in the CSV;
            # read it with pandas, skipping the header row
            df = pd.read_csv(filepath + file[1], skiprows=1, header=None)
            dist_matrix = np.zeros((num_node, num_node)) + float("inf")
            for _, row in df.iterrows():
                start = int(row[0])
                end = int(row[1])
                dist_matrix[start][end] = float(row[2])
                dist_matrix[end][start] = float(row[2])
            np.save(
                f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy",
                dist_matrix,
            )

    # Load the cached spatial distances (the original code skipped this reload,
    # so a cached run would reuse the DTW-based matrix by mistake)
    dist_matrix = np.load(
        f"data/PEMS0{filename[-1]}/PEMS0{filename[-1]}_spatial_distance.npy"
    )

    # Normalize using the finite entries only (unconnected pairs stay at inf)
    std = np.std(dist_matrix[dist_matrix != float("inf")])
    mean = np.mean(dist_matrix[dist_matrix != float("inf")])
    dist_matrix = (dist_matrix - mean) / std
    sigma = args["sigma2"]
    sp_matrix = np.exp(-(dist_matrix**2) / sigma**2)
    sp_matrix[sp_matrix < args["thres2"]] = 0

    return get_normalized_adj(dtw_matrix).to(args["device"]), get_normalized_adj(
        sp_matrix
    ).to(args["device"])


def get_normalized_adj(A):
    """Return the degree-normalized adjacency matrix as a tensor:
    A_reg = alpha / 2 * (I + D^{-1/2} A D^{-1/2})."""
    alpha = 0.8
    D = np.array(np.sum(A, axis=1)).reshape((-1,))
    D[D <= 10e-5] = 10e-5  # Prevent infs from zero-degree nodes
    diag = np.reciprocal(np.sqrt(D))
    A_wave = np.multiply(np.multiply(diag.reshape((-1, 1)), A), diag.reshape((1, -1)))
    A_reg = alpha / 2 * (np.eye(A.shape[0]) + A_wave)
    return torch.from_numpy(A_reg.astype(np.float32))


if __name__ == "__main__":
    config = {
        "sigma1": 0.1,
        "sigma2": 10,
        "thres1": 0.6,
        "thres2": 0.5,
        "device": "cuda:0" if torch.cuda.is_available() else "cpu",
    }
    for nodes in [358, 170, 883]:
        args = {"num_nodes": nodes, **config}
        get_A_hat(args)
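

# ---------------------------------------------------------------------------
# Usage sketch (illustration only, not part of the original pipeline): the two
# tensors returned by get_A_hat are degree-normalized adjacency matrices, and a
# downstream spatio-temporal GNN would typically consume them in a graph
# convolution of the form H' = relu(A_hat @ H @ W). The helper name
# graph_conv_step and the tensor shapes below are assumptions for
# demonstration, not something the original code defines.
# ---------------------------------------------------------------------------
def graph_conv_step(a_hat, h, w):
    """One graph-convolution step with a normalized adjacency matrix (sketch).

    a_hat: (num_nodes, num_nodes) normalized adjacency from get_A_hat
    h:     (batch, num_nodes, in_dim) node features (hypothetical)
    w:     (in_dim, out_dim) learnable weight (hypothetical)
    """
    # Propagate node features over the graph, then project to the output dimension
    return torch.relu(a_hat @ h @ w)


# Example (hypothetical shapes, tensors kept on the same device as A_hat):
#   dtw_hat, sp_hat = get_A_hat(args)
#   h = torch.randn(8, sp_hat.shape[0], 1, device=sp_hat.device)
#   w = torch.randn(1, 64, device=sp_hat.device)
#   out = graph_conv_step(sp_hat, h, w)  # -> (8, num_nodes, 64)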