|
|
|
@ -32,58 +32,24 @@ def get_dataloader(args, normalizer="std", single=True):
|
|
|
|
scaler
|
|
|
|
scaler
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _prepare_data_with_windows(data, args, single):
|
|
|
|
def _prepare_data_with_windows(data, args, single):
|
|
|
|
# Generate sliding windows for main data
|
|
|
|
# Generate sliding windows for main data
|
|
|
|
x = add_window_x(data, args["lag"], args["horizon"], single)
|
|
|
|
x = add_window_x(data, args["lag"], args["horizon"], single)
|
|
|
|
y = add_window_y(data, args["lag"], args["horizon"], single)
|
|
|
|
y = add_window_y(data, args["lag"], args["horizon"], single)
|
|
|
|
|
|
|
|
|
|
|
|
# Generate time features
|
|
|
|
|
|
|
|
time_features = _generate_time_features(data.shape[0], args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Add time features to x and y
|
|
|
|
|
|
|
|
x = _add_time_features(x, time_features, args["lag"], args["horizon"], single, add_window_x)
|
|
|
|
|
|
|
|
y = _add_time_features(y, time_features, args["lag"], args["horizon"], single, add_window_y)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return x, y
|
|
|
|
return x, y
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _generate_time_features(L, args):
|
|
|
|
|
|
|
|
# For time series data, we generate time features for each time step
|
|
|
|
|
|
|
|
# [L, 1] -> [L, T, 1] by repeating across time dimension
|
|
|
|
|
|
|
|
T = args.get("time_dim", 1) # Get time dimension size if available
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
time_in_day = [i % args["steps_per_day"] / args["steps_per_day"] for i in range(L)]
|
|
|
|
|
|
|
|
time_in_day = np.array(time_in_day)[:, None, None] # [L, 1, 1]
|
|
|
|
|
|
|
|
time_in_day = np.tile(time_in_day, (1, T, 1)) # [L, T, 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
day_in_week = [(i // args["steps_per_day"]) % args["days_per_week"] for i in range(L)]
|
|
|
|
|
|
|
|
day_in_week = np.array(day_in_week)[:, None, None] # [L, 1, 1]
|
|
|
|
|
|
|
|
day_in_week = np.tile(day_in_week, (1, T, 1)) # [L, T, 1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return time_in_day, day_in_week
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _add_time_features(data, time_features, lag, horizon, single, window_fn):
|
|
|
|
|
|
|
|
time_in_day, day_in_week = time_features
|
|
|
|
|
|
|
|
time_day = window_fn(time_in_day, lag, horizon, single)
|
|
|
|
|
|
|
|
time_week = window_fn(day_in_week, lag, horizon, single)
|
|
|
|
|
|
|
|
return np.concatenate([data, time_day, time_week], axis=-1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _normalize_data(train_data, val_data, test_data, args, normalizer):
|
|
|
|
def _normalize_data(train_data, val_data, test_data, args, normalizer):
|
|
|
|
scaler = normalize_dataset(train_data[..., : args["input_dim"]], normalizer, args["column_wise"])
|
|
|
|
scaler = normalize_dataset(train_data[..., : args["num_nodes"]], normalizer, args["column_wise"])
|
|
|
|
|
|
|
|
|
|
|
|
for data in [train_data, val_data, test_data]:
|
|
|
|
for data in [train_data, val_data, test_data]:
|
|
|
|
data[..., : args["input_dim"]] = scaler.transform(data[..., : args["input_dim"]])
|
|
|
|
data[..., : args["num_nodes"]] = scaler.transform(data[..., : args["num_nodes"]])
|
|
|
|
|
|
|
|
|
|
|
|
return scaler
|
|
|
|
return scaler
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _apply_existing_scaler(train_data, val_data, test_data, scaler, args):
|
|
|
|
def _apply_existing_scaler(train_data, val_data, test_data, scaler, args):
|
|
|
|
for data in [train_data, val_data, test_data]:
|
|
|
|
for data in [train_data, val_data, test_data]:
|
|
|
|
data[..., : args["input_dim"]] = scaler.transform(data[..., : args["input_dim"]])
|
|
|
|
data[..., : args["num_nodes"]] = scaler.transform(data[..., : args["num_nodes"]])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _create_dataloader(X_data, Y_data, batch_size, shuffle, drop_last):
|
|
|
|
def _create_dataloader(X_data, Y_data, batch_size, shuffle, drop_last):
|
|
|
|
@ -105,15 +71,10 @@ def split_data_by_days(data, val_days, test_days, interval=30):
|
|
|
|
def split_data_by_ratio(data, val_ratio, test_ratio):
|
|
|
|
def split_data_by_ratio(data, val_ratio, test_ratio):
|
|
|
|
data_len = data.shape[0]
|
|
|
|
data_len = data.shape[0]
|
|
|
|
test_data = data[-int(data_len * test_ratio) :]
|
|
|
|
test_data = data[-int(data_len * test_ratio) :]
|
|
|
|
val_data = data[
|
|
|
|
val_data = data[-int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio)]
|
|
|
|
-int(data_len * (test_ratio + val_ratio)) : -int(data_len * test_ratio)
|
|
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
train_data = data[: -int(data_len * (test_ratio + val_ratio))]
|
|
|
|
train_data = data[: -int(data_len * (test_ratio + val_ratio))]
|
|
|
|
return train_data, val_data, test_data
|
|
|
|
return train_data, val_data, test_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _generate_windows(data, window=3, horizon=1, offset=0):
|
|
|
|
def _generate_windows(data, window=3, horizon=1, offset=0):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Internal helper function to generate sliding windows.
|
|
|
|
Internal helper function to generate sliding windows.
|
|
|
|
|