96 lines
5.1 KiB
Python
Executable File
96 lines
5.1 KiB
Python
Executable File
import torch
|
|
import torch.nn as nn
|
|
import math, numpy as np
|
|
|
|
|
|
class HierAttnLstm(nn.Module):
    """Stacked LSTM whose upper layers consume attention-pooled segments.

    The bottom LSTM cell runs over every input time step. Each upper layer
    splits the sequence produced by the layer below into segments (see
    ``calculate_stride``), pools the hidden and cell states of each segment
    with ``SelfAttentionPooling`` and feeds the pooled vectors to its own
    LSTM cell. The top layer's sequence is summarised by ``SelfAttention``
    and mapped to one prediction step by a two-layer MLP. Multi-step
    forecasts are produced autoregressively.
    """

    def __init__(self, args):
        """Build the network from a configuration mapping.

        Args:
            args: mapping providing the keys ``num_nodes``, ``feature_dim``,
                ``output_dim``, ``input_window``, ``output_window``,
                ``hidden_size``, ``num_layers``, ``natt_hops``, ``nfc`` and
                ``max_up_len``.

        Raises:
            KeyError: if one of the keys above is missing.
        """
        super().__init__()
        self.num_nodes, self.feature_dim, self.output_dim = (
            args['num_nodes'], args['feature_dim'], args['output_dim'])
        self.input_window, self.output_window = args['input_window'], args['output_window']
        self.hidden_size, self.num_layers = args['hidden_size'], args['num_layers']
        self.natt_hops, self.nfc, self.max_up_len = args['natt_hops'], args['nfc'], args['max_up_len']
        self.input_size = self.num_nodes * self.feature_dim
        # Kept for callers that read it; ``forward`` no longer relies on it
        # and follows the device of its input instead.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # One LSTM cell per layer; only the first one sees the raw input.
        self.lstm_cells = nn.ModuleList(
            [nn.LSTMCell(self.input_size, self.hidden_size)]
            + [nn.LSTMCell(self.hidden_size, self.hidden_size)
               for _ in range(self.num_layers - 1)])
        # One pooling module per upper layer, for hidden and cell states.
        self.hidden_state_pooling = nn.ModuleList(
            [SelfAttentionPooling(self.hidden_size) for _ in range(self.num_layers - 1)])
        self.cell_state_pooling = nn.ModuleList(
            [SelfAttentionPooling(self.hidden_size) for _ in range(self.num_layers - 1)])
        self.self_attention = SelfAttention(self.hidden_size, self.natt_hops)
        self.fc_layer = nn.Sequential(
            nn.Linear(self.hidden_size * self.natt_hops, self.nfc), nn.ReLU(),
            nn.Linear(self.nfc, self.num_nodes * self.output_dim))

    def forward(self, batch):
        """Predict ``output_window`` steps autoregressively.

        Args:
            batch: 4-D tensor whose axis 0 is the batch and axis 1 has
                ``input_window`` entries; presumably laid out as
                (batch, input_window, num_nodes, features) - TODO confirm.

        Returns:
            Tensor of shape (batch, output_window, num_nodes, output_dim).
        """
        batch_size = batch.shape[0]
        # Time-major layout; only the first channel of the last axis is kept.
        # NOTE(review): the first LSTM cell expects num_nodes * feature_dim
        # inputs, so this slice appears to match only when feature_dim == 1.
        src = batch.permute(1, 0, 2, 3)[..., :1]
        src = src.reshape(self.input_window, batch_size, -1)

        outputs = []
        for step in range(self.output_window):
            # Fresh zero states for every predicted step. ``new_zeros`` takes
            # device and dtype from the input, so the module works wherever
            # the caller placed the model and data.
            hidden_states = [src.new_zeros(batch_size, self.hidden_size)
                             for _ in range(self.num_layers)]
            cell_states = [src.new_zeros(batch_size, self.hidden_size)
                           for _ in range(self.num_layers)]

            # With a single layer the bottom outputs go straight to the
            # attention head (previously this path hit an unbound variable).
            layer_outputs, cell_history = self._run_bottom_layer(
                src, hidden_states, cell_states)
            for layer in range(1, self.num_layers):
                layer_outputs, cell_history = self._run_upper_layer(
                    layer, layer_outputs, cell_history, hidden_states, cell_states)

            attended_features, _ = self.self_attention(layer_outputs)
            out = self.fc_layer(attended_features.reshape(batch_size, -1))
            out = out.view(batch_size, self.num_nodes, self.output_dim)
            outputs.append(out.clone())

            if step < self.output_window - 1:
                # Slide the window: drop the oldest step, append the new
                # prediction. NOTE(review): requires the flattened prediction
                # to be as wide as one input step.
                src = torch.cat((src[1:], out.reshape(batch_size, -1).unsqueeze(0)), 0)

        return torch.stack(outputs).permute(1, 0, 2, 3)

    def _run_bottom_layer(self, src, hidden_states, cell_states):
        """Run layer 0 over every time step; return stacked (hidden, cell) sequences."""
        hidden_seq, cell_seq = [], []
        for t in range(self.input_window):
            hidden_states[0], cell_states[0] = self.lstm_cells[0](
                src[t], (hidden_states[0], cell_states[0]))
            hidden_seq.append(hidden_states[0])
            cell_seq.append(cell_states[0])
        return torch.stack(hidden_seq, 1), torch.stack(cell_seq, 1)

    def _run_upper_layer(self, layer, layer_inputs, prev_cell_history, hidden_states, cell_states):
        """Run one upper layer over pooled segments of the layer below."""
        hidden_seq, cell_seq = [], []
        for start, end in self.calculate_stride(layer_inputs.size(1)):
            segment = layer_inputs[:, start:end, :]
            cell_segment = prev_cell_history[:, start:end, :]
            pooled_hidden = self.hidden_state_pooling[layer - 1](segment)
            # The layer's own current cell state takes part in the pooling
            # together with the lower layer's cell states for this segment.
            pooled_cell = self.cell_state_pooling[layer - 1](
                torch.cat([cell_segment, cell_states[layer].unsqueeze(1)], 1))
            hidden_states[layer], cell_states[layer] = self.lstm_cells[layer](
                pooled_hidden, (hidden_states[layer], pooled_cell))
            hidden_seq.append(hidden_states[layer])
            cell_seq.append(cell_states[layer])
        return torch.stack(hidden_seq, 1), torch.stack(cell_seq, 1)

    def calculate_stride(self, seq_len):
        """Return the ``(start, end)`` index pairs used to segment a sequence.

        ``min(max_up_len, ceil(sqrt(seq_len))) + 3`` boundaries are spread
        evenly over ``[0, seq_len - 1]`` and truncated to integers; each pair
        of consecutive boundaries forms one segment.

        Args:
            seq_len: length of the sequence to segment.

        Returns:
            List of ``(start, end)`` tuples of NumPy integers.
        """
        num_points = min(self.max_up_len, math.ceil(math.sqrt(seq_len))) + 3
        idx = np.linspace(0, seq_len - 1, num=num_points).astype(int)
        # NOTE(review): callers slice with ``start:end`` and the final boundary
        # is ``seq_len - 1``, so the last index is never inside a segment;
        # short sequences can also yield empty segments. Behaviour preserved.
        return list(zip(idx[:-1], idx[1:]))
|
|
|
|
|
|
class SelfAttentionPooling(nn.Module):
    """Reduce a sequence to a single vector with learned attention weights.

    A linear layer scores every position, the scores are normalised with a
    softmax along the sequence axis, and the positions are summed using the
    resulting weights.
    """

    def __init__(self, input_dim):
        """Create the scoring layer.

        Args:
            input_dim: size of the last axis of the tensors to be pooled.
        """
        super().__init__()
        # Maps each position's vector to one scalar score.
        self.W = nn.Linear(input_dim, 1)

    def forward(self, batch_rep):
        """Pool ``batch_rep`` along axis 1.

        Args:
            batch_rep: tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tensor of shape (batch, input_dim): the attention-weighted sum
            of the positions.
        """
        scores = self.W(batch_rep).squeeze(-1)
        weights = nn.functional.softmax(scores, dim=-1)
        # Restore the trailing axis so the weights broadcast over features.
        weighted = batch_rep * weights.unsqueeze(-1)
        return torch.sum(weighted, dim=1)
|
|
|
|
|
|
class SelfAttention(nn.Module):
    """Multi-hop attention that summarises a sequence into ``att_hops`` vectors."""

    def __init__(self, att_size, att_hops):
        """Create the projection and scoring layers.

        Args:
            att_size: size of the last axis of the input sequence.
            att_hops: number of attention distributions (output rows).
        """
        super().__init__()
        self.ut_dense = nn.Sequential(nn.Linear(att_size, att_size), nn.Tanh())
        self.et_dense = nn.Linear(att_size, att_hops)
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inputs):
        """Attend over axis 1 of ``inputs``.

        Args:
            inputs: tensor of shape (batch, seq_len, att_size).

        Returns:
            Tuple ``(output, att_scores)`` where ``att_scores`` has shape
            (batch, att_hops, seq_len) and sums to one along its last axis,
            and ``output`` is ``att_scores @ inputs`` with shape
            (batch, att_hops, att_size).
        """
        projected = self.ut_dense(inputs)
        # Move the hop axis before the sequence axis so that the softmax
        # normalises over sequence positions.
        hop_logits = self.et_dense(projected).permute(0, 2, 1)
        att_scores = self.softmax(hop_logits)
        attended = torch.bmm(att_scores, inputs)
        return attended, att_scores
|