From 0769a3b2e22c94e492c9d597ead98d4f00902839 Mon Sep 17 00:00:00 2001 From: Chintan Shah Date: Mon, 30 Sep 2019 19:14:24 -0400 Subject: [PATCH] Implemented seq2seq without DCGRU and curriculum learning --- model/pytorch/dcrnn_model.py | 96 +++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 34 deletions(-) diff --git a/model/pytorch/dcrnn_model.py b/model/pytorch/dcrnn_model.py index 4ea66be..10e5783 100644 --- a/model/pytorch/dcrnn_model.py +++ b/model/pytorch/dcrnn_model.py @@ -1,10 +1,11 @@ import torch import torch.nn as nn +from abc import ABC, abstractmethod device = torch.device("cuda" if torch.cuda.is_available() else "cpu") -class DCRNNModel(nn.Module): +class DCRNNModel(metaclass=ABC): def __init__(self, is_training, scale_factor, adj_mx, **model_kwargs): super().__init__() self.adj_mx = adj_mx @@ -20,25 +21,23 @@ class DCRNNModel(nn.Module): self.input_dim = int(model_kwargs.get('input_dim', 1)) self.hidden_state_size = self.num_nodes * self.rnn_units + @abstractmethod + def dcgru_layers(self): + pass -class EncoderModel(DCRNNModel): - def __init__(self, is_training, scaler, adj_mx, **model_kwargs): - super().__init__(is_training, scaler, adj_mx, **model_kwargs) - self.seq_len = int(model_kwargs.get('seq_len')) # for the encoder - # https://pytorch.org/docs/stable/nn.html#gru + @staticmethod + def _forward_layer(inputs, dcgru_layer, hidden_state): + # inputs shape = (timesteps, batch_size, input_size) + outputs = [] + for cell_input in inputs[:, ]: + hidden_state = dcgru_layer(cell_input, hidden_state) + outputs.append(hidden_state) - # input shape is supposed to be Input (batch_size, timesteps, num_sensor*input_dim) - # first layer takes input shape and subsequent layer take input from the first layer - self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.input_dim, - hidden_size=self.hidden_state_size, - bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size, - hidden_size=self.hidden_state_size, - bias=True) for _ in - range(self.num_rnn_layers - 1)] + return torch.cat(outputs, dim=1) # runs in O(timesteps) not too slow - def forward(self, inputs, hidden_state=None): + def _forward_impl(self, inputs, hidden_state): """ - Encoder forward pass. + forward pass. :param inputs: shape (batch_size, timesteps, num_nodes * input_dim) :param hidden_state: (num_layers, batch_size, self.hidden_state_size) -> optional, zeros if not provided @@ -51,6 +50,7 @@ class EncoderModel(DCRNNModel): hidden_state = torch.zeros((self.num_rnn_layers, batch_size, self.hidden_state_size), device=device) hidden = torch.empty_like(hidden_state) + # noinspection PyTypeChecker for layer_num, dcgru_layer in enumerate(self.dcgru_layers): layer_states = self._forward_layer(layer_input, dcgru_layer, hidden_state[layer_num]) # append last time step's hidden state @@ -60,31 +60,59 @@ class EncoderModel(DCRNNModel): output = layer_input # last layer's output return output, hidden - @staticmethod - def _forward_layer(inputs, dcgru_layer, hidden_state): - # inputs shape = (timesteps, batch_size, input_size) - outputs = [] - for cell_input in inputs[:, ]: - hidden_state = dcgru_layer(cell_input, hidden_state) - outputs.append(hidden_state) - return torch.cat(outputs, dim=1) # runs in O(timesteps) not too slow +class EncoderModel(nn.Module, DCRNNModel): + def __init__(self, is_training, scaler, adj_mx, **model_kwargs): + super().__init__(is_training, scaler, adj_mx, **model_kwargs) + # https://pytorch.org/docs/stable/nn.html#gru + self.seq_len = int(model_kwargs.get('seq_len')) # for the encoder + + def dcgru_layers(self): + # input shape is supposed to be Input (batch_size, timesteps, num_sensor*input_dim) + # first layer takes input shape and subsequent layer take input from the first layer + return [nn.GRUCell(input_size=self.num_nodes * self.input_dim, + hidden_size=self.hidden_state_size, + bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size, + hidden_size=self.hidden_state_size, + bias=True) for _ in + range(self.num_rnn_layers - 1)] + + def forward(self, inputs, hidden_state=None): + """ + Encoder forward pass. + + :param inputs: shape (batch_size, timesteps, num_nodes * input_dim) + :param hidden_state: (num_layers, batch_size, self.hidden_state_size) -> optional, zeros if not provided + :return: output: # shape (timesteps, batch_size, self.hidden_state_size) + hidden_state # shape (num_layers, batch_size, self.hidden_state_size) (lower indices mean lower layers) + """ + return self._forward_impl(inputs, hidden_state) -class DecoderModel(DCRNNModel): +class DecoderModel(nn.Module, DCRNNModel): def __init__(self, is_training, scale_factor, adj_mx, **model_kwargs): super().__init__(is_training, scale_factor, adj_mx, **model_kwargs) self.output_dim = int(model_kwargs.get('output_dim', 1)) self.use_curriculum_learning = bool(model_kwargs.get('use_curriculum_learning', False)) self.horizon = int(model_kwargs.get('horizon', 1)) # for the decoder + self.projection_layer = nn.Linear(self.hidden_state_size, self.num_nodes * self.output_dim) - self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.output_dim, - hidden_size=self.rnn_units, - bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size, - hidden_size=self.hidden_state_size, - bias=True) for _ in - range(self.num_rnn_layers - 1)] - self.projection_layer = nn.Linear(self.hidden_state_size, self.rnn_units * self.output_dim) + def dcgru_layers(self): + return [nn.GRUCell(input_size=self.num_nodes * self.output_dim, + hidden_size=self.hidden_state_size, + bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size, + hidden_size=self.hidden_state_size, + bias=True) for _ in + range(self.num_rnn_layers - 1)] - def forward(self): - pass # repeat encoder and apply a linear layer to every timestep's output + def forward(self, inputs, hidden_state=None): + """ + Decoder forward pass. + + :param inputs: shape (batch_size, timesteps, num_nodes * input_dim) + :param hidden_state: (num_layers, batch_size, self.hidden_state_size) -> optional, zeros if not provided + :return: output: # shape (timesteps, batch_size, self.num_nodes * self.output_dim) + hidden_state # shape (num_layers, batch_size, self.hidden_state_size) (lower indices mean lower layers) + """ + output, hidden = self._forward_impl(inputs, hidden_state) + return self.projection_layer(output), hidden_state