From 9834b12d5a3bb3fff71a484c62c849c1282eff10 Mon Sep 17 00:00:00 2001
From: Chintan Shah
Date: Wed, 2 Oct 2019 22:20:43 -0400
Subject: [PATCH] Cleaned up code, fixed bugs in implementation, seems like it starts training with GRU

---
 model/pytorch/dcrnn_model.py      | 90 +++++++++++++++----------------
 model/pytorch/dcrnn_supervisor.py | 27 +++++++---
 2 files changed, 63 insertions(+), 54 deletions(-)

diff --git a/model/pytorch/dcrnn_model.py b/model/pytorch/dcrnn_model.py
index 91abe89..aaf59c5 100644
--- a/model/pytorch/dcrnn_model.py
+++ b/model/pytorch/dcrnn_model.py
@@ -4,25 +4,8 @@ import torch.nn as nn
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-# class DCRNNModel:
-#     def __init__(self, is_training, adj_mx, **model_kwargs):
-#         self.adj_mx = adj_mx
-#         self.is_training = is_training
-#         self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
-#         self.cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
-#         self.filter_type = model_kwargs.get('filter_type', 'laplacian')
-#         # self.max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
-#         self.num_nodes = int(model_kwargs.get('num_nodes', 1))
-#         self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
-#         self.rnn_units = int(model_kwargs.get('rnn_units'))
-#         self.hidden_state_size = self.num_nodes * self.rnn_units
-
-
-class EncoderModel(nn.Module):
+class DCRNNModel:
     def __init__(self, is_training, adj_mx, **model_kwargs):
-        # super().__init__(is_training, adj_mx, **model_kwargs)
-        # https://pytorch.org/docs/stable/nn.html#gru
-        super().__init__()
         self.adj_mx = adj_mx
         self.is_training = is_training
         self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
@@ -33,22 +16,34 @@ class EncoderModel(nn.Module):
         self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
         self.rnn_units = int(model_kwargs.get('rnn_units'))
         self.hidden_state_size = self.num_nodes * self.rnn_units
+
+
+class EncoderModel(nn.Module, DCRNNModel):
+    def __init__(self, is_training, adj_mx, **model_kwargs):
+        # super().__init__(is_training, adj_mx, **model_kwargs)
+        # https://pytorch.org/docs/stable/nn.html#gru
+        nn.Module.__init__(self)
+        DCRNNModel.__init__(self, is_training, adj_mx, **model_kwargs)
         self.input_dim = int(model_kwargs.get('input_dim', 1))
         self.seq_len = int(model_kwargs.get('seq_len'))  # for the encoder
-        self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.input_dim,
-                                        hidden_size=self.hidden_state_size,
-                                        bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
-                                                                  hidden_size=self.hidden_state_size,
-                                                                  bias=True) for _ in range(self.num_rnn_layers - 1)]
+        self.dcgru_layers = nn.ModuleList([nn.GRUCell(input_size=self.num_nodes * self.input_dim,
+                                                      hidden_size=self.hidden_state_size,
+                                                      bias=True)] + [
+            nn.GRUCell(input_size=self.hidden_state_size,
+                       hidden_size=self.hidden_state_size,
+                       bias=True) for _ in
+            range(self.num_rnn_layers - 1)])
 
     def forward(self, inputs, hidden_state=None):
         """
         Encoder forward pass.
 
         :param inputs: shape (batch_size, self.num_nodes * self.input_dim)
-        :param hidden_state: (num_layers, batch_size, self.hidden_state_size) -> optional, zeros if not provided
+        :param hidden_state: (num_layers, batch_size, self.hidden_state_size)
+               optional, zeros if not provided
         :return: output: # shape (batch_size, self.hidden_state_size)
-                 hidden_state # shape (num_layers, batch_size, self.hidden_state_size) (lower indices mean lower layers)
+                 hidden_state # shape (num_layers, batch_size, self.hidden_state_size)
+                 (lower indices mean lower layers)
         """
         batch_size, _ = inputs.size()
         if hidden_state is None:
@@ -57,17 +52,18 @@ class EncoderModel(nn.Module):
         hidden_states = []
         output = inputs
         for layer_num, dcgru_layer in enumerate(self.dcgru_layers):
-            hidden_state = dcgru_layer(output, hidden_state)
-            hidden_states.append(hidden_state)
-            output = hidden_state
+            next_hidden_state = dcgru_layer(output, hidden_state[layer_num])
+            hidden_states.append(next_hidden_state)
+            output = next_hidden_state
 
-        return output, torch.cat(hidden_states, dim=1)  # runs in O(num_layers) so not too slow  # todo: check dim
+        return output, torch.stack(hidden_states)  # runs in O(num_layers) so not too slow
 
 
-class DecoderModel(nn.Module):
+class DecoderModel(nn.Module, DCRNNModel):
     def __init__(self, is_training, adj_mx, **model_kwargs):
         # super().__init__(is_training, adj_mx, **model_kwargs)
-        super().__init__()
+        nn.Module.__init__(self)
+        DCRNNModel.__init__(self, is_training, adj_mx, **model_kwargs)
         self.adj_mx = adj_mx
         self.is_training = is_training
         self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
@@ -82,32 +78,30 @@ class DecoderModel(nn.Module):
         self.use_curriculum_learning = bool(model_kwargs.get('use_curriculum_learning', False))
         self.horizon = int(model_kwargs.get('horizon', 1))  # for the decoder
         self.projection_layer = nn.Linear(self.hidden_state_size, self.num_nodes * self.output_dim)
-        self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.output_dim,
-                                        hidden_size=self.hidden_state_size,
-                                        bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
-                                                                  hidden_size=self.hidden_state_size,
-                                                                  bias=True) for _ in
-                                                                  range(self.num_rnn_layers - 1)]
+        self.dcgru_layers = nn.ModuleList([nn.GRUCell(input_size=self.num_nodes * self.output_dim,
+                                                      hidden_size=self.hidden_state_size,
+                                                      bias=True)] + [
+            nn.GRUCell(input_size=self.hidden_state_size,
+                       hidden_size=self.hidden_state_size,
+                       bias=True) for _ in
+            range(self.num_rnn_layers - 1)])
 
     def forward(self, inputs, hidden_state=None):
         """
         Decoder forward pass.
 
         :param inputs: shape (batch_size, self.num_nodes * self.output_dim)
-        :param hidden_state: (num_layers, batch_size, self.hidden_state_size) -> optional, zeros if not provided
+        :param hidden_state: (num_layers, batch_size, self.hidden_state_size)
+               optional, zeros if not provided
         :return: output: # shape (batch_size, self.num_nodes * self.output_dim)
-                 hidden_state # shape (num_layers, batch_size, self.hidden_state_size) (lower indices mean lower layers)
+                 hidden_state # shape (num_layers, batch_size, self.hidden_state_size)
+                 (lower indices mean lower layers)
         """
-        batch_size, _ = inputs.size()
-        if hidden_state is None:
-            hidden_state = torch.zeros((self.num_rnn_layers, batch_size, self.hidden_state_size),
-                                       device=device)
         hidden_states = []
         output = inputs
         for layer_num, dcgru_layer in enumerate(self.dcgru_layers):
-            hidden_state = dcgru_layer(output, hidden_state)
-            hidden_states.append(hidden_state)
-            output = hidden_state
+            next_hidden_state = dcgru_layer(output, hidden_state[layer_num])
+            hidden_states.append(next_hidden_state)
+            output = next_hidden_state
 
-        return self.projection_layer(output), torch.cat(hidden_states,
-                                                        dim=1)  # runs in O(num_layers) so not too slow #todo: check dim
+        return self.projection_layer(output), torch.stack(hidden_states)
diff --git a/model/pytorch/dcrnn_supervisor.py b/model/pytorch/dcrnn_supervisor.py
index 8f39be3..d496dd5 100644
--- a/model/pytorch/dcrnn_supervisor.py
+++ b/model/pytorch/dcrnn_supervisor.py
@@ -7,6 +7,8 @@ import torch
 from lib import utils
 from model.pytorch.dcrnn_model import EncoderModel, DecoderModel
 
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
 
 class DCRNNSupervisor:
     def __init__(self, adj_mx, **kwargs):
@@ -87,7 +89,8 @@ class DCRNNSupervisor:
 
         batch_size = inputs.size(1)
         inputs = inputs.view(self.seq_len, batch_size, self.num_nodes * self.input_dim)
-        labels = labels.view(self.horizon, batch_size, self.num_nodes * self.output_dim)
+        labels = labels[..., :self.output_dim].view(self.horizon, batch_size,
+                                                    self.num_nodes * self.output_dim)
 
         loss = 0
 
@@ -95,6 +98,7 @@ class DCRNNSupervisor:
         for t in range(self.seq_len):
             _, encoder_hidden_state = self.encoder_model.forward(inputs[t], encoder_hidden_state)
 
+        self._logger.info("Encoder complete, starting decoder")
         go_symbol = torch.zeros((batch_size, self.num_nodes * self.output_dim))
 
         decoder_hidden_state = encoder_hidden_state
@@ -113,6 +117,7 @@ class DCRNNSupervisor:
             loss += criterion(self.standard_scaler.inverse_transform(decoder_output),
                               self.standard_scaler.inverse_transform(labels[t]))
 
+        self._logger.info("Decoder complete, starting backprop")
         loss.backward()
         encoder_optimizer.step()
         decoder_optimizer.step()
@@ -135,16 +140,26 @@ class DCRNNSupervisor:
             start_time = time.time()
 
-            for x, y in train_iterator:
-                loss = self._train_one_batch(x, y, batches_seen, encoder_optimizer, decoder_optimizer, criterion)
+            for _, (x, y) in enumerate(train_iterator):
+                x = torch.from_numpy(x).float()
+                y = torch.from_numpy(y).float()
+                self._logger.debug("X: {}".format(x.size()))
+                self._logger.debug("y: {}".format(y.size()))
+                x = x.permute(1, 0, 2, 3)
+                y = y.permute(1, 0, 2, 3)
+                loss = self._train_one_batch(x, y, batches_seen, encoder_optimizer,
+                                             decoder_optimizer, criterion)
                 losses.append(loss)
                 batches_seen += 1
 
             end_time = time.time()
 
             if epoch_num % log_every == 0:
-                message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, val_mae: {:.4f} lr:{:.6f} {:.1f}s'.format(
-                    epoch_num, epochs, batches_seen, np.mean(losses), 0.0, 0.0, (end_time - start_time))
+                message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, val_mae: {:.4f} ' \
+                          'lr:{:.6f} {:.1f}s'.format(epoch_num, epochs, batches_seen,
+                                                     np.mean(losses), 0.0,
+                                                     0.0, (end_time - start_time))
                 self._logger.info(message)
 
     def _compute_sampling_threshold(self, batches_seen):
-        return self.cl_decay_steps / (self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps))
+        return self.cl_decay_steps / (
+                self.cl_decay_steps + np.exp(batches_seen / self.cl_decay_steps))
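
A note on the forward-pass change in dcrnn_model.py: each stacked GRUCell now receives its own slice of the hidden state (hidden_state[layer_num]), and the per-layer states are returned with torch.stack so the (num_layers, batch_size, hidden_state_size) layout survives the round trip between encoder and decoder. A minimal standalone sketch of that pattern; the class name, sizes, and shape check below are illustrative assumptions, not code from this repo:

import torch
import torch.nn as nn


class StackedGRUCells(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super().__init__()
        # First cell consumes the raw input; deeper cells consume the layer below.
        self.cells = nn.ModuleList(
            [nn.GRUCell(input_size, hidden_size)] +
            [nn.GRUCell(hidden_size, hidden_size) for _ in range(num_layers - 1)])
        self.hidden_size = hidden_size

    def forward(self, inputs, hidden_state=None):
        # inputs: (batch_size, input_size)
        # hidden_state: (num_layers, batch_size, hidden_size), zeros if omitted
        batch_size = inputs.size(0)
        if hidden_state is None:
            hidden_state = torch.zeros(len(self.cells), batch_size, self.hidden_size,
                                       device=inputs.device)
        hidden_states = []
        output = inputs
        for layer_num, cell in enumerate(self.cells):
            # Index the previous state of *this* layer instead of reusing one tensor.
            next_hidden_state = cell(output, hidden_state[layer_num])
            hidden_states.append(next_hidden_state)
            output = next_hidden_state
        # torch.stack keeps the (num_layers, batch_size, hidden_size) layout.
        return output, torch.stack(hidden_states)


# Quick shape check
stack = StackedGRUCells(input_size=4, hidden_size=8, num_layers=2)
out, h = stack(torch.randn(3, 4))
assert out.shape == (3, 8) and h.shape == (2, 3, 8)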
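
On the training-loop change in dcrnn_supervisor.py: each numpy batch is converted to float tensors and permuted from (batch, seq, nodes, dim) to (seq, batch, nodes, dim) before the node and feature axes are flattened, and the labels keep only the first output_dim feature. A rough sketch of that shape handling under assumed METR-LA-like sizes (batch of 2, seq_len and horizon of 12, 207 nodes); reshape is used here because view requires a contiguous tensor after permute:

import numpy as np
import torch

batch_size, seq_len, horizon, num_nodes, input_dim, output_dim = 2, 12, 12, 207, 2, 1

x = np.random.randn(batch_size, seq_len, num_nodes, input_dim)
y = np.random.randn(batch_size, horizon, num_nodes, input_dim)

# (batch, seq, nodes, dim) -> (seq, batch, nodes, dim)
x = torch.from_numpy(x).float().permute(1, 0, 2, 3)
y = torch.from_numpy(y).float().permute(1, 0, 2, 3)

inputs = x.reshape(seq_len, batch_size, num_nodes * input_dim)
# Keep only the first output_dim feature(s) of y, as the new labels slicing does.
labels = y[..., :output_dim].reshape(horizon, batch_size, num_nodes * output_dim)

assert inputs.shape == (12, 2, 207 * 2) and labels.shape == (12, 2, 207)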
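
_compute_sampling_threshold is only re-wrapped here, but for reference it implements the inverse sigmoid decay used for scheduled sampling: the probability of feeding the decoder the ground truth is cl_decay_steps / (cl_decay_steps + exp(batches_seen / cl_decay_steps)). A small standalone sketch with cl_decay_steps of 1000, the model's default; the print loop is illustrative only:

import numpy as np


def sampling_threshold(batches_seen, cl_decay_steps=1000):
    # Probability of using the ground truth (teacher forcing) at this training step.
    return cl_decay_steps / (cl_decay_steps + np.exp(batches_seen / cl_decay_steps))


for batches_seen in (0, 1000, 5000, 10000):
    print(batches_seen, round(sampling_threshold(batches_seen), 4))
# Stays close to 1 early in training and decays toward 0; it crosses 0.5 around
# cl_decay_steps * ln(cl_decay_steps) batches (~6900 for the default of 1000).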