From f96a8c0d59b0d2c220c840d38d101a6175ff4828 Mon Sep 17 00:00:00 2001
From: Chintan Shah
Date: Wed, 2 Oct 2019 18:09:33 -0400
Subject: [PATCH] Dirty commit - setup model but [GRUCell] not working, tried ParameterList, did not work

---
 dcrnn_train_pytorch.py            | 33 ++++++++++++++
 model/pytorch/dcrnn_model.py      | 75 ++++++++++++++++++-------------
 model/pytorch/dcrnn_supervisor.py |  8 ++--
 3 files changed, 81 insertions(+), 35 deletions(-)
 create mode 100644 dcrnn_train_pytorch.py

diff --git a/dcrnn_train_pytorch.py b/dcrnn_train_pytorch.py
new file mode 100644
index 0000000..b01f541
--- /dev/null
+++ b/dcrnn_train_pytorch.py
@@ -0,0 +1,33 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import tensorflow as tf
+import yaml
+
+from lib.utils import load_graph_data
+from model.pytorch.dcrnn_supervisor import DCRNNSupervisor
+
+def main(args):
+    with open(args.config_filename) as f:
+        supervisor_config = yaml.load(f)
+
+        graph_pkl_filename = supervisor_config['data'].get('graph_pkl_filename')
+        sensor_ids, sensor_id_to_ind, adj_mx = load_graph_data(graph_pkl_filename)
+
+        # if args.use_cpu_only:
+        #     tf_config = tf.ConfigProto(device_count={'GPU': 0})
+        #     with tf.Session(config=tf_config) as sess:
+        supervisor = DCRNNSupervisor(adj_mx=adj_mx, **supervisor_config)
+
+        supervisor.train()
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--config_filename', default=None, type=str,
+                        help='Configuration filename for restoring the model.')
+    parser.add_argument('--use_cpu_only', default=False, type=bool, help='Set to true to only use cpu.')
+    args = parser.parse_args()
+    main(args)
diff --git a/model/pytorch/dcrnn_model.py b/model/pytorch/dcrnn_model.py
index 4a9415c..91abe89 100644
--- a/model/pytorch/dcrnn_model.py
+++ b/model/pytorch/dcrnn_model.py
@@ -4,12 +4,27 @@ import torch.nn as nn
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
-class DCRNNModel:
-    def __init__(self, is_training, scale_factor, adj_mx, **model_kwargs):
+# class DCRNNModel:
+#     def __init__(self, is_training, adj_mx, **model_kwargs):
+#         self.adj_mx = adj_mx
+#         self.is_training = is_training
+#         self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
+#         self.cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
+#         self.filter_type = model_kwargs.get('filter_type', 'laplacian')
+#         # self.max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
+#         self.num_nodes = int(model_kwargs.get('num_nodes', 1))
+#         self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
+#         self.rnn_units = int(model_kwargs.get('rnn_units'))
+#         self.hidden_state_size = self.num_nodes * self.rnn_units
+
+
+class EncoderModel(nn.Module):
+    def __init__(self, is_training, adj_mx, **model_kwargs):
+        # super().__init__(is_training, adj_mx, **model_kwargs)
+        # https://pytorch.org/docs/stable/nn.html#gru
         super().__init__()
         self.adj_mx = adj_mx
         self.is_training = is_training
-        self.scale_factor = scale_factor
         self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
         self.cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
         self.filter_type = model_kwargs.get('filter_type', 'laplacian')
@@ -18,25 +33,13 @@ class DCRNNModel:
         self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
         self.rnn_units = int(model_kwargs.get('rnn_units'))
         self.hidden_state_size = self.num_nodes * self.rnn_units
-
-
-class EncoderModel(nn.Module, DCRNNModel):
-    def __init__(self, is_training, scaler, adj_mx, **model_kwargs):
-        super().__init__(is_training, scaler, adj_mx, **model_kwargs)
-        # https://pytorch.org/docs/stable/nn.html#gru
         self.input_dim = int(model_kwargs.get('input_dim', 1))
         self.seq_len = int(model_kwargs.get('seq_len'))  # for the encoder
-
-    @property
-    def dcgru_layers(self):
-        # input shape is supposed to be Input (batch_size, num_sensor*input_dim)
-        # first layer takes input shape and subsequent layer take input from the first layer
-        return [nn.GRUCell(input_size=self.num_nodes * self.input_dim,
-                           hidden_size=self.hidden_state_size,
-                           bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
-                                                     hidden_size=self.hidden_state_size,
-                                                     bias=True) for _ in
-                range(self.num_rnn_layers - 1)]
+        self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.input_dim,
+                                        hidden_size=self.hidden_state_size,
+                                        bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
+                                                                  hidden_size=self.hidden_state_size,
+                                                                  bias=True) for _ in range(self.num_rnn_layers - 1)]
 
     def forward(self, inputs, hidden_state=None):
         """
@@ -61,22 +64,30 @@ class EncoderModel(nn.Module, DCRNNModel):
         return output, torch.cat(hidden_states, dim=1)  # runs in O(num_layers) so not too slow # todo: check dim
 
 
-class DecoderModel(nn.Module, DCRNNModel):
-    def __init__(self, is_training, scale_factor, adj_mx, **model_kwargs):
-        super().__init__(is_training, scale_factor, adj_mx, **model_kwargs)
+class DecoderModel(nn.Module):
+    def __init__(self, is_training, adj_mx, **model_kwargs):
+        # super().__init__(is_training, adj_mx, **model_kwargs)
+        super().__init__()
+        self.adj_mx = adj_mx
+        self.is_training = is_training
+        self.max_diffusion_step = int(model_kwargs.get('max_diffusion_step', 2))
+        self.cl_decay_steps = int(model_kwargs.get('cl_decay_steps', 1000))
+        self.filter_type = model_kwargs.get('filter_type', 'laplacian')
+        # self.max_grad_norm = float(model_kwargs.get('max_grad_norm', 5.0))
+        self.num_nodes = int(model_kwargs.get('num_nodes', 1))
+        self.num_rnn_layers = int(model_kwargs.get('num_rnn_layers', 1))
+        self.rnn_units = int(model_kwargs.get('rnn_units'))
+        self.hidden_state_size = self.num_nodes * self.rnn_units
         self.output_dim = int(model_kwargs.get('output_dim', 1))
         self.use_curriculum_learning = bool(model_kwargs.get('use_curriculum_learning', False))
         self.horizon = int(model_kwargs.get('horizon', 1))  # for the decoder
         self.projection_layer = nn.Linear(self.hidden_state_size, self.num_nodes * self.output_dim)
-
-    @property
-    def dcgru_layers(self):
-        return [nn.GRUCell(input_size=self.num_nodes * self.output_dim,
-                           hidden_size=self.hidden_state_size,
-                           bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
-                                                     hidden_size=self.hidden_state_size,
-                                                     bias=True) for _ in
-                range(self.num_rnn_layers - 1)]
+        self.dcgru_layers = [nn.GRUCell(input_size=self.num_nodes * self.output_dim,
+                                        hidden_size=self.hidden_state_size,
+                                        bias=True)] + [nn.GRUCell(input_size=self.hidden_state_size,
+                                                                  hidden_size=self.hidden_state_size,
+                                                                  bias=True) for _ in
+                                                       range(self.num_rnn_layers - 1)]
 
     def forward(self, inputs, hidden_state=None):
         """
diff --git a/model/pytorch/dcrnn_supervisor.py b/model/pytorch/dcrnn_supervisor.py
index 0194a8f..8f39be3 100644
--- a/model/pytorch/dcrnn_supervisor.py
+++ b/model/pytorch/dcrnn_supervisor.py
@@ -9,9 +9,7 @@ from model.pytorch.dcrnn_model import EncoderModel, DecoderModel
 
 
 class DCRNNSupervisor:
-    def __init__(self, adj_mx, encoder_model: EncoderModel, decoder_model: DecoderModel, **kwargs):
-        self.decoder_model = decoder_model
-        self.encoder_model = encoder_model
+    def __init__(self, adj_mx, **kwargs):
         self._kwargs = kwargs
         self._data_kwargs = kwargs.get('data')
         self._model_kwargs = kwargs.get('model')
@@ -35,6 +33,10 @@ class DCRNNSupervisor:
             self._model_kwargs.get('use_curriculum_learning', False))
         self.horizon = int(self._model_kwargs.get('horizon', 1))  # for the decoder
 
+        # setup model
+        self.encoder_model = EncoderModel(True, adj_mx, **self._model_kwargs)
+        self.decoder_model = DecoderModel(True, adj_mx, **self._model_kwargs)
+
     @staticmethod
     def _get_log_dir(kwargs):
         log_dir = kwargs['train'].get('log_dir')
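
Note (not part of the patch): the subject line says the GRUCell stack is not training and that ParameterList did not help. nn.ParameterList only registers nn.Parameter objects, and a plain Python list of GRUCell modules (as built for self.dcgru_layers in dcrnn_model.py above) is invisible to nn.Module, so the cells' weights never appear in model.parameters() and do not move with .to(device). The usual remedy is nn.ModuleList. A minimal sketch follows; the simplified constructor arguments are illustrative only and do not mirror the patch's **model_kwargs handling:

    import torch.nn as nn

    class EncoderModel(nn.Module):
        def __init__(self, input_size, hidden_size, num_rnn_layers):
            super().__init__()
            # nn.ModuleList registers each GRUCell as a child module, so its
            # parameters show up in .parameters() and follow .to(device);
            # a plain list (or nn.ParameterList) does not do this for modules.
            self.dcgru_layers = nn.ModuleList(
                [nn.GRUCell(input_size, hidden_size)] +
                [nn.GRUCell(hidden_size, hidden_size)
                 for _ in range(num_rnn_layers - 1)])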