diff --git a/data/model/dcrnn_config.yaml b/data/model/dcrnn_config.yaml
index aaa21f2..a0109c6 100644
--- a/data/model/dcrnn_config.yaml
+++ b/data/model/dcrnn_config.yaml
@@ -21,6 +21,7 @@ max_grad_norm: 5
 min_learning_rate: 2.0e-06
 null_val: 0
 num_rnn_layers: 2
+output_dim: 1
 patience: 50
 rnn_units: 64
 seq_len: 12
diff --git a/data/model/dcrnn_test_config.yaml b/data/model/dcrnn_test_config.yaml
index e9af25d..b49b410 100644
--- a/data/model/dcrnn_test_config.yaml
+++ b/data/model/dcrnn_test_config.yaml
@@ -22,6 +22,7 @@ method_type: GCRNN
 min_learning_rate: 2.0e-06
 null_val: 0
 num_rnn_layers: 2
+output_dim: 1
 patience: 50
 rnn_units: 16
 seq_len: 3
diff --git a/model/dcrnn_model.py b/model/dcrnn_model.py
index 1479bad..e0820f2 100644
--- a/model/dcrnn_model.py
+++ b/model/dcrnn_model.py
@@ -28,12 +28,13 @@ class DCRNNModel(TFModel):
         rnn_units = int(config.get('rnn_units'))
         seq_len = int(config.get('seq_len'))
         use_curriculum_learning = bool(config.get('use_curriculum_learning', False))
-
-        assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (input_dim, output_dim)
+        aux_dim = input_dim - output_dim
+        # assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (input_dim, output_dim)
         # Input (batch_size, timesteps, num_sensor, input_dim)
         self._inputs = tf.placeholder(tf.float32, shape=(batch_size, seq_len, num_nodes, input_dim), name='inputs')
-        # Labels: (batch_size, timesteps, num_sensor, output_dim)
-        self._labels = tf.placeholder(tf.float32, shape=(batch_size, horizon, num_nodes, output_dim), name='labels')
+        # Labels: (batch_size, timesteps, num_sensor, input_dim), same format as the input except for the temporal dimension.
+        self._labels = tf.placeholder(tf.float32, shape=(batch_size, horizon, num_nodes, input_dim), name='labels')
+
         GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))
 
         cell = DCGRUCell(rnn_units, adj_mx, max_diffusion_step=max_diffusion_step, num_nodes=num_nodes,
@@ -49,20 +50,30 @@ class DCRNNModel(TFModel):
         # Outputs: (batch_size, timesteps, num_nodes, output_dim)
         with tf.variable_scope('DCRNN_SEQ'):
             inputs = tf.unstack(tf.reshape(self._inputs, (batch_size, seq_len, num_nodes * input_dim)), axis=1)
-            labels = tf.unstack(tf.reshape(self._labels, (batch_size, horizon, num_nodes * output_dim)), axis=1)
+            labels = tf.unstack(
+                tf.reshape(self._labels[..., :output_dim], (batch_size, horizon, num_nodes * output_dim)), axis=1)
+            if aux_dim > 0:
+                aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
+                aux_info.insert(0, None)
             labels.insert(0, GO_SYMBOL)
-            loop_function = None
-            if is_training:
-                if use_curriculum_learning:
-                    def loop_function(prev, i):
+
+            def loop_function(prev, i):
+                if is_training:
+                    # In training, return either the ground truth or the model's previous prediction.
+                    if use_curriculum_learning:
                         c = tf.random_uniform((), minval=0, maxval=1.)
                         threshold = self._compute_sampling_threshold(global_step, cl_decay_steps)
                         result = tf.cond(tf.less(c, threshold), lambda: labels[i], lambda: prev)
-                        return result
-            else:
-                # Return the output of the model.
-                def loop_function(prev, _):
-                    return prev
+                    else:
+                        result = labels[i]
+                else:
+                    # In testing, return the model's prediction.
+                    result = prev
+                if aux_dim > 0:
+                    result = tf.reshape(result, (batch_size, num_nodes, output_dim))
+                    result = tf.concat([result, aux_info[i]], axis=-1)
+                    result = tf.reshape(result, (batch_size, num_nodes * input_dim))
+                return result
 
             _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
             outputs, final_state = legacy_seq2seq.rnn_decoder(labels, enc_state, decoding_cells,
@@ -72,20 +83,21 @@ class DCRNNModel(TFModel):
         outputs = tf.stack(outputs[:-1], axis=1)
         self._outputs = tf.reshape(outputs, (batch_size, horizon, num_nodes, output_dim), name='outputs')
 
-        preds = self._outputs[..., 0]
-        labels = self._labels[..., 0]
+        # preds = self._outputs[..., 0]
+        preds = self._outputs
+        labels = self._labels[..., :output_dim]
 
         null_val = config.get('null_val', 0.)
         self._mae = masked_mae_loss(self._scaler, null_val)(preds=preds, labels=labels)
 
         if loss_func == 'MSE':
-            self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         elif loss_func == 'MAE':
-            self._loss = masked_mae_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mae_loss(self._scaler, null_val)(preds=preds, labels=labels)
         elif loss_func == 'RMSE':
-            self._loss = masked_rmse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_rmse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         else:
-            self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         if is_training:
             optimizer = tf.train.AdamOptimizer(self._lr)
             tvars = tf.trainable_variables()
diff --git a/model/dcrnn_supervisor.py b/model/dcrnn_supervisor.py
index 78c1497..7595269 100644
--- a/model/dcrnn_supervisor.py
+++ b/model/dcrnn_supervisor.py
@@ -61,7 +61,7 @@ class DCRNNSupervisor(TFModelSupervisor):
         # Builds the model.
         input_dim = self._x_train.shape[-1]
         num_nodes = self._df_test.shape[-1]
-        output_dim = self._y_train.shape[-1]
+        output_dim = self._get_config('output_dim')
         test_batch_size = self._get_config('test_batch_size')
         train_config = dict(self._config)
         train_config.update({
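
Note on the `loop_function` change: with `input_dim > output_dim`, the model now predicts only the first `output_dim` channels, and the remaining `aux_dim` channels are copied from the labels and re-attached to each prediction before it is fed back as the next decoder input. Below is a minimal NumPy sketch of that shape bookkeeping; the sizes (`batch_size=4`, `num_nodes=207`, `input_dim=2`) are made up for illustration and only the shapes matter.

```python
import numpy as np

# Hypothetical sizes for illustration only.
batch_size, num_nodes, input_dim, output_dim = 4, 207, 2, 1
aux_dim = input_dim - output_dim

# Stand-ins for one decoder step: `prev` mimics the previous prediction,
# `aux_step` mimics aux_info[i] = self._labels[..., output_dim:] at step i.
prev = np.random.rand(batch_size, num_nodes * output_dim).astype(np.float32)
aux_step = np.random.rand(batch_size, num_nodes, aux_dim).astype(np.float32)

# The same three ops as the aux_dim branch of loop_function:
result = prev.reshape(batch_size, num_nodes, output_dim)    # split node/feature axes
result = np.concatenate([result, aux_step], axis=-1)        # re-attach aux channels
result = result.reshape(batch_size, num_nodes * input_dim)  # flat input for the next step

assert result.shape == (batch_size, num_nodes * input_dim)
```

The `aux_info.insert(0, None)` padding works because `rnn_decoder` only calls `loop_function` from the second step on (`prev` is `None` at step 0, where `GO_SYMBOL` is fed directly), so index 0 of `aux_info` is never read.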
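One more reviewer note: `_compute_sampling_threshold(global_step, cl_decay_steps)` is referenced but not touched by this diff. Assuming it implements the inverse-sigmoid decay from scheduled sampling (Bengio et al., 2015), as DCRNN does, the probability of feeding the decoder ground truth behaves like the sketch below; the helper name `sampling_threshold` and the step values are illustrative, not part of this change.

```python
import numpy as np

def sampling_threshold(global_step, cl_decay_steps):
    """Inverse-sigmoid decay: the probability of feeding ground truth to the
    decoder starts near 1 and decays toward 0 as training progresses."""
    return cl_decay_steps / (cl_decay_steps + np.exp(global_step / cl_decay_steps))

for step in (0, 5000, 10000, 20000):
    print(step, round(float(sampling_threshold(step, 2000.0)), 3))
# step 0 -> ~0.999 (mostly ground truth); step 20000 -> ~0.083 (mostly model output)
```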