Update the seq2seq training logic.
parent 562514cc30
commit e08002d72d
@@ -21,6 +21,7 @@ max_grad_norm: 5
 min_learning_rate: 2.0e-06
 null_val: 0
 num_rnn_layers: 2
+output_dim: 1
 patience: 50
 rnn_units: 64
 seq_len: 12
@@ -22,6 +22,7 @@ method_type: GCRNN
 min_learning_rate: 2.0e-06
 null_val: 0
 num_rnn_layers: 2
+output_dim: 1
 patience: 50
 rnn_units: 16
 seq_len: 3
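Both config hunks add an explicit `output_dim` key, which the supervisor hunk at the bottom of this commit reads via `self._get_config('output_dim')` instead of inferring the value from the label array's shape. A minimal sketch of the new lookup, assuming a plain YAML config (the `yaml` usage and the literal config text are illustrative, not part of the commit):

```python
import yaml

# Illustrative config text mirroring the keys in the hunk above.
config_text = """
min_learning_rate: 2.0e-06
null_val: 0
num_rnn_layers: 2
output_dim: 1
patience: 50
rnn_units: 64
seq_len: 12
"""

config = yaml.safe_load(config_text)
# output_dim is now an explicit setting rather than a value inferred
# from the last axis of the training labels at runtime.
output_dim = int(config['output_dim'])
assert output_dim == 1
```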
@@ -28,12 +28,13 @@ class DCRNNModel(TFModel):
         rnn_units = int(config.get('rnn_units'))
         seq_len = int(config.get('seq_len'))
         use_curriculum_learning = bool(config.get('use_curriculum_learning', False))
-        assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (input_dim, output_dim)
+        aux_dim = input_dim - output_dim
+        # assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (input_dim, output_dim)
         # Input (batch_size, timesteps, num_sensor, input_dim)
         self._inputs = tf.placeholder(tf.float32, shape=(batch_size, seq_len, num_nodes, input_dim), name='inputs')
-        # Labels: (batch_size, timesteps, num_sensor, output_dim)
-        self._labels = tf.placeholder(tf.float32, shape=(batch_size, horizon, num_nodes, output_dim), name='labels')
+        # Labels: (batch_size, timesteps, num_sensor, input_dim), same format with input except the temporal dimension.
+        self._labels = tf.placeholder(tf.float32, shape=(batch_size, horizon, num_nodes, input_dim), name='labels')

         GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))

         cell = DCGRUCell(rnn_units, adj_mx, max_diffusion_step=max_diffusion_step, num_nodes=num_nodes,
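The model now tolerates `input_dim > output_dim`: the old equality assert is commented out, `aux_dim` counts the extra input channels (for example a time-of-day feature alongside the quantity being predicted), and the labels placeholder is widened to `input_dim` channels so the decoder can also be fed the auxiliary features. A NumPy sketch of the slicing convention, with made-up shapes:

```python
import numpy as np

# Illustrative shapes only, not taken from the commit.
batch_size, horizon, num_nodes = 4, 12, 207
input_dim, output_dim = 2, 1        # e.g. speed + time-of-day in, speed out
aux_dim = input_dim - output_dim    # channels the model is not asked to predict

labels = np.random.rand(batch_size, horizon, num_nodes, input_dim)

# The first output_dim channels are the supervised targets...
targets = labels[..., :output_dim]   # shape (4, 12, 207, 1)
# ...and the remaining channels are auxiliary decoder inputs.
aux_info = labels[..., output_dim:]  # shape (4, 12, 207, 1)

assert targets.shape[-1] == output_dim
assert aux_info.shape[-1] == aux_dim
```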
@@ -49,20 +50,30 @@ class DCRNNModel(TFModel):
         # Outputs: (batch_size, timesteps, num_nodes, output_dim)
         with tf.variable_scope('DCRNN_SEQ'):
             inputs = tf.unstack(tf.reshape(self._inputs, (batch_size, seq_len, num_nodes * input_dim)), axis=1)
-            labels = tf.unstack(tf.reshape(self._labels, (batch_size, horizon, num_nodes * output_dim)), axis=1)
+            labels = tf.unstack(
+                tf.reshape(self._labels[..., :output_dim], (batch_size, horizon, num_nodes * output_dim)), axis=1)
+            if aux_dim > 0:
+                aux_info = tf.unstack(self._labels[..., output_dim:], axis=1)
+                aux_info.insert(0, None)
             labels.insert(0, GO_SYMBOL)
-            loop_function = None
-            if is_training:
-                if use_curriculum_learning:
-                    def loop_function(prev, i):
+
+            def loop_function(prev, i):
+                if is_training:
+                    # Return either the model's prediction or the previous ground truth in training.
+                    if use_curriculum_learning:
                         c = tf.random_uniform((), minval=0, maxval=1.)
                         threshold = self._compute_sampling_threshold(global_step, cl_decay_steps)
                         result = tf.cond(tf.less(c, threshold), lambda: labels[i], lambda: prev)
-                        return result
-            else:
-                # Return the output of the model.
-                def loop_function(prev, _):
-                    return prev
+                    else:
+                        result = labels[i]
+                else:
+                    # Return the prediction of the model in testing.
+                    result = prev
+                if aux_dim > 0:
+                    result = tf.reshape(result, (batch_size, num_nodes, output_dim))
+                    result = tf.concat([result, aux_info[i]], axis=-1)
+                    result = tf.reshape(result, (batch_size, num_nodes * input_dim))
+                return result

             _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
             outputs, final_state = legacy_seq2seq.rnn_decoder(labels, enc_state, decoding_cells,
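The rewritten `loop_function` is scheduled sampling: during training, with probability `threshold` the decoder receives the ground-truth label for the previous step, otherwise its own previous prediction, and at test time it always receives the prediction. When `aux_dim > 0`, the auxiliary channels are concatenated back on so the fed-back tensor matches the `num_nodes * input_dim` layout the cell expects. `_compute_sampling_threshold` is not shown in this diff; the sketch below follows the inverse sigmoid decay described in the DCRNN paper and may differ from this repo's helper:

```python
import numpy as np

def compute_sampling_threshold(global_step, cl_decay_steps):
    # Inverse sigmoid decay: starts near 1 (always feed ground truth)
    # and decays towards 0 (always feed the model's own prediction).
    return cl_decay_steps / (cl_decay_steps + np.exp(global_step / cl_decay_steps))

# Early in training the decoder almost always sees ground truth...
print(compute_sampling_threshold(0, 2000))      # ~0.9995
# ...and late in training it almost always sees its own predictions.
print(compute_sampling_threshold(30000, 2000))  # ~0.0006
```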
@@ -72,20 +83,21 @@ class DCRNNModel(TFModel):
             outputs = tf.stack(outputs[:-1], axis=1)
         self._outputs = tf.reshape(outputs, (batch_size, horizon, num_nodes, output_dim), name='outputs')

-        preds = self._outputs[..., 0]
-        labels = self._labels[..., 0]
+        # preds = self._outputs[..., 0]
+        preds = self._outputs
+        labels = self._labels[..., :output_dim]

         null_val = config.get('null_val', 0.)
         self._mae = masked_mae_loss(self._scaler, null_val)(preds=preds, labels=labels)

         if loss_func == 'MSE':
-            self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         elif loss_func == 'MAE':
-            self._loss = masked_mae_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mae_loss(self._scaler, null_val)(preds=preds, labels=labels)
         elif loss_func == 'RMSE':
-            self._loss = masked_rmse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_rmse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         else:
-            self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
+            self._loss = masked_mse_loss(self._scaler, null_val)(preds=preds, labels=labels)
         if is_training:
             optimizer = tf.train.AdamOptimizer(self._lr)
             tvars = tf.trainable_variables()
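Every loss branch now compares `preds = self._outputs` (all `output_dim` channels) against `labels = self._labels[..., :output_dim]`, so the auxiliary channels never enter the loss. The masked losses skip entries equal to `null_val`, which marks missing sensor readings. A NumPy sketch of the masking idea (the scaler inverse-transform that the repo's `masked_mae_loss` applies is omitted here):

```python
import numpy as np

def masked_mae(preds, labels, null_val=0.0):
    # Entries where the label equals null_val (missing readings) are
    # excluded; the mask is renormalized so valid entries average to 1.
    mask = (labels != null_val).astype(np.float64)
    mask /= mask.mean()
    loss = np.abs(preds - labels) * mask
    return np.nan_to_num(loss).mean()

labels = np.array([[60.0, 0.0], [55.0, 58.0]])  # 0.0 marks a missing reading
preds = np.array([[58.0, 40.0], [55.0, 60.0]])
print(masked_mae(preds, labels))  # 1.333..., the MAE over the 3 valid entries
```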
@@ -61,7 +61,7 @@ class DCRNNSupervisor(TFModelSupervisor):
         # Builds the model.
         input_dim = self._x_train.shape[-1]
         num_nodes = self._df_test.shape[-1]
-        output_dim = self._y_train.shape[-1]
+        output_dim = self._get_config('output_dim')
         test_batch_size = self._get_config('test_batch_size')
         train_config = dict(self._config)
         train_config.update({
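Reading `output_dim` from the config is forced by the placeholder change above: once `self._y_train` carries the auxiliary channels, its last axis equals `input_dim`, so the old shape-based inference would be wrong. A small illustration with made-up shapes:

```python
import numpy as np

# Illustrative only: labels that include an auxiliary time-of-day channel.
y_train = np.zeros((100, 12, 207, 2))  # targets plus one auxiliary channel

# Shape-based inference now yields input_dim (2)...
print(y_train.shape[-1])       # 2
# ...while the config supplies the true output_dim (1).
config = {'output_dim': 1}
print(config['output_dim'])    # 1
```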