Handling NaNs in loss tensor

This commit is contained in:
Chintan Shah 2019-10-06 18:55:35 -04:00
parent 5dd0f1dd3a
commit 96d8dc4417
3 changed files with 8 additions and 5 deletions

View File

@ -120,8 +120,8 @@ class DCRNNSupervisor:
return mean_loss return mean_loss
def _train(self, base_lr, def _train(self, base_lr,
steps, patience=50, epochs=100, lr_decay_ratio=0.1, log_every=10, save_model=1, steps, patience=50, epochs=100, lr_decay_ratio=0.1, log_every=1, save_model=1,
test_every_n_epochs=10, **kwargs): test_every_n_epochs=10):
# steps is used in learning rate - will see if need to use it? # steps is used in learning rate - will see if need to use it?
min_val_loss = float('inf') min_val_loss = float('inf')
wait = 0 wait = 0
@ -171,14 +171,14 @@ class DCRNNSupervisor:
np.mean(losses), np.mean(losses),
batches_seen) batches_seen)
if epoch_num % log_every == log_every - 1: if (epoch_num % log_every) == log_every - 1:
message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, val_mae: {:.4f}, lr: {:.6f}, ' \ message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, val_mae: {:.4f}, lr: {:.6f}, ' \
'{:.1f}s'.format(epoch_num, epochs, batches_seen, '{:.1f}s'.format(epoch_num, epochs, batches_seen,
np.mean(losses), val_loss, lr_scheduler.get_lr()[0], np.mean(losses), val_loss, lr_scheduler.get_lr()[0],
(end_time - start_time)) (end_time - start_time))
self._logger.info(message) self._logger.info(message)
if epoch_num % test_every_n_epochs == test_every_n_epochs - 1: if (epoch_num % test_every_n_epochs) == test_every_n_epochs - 1:
test_loss = self.evaluate(dataset='test', batches_seen=batches_seen) test_loss = self.evaluate(dataset='test', batches_seen=batches_seen)
message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, test_mae: {:.4f}, lr: {:.6f}, ' \ message = 'Epoch [{}/{}] ({}) train_mae: {:.4f}, test_mae: {:.4f}, lr: {:.6f}, ' \
'{:.1f}s'.format(epoch_num, epochs, batches_seen, '{:.1f}s'.format(epoch_num, epochs, batches_seen,

View File

@ -6,4 +6,6 @@ def masked_mae_loss(y_pred, y_true):
mask /= mask.mean() mask /= mask.mean()
loss = torch.abs(y_pred - y_true) loss = torch.abs(y_pred - y_true)
loss = loss * mask loss = loss * mask
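# Trick for NaNs: NaN is the only value not equal to itself, so `loss != loss` selects exactly the NaN entries, which are then set to 0. See https://discuss.pytorch.org/t/how-to-set-nan-in-tensor-to-0/3918/3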
loss[loss != loss] = 0
return loss.mean() return loss.mean()

View File

@ -7,3 +7,4 @@ statsmodels
tensorflow>=1.3.0 tensorflow>=1.3.0
torch torch
tables tables
future