Adds baseline methods for evaluation.
This commit is contained in:
parent
37f0734cb4
commit
8c20ca1a9c
|
|
@ -10,8 +10,9 @@ Yaguang Li, Rose Yu, Cyrus Shahabi, Yan Liu, [Diffusion Convolutional Recurrent
|
|||
- scipy>=0.19.0
|
||||
- numpy>=1.12.1
|
||||
- pandas>=0.19.2
|
||||
- tensorflow>=1.3.0
|
||||
- pyaml
|
||||
- statsmodels
|
||||
- tensorflow>=1.3.0
|
||||
|
||||
|
||||
Dependency can be installed using the following command:
|
||||
|
|
@ -80,6 +81,12 @@ Each epoch takes about 5min or 10 min on a single GTX 1080 Ti for METR-LA or PEM
|
|||
There is a chance that the training loss will explode; the temporary workaround is to restart from the last saved model before the explosion, or to decrease the learning rate earlier in the learning rate schedule.
|
||||
|
||||
|
||||
## Evaluate baseline methods
|
||||
```bash
|
||||
# METR-LA
|
||||
python -m scripts.eval_baseline_methods --traffic_reading_filename=data/metr-la.h5
|
||||
```
|
||||
|
||||
More details are being added ...
|
||||
|
||||
## Citation
|
||||
|
|
|
|||
|
|
@ -2,4 +2,5 @@ scipy>=0.19.0
|
|||
numpy>=1.12.1
|
||||
pandas>=0.19.2
|
||||
pyyaml
|
||||
statsmodels
|
||||
tensorflow>=1.3.0
|
||||
|
|
@ -0,0 +1,141 @@
|
|||
import argparse
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from statsmodels.tsa.vector_ar.var_model import VAR
|
||||
|
||||
from lib import utils
|
||||
from lib.metrics import masked_rmse_np, masked_mape_np, masked_mae_np
|
||||
from lib.utils import StandardScaler
|
||||
|
||||
|
||||
def historical_average_predict(df, period=12 * 24 * 7, test_ratio=0.2, null_val=0.):
    """
    Calculates the historical average of sensor readings.

    For each slot within one period, the prediction is the mean of all training
    readings that fall on the same slot, ignoring entries equal to null_val.
    :param df: pandas.DataFrame of sensor readings, index: time, columns: sensor id.
    :param period: seasonality in time steps, default 1 week (12 * 24 * 7 five-minute steps).
    :param test_ratio: fraction of samples held out for testing.
    :param null_val: reading treated as missing and excluded from the averages, default 0.
    :return: (y_predict, y_test) DataFrames covering the test period.
    """
    num_samples = df.shape[0]
    num_test = int(round(num_samples * test_ratio))
    num_train = num_samples - num_test
    y_test = df[-num_test:]
    y_predict = pd.DataFrame.copy(y_test)

    # Fill the first period of the test span with per-slot averages over the
    # training data, skipping readings equal to null_val.
    for idx in range(num_train, min(num_samples, num_train + period)):
        slots = list(range(idx % period, num_train, period))
        history = df.iloc[slots, :]
        y_predict.iloc[idx - num_train, :] = history[history != null_val].mean()
    # Later periods simply repeat the first predicted period.
    for idx in range(num_train + period, num_samples, period):
        size = min(period, num_samples - idx)
        offset = idx - num_train
        source = y_predict.iloc[offset - period: offset + size - period, :].values
        y_predict.iloc[offset:offset + size, :] = source
    return y_predict, y_test
|
||||
|
||||
|
||||
def static_predict(df, n_forward, test_ratio=0.2):
    """
    Persistence baseline that assumes $x^{t+1} = x^{t}$.

    :param df: pandas.DataFrame of sensor readings, index: time.
    :param n_forward: prediction horizon (number of time steps ahead).
    :param test_ratio: fraction of samples reserved for testing.
    :return: (y_predict, y_test) DataFrames covering the test period.
    """
    n_test = int(round(df.shape[0] * test_ratio))
    y_test = df[-n_test:]
    # The "prediction" is simply the reading observed n_forward steps earlier.
    y_predict = df.shift(n_forward).iloc[-n_test:]
    return y_predict, y_test
|
||||
|
||||
|
||||
def var_predict(df, n_forwards=(1, 3), n_lags=4, test_ratio=0.2):
    """
    Multivariate time series forecasting using Vector Auto-Regressive Model.

    The model is fit on the (standardized) training split only, but forecasts
    are produced from sliding windows over the full series, so windows near and
    inside the test span use observed (not predicted) readings as inputs.
    :param df: pandas.DataFrame, index: time, columns: sensor id, content: data.
    :param n_forwards: a tuple of horizons.
    :param n_lags: the order of the VAR model.
    :param test_ratio: fraction of samples held out for testing.
    :return: [list of prediction DataFrames, one per horizon], df_test
    """
    n_sample, n_output = df.shape
    n_test = int(round(n_sample * test_ratio))
    n_train = n_sample - n_test
    df_train, df_test = df[:n_train], df[n_train:]

    # Standardize with training statistics only, to avoid leaking test data
    # into the fitted coefficients.
    scaler = StandardScaler(mean=df_train.values.mean(), std=df_train.values.std())
    data = scaler.transform(df_train.values)
    var_model = VAR(data)
    var_result = var_model.fit(n_lags)
    max_n_forwards = np.max(n_forwards)
    # Do forecasting.
    # result[i, t, :] holds the horizon-n_forwards[i] forecast for test step t.
    result = np.zeros(shape=(len(n_forwards), n_test, n_output))
    # Earliest window whose longest-horizon forecast can land on test step 0.
    start = n_train - n_lags - max_n_forwards + 1
    for input_ind in range(start, n_sample - n_lags):
        # Forecast max_n_forwards steps from the n_lags-long window starting at input_ind.
        prediction = var_result.forecast(scaler.transform(df.values[input_ind: input_ind + n_lags]), max_n_forwards)
        for i, n_forward in enumerate(n_forwards):
            # The horizon-n_forward forecast from this window targets absolute time
            # input_ind + n_lags + n_forward - 1; shift by n_train to index the test span.
            result_ind = input_ind - n_train + n_lags + n_forward - 1
            if 0 <= result_ind < n_test:
                result[i, result_ind, :] = prediction[n_forward - 1, :]

    # Undo the standardization and wrap each horizon's forecasts in a DataFrame
    # aligned with the test index/columns.
    df_predicts = []
    for i, n_forward in enumerate(n_forwards):
        df_predict = pd.DataFrame(scaler.inverse_transform(result[i]), index=df_test.index, columns=df_test.columns)
        df_predicts.append(df_predict)
    return df_predicts, df_test
|
||||
|
||||
|
||||
def eval_static(traffic_reading_df):
    """
    Evaluates the static (persistence) baseline, $x^{t+h} = x^{t}$, and logs
    masked RMSE/MAPE/MAE for horizons 1, 3, 6 and 12.

    :param traffic_reading_df: pandas.DataFrame, index: time, columns: sensor id.
    """
    logger.info('Static')
    horizons = [1, 3, 6, 12]
    logger.info('\t'.join(['Model', 'Horizon', 'RMSE', 'MAPE', 'MAE']))
    for horizon in horizons:
        y_predict, y_test = static_predict(traffic_reading_df, n_forward=horizon, test_ratio=0.2)
        # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
        # pandas 1.0; .values is the supported equivalent.
        rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
        mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
        mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
        line = 'Static\t%d\t%.2f\t%.2f\t%.2f' % (horizon, rmse, mape * 100, mae)
        logger.info(line)
|
||||
|
||||
|
||||
def eval_historical_average(traffic_reading_df, period):
    """
    Evaluates the historical-average baseline and logs masked RMSE/MAPE/MAE.

    The HA prediction is independent of the forecasting horizon, so the metrics
    are computed once and the same line is reported for every horizon.
    :param traffic_reading_df: pandas.DataFrame, index: time, columns: sensor id.
    :param period: seasonality in time steps (e.g. one week).
    """
    y_predict, y_test = historical_average_predict(traffic_reading_df, period=period, test_ratio=0.2)
    # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
    # pandas 1.0; .values is the supported equivalent.
    rmse = masked_rmse_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mape = masked_mape_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    mae = masked_mae_np(preds=y_predict.values, labels=y_test.values, null_val=0)
    logger.info('Historical Average')
    logger.info('\t'.join(['Model', 'Horizon', 'RMSE', 'MAPE', 'MAE']))
    for horizon in [1, 3, 6, 12]:
        line = 'HA\t%d\t%.2f\t%.2f\t%.2f' % (horizon, rmse, mape * 100, mae)
        logger.info(line)
|
||||
|
||||
|
||||
def eval_var(traffic_reading_df, n_lags=3):
    """
    Evaluates the VAR baseline and logs masked RMSE/MAPE/MAE for horizons
    1, 3, 6 and 12.

    :param traffic_reading_df: pandas.DataFrame, index: time, columns: sensor id.
    :param n_lags: the order of the VAR model.
    """
    n_forwards = [1, 3, 6, 12]
    y_predicts, y_test = var_predict(traffic_reading_df, n_forwards=n_forwards, n_lags=n_lags,
                                     test_ratio=0.2)
    logger.info('VAR (lag=%d)' % n_lags)
    logger.info('Model\tHorizon\tRMSE\tMAPE\tMAE')
    for i, horizon in enumerate(n_forwards):
        # DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in
        # pandas 1.0; .values is the supported equivalent.
        rmse = masked_rmse_np(preds=y_predicts[i].values, labels=y_test.values, null_val=0)
        mape = masked_mape_np(preds=y_predicts[i].values, labels=y_test.values, null_val=0)
        mae = masked_mae_np(preds=y_predicts[i].values, labels=y_test.values, null_val=0)
        line = 'VAR\t%d\t%.2f\t%.2f\t%.2f' % (horizon, rmse, mape * 100, mae)
        logger.info(line)
|
||||
|
||||
|
||||
def main(args):
    """
    Runs every baseline evaluation (static, historical average, VAR) on the
    traffic DataFrame stored at args.traffic_reading_filename.
    """
    readings = pd.read_hdf(args.traffic_reading_filename)
    eval_static(readings)
    # One week of 5-minute readings: 7 days * 24 hours * 12 steps/hour.
    eval_historical_average(readings, period=7 * 24 * 12)
    eval_var(readings, n_lags=3)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module-level logger shared by the eval_* functions above.
    logger = utils.get_logger('data/model', 'Baseline')
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--traffic_reading_filename', default="data/metr-la.h5", type=str,
                            help='Path to the traffic Dataframe.')
    main(arg_parser.parse_args())
|
||||
Loading…
Reference in New Issue