Example #1
def train(config: dict, save_model: bool):
    name = '{}_{}H_{}'.format(config['model_type'], config['h_mult'],
                              config['loss'])
    results_dir = mk_clear_dir(get_results_dir(config['load_subset']), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, name), False)

    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source()

    config.update({
        'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
        'forecast_length': m4.H[Subset(config['load_subset'])],
        'name': name
    })
    interpretable_mod(config)

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)

    train_data_fn = partial(m4.dataset,
                            trainparms.epoch_sample_size,
                            lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size,
                save_model, model_file_name)

    with open(os.path.join(model_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)
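A minimal driver sketch for this variant, assuming a YAML file that supplies the keys the function reads (`model_type`, `h_mult`, `loss`, `load_subset`, `lh`, plus the `TrainParams` fields) and the `load_config` helper used in the later examples; the file name is a placeholder:

config = load_config('config.yaml')  # hypothetical file with the keys listed above
train(config, save_model=True)       # trains, then writes model.mdl and config.yaml under the model dir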
Example #2
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)

    m4 = M4Dataset(sources=M4Sources()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    # For interpretable model, set seasonality theta dim to H
    if config['model_type'] == 'interpretable':
        th_dim = eval(config['thetas_dim'])
        assert eval(config['stack_types']) == (BlockType.TREND_BLOCK, BlockType.SEASONALITY_BLOCK)
        config['thetas_dim'] = str((th_dim[0], config['forecast_length']))

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)

    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)

    return model
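To make the interpretable branch concrete: with `thetas_dim` stored as the string `'(4, 8)'` and a forecast horizon of 6, the rewrite keeps the trend theta dimension and pins the seasonality theta dimension to H. A worked sketch of those lines, with illustrative numbers and the assert elided:

th_dim = eval('(4, 8)')           # -> (4, 8): (trend_dim, seasonality_dim)
new_thetas = str((th_dim[0], 6))  # -> '(4, 6)': seasonality dim replaced by forecast_length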
Example #3
def load_ensemble(load_subset: Subset) -> tf.keras.Model:
    results_dir = get_results_dir(load_subset.value)
    ensemble_model = NBeatsEnsemble(
        EnsembleParams().update_param(**load_config('ensemble.yaml')))
    params = []
    for d in Path(results_dir).rglob('mdl_*'):
        with open(os.path.join(d, 'config.yaml')) as f:
            params.append((NBeatsParams().update_param(**yaml.safe_load(f)),
                           os.path.join(d, 'model.mdl')))
    ensemble_model.load(params)
    return ensemble_model
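A hedged usage sketch; the `Subset` member name is a placeholder, and `call` follows the signature used in Example #5:

ensemble = load_ensemble(Subset.YEARLY)            # hypothetical subset member
yhat = ensemble.call(x, False, model_outputs={})   # x: a batch of test inputs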
Example #4
    def load_ensemble(ensemble_root_dir: str, trainparms: TrainParams, metric: str = 'smape',
                      run_eagerly: bool = False, ignore_loading_mixer_weights: bool = False) \
            -> Tuple[keras.Model, Dict, Iterable[Dict]]:
        """
            Load an already generated ensemble saved with 'NBeatsWeightedEnsemble.save_ensemble()'
            Return: model, ensemble config, submodel configs
        """

        # A: Instantiate ensemble
        # 1 - ensemble params
        ensemble_cfg = load_config(
            os.path.join(ensemble_root_dir, 'weighted_ensemble.yaml'))
        ensemble_params = WeightedEnsembleParams().update_param(**ensemble_cfg)

        # 2 - submodel params
        submodel_params = dict()
        submodel_files = dict()
        submodel_configs = dict()
        for d in Path(ensemble_root_dir).rglob('mdl_*'):
            cfg_file = os.path.join(d, 'config.yaml')
            mdl_file = os.path.join(d, 'model.mdl')
            parms = NBeatsParams().update_param(**load_config(cfg_file))
            if parms.name in ensemble_params.submodel_names:
                submodel_params[parms.name] = parms
                submodel_files[parms.name] = mdl_file
                submodel_configs[parms.name] = load_config(cfg_file)
        assert len(submodel_files) > 0, 'Ensemble submodels could not be loaded'

        # 3 - instantiate ensemble
        ensemble_model = NBeatsWeightedEnsemble.create_model(
            ensemble_params,
            list(submodel_params.values()),
            trainparms,
            metric,
            freeze_submodels=False,
            run_eagerly=run_eagerly)

        # B: Load weights
        # 1 - weights of sub-models
        for nm, mdl in ensemble_model.sub_models.items():
            mdl.load(submodel_files[nm])

        # 2 - weights of mixer
        if not ignore_loading_mixer_weights:
            mixer_file = str(list(Path(ensemble_root_dir).rglob('mixer_*'))[0])
            mixer_file = os.path.join(mixer_file, 'model.mdl')
            ensemble_model.mixer.load(mixer_file)

        return ensemble_model, ensemble_cfg, submodel_configs.values()
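Because this variant returns the configs alongside the model, a caller can reload and inspect an ensemble as in this sketch; the root directory and `ensemble.yaml` file name are placeholders:

trainparms = TrainParams().update_param(**load_config('ensemble.yaml'))  # placeholder config file
model, ens_cfg, sub_cfgs = NBeatsWeightedEnsemble.load_ensemble(
    'results/ensemble_run', trainparms, metric='smape')                  # placeholder path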
Example #5
def test_ensemble(subset: Subset,
                  gen_plots: bool = False,
                  show_plots: bool = False):
    config = load_config('ensemble.yaml')
    config['load_subset'] = subset.value
    results_dir = get_results_dir(subset.value)

    # Load model
    model = NBeatsEnsemble(EnsembleParams().update_param(**config))
    params = []
    for d in Path(results_dir).rglob('mdl_*'):
        with open(os.path.join(d, 'config.yaml')) as f:
            params.append((NBeatsParams().update_param(**yaml.safe_load(f)),
                           os.path.join(d, 'model.mdl')))
    model.load(params)

    m4 = M4Dataset(sources=M4Sources()).update_param(
        **config).read_source(True)
    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())
    model_outs = dict()
    yhat = model.call(x, False, model_outputs=model_outs).numpy()

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)

    with open(
            os.path.join(results_dir,
                         f'{model.aggregation_method}_results.txt'), 'w') as f:
        f.write(test_result)

    if len(model_outs) > 0:
        import pickle
        with open(
                os.path.join(results_dir,
                             f'{model.aggregation_method}_model_outputs.pkl'),
                'wb') as f:
            pickle.dump({k: v.numpy() for k, v in model_outs.items()}, f)

    if gen_plots:
        plot_dir = mk_clear_dir(
            get_plot_dir(results_dir, model.aggregation_method), True)
        samples2print = list(range(64))
        plot_past_future(x[..., 0],
                         yhat[..., 0],
                         y[..., 0],
                         plot_dir,
                         n=samples2print,
                         show=show_plots)
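End to end, the evaluation entry point then reduces to a single call (a sketch; the `Subset` member is illustrative):

test_ensemble(Subset.YEARLY, gen_plots=True, show_plots=False)  # writes *_results.txt and plots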
Example #6
def train(ensemble_config: Dict,
          submodel_configs: Iterable[Dict],
          save_model=True,
          resume_training=False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(ensemble_config), False)
    model_dir = mk_clear_dir(get_model_dir(results_dir, ensemble_config),
                             False)
    trainparms = TrainParams().update_param(**ensemble_config)
    m4 = M4Dataset(sources=M4Sources()).update_param(
        **ensemble_config).read_source()

    if resume_training:
        model = NBeatsWeightedEnsemble.load_ensemble(model_dir, trainparms)
    else:
        # Instantiate new ensemble
        ensemble_params = WeightedEnsembleParams().update_param(
            **ensemble_config)
        submodel_params = []
        for cfg in submodel_configs:
            name = '{}_{}H'.format(cfg['model_type'], cfg['h_mult'])
            cfg.update({
                'backcast_length': m4.H[m4.load_subset] * cfg['h_mult'],
                'forecast_length': m4.H[m4.load_subset],
                'name': name
            })
            interpretable_mod(cfg)
            submodel_params.append(NBeatsParams().update_param(**cfg))
        model = NBeatsWeightedEnsemble.create_model(ensemble_params,
                                                    submodel_params)

    save_fun = partial(NBeatsWeightedEnsemble.save_ensemble,
                       ensemble_config=ensemble_config,
                       model_configs=submodel_configs,
                       ensemble_root_dir=model_dir,
                       delete_root_dir=False)

    train_data_fn = partial(m4.dataset,
                            trainparms.epoch_sample_size,
                            lh=ensemble_config['lh'])
    model.train(trainparms=trainparms,
                train_data_fn=train_data_fn,
                epochs=trainparms.epochs,
                batch_size=trainparms.batch_size,
                save_model=save_model,
                save_callback=save_fun,
                run_eagerly=False)

    return model
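A sketch of how this trainer might be driven, assuming one ensemble config plus one config file per submodel (all file names are placeholders):

ens_cfg = load_config('weighted_ensemble.yaml')                               # placeholder name
sub_cfgs = [load_config(p) for p in ('generic.yaml', 'trend_seasonal.yaml')]  # placeholders
model = train(ens_cfg, sub_cfgs, save_model=True, resume_training=False)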
Example #7
    def load_ensemble(ensemble_root_dir: str, trainparms: TrainParams, metric: str = 'smape',
                      run_eagerly: bool = False) -> keras.Model:
        """
            Load an already generated ensemble saved with 'NBeatsWeightedEnsemble.save_ensemble()'
        """

        # A: Instantiate ensemble
        # 1 - ensemble params
        cfg = load_config(os.path.join(ensemble_root_dir, 'weighted_ensemble.yaml'))
        ensemble_params = WeightedEnsembleParams().update_param(**cfg)

        # 2 - submodel params
        submodel_params = dict()
        submodel_files = dict()
        for d in Path(ensemble_root_dir).rglob('mdl_*'):
            cfg_file = os.path.join(d, 'config.yaml')
            mdl_file = os.path.join(d, 'model.mdl')
            parms = NBeatsParams().update_param(**load_config(cfg_file))
            if parms.name in ensemble_params.submodel_names:
                submodel_params[parms.name] = parms
                submodel_files[parms.name] = mdl_file
        assert len(submodel_files) > 0, 'Ensemble submodels could not be loaded'

        # 3 - instantiate ensemble
        ensemble_model = NBeatsWeightedEnsemble.create_model(ensemble_params, list(submodel_params.values()))
        ensemble_model.m_compile(trainparms, metric, run_eagerly)

        # B: Load weights
        # 1 - weights of sub-models
        for nm, mdl in ensemble_model.sub_models.items():
            mdl.load(submodel_files[nm])

        # 2 - weights of mixer
        mixer_file = str(list(Path(ensemble_root_dir).rglob('mixer_*'))[0])
        mixer_file = os.path.join(mixer_file, 'model.mdl')
        ensemble_model.mixer.load(mixer_file)

        import pickle
        with open(os.path.join(ensemble_root_dir, 'meta.pkl'), 'rb') as f:
            meta = pickle.load(f)
            ensemble_model.m_history = meta[0]
            ensemble_model.optimizer = meta[1]
        return ensemble_model
Example #8
def train(config: dict, save_model: bool, delete_existing: bool = False) -> tf.keras.models.Model:
    results_dir = mk_clear_dir(get_results_dir(config), delete_existing)
    model_dir = mk_clear_dir(get_model_dir(results_dir), delete_existing)

    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(config['test'])

    # Update backcast and forecast lengths
    config.update({'backcast_length': m4.H[Subset(config['load_subset'])] * m4.h_mult,
                   'forecast_length': m4.H[Subset(config['load_subset'])]})

    trainparms = TrainParams().update_param(**config)
    netparams = NBeatsParams().update_param(**config)

    train_data_fn = partial(m4.dataset, trainparms.epoch_sample_size, lh=config['lh'])
    model = NBeatsTF.create_model(netparams, trainparms)
    model_file_name = os.path.join(model_dir, 'model.mdl')
    model.train(train_data_fn, trainparms.epochs, trainparms.batch_size, save_model, model_file_name)

    with open(os.path.join(results_dir, 'config.yaml'), 'w') as f:
        yaml.dump(config, f)

    return model
Example #9
def test(config: dict, gen_plots: bool = False):
    results_dir = get_results_dir(config)
    plot_dir = mk_clear_dir(get_plot_dir(results_dir), True) if gen_plots else None
    model_dir = get_model_dir(results_dir)
    model_file_name = os.path.join(model_dir, 'model.mdl')

    m4 = M4Dataset(sources=M4SourcesLite()).update_param(**config).read_source(True)
    model = NBeatsTF.create_model(NBeatsParams().update_param(**config), TrainParams().update_param(**config))
    model.load(model_file_name)

    test_dset = m4.test_dataset()
    x, y, w = next(test_dset.batch(len(test_dset)).as_numpy_iterator())

    stack_coll = defaultdict(lambda: 0)
    yhat = model.call(x, False, stack_out=stack_coll).numpy()

    if gen_plots:
        samples2print = list(range(8))
        try:
            if config['model_type'] == 'generic':
                labels = ['Stack1-Generic', 'Stack2-Generic']
            elif config['model_type'] == 'interpretable':
                labels = ['Stack1-Interp', 'Stack2-Interp']
            else:
                raise ValueError(f"Unknown model_type: {config['model_type']}")

            plot_stack(y, yhat, dict(stack_coll), samples2print, labels=labels, block_plot_cnt=2, plot_dir=plot_dir,
                       show=False)
            plot_past_future(x[..., 0], yhat[..., 0], y[..., 0], plot_dir, n=samples2print, show=False)
        except Exception:
            # Plotting is best-effort; ignore failures rather than abort the test run.
            pass

    test_result = 'test smape: {:.3f}'.format(smape_simple(y, yhat, w))
    print(test_result)

    with open(os.path.join(results_dir, 'results.txt'), 'w') as f:
        f.write(test_result)
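A minimal train-then-test flow for this pair of functions; it relies on Example #8's train() updating the config dict in place with the backcast/forecast lengths that test() needs (the config file name is a placeholder):

config = load_config('config.yaml')   # hypothetical file with the training keys
train(config, save_model=True)        # Example #8: also fills backcast/forecast lengths in config
test(config, gen_plots=True)          # reloads model.mdl and reports test sMAPE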