Example #1
import logging
import os

import numpy as np
import pandas as pd
from sklearn.compose import TransformedTargetRegressor
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR

# preprocessing_pipeline and Model are project-local helpers; a sketch of
# what they might look like follows the example.


def main(input_filepath, output_filepath, config_file):
    """Loads the training data, builds the pre-processing and modeling
    pipeline, trains it, and saves the fitted model to output_filepath."""
    logger = logging.getLogger(__name__)
    logger.info('Loading training data set, setting up pipeline, tuning, '
                'training and evaluating final model.')

    # Parse config file
    # config = parse_config(config_file)

    # Load training data
    X_train = pd.read_csv(os.path.join(input_filepath, 'X_train.csv'))
    y_train = pd.read_csv(os.path.join(input_filepath,
                                       'y_train.csv')).values.ravel()

    # Pre-processing and modeling pipeline
    cat_features = X_train.select_dtypes(exclude='float64').columns
    num_features = X_train.select_dtypes(include='float64').columns

    pipe = Pipeline([('preprocessing',
                      preprocessing_pipeline(cat_features, num_features)),
                     ('model',
                      TransformedTargetRegressor(regressor=SVR(),
                                                 func=np.log1p,
                                                 inverse_func=np.expm1))])

    # Tune or select model
    # kf = KFold(config['modeling']['num_folds'], shuffle=True,
    #            random_state=rng).get_n_splits(X_train.values)

    model = Model(model=pipe)

    # Train model
    model.train(X_train, y_train)

    # Save model
    model.save(os.path.join(output_filepath, model.name + '.pkl'))
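
The helpers preprocessing_pipeline and Model are project-local and not shown
above. A minimal sketch of what they might look like, assuming a standard
scikit-learn ColumnTransformer for the pre-processing and a thin pickle-based
wrapper for the model (the imputation strategies, encoder settings, and the
default model name are assumptions, not the original implementation):

import pickle

from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler


def preprocessing_pipeline(cat_features, num_features):
    """Impute and one-hot-encode categoricals; impute and scale numerics."""
    cat_transformer = Pipeline([
        ('impute', SimpleImputer(strategy='most_frequent')),
        ('encode', OneHotEncoder(handle_unknown='ignore')),
    ])
    num_transformer = Pipeline([
        ('impute', SimpleImputer(strategy='median')),
        ('scale', StandardScaler()),
    ])
    return ColumnTransformer([
        ('categorical', cat_transformer, cat_features),
        ('numeric', num_transformer, num_features),
    ])


class Model:
    """Thin wrapper assumed by main(): exposes train/save and a name."""

    def __init__(self, model, name='svr_model'):
        self.model = model
        self.name = name  # 'svr_model' is a placeholder, not the real name

    def train(self, X, y):
        self.model.fit(X, y)

    def save(self, filepath):
        with open(filepath, 'wb') as f:
            pickle.dump(self.model, f)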
Example #2

# Excerpt from the periodic-evaluation block of an image-inpainting
# training loop; the surrounding loop and setup are elided in the
# original snippet (a runnable skeleton of that structure is sketched
# after the excerpt).

                        # Convert generator output to displayable images
                        fake_img = postprocess(fake_img)  # [b, h, w, 3]
                        for i in range(fake_img.shape[0]):
                            # Save each evaluated sample under <args.path>/eval
                            sample_name = os.path.join(
                                args.path, 'eval',
                                val_dataset.load_name(index))
                            imsave(fake_img[i], sample_name)
                            index += 1

                        eval_progbar.add(fake_img.shape[0])

                # Score the saved samples against the reference set
                score_dict = get_inpainting_metrics(
                    config.eval_path,
                    os.path.join(args.path, 'eval'),
                    logger,
                    fid_test=config.fid_test)

                # Track the best (lowest) FID seen so far and checkpoint it
                if config.save_best and 'fid' in score_dict:
                    if ('fid' not in best_score
                            or best_score['fid'] >= score_dict['fid']):
                        best_score = score_dict.copy()
                        best_score['iteration'] = iteration
                        model.save(prefix='best_fid')

            # Periodically checkpoint the most recent weights
            if iteration % config.save_iters == 0:
                model.save(prefix='last')

            # Stop once the iteration budget is exhausted
            if iteration >= config.max_iters:
                keep_training = False
                break

    logger.info('Best score: ' + str(best_score))
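
The excerpt assumes an outer iteration-driven training loop. A runnable
skeleton of that structure, with dummy stand-ins for the model, the data
loader, and the metrics (the eval interval name and all values below are
assumptions for illustration, not the original code):

class DummyModel:
    """Stand-in for the real model; only checkpointing is sketched."""

    def save(self, prefix):
        print('checkpoint:', prefix)


model = DummyModel()
train_loader = range(10)  # stands in for the real DataLoader
max_iters, save_iters, eval_iters = 25, 10, 20  # assumed config values

best_score = {}
iteration = 0
keep_training = True

while keep_training:
    for _batch in train_loader:
        iteration += 1
        # ... one optimization step would run here ...

        if iteration % eval_iters == 0:
            # The evaluation excerpt above runs here and produces score_dict;
            # a made-up metric keeps this skeleton self-contained.
            score_dict = {'fid': 30.0 - iteration}
            if 'fid' not in best_score or best_score['fid'] >= score_dict['fid']:
                best_score = score_dict.copy()
                best_score['iteration'] = iteration
                model.save(prefix='best_fid')

        if iteration % save_iters == 0:
            model.save(prefix='last')

        if iteration >= max_iters:
            keep_training = False
            break

print('Best score:', best_score)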