Example #1
def split_by_file(run_id, train_fname, test_fname):
    """
    The method splits the training data by file
    """
    logger = custom_logger.CustomLogger(run_id+':'+file_id)
    train_df = load_data.load_from_tsv(train_fname, 'training')
    test_df = load_data.load_from_tsv(test_fname, 'test')
    return train_df, test_df
Example #2
def unpickle_model(model_fname):
    """
    Load the model from 'config.info.model_file'
    """
    logger = custom_logger.CustomLogger(result_filing.run_id + ':' + file_id)
    if model_fname == '':
        logger.error('Filepath for model is not provided')
        sys.exit('Filepath for model is not provided')
    else:
        # use a context manager so the file handle is closed after loading
        with open(model_fname, 'rb') as f:
            loaded_model = pickle.load(f)
        return loaded_model
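For context, a minimal sketch of the pickle round trip this helper performs; the toy estimator and file name below are hypothetical stand-ins, not part of the original flow.

import pickle
from sklearn.linear_model import LinearRegression

# Train and serialize a toy model the way train_model presumably does.
model = LinearRegression().fit([[0], [1], [2]], [0, 1, 2])
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

# unpickle_model('model.pkl') would recover an equivalent estimator once the
# logger globals are initialized; the raw round trip looks like this:
with open('model.pkl', 'rb') as f:
    loaded = pickle.load(f)
print(loaded.predict([[3]]))  # ~[3.]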
Example #3
def save_meta_file(gen_dict, f_name):
    """
    Write train_model or test_results metadata to the specified file
    """
    logger = custom_logger.CustomLogger(run_id + ':' + file_id)
    filename = run_id + '_' + f_name + '.meta'
    out_path = os.path.join(unique_op_dir, filename)
    print('Output stored in %s' % out_path)
    logger.info('Output stored in %s' % out_path)
    # append one "key : value" line per metadata entry and close the file
    with open(out_path, 'a') as f:
        for key, val in gen_dict.items():
            f.write(str(key) + " : " + str(val) + "\n")
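A hedged usage sketch; the dict keys mirror the ones written in test_model below, and run_id, unique_op_dir, and file_id are module globals assumed to be initialized.

# Hypothetical call appending 'key : value' lines to <run_id>_test_results.meta:
save_meta_file({'X_test_shape': (100, 24), 'avg_test_error': 0.93}, 'test_results')
# Resulting file contents:
# X_test_shape : (100, 24)
# avg_test_error : 0.93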
Example #4
def split_by_date(run_id, train_fname, split_date):
    """
    The method splits the training data by date
    """
    logger = custom_logger.CustomLogger(run_id + ':' + file_id)
    if split_date == '':
        logger.error('No split date provided')
        sys.exit('No split date provided')
    df_intraday = load_data.load_from_tsv(train_fname, 'training')
    date_start = str(df_intraday.head(1).index.date[0])
    date_end = str(df_intraday.tail(1).index.date[0])
    train_df = df_intraday[date_start:split_date]
    # the test window starts the day after the split date
    date_split_1 = datetime.strptime(split_date, "%Y-%m-%d") + timedelta(days=1)
    test_df = df_intraday[date_split_1:date_end]
    return train_df, test_df
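A self-contained sketch of the pandas date slicing this split relies on; the synthetic hourly frame stands in for the real one indexed by 'delivery_start'.

import pandas as pd
from datetime import datetime, timedelta

# Synthetic hourly prices covering four days.
idx = pd.date_range('2021-01-01', periods=96, freq='h')
df = pd.DataFrame({'price': range(96)}, index=idx)

split_date = '2021-01-02'
train = df['2021-01-01':split_date]  # partial-string slicing is end-inclusive
start_test = datetime.strptime(split_date, '%Y-%m-%d') + timedelta(days=1)
test = df[start_test:'2021-01-04']
print(train.shape, test.shape)  # (48, 1) (48, 1)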
Example #5
def main():
    """
    The start of the flow handles all initializations, configuration loading
    and performs training or deployment based upon 'config.info.operation_type'
    For more details on configuration options look up commnets in config.yaml
    """
    parser = get_parser()
    config = parser.parse_args(['--cfg', 'config.yaml'])
    result_filing.init_config_vars(config)
    run_id = config.info.run_id
    logger = custom_logger.CustomLogger(run_id + ':' + file_id)

    operation = config.info.operation_type
    logger.info("Selected operation type %s." % (operation))
    if operation == const.TRAIN_OP:
        train.train_model(config)
    elif operation == const.DEPLOY_OP:
        test.test_model(config)
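Judging from the attributes read in main and test_model, the relevant part of config.yaml presumably looks like the following; every value here is illustrative, not taken from the original file.

# Hypothetical excerpt of config.yaml; only keys read in this section appear.
info:
  run_id: run_001
  operation_type: train      # const.TRAIN_OP or const.DEPLOY_OP
  model_file: model.pkl      # pickled model used when testing/deploying
  m: 24                      # past samples to consider for prediction
  n: 6                       # next steps to predict
train_test_split:
  test: data/test.tsv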
Example #6
def test_model(config):
    """The method loads model given model, test data and
    saves the test results at specified output directory.
    """
    run_id = config.info.run_id
    logger = custom_logger.CustomLogger(run_id + ':' + file_id)
    test_df = load_data.load_from_tsv(config.train_test_split.test, 'testing')
    if not test_df.shape[0] == 0:
        logger.info('The data is loaded successfully')
    else:
        logger.error('Empty dataframe loaded')
        sys.exit('Empty dataframe loaded')
    print('Test df : %s' % (str(test_df.shape)))
    # m past samples to consider for prediction
    m = config.info.m
    # n next steps to predict
    n = config.info.n
    model_file = config.info.model_file
    logger.info(
        'Test is to predict next %d steps using past %d steps using model %s.'
        % (n, m, model_file))
    test_dict = dict()
    X_test, y_test = load_data.create_custom_data_structure(test_df, m, n)
    test_dict['X_test_shape'] = X_test.shape
    test_dict['y_test_shape'] = y_test.shape
    print('X_test : %s and y_test: %s ' %
          (str(X_test.shape), str(y_test.shape)))
    has_null = y_test.isnull().sum().sum() + X_test.isnull().sum().sum()
    if not has_null:
        logger.info(
            'Successfully built custom data structure for (%d input steps, %d output steps) supervised prediction'
            % (m, n))
    else:
        logger.error('Built custom dataframes have %d NaN values' % has_null)
        sys.exit('Built custom dataframes have %d NaN values' % has_null)
    model = model_manipulation.unpickle_model(model_file)
    y_test_predict = model.predict(X_test)
    avg_test_error = explained_variance_score(y_test,
                                              y_test_predict,
                                              multioutput='uniform_average')
    test_dict['avg_test_error'] = avg_test_error
    result_filing.save_meta_file(test_dict, 'test_results')
    logger.info('Loaded model predicts with %f average test error.' %
                (avg_test_error))
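load_data.create_custom_data_structure is not shown in this section; the sketch below is one plausible sliding-window implementation inferred purely from how it is called (m lag columns as inputs, n lead columns as targets). Its name is real, but the column choice and details are assumptions.

import pandas as pd

def create_custom_data_structure(df, m, n, col='weight_avg'):
    # Row t gets the m most recent values as features ...
    s = df[col]
    X = pd.concat({'t-%d' % i: s.shift(i) for i in range(m)}, axis=1)
    # ... and the n following values as prediction targets.
    y = pd.concat({'t+%d' % i: s.shift(-i) for i in range(1, n + 1)}, axis=1)
    # Drop edge rows where the window runs past the data.
    valid = X.notna().all(axis=1) & y.notna().all(axis=1)
    return X[valid], y[valid]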
Example #7
def load_from_tsv(filename, data_type):
    """
    The method loads a TSV file indexed by its 'delivery_start' timestamps
    """
    # run_id is a global variable
    logger = custom_logger.CustomLogger(result_filing.run_id + ':' + file_id)
    if filename == '':
        logger.error('Filepath for %s is not provided' % (data_type))
        sys.exit('Filepath for %s is not provided' % (data_type))
    else:
        loaded_df = pd.read_csv(filename,
                                sep="\t",
                                infer_datetime_format=True,
                                parse_dates=['delivery_start'],
                                index_col=['delivery_start'])
        # NaN check
        has_null = loaded_df.isnull().sum().sum()
        if not has_null:
            logger.info('No NaN values in dataframe loaded from %s' % (filename))
        else:
            logger.error('Dataframe loaded from %s has %d NaN values' % (filename, has_null))
            sys.exit('Dataframe loaded from %s has %d NaN values' % (filename, has_null))
    return loaded_df
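For reference, a tiny TSV that would satisfy this loader; the non-index column names are inferred from the plotting code in the next example, and the file name is hypothetical.

# sample.tsv (tab-separated):
# delivery_start	low	high	weight_avg
# 2021-01-01 00:00	40.1	55.3	47.2
# 2021-01-01 01:00	38.7	52.0	45.9
df = load_from_tsv('sample.tsv', 'training')
print(df.index.dtype)  # datetime64[ns]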
Example #8
def visualize_train_data(train_df, fname):
    """
    Visualize the time series input
    """
    logger = custom_logger.CustomLogger(run_id + ':' + file_id)
    fig, axs = plt.subplots(3, figsize=(15, 15))
    fig.suptitle('EPEX Intraday Continuous market electricity prices')

    axs[0].plot(train_df.index, train_df['low'], color='red')
    axs[0].set_title("Lowest Price")
    axs[0].set(xlabel='time', ylabel='price (Euros)')

    axs[1].plot(train_df.index, train_df['high'], color='green')
    axs[1].set_title("Highest Price")
    axs[1].set(xlabel='time', ylabel='price (Euros)')

    axs[2].plot(train_df.index, train_df['weight_avg'], color='blue')
    axs[2].set_title("Volume-weighted Average Price")
    axs[2].set(xlabel='time', ylabel='price (Euros)')

    fig.savefig(os.path.join(unique_op_dir, fname))
    logger.info('Training data plots stored at %s' %
                os.path.join(unique_op_dir, fname))
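A quick usage sketch on synthetic data; run_id, unique_op_dir, and file_id are module globals assumed to be initialized, and the frame merely mimics the columns the real loader provides.

import numpy as np
import pandas as pd

# Synthetic stand-in for the training frame; real data comes from load_from_tsv.
idx = pd.date_range('2021-01-01', periods=48, freq='h')
demo = pd.DataFrame({'low': 30 + np.random.rand(48) * 10,
                     'high': 50 + np.random.rand(48) * 10,
                     'weight_avg': 40 + np.random.rand(48) * 10}, index=idx)
visualize_train_data(demo, 'train_plots.png')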