def __init__(self, model, filename=None, num_rows=None):
        """Load the holdout CSV into memory and keep the model alongside it.

        Args:
            model: Object stored as ``self.model``; it is not used during
                construction.
            filename: Path of the CSV to read. Falls back to ``HOLDOUT_DATA``
                when ``None``.
            num_rows: Optional cap on the number of rows kept in
                ``self.holdout_df``. ``None`` loads the whole file.
        """
        self.logger = pocket_logger.get_my_logger()
        self.model = model

        use_col = feature_engineerer.get_necessary_col()
        dtypes = csv_loader.get_featured_dtypes()
        if filename is None:
            filename = HOLDOUT_DATA

        # Build the lazy frame once; only the materialization step differs.
        lazy_df = dd.read_csv(filename, dtype=dtypes, usecols=use_col)
        if num_rows is None:
            self.holdout_df = lazy_df.compute()
        else:
            # dd.read_csv does not support ``nrows``; head() is the documented
            # replacement. npartitions=-1 lets it draw from every partition so
            # the result is not silently truncated to the first block.
            self.holdout_df = lazy_df.head(n=num_rows, npartitions=-1)

        # DataFrame.info() prints its own report and returns None, so wrapping
        # it in print() would emit a stray "None" line.
        self.holdout_df.info()
        print("Initialized validator.")
# Example #2
# 0
# Feather files under OUTPUT_DIR that the script below reads with
# pd.read_feather (file names suggest train days 8 and 9 plus the test set).
OUTPUT_DATA8 = os.path.join(OUTPUT_DIR, "long_train_day8.feather")
OUTPUT_DATA9 = os.path.join(OUTPUT_DIR, "long_train_day9.feather")
OUTPUT_TEST = os.path.join(OUTPUT_DIR, "long_test.feather")
# NumPy .npy file under INPUT_DIR; presumably row indices into the test set
# -- TODO confirm against the code that loads it.
MAMAS_INDEX = os.path.join(INPUT_DIR, "last_test_idx.npy")

# CSV path under OUTPUT_DIR; presumably where the submission is written
# -- TODO confirm against the writer.
OUTPUT_FILE = os.path.join(OUTPUT_DIR, "sub_long.csv")

import pandas as pd
import numpy as np
from sklearn import model_selection
import gc
from dask import dataframe as dd
from talkingdata.fe import column_selector, runtime_fe
from talkingdata.common import csv_loader, pocket_lgb, pocket_timer, pocket_logger

# Project logger, plus a timer constructed with that logger.
logger = pocket_logger.get_my_logger()
timer = pocket_timer.GoldenTimer(logger)

predict_col = column_selector.get_predict_col()
dtypes = csv_loader.get_featured_dtypes()

# Load the three per-day training frames and the test frame from feather.
train7 = pd.read_feather(OUTPUT_DATA7)
train8 = pd.read_feather(OUTPUT_DATA8)
train9 = pd.read_feather(OUTPUT_DATA9)
test = pd.read_feather(OUTPUT_TEST)
timer.time("load csv in ")

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames the
# same way (original indexes kept) and avoids the intermediate copy that the
# chained append created.
train = pd.concat([train7, train8, train9])

# DataFrame.info() prints its own report and returns None, so wrapping it in
# print() would emit a stray "None" line.
train.info()
test.info()

# Drop the per-day frames so only the combined frame stays in memory.
del train7, train8, train9
gc.collect()
 def __init__(self, model, df, predict_col):
     """Store the model, the holdout frame and the prediction columns.

     Args:
         model: Object kept as ``self.model``.
         df: Object kept as ``self.holdout_df``.
         predict_col: Object kept as ``self.predict_col``.
     """
     self.logger = pocket_logger.get_my_logger()
     # Plain attribute hand-off; nothing is copied or validated here.
     self.model, self.holdout_df, self.predict_col = model, df, predict_col
     print("Initialized validator.")