Example #1
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config["model"] = {}
        self.config["ensemble"] = {"lgb": 1}

        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        # load holiday
        path_holiday = './holiday.csv'
        holiday = pd.read_csv(path_holiday, encoding='utf-8', low_memory=False,
                              dtype={'holiday': str})['holiday'].values
        self.config['holiday'] = set(holiday)

        df = read_df(train_csv, self.config)
        print(df.shape)

        holiday_detect(df, self.config)

        preprocess(df, self.config)

        y = df["target"]
        X = df.drop("target", axis=1)

        train(X, y, self.config)
Example #2
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        df = read_df(train_csv, self.config)
        preprocess(df, self.config)

        y = df["target"]
        X = df.drop("target", axis=1)
        train(X, y, self.config)
Example #3
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode
        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        ## prepare data
        df = read_df(train_csv, self.config)
        
        ## preprocessing
        preprocess(df, self.config)

        
        y = df["target"]
        X = df.drop("target", axis=1)
        log('drop target')
        log('####### cur time = ' + str(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")))
        log('################## after FE #########################')
        log(X.shape)
        log('#####################################################')
        train(X, y, self.config)
Example #4
    def train(self, train_csv: str, mode: str):
        self.config["task"] = "train"
        self.config["mode"] = mode

        self.config["objective"] = "regression" if mode == "regression" else "binary"
        self.config["metric"] = "rmse" if mode == "regression" else "auc"

        self.config.tmp_dir = self.config.model_dir + "/tmp"
        os.makedirs(self.config.tmp_dir, exist_ok=True)

        df = read_df(train_csv, self.config)
        df = preprocess(df, self.config)

        y = df["target"].copy()
        X = df.drop("target", axis=1).copy()
        del df
        gc.collect()

        self.config["columns"] = list(X)

        train(X, y, self.config)
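Example #4 is the only variant that records the training column order. A hypothetical predict-side counterpart (assumed for illustration, not shown in the source) would use that list to align incoming features:

        # Hypothetical predict-side use of the stored column list (assumption):
        # reorder test features to match the order seen at training time.
        X_test = X_test[self.config["columns"]]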
Example #5
### Formatting Data
y_train_cat = np_utils.to_categorical(y_train)
y_test_cat = np_utils.to_categorical(y_test)
x_train = x_train.reshape((x_train.shape[0], depth, width, height)).astype('float32')
x_test = x_test.reshape((x_test.shape[0], depth, width, height)).astype('float32')

################################ PART 1 ################################
print("\n[PART 1]\n")

### Parameters
NUM_EPOCHS = 1
SIZE_BATCHS = 128
NUM_BATCHS = 60000 // SIZE_BATCHS  # batches per epoch for a 60,000-sample training set

### Training Model
(trained_model, history) = model.train(x_train, y_train_cat, SIZE_BATCHS, NUM_EPOCHS,
                                       width, height, depth, num_classes)

### Testing Model (the project's model.fit returns predictions and accuracy here)
(y_pred, accuracy) = model.fit(x_test, y_test_cat, trained_model, "output_0")

### Save and Plot results
graph.output_graphs(y_test, y_pred, history, NUM_BATCHS, "output_0")


################################ PART 2 ################################
print("\n[PART 2]\n")

### Store results
error_p2 = list()
std_list = list()
Example #6
import pprint
import random
from lib import csv_helper
from lib import vectors
from modifiers import credit
from lib import model

pp = pprint.PrettyPrinter(width=150)

training_data_csv = csv_helper.csv_to_dict_list('datasets/training.csv')
test_data_csv = csv_helper.csv_to_dict_list('datasets/test.csv')

training_data = vectors.to_vectors(credit.modifier, training_data_csv)
test_data = vectors.to_vectors(credit.modifier, test_data_csv)

params = model.train(training_data, 8, 'class')

weights = params['weights']
bias = params['bias']
features = params['features']

# pp.pprint(bias)
# pp.pprint(weights)
predictions = [
    model.test(weights, bias, features, example['class'], example)
    for example in test_data
]

correct = [pred for pred in predictions if pred]

accuracy = (len(correct) / len(test_data)) * 100
Example #7
        conv_2_2 = Conv(128,
                        128,
                        adjs_dist=self.placeholders['adj_dist_3'],
                        adjs_rad=self.placeholders['adj_rad_3'],
                        logging=self.logging)
        max_pool_2 = MaxPool(size=4)
        average_pool = AveragePool()
        fc_1 = FC(128,
                  data.num_classes,
                  act=lambda x: x,
                  bias=False,
                  dropout=self.placeholders['dropout'],
                  logging=self.logging)

        self.layers = [
            conv_1_1, conv_1_2, max_pool_1, conv_2_1, conv_2_2, max_pool_2,
            average_pool, fc_1
        ]


placeholders = generate_placeholders(BATCH_SIZE, LEVELS, NUM_FEATURES,
                                     data.num_classes)

model = Model(placeholders=placeholders,
              learning_rate=LEARNING_RATE,
              train_dir=TRAIN_DIR,
              log_dir=LOG_DIR)

train(model, data, preprocess_algorithm, BATCH_SIZE, DROPOUT,
      AUGMENT_TRAIN_EXAMPLES, MAX_STEPS, PREPROCESS_FIRST, DISPLAY_STEP)
Example #8
std_max = 0
burn_in = optim_params.get('num_burn_in_steps', 0)
batch_evaluator = lib.evaluation.BatchEvaluator(test_loader,
                                                burn_in=burn_in,
                                                thinning=100)
# print('burn_in: ', burn_in)

state_accum = []

for epoch in range(1, epochs + 1):
    t0 = time.time()

    print('current_lr: ', current_lr)
    model.train()
    for data, target in train_loader:
        step += 1
        data = data.cuda()
        target = target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
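        # apply the optional preconditioner update before the optimizer step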
        if precond:
            precond.step()
        if block_size > 0 and block_decay > 0 and lr_param:
            optimizer.step(lr=current_lr)
        else:
            optimizer.step()
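
The fragment leaves the learning-rate bookkeeping (current_lr, block_size, block_decay, lr_param) to earlier code. A minimal sketch of the per-epoch update such a loop is typically paired with (an assumption, not taken from the source):

    # Assumed epoch-end hook: decay the learning rate once per block of epochs.
    if block_size > 0 and block_decay > 0 and epoch % block_size == 0:
        current_lr *= block_decay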
Example #9
#!/usr/bin/env python3
import argparse
import logging
import pathlib
from fse import IndexedLineDocument
from fse.models import SIF

from lib import data, utils, model
import gensim.downloader as api

log = logging.getLogger("train_model")

EXPECTED_LINES = 66836199


if __name__ == "__main__":
    utils.setup_logging()
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--data", default=data.DEFAULT_OUTPUT_PREFIX,
                        help="Prefix of input data to read, default=" + data.DEFAULT_OUTPUT_PREFIX)
    parser.add_argument("-o", "--output", default=model.DEFAULT_MODEL_FILE,
                        help="File name to save model, default=" + model.DEFAULT_MODEL_FILE)
    args = parser.parse_args()

    glove = api.load("glove-wiki-gigaword-100")
    input_path = pathlib.Path(args.data).with_suffix(".txt")
    sents = IndexedLineDocument(str(input_path))
    model = SIF(glove, workers=2)
    model.train(sents)
    model.save(args.output)
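
A minimal follow-up sketch (assuming fse's gensim-style load and the sv sentence-vector lookup; not part of the original script):

    # Hypothetical: reload the saved SIF model and fetch one sentence vector.
    loaded = SIF.load(args.output)
    vec = loaded.sv[0]  # embedding of the first indexed sentence
    log.info("vector shape: %s", vec.shape)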