def train(self, train_csv: str, mode: str):
    """Prepare the run configuration, load the data and start training.

    Args:
        train_csv: Location of the training CSV; handed to ``read_df``.
        mode: Stored under ``config["mode"]`` for the downstream steps.
    """
    cfg = self.config

    # Run-level settings.
    cfg["task"] = "train"
    cfg["mode"] = mode
    cfg["model"] = {}
    cfg["ensemble"] = {"lgb": 1}

    # Scratch directory below the model directory.
    cfg.tmp_dir = cfg.model_dir + "/tmp"
    os.makedirs(cfg.tmp_dir, exist_ok=True)

    # The holiday list is read from a path relative to the current working
    # directory; the column is forced to ``str`` and stored as a set.
    holiday_table = pd.read_csv(
        './holiday.csv',
        encoding='utf-8',
        low_memory=False,
        dtype={'holiday': str},
    )
    cfg['holiday'] = set(holiday_table['holiday'].values)

    frame = read_df(train_csv, cfg)
    print(frame.shape)

    # Return values are ignored here, so both helpers presumably modify
    # ``frame`` in place -- TODO confirm against their definitions.
    holiday_detect(frame, cfg)
    preprocess(frame, cfg)

    labels = frame["target"]
    features = frame.drop("target", axis=1)

    # The bare name ``train`` resolves to the module-level function,
    # not to this method.
    train(features, labels, cfg)
def train(self, train_csv: str, mode: str):
    """Configure a training run, load the CSV and fit on it.

    Args:
        train_csv: Location of the training CSV; handed to ``read_df``.
        mode: Stored under ``config["mode"]`` for the downstream steps.
    """
    settings = self.config
    settings["task"] = "train"
    settings["mode"] = mode

    # Scratch directory below the model directory.
    settings.tmp_dir = settings.model_dir + "/tmp"
    os.makedirs(settings.tmp_dir, exist_ok=True)

    table = read_df(train_csv, settings)
    # The return value is ignored, so ``preprocess`` presumably works in
    # place -- TODO confirm against its definition.
    preprocess(table, settings)

    target = table["target"]
    inputs = table.drop("target", axis=1)

    # The bare name ``train`` resolves to the module-level function,
    # not to this method.
    train(inputs, target, settings)
def train(self, train_csv: str, mode: str):
    """Configure a training run, load and preprocess the CSV, log, then fit.

    Args:
        train_csv: Location of the training CSV; handed to ``read_df``.
        mode: Stored under ``config["mode"]`` for the downstream steps.
    """
    conf = self.config
    conf["task"] = "train"
    conf["mode"] = mode

    # Scratch directory below the model directory.
    conf.tmp_dir = conf.model_dir + "/tmp"
    os.makedirs(conf.tmp_dir, exist_ok=True)

    # Load the data, then run preprocessing. The return value of
    # ``preprocess`` is ignored, so it presumably works in place -- TODO confirm.
    table = read_df(train_csv, conf)
    preprocess(table, conf)

    target = table["target"]
    inputs = table.drop("target", axis=1)
    log('drop target')

    # Timestamp plus the feature-matrix shape, framed by banner lines.
    stamp = datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S")
    log('####### cur time = ' + stamp)
    log('################## after FE #########################')
    log(inputs.shape)
    log('#####################################################')

    # The bare name ``train`` resolves to the module-level function,
    # not to this method.
    train(inputs, target, conf)
def train(self, train_csv: str, mode: str):
    """Configure a training run, load and preprocess the CSV, then fit.

    Args:
        train_csv: Location of the training CSV; handed to ``read_df``.
        mode: ``"regression"`` selects objective ``"regression"`` with
            metric ``"rmse"``; any other value selects ``"binary"``
            with ``"auc"``.
    """
    cfg = self.config
    cfg["task"] = "train"
    cfg["mode"] = mode

    if mode == "regression":
        objective, metric = "regression", "rmse"
    else:
        objective, metric = "binary", "auc"
    cfg["objective"] = objective
    cfg["metric"] = metric

    # Scratch directory below the model directory.
    cfg.tmp_dir = cfg.model_dir + "/tmp"
    os.makedirs(cfg.tmp_dir, exist_ok=True)

    frame = preprocess(read_df(train_csv, cfg), cfg)

    # Take independent copies so the full frame can be released before training.
    labels = frame["target"].copy()
    features = frame.drop("target", axis=1).copy()
    del frame
    gc.collect()

    # Record the feature column names used for this fit.
    cfg["columns"] = list(features)

    # The bare name ``train`` resolves to the module-level function,
    # not to this method.
    train(features, labels, cfg)
# ---------------------------------------------------------------------------
# Formatting data
# ---------------------------------------------------------------------------
# Encode the label vectors with ``to_categorical``.
y_train_cat = np_utils.to_categorical(y_train)
y_test_cat = np_utils.to_categorical(y_test)

# Reshape every sample to (depth, width, height), then cast to float32.
x_train = x_train.reshape((x_train.shape[0], depth, width, height))
x_train = x_train.astype('float32')
x_test = x_test.reshape((x_test.shape[0], depth, width, height))
x_test = x_test.astype('float32')

# ---------------------------------------------------------------------------
# PART 1
# ---------------------------------------------------------------------------
print("\n[PART 1]\n")

# Parameters
NUM_EPOCHS = 1
SIZE_BATCHS = 128
# NOTE(review): 60000 is hard-coded; presumably the number of training
# samples -- confirm against the dataset actually loaded.
NUM_BATCHS = 60000 // SIZE_BATCHS

# Train the model.
trained_model, history = model.train(
    x_train, y_train_cat, SIZE_BATCHS, NUM_EPOCHS,
    width, height, depth, num_classes)

# Evaluate on the test split (the project's ``model.fit`` is used for this).
y_pred, accuracy = model.fit(x_test, y_test_cat, trained_model, "output_0")

# Save and plot the results.
graph.output_graphs(y_test, y_pred, history, NUM_BATCHS, "output_0")

# ---------------------------------------------------------------------------
# PART 2
# ---------------------------------------------------------------------------
print("\n[PART 2]\n")

# Containers for the results collected in this part.
error_p2 = []
std_list = []
import pprint
import random
from lib import csv_helper
from lib import vectors
from modifiers import credit
from lib import model

pp = pprint.PrettyPrinter(width=150)

# Load both CSV splits and turn each row into a vector via the credit modifier.
training_data_csv = csv_helper.csv_to_dict_list('datasets/training.csv')
test_data_csv = csv_helper.csv_to_dict_list('datasets/test.csv')
training_data = vectors.to_vectors(credit.modifier, training_data_csv)
test_data = vectors.to_vectors(credit.modifier, test_data_csv)

# Fit, then pull the learned parameters out of the returned mapping.
params = model.train(training_data, 8, 'class')
weights, bias, features = (
    params[key] for key in ('weights', 'bias', 'features')
)

# Debug aids:
# pp.pprint(bias)
# pp.pprint(weights)

# Score each test example against its own 'class' value.
predictions = [
    model.test(weights, bias, features, example['class'], example)
    for example in test_data
]

# Equality with True (rather than truthiness) is kept deliberately so the
# set of counted predictions is unchanged.
correct = [pred for pred in predictions if pred == True]
accuracy = len(correct) / len(test_data) * 100
# NOTE(review): the statements up to and including `self.layers = [...]`
# reference `self`, so they presumably belong to a method whose header (and
# the definitions of conv_1_1, conv_1_2, max_pool_1, conv_2_1) lie outside
# this excerpt. The original indentation is not recoverable from here.
conv_2_2 = Conv(128, 128,
                adjs_dist=self.placeholders['adj_dist_3'],
                adjs_rad=self.placeholders['adj_rad_3'],
                logging=self.logging)
max_pool_2 = MaxPool(size=4)
average_pool = AveragePool()
# Last layer: an identity function is passed as `act` and the bias is disabled.
fc_1 = FC(128, data.num_classes,
          act=lambda x: x,
          bias=False,
          dropout=self.placeholders['dropout'],
          logging=self.logging)

# Layers in the order they are listed for the model.
self.layers = [
    conv_1_1, conv_1_2, max_pool_1, conv_2_1, conv_2_2, max_pool_2,
    average_pool, fc_1
]

# Driver code: build the placeholders, instantiate the model, start training.
placeholders = generate_placeholders(BATCH_SIZE, LEVELS, NUM_FEATURES,
                                     data.num_classes)

model = Model(placeholders=placeholders,
              learning_rate=LEARNING_RATE,
              train_dir=TRAIN_DIR,
              log_dir=LOG_DIR)

train(model, data, preprocess_algorithm, BATCH_SIZE, DROPOUT,
      AUGMENT_TRAIN_EXAMPLES, MAX_STEPS, PREPROCESS_FIRST, DISPLAY_STEP)
# NOTE(review): this excerpt starts and ends inside a larger routine; names
# such as `step`, `epochs`, `current_lr` and `optim_params` are defined
# outside the visible code.
std_max = 0

# The burn-in length is optional in the optimiser settings; fall back to 0.
burn_in = optim_params.get('num_burn_in_steps', 0)
batch_evaluator = lib.evaluation.BatchEvaluator(
    test_loader, burn_in=burn_in, thinning=100)

state_accum = []
for epoch in range(1, epochs + 1):
    t0 = time.time()
    print('current_lr: ', current_lr)
    model.train()

    for data, target in train_loader:
        step += 1
        data, target = data.cuda(), target.cuda()

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()

        if precond:
            precond.step()

        # The learning rate is passed explicitly only when block settings
        # are active and `lr_param` is truthy.
        if not (block_size > 0 and block_decay > 0 and lr_param):
            optimizer.step()
        else:
            optimizer.step(lr=current_lr)
#!/usr/bin/env python3
"""Train an fse SIF model over a line-indexed text file and save it.

Pretrained ``glove-wiki-gigaword-100`` vectors are fetched through
``gensim.downloader``; the input file is ``<data prefix>.txt``.
"""
import argparse
import logging
import pathlib

from fse import IndexedLineDocument
from fse.models import SIF
from lib import data, utils, model
import gensim.downloader as api

log = logging.getLogger("train_model")

EXPECTED_LINES = 66836199


if __name__ == "__main__":
    utils.setup_logging()

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--data", default=data.DEFAULT_OUTPUT_PREFIX,
        help="Prefix of input data to read, default=" +
             data.DEFAULT_OUTPUT_PREFIX)
    parser.add_argument(
        "-o", "--output", default=model.DEFAULT_MODEL_FILE,
        help="File name to save model, default=" +
             model.DEFAULT_MODEL_FILE)
    args = parser.parse_args()

    glove = api.load("glove-wiki-gigaword-100")

    # with_suffix() replaces any existing suffix of the prefix with ".txt".
    input_path = pathlib.Path(args.data).with_suffix(".txt")
    sents = IndexedLineDocument(str(input_path))

    # Use a distinct name for the SIF instance: binding it to `model` would
    # rebind the imported `lib.model` module for the rest of the script.
    sif_model = SIF(glove, workers=2)
    sif_model.train(sents)
    sif_model.save(args.output)