# NOTE(review): `logging`, `time`, and `tf` are used below but had no imports in
# this chunk — added here to avoid NameError; harmless if also imported upstream.
import logging
import time

import tensorflow as tf

from utils import checkmate as cm
from utils import data_helpers as dh

# Parameters
# ==================================================

# Ask interactively whether to train from scratch or restore a checkpoint.
TRAIN_OR_RESTORE = input("☛ Train or Restore?(T/R): ")

# Keep prompting until the answer is a single letter T or R (case-insensitive).
while not (TRAIN_OR_RESTORE.isalpha() and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input("✘ The format of your input is illegal, please re-input: ")
logging.info("✔︎ The format of your input is legal, now loading to next step...")

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Route log output to a mode-specific, timestamped file.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn("tflog", "logs/training-{0}.log".format(time.asctime()))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn("tflog", "logs/restore-{0}.log".format(time.asctime()))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
METADATA_DIR = '../data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR, "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR, "Data source for the validation data.")
tf.flags.DEFINE_string("metadata_file", METADATA_DIR, "Metadata file for embedding visualization"
                                                      "(Each line is a word segment in metadata_file).")
tf.flags.DEFINE_string("train_or_restore", TRAIN_OR_RESTORE, "Train or Restore.")

# Model Hyperparameters
__author__ = 'Randolph'

import os
import sys
import time

import numpy as np
import tensorflow as tf

from utils import checkmate as cm
from utils import data_helpers as dh
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# Parameters
# ==================================================

# One timestamped log file per test run.
logger = dh.logger_fn("tflog", "logs/test-{0}.log".format(time.asctime()))

# The checkpoint directory name is a 10-digit unix timestamp; keep prompting
# until the input matches that shape.
MODEL = input("☛ Please input the model file you want to test, it should be like(1490175368): ")
while True:
    if MODEL.isdigit() and len(MODEL) == 10:
        break
    MODEL = input("✘ The format of your input is illegal, it should be like(1490175368), please re-input: ")
logger.info("✔︎ The format of your input is legal, now loading to next step...")

TRAININGSET_DIR = '../data/Trains.json'
VALIDATIONSET_DIR = '../data/Validations.json'
TESTSET_DIR = '../data/Test1.json'
MODEL_DIR = 'runs/' + MODEL + '/checkpoints/'
BEST_MODEL_DIR = 'runs/' + MODEL + '/bestcheckpoints/'
SAVE_DIR = 'results/' + MODEL

# Data Parameters
# -*- coding:utf-8 -*-

import os
import sys
import time

import numpy as np
import tensorflow as tf

from utils import data_helpers as dh

# Parameters
# ==================================================

# Timestamped log file for this test run.
logger = dh.logger_fn('tflog', 'logs/test-{0}.log'.format(time.asctime()))

# Ask for the checkpoint directory name (a 10-digit unix timestamp) and
# loop until the input matches that shape.
MODEL = input("Please input the model file you want to test, it should be like(1490175368): ")
while True:
    if MODEL.isdigit() and len(MODEL) == 10:
        break
    MODEL = input('The format of your input is illegal, it should be like(1490175368), please re-input: ')
logger.info('The format of your input is legal, now loading to next step...')

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
TESTSET_DIR = '../data/Test.json'
MODEL_DIR = 'runs/' + MODEL + '/checkpoints/'
SAVE_DIR = 'results/' + MODEL

# Data Parameters
# NOTE(review): `sys`, `logging`, and `time` are used below but no imports were
# visible in this chunk — added; harmless if they are already imported upstream.
import logging
import sys
import time

sys.path.append('../')
# Silence TensorFlow's own logger before it is imported below.
logging.getLogger('tensorflow').disabled = True

import numpy as np
import tensorflow as tf
from tensorboard.plugins import projector
from text_han import TextHAN
from utils import checkmate as cm
from utils import data_helpers as dh
from utils import param_parser as parser
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

args = parser.parameter_parser()
OPTION = dh._option(pattern=0)
# Log file name reflects whether this run trains fresh ('T') or restores.
logger = dh.logger_fn("tflog", "logs/{0}-{1}.log".format('Train' if OPTION == 'T' else 'Restore', time.asctime()))


def train_han():
    """Training HAN model."""
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load sentences, labels, and training parameters
    logger.info("Loading data...")
    logger.info("Data processing...")
    train_data = dh.load_data_and_labels(args.train_file, args.num_classes, args.word2vec_file, data_aug_flag=False)
    val_data = dh.load_data_and_labels(args.validation_file, args.num_classes, args.word2vec_file, data_aug_flag=False)

    logger.info("Data padding...")
    x_train, y_train = dh.pad_data(train_data, args.pad_seq_len)
    # NOTE(review): source chunk is truncated here — the rest of train_han() is not visible.
# Parameters
# ==================================================
# NOTE(review): this chunk uses `input`, `logging`, `time`, `dh`, and `tf`;
# their imports are presumably above this chunk and not visible here — confirm.

# Ask interactively whether to train from scratch or restore a checkpoint.
TRAIN_OR_RESTORE = input("☛ Train or Restore?(T/R) \n")

while not (TRAIN_OR_RESTORE.isalpha() and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input('✘ The format of your input is illegal, please re-input: ')
logging.info('✔︎ The format of your input is legal, now loading to next step...')

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Route log output to a mode-specific, timestamped file.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn('tflog', 'logs/training-{0}.log'.format(time.asctime()))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn('tflog', 'logs/restore-{0}.log'.format(time.asctime()))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
METADATA_DIR = '../data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR, "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR, "Data source for the validation data.")
# NOTE(review): the call below was truncated mid-statement in the source; the
# closing argument is reconstructed from the parallel definition elsewhere in
# this codebase — confirm against the original file.
tf.flags.DEFINE_string("metadata_file", METADATA_DIR, "Metadata file for embedding visualization"
                                                      "(Each line is a word segment in metadata_file).")
# -*- coding:utf-8 -*-

import os
import time

import xgboost
import catboost
# from catboost import CatBoostClassifier

from utils import data_helpers as dh

data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this CatBoost run.
logger = dh.logger_fn('catlog', (logs_path + 'cat-{}.log').format(time.asctime()))


def main():
    # Preprocessing Data
    # ==================================================
    logger.info('Preprocessing data...')

    # Build the HDF caches only when either one is missing.
    train_cached = os.path.exists(data_path + 'train.hdf')
    test_cached = os.path.exists(data_path + 'test.hdf')
    if not (train_cached and test_cached):
        dh.preprocess()
    logger.info('Done preprocessing...')

    # Creating Features
    # ==================================================
# NOTE(review): `logging`, `time`, and `tf` are used below but had no imports in
# this chunk — added; harmless if they are already imported upstream.
import logging
import time

import tensorflow as tf

from utils import data_helpers as dh
from utils import embedding as eb

# NOTE(review): TRAIN_OR_RESTORE is hard-coded to 'T', which already satisfies
# the validity check, so the re-input loop below can never execute — this looks
# like a debug leftover where an input() prompt was replaced. Behavior kept;
# confirm intent before restoring the prompt.
TRAIN_OR_RESTORE = 'T'

while not (TRAIN_OR_RESTORE.isalpha() and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input("The format of your input is illegal, please re-input: ")
logging.info("The format of your input is legal, now loading to next step...")

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# ':' is replaced in asctime() so the log filename is valid on Windows.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn("tflog", "logs/training-{0}.log".format(time.asctime()).replace(':', '_'))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn("tflog", "logs/restore-{0}.log".format(time.asctime()).replace(':', '_'))

tf.flags.DEFINE_string("train_or_restore", TRAIN_OR_RESTORE, "Train or Restore.")
tf.flags.DEFINE_float("learning_rate", 0.005, "Learning rate")
tf.flags.DEFINE_float("norm_ratio", 5,
                      "The ratio of the sum of gradients norms of trainable variable (default: 1.25)")
tf.flags.DEFINE_float("keep_prob", 1, "Keep probability for dropout")
# NOTE(review): the source was truncated mid-statement here; the help string
# below is reconstructed and must be confirmed against the original file.
tf.flags.DEFINE_integer("evaluation_interval", 1, "Evaluate model after this many epochs (default: 1)")
# NOTE(review): `os` and `sys` are used below but were not imported in this
# chunk — added; harmless if they are already imported upstream.
import os
import sys
import time

import torch

sys.path.append('../')

from layers import HMIDP, Loss
from utils import checkmate as cm
from utils import data_helpers as dh
from utils import param_parser as parser
from tqdm import trange
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, r2_score

args = parser.parameter_parser()
MODEL = dh.get_model_name()
logger = dh.logger_fn("ptlog", "logs/Test-{0}.log".format(time.asctime()))

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

CPT_DIR = os.path.abspath(os.path.join(os.path.curdir, "runs", MODEL))
SAVE_DIR = os.path.abspath(os.path.join(os.path.curdir, "outputs", MODEL))


def test():
    logger.info("Loading Data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file)

    logger.info("Data padding...")
    test_dataset = dh.MyData(test_data, args.pad_seq_len, device)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    # NOTE(review): source chunk is truncated here — the rest of test() is not visible.
# -*- coding:utf-8 -*-

import os
import sys
import time

import numpy as np
import tensorflow as tf

from utils import checkmate as cm
from utils import data_helpers as dh
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
from sklearn.metrics import hamming_loss, roc_auc_score, accuracy_score, label_ranking_average_precision_score

# Parameters
# ==================================================

# ':' in asctime() is replaced so the log filename is portable across platforms.
logger = dh.logger_fn("tflog", "logs/test-{0}.log".format(time.asctime()).replace(":", "_"))

# The model directory name is a 10-digit timestamp; keep prompting until it matches.
MODEL = input("Please input the model file you want to test, it should be like(1490175368): ")
while True:
    if MODEL.isdigit() and len(MODEL) == 10:
        break
    MODEL = input("The format of your input is illegal, it should be like(1490175368), please re-input: ")
logger.info("The format of your input is legal, now loading to next step...")

TRAININGSET_DIR = '../data/train.json'
VALIDATIONSET_DIR = '../data/validation.json'
TESTSET_DIR = '../data/test.json'
MODEL_DIR = 'runs/' + MODEL + '/checkpoints/'
import gc
import time

import numpy as np
import pandas as pd
import xgboost as xgb
# NOTE(review): wildcard import — prefer explicit names (e.g. tqdm, trange);
# kept as-is because unseen code below may rely on other exported names.
from tqdm import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from utils import data_helpers as dh

data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this XGBoost run.
logger = dh.logger_fn('xgblog', (logs_path + 'xgb-{}.log').format(time.asctime()))

# Loading Data
# ==================================================
logger.info('Loading data...')

train = pd.read_csv(data_path + 'train.csv')
test = pd.read_csv(data_path + 'test.csv')

logger.info('Done loading...')

# Checking nan value
# ==================================================
logger.info('Checking nan value...')
import os
import sys
import time

import numpy as np
import tensorflow as tf
from sklearn import metrics
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from utils import checkmate as cm
from utils import data_helpers as dh
import csv

# Parameters
# ==================================================

# ':' in asctime() is replaced so the log filename is valid on every platform.
logger = dh.logger_fn("tflog", "logs/test-{0}.log".format(time.asctime()).replace(':', '_'))

# Keep prompting until a 10-digit model id (a unix timestamp) is supplied.
MODEL = input("Please input the model file you want to test, it should be like(1490175368): ")
while True:
    if MODEL.isdigit() and len(MODEL) == 10:
        break
    MODEL = input("The format of your input is illegal, it should be like(1490175368), please re-input: ")
logger.info("The format of your input is legal, now loading to next step...")

TESTSET_DIR = 'data/assist2009_updated_all.csv'
MODEL_DIR = 'runs/' + MODEL + '/checkpoints/'
BEST_MODEL_DIR = 'runs/' + MODEL + '/bestcheckpoints/'
SAVE_DIR = 'results/' + MODEL
import os
import math
import random
import time
import logging
import pickle

import torch
import numpy as np
from math import ceil

from utils import data_helpers as dh
from config import Config
from rnn_model import DRModel

logging.info("✔︎ DREAM Model Training...")
logger = dh.logger_fn("torch-log", "logs/training-{0}.log".format(time.asctime()))

# Log every configuration attribute as a fixed-width table.
# NOTE(review): the original re-instantiated Config() on every loop iteration
# (three times per attribute overall); hoisted to a single instance — assumes
# Config() construction is deterministic and side-effect free. Confirm.
_config_attrs = Config().__dict__
dilim = '-' * 120
logger.info(dilim)
for attr in sorted(_config_attrs):
    logger.info('{:>50}|{:<50}'.format(attr.upper(), _config_attrs[attr]))
logger.info(dilim)


def train():
    # Load data
    logger.info("✔︎ Loading data...")
    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)
    # NOTE(review): source chunk is truncated here — the rest of train() is not visible.
# Parameters
# ==================================================
# NOTE(review): this chunk uses `input`, `logging`, `time`, `dh`, and `tf`;
# their imports are presumably above this chunk and not visible here — confirm.

# Ask interactively whether to train from scratch or restore a checkpoint.
TRAIN_OR_RESTORE = input("Train or Restore?(T/R) \n")

while not (TRAIN_OR_RESTORE.isalpha() and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input('The format of your input is illegal, please re-input: ')
logging.info('The format of your input is legal, now loading to next step...')

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# ':' is replaced in asctime() so the log filename is valid on Windows.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn('tflog', 'logs/training-{0}.log'.format(time.asctime().replace(":", "_")))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn('tflog', 'logs/restore-{0}.log'.format(time.asctime().replace(":", "_")))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
# Fixed: was './data/metadata.tsv', inconsistent with the '../data/' prefix
# used by the sibling dataset paths above (and by the parallel train scripts).
METADATA_DIR = '../data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR, "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR, "Data source for the validation data.")
# -*- coding:utf-8 -*-

import gc
import time

import numpy as np
import pandas as pd
import lightgbm as lgb

from utils import data_helpers as dh

data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this LightGBM run.
logger = dh.logger_fn('lgbmlog', (logs_path + 'lgbm-{}.log').format(time.asctime()))

# Loading Data
# ==================================================
logger.info('Loading data...')

# Compact integer dtypes keep the (large) training table small in memory.
# NOTE(review): the dtype mapping was truncated mid-statement in the source;
# the closing braces below are reconstructed and further dtype entries may be
# missing — confirm against the original file.
train = pd.read_csv(data_path + 'train_lgbm.csv',
                    dtype={
                        'target': np.uint8,
                        'bd': np.uint8,
                        'membership_days': np.uint16,
                        'song_length': np.uint16,
                        'genre_count': np.uint8,
                        'artist_count': np.uint8,
                        'composer_count': np.uint8,
                        'lyricist_count': np.uint8,
                    })