Example #1
0
from utils import checkmate as cm
from utils import data_helpers as dh

# Parameters
# ==================================================

# Interactive mode switch: 'T' trains from scratch, 'R' restores a checkpoint.
TRAIN_OR_RESTORE = input("☛ Train or Restore?(T/R): ")

# Re-prompt until the reply is T or R (case-insensitive).  The original also
# called .isalpha(), which is redundant: any string whose .upper() is 'T' or
# 'R' is necessarily a single alphabetic character.
while TRAIN_OR_RESTORE.upper() not in ('T', 'R'):
    TRAIN_OR_RESTORE = input("✘ The format of your input is illegal, please re-input: ")
logging.info("✔︎ The format of your input is legal, now loading to next step...")

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Write logs to a mode-specific, timestamped file.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn("tflog", "logs/training-{0}.log".format(time.asctime()))
elif TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn("tflog", "logs/restore-{0}.log".format(time.asctime()))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
METADATA_DIR = '../data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR, "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR, "Data source for the validation data.")
tf.flags.DEFINE_string("metadata_file", METADATA_DIR, "Metadata file for embedding visualization"
                                                      "(Each line is a word segment in metadata_file).")
tf.flags.DEFINE_string("train_or_restore", TRAIN_OR_RESTORE, "Train or Restore.")

# Model Hyperparameters
Example #2
0
__author__ = 'Randolph'

import os
import sys
import time
import numpy as np
import tensorflow as tf

from utils import checkmate as cm
from utils import data_helpers as dh
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# Parameters
# ==================================================

# Timestamped log file for this test run.
logger = dh.logger_fn("tflog", "logs/test-{0}.log".format(time.asctime()))

MODEL = input("☛ Please input the model file you want to test, it should be like(1490175368): ")

# A valid model name is exactly 10 digits (a Unix-timestamp-style id);
# keep prompting until the reply matches.
while not (len(MODEL) == 10 and MODEL.isdigit()):
    MODEL = input("✘ The format of your input is illegal, it should be like(1490175368), please re-input: ")
logger.info("✔︎ The format of your input is legal, now loading to next step...")

# Dataset locations plus checkpoint/result paths derived from the model id.
TRAININGSET_DIR = '../data/Trains.json'
VALIDATIONSET_DIR = '../data/Validations.json'
TESTSET_DIR = '../data/Test1.json'
MODEL_DIR = 'runs/{0}/checkpoints/'.format(MODEL)
BEST_MODEL_DIR = 'runs/{0}/bestcheckpoints/'.format(MODEL)
SAVE_DIR = 'results/{0}'.format(MODEL)
# Data Parameters
Example #3
0
# -*- coding:utf-8 -*-

import os
import sys
import time
import numpy as np
import tensorflow as tf
from utils import data_helpers as dh

# Parameters
# ==================================================

# Timestamped log file for this evaluation run.
logger = dh.logger_fn('tflog', 'logs/test-{0}.log'.format(time.asctime()))

MODEL = input("Please input the model file you want to test, it should be like(1490175368): ")

# A valid model name is exactly 10 digits (a Unix-timestamp-style id);
# keep prompting until the reply matches.
while not (len(MODEL) == 10 and MODEL.isdigit()):
    MODEL = input('The format of your input is illegal, it should be like(1490175368), please re-input: ')
logger.info('The format of your input is legal, now loading to next step...')

# Dataset locations plus checkpoint/result paths derived from the model id.
TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
TESTSET_DIR = '../data/Test.json'
MODEL_DIR = 'runs/{0}/checkpoints/'.format(MODEL)
SAVE_DIR = 'results/{0}'.format(MODEL)

# Data Parameters
sys.path.append('../')
logging.getLogger('tensorflow').disabled = True

import numpy as np
import tensorflow as tf

from tensorboard.plugins import projector
from text_han import TextHAN
from utils import checkmate as cm
from utils import data_helpers as dh
from utils import param_parser as parser
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score

# Hyperparameters parsed from the command line / defaults (utils.param_parser).
args = parser.parameter_parser()
# OPTION selects between the 'Train' and 'Restore' log names below.
# NOTE(review): semantics of pattern=0 are not visible here — confirm in
# utils.data_helpers.
OPTION = dh._option(pattern=0)
logger = dh.logger_fn("tflog", "logs/{0}-{1}.log".format('Train' if OPTION == 'T' else 'Restore', time.asctime()))


def train_han():
    """Training HAN model."""
    # Print parameters used for the model
    dh.tab_printer(args, logger)

    # Load sentences, labels, and training parameters
    logger.info("Loading data...")
    logger.info("Data processing...")
    train_data = dh.load_data_and_labels(args.train_file, args.num_classes, args.word2vec_file, data_aug_flag=False)
    val_data = dh.load_data_and_labels(args.validation_file, args.num_classes, args.word2vec_file, data_aug_flag=False)

    logger.info("Data padding...")
    x_train, y_train = dh.pad_data(train_data, args.pad_seq_len)
Example #5
0
# Parameters
# ==================================================

# Interactive mode switch: 'T' trains from scratch, 'R' restores a checkpoint.
TRAIN_OR_RESTORE = input("☛ Train or Restore?(T/R) \n")

# Keep prompting until the answer is a single letter T or R (any case).
while not (TRAIN_OR_RESTORE.upper() in ('T', 'R') and TRAIN_OR_RESTORE.isalpha()):
    TRAIN_OR_RESTORE = input('✘ The format of your input is illegal, please re-input: ')
logging.info('✔︎ The format of your input is legal, now loading to next step...')

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Write logs to a mode-specific, timestamped file.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn('tflog', 'logs/training-{0}.log'.format(time.asctime()))
elif TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn('tflog', 'logs/restore-{0}.log'.format(time.asctime()))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
METADATA_DIR = '../data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR,
                       "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR,
                       "Data source for the validation data.")
tf.flags.DEFINE_string(
    "metadata_file", METADATA_DIR, "Metadata file for embedding visualization"
# -*- coding:utf-8 -*-

import os
import time
import xgboost
import catboost

# from catboost import CatBoostClassifier
from utils import data_helpers as dh

# Directory layout shared by the scripts in this project.
data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this CatBoost run.
logger = dh.logger_fn('catlog', (logs_path + 'cat-{}.log').format(time.asctime()))


def main():
    # Preprocessing Data
    # ==================================================
    logger.info('Preprocessing data...')

    if os.path.exists(data_path + 'train.hdf') and os.path.exists(data_path + 'test.hdf'):
        pass
    else:
        dh.preprocess()

    logger.info('Done preprocessing...')

    # Creating Features
    # ==================================================
Example #7
0
File: train.py  Project: Shaoxmor/TCN-KT
from utils import data_helpers as dh
from utils import embedding as eb

# Mode is hardcoded to 'T' (train); the validation loop below is therefore
# dead code — 'T' already satisfies the condition, so no prompt is ever shown.
TRAIN_OR_RESTORE = 'T'

while not (TRAIN_OR_RESTORE.isalpha()
           and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input(
        "The format of your input is illegal, please re-input: ")
logging.info("The format of your input is legal, now loading to next step...")

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Mode-specific log file; colons from asctime() are replaced with underscores,
# presumably so the filename is valid on Windows.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn(
        "tflog",
        "logs/training-{0}.log".format(time.asctime()).replace(':', '_'))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn(
        "tflog",
        "logs/restore-{0}.log".format(time.asctime()).replace(':', '_'))

# Model hyperparameter flags.
tf.flags.DEFINE_string("train_or_restore", TRAIN_OR_RESTORE,
                       "Train or Restore.")
tf.flags.DEFINE_float("learning_rate", 0.005, "Learning rate")
# NOTE(review): the value 5 disagrees with the "(default: 1.25)" text in its
# own help string — confirm which is intended.
tf.flags.DEFINE_float(
    "norm_ratio", 5,
    "The ratio of the sum of gradients norms of trainable variable (default: 1.25)"
)
tf.flags.DEFINE_float("keep_prob", 1, "Keep probability for dropout")
tf.flags.DEFINE_integer("evaluation_interval", 1,
Example #8
0
import time
import torch

sys.path.append('../')

from layers import HMIDP, Loss
from utils import checkmate as cm
from utils import data_helpers as dh
from utils import param_parser as parser
from tqdm import trange
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_squared_error, r2_score

# Hyperparameters parsed from the command line / defaults (utils.param_parser).
args = parser.parameter_parser()
# Model id used below as the runs/<MODEL> subdirectory name.
MODEL = dh.get_model_name()
# Timestamped log file for this test run.
logger = dh.logger_fn("ptlog", "logs/Test-{0}.log".format(time.asctime()))
# Prefer GPU when available, else fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoint input and evaluation-output directories for this model.
CPT_DIR = os.path.abspath(os.path.join(os.path.curdir, "runs", MODEL))
SAVE_DIR = os.path.abspath(os.path.join(os.path.curdir, "outputs", MODEL))


def test():
    logger.info("Loading Data...")
    logger.info("Data processing...")
    test_data = dh.load_data_and_labels(args.test_file, args.word2vec_file)
    logger.info("Data padding...")
    test_dataset = dh.MyData(test_data, args.pad_seq_len, device)
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             shuffle=False)
Example #9
0
# -*- coding:utf-8 -*-
import os
import sys
import time
import numpy as np
import tensorflow as tf

from utils import checkmate as cm
from utils import data_helpers as dh
from sklearn.metrics import precision_score, recall_score, f1_score, average_precision_score
from sklearn.metrics import hamming_loss, roc_auc_score, accuracy_score, label_ranking_average_precision_score

# Parameters
# ==================================================

# Timestamped log file; colons from asctime() are replaced with underscores,
# presumably so the filename is valid on Windows.
logger = dh.logger_fn(
    "tflog", "logs/test-{0}.log".format(time.asctime()).replace(":", "_"))

MODEL = input("Please input the model file you want to test, it should be like(1490175368): ")

# A valid model name is exactly 10 digits (a Unix-timestamp-style id);
# keep prompting until the reply matches.
while not (len(MODEL) == 10 and MODEL.isdigit()):
    MODEL = input("The format of your input is illegal, it should be like(1490175368), please re-input: ")
logger.info("The format of your input is legal, now loading to next step...")

# Dataset locations plus the checkpoint path derived from the model id.
TRAININGSET_DIR = '../data/train.json'
VALIDATIONSET_DIR = '../data/validation.json'
TESTSET_DIR = '../data/test.json'
MODEL_DIR = 'runs/{0}/checkpoints/'.format(MODEL)
Example #10
0
import gc
import time
import numpy as np
import pandas as pd
import xgboost as xgb

from tqdm import *
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from utils import data_helpers as dh

# Directory layout shared by the scripts in this project.
data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this XGBoost run.
logger = dh.logger_fn('xgblog',
                      (logs_path + 'xgb-{}.log').format(time.asctime()))

# Loading Data
# ==================================================

logger.info('Loading data...')

# Raw train/test tables straight from CSV; no dtype coercion here.
train = pd.read_csv(data_path + 'train.csv')
test = pd.read_csv(data_path + 'test.csv')

logger.info('Done loading...')

# Checking nan value
# ==================================================

logger.info('Checking nan value...')
Example #11
0
import os
import sys
import time
import numpy as np
import tensorflow as tf
from sklearn import metrics
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from utils import checkmate as cm
from utils import data_helpers as dh
import csv
# Parameters
# ==================================================

# Timestamped log file; colons from asctime() are replaced with underscores,
# presumably so the filename is valid on Windows.
logger = dh.logger_fn(
    "tflog", "logs/test-{0}.log".format(time.asctime()).replace(':', '_'))

MODEL = input(
    "Please input the model file you want to test, it should be like(1490175368): "
)

# A valid model name is exactly 10 digits (a Unix-timestamp-style id).
while not (MODEL.isdigit() and len(MODEL) == 10):
    MODEL = input(
        "The format of your input is illegal, it should be like(1490175368), please re-input: "
    )
logger.info("The format of your input is legal, now loading to next step...")

# Dataset location plus checkpoint/result paths derived from the model id.
TESTSET_DIR = 'data/assist2009_updated_all.csv'
MODEL_DIR = 'runs/' + MODEL + '/checkpoints/'
BEST_MODEL_DIR = 'runs/' + MODEL + '/bestcheckpoints/'
SAVE_DIR = 'results/' + MODEL
import os
import math
import random
import time
import logging
import pickle
import torch
import numpy as np
from math import ceil
from utils import data_helpers as dh
from config import Config
from rnn_model import DRModel

logging.info("✔︎ DREAM Model Training...")
# Timestamped log file for this training run.
logger = dh.logger_fn("torch-log",
                      "logs/training-{0}.log".format(time.asctime()))

# Pretty-print every configuration attribute between two divider lines.
# Build the Config once instead of re-instantiating it for the sort and
# twice more on every loop iteration, as the original did (assumes Config()
# has no side effects beyond attribute setup — TODO confirm).
dilim = '-' * 120
config_attrs = Config().__dict__
logger.info(dilim)
for attr in sorted(config_attrs):
    logger.info('{:>50}|{:<50}'.format(attr.upper(), config_attrs[attr]))
logger.info(dilim)


def train():
    # Load data
    logger.info("✔︎ Loading data...")

    logger.info("✔︎ Training data processing...")
    train_data = dh.load_data(Config().TRAININGSET_DIR)
# Parameters
# ==================================================

# Interactive mode switch: 'T' trains from scratch, 'R' restores a checkpoint.
TRAIN_OR_RESTORE = input("Train or Restore?(T/R) \n")

# Keep prompting until the answer is a single letter T or R (any case).
while not (TRAIN_OR_RESTORE.isalpha()
           and TRAIN_OR_RESTORE.upper() in ['T', 'R']):
    TRAIN_OR_RESTORE = input(
        'The format of your input is illegal, please re-input: ')
logging.info('The format of your input is legal, now loading to next step...')

TRAIN_OR_RESTORE = TRAIN_OR_RESTORE.upper()

# Mode-specific log file; colons from asctime() are replaced with underscores,
# presumably so the filename is valid on Windows.
if TRAIN_OR_RESTORE == 'T':
    logger = dh.logger_fn(
        'tflog',
        'logs/training-{0}.log'.format(time.asctime().replace(":", "_")))
if TRAIN_OR_RESTORE == 'R':
    logger = dh.logger_fn(
        'tflog',
        'logs/restore-{0}.log'.format(time.asctime().replace(":", "_")))

TRAININGSET_DIR = '../data/Train.json'
VALIDATIONSET_DIR = '../data/Validation.json'
# NOTE(review): './data' differs from the '../data' prefix used just above —
# confirm whether the metadata file really lives next to the script.
METADATA_DIR = './data/metadata.tsv'

# Data Parameters
tf.flags.DEFINE_string("training_data_file", TRAININGSET_DIR,
                       "Data source for the training data.")
tf.flags.DEFINE_string("validation_data_file", VALIDATIONSET_DIR,
                       "Data source for the validation data.")
# -*- coding:utf-8 -*-

import gc
import time
import numpy as np
import pandas as pd
import lightgbm as lgb
from utils import data_helpers as dh

# Directory layout shared by the scripts in this project.
data_path = '../data/'
result_path = '../result/'
logs_path = '../logs/'

# Timestamped log file for this LightGBM run.
logger = dh.logger_fn('lgbmlog',
                      (logs_path + 'lgbm-{}.log').format(time.asctime()))

# Loading Data
# ==================================================

logger.info('Loading data...')

train = pd.read_csv(data_path + 'train_lgbm.csv',
                    dtype={
                        'target': np.uint8,
                        'bd': np.uint8,
                        'membership_days': np.uint16,
                        'song_length': np.uint16,
                        'genre_count': np.uint8,
                        'artist_count': np.uint8,
                        'composer_count': np.uint8,
                        'lyricist_count': np.uint8,