from collections import OrderedDict

np.random.seed(0)

# 4. Set the logger
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# 5. Create the input data pipeline
logging.info("Loading the datasets...")

# 5.1 specify features
data_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'data_encoder.pkl'))
label_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'label_encoder.pkl'))

# 5.2 load data
data_loader = DataLoader(params, args.data_dir, data_encoder, label_encoder)
data = data_loader.load_data(['test'])
test_data = data['test']

# 5.3 specify the test dataset size
params.test_size = test_data['size']
test_data_iterator = data_loader.batch_iterator(test_data, params,
                                                shuffle=False)

logging.info("- done.")

# 6. Modeling
# 6.1 Define the model
model = LSTMCRF(
    params=params,
    char_vocab_length=data_encoder[CharEncoder.FEATURE_NAME].vocab_length,
    num_tags=label_encoder[EntityEncoder.FEATURE_NAME].num_tags,
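# The `utils.load_obj` calls above assume simple pickle-based persistence
# helpers for the fitted encoders. A minimal sketch of what they could look
# like (hypothetical implementation inferred from usage, not the repo's
# actual code):
import pickle

def save_obj(obj, path):
    """Serialize an arbitrary Python object (e.g. a fitted encoder) to disk."""
    with open(path, 'wb') as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(path):
    """Load a pickled object, e.g. the data or label encoder saved at
    training time."""
    with open(path, 'rb') as f:
        return pickle.load(f)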
data_encoder = utils.load_obj(
    os.path.join(pretrained_model_dir, 'data_encoder.pkl'))
pretrained_label_encoder = utils.load_obj(
    os.path.join(pretrained_model_dir, 'label_encoder.pkl'))
label_encoder = OrderedDict()
label_encoder[ClassEncoder.FEATURE_NAME] = ClassEncoder(
    os.path.join(args.data_dir, 'feats'))

# 5.2 load data
k_fold = None
combine_train_dev = False
train_on_dev = False
data_loader = DataLoader(params, args.data_dir, data_encoder, label_encoder)
if k_fold:
    logging.info('K-Fold turned on with folds: {}'.format(k_fold))
    splits_dir = [
        os.path.join(args.data_dir, 'split_' + str(split_num))
        for split_num in range(1, k_fold + 1)
    ]
else:
    splits_dir = [args.data_dir]

for split_dir in splits_dir:
    logging.info('training for: {}'.format(split_dir))
    args.data_dir = split_dir
    if k_fold:
        split_model_dir = os.path.join(args.model_dir,
                                       os.path.basename(split_dir))
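# The k-fold branch above expects the data directory to contain
# subdirectories `split_1` ... `split_k`. A hypothetical sketch of how such
# splits could be generated (helper name, file names, and layout are
# assumptions, not the repo's actual code):
import os
import random

def make_kfold_splits(examples, data_dir, k_fold, seed=0):
    """Round-robin partition `examples` into k folds; for each fold, write
    the held-out slice as test.txt and the remainder as train.txt."""
    rng = random.Random(seed)
    examples = list(examples)
    rng.shuffle(examples)
    folds = [examples[i::k_fold] for i in range(k_fold)]
    for split_num in range(1, k_fold + 1):
        split_dir = os.path.join(data_dir, 'split_' + str(split_num))
        os.makedirs(split_dir, exist_ok=True)
        test = folds[split_num - 1]
        train = [ex for i, fold in enumerate(folds)
                 if i != split_num - 1 for ex in fold]
        for name, subset in (('train.txt', train), ('test.txt', test)):
            with open(os.path.join(split_dir, name), 'w') as f:
                f.write('\n'.join(subset) + '\n')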
np.random.seed(0)

# 4. Set the logger
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# 5. Create the input data pipeline
logging.info("Loading the datasets...")

# 5.1 specify features
data_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'data_encoder.pkl'))
label_encoder = utils.load_obj(
    os.path.join(args.model_dir, 'label_encoder.pkl'))

# 5.2 load data
data_loader = DataLoader(params, args.data_dir, data_encoder, label_encoder)
data = data_loader.load_data([data_to_use])
test_data = data[data_to_use]

# 5.3 specify the test dataset size
params.test_size = test_data['size']
test_data_iterator = data_loader.batch_iterator(test_data, params,
                                                shuffle=False,
                                                sort_by_legth=False)

logging.info("- done.")

# 6. Modeling
# 6.1 Define the model
from src.tc.model.net import CNNTC
model = CNNTC(
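# A minimal sketch of the batching behaviour the `batch_iterator` call above
# relies on: shuffling is disabled at test time so results are reproducible,
# and length-sorting is disabled because the CNN pads every example to a
# fixed width anyway. Hypothetical re-implementation (function and parameter
# names assumed), not the repo's actual DataLoader code:
import numpy as np

def batch_iterator(examples, labels, batch_size, shuffle=False,
                   sort_by_length=False):
    order = np.arange(len(examples))
    if shuffle:
        np.random.shuffle(order)
    elif sort_by_length:
        # Group similarly sized sequences to reduce padding per batch.
        order = order[np.argsort([len(examples[i]) for i in order])]
    for start in range(0, len(order), batch_size):
        idx = order[start:start + batch_size]
        yield [examples[i] for i in idx], [labels[i] for i in idx]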