class PoemBot:
    def __init__(self, config, builder):
        self.config_path = config
        self.config = configparser.ConfigParser()
        self.config.read(self.config_path)
        self.builder = builder
        self.forms = loader.get_forms()
        self.forms_store = builder.get_object('forms_list_store')
        self.styles = loader.get_dirs('data')
        self.styles_store = self.builder.get_object('styles_list_store')
        self.config_store = builder.get_object('config_tree_store')
        self.poem_view = builder.get_object('poem_view')
        self.window = self.builder.get_object('main_window')
        self.window.connect('delete-event', Gtk.main_quit)
        self.handlers = {
            'select_style': self.select_style,
            'select_form': self.select_form,
            'edit_config': self.edit_config,
            'train_models': self.train_models,
            'build_poem': self.build_poem
        }

    def select_style(self, widget, path):
        self.styles_store[path][1] = not self.styles_store[path][1]

    def select_form(self, widget, path):
        self.forms_store[path][2] = not self.forms_store[path][2]

    def edit_config(self, widget, path, text):
        row = self.config_store[path]
        if row.parent is not None:  # can't edit top-level rows
            row[1] = text
            # save new config file
            self.config[row.parent[0]][row[0]] = text
            with open(self.config_path, 'w') as fh:
                self.config.write(fh)

    def train_models(self, *args):
        # initialize models
        p_len = self.config['VocabularyModel'].getint('PrefixSize')
        s_len = self.config['VocabularyModel'].getint('SuffixSize')
        regex = self.config['Tokenizer'].get('Regex')
        self.vocab_model = VocabularyModel(p_len, s_len, regex)
        # load corpus
        style_corpus = []
        for path in self._get_styles():
            style_corpus += loader.load_corpus(path)
        # train style models
        self.trainer = Trainer(style_corpus, self.vocab_model)
        self.trainer.on_update(self._update_progress)
        self.trainer.train_all()

    def build_poem(self, *args):
        self.form_model = self._pick_form()()
        self.pipeline = Pipeline(
            self.vocab_model.weight,
            self.form_model.weight
        )
        # start the state as the empty string
        state = ['']
        # start with no known transitions
        transitions = []
        for i in range(50):
            state += self._pick(self.pipeline.pipe(state, transitions))
        self.poem_view.get_buffer().set_text(' '.join(state))

    def start(self):
        self._load_styles()
        self._load_forms()
        self._load_config()
        self.builder.connect_signals(self.handlers)
        self.window.show_all()
        Gtk.main()

    def _update_progress(self):
        pass

    def _pick(self, options):
        """Pick a choice from a list of weighted options.

        Arguments:
            options: A list of (choice, probability) tuples, where each
                probability lies within [0, 1] and the probabilities sum
                to at most 1.
        """
        roll = random.random()
        result = None
        cumsum = 0
        while cumsum < roll and options:
            result = options.pop()
            cumsum += result[1]
        return result[0]

    def _get_styles(self):
        """Get the selected style paths."""
        paths = []
        for row in self.styles_store:
            if row[1]:
                paths.append(row[0])
        return paths

    def _get_forms(self):
        """Get the selected forms."""
        paths = []
        for row in self.forms_store:
            if row[2]:
                paths.append(globals()[row[1]])
        return paths

    def _load_styles(self):
        for s in self.styles:
            self.styles_store.append((s, False))

    def _load_forms(self):
        for f in self.forms:
            self.forms_store.append((f.name, f.__name__, False))

    def _load_config(self):
        for section in self.config.sections():
            piter = self.config_store.append(None, (section, ''))
            for key in self.config[section]:
                val = self.config[section][key]
                self.config_store.append(piter, (key, val))

    def _pick_form(self):
        """Pick a random selected form."""
        return random.choice(self._get_forms())

    def _on_update(self):
        pass
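# A minimal wiring sketch for the class above (not part of the original): it assumes a
# hypothetical Glade UI file 'poembot.glade' that defines the objects PoemBot looks up, and a
# hypothetical 'config.ini' with [VocabularyModel] and [Tokenizer] sections.
import gi
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk

if __name__ == '__main__':
    builder = Gtk.Builder()
    builder.add_from_file('poembot.glade')  # hypothetical UI definition
    bot = PoemBot('config.ini', builder)    # hypothetical config path
    bot.start()                             # blocks in Gtk.main() until the window closes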
def train(args):
    if args.hyperparametercsv is not None:
        args = readHyperparameterCSV(args)

    region = "HOLL_2018_MT_pilot"

    traindataloader = getDataloader(
        dataset=args.dataset,
        partition=args.train_on,
        batch_size=args.batchsize,
        num_workers=args.workers,
        shuffle=True,
        pin_memory=True,
        train_valid_split_ratio=args.train_valid_split_ratio,
        train_valid_split_seed=args.train_valid_split_seed,
        region=region,
        classmapping=args.classmapping,
        seed=args.seed,
        ndvi=args.ndvi,
        nsamples=args.nsamples)

    testdataloader = getDataloader(
        dataset=args.dataset,
        partition=args.test_on,
        batch_size=args.batchsize,
        num_workers=args.workers,
        shuffle=False,
        pin_memory=True,
        train_valid_split_ratio=args.train_valid_split_ratio,
        train_valid_split_seed=args.train_valid_split_seed,
        region=region,
        classmapping=args.classmapping,
        seed=args.seed,
        ndvi=args.ndvi,
        nsamples=args.nsamples)

    #evaldataloader = getDataloader(dataset=args.dataset,
    #                               partition="eval",
    #                               batch_size=args.batchsize,
    #                               num_workers=args.workers,
    #                               shuffle=False,
    #                               pin_memory=True,
    #                               train_valid_split_ratio=args.train_valid_split_ratio,
    #                               train_valid_split_seed=args.train_valid_split_seed,
    #                               region=region)

    args.nclasses = traindataloader.dataset.nclasses
    args.seqlength = traindataloader.dataset.sequencelength
    args.input_dims = traindataloader.dataset.ndims

    model = getModel(args)
    # np.array([np.array(p.shape).prod() for p in model.parameters()]).sum()

    if not args.no_visdom:
        visdomenv = "{}_{}_{}".format(args.experiment, args.dataset, args.loss_mode.replace("_", "-"))
    else:
        visdomenv = None
    print("Visdom Environment: {}".format(visdomenv))

    config = dict(epochs=args.epochs,
                  learning_rate=args.learning_rate,
                  earliness_factor=args.earliness_factor,
                  visdomenv=visdomenv,
                  switch_epoch=args.switch_epoch,
                  loss_mode=args.loss_mode,
                  show_n_samples=args.show_n_samples,
                  store=os.path.join(args.store, args.experiment, args.dataset),
                  overwrite=args.overwrite,
                  ptsepsilon=args.epsilon,
                  test_every_n_epochs=args.test_every_n_epochs,
                  entropy_factor=args.entropy_factor,
                  resume_optimizer=args.resume_optimizer,
                  warmup_steps=args.warmup_steps,
                  earliness_reward_power=args.earliness_reward_power)

    trainer = Trainer(model, traindataloader, testdataloader, **config)
    trainer.fit()
    #stats = trainer.test_epoch(evaldataloader)
def main(logger, args): df_train, _ = load_data(INPUT_DIR, logger) logger.info('Preprocess text') if args['debug']: df_train = df_train.iloc[:200000] else: df_train = preprocess_text(df_train) seq_train, tokenizer = tokenize_text(df_train, logger) logger.info('Pad train text data') seq_train = pad_sequences(seq_train, maxlen=PADDING_LENGTH) mask_train = np.not_equal(seq_train, 0) label_train = df_train['target'].values.reshape(-1, 1) logger.info('Load multiple embeddings') if args['debug']: embedding_matrix = np.random.rand(len(tokenizer.word_index) + 1, 300) else: embedding_matrices = load_multiple_embeddings( tokenizer.word_index, embed_types=[0, 2], max_workers=args['max_workers']) embedding_matrix = np.array(embedding_matrices).mean(0) # ===== training and evaluation loop ===== # device_ids = args['device_ids'] output_device = device_ids[0] torch.cuda.set_device(device_ids[0]) torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = True batch_size = args['batch_size'] * len(device_ids) epochs = EPOCHS logger.info('Start training and evaluation loop') model_specs = [{ 'attention_type': 'general', 'mask': False }, { 'attention_type': 'dot', 'mask': False }, { 'attention_type': 'general', 'mask': True }, { 'attention_type': 'dot', 'mask': True }] model_name_base = 'AttentionMaskRNN' for spec_id, spec in enumerate(model_specs): model_name = model_name_base + f'_specId={spec_id}_type={spec["attention_type"]}_mask={spec["mask"]}' skf = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED) oof_preds_optimized = np.zeros(seq_train.shape[0]) oof_preds_majority = np.zeros(seq_train.shape[0]) results = [] for fold, (index_train, index_valid) in enumerate( skf.split(label_train, label_train)): logger.info( f'Fold {fold + 1} / {KFOLD} - create dataloader and build model' ) if spec['mask']: x_train = { 'sequence': seq_train[index_train].astype(int), 'mask': mask_train[index_train].astype(np.float32) } x_valid = { 'sequence': seq_train[index_valid].astype(int), 'mask': mask_train[index_valid].astype(np.float32) } else: x_train, x_valid = seq_train[index_train].astype( int), seq_train[index_valid].astype(int) y_train, y_valid = label_train[index_train].astype( np.float32), label_train[index_valid].astype(np.float32) model = AttentionMaskRNN(embedding_matrix, hidden_size=64, out_hidden_dim=64, embed_drop=0.2, out_drop=0.3, attention_type=spec['attention_type'], mask=spec['mask']) config = { 'epochs': epochs, 'batch_size': batch_size, 'output_device': output_device, 'criterion_type': 'bce', 'criteria_weights': [0.5, 0.5], 'criterion_gamma': 2.0, 'criterion_alpha': 0.25, 'optimizer': 'adam', 'optimizer_lr': 0.003, 'num_snapshots': NUM_SNAPSHOTS, 'scheduler_type': 'cyclic', 'base_lr': 0.00001, 'max_lr': 0.003, 'step_size': 1200, 'scheduler_mode': 'triangular', 'scheduler_gamma': 0.9, 'scheduler_trigger_steps': 4000, 'sampler_type': 'normal', 'seed': SEED } trainer = Trainer(model, logger, config) eval_results = trainer.train_and_eval_fold(x_train, y_train, x_valid, y_valid, fold) oof_preds_majority[index_valid] = np.array( [res['preds_binary'] for res in eval_results]).mean(0) > 0.5 oof_majority_f1 = f1_score( label_train.reshape(-1, )[index_valid], oof_preds_majority[index_valid]) oof_preds_proba = np.array( [res['preds_proba'] for res in eval_results]).mean(0) oof_threshold_mean: float = np.mean( [res['best_threshold'] for res in eval_results]) oof_preds_optimized[ index_valid] = oof_preds_proba > oof_threshold_mean oof_optimized_f1 = f1_score( label_train.reshape(-1, 
)[index_valid], oof_preds_optimized[index_valid]) message = f'Fold {fold + 1} / {KFOLD} has been done.\n' message += f'Score: majority voting - {oof_majority_f1:.6f}, optimized threshold - {oof_optimized_f1:.6f}' logger.post(message) post_to_snapshot_spreadsheet( logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='SCORE', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, snapshot_info=[res['f1'] for res in eval_results]) post_to_snapshot_spreadsheet( logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='THRESHOLD', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, snapshot_info=[res['best_threshold'] for res in eval_results]) post_to_main_spreadsheet(logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, f1_majority=oof_majority_f1, f1_optimized=oof_optimized_f1, threshold=oof_threshold_mean) results.append({ 'f1_majority': oof_majority_f1, 'f1_optimized': oof_optimized_f1, 'threshold': oof_threshold_mean }) f1_majority_mean = np.mean([res['f1_majority'] for res in results]) f1_majority_std = np.std([res['f1_majority'] for res in results]) f1_optimized_mean = np.mean([res['f1_optimized'] for res in results]) f1_optimized_std = np.std([res['f1_optimized'] for res in results]) threshold_mean = np.mean([res['threshold'] for res in results]) total_metrics = [ f1_majority_mean, f1_majority_std, f1_optimized_mean, f1_optimized_std, threshold_mean ] post_to_main_spreadsheet(logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME, model_name=model_name, fold=-1, f1_majority=-1, f1_optimized=-1, threshold=-1, others=total_metrics) message = 'KFold training and evaluation has been done.\n' message += f'F1 majority voting - Avg: {f1_majority_mean}, Std: {f1_majority_std}\n' message += f'F1 optimized - Avg: {f1_optimized_mean}, Std: {f1_optimized_std}\n' message += f'Threshold - Avg: {threshold_mean}' logger.post(message)
import numpy as np
import tensorflow as tf

from utils.trainer import Trainer
from utils.modeling import CBOW
from utils.data_loader import DataLoader

window_size = 5
num_neg_samples = 100
hidden_dim = 100
batch_size = 1000
epochs = 20

corpus_path = "./data/corpus.txt"
sp_path = "./tokenizer/aozora_8k_model.model"

x_dist = np.load("./out/x_dist.npy")

optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
# optimizer = tf.keras.optimizers.Adadelta(learning_rate=1e-2)
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-5)

loader = DataLoader(window_size, num_neg_samples, corpus_path=corpus_path, sp_path=sp_path)
vocab_size = loader.vocab_size

model = CBOW(hidden_dim, vocab_size, window_size)
model.compile(optimizer='adam', loss='categorical_crossentropy')

trainer = Trainer(model, loader, x_dist, optimizer)
trainer.train(batch_size, epochs=epochs // 2)
trainer.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
trainer.train(batch_size, epochs=epochs // 2)

trainer.save_model()
print("Model has been saved.")
get_logger(CONFIG.log_dir)
writer = get_writer(CONFIG.write_dir)

train_transform, val_transform, test_transform = get_transforms(CONFIG)
train_dataset, val_dataset, test_dataset = get_dataset(
    train_transform, val_transform, test_transform, CONFIG)
train_loader, val_loader, test_loader = get_dataloader(
    train_dataset, val_dataset, test_dataset, CONFIG)

lookup_table = LookUpTable(CONFIG)

criterion = cross_encropy_with_label_smoothing

layers_config = lookup_table.decode_arch_param(arch_param)
model = Model(layers_config, CONFIG.dataset, CONFIG.classes)
model = model.to(device)
if (device.type == "cuda" and CONFIG.ngpu >= 1):
    model = nn.DataParallel(model, list(range(CONFIG.ngpu)))
cal_model_efficient(model, CONFIG)

optimizer = get_optimizer(model, CONFIG.optim_state)
scheduler = get_lr_scheduler(optimizer, len(train_loader), CONFIG)

start_time = time.time()
trainer = Trainer(criterion, optimizer, scheduler, writer, device, CONFIG)
trainer.train_loop(train_loader, test_loader, model)
logging.info("Total training time : {:.2f}".format(time.time() - start_time))
def train_cycle_gan(**kwargs): opt._parse(kwargs) torch.manual_seed(opt.seed) # Write standard output into file sys.stdout = Logger(os.path.join(opt.save_dir, 'log_train.txt')) print('========user config========') pprint(opt._state_dict()) print('===========end=============') if opt.use_gpu: print('currently using GPU') torch.cuda.manual_seed_all(opt.seed) else: print('currently using cpu') pin_memory = True if opt.use_gpu else False print('initializing dataset {}'.format(opt.dataset_mode)) dataset = UnalignedDataset(opt) trainloader = DataLoader(dataset, opt.batchSize, True, num_workers=opt.workers, pin_memory=pin_memory) summaryWriter = SummaryWriter(os.path.join(opt.save_dir, 'tensorboard_log')) print('initializing model ... ') use_dropout = not opt.no_dropout netG_A = define_G(opt.input_nc, opt.output_nc, opt.ndf, opt.which_model_netG, opt.norm, use_dropout) netG_B = define_G(opt.output_nc, opt.input_nc, opt.ndf, opt.which_model_netG, opt.norm, use_dropout) use_sigmoid = opt.no_lsgan netD_A = define_D(opt.output_nc, opt.ndf, opt.which_model_netD, opt.n_layers_D, opt.norm, use_sigmoid) netD_B = define_D(opt.input_nc, opt.ndf, opt.which_model_netD, opt.n_layers_D, opt.norm, use_sigmoid) # print(netD_A) optimizer_G = torch.optim.Adam(itertools.chain(netG_A.parameters(), netG_B.parameters()), lr=opt.lr, betas=(opt.beta1, 0.999)) optimizer_D = torch.optim.Adam(itertools.chain(netD_A.parameters(), netD_B.parameters()), lr=opt.lr, betas=(opt.beta1, 0.999)) def get_scheduler(optimizer, opt): if opt.lr_policy == 'lambda': def lambda_rule(epoch): lr_l = 1.0 - max(0, epoch + 1 + opt.start_epoch - opt.niter) / float(opt.lr_decay_iters + 1) return lr_l scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule) elif opt.lr_policy == 'step': scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1) elif opt.lr_policy == 'plateau': scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5) else: return NotImplementedError( 'learning rate policy [{}] is not implemented'.format( opt.lr_policy)) return scheduler scheduler_G = get_scheduler(optimizer_G, opt) scheduler_D = get_scheduler(optimizer_D, opt) start_epoch = opt.start_epoch if opt.use_gpu: netG_A = torch.nn.DataParallel(netG_A).cuda() netG_B = torch.nn.DataParallel(netG_B).cuda() netD_A = torch.nn.DataParallel(netD_A).cuda() netD_B = torch.nn.DataParallel(netD_B).cuda() # get trainer cycleganTrainer = Trainer(opt, netG_A, netG_B, netD_A, netD_B, optimizer_G, optimizer_D, summaryWriter) # start training for epoch in range(start_epoch, opt.max_epoch): scheduler_G.step() scheduler_D.step() # train over whole dataset cycleganTrainer.train(epoch, trainloader) if (epoch + 1) % opt.save_freq == 0 or (epoch + 1) == opt.max_epoch: if opt.use_gpu: state_dict_netG_A = netG_A.module.state_dict() state_dict_netG_B = netG_B.module.state_dict() state_dict_netD_A = netD_A.module.state_dict() state_dict_netD_B = netD_B.module.state_dict() else: state_dict_netG_A = netG_A.state_dict() state_dict_netG_B = netG_B.state_dict() state_dict_netD_A = netD_A.state_dict() state_dict_netD_B = netD_B.state_dict() save_checkpoint( { 'netG_A': state_dict_netG_A, 'netG_B': state_dict_netG_B, 'netD_A': state_dict_netD_A, 'netD_B': state_dict_netD_B, 'epoch': epoch + 1, }, False, save_dir=opt.save_dir, filename='checkpoint_ep' + str(epoch + 1))
def main(): args = parse_train_arg() task = task_dict[args.task] init_distributed_mode(args) logger = init_logger(args) if hasattr(args, 'base_model_name'): logger.warning('Argument base_model_name is deprecated! Use `--table-bert-extra-config` instead!') init_signal_handler() train_data_dir = args.data_dir / 'train' dev_data_dir = args.data_dir / 'dev' table_bert_config = task['config'].from_file( args.data_dir / 'config.json', **args.table_bert_extra_config) if args.is_master: args.output_dir.mkdir(exist_ok=True, parents=True) with (args.output_dir / 'train_config.json').open('w') as f: json.dump(vars(args), f, indent=2, sort_keys=True, default=str) logger.info(f'Table Bert Config: {table_bert_config.to_log_string()}') # copy the table bert config file to the working directory # shutil.copy(args.data_dir / 'config.json', args.output_dir / 'tb_config.json') # save table BERT config table_bert_config.save(args.output_dir / 'tb_config.json') assert args.data_dir.is_dir(), \ "--data_dir should point to the folder of files made by pregenerate_training_data.py!" if args.cpu: device = torch.device('cpu') else: device = torch.device(f'cuda:{torch.cuda.current_device()}') logger.info("device: {} gpu_id: {}, distributed training: {}, 16-bits training: {}".format( device, args.local_rank, bool(args.multi_gpu), args.fp16)) if args.gradient_accumulation_steps < 1: raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format( args.gradient_accumulation_steps)) real_batch_size = args.train_batch_size # // args.gradient_accumulation_steps random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) if not args.cpu: torch.cuda.manual_seed_all(args.seed) if args.output_dir.is_dir() and list(args.output_dir.iterdir()): logger.warning(f"Output directory ({args.output_dir}) already exists and is not empty!") args.output_dir.mkdir(parents=True, exist_ok=True) # Prepare model if args.multi_gpu and args.global_rank != 0: torch.distributed.barrier() if args.no_init: raise NotImplementedError else: model = task['model'](table_bert_config) if args.multi_gpu and args.global_rank == 0: torch.distributed.barrier() if args.fp16: model = model.half() model = model.to(device) if args.multi_gpu: if args.ddp_backend == 'pytorch': model = nn.parallel.DistributedDataParallel( model, find_unused_parameters=True, device_ids=[args.local_rank], output_device=args.local_rank, broadcast_buffers=False ) else: import apex model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True) model_ptr = model.module else: model_ptr = model # set up update parameters for LR scheduler dataset_cls = task['dataset'] train_set_info = dataset_cls.get_dataset_info(train_data_dir, args.max_epoch) total_num_updates = train_set_info['total_size'] // args.train_batch_size // args.world_size // args.gradient_accumulation_steps args.max_epoch = train_set_info['max_epoch'] logger.info(f'Train data size: {train_set_info["total_size"]} for {args.max_epoch} epochs, total num. 
updates: {total_num_updates}') args.total_num_update = total_num_updates args.warmup_updates = int(total_num_updates * 0.1) trainer = Trainer(model, args) checkpoint_file = args.output_dir / 'model.ckpt.bin' is_resumed = False # trainer.save_checkpoint(checkpoint_file) if checkpoint_file.exists(): logger.info(f'Logging checkpoint file {checkpoint_file}') is_resumed = True trainer.load_checkpoint(checkpoint_file) model.train() # we also partitation the dev set for every local process logger.info('Loading dev set...') sys.stdout.flush() dev_set = dataset_cls(epoch=0, training_path=dev_data_dir, tokenizer=model_ptr.tokenizer, config=table_bert_config, multi_gpu=args.multi_gpu, debug=args.debug_dataset) logger.info("***** Running training *****") logger.info(f" Current config: {args}") if trainer.num_updates > 0: logger.info(f'Resume training at epoch {trainer.epoch}, ' f'epoch step {trainer.in_epoch_step}, ' f'global step {trainer.num_updates}') start_epoch = trainer.epoch for epoch in range(start_epoch, args.max_epoch): # inclusive model.train() with torch.random.fork_rng(devices=None if args.cpu else [device.index]): torch.random.manual_seed(131 + epoch) epoch_dataset = dataset_cls(epoch=trainer.epoch, training_path=train_data_dir, config=table_bert_config, tokenizer=model_ptr.tokenizer, multi_gpu=args.multi_gpu, debug=args.debug_dataset) train_sampler = RandomSampler(epoch_dataset) train_dataloader = DataLoader(epoch_dataset, sampler=train_sampler, batch_size=real_batch_size, num_workers=0, collate_fn=epoch_dataset.collate) samples_iter = GroupedIterator(iter(train_dataloader), args.gradient_accumulation_steps) trainer.resume_batch_loader(samples_iter) with tqdm(total=len(samples_iter), initial=trainer.in_epoch_step, desc=f"Epoch {epoch}", file=sys.stdout, disable=not args.is_master, miniters=100) as pbar: for samples in samples_iter: logging_output = trainer.train_step(samples) pbar.update(1) pbar.set_postfix_str(', '.join(f"{k}: {v:.4f}" for k, v in logging_output.items())) if ( 0 < trainer.num_updates and trainer.num_updates % args.save_checkpoint_every_niter == 0 and args.is_master ): # Save model checkpoint logger.info("** ** * Saving checkpoint file ** ** * ") trainer.save_checkpoint(checkpoint_file) logger.info(f'Epoch {epoch} finished.') if args.is_master: # Save a trained table_bert logger.info("** ** * Saving fine-tuned table_bert ** ** * ") model_to_save = model_ptr # Only save the table_bert it-self output_model_file = args.output_dir / f"pytorch_model_epoch{epoch:02d}.bin" torch.save(model_to_save.state_dict(), str(output_model_file)) # perform validation logger.info("** ** * Perform validation ** ** * ") dev_results = trainer.validate(dev_set) if args.is_master: logger.info('** ** * Validation Results ** ** * ') logger.info(f'Epoch {epoch} Validation Results: {dev_results}') # flush logging information to disk sys.stderr.flush() trainer.next_epoch()
def main(params, greedy, beam_size, test):
    """
    The main function for decoding a trained MT model

    Arguments:
        params: parameters related to the `model` that is being decoded
        greedy: whether or not to do greedy decoding
        beam_size: size of beam if doing beam search
    """
    print("Loading dataset...")
    _, dev_iter, test_iterator, DE, EN = load_dataset(params.data_path,
                                                      params.train_batch_size,
                                                      params.dev_batch_size)
    de_size, en_size = len(DE.vocab), len(EN.vocab)
    print("[DE Vocab Size]: {}, [EN Vocab Size]: {}".format(de_size, en_size))

    params.src_vocab_size = de_size
    params.tgt_vocab_size = en_size
    params.sos_index = EN.vocab.stoi["<s>"]
    params.pad_token = EN.vocab.stoi["<pad>"]
    params.eos_index = EN.vocab.stoi["</s>"]
    params.itos = EN.vocab.itos

    device = torch.device('cuda' if params.cuda else 'cpu')
    params.device = device

    # make the Seq2Seq model
    model = make_seq2seq_model(params)

    # load the saved model for evaluation
    if params.average > 1:
        print("Averaging the last {} checkpoints".format(params.average))
        checkpoint = {}
        checkpoint["state_dict"] = average_checkpoints(params.model_dir, params.average)
        model = Trainer.load_checkpoint(model, checkpoint)
    else:
        model_path = os.path.join(params.model_dir + "checkpoints/", params.model_file)
        print("Restoring parameters from {}".format(model_path))
        model = Trainer.load_checkpoint(model, model_path)

    # evaluate on the test set
    if test:
        print("Doing Beam Search on the Test Set")
        test_decoder = Translator(model, test_iterator, params, device)
        test_beam_search_outputs = test_decoder.beam_decode(beam_width=beam_size)
        test_decoder.output_decoded_translations(
            test_beam_search_outputs,
            "beam_search_outputs_size_test={}.en".format(beam_size))
        return

    # instantiate a Translator object to translate the SRC language to the TRG language using greedy/beam decoding
    decoder = Translator(model, dev_iter, params, device)

    if greedy:
        print("Doing Greedy Decoding...")
        greedy_outputs = decoder.greedy_decode(max_len=100)
        decoder.output_decoded_translations(greedy_outputs, "greedy_outputs.en")

        print("Evaluating BLEU Score on Greedy Translation...")
        subprocess.call([
            './utils/eval.sh',
            params.model_dir + "outputs/greedy_outputs.en"
        ])

    if beam_size:
        print("Doing Beam Search...")
        beam_search_outputs = decoder.beam_decode(beam_width=beam_size)
        decoder.output_decoded_translations(
            beam_search_outputs,
            "beam_search_outputs_size={}.en".format(beam_size))

        print("Evaluating BLEU Score on Beam Search Translation")
        subprocess.call([
            './utils/eval.sh',
            params.model_dir + "outputs/beam_search_outputs_size={}.en".format(beam_size)
        ])
              soft_gumbel_softmax=False,
              hard_gumbel_softmax=False,
              batch_discriminator=False,
              batch=batch_dim)
model()

# optimizer
optimizer = GraphGANOptimizer(model, learning_rate=1e-3, feature_matching=False)

# session
session = tf.Session()
session.run(tf.global_variables_initializer())

# trainer
trainer = Trainer(model, optimizer, session)

print('Parameters: {}'.format(np.sum([np.prod(e.shape) for e in session.run(tf.trainable_variables())])))

trainer.train(batch_dim=batch_dim,  # 128
              epochs=epochs,
              steps=steps,
              train_fetch_dict=train_fetch_dict,
              train_feed_dict=train_feed_dict,
              eval_fetch_dict=eval_fetch_dict,
              eval_feed_dict=eval_feed_dict,
              test_fetch_dict=test_fetch_dict,
              test_feed_dict=test_feed_dict,
              save_every=save_every,
              directory='',  # here users need to first create and then specify a folder where to save the model
              _eval_update=_eval_update,
elif name == "nezha": from model.nezha.modeling_nezha import BertConfig, BertForSequenceClassification vocab = read_pkl(F.vocab_file) F.vocab_size = len(vocab) conf = BertConfig.from_dict(F.__dict__) model = BertForSequenceClassification(conf, F.num_label) else: print("model not found!") sys.exit(-1) return model if __name__ == "__main__": T = Trainer() parser = T.get_parser() parser.add_argument('--n_fold', default=5, type=int) parser.add_argument('--pred_W', default=1, type=int) F = parser.parse_args() if F.config_file is not None: load_config(F.config_file, F, ignore_keys=['random_seed', "debug", "n_fold", "pred_W"]) # show_config(F) # sys.exit(0) setup_seed(F.random_seed)
def train(model_name, root_dir, dataset_mode, max_iter):
    # output folder to save models
    output_dir = os.path.join('train_results', model_name + '_' + dataset_mode)
    os.makedirs(output_dir, exist_ok=True)

    # get folders depending on dataset_mode
    folders_train = []
    folders_test = []
    for curr_dir in os.listdir(root_dir):
        with open(os.path.join(root_dir, curr_dir, 'meta.json')) as f:
            meta = json.load(f)
        if meta["set"] == "train_good_weather":
            folders_train.append(curr_dir)
        elif meta["set"] == "train_good_and_bad_weather" and dataset_mode == "good_and_bad_weather":
            folders_train.append(curr_dir)
        elif meta["set"] == "test":
            folders_test.append(curr_dir)

    def gen_boundingbox(bbox, angle):
        # rotate the axis-aligned box by `angle` and return its axis-aligned envelope
        theta = np.deg2rad(-angle)
        R = np.array([[np.cos(theta), -np.sin(theta)],
                      [np.sin(theta), np.cos(theta)]])
        points = np.array([[bbox[0], bbox[1]],
                           [bbox[0] + bbox[2], bbox[1]],
                           [bbox[0] + bbox[2], bbox[1] + bbox[3]],
                           [bbox[0], bbox[1] + bbox[3]]]).T

        cx = bbox[0] + bbox[2] / 2
        cy = bbox[1] + bbox[3] / 2
        T = np.array([[cx], [cy]])

        points = points - T
        points = np.matmul(R, points) + T
        points = points.astype(int)

        min_x = np.min(points[0, :])
        min_y = np.min(points[1, :])
        max_x = np.max(points[0, :])
        max_y = np.max(points[1, :])

        return min_x, min_y, max_x, max_y

    def get_radar_dicts(folders):
        dataset_dicts = []
        idd = 0
        folder_size = len(folders)
        for folder in folders:
            radar_folder = os.path.join(root_dir, folder, 'Navtech_Cartesian')
            annotation_path = os.path.join(root_dir, folder, 'annotations', 'annotations.json')
            with open(annotation_path, 'r') as f_annotation:
                annotation = json.load(f_annotation)

            radar_files = os.listdir(radar_folder)
            radar_files.sort()
            for frame_number in range(len(radar_files)):
                record = {}
                objs = []
                bb_created = False
                idd += 1
                filename = os.path.join(radar_folder, radar_files[frame_number])

                if (not os.path.isfile(filename)):
                    print(filename)
                    continue
                record["file_name"] = filename
                record["image_id"] = idd
                record["height"] = 1152
                record["width"] = 1152

                for object in annotation:
                    if (object['bboxes'][frame_number]):
                        class_obj = object['class_name']
                        if (class_obj != 'pedestrian' and class_obj != 'group_of_pedestrians'):
                            bbox = object['bboxes'][frame_number]['position']
                            angle = object['bboxes'][frame_number]['rotation']
                            bb_created = True
                            if cfg.MODEL.PROPOSAL_GENERATOR.NAME == "RRPN":
                                cx = bbox[0] + bbox[2] / 2
                                cy = bbox[1] + bbox[3] / 2
                                wid = bbox[2]
                                hei = bbox[3]
                                obj = {
                                    "bbox": [cx, cy, wid, hei, angle],
                                    "bbox_mode": BoxMode.XYWHA_ABS,
                                    "category_id": 0,
                                    "iscrowd": 0
                                }
                            else:
                                xmin, ymin, xmax, ymax = gen_boundingbox(bbox, angle)
                                obj = {
                                    "bbox": [xmin, ymin, xmax, ymax],
                                    "bbox_mode": BoxMode.XYXY_ABS,
                                    "category_id": 0,
                                    "iscrowd": 0
                                }
                            objs.append(obj)
                if bb_created:
                    record["annotations"] = objs
                    dataset_dicts.append(record)
        return dataset_dicts

    dataset_train_name = dataset_mode + '_train'
    dataset_test_name = dataset_mode + '_test'

    DatasetCatalog.register(dataset_train_name, lambda: get_radar_dicts(folders_train))
    MetadataCatalog.get(dataset_train_name).set(thing_classes=["vehicle"])

    DatasetCatalog.register(dataset_test_name, lambda: get_radar_dicts(folders_test))
    MetadataCatalog.get(dataset_test_name).set(thing_classes=["vehicle"])

    cfg_file = os.path.join('test', 'config', model_name + '.yaml')

    cfg = get_cfg()
    cfg.OUTPUT_DIR = output_dir
    cfg.merge_from_file(cfg_file)
    cfg.DATASETS.TRAIN = (dataset_train_name, )
    cfg.DATASETS.TEST = (dataset_test_name, )
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.STEPS = (25000, 35000)  # was a bare annotation (`:`), which never assigns the value
    cfg.SOLVER.MAX_ITER = max_iter
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.2
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[8, 16, 32, 64, 128]]

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    if cfg.MODEL.PROPOSAL_GENERATOR.NAME == "RRPN":
        trainer = RotatedTrainer(cfg)
    else:
        trainer = Trainer(cfg)

    trainer.resume_or_load(resume=resume)
    trainer.train()
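# Not part of the original script: a hedged follow-up sketch showing how the weights written to
# cfg.OUTPUT_DIR by the training above could be loaded for inference with detectron2's
# DefaultPredictor, assuming the `cfg` built inside train() is available. The image path and
# score threshold are placeholders.
import cv2
from detectron2.engine import DefaultPredictor

cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # hypothetical confidence threshold
predictor = DefaultPredictor(cfg)

image = cv2.imread("some_radar_frame.png")    # placeholder input frame
outputs = predictor(image)                    # dict with an "instances" field of predictions
print(outputs["instances"].pred_boxes)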
class RayTrainer(ray.tune.Trainable):
    def _setup(self, config):
        # one iteration is five training epochs, one test epoch
        self.epochs = EPOCHS // TUNE_EPOCH_CHUNKS

        print(config)

        args = Namespace(**config)

        self.traindataloader, self.validdataloader = prepare_dataset(args)

        nclasses = self.traindataloader.dataset.nclasses
        seqlength = self.traindataloader.dataset.sequencelength
        input_dims = self.traindataloader.dataset.ndims

        self.model, self.optimizer = prepare_model_and_optimizer(args, input_dims, seqlength, nclasses)
        self.criterion = prepare_loss_criterion(args)

        if torch.cuda.is_available():
            self.model = self.model.cuda()

        if "model" in config.keys():
            config.pop('model', None)

        #trainer = Trainer(self.model, self.traindataloader, self.validdataloader, **databases)
        self.trainer = Trainer(self.model,
                               self.traindataloader,
                               self.validdataloader,
                               self.optimizer,
                               self.criterion,
                               store=args.local_dir,
                               test_every_n_epochs=999,
                               visdomlogger=None)

    def _train(self):
        # epoch is used to distinguish training phases. epoch=None will default to (first) cross entropy phase
        # train five epochs and then infer once. to avoid overhead on these small datasets
        for i in range(self.epochs):
            trainstats = self.trainer.train_epoch(epoch=None)

        stats = self.trainer.test_epoch(self.validdataloader)
        stats["score"] = .5 * stats["accuracy"] + .5 * (1 - stats["earliness"])

        stats.pop("inputs")
        stats.pop("confusion_matrix")
        stats.pop("probas")

        #stats["lossdelta"] = trainstats["loss"] - stats["loss"]
        #stats["trainloss"] = trainstats["loss"]

        return stats

    def _save(self, path):
        path = path + ".pth"
        torch.save(self.model.state_dict(), path)
        return path

    def _restore(self, path):
        state_dict = torch.load(path, map_location="cpu")
        self.model.load_state_dict(state_dict)
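# Not part of the original: a hedged sketch of launching this Trainable with the legacy
# ray.tune API (_setup/_train/_save/_restore). The search-space entries below are placeholders;
# the real config must also carry every key read by prepare_dataset / prepare_model_and_optimizer.
import ray
from ray import tune

ray.init()
analysis = tune.run(
    RayTrainer,
    config={
        "learning_rate": tune.loguniform(1e-4, 1e-2),   # hypothetical search space
        "hidden_dims": tune.choice([32, 64, 128]),
    },
    stop={"training_iteration": 20},  # one iteration = self.epochs train epochs + one test epoch
    checkpoint_freq=5,                # exercises the _save / _restore hooks above
)
print(analysis.get_best_config(metric="score", mode="max"))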
def eval(dataset, batchsize, workers, num_rnn_layers, dropout, hidden_dims, store="/tmp", epochs=30, switch_epoch=30, learning_rate=1e-3, visdomenv="run", earliness_factor=.75, show_n_samples=1, modelname="DualOutputRNN", loss_mode=None, load_weights=None, entropy_factor=0): if dataset == "synthetic": traindataset = SyntheticDataset(num_samples=2000, T=100) validdataset = SyntheticDataset(num_samples=1000, T=100) else: traindataset = UCRDataset(dataset, partition="trainvalid") validdataset = UCRDataset(dataset, partition="test") nclasses = traindataset.nclasses np.random.seed(0) torch.random.manual_seed(0) traindataloader = torch.utils.data.DataLoader(traindataset, batch_size=batchsize, shuffle=True, num_workers=workers, pin_memory=True) np.random.seed(1) torch.random.manual_seed(1) validdataloader = torch.utils.data.DataLoader(validdataset, batch_size=batchsize, shuffle=False, num_workers=workers, pin_memory=True) if modelname == "DualOutputRNN": model = DualOutputRNN(input_dim=1, nclasses=nclasses, hidden_dim=hidden_dims, num_rnn_layers=num_rnn_layers, dropout=dropout) else: raise ValueError( "Invalid Model, Please insert either 'DualOutputRNN' or 'AttentionRNN'" ) if load_weights is not None: model.load(load_weights) # parse epoch 29 from filename e.g., 'models/TwoPatterns/run/model_29.pth' start_epoch = int( os.path.basename(load_weights).split("_")[-1].split(".")[0]) else: start_epoch = 0 if torch.cuda.is_available(): model = model.cuda() #if run is None: # visdomenv = "{}_{}_{}".format(args.experiment, dataset,args.loss_mode.replace("_","-")) # storepath = store #else: # visdomenv = run #storepath = os.path.join(store, dataset) if switch_epoch is None: switch_epoch = int(epochs / 2) config = dict(epochs=epochs, learning_rate=learning_rate, earliness_factor=earliness_factor, visdomenv=visdomenv, switch_epoch=switch_epoch, loss_mode=loss_mode, show_n_samples=show_n_samples, store=store, entropy_factor=entropy_factor) trainer = Trainer(model, traindataloader, validdataloader, config=config) logged_data = trainer.fit(start_epoch=start_epoch) return logged_data
def main(): """ main """ config = get_config() if torch.cuda.is_available() and config.gpu >= 0: device = torch.device(config.gpu) else: device = torch.device('cpu') # Data definition tokenizer = lambda x: x.split() src_field = Field( sequential=True, tokenize=tokenizer, lower=True, batch_first=True, include_lengths=True ) tgt_field = Field( sequential=True, tokenize=tokenizer, lower=True, batch_first=True, init_token=BOS_TOKEN, eos_token=EOS_TOKEN ) fields = { 'src': ('src', src_field), 'tgt': ('tgt', tgt_field), } train_data = TabularDataset( path=config.train_path, format='json', fields=fields ) valid_data = TabularDataset( path=config.valid_path, format='json', fields=fields ) if not os.path.exists(config.vocab_dir): if not config.share_vocab: src_field.build_vocab( train_data.src, max_size=config.max_vocab_size, min_freq=config.min_freq ) tgt_field.build_vocab( train_data.tgt, max_size=config.max_vocab_size, min_freq=config.min_freq ) else: src_field.build_vocab( train_data.src, train_data.tgt, max_size=config.max_vocab_size, min_freq=config.min_freq ) tgt_field.vocab = src_field.vocab os.makedirs(config.vocab_dir) with open(os.path.join(config.vocab_dir, 'src.vocab.pkl'), 'wb') as src_vocab: pickle.dump(src_field.vocab, src_vocab) with open(os.path.join(config.vocab_dir, 'tgt.vocab.pkl'), 'wb') as tgt_vocab: pickle.dump(tgt_field.vocab, tgt_vocab) else: with open(os.path.join(config.vocab_dir, 'src.vocab.pkl'), 'rb') as src_vocab: src_field.vocab = pickle.load(src_vocab) with open(os.path.join(config.vocab_dir, 'tgt.vocab.pkl'), 'rb') as tgt_vocab: tgt_field.vocab = pickle.load(tgt_vocab) train_iter = BucketIterator( train_data, batch_size=config.batch_size, device=device, shuffle=True ) valid_iter = BucketIterator( valid_data, batch_size=config.batch_size, device=device, shuffle=False ) # Model definition if not config.share_vocab: src_embedding = nn.Embedding(len(src_field.vocab), config.embedding_size) tgt_embedding = nn.Embedding(len(tgt_field.vocab), config.embedding_size) else: src_embedding = tgt_embedding = nn.Embedding(len(tgt_field.vocab), config.embedding_size) assert config.model in ['rnn', 'transformer'] if config.model == 'rnn': model = Seq2Seq( src_embedding=src_embedding, tgt_embedding=tgt_embedding, embedding_size=config.embedding_size, hidden_size=config.hidden_size, vocab_size=len(tgt_field.vocab), start_index=tgt_field.vocab.stoi[BOS_TOKEN], end_index=tgt_field.vocab.stoi[EOS_TOKEN], padding_index=tgt_field.vocab.stoi[PAD_TOKEN], bidirectional=config.bidirectional, num_layers=config.num_layers, dropout=config.dropout ) elif config.model == 'transformer': model = Transformer( src_embedding=src_embedding, tgt_embedding=tgt_embedding, embedding_size=config.embedding_size, hidden_size=config.hidden_size, vocab_size=len(tgt_field.vocab), start_index=tgt_field.vocab.stoi[BOS_TOKEN], end_index=tgt_field.vocab.stoi[EOS_TOKEN], padding_index=tgt_field.vocab.stoi[PAD_TOKEN], num_heads=config.num_heads, num_layers=config.num_layers, dropout=config.dropout, learning_position_embedding=config.learning_position_embedding, embedding_scale=config.embedding_scale, num_positions=config.num_positions ) model.to(device) # Optimizer definition assert config.optimizer in ['sgd', 'adam'] if config.optimizer == 'sgd': optimizer = torch.optim.SGD(model.parameters(), lr=config.lr) else: optimizer = torch.optim.Adam(model.parameters(), lr=config.lr) # Learning rate scheduler if config.lr_decay is not None and 0 < config.lr_decay < 1.0: lr_scheduler = \ 
torch.optim.lr_scheduler.ReduceLROnPlateau( optimizer=optimizer, factor=config.lr_decay, patience=1, verbose=True, min_lr=1e-5) else: lr_scheduler = None # Save directory if not os.path.exists(config.save_dir): os.mkdir(config.save_dir) # Logger definition logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG, format="%(message)s") fh = logging.FileHandler(os.path.join(config.save_dir, "train.log")) logger.addHandler(fh) # Save config params_file = os.path.join(config.save_dir, "params.json") with open(params_file, 'w') as fp: json.dump(config.__dict__, fp, indent=4, sort_keys=True) print("Saved params to '{}'".format(params_file)) logger.info(model) # Train logger.info("Training starts ...") trainer = Trainer( model=model, optimizer=optimizer, train_iter=train_iter, valid_iter=valid_iter, logger=logger, valid_metric_name=config.valid_metric, num_epochs=config.num_epochs, save_dir=config.save_dir, log_steps=config.log_steps, valid_steps=config.valid_steps, grad_clip=config.grad_clip, lr_scheduler=lr_scheduler, save_summary=False) if config.ckpt is not None: trainer.load(file_prefix=config.ckpt) elif config.pretrained is not None: model.load(config.pretrained) trainer.train() logger.info("Training done!")
                       transforms=Hyperparameter.TRAIN_TRANSFORMS)
val_set = DataLoader('./dataset',
                     batch_size=Hyperparameter.BATCH_SIZE,
                     use='valid',
                     n_classes=Hyperparameter.NUM_CLASSES,
                     transforms=Hyperparameter.VALID_TRANSFORMS)
test_set = DataLoader('./dataset',
                      batch_size=Hyperparameter.BATCH_SIZE,
                      use='valid',
                      n_classes=Hyperparameter.NUM_CLASSES,
                      transforms=Hyperparameter.VALID_TRANSFORMS)

# Train
trainer = Trainer(train_set=train_set,
                  val_set=val_set,
                  test_set=test_set,
                  model=net,
                  optimizer=optimizer,
                  scheduler=scheduler,
                  num_classes=num_classes,
                  loss_fn=loss_fn,
                  accuracy_fn=accuracy_fn,
                  patience=Hyperparameter.PATIENCE,
                  writer=writer,
                  save_path=os.path.join(log_path, 'best_model.pt'),
                  device=Hyperparameter.device)

train_loss, train_acc, train_pre, train_rec, train_f1, \
    val_loss, val_acc, val_pre, val_rec, val_f1, \
    best_val_loss = trainer.train_loop(num_epochs=Hyperparameter.NUM_EPOCHS)

plot_loss_graph(train_loss=train_loss,
                train_acc=train_acc,
                train_pre=train_pre,
                train_rec=train_rec,
                train_f1=train_f1,
              discriminator_units=((256, 128), 128, (256, 128)),
              decoder=decoder_adj,
              discriminator=encoder_rgcn,
              soft_gumbel_softmax=False,
              hard_gumbel_softmax=False,
              batch_discriminator=False)

# optimizer
optimizer = GraphGANOptimizer(model, learning_rate=1e-3, feature_matching=False)

# session
session = tf.Session()
session.run(tf.global_variables_initializer())

# trainer
trainer = Trainer(model, optimizer, session, runname)

print('Parameters: {}'.format(np.sum([np.prod(e.shape) for e in session.run(tf.trainable_variables())])))

trainer.train(batch_dim=batch_dim,
              epochs=epochs,
              steps=steps,
              la=la,
              train_fetch_dict=train_fetch_dict,
              train_feed_dict=train_feed_dict,
              eval_fetch_dict=eval_fetch_dict,
              eval_feed_dict=eval_feed_dict,
              test_fetch_dict=test_fetch_dict,
              test_feed_dict=test_feed_dict,
              save_every=save_every,
              _eval_update=_eval_test_update,
class RayTrainer(ray.tune.Trainable): def _setup(self, config): self.dataset = config["dataset"] self.earliness_factor = config["earliness_factor"] hparams = pd.read_csv(config["hyperparametercsv"]) # select only current dataset hparams = hparams.set_index("dataset").loc[config["dataset"]] config["learning_rate"] = float(hparams.learning_rate) config["num_layers"] = int(hparams.num_layers) config["hidden_dims"] = int(hparams.hidden_dims) config["shapelet_width_increment"] = int(hparams.shapelet_width_increment) logging.debug(hparams) logging.debug(config["batchsize"]) self.epochs = config["epochs"] # handles multitxhreaded batching andconfig shuffling #self.traindataloader = torch.utils.data.DataLoader(traindataset, batch_size=config["batchsize"], shuffle=True, # num_workers=config["workers"], # pin_memory=False) #self.validdataloader = torch.utils.data.DataLoader(validdataset, batch_size=config["batchsize"], shuffle=False, # # num_workers=config["workers"], pin_memory=False) # dict to namespace args = Namespace(**config) args.model = "Conv1D" args.shapelet_width_in_percent = False args.dropout = args.drop_probability self.traindataloader = getDataloader(dataset=args.dataset, partition="trainvalid", batch_size=config["batchsize"], num_workers=config["workers"], shuffle=True, pin_memory=True) self.validdataloader = getDataloader(dataset=args.dataset, partition="test", batch_size=config["batchsize"], num_workers=config["workers"], shuffle=False, pin_memory=True) args.nclasses = self.traindataloader.dataset.nclasses args.seqlength = self.traindataloader.dataset.sequencelength args.input_dims = self.traindataloader.dataset.ndims self.model = getModel(args) #self.model = ConvShapeletModel(num_layers=config["num_layers"], # hidden_dims=config["hidden_dims"], # ts_dim=1, # n_classes=nclasses, # use_time_as_feature=True, # drop_probability=config["drop_probability"], # scaleshapeletsize=False, # shapelet_width_increment=config["shapelet_width_increment"]) if torch.cuda.is_available(): self.model = self.model.cuda() # namespace to dict config = vars(args) config.pop("model") # delete string Conv1D to avoid confusion with model class self.config = config self.trainer = Trainer(self.model, self.traindataloader, self.validdataloader, **config) def _train(self): for epoch in range(self.epochs): self.trainer.new_epoch() # important for updating the learning rate stats = self.trainer.train_epoch(epoch) stats = self.trainer.test_epoch(dataloader=self.validdataloader) self.log(stats) return stats def log(self, stats): msg = "dataset {}, accuracy {:0.2f}, earliness {:0.2f}, mean_precision {:0.2f}, mean_recall {:0.2f}, kappa {:0.2f}, loss {:0.2f}" #print(self.dataset) #print(self.config) print(msg.format(self.dataset, stats["accuracy"], stats["earliness"], stats["mean_precision"], stats["mean_recall"], stats["kappa"], stats["loss"])) def _save(self, path): path = path + ".pth" torch.save(self.model.state_dict(), path) return path def _restore(self, path): state_dict = torch.load(path, map_location="cpu") self.model.load_state_dict(state_dict)
                        args.interpolation, args.aug, args.aug_mode,
                        args.crop_size, args.upscale_factor)
eval_set = get_eval_set(args.band_mode, args.data_dir, args.interpolation,
                        args.aug, args.aug_mode, args.crop_size,
                        args.upscale_factor)
datasets = [train_set, val_set]

print('===> Building model')
model = ESPCN(nb_channel=args.nb_channel,
              upscale_factor=args.upscale_factor,
              base_kernel=args.base_kernel).to(device)
#criterion = nn.MSELoss()
model.optimizer = optim.Adam(model.parameters(), lr=args.lr)

trainer = Trainer(args, method)
trainer.training(model, datasets)
trainer.save_log()
trainer.learning_curve()
trainer.evaluating(model, train_set, 'train')
trainer.evaluating(model, val_set, 'val')
trainer.evaluating(model, eval_set, "test")
print('===> Complete training')
model_name = trainer.save_checkpoint(model)

if args.test:
    if args.train:
        args.test_model_name = model_name
    model_name = args.test_model_name
    test_dir = args.test_dir
def main(logger, args): df_train, _ = load_data(INPUT_DIR, logger) if args['debug']: df_train = df_train.iloc[:30000] texts_train = df_train['question_text'] else: logger.info('Preprocess text') texts_train = preprocess_text(df_train, return_df=False) seq_train, tokenizer = tokenize_texts(texts_train, logger) logger.info('Pad train text data') seq_train = pad_sequences(seq_train, maxlen=PADDING_LENGTH) label_train = df_train['target'].values.reshape(-1, 1) embed_types = [0, 1, 2] logger.info( 'Start multiprocess nlp feature extraction and embedding matrices loading' ) with mp.Pool(processes=2) as p: results = p.map(parallel_apply, [(extract_nlp_features, (df_train, )), (load_multiple_embeddings, (tokenizer.word_index, embed_types, args['debug']))]) df_train_extracted = results[0] embedding_matrices = results[1] embedding_matrix = np.concatenate( [np.array([embedding_matrices[i] for i in [0, 1, 2]]).mean(0)] + [embedding_matrices[j] for j in [1]], axis=1) nlp_columns = [ 'total_length', 'n_capitals', 'n_words', 'n_puncts', 'n_?', 'n_!', 'n_you' ] for col in nlp_columns: scaler = StandardScaler() df_train_extracted[col] = scaler.fit_transform( df_train_extracted[col].values.astype(np.float32).reshape( -1, 1)).reshape(-1, ) x_nlp = [ df_train_extracted[col].values.reshape(-1, 1) for col in nlp_columns ] nlp_size = len(x_nlp) # ===== training and evaluation loop ===== # device_ids = args['device_ids'] output_device = device_ids[0] torch.cuda.set_device(device_ids[0]) torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = True batch_size = args['batch_size'] * len(device_ids) trigger = TRIGGER if args['debug']: epochs = 3 n_splits = 2 else: epochs = EPOCHS n_splits = KFOLD logger.info('Start training and evaluation loop') model_specs = [ { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [16, 16, 16], 'kernel_size': 3, 'dropout': 0.2 }, ), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ({ 'dim': 64, 'dropout': 0.3 }, ) }, { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [16, 16, 16], 'kernel_size': 2, 'dropout': 0.2 }, ), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ({ 'dim': 64, 'dropout': 0.3 }, ) }, { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [32, 32, 32], 'kernel_size': 3, 'dropout': 0.2 }, ), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ({ 'dim': 64, 'dropout': 0.3 }, ) }, { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [16, 16], 'kernel_size': 3, 'dropout': 0.2 }, ), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ({ 'dim': 64, 'dropout': 0.3 }, ) }, { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 
}, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [16, 16, 16], 'kernel_size': 3, 'dropout': 0.2 }, ), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ( { 'dim': 64, 'dropout': 0.5 }, { 'dim': 64, 'dropout': 0.3 }, ) }, { 'nlp_layer_types': ({ 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }, { 'activation': 'relu', 'dim': 16, 'dropout': 0.2 }), 'tcn_layer_types': ({ 'num_channels': [16, 16, 16], 'kernel_size': 3, 'dropout': 0.2 }, { 'num_channels': [16, 16, 16], 'kernel_size': 4, 'dropout': 0.2 }), 'rnn_layer_types': ({ 'type': 'lstm', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }, { 'type': 'gru', 'dim': 64, 'num_layers': 1, 'dropout': 0.0 }), 'upper_layer_types': ({ 'dim': 64, 'dropout': 0.3 }, ) }, ] model_name_base = 'NLPFeaturesTCNRNN' for spec_id, spec in enumerate(model_specs): model_name = model_name_base + f'_specId={spec_id}' skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED) oof_mv_preds = np.zeros(len(seq_train)) oof_preds_proba = np.zeros(len(seq_train)) oof_opt_preds = np.zeros(len(seq_train)) oof_reopt_preds = np.zeros(len(seq_train)) results_list = [] for fold, (index_train, index_valid) in enumerate( skf.split(label_train, label_train)): logger.info( f'Fold {fold + 1} / {KFOLD} - create dataloader and build model' ) x_train = { 'text': seq_train[index_train].astype(int), 'nlp': [x[index_train] for x in x_nlp] } x_valid = { 'text': seq_train[index_valid].astype(int), 'nlp': [x[index_valid] for x in x_nlp] } y_train, y_valid = label_train[index_train].astype( np.float32), label_train[index_valid].astype(np.float32) model = NLPFeaturesTCNRNN( embedding_matrix, PADDING_LENGTH, nlp_size, embed_drop=0.2, mask=True, nlp_layer_types=spec['nlp_layer_types'], tcn_layer_types=spec['tcn_layer_types'], rnn_layer_types=spec['rnn_layer_types'], upper_layer_types=spec['upper_layer_types']) steps_per_epoch = seq_train[index_train].shape[0] // batch_size scheduler_trigger_steps = steps_per_epoch * trigger step_size = steps_per_epoch * (epochs - trigger) // NUM_SNAPSHOTS config = { 'epochs': epochs, 'batch_size': batch_size, 'output_device': output_device, 'criterion_type': 'bce', 'criteria_weights': [1.0, 1.0], 'criterion_gamma': 2.0, 'criterion_alpha': 0.75, 'optimizer': 'adam', 'optimizer_lr': 0.003, 'num_snapshots': NUM_SNAPSHOTS, 'scheduler_type': 'cyclic', 'base_lr': 0.0005, 'max_lr': 0.003, 'step_size': step_size, 'scheduler_mode': 'triangular', 'scheduler_gamma': 0.9, 'scheduler_trigger_steps': scheduler_trigger_steps, 'sampler_type': 'normal', 'seed': SEED } trainer = Trainer(model, logger, config) eval_results = trainer.train_and_eval_fold(x_train, y_train, x_valid, y_valid, fold) fold_results = calculate_fold_metrics( eval_results, label_train[index_valid].reshape(-1, )) results_list.append(fold_results) message = f'Fold {fold + 1} / {KFOLD} has been done.\n' message += f'Majority Voting - F1: {fold_results["oof_mv_f1"]}, ' message += f'Precision: {fold_results["oof_mv_precision"]}, Recall: {fold_results["oof_mv_recall"]}\n' message += f'Optimized - F1: {fold_results["oof_opt_f1"]}, ' message += f'Precision: {fold_results["oof_opt_precision"]}, Recall: {fold_results["oof_opt_recall"]}\n' message += f'Re-optimized - F1: {fold_results["oof_reopt_f1"]}, ' message += f'Precision: {fold_results["oof_reopt_precision"]}, Recall: {fold_results["oof_reopt_recall"]}\n' message += f'Focal Loss: 
{fold_results["oof_focal_loss"]}, ' message += f'Optimized Threshold: {fold_results["oof_opt_threshold"]}, ' message += f'Re-optimized Threshold: {fold_results["oof_reopt_threshold"]}, ' logger.post(message) eval_results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name, 'fold_id': fold } for res in eval_results: res.update(eval_results_addition) post_to_snapshot_metrics_table(data=res, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) fold_results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name, 'fold_id': fold } fold_results.update(fold_results_addition) post_to_fold_metrics_table(fold_results, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) oof_mv_preds[index_valid] = fold_results['oof_mv_preds'] oof_opt_preds[index_valid] = fold_results['oof_opt_preds'] oof_reopt_preds[index_valid] = fold_results['oof_reopt_preds'] oof_preds_proba[index_valid] = fold_results['oof_preds_proba'] results = calculate_total_metrics(results_list) results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name } results.update(results_addition) post_to_total_metrics_table(results, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) logger.post(f'Spec ID: {spec_id}\nModel Spec: {spec}') message = 'KFold training and evaluation has been done.\n' message += f'Majority Voting - F1: avg = {results["mv_f1_avg"]}, std = {results["mv_f1_std"]}, ' message += f'Precision: {results["mv_precision_avg"]}, Recall: {results["mv_recall_avg"]}\n' message += f'Optimized - F1: avg = {results["opt_f1_avg"]}, std = {results["opt_f1_std"]}, ' message += f'Precision: {results["opt_precision_avg"]}, Recall: {results["opt_recall_avg"]}\n' message += f'Re-optimized - F1: avg = {results["reopt_f1_avg"]}, std = {results["reopt_f1_std"]}, ' message += f'Precision: {results["reopt_precision_avg"]}, Recall: {results["reopt_recall_avg"]}\n' mv_thresholds = ", ".join( [str(th) for th in results["mv_thresholds_avg"]]) message += f'Focal Loss: {results["focal_loss_avg"]}, ' message += f'Optimized Threshold: {results["opt_threshold_avg"]}, ' message += f'Re-optimized Threshold: {results["reopt_threshold_avg"]}\n' message += f'Majority Voting Thresholds: {mv_thresholds}' logger.post(message)
def main(config: dict): ''' zel pipeline example. ''' # print tag. print(10 * '*', __file__, config) saved_dir = os.path.dirname(config['saved_file']) if not os.path.exists(saved_dir): os.makedirs(saved_dir) # create datasets. provide train and eval data. dataset = Crossel( '../datasets/crossel', { 'TRAIN_WAY_PORTION': 0.9, 'CANDIDATE_USING_TEXT': config['context'], 'TEST_LANGUAGE': 'uk' # the value doesn't matter here. }) # tensorizer. converts an example to tensors. tensorizer = EasyBertTokenizer.from_pretrained( '../pretrain/multi_cased_L-12_H-768_A-12', { 'FIXED_LEN': config['fixed_len'], 'DO_LOWER_CASE': True }) # adapter. calls the tensorizer to convert a batch of examples to batched tensors. adapter = ZelAdapter(tensorizer, tensorizer) # embedding model, used for prediction. bert = Bert.from_pretrained('../pretrain/multi_cased_L-12_H-768_A-12', { 'POOLING_METHOD': 'avg', 'FINETUNE_LAYER_RANGE': '9:12' }) # siamese bert for training. model = SimilarNet(bert, bert, bert.config.hidden_size, { 'DROP_OUT_PROB': 0.1, 'ACT_NAME': 'relu', 'USE_BIAS': False }) # trainer, used to train the siamese bert. trainer = Trainer({ 'dataset': dataset, 'adapter': adapter, 'model': model, 'DEVICE': torch.device(config['device']), 'TRAIN_BATCH_SIZE': 150, 'VALID_BATCH_SIZE': 500, 'ROUND': 10 }) # training starts here. trainer.train() # training done; fetch the bert model for prediction. tester = ZelPredictor( model, adapter, { 'TEST_BATCH_SIZE': 200, 'EMB_BATCH_SIZE': 1000, 'DEVICE': torch.device(config['device']) }) # add candidates. tester.set_candidates(dataset.all_candidates()) tester.save(config['saved_file']) # testing starts here. for lan in config['lan']: print(f'testing language: {lan}') test_dataset = Crossel('../datasets/crossel', {'TEST_LANGUAGE': lan}) test_data = test_dataset.test_data() for i in config['top_what']: print(f'now top-{i} ACC:') tester.test(test_data, i)
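For reference, a hypothetical invocation of main() above: the keys match the ones the function reads, but every value is illustrative only.

# Hypothetical call to main(); keys mirror what the function accesses, values are made up.
main({
    'saved_file': './saved/zel_crossel.model',  # where the predictor is saved
    'context': True,                            # CANDIDATE_USING_TEXT flag
    'fixed_len': 64,                            # tokenizer FIXED_LEN
    'device': 'cuda:0',                         # torch device string
    'lan': ['uk', 'he'],                        # test languages to iterate over
    'top_what': [1, 2, 10],                     # top-k accuracies to report
})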
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) epoch = 0 resume = False ## To resume training from a checkpoint, set resume to True, set epoch to the epoch to resume from, ## and set model_checkpoint to the desired checkpoint file. ## resume defaults to False so that training starts from scratch. model_checkpoint = 'model_TT_final25_1.pt' if resume: checkpoint = torch.load(os.getcwd() + '/Output/checkpoints/' + model_checkpoint, map_location=lambda storage, loc: storage) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) epoch = checkpoint['epoch'] # trainer trainer = Trainer(model=model, device=device, criterion_denoise=criterion_denoise, criterion_segment=criterion_seg, optimizer=optimizer, training_DataLoader=dataloader_training, validation_DataLoader=dataloader_validation, lr_scheduler=None, epochs=epochs, epoch=epoch, notebook=False, model_name=model_name) # start training training_losses, validation_losses, lr_rates = trainer.run_trainer() fig = plot_training(training_losses, validation_losses, lr_rates, gaussian=True, sigma=1, figsize=(10, 4)) torch.save(model.state_dict(), os.getcwd() + '/Output/' + model.name) fig.savefig(os.getcwd() + '/LossesFigs/' + model_name + '.jpg')
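The resume branch above reads 'model_state_dict', 'optimizer_state_dict' and 'epoch' from the checkpoint dict. The corresponding save side is not shown in this snippet; a minimal sketch of a compatible checkpoint writer, assuming the same directory layout, could look like this.

# Hypothetical sketch of writing a checkpoint compatible with the resume logic above.
# The keys mirror what the loading code reads; the path layout is illustrative only.
import os
import torch

def save_checkpoint(model, optimizer, epoch, name='model_TT_final25_1.pt'):
    ckpt_dir = os.path.join(os.getcwd(), 'Output', 'checkpoints')
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
    }, os.path.join(ckpt_dir, name))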
def main(): # script for training a model using 100% train set args = argument_parser.parse_args() print(args) torch.manual_seed(args.seed) lmdb_handle = dataset_base.LMDBHandle( os.path.join(constants.HDD_DATASET_ROOT, args.dataset, "dataset.lmdb"), args.memory_hog) train_set = IndoorScenes(args.dataset, lmdb_handle, args.base_size, 'train') val_set = IndoorScenes(args.dataset, lmdb_handle, args.base_size, 'val') test_set = IndoorScenes(args.dataset, lmdb_handle, args.base_size, 'test') train_set.make_dataset_multiple_of_batchsize(args.batch_size) model = DeepLab(num_classes=train_set.num_classes, backbone=args.backbone, output_stride=args.out_stride, sync_bn=args.sync_bn) model = model.cuda() class_weights = None if args.use_balanced_weights: class_weights = calculate_weights_labels(train_set) saver = Saver(args) trainer = Trainer(args, model, train_set, val_set, test_set, class_weights, Saver(args)) summary = TensorboardSummary(saver.experiment_dir) writer = summary.create_summary() start_epoch = 0 if args.resume: args.resume = os.path.join(constants.RUNS, args.dataset, args.resume, 'checkpoint.pth.tar') if not os.path.isfile(args.resume): raise RuntimeError(f"=> no checkpoint found at {args.resume}") checkpoint = torch.load(args.resume) start_epoch = checkpoint['epoch'] trainer.model.load_state_dict(checkpoint['state_dict']) trainer.optimizer.load_state_dict(checkpoint['optimizer']) trainer.best_pred = checkpoint['best_pred'] print( f'=> loaded checkpoint {args.resume} (epoch {checkpoint["epoch"]})' ) lr_scheduler = trainer.lr_scheduler for epoch in range(start_epoch, args.epochs): trainer.training(epoch) if epoch % args.eval_interval == (args.eval_interval - 1): trainer.validation(epoch) if lr_scheduler: lr_scheduler.step() epoch = trainer.load_best_checkpoint() _, best_mIoU, best_mIoU_20, best_Acc, best_Acc_class, best_FWIoU = trainer.validation( epoch, test=True) writer.add_scalar('test/mIoU', best_mIoU, epoch) writer.add_scalar('test/mIoU_20', best_mIoU_20, epoch) writer.add_scalar('test/Acc', best_Acc, epoch) writer.add_scalar('test/Acc_class', best_Acc_class, epoch) writer.add_scalar('test/fwIoU', best_FWIoU, epoch) trainer.train_writer.close() trainer.val_writer.close()
import sys import logging logging.basicConfig( format='%(asctime)s [%(levelname)s] %(message)s', level=logging.INFO) # gets env vars + sets up MAX_NUM_THREADS from config import * # training_utils imports from utils.trainer import Trainer from utils.data import get_data, DataBunch logger = logging.getLogger(__name__) if __name__ == "__main__": x, y = get_data(seed=10) # PREPROCESSING data = DataBunch(x, y) data.process() # TRAINING trainer = Trainer(data) trainer.fit(n_epochs=5) sys.exit(0)
opts = parser.parse_args() # set threads num torch.set_num_threads(opts.thread) # load the data train_features_list = torch.load(opts.data_dir + '/train.sst') dev_features_list = torch.load(opts.data_dir + '/dev.sst') test_features_list = torch.load(opts.data_dir + '/test.sst') # load word-level vocab vocab = torch.load(opts.data_dir + '/vocab.sst') # load char-level vocab char_vocab = torch.load(opts.data_dir + '/char_vocab.sst') label_vocab = torch.load(opts.data_dir + '/label_vocab.sst') rel_vocab = torch.load(opts.data_dir + '/rel_vocab.sst') train_dev_test = (train_features_list, dev_features_list, test_features_list) #build batch # build_batcher = Build_Batch(features=train_features_list, opts=opts, pad_idx=vocab) vocab = (vocab, char_vocab) train = Trainer(train_dev_test, opts, vocab, label_vocab, rel_vocab=rel_vocab) train.train()
hard_gumbel_softmax=False, batch_discriminator=False) # optimizer optimizer = GraphGANOptimizer(model, learning_rate=1e-3, feature_matching=False) # session config = tf.ConfigProto() config.gpu_options.allow_growth = True session = tf.Session(config=config) session.run(tf.global_variables_initializer()) # trainer trainer = Trainer(model, optimizer, session) print('Parameters: {}'.format( np.sum([np.prod(e.shape) for e in session.run(tf.trainable_variables())]))) if is_pretraining: current_dir = (os.path.dirname(os.path.realpath(__file__)) + "/results/pretraining/molgan_la" + str(la)) else: current_dir = (os.path.dirname(os.path.realpath(__file__)) + "/results/noPretraining/molgan_la" + str(la)) if is_training: print("train") print('lambda', la) trainer.train(batch_dim=batch_dim,
'inference': False } config = { 'optimizer': 'adam', # 'SGD' | 'adam' | 'RMSprop' 'learningRate': { 'lr': 0.0005 }, # learning rate for the optimizer 'weight_decay': 0, # weight_decay value 'VggFc7Size': 4096, # Fixed, do not change 'embedding_size': 128, # word embedding size 'vocabulary_size': 4000, # number of different words 'truncated_backprop_length': 20, 'hidden_state_sizes': 256, # rnn hidden state size 'num_rnn_layers': 2, # number of stacked rnn's 'cellType': 'RNN' # RNN or GRU } # create an instance of the model you want model = Model(config, modelParam) # create an instance of the saver and restorer class saveRestorer = SaverRestorer(config, modelParam) model = saveRestorer.restore(model) # create your data generator dataLoader = DataLoaderWrapper(config, modelParam) # train the model trainer = Trainer(model, modelParam, config, dataLoader, saveRestorer) trainer.train()
# save input_data for decode data.save_model_info(args.save_model_info_dir) train_loader = Data.DataLoader( dataset=train_data, batch_size=args.batch_size, shuffle=True, collate_fn=collate_fn if not use_cuda else collate_fn_cuda) dev_loader = Data.DataLoader( dataset=dev_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn if not use_cuda else collate_fn_cuda) test_loader = Data.DataLoader( dataset=test_data, batch_size=args.batch_size, shuffle=False, collate_fn=collate_fn if not use_cuda else collate_fn_cuda) print("Building Model...") model = BiLSTM_MFVI(data) if use_cuda: model.cuda() print(model) trainer = Trainer(model, args, train_loader) evaluator = Evaluator(label_vocab) trainer.train(train_loader, dev_loader, test_loader, evaluator) print("finish")
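collate_fn and collate_fn_cuda are defined elsewhere in that project; presumably the CUDA variant differs only by moving the collated batch to the GPU. A hedged sketch of that pattern, with hypothetical padding-based collation of (sequence, label) pairs:

# Hypothetical sketch of a CUDA-moving collate function; the project's actual
# collate_fn / collate_fn_cuda are not shown in this snippet and may differ.
import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    # batch: list of (token_id_tensor, label) pairs
    seqs, labels = zip(*batch)
    seqs = pad_sequence(seqs, batch_first=True, padding_value=0)
    return seqs, torch.tensor(labels)

def collate_fn_cuda(batch):
    # same collation, then move the batched tensors to the GPU
    seqs, labels = collate_fn(batch)
    return seqs.cuda(), labels.cuda()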
prior_pool = PriorPool( lookup_table, arch_param_nums, generator, model, test_loader, CONFIG) optimizer = get_optimizer(model, CONFIG.optim_state) g_optimizer = get_optimizer(generator, CONFIG.g_optim_state) scheduler = get_lr_scheduler(optimizer, len(train_loader), CONFIG) start_time = time.time() trainer = Trainer( criterion, optimizer, g_optimizer, scheduler, writer, device, lookup_table, prior_pool, CONFIG) # call the trainer to train the supernet (when warmup > 0) and then train the generator (starting from the warmup epochs) trainer.search_train_loop(train_loader, val_loader, val_loader, model, generator) logging.info("Total search time: {:.2f}".format(time.time() - start_time)) logging.info("=================================== Experiment title : {} End ===========================".format(args.title))
def main(logger, args): df_train, _ = load_data(INPUT_DIR, logger) if args['debug']: df_train = df_train.iloc[:200000] logger.info('Extract nlp features') df_train = extract_nlp_features(df_train) else: logger.info('Extract nlp features') df_train = extract_nlp_features(df_train) logger.info('Preprocess text') df_train = preprocess_text(df_train) seq_train, tokenizer = tokenize_text(df_train, logger) logger.info('Pad train text data') seq_train = pad_sequences(seq_train, maxlen=PADDING_LENGTH) label_train = df_train['target'].values.reshape(-1, 1) if args['debug']: embedding_matrix = np.random.rand(len(tokenizer.word_index) + 1, 300).astype(np.float32) else: logger.info('Load multiple embeddings') embedding_matrices = load_multiple_embeddings( tokenizer.word_index, embed_types=[0, 2], max_workers=args['max_workers']) embedding_matrix = np.array(embedding_matrices).mean(0) continuous_columns = [ 'total_length', 'n_capitals', 'n_words', 'n_puncts', 'n_?', 'n_!', 'n_you' ] for col in continuous_columns: scaler = StandardScaler() df_train[col] = scaler.fit_transform(df_train[col].values.astype( np.float32).reshape(-1, 1)).reshape(-1, ) x_continuous = [ df_train[col].values.reshape(-1, 1) for col in continuous_columns ] # ===== training and evaluation loop ===== # device_ids = args['device_ids'] output_device = device_ids[0] torch.cuda.set_device(device_ids[0]) torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = True batch_size = args['batch_size'] * len(device_ids) epochs = EPOCHS trigger = TRIGGER logger.info('Start training and evaluation loop') model_specs = [ { 'nlp_dim': 64, 'nlp_dropout': 0.2, 'num_dense_layers': 1, 'mask': False }, { 'nlp_dim': 32, 'nlp_dropout': 0.2, 'num_dense_layers': 1, 'mask': False }, { 'nlp_dim': 16, 'nlp_dropout': 0.2, 'num_dense_layers': 2, 'mask': False }, { 'nlp_dim': 32, 'nlp_dropout': 0.2, 'num_dense_layers': 2, 'mask': False }, { 'nlp_dim': 64, 'nlp_dropout': 0.5, 'num_dense_layers': 2, 'mask': False }, { 'nlp_dim': 32, 'nlp_dropout': 0.5, 'num_dense_layers': 1, 'mask': False }, { 'nlp_dim': 64, 'nlp_dropout': 0.2, 'num_dense_layers': 1, 'mask': True }, { 'nlp_dim': 32, 'nlp_dropout': 0.2, 'num_dense_layers': 2, 'mask': True }, ] model_name_base = 'NLPFeaturesRNN' for spec_id, spec in enumerate(model_specs): model_name = model_name_base + f'_specId={spec_id}_nlpdim={spec["nlp_dim"]}_nlpdrop={spec["nlp_dropout"]}' model_name += f'_numlayers={spec["num_dense_layers"]}_mask={spec["mask"]}' skf = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED) oof_preds_optimized = np.zeros(len(seq_train)) oof_preds_majority = np.zeros(len(seq_train)) results = [] for fold, (index_train, index_valid) in enumerate( skf.split(label_train, label_train)): logger.info( f'Fold {fold + 1} / {KFOLD} - create dataloader and build model' ) x_train = { 'text': seq_train[index_train].astype(int), 'continuous': [x[index_train] for x in x_continuous] } x_valid = { 'text': seq_train[index_valid].astype(int), 'continuous': [x[index_valid] for x in x_continuous] } y_train, y_valid = label_train[index_train].astype( np.float32), label_train[index_valid].astype(np.float32) model = NLPFeaturesRNN({'continuous': len(x_continuous)}, embedding_matrix, PADDING_LENGTH, hidden_size=64, out_hidden_dim=64, out_drop=0.3, embed_drop=0.1, dense_activate='relu', nlp_hidden_dim=spec['nlp_dim'], mask=spec['mask'], nlp_dropout=spec['nlp_dropout'], factorize=False, num_dense_layers=spec['num_dense_layers']) steps_per_epoch = seq_train[index_train].shape[0] // batch_size 
scheduler_trigger_steps = steps_per_epoch * trigger step_size = steps_per_epoch * (epochs - trigger) // NUM_SNAPSHOTS config = { 'epochs': epochs, 'batch_size': batch_size, 'output_device': output_device, 'criterion_type': 'bce', 'criteria_weights': [1.0, 1.0], 'criterion_gamma': 2.0, 'criterion_alpha': 0.75, 'optimizer': 'adam', 'optimizer_lr': 0.003, 'num_snapshots': NUM_SNAPSHOTS, 'scheduler_type': 'cyclic', 'base_lr': 0.0005, 'max_lr': 0.003, 'step_size': step_size, 'scheduler_mode': 'triangular', 'scheduler_gamma': 0.9, 'scheduler_trigger_steps': scheduler_trigger_steps, 'sampler_type': 'normal', 'seed': SEED } trainer = Trainer(model, logger, config) eval_results = trainer.train_and_eval_fold(x_train, y_train, x_valid, y_valid, fold) oof_preds_majority[index_valid] = np.array( [res['preds_binary'] for res in eval_results]).mean(0) > 0.5 oof_majority_f1 = f1_score( label_train.reshape(-1, )[index_valid], oof_preds_majority[index_valid]) oof_preds_proba = np.array( [res['preds_proba'] for res in eval_results]).mean(0) oof_threshold_mean: float = np.mean( [res['best_threshold'] for res in eval_results]) oof_preds_optimized[ index_valid] = oof_preds_proba > oof_threshold_mean oof_optimized_f1 = f1_score( label_train.reshape(-1, )[index_valid], oof_preds_optimized[index_valid]) message = f'Fold {fold + 1} / {KFOLD} has been done.\n' message += f'Score: majority voting - {oof_majority_f1:.6f}, optimized threshold - {oof_optimized_f1:.6f}' logger.post(message) post_to_snapshot_spreadsheet( logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='SCORE', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, snapshot_info=[res['f1'] for res in eval_results]) post_to_snapshot_spreadsheet( logger, SPREADSHEET_SNAPSHOT_URL, eval_type='SNAPSHOT', tag='THRESHOLD', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, snapshot_info=[res['best_threshold'] for res in eval_results]) post_to_main_spreadsheet(logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME, model_name=model_name, fold=fold, f1_majority=oof_majority_f1, f1_optimized=oof_optimized_f1, threshold=oof_threshold_mean) results.append({ 'f1_majority': oof_majority_f1, 'f1_optimized': oof_optimized_f1, 'threshold': oof_threshold_mean }) f1_majority_mean = np.mean([res['f1_majority'] for res in results]) f1_majority_std = np.std([res['f1_majority'] for res in results]) f1_optimized_mean = np.mean([res['f1_optimized'] for res in results]) f1_optimized_std = np.std([res['f1_optimized'] for res in results]) threshold_mean = np.mean([res['threshold'] for res in results]) total_metrics = [ f1_majority_mean, f1_majority_std, f1_optimized_mean, f1_optimized_std, threshold_mean ] post_to_main_spreadsheet(logger, SPREADSHEET_MAIN_URL, eval_type='SNAPSHOT', script_name=SCRIPT_NAME, model_name=model_name, fold=-1, f1_majority=-1, f1_optimized=-1, threshold=-1, others=total_metrics) message = 'KFold training and evaluation has been done.\n' message += f'F1 majority voting - Avg: {f1_majority_mean}, Std: {f1_majority_std}\n' message += f'F1 optimized - Avg: {f1_optimized_mean}, Std: {f1_optimized_std}\n' message += f'Threshold - Avg: {threshold_mean}' logger.post(message)
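Each snapshot's eval result contributes a 'best_threshold' that is averaged above into oof_threshold_mean. How that threshold is found is not shown in this snippet; a common approach, sketched here as an assumption rather than the repository's actual code, is a grid search maximizing F1 over the validation probabilities.

# Hypothetical per-snapshot threshold search over predicted probabilities.
# The actual search inside train_and_eval_fold may differ.
import numpy as np
from sklearn.metrics import f1_score

def find_best_threshold(y_true, proba, grid=np.arange(0.1, 0.9, 0.01)):
    # evaluate F1 at each candidate threshold and keep the best one
    scores = [(t, f1_score(y_true, proba > t)) for t in grid]
    best_threshold, best_f1 = max(scores, key=lambda x: x[1])
    return best_threshold, best_f1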
def main(logger, args): df_train, _ = load_data(INPUT_DIR, logger) if args['debug']: df_train = df_train.iloc[:30000] logger.info('Extract nlp features') df_train = extract_nlp_features(df_train) else: logger.info('Extract nlp features') df_train = extract_nlp_features(df_train) logger.info('Preprocess text') df_train = preprocess_text(df_train) seq_train, tokenizer = tokenize_text(df_train, logger) logger.info('Pad train text data') seq_train = pad_sequences(seq_train, maxlen=PADDING_LENGTH) label_train = df_train['target'].values.reshape(-1, 1) logger.info('Load multiple embeddings') embedding_matrices = load_multiple_embeddings( tokenizer.word_index, embed_types=[0, 1, 2], max_workers=args['max_workers']) continuous_columns = [ 'total_length', 'n_capitals', 'n_words', 'n_puncts', 'n_?', 'n_!', 'n_you' ] for col in continuous_columns: scaler = StandardScaler() df_train[col] = scaler.fit_transform(df_train[col].values.astype( np.float32).reshape(-1, 1)).reshape(-1, ) x_continuous = [ df_train[col].values.reshape(-1, 1) for col in continuous_columns ] # ===== training and evaluation loop ===== # device_ids = args['device_ids'] output_device = device_ids[0] torch.cuda.set_device(device_ids[0]) torch.backends.cudnn.benchmark = True torch.backends.cudnn.deterministic = True batch_size = args['batch_size'] * len(device_ids) trigger = TRIGGER if args['debug']: epochs = 3 n_splits = 2 else: epochs = EPOCHS n_splits = KFOLD logger.info('Start training and evaluation loop') model_specs = [] for h in ['mean', 'concat']: for t in [[0, 1], [0, 2], [1, 2], [0, 1, 2]]: for d in [0.1, 0.2]: model_specs.append({'how': h, 'type': t, 'embed_drop': d}) embed_type_map = {0: 'glove', 1: 'fasttext', 2: 'paragram'} model_name_base = 'NLPFeaturesRNN' for spec_id, spec in enumerate(model_specs): if spec['how'] == 'mean': joint = 'x' embedding_matrix = np.array( [embedding_matrices[i] for i in spec['type']]).mean(0) else: joint = '+' embedding_matrix = np.concatenate( [embedding_matrices[i] for i in spec['type']], axis=1) embed_type = joint.join([embed_type_map[t] for t in spec['type']]) model_name = model_name_base + f'_specId={spec_id}_embedtype={embed_type}_embeddrop={spec["embed_drop"]}' skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED) oof_mv_preds = np.zeros(len(seq_train)) oof_preds_proba = np.zeros(len(seq_train)) oof_opt_preds = np.zeros(len(seq_train)) oof_reopt_preds = np.zeros(len(seq_train)) results_list = [] for fold, (index_train, index_valid) in enumerate( skf.split(label_train, label_train)): logger.info( f'Fold {fold + 1} / {KFOLD} - create dataloader and build model' ) x_train = { 'text': seq_train[index_train].astype(int), 'continuous': [x[index_train] for x in x_continuous] } x_valid = { 'text': seq_train[index_valid].astype(int), 'continuous': [x[index_valid] for x in x_continuous] } y_train, y_valid = label_train[index_train].astype( np.float32), label_train[index_valid].astype(np.float32) model = NLPFeaturesRNN({'continuous': len(x_continuous)}, embedding_matrix, PADDING_LENGTH, hidden_size=64, out_hidden_dim=64, out_drop=0.3, embed_drop=spec['embed_drop'], dense_activate='relu', nlp_hidden_dim=16, mask=True, nlp_dropout=0.2, factorize=False, num_dense_layers=2) steps_per_epoch = seq_train[index_train].shape[0] // batch_size scheduler_trigger_steps = steps_per_epoch * trigger step_size = steps_per_epoch * (epochs - trigger) // NUM_SNAPSHOTS config = { 'epochs': epochs, 'batch_size': batch_size, 'output_device': output_device, 'criterion_type': 'bce', 
'criteria_weights': [1.0, 1.0], 'criterion_gamma': 2.0, 'criterion_alpha': 0.75, 'optimizer': 'adam', 'optimizer_lr': 0.003, 'num_snapshots': NUM_SNAPSHOTS, 'scheduler_type': 'cyclic', 'base_lr': 0.0005, 'max_lr': 0.003, 'step_size': step_size, 'scheduler_mode': 'triangular', 'scheduler_gamma': 0.9, 'scheduler_trigger_steps': scheduler_trigger_steps, 'sampler_type': 'normal', 'seed': SEED } trainer = Trainer(model, logger, config) eval_results = trainer.train_and_eval_fold(x_train, y_train, x_valid, y_valid, fold) fold_results = calculate_fold_metrics( eval_results, label_train[index_valid].reshape(-1, )) results_list.append(fold_results) message = f'Fold {fold + 1} / {KFOLD} has been done.\n' message += f'Majority Voting - F1: {fold_results["oof_mv_f1"]}, ' message += f'Precision: {fold_results["oof_mv_precision"]}, Recall: {fold_results["oof_mv_recall"]}\n' message += f'Optimized - F1: {fold_results["oof_opt_f1"]}, ' message += f'Precision: {fold_results["oof_opt_precision"]}, Recall: {fold_results["oof_opt_recall"]}\n' message += f'Re-optimized - F1: {fold_results["oof_reopt_f1"]}, ' message += f'Precision: {fold_results["oof_reopt_precision"]}, Recall: {fold_results["oof_reopt_recall"]}\n' message += f'Focal Loss: {fold_results["oof_focal_loss"]}, ' message += f'Optimized Threshold: {fold_results["oof_opt_threshold"]}, ' message += f'Re-optimized Threshold: {fold_results["oof_reopt_threshold"]}, ' logger.post(message) eval_results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name, 'fold_id': fold } for res in eval_results: res.update(eval_results_addition) post_to_snapshot_metrics_table(data=res, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) fold_results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name, 'fold_id': fold } fold_results.update(fold_results_addition) post_to_fold_metrics_table(fold_results, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) oof_mv_preds[index_valid] = fold_results['oof_mv_preds'] oof_opt_preds[index_valid] = fold_results['oof_opt_preds'] oof_reopt_preds[index_valid] = fold_results['oof_reopt_preds'] oof_preds_proba[index_valid] = fold_results['oof_preds_proba'] results = calculate_total_metrics(results_list) results_addition = { 'date': datetime.now(), 'script_name': SCRIPT_NAME, 'spec_id': spec_id, 'model_name': model_name } results.update(results_addition) post_to_total_metrics_table(results, project_id=BQ_PROJECT_ID, dataset_name=BQ_DATASET) message = 'KFold training and evaluation has been done.\n' message += f'Majority Voting - F1: avg = {results["mv_f1_avg"]}, std = {results["mv_f1_std"]}, ' message += f'Precision: {results["mv_precision_avg"]}, Recall: {results["mv_recall_avg"]}\n' message += f'Optimized - F1: avg = {results["opt_f1_avg"]}, std = {results["opt_f1_std"]}, ' message += f'Precision: {results["opt_precision_avg"]}, Recall: {results["opt_recall_avg"]}\n' message += f'Re-optimized - F1: avg = {results["reopt_f1_avg"]}, std = {results["reopt_f1_std"]}, ' message += f'Precision: {results["reopt_precision_avg"]}, Recall: {results["reopt_recall_avg"]}\n' mv_thresholds = ", ".join( [str(th) for th in results["mv_thresholds_avg"]]) message += f'Focal Loss: {results["focal_loss_avg"]}, ' message += f'Optimized Threshold: {results["opt_threshold_avg"]}, ' message += f'Re-optimized Threshold: {results["reopt_threshold_avg"]}\n' message += f'Majority Voting Thresholds: {mv_thresholds}' logger.post(message)
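Both scripts rely on the same out-of-fold bookkeeping: arrays of length len(seq_train) are filled at index_valid each fold, so every training row receives exactly one prediction. A stripped-down sketch of that pattern with dummy data (illustrative only):

# Minimal sketch of the out-of-fold bookkeeping used above, with dummy data.
import numpy as np
from sklearn.model_selection import StratifiedKFold

X = np.random.rand(1000, 5)
y = np.random.randint(0, 2, size=1000)

oof_preds_proba = np.zeros(len(y))
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold, (index_train, index_valid) in enumerate(skf.split(X, y)):
    # train a model on X[index_train], y[index_train] here ...
    fold_proba = np.random.rand(len(index_valid))   # stand-in for real fold predictions
    oof_preds_proba[index_valid] = fold_proba       # each row is filled exactly once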