def main():
    """Entry point: build a MobileNetV2 model and drive the train/validate/test loop."""
    manager = Manager.init()
    models = [["model", MobileNetV2(**manager.args.model)]]
    manager.init_model(models)
    args = manager.args

    criterion = Criterion()
    optimizer, scheduler = Optimizer(models, args.optim).init()

    # Only enable CUDA when both requested by config and actually available.
    args.cuda = args.cuda and torch.cuda.is_available()
    if args.cuda:
        for _, net in models:
            net.cuda()
        criterion.cuda()

    dataloader = DataLoader(args.dataloader, args.cuda)
    summary = manager.init_summary()
    trainer = Trainer(models, criterion, optimizer, scheduler, dataloader, summary, args.cuda)

    first_epoch = args.runtime.start_epoch
    last_epoch = args.runtime.num_epochs + first_epoch
    for epoch in range(first_epoch, last_epoch):
        try:
            print("epoch {}...".format(epoch))
            trainer.train(epoch)
            manager.save_checkpoint(models, epoch)
            # Periodic validation every `test_every` epochs.
            if (epoch + 1) % args.runtime.test_every == 0:
                trainer.validate()
        except KeyboardInterrupt:
            # Ctrl-C stops training early but still runs the final test below.
            print("Training had been Interrupted\n")
            break
    trainer.test()
def main():
    """Entry point: load the translation corpus, build the model and train it."""
    cuda_ok = torch.cuda.is_available()
    args = parse()

    # Vocabularies and padding indices are produced by the DataLoader.
    dl = DataLoader()
    train_iter, valid_iter = dl.load_translation(
        data_path=args.data_path,
        exts=('.' + args.src, '.' + args.tgt),  # ('.zh', '.en')
        batch_size=args.batch_size,
        dl_save_path=args.dl_path)
    args.n_src_words = len(dl.SRC.vocab)
    args.n_tgt_words = len(dl.TGT.vocab)
    args.src_pdx = dl.src_padding_index
    args.tgt_pdx = dl.tgt_padding_index
    print(args)

    model = build_model(args, cuda_ok=cuda_ok)
    # Standard Transformer optimizer settings; padding tokens excluded from the loss.
    adam = torch.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.98), eps=1e-9)
    xent = nn.CrossEntropyLoss(ignore_index=args.tgt_pdx, reduction='mean')
    trainer = Trainer(args, model=model, optimizer=adam, criterion=xent, cuda_ok=cuda_ok)
    trainer.train(train_iter, valid_iter, n_epochs=args.n_epochs, save_path=args.ckpt_path)
def main():
    """Set up the CortexEpfl experiment (config, network, data, optimizer) and train."""
    logger.info("Load Config")
    data_and_support = CortexEpfl()
    cfg = config.load_config(data_and_support.name)

    logger.info("Initialize Experiment")
    trial_path, trial_id, log_msg = init(cfg)
    logger.info('Experiment ID: {}, Trial ID: {}, GPU: {}'.format(
        cfg.experiment_idx, trial_id, GPU_index))

    logger.info("Network config")
    model_config = NetworkConfig(
        cfg.step_count, cfg.first_layer_channels, cfg.num_classes,
        cfg.num_input_channel, True, cfg.ndims, 'same', trial_id,
        cfg.batch_size, cfg)

    logger.info("Create network")
    classifier = network(model_config)
    classifier.cuda()

    logger.info("Load data")
    # The input patch shape must match what the network can actually consume.
    cfg.patch_shape = model_config.in_out_shape(cfg.hint_patch_shape)
    data = data_and_support.load_data(cfg)
    loader = DataLoader(data[DataModes.TRAINING],
                        batch_size=classifier.config.batch_size, shuffle=True)
    logger.info("Trainset length: {}".format(len(loader)))

    logger.info("Initialize optimizer")
    trainable = filter(lambda p: p.requires_grad, classifier.parameters())
    optimizer = optim.Adam(trainable, lr=cfg.learning_rate)

    logger.info("Initialize evaluator")
    evaluator = Evaluator(classifier, optimizer, data, trial_path, cfg,
                          data_and_support, cfg.train_mode)

    logger.info("Initialize trainer")
    trainer = Trainer(classifier, loader, optimizer, cfg.numb_of_epochs,
                      cfg.eval_every, trial_path, evaluator, log_msg)
    trainer.train()
def main(model_name):
    """Load the YAML config for *model_name*, build the train set and run training.

    Args:
        model_name: base name of a config file located at ./config/<model_name>.yaml
    """
    # Load the YAML config. Use a context manager so the file handle is closed
    # promptly (the original `yaml.load(open(...))` leaked the handle).
    with open("./config/" + str(model_name) + ".yaml", "r") as config_file:
        config = yaml.load(config_file, Loader=yaml.FullLoader)

    trainset = MyTrainSetWrapper(**config['train'])

    # Initialize the Trainer class and run training.
    downstream = Trainer(trainset, model_name, config)
    downstream.train()
def _prepare_conll(args, split_file):
    """Build the CoNLL-formatted splits for *args.dataset* when *split_file* is missing.

    Returns the processor class registered for this dataset.
    """
    data_vocab_class, processor_class, conll_config_path = dataset_name_to_class[args.dataset]
    conll_configs = config_loader(conll_config_path)
    if not os.path.exists(os.path.join(conll_configs['data_path'], split_file)):
        # Building the vocab object has side effects needed by the conversion below.
        data_vocab = data_vocab_class(conll_configs)
        conll_to_train_test_dev(conll_configs['label_file'], conll_configs['data_path'])
    return processor_class


def _load_task_configs(args):
    """Load the run config, point its directories at this dataset and create them."""
    configs = config_loader(args.config_path)
    for key in ('data_dir', 'finetune_model_dir', 'output_dir'):
        configs[key] = os.path.join(configs[key], args.dataset.lower())
        check_dir(configs[key])
    return configs


def main():
    """CLI entry point: dispatch to training or evaluation based on --task.

    The original duplicated the entire data/config preparation between the
    'train' and 'eval' branches; that shared setup now lives in the two
    helpers above.
    """
    args = args_parser()
    if args.task == 'train':
        processor_class = _prepare_conll(args, 'train.txt')
        configs = _load_task_configs(args)
        processor = processor_class()
        for model_class in configs['model_class']:
            print('Begin Training %s Model on corpus %s' % (model_class, args.dataset))
            trainer = Trainer(configs, model_class, processor)
            trainer.train()
    if args.task == 'eval':
        processor_class = _prepare_conll(args, 'test.txt')
        configs = _load_task_configs(args)
        processor = processor_class()
        for model_class in configs['model_class']:
            print('Begin Evaluate %s Model on corpus %s' % (model_class, args.dataset))
            predicter = Predictor(configs, model_class, processor)
            predicter.eval()
def run(model_cls, loss, predictor, acc_calc, train_dataset, valid_dataset, sps):
    """Train *model_cls* on the Age task with hyper-parameters taken from *sps*.

    Args:
        model_cls: model class instantiated by the Trainer.
        loss: loss module/function.
        predictor: prediction post-processing callable.
        acc_calc: accuracy calculator handed to the Trainer.
        train_dataset / valid_dataset: datasets for the two phases.
        sps: dict of hyper-parameters (epoch_num, momentum, weight_decay, ...).
    """
    # All logging / artifact locations are derived from one alias.
    alias = generate_alias(model_cls, task='Age')
    msg = generate_file_msg(sps, loss, predictor, acc_calc)
    tb_log_path = os.path.join('runs', alias)
    save_dir = os.path.join('models', alias)
    logger_alias = alias
    # NOTE(review): parameters_func and gpu_id come from the enclosing module
    # scope — confirm they are defined wherever this function is used.
    config = Config(epoch_num=sps['epoch_num'], momentum=sps['momentum'],
                    weight_decay=sps['weight_decay'],
                    learning_rates=sps['learning_rates'],
                    decay_points=sps['decay_points'],
                    batch_size=sps['batch_size'],
                    parameters_func=parameters_func,
                    tb_log_path=tb_log_path, save_dir=save_dir,
                    pretrain=sps['pretrain'],
                    pretrained_model_dir=sps['pretrained_model_dir'],
                    load_function=sps['load_function'],
                    logger_alias=logger_alias, gpu_id=gpu_id)
    logger = Logger()
    logger.open_file(os.path.join('log'), alias=alias,
                     file_name=alias + '.txt', file_msg=msg)
    # BUG FIX: the original passed a module-level `calculator` instead of the
    # `acc_calc` parameter, silently ignoring the caller's calculator (or
    # raising NameError when no such global existed).
    trainer = Trainer(model_cls=model_cls, loss=loss, predictor=predictor,
                      calculator=acc_calc, train_dataset=train_dataset,
                      val_dataset=valid_dataset, config=config, logger=logger)
    trainer.train()
    logger.close_file(alias)
def train(data, config):
    """Build a classifier, optimizer and classifier-specific Trainer, then train.

    Args:
        data: dict holding at least 'id2label' and 'label_size'.
        config: run configuration; read via attributes for options and via
            item assignment for 'num_labels' (mirrors existing usage).

    Raises:
        ValueError: if config.classifier names an unsupported model.
    """
    id2label = data['id2label']
    label_size = data['label_size']
    config['num_labels'] = label_size
    model = Classifier(config)(num_labels=label_size)

    optimizer = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    optimizer.set_parameters(model.parameters())

    # Each classifier family needs a different criterion / metric set.
    if config.classifier in ('BertCNN', 'BertRCNN', 'BertDPCNN', 'BertFC'):
        trainer = Trainer(config, model=model, logger=logger,
                          criterion=BCEWithLogLoss(), optimizer=optimizer,
                          early_stopping=None,
                          epoch_metrics=[AUC(average='micro', task_type='binary'),
                                         MultiLabelReport(id2label=id2label),
                                         F1Score(average='micro')])
    elif config.classifier in ('BertSGM', 'SGM'):
        # Sequence-generation models: token-level CE, padding excluded.
        criterion = nn.CrossEntropyLoss(ignore_index=dict_helper.PAD, reduction='none')
        if config.n_gpu != '':
            criterion.cuda()
        trainer = Trainer(config, model=model, logger=logger,
                          criterion=criterion, optimizer=optimizer,
                          early_stopping=None,
                          epoch_metrics=[AUC(average='micro', task_type='binary'),
                                         F1Score(average='micro')])
    elif config.classifier == 'BertSeq2Set':
        trainer = Trainer(config, model=model, logger=logger,
                          criterion=None, optimizer=optimizer,
                          early_stopping=None,
                          epoch_metrics=[AUC(average='micro', task_type='binary'),
                                         F1Score(average='micro')])
    else:
        # Previously an unknown classifier fell through to an opaque NameError
        # on `trainer`; fail fast with a clear message instead.
        raise ValueError('Unsupported classifier: {}'.format(config.classifier))

    trainer.train(data=data, seed=config.seed)
from utils.dataloader import DataLoader
import torch
from model import model_utils
from optimizer.optimizer import NoamOpt
from train.trainer import Trainer

# Transformer hyper-parameters.
hidden_size = 256
num_encoder = 6
num_decoder = 6
n_head = 8
pf_dim = 1024
drop_out = 0.5

# NOTE(review): the CUDA availability check that used to precede this line was
# dead code — the device was unconditionally overridden to CPU right after.
# Behavior is kept; switch to
#   torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# to re-enable GPU training.
device = 'cpu'

dataloader = DataLoader(device)
train_iterator, valid_iterator, test_iterator = dataloader.load_data(64)

model = model_utils.create_model(dataloader.src_vocab_size(),
                                 dataloader.trg_vocab_size(),
                                 hidden_size, num_encoder, num_decoder,
                                 n_head, pf_dim, drop_out,
                                 dataloader.get_pad_idx(), device)
print(model_utils.count_parameters(model))
model_utils.init(model)

# Noam learning-rate schedule wrapping Adam (lr is driven by the schedule).
optimizer = NoamOpt(hidden_size, 1, 2000,
                    torch.optim.Adam(model.parameters(), lr=0,
                                     betas=(0.9, 0.98), eps=1e-9))
trainer = Trainer(train_iterator, valid_iterator, model, optimizer,
                  dataloader.get_pad_idx(), device)
trainer.train(5)
def train():
    """Construct a Trainer from the module-level flags' config and run training."""
    Trainer(flags.cfg).train()
"""Run script

Author: Alaaeldin Ali
"""
from train.trainer import Trainer
import argparse

parser = argparse.ArgumentParser()
# type= added so CLI-supplied values are parsed as numbers, matching the defaults.
parser.add_argument('--lr', default=2.5e-4, type=float)
parser.add_argument('--vis_screen', default='Relnet')
parser.add_argument('--save_path', default=None)
parser.add_argument('-warmup', action='store_true')
parser.add_argument('--batch_size', default=64, type=int)
args = parser.parse_args()

# BUG FIX: the namespace has no `screen` attribute (the flag is --vis_screen),
# so the original crashed here with AttributeError.
trainer = Trainer(lr=args.lr,
                  screen=args.vis_screen,
                  batch_size=args.batch_size,
                  save_path=args.save_path,
                  warmup=args.warmup)
trainer.train()
def train(args):
    """Train a (multi-label) text classifier end to end.

    Builds train & validation DataLoaders via the configured post-processor,
    constructs the Classifier (optionally resuming from a checkpoint), sets up
    AdamW with a linear warmup schedule (optionally fp16 via apex), then
    delegates the epoch loop to Trainer.

    Args:
        args: parsed CLI namespace (data_name, batch sizes, seq lengths,
            epochs, fp16 flags, resume_path, ...). Reads the module-level
            `config` dict and `logger`.
    """
    ########### data ###########
    processor = Postprocessor(
        config["postprocessor"])(do_lower_case=args.do_lower_case)
    label_list = processor.get_labels(config['data_dir'] / "labels.txt")
    id2label = {i: label for i, label in enumerate(label_list)}

    # Training examples/features are cached on disk, keyed by pretrain name
    # and max sequence length, so repeated runs skip re-processing.
    train_data = processor.get_train(config['data_dir'] /
                                     "{}.train.pkl".format(args.data_name))
    train_examples = processor.create_examples(
        lines=train_data,
        example_type='train',
        cached_examples_file=config["data_dir"] /
        "cached_train_examples_{}".format(args.pretrain))
    train_features = processor.create_features(
        examples=train_examples,
        max_seq_len=args.train_max_seq_len,
        cached_features_file=config["data_dir"] /
        "cached_train_features_{}_{}".format(args.train_max_seq_len, args.pretrain))
    train_dataset = processor.create_dataset(train_features, is_sorted=args.sorted)
    # Sorted data must be consumed in order; otherwise shuffle via RandomSampler.
    if args.sorted:
        train_sampler = SequentialSampler(train_dataset)
    else:
        train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    # Validation pipeline mirrors the training one (always sequential order).
    valid_data = processor.get_dev(config["data_dir"] /
                                   "{}.valid.pkl".format(args.data_name))
    valid_examples = processor.create_examples(
        lines=valid_data,
        example_type='valid',
        cached_examples_file=config["data_dir"] /
        "cached_valid_examples_{}".format(args.pretrain))
    valid_features = processor.create_features(
        examples=valid_examples,
        max_seq_len=args.eval_max_seq_len,
        cached_features_file=config["data_dir"] /
        "cached_valid_features_{}_{}".format(args.eval_max_seq_len, args.pretrain))
    valid_dataset = processor.create_dataset(valid_features)
    valid_sampler = SequentialSampler(valid_dataset)
    valid_dataloader = DataLoader(valid_dataset,
                                  sampler=valid_sampler,
                                  batch_size=args.eval_batch_size)

    # Without a pretrained encoder the model needs the tokenizer's vocab size.
    if config["pretrain"] == "Nopretrain":
        config["vocab_size"] = processor.vocab_size

    ########### model ###########
    logger.info("========= initializing model =========")
    if args.resume_path:
        resume_path = Path(args.resume_path)
        model = Classifier(config["classifier"], config["pretrain"],
                           resume_path)(num_labels=len(label_list))
    else:
        model = Classifier(config["classifier"], config["pretrain"],
                           "")(num_labels=len(label_list))

    # Total optimizer steps over all epochs, accounting for grad accumulation.
    t_total = int(
        len(train_dataloader) / args.gradient_accumulation_steps * args.epochs)
    param_optimizer = list(model.named_parameters())
    # Standard BERT recipe: no weight decay on biases and LayerNorm weights.
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': args.weight_decay
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    warmup_steps = int(t_total * args.warmup_proportion)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    lr_scheduler = WarmupLinearSchedule(optimizer,
                                        warmup_steps=warmup_steps,
                                        t_total=t_total)
    if args.fp16:
        try:
            from apex import amp
        except ImportError as e:
            raise ImportError(
                "Please install apex github.com/nvidia/apex to use fp16.")
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.fp16_opt_level)

    ########### callback ###########
    logger.info("========= initializing callbacks =========")
    train_monitor = TrainingMonitor(file_dir=config['figure_dir'],
                                    arch=args.pretrain)
    model_checkpoint = ModelCheckpoint(checkpoint_dir=config['checkpoint_dir'],
                                       mode=args.mode,
                                       monitor=args.monitor,
                                       arch=args.pretrain,
                                       save_best_only=args.save_best)

    ########### train ###########
    logger.info("========= Running training =========")
    logger.info(" Num examples = {}".format(len(train_examples)))
    logger.info(" Num Epochs = {}".format(args.epochs))
    logger.info(" Total train batch size \
(w. parallel, distributed & accumulation) = {}".format(
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1)))
    logger.info(" Gradient Accumulation steps = {}".format(
        args.gradient_accumulation_steps))
    logger.info(" Total optimization steps = {}".format(t_total))
    trainer = Trainer(
        n_gpu=args.n_gpu,
        model=model,
        epochs=args.epochs,
        logger=logger,
        criterion=BCEWithLogLoss(),
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        early_stopping=None,
        training_monitor=train_monitor,
        fp16=args.fp16,
        resume_path=args.resume_path,
        grad_clip=args.grad_clip,
        model_checkpoint=model_checkpoint,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        batch_metrics=[AccuracyThresh(thresh=0.5)],
        epoch_metrics=[
            AUC(average='micro', task_type='binary'),
            MultiLabelReport(id2label=id2label)
        ])
    trainer.train(train_data=train_dataloader,
                  valid_data=valid_dataloader,
                  seed=args.seed)
def train(train_dataloader, config, logger):
    """Train the SiameseNetwork, checkpointing the best/last model by train loss.

    Args:
        train_dataloader: iterable of training batches.
        config: run configuration (optim, lr, load_model, resume, save_dir, ...).
        logger: project logger with .write(msg, flag) and .error(exc).

    Raises:
        ValueError: if config.optim is neither "adam" nor "sgd".
    """
    net = SiameseNetwork(config)
    if config.optim == "adam":
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                               lr=config.lr)
    elif config.optim == "sgd":
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, net.parameters()),
                              lr=config.lr, momentum=0.78)
    else:
        # Previously an unknown optimizer name left `optimizer` undefined and
        # crashed later with an opaque NameError; fail fast instead.
        raise ValueError('Unsupported optimizer: {}'.format(config.optim))

    # Resume the model if needed
    start_epoch = 0
    if config.load_model != '':
        net, optimizer, start_epoch = load_model(net, config.load_model, optimizer,
                                                 config.resume, config.lr)
    trainer = Trainer(config, net, optimizer, logger)
    trainer.set_device()
    trainer.freeze()

    loss_history = []
    min_loss = float('inf')
    # When resuming, keep writing checkpoints to the file we resumed from.
    if config.load_model != '' and config.resume:
        model_best_loc = model_last_loc = config.load_model
    else:
        model_best_loc = os.path.join(
            config.save_dir, config.arch + get_name(config) + '_best.pth')
        model_last_loc = os.path.join(
            config.save_dir, config.arch + get_name(config) + '_last.pth')

    try:
        print('Starting training...')
        txt = '=' * 5 + 'Starting training...' + '=' * 5 + '\n'
        txt += "*" * 25 + "\n"
        # Dump the full (non-private) config into the log for reproducibility.
        args = dict((name, getattr(config, name)) for name in dir(config)
                    if not name.startswith('_'))
        for k, v in sorted(args.items()):
            txt += '{}: {}\n'.format(str(k), str(v))
        txt += "*" * 25 + "\n"
        logger.write(txt, False)

        for epoch in tqdm(range(start_epoch + 1, config.num_epochs + 1), desc='Train'):
            loss = trainer.train(epoch, train_dataloader)
            if min_loss > loss:
                min_loss = loss
                save_model(model_best_loc, epoch, net, optimizer)
            save_model(model_last_loc, epoch, net, optimizer)
            loss_history.append(loss)
            logger.write('Epoch {0}: Loss = {1}\n'.format(epoch, loss), False)

        print('\n\nSaving plot')
        save_plot(list(range(1, len(loss_history) + 1)), loss_history, config.save_dir)
        logger.write('=' * 5 + 'End training...' + '=' * 5, False)
    except KeyboardInterrupt as interrupt:
        print('\n\nSaving plot')
        save_plot(list(range(1, len(loss_history) + 1)), loss_history, config.save_dir)
        logger.write(
            'Please check error log for more details.\n' + '=' * 5 +
            'End training...' + '=' * 5, False)
        # BUG FIX: the original logged the KeyboardInterrupt *class*, not the
        # caught exception instance.
        logger.error(interrupt)
        print('Byeee...')
    except Exception as exception:
        logger.write(
            'Please check error log for more details.\n' + '=' * 5 +
            'End training...' + '=' * 5, False)
        logger.error(exception)
def main():
    """Train a simple LSTM violence classifier from pose-series data (CLI entry)."""
    parser = argparse.ArgumentParser(
        description='Train a simple LSTM model on the prepared violence dataset')
    parser.add_argument('--fight', type=str,
                        help='the path to fight data', required=True)
    parser.add_argument('--non-fight', type=str,
                        help='the path to non_fight data', required=True)
    parser.add_argument('--fight-val', type=str,
                        help='the path to fight val data', required=True)
    parser.add_argument('--non-fight-val', type=str,
                        help='the path to non_fight val data', required=True)
    parser.add_argument('--series-length', type=int,
                        help='the length of a pose series', default=10)
    parser.add_argument('--min-poses', type=int,
                        help='minimum number of poses detected for a series to be considered valid',
                        default=7)
    parser.add_argument('--batch-size', type=int, help='batch size', default=4)
    parser.add_argument('--learning-rate', '-lr', type=float,
                        help='learning rate', default=0.0001)
    parser.add_argument('--num-epochs', type=int,
                        help='number of epochs to train', default=10)
    parser.add_argument('--save-dir', type=str,
                        help='the folder to save best model in', required=True)
    args = parser.parse_args()

    # Model: 39 input features, 64 hidden units, 2 classes.
    model = DatLSTM(39, 64, 2, args.series_length)

    # Optimizer and loss (SGD / BCE variants were previously considered).
    optimizer = optim.Adam(model.parameters(), args.learning_rate)
    criterion = nn.CrossEntropyLoss()

    # Datasets and loaders: shuffle training only.
    train_dataset = ViolenceDataset(args.fight, args.non_fight,
                                    args.series_length, args.min_poses)
    val_dataset = ViolenceValDataset(args.fight_val, args.non_fight_val,
                                     args.series_length, args.min_poses)
    train_loader = DataLoader(train_dataset, args.batch_size, shuffle=True)
    valid_loader = DataLoader(val_dataset, args.batch_size, shuffle=False)

    # Make sure the checkpoint directory exists before training starts.
    os.makedirs(args.save_dir, exist_ok=True)

    trainer = Trainer(model, train_loader, valid_loader,
                      args.num_epochs, optimizer, criterion)
    trainer.train(args.save_dir)