def main():
    args = parse_args()
    set_seed(args.seed)
    env = gym.make(args.env_id)
    net = get_net(env)
    approximator = Approximator(net, alpha=args.alpha, loss=nn.MSELoss)
    get_eps = get_get_epsilon(args.it_at_min, args.min_epsilon)
    train(approximator, env, get_epsilon=get_eps, **vars(args))
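# Hedged sketch, not part of the original source: `get_get_epsilon` above is
# referenced but never defined in this file. Assuming it is a factory for a
# linearly decaying epsilon-greedy exploration schedule that bottoms out at
# `min_epsilon` once iteration `it_at_min` is reached, a minimal version
# could look like this:
def get_get_epsilon(it_at_min, min_epsilon):
    """Return a function mapping iteration number -> exploration epsilon."""
    def get_epsilon(it):
        if it >= it_at_min:
            return min_epsilon
        # Anneal linearly from 1.0 down to min_epsilon over the first it_at_min iterations
        return 1.0 - (1.0 - min_epsilon) * (it / it_at_min)
    return get_epsilon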
def main(cli_args):
    # Read from the config file and build args
    with open(os.path.join(cli_args.config_dir, cli_args.task, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    init_logger()
    set_seed(args)

    processor = processors[args.task](args)
    labels = processor.get_labels()
    if output_modes[args.task] == "regression":
        config = CONFIG_CLASSES[args.model_type].from_pretrained(
            args.model_name_or_path,
            num_labels=tasks_num_labels[args.task],
        )
    else:
        config = CONFIG_CLASSES[args.model_type].from_pretrained(
            args.model_name_or_path,
            num_labels=tasks_num_labels[args.task],
            id2label={str(i): label for i, label in enumerate(labels)},
            label2id={label: i for i, label in enumerate(labels)},
        )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )
    model = MODEL_FOR_SEQUENCE_CLASSIFICATION[args.model_type].from_pretrained(
        args.model_name_or_path,
        config=config,
    )

    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    # Load datasets
    train_dataset = load_and_cache_examples(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = load_and_cache_examples(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = load_and_cache_examples(args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev set, evaluate on the test set

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset, test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c)
            for c in sorted(glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True))
        )
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            model = MODEL_FOR_SEQUENCE_CLASSIFICATION[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, test_dataset, mode="test", global_step=global_step)
            result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
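# Hedged sketch, not part of the original source: `AttrDict` above wraps the
# JSON config so its keys can be read as attributes (args.ckpt_dir,
# args.do_train, ...). One common minimal implementation of this pattern:
class AttrDict(dict):
    """Dict whose items are also accessible as attributes."""
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__dict__ = self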
def train(args, train_dataset, model, tokenizer):
    """Train the model."""
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay);
    # bias and LayerNorm weights are excluded from weight decay
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=int(t_total * args.warmup_proportion), num_training_steps=t_total
    )

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
        os.path.join(args.model_name_or_path, "scheduler.pt")
    ):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Train batch size per GPU = %d", args.train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps,
    )
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 1
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path):
        try:
            # Set global_step to the global_step of the last saved checkpoint from the model path
            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split("/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (len(train_dataloader) // args.gradient_accumulation_steps)

            logger.info("  Continuing training from checkpoint, will skip to saved global_step")
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d", global_step)
            logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
        except ValueError:
            logger.info("  Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    mb = master_bar(range(int(args.num_train_epochs)))
    set_seed(args)  # Added here for reproducibility
    for epoch in mb:
        epoch_iterator = progress_bar(train_dataloader, parent=mb)
        for step, batch in enumerate(epoch_iterator):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "start_positions": batch[3],
                "end_positions": batch[4],
            }
            if args.model_type in ["xlm", "roberta", "distilbert", "distilkobert", "xlm-roberta"]:
                del inputs["token_type_ids"]
            if args.model_type in ["xlnet", "xlm"]:
                inputs.update({"cls_index": batch[5], "p_mask": batch[6]})
                if args.version_2_with_negative:
                    inputs.update({"is_impossible": batch[7]})
                if hasattr(model, "config") and hasattr(model.config, "lang2id"):
                    inputs.update(
                        {"langs": (torch.ones(batch[0].shape, dtype=torch.int64) * args.lang_id).to(args.device)}
                    )

            outputs = model(**inputs)
            # Model outputs are always a tuple in transformers (see doc)
            loss = outputs[0]

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            loss.backward()
            tr_loss += loss.item()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Log metrics
                if args.logging_steps > 0 and global_step % args.logging_steps == 0:
                    # Only evaluate on a single GPU, otherwise metrics may not average well
                    if args.evaluate_during_training:
                        results = evaluate(args, model, tokenizer, global_step=global_step)
                        for key in sorted(results.keys()):
                            logger.info("  %s = %s", key, str(results[key]))
                    logging_loss = tr_loss

                # Save model checkpoint
                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Take care of distributed/parallel training
                    model_to_save = model.module if hasattr(model, "module") else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    logger.info("Saving model checkpoint to %s", output_dir)

                    if args.save_optimizer:
                        torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                        torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                        logger.info("Saving optimizer and scheduler states to %s", output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                break

        mb.write("Epoch {} done".format(epoch + 1))

        if args.max_steps > 0 and global_step > args.max_steps:
            break

    return global_step, tr_loss / global_step
def main(cli_args):
    # Read from the config file and build args
    with open(os.path.join(cli_args.config_dir, cli_args.task, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        logger.warning(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    init_logger()
    set_seed(args)

    logging.getLogger("transformers.data.metrics.squad_metrics").setLevel(logging.WARN)  # Reduce model loading logs

    # Load pretrained model and tokenizer
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
    )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )
    model = MODEL_FOR_QUESTION_ANSWERING[args.model_type].from_pretrained(
        args.model_name_or_path,
        config=config,
    )

    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c)
            for c in sorted(glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True))
        )
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce model loading logs
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce model loading logs
        logger.info("Evaluate the following checkpoints: %s", checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1]
            model = MODEL_FOR_QUESTION_ANSWERING[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, global_step=global_step)
            result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
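# Hedged sketch, not part of the original source: `init_logger()` is called in
# every entry point but not defined here. A minimal version configures the
# root logger with a timestamped format for the logger.info calls above:
import logging


def init_logger():
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
    )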
def main(cli_args):
    # Read from the config file and build args
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("CLI parameters {}".format(cli_args))

    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode
    args.margin = cli_args.margin

    init_logger()
    set_seed(args)

    # Map the CLI transformer mode to a pretrained checkpoint name
    model_link = None
    if cli_args.transformer_mode.upper() == "T5":
        model_link = "t5-base"
    elif cli_args.transformer_mode.upper() == "ELECTRA":
        model_link = "google/electra-base-discriminator"
    elif cli_args.transformer_mode.upper() == "ALBERT":
        model_link = "albert-base-v2"
    elif cli_args.transformer_mode.upper() == "ROBERTA":
        model_link = "roberta-base"
    elif cli_args.transformer_mode.upper() == "BERT":
        model_link = "bert-base-uncased"
    logger.info(model_link)

    tokenizer = AutoTokenizer.from_pretrained(model_link)

    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)

    # Load datasets
    train_dataset = BaseDataset(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev set, evaluate on the test set

    # Log and save once per epoch
    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps

    labelNumber = train_dataset.getLabelNumber()
    labels = [str(i) for i in range(labelNumber)]

    config = AutoConfig.from_pretrained(model_link)

    # GPU or CPU
    args.device = "cuda:{}".format(cli_args.gpu) if torch.cuda.is_available() and not args.no_cuda else "cpu"
    config.device = args.device

    model = MODEL_LIST[cli_args.model_mode](
        model_link, args.model_type, args.model_name_or_path, config, labelNumber, args.margin
    )
    model.to(args.device)

    if args.do_train:
        global_step, tr_loss = train(args, model, train_dataset, dev_dataset, test_dataset)
        logger.info(" global_step = {}, average loss = {}".format(global_step, tr_loss))

    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c)
            for c in sorted(glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True))
        )
        if not args.eval_all_checkpoints:
            checkpoints = checkpoints[-1:]
        else:
            logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            global_step = checkpoint.split("-")[-1]
            # Reload via the same model_mode key used to build the model above;
            # this assumes the class exposes a from_pretrained constructor
            model = MODEL_LIST[args.model_mode].from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, test_dataset, mode="test", global_step=global_step)
            result = dict((k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))
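# Hedged sketch, not part of the original source: the transformer_mode
# if/elif chain above (repeated in the next entry point) maps a CLI flag to a
# Hugging Face checkpoint name; an equivalent, more compact form is a dict
# lookup. `TRANSFORMER_LINKS` and `get_model_link` are illustrative names,
# not part of the original code:
TRANSFORMER_LINKS = {
    "T5": "t5-base",
    "ELECTRA": "google/electra-base-discriminator",
    "ALBERT": "albert-base-v2",
    "ROBERTA": "roberta-base",
    "BERT": "bert-base-uncased",
}


def get_model_link(transformer_mode):
    """Resolve a CLI transformer mode (case-insensitive) to a checkpoint name."""
    return TRANSFORMER_LINKS.get(transformer_mode.upper())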
def main(cli_args):
    # Read from the config file and build args
    max_checkpoint = "checkpoint-best"
    # Saved training args from the best checkpoint (superseded by the JSON config below)
    args = torch.load(os.path.join("ckpt", cli_args.result_dir, max_checkpoint, "training_args.bin"))
    with open(os.path.join(cli_args.config_dir, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    logger.info("Training/evaluation parameters {}".format(args))
    logger.info("CLI parameters {}".format(cli_args))

    args.output_dir = os.path.join(args.ckpt_dir, cli_args.result_dir)
    args.model_mode = cli_args.model_mode

    # GPU or CPU
    args.device = "cuda:{}".format(cli_args.gpu) if torch.cuda.is_available() and not args.no_cuda else "cpu"

    init_logger()
    set_seed(args)

    # Map the CLI transformer mode to a pretrained checkpoint name
    model_link = None
    if cli_args.transformer_mode.upper() == "T5":
        model_link = "t5-base"
    elif cli_args.transformer_mode.upper() == "ELECTRA":
        model_link = "google/electra-base-discriminator"
    elif cli_args.transformer_mode.upper() == "ALBERT":
        model_link = "albert-base-v2"
    elif cli_args.transformer_mode.upper() == "ROBERTA":
        model_link = "roberta-base"
    elif cli_args.transformer_mode.upper() == "BERT":
        model_link = "bert-base-uncased"

    tokenizer = AutoTokenizer.from_pretrained(model_link)

    args.test_file = os.path.join(cli_args.dataset, args.test_file)
    args.dev_file = os.path.join(cli_args.dataset, args.dev_file)
    args.train_file = os.path.join(cli_args.dataset, args.train_file)

    # Load datasets
    train_dataset = BaseDataset(args, tokenizer, mode="train") if args.train_file else None
    dev_dataset = BaseDataset(args, tokenizer, mode="dev") if args.dev_file else None
    test_dataset = BaseDataset(args, tokenizer, mode="test") if args.test_file else None

    if dev_dataset is None:
        args.evaluate_test_during_training = True  # If there is no dev set, evaluate on the test set

    args.logging_steps = int(len(train_dataset) / args.train_batch_size) + 1
    args.save_steps = args.logging_steps

    labelNumber = train_dataset.getLabelNumber()
    labels = [str(i) for i in range(labelNumber)]

    config = AutoConfig.from_pretrained(model_link)
    config.device = args.device

    logger.info("Testing model checkpoint {}".format(max_checkpoint))
    global_step = max_checkpoint.split("-")[-1]

    model = MODEL_LIST[cli_args.model_mode](
        model_link, args.model_type, args.model_name_or_path, config, labelNumber, -0.75
    )
    model.load_state_dict(
        torch.load(os.path.join("ckpt", cli_args.result_dir, max_checkpoint, "training_model.bin"))
    )
    model.to(args.device)

    preds, labels, result, txt_all = evaluate(args, model, test_dataset, mode="test", global_step=global_step)

    # Collect per-example predictions alongside the raw inputs and their tokenization
    pred_and_labels = pd.DataFrame([])
    pred_and_labels["data"] = txt_all
    pred_and_labels["pred"] = preds
    pred_and_labels["label"] = labels
    pred_and_labels["result"] = preds == labels
    decode_result = list(
        pred_and_labels["data"].apply(lambda x: tokenizer.convert_ids_to_tokens(tokenizer(x)["input_ids"]))
    )
    pred_and_labels["tokenizer"] = decode_result
    pred_and_labels.to_csv(
        os.path.join("ckpt", cli_args.result_dir, "test_result_" + max_checkpoint + ".csv"),
        encoding="utf-8",
    )