# --- Transfer-learning tail: evaluate, keep best weights, log, upload. ---
# NOTE(review): everything up to the "[Epoch ...]" print references `epoch`
# and presumably sits inside the training loop whose header is outside this
# view; torch.save(...) onward runs once after training — confirm nesting.
accr = model_eval(test_df, model, istransfer=True)
if accr > high_acc:
    # Snapshot the weights of the best-scoring model seen so far.
    high_acc = accr
    best_model = model.state_dict()
    print('model is saved')
writer.add_scalar("Loss/Train", total_loss / total_count, epoch + 1)
writer.add_scalar("LearningRate/Train", scheduler.get_last_lr()[0], epoch + 1)
print("[Epoch {}/{}] Train Loss: {:.4f}, Learning Rate: {:.7f}".format(
    epoch + 1,
    epochs,
    total_loss / total_count,
    scheduler.get_last_lr()[0],
))

# NOTE(review): if no epoch ever beats the initial high_acc, best_model is
# never assigned and torch.save raises NameError — confirm high_acc's
# initial value in the part of the file outside this view.
torch.save(best_model, './reptile')
compress_object(args.transfer, './reptile')
try:
    save_object(client, args.bucket, args.transfer)
except Exception as exc:
    # Upload is best-effort, but a bare `except:` hid the reason and even
    # swallowed KeyboardInterrupt/SystemExit; report the actual error.
    print("model save error to minio", exc)

# Kubeflow Pipelines UI metadata pointing the UI at the TensorBoard logs.
metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
    json.dump(metadata, fd)
# --- Pre-training tail: warmup schedule, HF Trainer run, artifact upload. ---
# NOTE(review): num_training_steps = len(dataset) * epochs assumes one
# optimizer step per sample; with per-device batch size > 1 the linear
# schedule finishes early — verify against training_args' batch size.
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=args.warmupsteps,
    num_training_steps=len(dataset) * args.epochs,
)
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=dataset,
    prediction_loss_only=True,
    optimizers=(optimizer, scheduler),
)
trainer.train()

# Persist the pretrained model, zip it, and push it to MinIO (best-effort).
trainer.save_model("./pretrained")
compress_object(args.pretrained, "./pretrained")
try:
    save_object(client, args.bucket, args.pretrained)
except Exception as exc:
    # Bare `except:` previously discarded the failure reason entirely.
    print("*****************model save error to minio*******************", exc)

# Kubeflow Pipelines UI metadata pointing the UI at the TensorBoard logs.
metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
    json.dump(metadata, fd)
parser.add_argument("-K", "--secretkey", help="secret key")
args = parser.parse_args()

# Fetch the training corpus from MinIO. Failure is tolerated so the
# component can still run against a locally present corpus file, but the
# previous bare `except: pass` hid the reason — log it instead.
try:
    client = connect_server(args.host, args.accesskey, args.secretkey)
    load_object(client, args.bucket, args.corpusdata)
except Exception as exc:
    print('error', exc)

os.makedirs("./pretrained", exist_ok=True)

# Train a byte-level BPE tokenizer over every copy of the corpus file
# found under the working directory.
paths = [str(x) for x in Path(".").glob("**/{}".format(args.corpusdata))]
tokenizer = ByteLevelBPETokenizer()
tokenizer.train(
    files=paths,
    vocab_size=args.vocabsize,
    min_frequency=50,
    special_tokens=["<s>", "<pad>", "</s>", "<unk>", "<mask>"],
)
tokenizer.save_model("./pretrained")

# Zip and upload the tokenizer artifacts (best-effort, like the download).
compress_object(args.tokenizer, "./pretrained")
try:
    save_object(client, args.bucket, args.tokenizer)
except Exception as exc:
    print("tokenizer upload error", exc)
# NOTE(review): this first assignment reads as the tail of a loop whose
# header is outside this view — confirm indentation against the full file.
param.requires_grad = True

downstream_training(epochs=20, learning_rate=LEARN_RATE / 2, denum=4)

# Freeze the classifier head ('fc.' parameters) and fine-tune the backbone.
print("**********************freezing model**************************")
# BUGFIX: the original zip(model.parameters(), model.state_dict()) pairs
# parameters with state_dict keys positionally, but state_dict() also
# contains non-trainable buffers (e.g. BatchNorm running stats), so names
# and parameters drift out of alignment; named_parameters() pairs them
# correctly and yields only trainable parameters.
for name, param in model.named_parameters():
    param.requires_grad = 'fc.' not in name
downstream_training(epochs=10, learning_rate=LEARN_RATE, denum=4)

# Unfreeze everything again before saving the final checkpoint.
for param in model.parameters():
    param.requires_grad = True

torch.save(model.state_dict(), './contra-downstream')
compress_object('contrastive.zip', './contra-downstream')
try:
    save_object(client, args.bucket, 'contrastive.zip')
except Exception as exc:
    # Best-effort upload; bare `except:` previously hid the failure reason.
    print("model save error to minio", exc)

# Kubeflow Pipelines UI metadata pointing the UI at the TensorBoard logs.
metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]}
with open("/opt/mlpipeline-ui-metadata.json", "w") as fd:
    json.dump(metadata, fd)
tokenizer.encode(t, max_length=512, truncation=True) for t in text ] padded_list = [ e[:512] + [0] * (512 - len(e[:512])) for e in encoded_list ] sample = torch.tensor(padded_list) sample, label = sample.to(device), label.to(device) labels = torch.tensor(label) outputs = model(sample, labels=labels) _, logits = outputs pred = torch.argmax(F.softmax(logits), dim=1) correct = pred.eq(labels) total_correct += correct.sum().item() total_len += len(labels) print("Test accuracy: ", total_correct / total_len) model.save_pretrained("./pretrained") compress_object(args.downstream, "./pretrained") try: save_object(client, args.bucket, args.downstream) except: pass metadata = {"outputs": [{"type": "tensorboard", "source": args.logdir}]} with open("/opt/mlpipeline-ui-metadata.json", "w") as fd: json.dump(metadata, fd)