# Fine-tuning setup for the 3-way classification task: encode each data
# split, size the optimisation schedule, then build the model, the loss and
# the optimiser.
training_engine = TrainingEngine()
# transform_veracity returns a pair per split; the second element (trM, vaM,
# teM) is presumably a mask aligned with the encoded inputs — TODO confirm.
trX, trM = training_engine.transform_veracity(trX)
vaX, vaM = training_engine.transform_veracity(vaX)
if submit:
    # The test split is only encoded when `submit` is truthy.
    teX, teM = training_engine.transform_veracity(teX)
n_train = len(trY)
n_valid = len(vaY)
# Scale the configured batch size by n_gpu, using a factor of 1 when n_gpu
# is 0 so the batch size never collapses to zero.
n_batch_train = args.n_batch * max(n_gpu, 1)
# Number of whole batches in the training set times args.n_iter; handed to
# the optimiser below as t_total.
n_updates_total = (n_train // n_batch_train) * args.n_iter
# The task head is requested as ('classification', 3) — presumably three
# output classes; verify against DoubleHeadModel.
dh_model = DoubleHeadModel(args, clf_token, ('classification', 3), vocab, n_ctx)
# reduction='none' makes the criterion return unreduced, per-element losses.
criterion = nn.CrossEntropyLoss(reduction='none')
model_opt = OpenAIAdam(dh_model.parameters(),
                       lr=args.lr,
                       schedule=args.lr_schedule,
                       warmup=args.lr_warmup,
                       t_total=n_updates_total,
                       b1=args.b1,
                       b2=args.b2,
                       e=args.e,
                       l2=args.l2,
                       vector_l2=args.vector_l2,
                       max_grad_norm=args.max_grad_norm)
# The same criterion object is supplied for both criterion arguments;
# args.lm_coef is presumably the weight of the language-modelling loss term
# — confirm against MultipleChoiceLossCompute.
compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion, args.lm_coef, model_opt)
openAIModel = OpenAIModel()
# Load pretrained weights into the model's transformer (remaining arguments follow).
openAIModel.load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx,
trX, trM = transform_roc(trX1, trX2, trX3) vaX, vaM = transform_roc(vaX1, vaX2, vaX3) if submit: teX, teM = transform_roc(teX1, teX2, teX3) n_train = len(trY) n_valid = len(vaY) n_batch_train = args.n_batch * max(n_gpu, 1) n_updates_total = (n_train // n_batch_train) * args.n_iter dh_model = DoubleHeadModel(args, clf_token, 'multiple_choice', vocab, n_ctx) criterion = nn.CrossEntropyLoss(reduce=False) model_opt = OpenAIAdam( params=dh_model.parameters(), lr=args.lr, # 6.25e-5 schedule=args.lr_schedule, # warmup_linear warmup=args.lr_warmup, # 0.002 t_total=n_updates_total, # 748 b1=args.b1, # 0.9 b2=args.b2, # 0.999 e=args.e, # 1e-8 l2=args.l2, # 0.01 vector_l2=args.vector_l2, max_grad_norm=args.max_grad_norm # 1 ) compute_loss_fct = MultipleChoiceLossCompute(criterion, criterion, args.lm_coef, model_opt) load_openai_pretrained_model(dh_model.transformer, n_ctx=n_ctx,