def train():
    """Train a policy-gradient agent on the gym environment from `create_env`.

    Runs `num_episodes` episodes, updating the `Baseline` policy after each
    one via `optimize_model`, periodically checkpointing the model weights,
    and finally dumping the per-episode rewards to disk.

    Side effects:
        - writes checkpoints under ./policy_grad/
        - writes ./rewards_policy_grad.npy
        - closes the env and shows any open matplotlib figures
    """
    # Prepare gym
    env = create_env()
    # NOTE(review): `c` (channels) is unpacked but unused — the model is
    # built from height/width only; presumably Baseline fixes the channel
    # count internally. Confirm against Baseline's definition.
    h, w, c = env.observation_space.shape

    # Prepare models
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model_dir, fn = "./policy_grad", '{}.pth'
    # Fix: create the checkpoint directory up front — torch.save does not
    # create intermediate directories and would raise FileNotFoundError on
    # the first checkpoint if ./policy_grad does not already exist.
    os.makedirs(model_dir, exist_ok=True)
    model = Baseline(h, w).to(device)
    model.train()
    optimizer = optim.RMSprop(model.parameters(), lr=LEARN_RATE,
                              weight_decay=WEIGHT_DECAY)

    # Train
    steps_done = 0
    num_episodes = 2000
    episode_rewards = []
    for i_episode in tqdm(range(num_episodes)):
        # Complete 1 episode. generate_episode also appends the episode's
        # total reward to episode_rewards (it is read again below).
        print("Episode {}".format(i_episode + 1))
        i_rewards, i_states, i_actions, steps_done = generate_episode(
            env, model, device, steps_done, episode_rewards)

        # Update model with the freshly collected trajectory
        optimize_model(device, model, optimizer, i_rewards, i_actions,
                       i_states)

        # Save model every SAVE_EPI episodes; the filename embeds the most
        # recent episode reward, so equal rewards overwrite the same file.
        if (i_episode + 1) % SAVE_EPI == 0:
            path = os.path.join(model_dir, fn.format(episode_rewards[-1]))
            torch.save(model.state_dict(), path)

    print('Complete')
    np.save('./rewards_policy_grad.npy', episode_rewards)
    env.close()
    # NOTE(review): no figures are created in this function; ioff/show only
    # matter if plotting happened elsewhere (e.g. inside generate_episode).
    plt.ioff()
    plt.show()
print(model_name) model = EfficientNet.from_name(model_name) #model = EfficientNet.from_pretrained(model_name, num_classes=350) #summary(model,input_size=(3,224,224)) else: model = Baseline(args.hidden_size, args.output_size) optimizer = optim.Adam(model.parameters(), args.learning_rate) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True) criterion = nn.CrossEntropyLoss() #multi-class classification task model = model.to(device) model.train() # DONOTCHANGE: They are reserved for nsml bind_model(model) # below the nsml load nsml.load(checkpoint='15', session='team_62/airush1/40') nsml.save('stillgoing') if args.pause: nsml.paused(scope=locals()) if args.mode == "train": # Warning: Do not load data before this line dataloader = train_dataloader(args.input_size, args.batch_size, args.num_workers) for epoch_idx in range(1, args.epochs + 1): total_loss = 0
def train(args):
    """Train a PIQA `Baseline` QA model on SQuAD-style data.

    Preprocesses (or loads cached) train/dev datasets, initializes the
    model's GloVe embeddings, then runs the optimization loop with periodic
    reporting and dev-set evaluation. The best model (by dev F1) is saved
    through `interface.save` / `interface.pred`.

    Args:
        args: parsed argparse-style namespace; its entire ``__dict__`` is
            forwarded to `FileInterface` and `Baseline`. Fields read here:
            cuda, batch_size, char/glove/word_vocab_size, glove_size, elmo,
            draft, preload, cache, epochs, report_period, eval_save_period,
            eval_steps, train_steps.
    """
    start_time = time.time()
    device = torch.device('cuda' if args.cuda else 'cpu')
    pprint(args.__dict__)

    interface = FileInterface(**args.__dict__)
    piqa_model = Baseline(**args.__dict__).to(device)
    loss_model = Loss().to(device)
    # Only optimize trainable parameters (e.g. frozen embeddings excluded).
    optimizer = torch.optim.Adam(p for p in piqa_model.parameters()
                                 if p.requires_grad)

    batch_size = args.batch_size
    char_vocab_size = args.char_vocab_size
    glove_vocab_size = args.glove_vocab_size
    word_vocab_size = args.word_vocab_size
    glove_size = args.glove_size
    elmo = args.elmo
    draft = args.draft

    def preprocess(interface_):
        """Load raw SQuAD data and build processor, datasets and loaders.

        Returns a dict with the embedding matrix, the processor, both
        datasets and both data loaders, so the whole result can be cached
        as one unit by `interface.cache`.
        """
        # get data
        print('Loading train and dev data')
        train_examples = load_squad(interface_.train_path, draft=draft)
        dev_examples = load_squad(interface_.test_path, draft=draft)

        # iff creating processor
        print('Loading GloVe')
        # vocab_size is reduced by 2 — presumably reserving two special
        # tokens (e.g. pad/unk); confirm against SquadProcessor.
        glove_words, glove_emb_mat = load_glove(
            glove_size,
            vocab_size=args.glove_vocab_size - 2,
            glove_dir=interface_.glove_dir,
            draft=draft)

        print('Constructing processor')
        processor = SquadProcessor(char_vocab_size, glove_vocab_size,
                                   word_vocab_size, elmo=elmo)
        processor.construct(train_examples, glove_words)

        # data loader
        print('Preprocessing datasets')
        train_dataset = tuple(
            processor.preprocess(example) for example in train_examples)
        dev_dataset = tuple(
            processor.preprocess(example) for example in dev_examples)

        print('Creating data loaders')
        # Train sampler buckets + shuffles and caps context/question sizes;
        # the dev sampler only buckets (no shuffling, no truncation caps).
        train_sampler = SquadSampler(train_dataset,
                                     max_context_size=256,
                                     max_question_size=32,
                                     bucket=True,
                                     shuffle=True)
        train_loader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  collate_fn=processor.collate,
                                  sampler=train_sampler)

        dev_sampler = SquadSampler(dev_dataset, bucket=True)
        dev_loader = DataLoader(dev_dataset,
                                batch_size=batch_size,
                                collate_fn=processor.collate,
                                sampler=dev_sampler)

        # Optionally materialize the loaders once so later epochs skip
        # collation work (trades memory for speed).
        if args.preload:
            train_loader = tuple(train_loader)
            dev_loader = tuple(dev_loader)
        out = {
            'glove_emb_mat': glove_emb_mat,
            'processor': processor,
            'train_dataset': train_dataset,
            'dev_dataset': dev_dataset,
            'train_loader': train_loader,
            'dev_loader': dev_loader
        }

        return out

    # Either run preprocessing through the interface's cache or directly.
    out = interface.cache(
        preprocess,
        interface_=interface) if args.cache else preprocess(interface)
    glove_emb_mat = out['glove_emb_mat']
    processor = out['processor']
    train_dataset = out['train_dataset']
    dev_dataset = out['dev_dataset']
    train_loader = out['train_loader']
    dev_loader = out['dev_loader']

    print("Initializing model weights")
    piqa_model.load_glove(torch.tensor(glove_emb_mat))

    bind_model(interface, processor, piqa_model, optimizer=optimizer)

    step = 0
    best_report = None

    print('Training')
    piqa_model.train()
    for epoch_idx in range(args.epochs):
        for i, train_batch in enumerate(train_loader):
            train_batch = {
                key: val.to(device)
                for key, val in train_batch.items()
            }
            model_output = piqa_model(step=step, **train_batch)
            train_results = processor.postprocess_batch(
                train_dataset, train_batch, model_output)

            train_loss = loss_model(step=step, **model_output, **train_batch)
            train_f1 = float(
                np.mean([result['f1'] for result in train_results]))
            train_em = float(
                np.mean([result['em'] for result in train_results]))

            # optimize
            optimizer.zero_grad()
            train_loss.backward()
            optimizer.step()
            step += 1

            # report & eval & save
            # `% period == 1` fires on step 1 and then once per period
            # (never fires if the period is 1 — presumably intentional;
            # verify against how the args are usually set).
            if step % args.report_period == 1:
                report = OrderedDict(step=step,
                                     train_loss=train_loss.item(),
                                     train_f1=train_f1,
                                     train_em=train_em,
                                     time=time.time() - start_time)
                interface.report(**report)
                print(', '.join('%s=%.5r' % (s, r)
                                for s, r in report.items()))
            if step % args.eval_save_period == 1:
                with torch.no_grad():
                    # Switch to eval mode for the dev pass; restored to
                    # train mode at the end of this block.
                    piqa_model.eval()
                    loss_model.eval()
                    pred = {}
                    dev_losses, dev_results = [], []
                    # zip with range caps evaluation at eval_steps batches.
                    for dev_batch, _ in zip(dev_loader,
                                            range(args.eval_steps)):
                        dev_batch = {
                            key: val.to(device)
                            for key, val in dev_batch.items()
                        }
                        model_output = piqa_model(**dev_batch)
                        results = processor.postprocess_batch(
                            dev_dataset, dev_batch, model_output)

                        dev_loss = loss_model(step=step,
                                              **dev_batch,
                                              **model_output)

                        for result in results:
                            pred[result['id']] = result['pred']
                        dev_results.extend(results)
                        dev_losses.append(dev_loss.item())

                    dev_loss = float(np.mean(dev_losses))
                    dev_f1 = float(
                        np.mean([result['f1'] for result in dev_results]))
                    dev_em = float(
                        np.mean([result['em'] for result in dev_results]))

                    report = OrderedDict(step=step,
                                         dev_loss=dev_loss,
                                         dev_f1=dev_f1,
                                         dev_em=dev_em,
                                         time=time.time() - start_time)
                    summary = False
                    # Checkpoint + dump predictions only on a new dev-F1 best.
                    if best_report is None or report['dev_f1'] > best_report[
                            'dev_f1']:
                        best_report = report
                        summary = True
                        interface.save(iteration=step)
                        interface.pred(pred)
                    interface.report(summary=summary, **report)
                    print(
                        ', '.join('%s=%.5r' % (s, r)
                                  for s, r in report.items()),
                        '(dev_f1_best=%.5r @%d)' %
                        (best_report['dev_f1'], best_report['step']))

                    piqa_model.train()
                    loss_model.train()

            # Hard stop once the global step budget is reached.
            if step == args.train_steps:
                break
        if step == args.train_steps:
            break