def __init__(self, data_dir):
    """Hyper-parameter / path configuration for the MMoE multi-task model
    trained on the Census-Income dataset.

    Args:
        data_dir: Directory containing ``census-income.data.gz`` and
            ``census-income.test.gz``. BUG FIX: paths were previously built
            with raw string concatenation, so a missing trailing slash
            produced a broken path; ``os.path.join`` handles both forms.
    """
    import os  # local import keeps this fix self-contained

    self.model_name = 'mmoe'
    self.train_path = os.path.join(data_dir, 'census-income.data.gz')
    self.test_path = os.path.join(data_dir, 'census-income.test.gz')
    self.save_path = './saved_dict/' + self.model_name + '.ckpt'
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.require_improvement = 1000   # presumably an early-stop patience (steps) — confirm against trainer
    self.dropout = 0.5
    self.learning_rate = 3e-5
    self.label_columns = ['income_50k', 'marital_stat']  # one column per task
    self.label_dict = [2, 2]          # classes per task (both binary)
    self.num_feature = 0              # NOTE(review): looks like a placeholder filled after data load — confirm
    self.num_experts = 3
    self.num_tasks = 2
    self.units = 16
    self.hidden_units = 8
    self.embed_size = 300
    self.batch_size = 256
    self.field_size = 0              # NOTE(review): placeholder like num_feature — confirm
    self.towers_hidden = 16
    self.SB_hidden = 1024
    self.SB_output = 512
    self.num_epochs = 100
    self.loss_fn = loss_fn('binary')  # project-level loss factory defined elsewhere
def train_protonet(model, train_loader, valid_loader, conf, num_batches_tr, num_batches_vd):
    """Train a prototypical network, saving the best- and last-epoch weights.

    Args:
        model: Model to train.
        train_loader: Training data loader.
        valid_loader: Validation data loader.
        conf: Configuration object (reads ``conf.train.*`` and ``conf.path.*``).
        num_batches_tr: Number of training batches per epoch (used to average
            only the current epoch's metrics).
        num_batches_vd: Number of validation batches per epoch.

    Returns:
        best_val_acc: Best validation accuracy observed.
        model: The model in its final (last-epoch) state.
        best_state: Snapshot of the state dict at the best validation
            accuracy, or None if accuracy never exceeded 0.0.
    """
    device = torch.device('cuda' if conf.train.device == 'cuda' else 'cpu')

    optim = torch.optim.Adam(model.parameters(), lr=conf.train.lr_rate)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=optim,
        gamma=conf.train.scheduler_gamma,
        step_size=conf.train.scheduler_step_size)

    num_epochs = conf.train.epochs
    best_model_path = conf.path.best_model
    last_model_path = conf.path.last_model

    train_loss, train_acc = [], []
    val_loss, val_acc = [], []
    best_val_acc = 0.0
    best_state = None  # BUG FIX: was unbound (NameError at return) if no epoch improved

    model.to(device)
    for epoch in range(num_epochs):
        print("Epoch {}".format(epoch))

        model.train()
        for batch in tqdm(iter(train_loader)):
            optim.zero_grad()
            x, y = batch
            x = x.to(device)
            y = y.to(device)
            x_out = model(x)
            tr_loss, tr_acc = loss_fn(x_out, y, conf.train.n_shot)
            train_loss.append(tr_loss.item())
            train_acc.append(tr_acc.item())
            tr_loss.backward()
            optim.step()

        # Average only this epoch's entries, not the whole history.
        avg_loss_tr = np.mean(train_loss[-num_batches_tr:])
        avg_acc_tr = np.mean(train_acc[-num_batches_tr:])
        print('Average train loss: {} Average training accuracy: {}'.format(avg_loss_tr, avg_acc_tr))
        lr_scheduler.step()

        model.eval()
        # BUG FIX: validation previously ran with autograd enabled, wasting memory.
        with torch.no_grad():
            for batch in tqdm(iter(valid_loader)):
                x, y = batch
                x = x.to(device)
                y = y.to(device)  # BUG FIX: y was left on CPU -> device mismatch on CUDA
                x_val = model(x)
                valid_loss, valid_acc = loss_fn(x_val, y, conf.train.n_shot)
                val_loss.append(valid_loss.item())
                val_acc.append(valid_acc.item())

        avg_loss_vd = np.mean(val_loss[-num_batches_vd:])
        avg_acc_vd = np.mean(val_acc[-num_batches_vd:])
        print('Epoch {}, Validation loss {:.4f}, Validation accuracy {:.4f}'.format(epoch, avg_loss_vd, avg_acc_vd))

        if avg_acc_vd > best_val_acc:
            print("Saving the best model with validation accuracy {}".format(avg_acc_vd))
            best_val_acc = avg_acc_vd
            # BUG FIX: state_dict() values alias live parameters, so the old
            # `best_state = model.state_dict()` was silently overwritten by
            # subsequent training steps; snapshot detached copies instead.
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
            torch.save(best_state, best_model_path)
        torch.save(model.state_dict(), last_model_path)

    return best_val_acc, model, best_state