# ---- Model setup -----------------------------------------------------------
# Everything (model and datasets) is bound to the CUDA device with index 1.
cuda = torch.device('cuda:1')
model = Classifier(config)

# Initialise the word-embedding table uniformly in [-init_range, init_range]
# and zero rows 0 and 1 (presumably reserved vocabulary indices such as
# padding/unknown -- TODO confirm against the vocabulary).
init_range = 0.1
# The writes happen under no_grad: assigning into a slice of a leaf tensor
# that requires grad is rejected by autograd with a RuntimeError, whereas
# in-place updates are always allowed while gradient tracking is disabled.
with torch.no_grad():
    model.encoder.bilstm.word_embedding.weight.uniform_(
        -init_range, init_range)
    model.encoder.bilstm.word_embedding.weight[0, :] = 0.0
    model.encoder.bilstm.word_embedding.weight[1, :] = 0.0
# Make sure the embedding table is trained together with the rest of the model.
model.encoder.bilstm.word_embedding.weight.requires_grad = True
model.to(cuda)

# ---- Loss and optimiser ----------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(),
                       lr=0.001,
                       betas=[0.9, 0.999],
                       eps=1e-8,
                       weight_decay=0)

# ---- Data ------------------------------------------------------------------
# Both loaders run in the main process (num_workers=0); the datasets receive
# the target device directly.
train_loader = DataLoader(Yelp_Dataset('train', cuda),
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=0)
valid_loader = DataLoader(Yelp_Dataset('dev', cuda),
                          batch_size=BATCH_SIZE,
                          shuffle=True,
                          num_workers=0)

# ---- Training loop ---------------------------------------------------------
# min_loss tracks the best validation loss seen so far; count is reset to 0
# every time the validation loss improves.
min_loss = float('inf')
count = 0
for epoch in range(0, 50):
    train(model, train_loader, optimizer, epoch)
    valid_loss = valid(model, valid_loader, optimizer, epoch)
    if valid_loss < min_loss:
        count = 0
        min_loss = valid_loss
            end = time.time()
            times.update(end-begin)
            print('epoch %d, %d/%d, validation loss: %f, time estimated: %.2f seconds'%(epoch, i+1,len(valid_loader),losses.avg, times.avg*len(valid_loader)), end='\r')
        print("\n")
    return losses.avg


# ---- Hyper-parameters and model setup --------------------------------------
BATCH_SIZE = 50
# Everything (model and datasets) is bound to the CUDA device with index 1.
cuda = torch.device('cuda:1')
model = Classifier(config)
# Train both the word and the pos embedding tables together with the rest of
# the model.
model.encoder.bilstm.word_embedding.weight.requires_grad = True
model.encoder.bilstm.pos_embedding.weight.requires_grad = True
model.to(cuda)

# ---- Loss and optimiser ----------------------------------------------------
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=0.001,
    betas=[0.9, 0.999],
    eps=1e-8,
    weight_decay=0,
)

# ---- Data ------------------------------------------------------------------
# Both loaders run in the main process (num_workers=0); the datasets receive
# the target device directly.
train_loader = DataLoader(
    Yelp_Dataset('train', cuda),
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)
valid_loader = DataLoader(
    Yelp_Dataset('dev', cuda),
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

# ---- Training loop with best-model checkpointing ---------------------------
# min_loss tracks the best validation loss seen so far; count is reset to 0
# every time the validation loss improves.
min_loss = float('inf')
count = 0
for epoch in range(0, 50):
    train(model, train_loader, optimizer, epoch)
    valid_loss = valid(model, valid_loader, optimizer, epoch)
    if valid_loss < min_loss:
        count = 0
        min_loss = valid_loss
        # One checkpoint per improvement, tagged with epoch and loss.
        model_path = '/scratch/near/anlp/saved_model_pos_fine_tune/2/epoch_%d_%.2fmodel' % (epoch, valid_loss)
        directory = os.path.dirname(model_path)
        # exist_ok=True replaces the check-then-create sequence, which could
        # fail if the directory appeared between the check and the creation.
        os.makedirs(directory, exist_ok=True)
        # NOTE: this pickles the whole module object, so loading the file
        # later requires the same class definitions to be importable.
        torch.save(model, model_path)