Example #1
import warnings

import torch
import torch.nn as nn

# parse_opts, dataLoadFunc, StudentModel and SmartModel are project-local imports (assumed)

warnings.filterwarnings(action='ignore')  # silence all warnings

if __name__ == '__main__':
    opt = parse_opts()
    
    # Detect devices
    use_cuda = torch.cuda.is_available()                   # check if GPU exists
    device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU

    train_loader, valid_loader = dataLoadFunc(opt)  # project-specific loader helper

    student_model = StudentModel(opt)  # renamed: avoid shadowing the StudentModel class
    student_model = student_model.to(device)
    smartModel = None
    perturbation_model = None
    if opt.smart_model:
        opt.is_smart_model = True
        smartModel = SmartModel(opt)
        smartModel = smartModel.to(device)

    # Parallelize model to multiple GPUs
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs!")
        student_model = nn.DataParallel(student_model)
        params = list(student_model.module.parameters())  # fixed typo: 'parms'

        if opt.isSource and smartModel is not None:  # smartModel exists only when opt.smart_model is set
            smartModel = nn.DataParallel(smartModel)
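
For context, the nn.DataParallel pattern used above replicates a module across all visible GPUs and exposes the original module through .module; a minimal self-contained sketch with a toy model (sizes and names hypothetical, not from the example):

import torch
from torch import nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.Linear(16, 4).to(device)            # toy stand-in for StudentModel

if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)             # replicate across all visible GPUs
    params = list(model.module.parameters())   # .module is the wrapped original model
else:
    params = list(model.parameters())
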
Example #2
            bridge = bridges.to(args.device)   # move the bridge module(s) to the target device
            t_model.eval()                     # keep the teacher model in eval mode
        elif args.task == 'student':
            model = StudentModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())
            param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]
        elif args.task == 'teacher':
            model = TeacherModel(params=args, pretrained_embedding=torch.tensor(pretrained_embedding).float())
            param_groups = [{'params': model.parameters(), 'lr': args.learning_rate}]
        if args.classify_loss:
            classifier = PathClassifier(params=args)
            param_groups.append({'params': classifier.parameters(), 'lr': args.learning_rate})
            classifier = classifier.to(args.device)  # fixed: was assigned to a stray 'classifiers' name
        optimizer = SGD(param_groups, lr=args.learning_rate)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=args.gamma)

        model = model.to(args.device)

        total_step = 0
        eval_result = {}
        accum_train_link_loss, accum_train_label_loss = 0, 0
        accum_distill_loss, accum_classify_loss = 0, 0
        accum_eval_loss = 0
        scheduler_step = 0
        best_eval_result = None
        stop_sign = 0  # early-stopping counter

        for epoch in range(args.epoches):
            print('epoch {} training...'.format(epoch + 1))
            print('dialogue model learning rate {:.4f}'.format(optimizer.param_groups[0]['lr']))
            model.train()
            for batch in tqdm(train_dataloader):
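
The optimizer setup in this example attaches separate parameter groups to one SGD optimizer and decays every group's learning rate once per epoch via StepLR; a minimal self-contained sketch of that pattern (toy modules and rates are hypothetical, not from the example):

import torch
from torch import nn
from torch.optim import SGD
from torch.optim import lr_scheduler

model = nn.Linear(8, 2)                        # toy stand-ins for the example's models
classifier = nn.Linear(2, 2)
param_groups = [
    {'params': model.parameters(), 'lr': 0.1},
    {'params': classifier.parameters(), 'lr': 0.1},
]
optimizer = SGD(param_groups, lr=0.1)
scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.5)

for epoch in range(3):
    # ... forward/backward/optimizer.step() would go here ...
    scheduler.step()                           # multiplies every group's lr by gamma
    print(optimizer.param_groups[0]['lr'])     # 0.05, 0.025, 0.0125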