def time_profiling(results_dir, model, task_sampler, loader_train, device, config):
    """
    Measure the time needed for forward / backward passes on the current device.

    The per-iteration timings are pickled to ``forward_times.pkl`` and
    ``backward_times.pkl`` inside ``results_dir``.

    :param results_dir: directory where the two timing lists are written
    :param model: network called as ``model(x, task)``
    :param task_sampler: provides ``get_all(return_metrics=False)`` -> list of tasks
    :param loader_train: iterable yielding ``(x, y)`` training batches
    :param device: torch device the inputs are moved to
    :param config: dict with ``TRAIN.OPTIMIZER`` / ``TRAIN.CRITERION`` sub-configs
    :return: None
    """
    # creating input
    all_tasks = task_sampler.get_all(return_metrics=False)
    cycle_train = cycle(loader_train)

    # training stuff
    optimizer = get_optimizer(model.parameters(), config["TRAIN"]["OPTIMIZER"])
    criterion = get_criterion(config["TRAIN"]["CRITERION"])

    # CUDA kernels run asynchronously: without an explicit synchronize() a
    # host-side timer only measures kernel *launch* time, not execution time.
    use_cuda = torch.cuda.is_available() and "cuda" in str(device)

    # output
    times_forward = []
    times_backward = []

    # warm-up (cudnn autotune, allocator caches, lazy init, ...)
    for i in tqdm(range(10), ncols=100):
        task = all_tasks[i]
        x, y = next(cycle_train)
        x, y = x.to(device), y.to(device)

        preds = model(x, task)
        loss = criterion(preds, y)
        loss.backward()
        # BUG FIX: clear gradients so the warm-up passes do not keep
        # accumulating into the gradients used by the timed loop below
        optimizer.zero_grad()

    for i in tqdm(range(len(all_tasks)), ncols=100):
        task = all_tasks[i]
        x, y = next(cycle_train)
        x, y = x.to(device), y.to(device)

        # forward time (perf_counter is monotonic and higher-resolution
        # than time.time for measuring short intervals)
        if use_cuda:
            torch.cuda.synchronize(device)
        t_f = time.perf_counter()
        pred = model(x, task)
        if use_cuda:
            torch.cuda.synchronize(device)
        dt_f = time.perf_counter() - t_f
        times_forward.append(dt_f)

        # backward time (includes loss computation and the optimizer update)
        t_b = time.perf_counter()
        loss = criterion(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if use_cuda:
            torch.cuda.synchronize(device)
        dt_b = time.perf_counter() - t_b
        times_backward.append(dt_b)

    with open(os.path.join(results_dir, "forward_times.pkl"), "wb") as f:
        pickle.dump(times_forward, f)
    with open(os.path.join(results_dir, "backward_times.pkl"), "wb") as f:
        pickle.dump(times_backward, f)
Example #2
0
 def __init__(self, hparams):
     """Build the model, criterion and optional augmentations from *hparams*."""
     super(Net, self).__init__()
     # self.hparams = hparams
     self.hparams.update(vars(hparams))
     self.model = get_model(hparams)
     # BUG FIX: `args` was not defined in this scope; the criterion must be
     # built from the same `hparams` used by every other call here.
     self.criterion = get_criterion(hparams)
     if hparams.cutmix:
         self.cutmix = CutMix(hparams.size, beta=1.)
     if hparams.mixup:
         self.mixup = MixUp(alpha=1.)
     # only log images locally when no experiment-tracking API key is set
     self.log_image_flag = hparams.api_key is None
Example #3
0
def main():
    """Load a checkpoint, run prediction on the test set, save results as .npz."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--ckpt', type=str, required=True)
    # BUG FIX: the default must be None, otherwise the "fall back to the
    # checkpoint's directory" branch below is unreachable dead code.
    parser.add_argument('--outdir', type=str, default=None)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--num_workers', type=int, default=4)
    parser.add_argument('--device', type=str, default='cuda')
    args = parser.parse_args()

    # map_location='cpu' lets a GPU-trained checkpoint load on a CPU-only
    # host; the model is moved to the target device afterwards.
    ckpt = torch.load(args.ckpt, map_location='cpu')
    config = ckpt['config']
    state_dict = ckpt['state_dict']
    epoch = ckpt['epoch']

    if args.outdir is None:
        # default: write next to the checkpoint itself
        outdir = pathlib.Path(args.ckpt).parent
    else:
        outdir = pathlib.Path(args.outdir)
        outdir.mkdir(exist_ok=True, parents=True)

    use_gpu = args.device != 'cpu' and torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')

    data_config = config['data_config']
    data_config['batch_size'] = args.batch_size
    data_config['num_workers'] = args.num_workers
    data_config['use_gpu'] = use_gpu
    _, test_loader = get_loader(data_config)

    model = utils.load_model(config['model_config'])
    try:
        model.load_state_dict(state_dict)
    except Exception:
        # checkpoint was saved from a DataParallel wrapper ("module." prefix);
        # wrap, load, then unwrap back to the bare model
        model = torch.nn.DataParallel(model)
        model.load_state_dict(state_dict)
        model = model.module
    model.to(device)

    _, test_criterion = utils.get_criterion(config['data_config'])

    preds, probs, labels, loss, acc = predict(model, test_criterion,
                                              test_loader, device)

    outpath = outdir / 'preds_{:04}.npz'.format(epoch)
    np.savez(outpath,
             preds=preds,
             probs=probs,
             labels=labels,
             loss=loss,
             acc=acc)
def ft_weights(model, task, train_iter, device, config):
    """
    Fine-tune the weights of the super-net to the task at hand.

    Runs at most ``config["EVAL"]["n_ft_weights"]`` gradient steps, cycling
    over ``train_iter`` as needed, with a cosine-style scheduler whose period
    matches the step budget.

    :param model: super-net called as ``model(x, task)``
    :param task: task descriptor forwarded to the model
    :param train_iter: iterable of ``(x, y)`` batches
    :param device: torch device for the batches
    :param config: dict providing TRAIN.OPTIMIZER / TRAIN.SCHEDULER /
        TRAIN.CRITERION and EVAL.n_ft_weights entries
    :return: None (the model is updated in place)
    """
    # fine-tuning stuff
    model.train()

    # optimizer
    optimizer = get_optimizer(model.parameters(), config["TRAIN"]["OPTIMIZER"])

    # scheduler whose horizon equals the fine-tuning step budget
    scheduler_config = {
        "name": config["TRAIN"]["SCHEDULER"]["name"],
        "T_max": config["EVAL"]["n_ft_weights"]
    }
    scheduler = get_scheduler(optimizer, scheduler_config)

    # criterion
    criterion = get_criterion(config["TRAIN"]["CRITERION"])

    n_steps = 0
    while n_steps < config["EVAL"]["n_ft_weights"]:
        for x_t, y_t in train_iter:

            x_t, y_t = x_t.to(device), y_t.to(device)
            # FIX: call the module, not .forward(), so registered hooks run
            preds_t = model(x_t, task)
            loss_t = criterion(preds_t, y_t)

            optimizer.zero_grad()
            loss_t.backward()
            optimizer.step()
            scheduler.step()

            n_steps += 1
            if n_steps >= config["EVAL"]["n_ft_weights"]:
                break
Example #5
0
def main(args):
    """Train a model with periodic checkpointing, resuming, and a final save."""
    # NOTE(review): `device` is not defined in this function; it is assumed to
    # be a module-level global -- confirm against the surrounding script.
    model = get_model(args).to(device)
    optimizer = get_optimizer(args, model)
    lr_scheduler = get_scheduler(args, optimizer)
    criterion = get_criterion(args)
    start_epoch = 0

    trainloader, valloader, testloader = load_data(args)

    CHECKPOINT_PATH = f'{args.checkpoint_dir}/checkpoint.tar'

    # resume from an existing checkpoint if one is present
    if os.path.exists(CHECKPOINT_PATH):
        model, optimizer, lr_scheduler, start_epoch = checkpoint_load(
            model, optimizer, lr_scheduler, CHECKPOINT_PATH)

    print('Started training!')

    mean_losses = []
    for epoch in range(start_epoch, args.n_epochs):
        mean_train_loss = train_epoch(args, model, optimizer, criterion,
                                      trainloader)
        mean_losses.append(mean_train_loss)

        # BUG FIX: scheduler.step() must run AFTER the epoch's optimizer
        # updates; stepping at the top of the loop skipped the initial
        # learning rate and triggers a PyTorch ordering warning.
        if lr_scheduler is not None:
            lr_scheduler.step()

        train_log(args, epoch, model, criterion, valloader, mean_train_loss)

        if epoch % args.checkpoint_interval == 0:
            checkpoint_save(model, optimizer, lr_scheduler, epoch,
                            CHECKPOINT_PATH)

    if args.final_save_fpath is not None:
        torch.save(
            {
                'mean_train_loss': np.mean(mean_losses),
                'args': vars(args),
                'model_state_dict': model.state_dict()
            }, args.final_save_fpath)
    def __init__(self, params, ispretrain):
        """Build the encoder model, its Adam optimizer, scheduler and losses.

        NOTE(review): `params` is indexed both as a mapping
        (``params['input_option']``) and via attributes (``params.lr``), so it
        is presumably a config object supporting both -- verify.
        """
        super(EncoderTrainer, self).__init__()
        self.ispretrain = ispretrain
        self.input_option = params['input_option']
        self.weight = params

        # initiate the network module
        # BUG FIX: the original wrapped the model in torch.nn.DataParallel and
        # then immediately took `.module`, a no-op wrap/unwrap; same for the
        # optimizer (DataParallel on an optimizer is meaningless). Both dead
        # wraps are removed; behavior is unchanged.
        self.model = resnet34_Mano(input_option=params['input_option'])
        self.mean_3d = torch.zeros(3)

        # setup the optimizer (only over parameters that require gradients)
        lr = params.lr
        beta1 = params.beta1
        beta2 = params.beta2
        self.encoder_opt = torch.optim.Adam(
            [p for p in self.model.parameters() if p.requires_grad],
            lr=lr,
            betas=(beta1, beta2),
            weight_decay=params.weight_decay)

        self.encoder_scheduler = get_scheduler(self.encoder_opt, params)

        # set loss fn (only needed during pre-training)
        if self.ispretrain:
            self.param_recon_criterion = get_criterion(
                params['pretrain_loss_fn'])

        # Network weight initialization
        self.model.apply(weights_init(params.init))

        self.transformer = mm2px.JointTransfomer('BB')
Example #7
0
def main():
    """End-to-end training driver: parse config, seed, build data/model/optim,
    then run the train/test loop, logging epochs and checkpointing the state."""
    # parse command line argument and generate config dictionary
    config = parse_args()
    logger.info(json.dumps(config, indent=2))

    run_config = config['run_config']
    optim_config = config['optim_config']

    # TensorBoard SummaryWriter (optional)
    if run_config['tensorboard']:
        writer = SummaryWriter(run_config['outdir'])
    else:
        writer = None

    # set random seed for all RNG sources used below
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    # pre-draw one seed per epoch so each epoch is individually reproducible
    epoch_seeds = np.random.randint(
        np.iinfo(np.int32).max // 2, size=optim_config['epochs'])

    # create output directory
    outdir = pathlib.Path(run_config['outdir'])
    outdir.mkdir(exist_ok=True, parents=True)

    # save config as json file in output directory
    outpath = outdir / 'config.json'
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    # load data loaders
    train_loader, test_loader = get_loader(config['data_config'])

    # load model
    logger.info('Loading model...')
    model = utils.load_model(config['model_config'])
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('n_params: {}'.format(n_params))

    if run_config['fp16']:
        model.half()
        # keep BatchNorm in fp32 for numerical stability under fp16 training
        for layer in model.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.float()

    device = run_config['device']
    # BUG FIX: `device is not 'cpu'` compared object identity with a string
    # literal (a SyntaxWarning on modern Python and unreliable); use `!=`.
    if device != 'cpu' and torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)
    logger.info('Done')

    train_criterion, test_criterion = utils.get_criterion(
        config['data_config'])

    # create optimizer
    optim_config['steps_per_epoch'] = len(train_loader)
    optimizer, scheduler = utils.create_optimizer(model.parameters(),
                                                  optim_config)

    # run test before start training
    if run_config['test_first']:
        test(0, model, test_criterion, test_loader, run_config, writer)

    state = {
        'config': config,
        'state_dict': None,
        'optimizer': None,
        'epoch': 0,
        'accuracy': 0,
        'best_accuracy': 0,
        'best_epoch': 0,
    }
    epoch_logs = []
    for epoch, seed in zip(range(1, optim_config['epochs'] + 1), epoch_seeds):
        np.random.seed(seed)
        # train
        train_log = train(epoch, model, optimizer, scheduler, train_criterion,
                          train_loader, config, writer)

        # test
        test_log = test(epoch, model, test_criterion, test_loader, run_config,
                        writer)

        # merge train + test metrics into one record per epoch
        epoch_log = train_log.copy()
        epoch_log.update(test_log)
        epoch_logs.append(epoch_log)
        utils.save_epoch_logs(epoch_logs, outdir)

        # update state dictionary (tracks best accuracy seen so far)
        state = update_state(state, epoch, epoch_log['test']['accuracy'],
                             model, optimizer)

        # save model
        utils.save_checkpoint(state, outdir)
Example #8
0
def train_scale_equiv(model,n_epochs,train_loader_sup,train_dataset_unsup,val_loader,criterion_supervised,optimizer,scheduler,\
        Loss,gamma,batch_size,save_folder,model_name,benchmark=False,angle_max=30,size_img=520,scale_factor=(0.5,1.2),\
        save_all_ep=True,dataroot_voc='~/data/voc2012',save_best=False, device='cpu',num_classes=21):
    """
        A complete training loop for a scale-equivariance semi-supervised model.
        save_folder : path where the loss / metric curves are saved at the end
        benchmark : enable or disable torch.backends.cudnn.benchmark
        Loss : loss for unsupervised training, 'KL' 'CE' 'L1' or 'MSE'
        gamma : float in [0,1] balancing the losses: loss_sup*gamma + (1-gamma)*loss_unsup
        save_all_ep : if True, the model is saved at each epoch in save_folder
        scheduler : if True, a polynomial-decay lr scheduler is applied during training
        size_img : size of image during evaluation
        scale_factor : rescale range, between min*size_img and max*size_img
    """
    torch.backends.cudnn.benchmark = benchmark
    if scheduler:
        # polynomial decay of the learning rate over the full schedule (power 0.9)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda x: (1 - x /
                                  (len(train_loader_sup) * n_epochs))**0.9)
    criterion_unsupervised = U.get_criterion(Loss)
    print('Criterion Unsupervised', criterion_unsupervised)
    # per-epoch history curves (pickled by U.save_curves at the end)
    iou_train = []
    iou_test = []
    combine_loss_test = []  # NOTE(review): never appended to -- saved empty
    combine_loss_train = []
    loss_train_unsup = []
    loss_train_sup = []
    loss_test_unsup = []  # NOTE(review): never appended to -- saved empty
    loss_test_sup = []
    equiv_accuracy_train = []
    equiv_accuracy_test = []  # NOTE(review): never appended to -- saved empty
    accuracy_test = []
    accuracy_train = []
    # BUG FIX: removed torch.autograd.set_detect_anomaly(True) -- anomaly
    # detection is a debugging aid that slows down every backward pass
    # considerably and must not stay enabled in a production training loop.
    for ep in range(n_epochs):
        # rebuild the unsupervised loader each epoch to reshuffle it
        train_loader_equiv = torch.utils.data.DataLoader(train_dataset_unsup,batch_size=batch_size,\
                                                     shuffle=True,drop_last=True)
        print("EPOCH", ep)
        # TRAINING
        d = train_step_scale_equiv(model,train_loader_sup,train_loader_equiv,criterion_supervised,criterion_unsupervised,\
                        optimizer,gamma,Loss,device,size_img=size_img,scale_factor=scale_factor)
        if scheduler:
            lr_scheduler.step()
        combine_loss_train.append(d['loss'])
        loss_train_unsup.append(d['loss_equiv'])
        loss_train_sup.append(d['loss_sup'])
        equiv_accuracy_train.append(d['equiv_acc'])
        iou_train.append(d['iou_train'])
        accuracy_train.append(d['accuracy_train'])
        print('TRAIN - EP:',ep,'iou:',d['iou_train'],'Accuracy:',d['accuracy_train'],'Loss sup:',d['loss_sup'],\
            'Loss equiv:',d['loss_equiv'],'Combine Loss:',d['loss'],'Equivariance Accuracy:',d['equiv_acc'],)
        # EVALUATION
        model.eval()
        with torch.no_grad():
            state = eval_model(model,
                               val_loader,
                               device=device,
                               num_classes=num_classes)
            iou = state.metrics['mean IoU']
            acc = state.metrics['accuracy']
            loss = state.metrics['CE Loss']
            loss_test_sup.append(loss)
            iou_test.append(iou)
            accuracy_test.append(acc)
            print('TEST - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
                  loss)

    # persist all curves for later plotting / analysis
    U.save_curves(path=save_folder,combine_loss_train=combine_loss_train,loss_train_sup=loss_train_sup,\
    loss_train_unsup=loss_train_unsup,iou_train=iou_train,accuracy_train=accuracy_train,equiv_accuracy_train=equiv_accuracy_train,\
    combine_loss_test=combine_loss_test,loss_test_unsup=loss_test_unsup,equiv_accuracy_test=equiv_accuracy_test,\
    loss_test_sup= loss_test_sup,iou_test=iou_test,accuracy_test=accuracy_test)
Example #9
0
def main():
    """Train (or load) a GAT model on the configured task, then evaluate it on
    the test split, printing running and final loss / accuracy."""
    config = utils.parse_args()

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    # training-split dataset and loader
    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    'train', config['num_layers'], config['self_loop'],
                    config['normalize_adj'], config['transductive'])
    dataset = utils.get_dataset(dataset_args)
    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=True,
                        collate_fn=dataset.collate_wrapper)
    input_dim, output_dim = dataset.get_dims()

    model = models.GAT(input_dim, config['hidden_dims'], output_dim,
                       config['num_heads'], config['dropout'], device)
    model.to(device)

    if not config['load']:
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        stats_per_batch = config['stats_per_batch']
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            print('Epoch {} / {}'.format(epoch + 1, epochs))
            running_loss = 0.0
            num_correct, num_examples = 0, 0
            for (idx, batch) in enumerate(loader):
                features, node_layers, mappings, rows, labels = batch
                features, labels = features.to(device), labels.to(device)
                optimizer.zero_grad()
                out = model(features, node_layers, mappings, rows)
                loss = criterion(out, labels)
                loss.backward()
                optimizer.step()
                # bookkeeping only: no gradients needed for the statistics
                with torch.no_grad():
                    running_loss += loss.item()
                    predictions = torch.max(out, dim=1)[1]
                    num_correct += torch.sum(predictions == labels).item()
                    num_examples += len(labels)
                # periodically report (and reset) the running statistics
                if (idx + 1) % stats_per_batch == 0:
                    running_loss /= stats_per_batch
                    accuracy = num_correct / num_examples
                    print('    Batch {} / {}: loss {}, accuracy {}'.format(
                        idx + 1, num_batches, running_loss, accuracy))
                    running_loss = 0.0
                    num_correct, num_examples = 0, 0
        print('Finished training.')
        print('--------------------------------')

        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            if not os.path.exists(directory):
                os.makedirs(directory)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        # BUG FIX: map_location lets a checkpoint saved on GPU be loaded on a
        # CPU-only machine (and vice versa).
        model.load_state_dict(torch.load(path, map_location=device))
    # rebuild dataset/loader for the test split (no shuffling)
    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    'test', config['num_layers'], config['self_loop'],
                    config['normalize_adj'], config['transductive'])
    dataset = utils.get_dataset(dataset_args)
    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        collate_fn=dataset.collate_wrapper)
    criterion = utils.get_criterion(config['task'])
    stats_per_batch = config['stats_per_batch']
    num_batches = int(ceil(len(dataset) / config['batch_size']))
    model.eval()
    print('--------------------------------')
    print('Testing.')
    running_loss, total_loss = 0.0, 0.0
    num_correct, num_examples = 0, 0
    total_correct, total_examples = 0, 0
    for (idx, batch) in enumerate(loader):
        features, node_layers, mappings, rows, labels = batch
        features, labels = features.to(device), labels.to(device)
        out = model(features, node_layers, mappings, rows)
        loss = criterion(out, labels)
        running_loss += loss.item()
        total_loss += loss.item()
        predictions = torch.max(out, dim=1)[1]
        num_correct += torch.sum(predictions == labels).item()
        total_correct += torch.sum(predictions == labels).item()
        num_examples += len(labels)
        total_examples += len(labels)
        if (idx + 1) % stats_per_batch == 0:
            running_loss /= stats_per_batch
            accuracy = num_correct / num_examples
            print('    Batch {} / {}: loss {}, accuracy {}'.format(
                idx + 1, num_batches, running_loss, accuracy))
            running_loss = 0.0
            num_correct, num_examples = 0, 0
    # final aggregate metrics over the whole test split
    total_loss /= num_batches
    total_accuracy = total_correct / total_examples
    print('Loss {}, accuracy {}'.format(total_loss, total_accuracy))
    print('Finished testing.')
    print('--------------------------------')
Example #10
0
# Evaluation configuration for a saved rotation-equivariance model.
model_name = 'rot_equiv_lc.pt'  # saved model checkpoint file name
folder_model = join(load_dir, exp)  # directory holding the saved model
#fcn= True
#pretrained=True

# GPU
gpu = 1  # GPU index to use
# EVAL PARAMETERS
bs = 2  # evaluation batch size

# LOSS
criterion_supervised = nn.CrossEntropyLoss(
    ignore_index=21)  # ignore the border class (index 21)
Loss = 'KL'  # Loss = 'KL' or 'CE' or None for L1,MSE…
criterion_unsupervised = U.get_criterion(Loss)

# SEARCH FOR A PARTICULAR MODEL
rotate = False  # random rotation during training
scale = False  # random rescaling during training
split = True  # split the supervised dataset
split_ratio = 0.3  # fraction of the supervised dataset used
batch_size = 4  # training batch size of the model being searched for
pi_rotate = False  # whether pi-rotations were used during training

#scale_factor = (0.2,0.8)
#size_img = (420,420)
#size_crop = (380,380)

# DEVICE
# Decide which device we want to run on
Example #11
0
def train_rot_equiv(model,n_epochs,train_loader_sup,train_dataset_unsup,val_loader,criterion_supervised,optimizer,scheduler,\
        Loss,gamma,batch_size,iter_every,save_folder,model_name,benchmark=False,angle_max=30,size_img=520,\
        eval_every=5,save_all_ep=True,dataroot_voc='~/data/voc2012',save_best=False,rot_cpu=False, device='cpu',num_classes=21):
    """
        A complete training loop for a rotation-equivariance semi-supervised model.
        save_folder : path to save the model and the curves of losses / metrics
        benchmark : enable or disable torch.backends.cudnn.benchmark
        Loss : loss for unsupervised training, 'KL' 'CE' 'L1' or 'MSE'
        gamma : float in [0,1] balancing the two losses: loss_sup*gamma + (1-gamma)*loss_unsup
        save_all_ep : if True, the model is saved at each epoch in save_folder
        scheduler : if True, a polynomial-decay lr scheduler is applied during training
        eval_every : evaluate equivariance accuracy on the validation set every n epochs
        size_img : size of image during evaluation
        angle_max : max angle rotation for input images
    """
    torch.backends.cudnn.benchmark = benchmark
    if scheduler:
        # polynomial decay of the learning rate over the full schedule (power 0.9)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda x: (1 - x /
                                  (len(train_loader_sup) * n_epochs))**0.9)
    criterion_unsupervised = U.get_criterion(Loss)
    # per-epoch history curves (pickled by U.save_curves at the end)
    iou_train = []
    iou_test = []
    combine_loss_train = []
    combine_loss_test = []  # NOTE(review): never appended to -- saved empty
    loss_train_unsup = []
    loss_train_sup = []
    loss_test_unsup = []
    loss_test_sup = []
    equiv_accuracy_train = []
    equiv_accuracy_test = []
    accuracy_test = []
    accuracy_train = []
    for ep in range(n_epochs):
        # rebuild the unsupervised loader each epoch to reshuffle it
        train_loader_equiv = torch.utils.data.DataLoader(train_dataset_unsup,batch_size=batch_size,\
                                                     shuffle=True,drop_last=True)
        print("EPOCH", ep)
        # TRAINING
        d = train_step_rot_equiv(model,train_loader_sup,train_loader_equiv,criterion_supervised,criterion_unsupervised,\
                        optimizer,gamma,Loss,rot_cpu=rot_cpu,device=device,angle_max=angle_max,num_classes=num_classes,iter_every=iter_every)
        if scheduler:
            lr_scheduler.step()
        # record the training metrics returned by the step function
        combine_loss_train.append(d['loss'])
        loss_train_unsup.append(d['loss_equiv'])
        loss_train_sup.append(d['loss_sup'])
        equiv_accuracy_train.append(d['equiv_acc'])
        iou_train.append(d['iou_train'])
        accuracy_train.append(d['accuracy_train'])
        print('TRAIN - EP:',ep,'iou:',d['iou_train'],'Accuracy:',d['accuracy_train'],'Loss sup:',d['loss_sup'],\
            'Loss equiv:',d['loss_equiv'],'Combine Loss:',d['loss'],'Equivariance Accuracy:',d['equiv_acc'],)
        # EVALUATION
        model.eval()
        with torch.no_grad():
            state = eval_model(model,
                               val_loader,
                               device=device,
                               num_classes=num_classes)
            iou = state.metrics['mean IoU']
            acc = state.metrics['accuracy']
            loss = state.metrics['CE Loss']
            loss_test_sup.append(loss)
            iou_test.append(iou)
            accuracy_test.append(acc)
            print('TEST - EP:', ep, 'iou:', iou, 'Accuracy:', acc, 'Loss CE',
                  loss)
            # SAVING MODEL
            U.save_model(model,
                         save_all_ep,
                         save_best,
                         save_folder,
                         model_name,
                         ep=ep,
                         iou=iou,
                         iou_test=iou_test)

            if ep % eval_every == 0:  # Eval loss equiv and equivariance accuracy for the validation dataset
                equiv_acc, m_loss_equiv = U.eval_accuracy_equiv(model,val_loader,criterion=criterion_unsupervised,\
                                nclass=21,device=device,Loss=Loss,plot=False,angle_max=angle_max,random_angle=False)
                loss_test_unsup.append(m_loss_equiv)
                equiv_accuracy_test.append(equiv_acc)
                """  
                print('VOC Dataset Train')
                _ = eval_model_all_angle(model,size_img,dataroot_voc,train=True,device=device,num_classes=num_classes)
                print('VOC Dataset Val')
                _ = eval_model_all_angle(model,size_img,dataroot_voc,train=False,device=device,num_classes=num_classes)
                ## Save model"""

    # persist all curves for later plotting / analysis
    U.save_curves(path=save_folder,combine_loss_train=combine_loss_train,loss_train_sup=loss_train_sup,\
    loss_train_unsup=loss_train_unsup,iou_train=iou_train,accuracy_train=accuracy_train,equiv_accuracy_train=equiv_accuracy_train,\
    combine_loss_test=combine_loss_test,loss_test_unsup=loss_test_unsup,equiv_accuracy_test=equiv_accuracy_test,\
    loss_test_sup= loss_test_sup,iou_test=iou_test,accuracy_test=accuracy_test)
Example #12
0
    def __init__(self,
                 train_data,
                 model,
                 dev_data=None,
                 eval_every=-1,
                 patience=200,
                 loss_fn="bce",
                 train_batch_size=32,
                 verbose=True,
                 eval_on="loss",
                 device="cpu",
                 save_path=None,
                 train_epochs=5,
                 keep_ck_num=3,
                 lr=1e-2,
                 eval_batch_size=64,
                 seed=211,
                 use_wandb=False):
        """Set up the output directory, data loader, model, optimizer and all
        training bookkeeping; optionally initializes Weights & Biases."""
        set_seed(seed)
        if not os.path.isdir(save_path):
            os.makedirs(save_path, exist_ok=True)

        # an existing non-empty output dir must be explicitly confirmed for
        # removal, otherwise we refuse to overwrite previous results
        if len(os.listdir(save_path)) > 1:
            out = input(
                "Output directory ({}) already exists and is not empty, you wanna remove it before start? (y/n)"
                .format(save_path))
            if out.lower() == "y":
                shutil.rmtree(save_path)
                os.makedirs(save_path, exist_ok=True)
                # we need keep the vocab file
                train_data.save_vocab(save_path)
            else:
                raise ValueError(
                    "Output directory ({}) already exists and is not empty".
                    format(save_path))
        self.tb_writer = SummaryWriter()
        # BUG FIX: removed the dead store `self.eval_every = -1` -- it was
        # unconditionally overwritten by the `eval_every` argument below.
        self.keep_ck_num = keep_ck_num
        self.train_data = train_data
        self.train_batch_size = train_batch_size
        self.train_dataloader = DataLoader(train_data,
                                           batch_size=train_batch_size,
                                           shuffle=True)
        self.set_logger(save_path)
        self.total_train_steps = len(self.train_dataloader) * train_epochs

        if verbose:
            logger.info(model)
            total_count, trainable_count, non_trainable_count = count_dm_params(
                model)
            logger.info(f'  Total params: {total_count}')
            logger.info(f'  Trainable params: {trainable_count}')
            logger.info(f'  Non-trainable params: {non_trainable_count}')
            logger.info(f"  There are {len(train_data)}  training examples")
            # idiom: compare against None with `is not`, not `!=`
            if dev_data is not None:
                logger.info(
                    f"  There are {len(dev_data)} examples for development")

        self.model = model.to(device)
        self.train_epochs = train_epochs
        self.device = device
        self.eval_batch_size = eval_batch_size
        self.dev_data = dev_data
        self.criterion = get_criterion(loss_fn)
        self.loss_fn = loss_fn

        # self.optimizer = optim.SGD(self.model.parameters(), lr=0.001, momentum=0.9)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=lr,
                                    betas=(0.9, 0.999),
                                    eps=1e-8)  # use default
        self.lr = lr
        self.eval_every = eval_every
        self.save_path = save_path
        assert eval_on in ["log_loss", "auc", "loss", "accuracy"]
        self.eval_on = eval_on
        # best metric seen so far; -inf so the first evaluation always improves
        self.best_score = -float("inf")
        self.patience = patience
        self.no_improve_count = 0
        self.use_wandb = use_wandb
        # hyper-parameters mirrored into a dict for experiment tracking
        self.hyperparams_logging = [
            "train_epochs", "eval_batch_size", "train_batch_size",
            "no_improve_count", "device", "patience", "save_path", "eval_on",
            "eval_every", "use_wandb", "loss_fn", "keep_ck_num", "lr"
        ]
        self.hyperparam_dict = {
            key: self.__dict__[key]
            for key in self.hyperparams_logging
        }

        if is_wandb_available() and use_wandb:
            # keep track of model topology and gradients if is_wandb_available and args!=None
            wandb.init(project="deep_ctr",
                       config=self.hyperparam_dict,
                       name="_".join(save_path.split(os.path.sep)))
            wandb.watch((self.model), log_freq=max(100, eval_every))
Example #13
0
# attach the file and console handlers configured above
logger.addHandler(fh)
logger.addHandler(ch)

# log the full command line for reproducibility
logger.info(' '.join(sys.argv))
for m in msg:
    # FIX: logger.warn() is a long-deprecated alias of logger.warning()
    logger.warning(m)

config, data_config = parse_config(args.config)

logger.info("Get model")
model = get_model(config['model']['name'], **config['model']['params'])

logger.info("Get optimizer, scheduler and criterion")
optimizer = get_optimizer(config['optimizer'], model.parameters())
scheduler = get_scheduler(config['scheduler'], optimizer)
criterion = get_criterion(config['criterion'])

logger.info("Get trainer")
trainer = Trainer(save_path=args.dir,
                  model=model,
                  optimizer=optimizer,
                  criterion=criterion,
                  # max_epoch=config['max_epoch'],
                  max_steps=config['max_steps'],
                  logger=logger,
                  scheduler=scheduler,
                  auto_resume=args.resume,
                  log_frequence=args.log_frequence,
                  save_frequence=args.save_frequence,
                  eval_frequence=args.eval_frequence)
def train_supernet(results_dir, model, task_sampler, train_iter, valid_iter, device, config):
    """
    Train a supernet by Monte-Carlo sampling of sub-architectures ("tasks").

    Each training step samples n_monte tasks, averages their losses into a
    single backward pass, and updates the shared supernet weights. Metrics
    are averaged per epoch, optionally mirrored to TensorBoard, and
    checkpoints/metrics are periodically written to ``results_dir``.

    :param results_dir: directory where config.yaml, metrics and weights are saved
    :param model: supernet module; must expose ``forward(x, task)`` and ``none_grad()``
    :param task_sampler: sampler providing sub-architectures via ``sample()``
    :param train_iter: iterable of (x, y) training batches
    :param valid_iter: iterable of (x, y) validation batches
    :param device: device the input batches are moved to
    :param config: nested configuration dict (only the "TRAIN" section is used)
    :return: dict with per-epoch averaged "train" and "valid" metrics
    """
    writer = None  # TensorBoard writer, created lazily on first use
    since = time.time()

    # Persist the (possibly freshly drawn) seed back into the config so the
    # exact run can be reproduced from the saved config.yaml.
    seed = set_seed(config["TRAIN"]["train_seed"])
    config["TRAIN"]["train_seed"] = seed
    with open(os.path.join(results_dir, "config.yaml"), "w") as f:
        yaml.dump(config, f)

    # metrics accumulated across all epochs
    total_metrics = {
        "train": [],
        "valid": [],
    }

    # data iterators, keyed by phase
    iters = {"train": train_iter, "valid": valid_iter}

    # training stuff
    optimizer = get_optimizer(model.parameters(), config["TRAIN"]["OPTIMIZER"])
    scheduler = get_scheduler(optimizer, config["TRAIN"]["SCHEDULER"])
    criterion = get_criterion(config["TRAIN"]["CRITERION"])

    # training
    for epoch in range(config["TRAIN"]["num_epochs"]):

        print("-" * 100)
        print("Iter Epoch {}/{}".format(epoch + 1, config["TRAIN"]["num_epochs"]))
        print("-" * 100)

        # fresh per-epoch metric buffers
        epoch_metrics = {
            "train": {
                "learning_rate": [],
                "losses_train": [],
                "accs_train": [],
            },
            "valid": {
                "losses_valid": [],
                "accs_valid": [],
            }
        }

        for phase in ["train", "valid"]:

            for iter_cpt, (x, y) in tqdm(enumerate(iters[phase]), ncols=100, total=len(iters[phase])):

                # perform an update
                if phase == "train":

                    model.train()
                    tasks = task_sampler.sample(n_monte=config["TRAIN"]["GRAPH_SAMPLER"]["n_monte"])
                    loss_t = None
                    accs_t = []

                    for task in tasks:

                        # forward
                        x_t, y_t = x.to(device), y.to(device)
                        preds_t = model.forward(x_t, task)

                        # computing gradient: losses of the sampled tasks are
                        # averaged (each divided by n_monte) into one scalar
                        if loss_t is None:
                            loss_t = criterion(preds_t, y_t) / config["TRAIN"]["GRAPH_SAMPLER"]["n_monte"]
                        else:
                            loss_t += criterion(preds_t, y_t) / config["TRAIN"]["GRAPH_SAMPLER"]["n_monte"]

                        # saving accuracies
                        accs_t.append(np.mean((torch.max(preds_t, dim=1)[1] == y_t).cpu().numpy()))

                    # update; gradients are cleared via the model's own
                    # none_grad() rather than optimizer.zero_grad()
                    # NOTE(review): scheduler.step(epoch) is called once per
                    # batch with the epoch index — confirm the scheduler
                    # expects epoch-indexed stepping at this frequency.
                    loss_t.backward()
                    optimizer.step()
                    scheduler.step(epoch)
                    model.none_grad()

                    # adding metrics
                    epoch_metrics[phase]["learning_rate"].append(scheduler.get_lr())
                    epoch_metrics[phase]["losses_train"].append(loss_t.item())
                    epoch_metrics[phase]["accs_train"].append(np.mean(accs_t))

                elif config["TRAIN"]["perform_valid"]:

                    # validation: single sampled task, no gradients
                    model.eval()
                    task = task_sampler.sample()[0]

                    # forward
                    x_v, y_v = x.to(device), y.to(device)
                    with torch.no_grad():
                        preds_v = model.forward(x_v, task)
                        loss_v = criterion(preds_v, y_v)

                    # adding metrics
                    epoch_metrics[phase]["losses_valid"].append(loss_v.item())
                    epoch_metrics[phase]["accs_valid"].append(
                        np.mean((torch.max(preds_v, dim=1)[1] == y_v).cpu().numpy()))

                else:
                    # validation disabled: skip the whole valid iterator
                    break

        # average metrics over epoch (None when a phase produced no values)
        to_print = "\n"
        for phase in ["train", "valid"]:
            to_print += phase.upper() + ":\n"
            for key in epoch_metrics[phase].keys():
                if len(epoch_metrics[phase][key]) > 0:
                    epoch_metrics[phase][key] = np.mean(epoch_metrics[phase][key])
                    to_print += "{}: {:.4f}".format(key, epoch_metrics[phase][key]) + "\n"
                else:
                    epoch_metrics[phase][key] = None
            total_metrics[phase].append(epoch_metrics[phase])
            to_print += "\n"

        # tensorboard integration to plot nice curves
        # NOTE(review): the inner condition re-tests use_tensorboard, which
        # the outer if already guarantees — redundant but harmless.
        if config["TRAIN"]["use_tensorboard"]:
            if config["TRAIN"]["use_tensorboard"] and writer is None:
                writer = SummaryWriter(results_dir)
            for phase in ["train", "valid"]:
                for key, value in epoch_metrics[phase].items():
                    if value is not None:
                        writer.add_scalar(phase + "/" + key, value, epoch)

        time_elapsed = time.time() - since
        print(to_print + "Time Elapsed: {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))

        # save everything
        if config["TRAIN"]["save"] and ((epoch + 1) % config["TRAIN"]["save_period"] == 0):

            # saving model
            weights_path = os.path.join(results_dir, "model_weights_epoch_{0}_of_{1}.pth".
                                        format(epoch + 1, config["TRAIN"]["num_epochs"]))
            torch.save(model.state_dict(), weights_path)

            # saving stuff to retrieve
            with open(os.path.join(results_dir, "total_metrics.pkl"), "wb") as handle:
                pickle.dump(total_metrics, handle, protocol=pickle.HIGHEST_PROTOCOL)

    time_elapsed = time.time() - since
    print("Training complete in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))

    return total_metrics
Пример #15
0
def main():
    """Train (or load) a GCN node classifier and report test accuracy.

    All settings come from ``utils.parse_args()``. When ``config['load']``
    is False the model is trained on the train split (and optionally saved);
    otherwise previously saved weights are loaded. In both cases accuracy on
    the test split is printed at the end.
    """
    config = utils.parse_args()

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'

    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    config['num_layers'], config['self_loop'],
                    config['normalize_adj'])
    dataset = utils.get_dataset(dataset_args)

    input_dim, output_dim = dataset.get_dims()
    adj, features, labels, idx_train, idx_val, idx_test = dataset.get_data()
    # Fix: the model is moved to `device` below, but the inputs previously
    # stayed on CPU, which crashes the forward pass when running on CUDA.
    # (assumes adj/features/labels are torch tensors — TODO confirm against
    # utils.get_dataset)
    adj = adj.to(device)
    x = features.to(device)
    labels = labels.to(device)
    y_train = labels[idx_train]
    y_val = labels[idx_val]
    y_test = labels[idx_test]

    model = models.GCN(input_dim, config['hidden_dims'], output_dim,
                       config['dropout'])
    model.to(device)

    if not config['load']:
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            optimizer.zero_grad()
            # Full-graph forward; restrict the loss to the train nodes.
            scores = model(x, adj)[idx_train]
            loss = criterion(scores, y_train)
            loss.backward()
            optimizer.step()
            predictions = torch.max(scores, dim=1)[1]
            num_correct = torch.sum(predictions == y_train).item()
            accuracy = num_correct / len(y_train)
            print('    Training epoch: {}, loss: {:.3f}, accuracy: {:.2f}'.
                  format(epoch + 1, loss.item(), accuracy))
        print('Finished training.')
        print('--------------------------------')

        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            if not os.path.exists(directory):
                os.makedirs(directory)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        # Fix: map_location lets GPU-saved checkpoints load on CPU-only hosts
        # (and vice versa) instead of raising.
        model.load_state_dict(torch.load(path, map_location=device))
    model.eval()
    print('--------------------------------')
    print('Testing.')
    scores = model(x, adj)[idx_test]
    predictions = torch.max(scores, dim=1)[1]
    num_correct = torch.sum(predictions == y_test).item()
    accuracy = num_correct / len(y_test)
    print('    Test accuracy: {}'.format(accuracy))
    print('Finished testing.')
    print('--------------------------------')
Пример #16
0
    if args.task_name not in processors:
        raise ValueError('Task not found: %s' % (args.task_name))
    processor = processors[args.task_name]()

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    print('Training/evaluation parameters %s' % str(args))

    if args.do_train:
        config = config_class.from_pretrained(
            args.config_name or args.model_name_or_path,
            finetuning_task=args.task_name,
        )
        tokenizer = get_tokenizer(args.model_type, args.tokenizer_name or args.model_name_or_path)
        criterion = get_criterion(args.model_type, tokenizer)
        print(f'*** Criterion ignore_index = {criterion.ignore_index} ***')
        criterion.to(args.device)

        model = model_class(config=config)
        if args.model_type == 'gpt2':
            origin_vocab_size = model.config.vocab_size
            new_vocab_size = tokenizer.vocab_size
            embed_size = model.config.n_embd
            if origin_vocab_size < new_vocab_size:
                print(f'***** Adjusting gpt2 embedding *****')
                wte = torch.nn.Embedding(new_vocab_size, embed_size)
                wte.weight.data[:origin_vocab_size].copy_(model.transformer.wte.weight.data)
                print(f'replace wte: ({model.transformer.wte.weight.data.size()}) -> ({wte.weight.data.size()})')
                model.transformer.wte = wte
                lm_head = torch.nn.Linear(embed_size, new_vocab_size, bias=False)
Пример #17
0
def main():
    """Link-prediction training/evaluation pipeline for GraphSAGE / GAT.

    Driven entirely by ``utils.parse_args()``:
      * load=False: compute a baseline train ROC-AUC, train the model
        (optional visdom loss curve), optionally save weights, recompute
        train ROC-AUC, pick a classification threshold ``t`` where
        TPR ~= TNR, and print a classification report on the train set.
      * load=True: load saved weights and evaluate loss/accuracy/ROC-AUC
        and a classification report on the validation and test splits.

    NOTE(review): the evaluation branches (load=True) reuse the threshold
    ``t``, which is only defined in the load=False branch — running with
    load=True alone raises NameError. Confirm intended usage.
    """

    # Set up arguments for datasets, models and training.
    config = utils.parse_args()
    config['num_layers'] = len(config['hidden_dims']) + 1

    if config['cuda'] and torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    config['device'] = device

    # Get the dataset, dataloader and model.
    dataset_args = (config['task'], config['dataset'], config['dataset_path'],
                    config['generate_neg_examples'], 'train',
                    config['duplicate_examples'], config['repeat_examples'],
                    config['num_layers'], config['self_loop'],
                    config['normalize_adj'])
    dataset = utils.get_dataset(dataset_args)

    loader = DataLoader(dataset=dataset,
                        batch_size=config['batch_size'],
                        shuffle=True,
                        collate_fn=dataset.collate_wrapper)
    input_dim, output_dim = dataset.get_dims()

    if config['model'] == 'GraphSAGE':
        agg_class = utils.get_agg_class(config['agg_class'])
        model = models.GraphSAGE(input_dim, config['hidden_dims'], output_dim,
                                 config['dropout'], agg_class,
                                 config['num_samples'], config['device'])
    else:
        model = models.GAT(input_dim, config['hidden_dims'], output_dim,
                           config['num_heads'], config['dropout'],
                           config['device'])
        model.apply(models.init_weights)
    model.to(config['device'])
    print(model)

    # Compute ROC-AUC score for the untrained model.
    # Edge scores are inner products of the two endpoint embeddings.
    if not config['load']:
        print('--------------------------------')
        print(
            'Computing ROC-AUC score for the training dataset before training.'
        )
        y_true, y_scores = [], []
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        with torch.no_grad():
            for (idx, batch) in enumerate(loader):
                edges, features, node_layers, mappings, rows, labels = batch
                features, labels = features.to(device), labels.to(device)
                out = model(features, node_layers, mappings, rows)
                # score(u, v) = <emb_u, emb_v>, gathered for the batch edges
                all_pairs = torch.mm(out, out.t())
                scores = all_pairs[edges.T]
                y_true.extend(labels.detach().cpu().numpy())
                y_scores.extend(scores.detach().cpu().numpy())
                print('    Batch {} / {}'.format(idx + 1, num_batches))
        y_true = np.array(y_true).flatten()
        y_scores = np.array(y_scores).flatten()
        area = roc_auc_score(y_true, y_scores)
        print('ROC-AUC score: {:.4f}'.format(area))
        print('--------------------------------')

    # Train.
    if not config['load']:
        use_visdom = config['visdom']
        if use_visdom:
            vis = visdom.Visdom()
            loss_window = None  # created lazily on the first plotted point
        criterion = utils.get_criterion(config['task'])
        optimizer = optim.Adam(model.parameters(),
                               lr=config['lr'],
                               weight_decay=config['weight_decay'])
        epochs = config['epochs']
        stats_per_batch = config['stats_per_batch']
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.8)
        # NOTE(review): scheduler.step() is called per batch below, so the
        # [300, 600] milestones are in batches, not epochs — confirm intent.
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[300, 600],
                                                   gamma=0.5)
        model.train()
        print('--------------------------------')
        print('Training.')
        for epoch in range(epochs):
            print('Epoch {} / {}'.format(epoch + 1, epochs))
            running_loss = 0.0
            for (idx, batch) in enumerate(loader):
                edges, features, node_layers, mappings, rows, labels = batch
                features, labels = features.to(device), labels.to(device)
                optimizer.zero_grad()
                out = model(features, node_layers, mappings, rows)
                all_pairs = torch.mm(out, out.t())
                scores = all_pairs[edges.T]
                loss = criterion(scores, labels.float())
                loss.backward()
                optimizer.step()
                with torch.no_grad():
                    running_loss += loss.item()
                # Periodic stats: running loss and (when both classes are
                # present in the batch) a per-batch ROC-AUC.
                if (idx + 1) % stats_per_batch == 0:
                    running_loss /= stats_per_batch
                    print('    Batch {} / {}: loss {:.4f}'.format(
                        idx + 1, num_batches, running_loss))
                    if (torch.sum(labels.long() == 0).item() >
                            0) and (torch.sum(labels.long() == 1).item() > 0):
                        area = roc_auc_score(labels.detach().cpu().numpy(),
                                             scores.detach().cpu().numpy())
                        print('    ROC-AUC score: {:.4f}'.format(area))
                    running_loss = 0.0
                    num_correct, num_examples = 0, 0
                if use_visdom:
                    if loss_window is None:
                        loss_window = vis.line(Y=[loss.item()],
                                               X=[epoch * num_batches + idx],
                                               opts=dict(xlabel='batch',
                                                         ylabel='Loss',
                                                         title='Training Loss',
                                                         legend=['Loss']))
                    else:
                        vis.line([loss.item()], [epoch * num_batches + idx],
                                 win=loss_window,
                                 update='append')
                scheduler.step()
        if use_visdom:
            vis.close(win=loss_window)
        print('Finished training.')
        print('--------------------------------')

    if not config['load']:
        if config['save']:
            print('--------------------------------')
            directory = os.path.join(os.path.dirname(os.getcwd()),
                                     'trained_models')
            if not os.path.exists(directory):
                os.makedirs(directory)
            fname = utils.get_fname(config)
            path = os.path.join(directory, fname)
            print('Saving model at {}'.format(path))
            torch.save(model.state_dict(), path)
            print('Finished saving model.')
            print('--------------------------------')

        # Compute ROC-AUC score after training.
        if not config['load']:
            print('--------------------------------')
            print(
                'Computing ROC-AUC score for the training dataset after training.'
            )
            y_true, y_scores = [], []
            num_batches = int(ceil(len(dataset) / config['batch_size']))
            with torch.no_grad():
                for (idx, batch) in enumerate(loader):
                    edges, features, node_layers, mappings, rows, labels = batch
                    features, labels = features.to(device), labels.to(device)
                    out = model(features, node_layers, mappings, rows)
                    all_pairs = torch.mm(out, out.t())
                    scores = all_pairs[edges.T]
                    y_true.extend(labels.detach().cpu().numpy())
                    y_scores.extend(scores.detach().cpu().numpy())
                    print('    Batch {} / {}'.format(idx + 1, num_batches))
            y_true = np.array(y_true).flatten()
            y_scores = np.array(y_scores).flatten()
            area = roc_auc_score(y_true, y_scores)
            print('ROC-AUC score: {:.4f}'.format(area))
            print('--------------------------------')

        # Plot the true positive rate and true negative rate vs threshold.
        if not config['load']:
            # NOTE(review): sklearn's roc_curve returns (fpr, tpr,
            # thresholds); the unpacking below names them (tpr, fpr, ...),
            # which appears swapped — verify before trusting the plot/threshold.
            tpr, fpr, thresholds = roc_curve(y_true, y_scores)
            tnr = 1 - fpr
            plt.plot(thresholds, tpr, label='tpr')
            plt.plot(thresholds, tnr, label='tnr')
            plt.xlabel('Threshold')
            plt.title('TPR / TNR vs Threshold')
            plt.legend()
            plt.show()

        # Choose an appropriate threshold and generate classification report on the train set.
        # t = largest threshold where tpr <= tnr (idx2 is computed but unused).
        idx1 = np.where(tpr <= tnr)[0]
        idx2 = np.where(tpr >= tnr)[0]
        t = thresholds[idx1[-1]]
        total_correct, total_examples = 0, 0
        y_true, y_pred = [], []
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        with torch.no_grad():
            for (idx, batch) in enumerate(loader):
                edges, features, node_layers, mappings, rows, labels = batch
                features, labels = features.to(device), labels.to(device)
                out = model(features, node_layers, mappings, rows)
                all_pairs = torch.mm(out, out.t())
                scores = all_pairs[edges.T]
                predictions = (scores >= t).long()
                y_true.extend(labels.detach().cpu().numpy())
                y_pred.extend(predictions.detach().cpu().numpy())
                total_correct += torch.sum(predictions == labels.long()).item()
                total_examples += len(labels)
                print('    Batch {} / {}'.format(idx + 1, num_batches))
        print('Threshold: {:.4f}, accuracy: {:.4f}'.format(
            t, total_correct / total_examples))
        y_true = np.array(y_true).flatten()
        y_pred = np.array(y_pred).flatten()
        report = classification_report(y_true, y_pred)
        print('Classification report\n', report)

    # Evaluate on the validation set.
    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        model.load_state_dict(torch.load(path))
        dataset_args = (config['task'], config['dataset'],
                        config['dataset_path'],
                        config['generate_neg_examples'], 'val',
                        config['duplicate_examples'],
                        config['repeat_examples'], config['num_layers'],
                        config['self_loop'], config['normalize_adj'])
        dataset = utils.get_dataset(dataset_args)
        loader = DataLoader(dataset=dataset,
                            batch_size=config['batch_size'],
                            shuffle=False,
                            collate_fn=dataset.collate_wrapper)
        criterion = utils.get_criterion(config['task'])
        stats_per_batch = config['stats_per_batch']
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        model.eval()
        print('--------------------------------')
        print(
            'Computing ROC-AUC score for the validation dataset after training.'
        )
        running_loss, total_loss = 0.0, 0.0
        num_correct, num_examples = 0, 0
        total_correct, total_examples = 0, 0
        y_true, y_scores, y_pred = [], [], []
        for (idx, batch) in enumerate(loader):
            edges, features, node_layers, mappings, rows, labels = batch
            features, labels = features.to(device), labels.to(device)
            out = model(features, node_layers, mappings, rows)
            all_pairs = torch.mm(out, out.t())
            scores = all_pairs[edges.T]
            loss = criterion(scores, labels.float())
            running_loss += loss.item()
            total_loss += loss.item()
            # NOTE(review): `t` is only defined in the load=False path above;
            # this branch raises NameError when run standalone with load=True.
            predictions = (scores >= t).long()
            num_correct += torch.sum(predictions == labels.long()).item()
            total_correct += torch.sum(predictions == labels.long()).item()
            num_examples += len(labels)
            total_examples += len(labels)
            y_true.extend(labels.detach().cpu().numpy())
            y_scores.extend(scores.detach().cpu().numpy())
            y_pred.extend(predictions.detach().cpu().numpy())
            if (idx + 1) % stats_per_batch == 0:
                running_loss /= stats_per_batch
                accuracy = num_correct / num_examples
                print('    Batch {} / {}: loss {:.4f}, accuracy {:.4f}'.format(
                    idx + 1, num_batches, running_loss, accuracy))
                if (torch.sum(labels.long() == 0).item() >
                        0) and (torch.sum(labels.long() == 1).item() > 0):
                    area = roc_auc_score(labels.detach().cpu().numpy(),
                                         scores.detach().cpu().numpy())
                    print('    ROC-AUC score: {:.4f}'.format(area))
                running_loss = 0.0
                num_correct, num_examples = 0, 0
        total_loss /= num_batches
        total_accuracy = total_correct / total_examples
        print('Loss {:.4f}, accuracy {:.4f}'.format(total_loss,
                                                    total_accuracy))
        y_true = np.array(y_true).flatten()
        y_scores = np.array(y_scores).flatten()
        y_pred = np.array(y_pred).flatten()
        report = classification_report(y_true, y_pred)
        area = roc_auc_score(y_true, y_scores)
        print('ROC-AUC score: {:.4f}'.format(area))
        print('Classification report\n', report)
        print('Finished validating.')
        print('--------------------------------')

        # Evaluate on test set.
    if config['load']:
        directory = os.path.join(os.path.dirname(os.getcwd()),
                                 'trained_models')
        fname = utils.get_fname(config)
        path = os.path.join(directory, fname)
        model.load_state_dict(torch.load(path))
        dataset_args = (config['task'], config['dataset'],
                        config['dataset_path'],
                        config['generate_neg_examples'], 'test',
                        config['duplicate_examples'],
                        config['repeat_examples'], config['num_layers'],
                        config['self_loop'], config['normalize_adj'])
        dataset = utils.get_dataset(dataset_args)
        loader = DataLoader(dataset=dataset,
                            batch_size=config['batch_size'],
                            shuffle=False,
                            collate_fn=dataset.collate_wrapper)
        criterion = utils.get_criterion(config['task'])
        stats_per_batch = config['stats_per_batch']
        num_batches = int(ceil(len(dataset) / config['batch_size']))
        model.eval()
        print('--------------------------------')
        print('Computing ROC-AUC score for the test dataset after training.')
        running_loss, total_loss = 0.0, 0.0
        num_correct, num_examples = 0, 0
        total_correct, total_examples = 0, 0
        y_true, y_scores, y_pred = [], [], []
        for (idx, batch) in enumerate(loader):
            edges, features, node_layers, mappings, rows, labels = batch
            features, labels = features.to(device), labels.to(device)
            out = model(features, node_layers, mappings, rows)
            all_pairs = torch.mm(out, out.t())
            scores = all_pairs[edges.T]
            loss = criterion(scores, labels.float())
            running_loss += loss.item()
            total_loss += loss.item()
            predictions = (scores >= t).long()
            num_correct += torch.sum(predictions == labels.long()).item()
            total_correct += torch.sum(predictions == labels.long()).item()
            num_examples += len(labels)
            total_examples += len(labels)
            y_true.extend(labels.detach().cpu().numpy())
            y_scores.extend(scores.detach().cpu().numpy())
            y_pred.extend(predictions.detach().cpu().numpy())
            if (idx + 1) % stats_per_batch == 0:
                running_loss /= stats_per_batch
                accuracy = num_correct / num_examples
                print('    Batch {} / {}: loss {:.4f}, accuracy {:.4f}'.format(
                    idx + 1, num_batches, running_loss, accuracy))
                if (torch.sum(labels.long() == 0).item() >
                        0) and (torch.sum(labels.long() == 1).item() > 0):
                    area = roc_auc_score(labels.detach().cpu().numpy(),
                                         scores.detach().cpu().numpy())
                    print('    ROC-AUC score: {:.4f}'.format(area))
                running_loss = 0.0
                num_correct, num_examples = 0, 0
        total_loss /= num_batches
        total_accuracy = total_correct / total_examples
        print('Loss {:.4f}, accuracy {:.4f}'.format(total_loss,
                                                    total_accuracy))
        y_true = np.array(y_true).flatten()
        y_scores = np.array(y_scores).flatten()
        y_pred = np.array(y_pred).flatten()
        report = classification_report(y_true, y_pred)
        area = roc_auc_score(y_true, y_scores)
        print('ROC-AUC score: {:.4f}'.format(area))
        print('Classification report\n', report)
        print('Finished testing.')
        print('--------------------------------')
Пример #18
0
logger = comet_ml.Experiment(
    api_key=api_key,
    project_name="sim_real",
    auto_metric_logging=True,
    auto_param_logging=True,
)

if args.mixed_precision:
    print("Applied: Mixed Precision")
    tf.keras.mixed_precision.set_global_policy("mixed_float16")

train_ds, test_ds = get_dataset(args)
grid = image_grid(next(iter(train_ds))[0])[0]
logger.log_image(grid.numpy())
model = get_model(args)
criterion = get_criterion(args)
optimizer = get_optimizer(args)
lr_scheduler = get_lr_scheduler(args)
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', mode='max', patience=args.patience, restore_best_weights=True)
experiment_name = get_experiment_name(args)
logger.set_name(experiment_name)
logger.log_parameters(vars(args))
with logger.train():
    filename =f'{args.model_name}.hdf5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filename, monitor='val_accuracy', mode='max', save_best_only=True, verbose=True)

    model.compile(loss=criterion, optimizer=optimizer, metrics=['accuracy'])
    if args.dry_run:
        print("[INFO] Turn off all callbacks")
        model.fit(train_ds, validation_data=test_ds, epochs=args.epochs, steps_per_epoch=2)
    else:
    logger.info('n_params: {}'.format(n_params))
    logger.info('first layer weight norm: {}'.format(norm_check))

    if run_config['fp16'] and not run_config['use_amp']:
        model.half()
        for layer in model.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.float()

    device = run_config['device']
    if device is not 'cpu' and torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model.to(device)
    logger.info('Done')

    train_criterion, test_criterion = utils.get_criterion(
        config['data_config'])

    # create optimizer
    if optim_config['no_weight_decay_on_bn']:
        params = [
            {
                'params': [
                    param for name, param in model.named_parameters()
                    if 'bn' not in name
                ]
            },
            {
                'params': [
                    param for name, param in model.named_parameters()
                    if 'bn' in name
                ],
Пример #20
0
    config = utils.load_config(args.config)

    global_params = config["globals"]

    utils.set_seed(global_params["seed"])
    device = utils.get_device(global_params)
    output_dir = global_params["output_dir"]

    data_conf = config["data"]
    if args.generate:
        for c in data_conf.values():
            utils.generate_data(c)

    model = models.get_model(config).to()
    criterion = utils.get_criterion(config)
    optimizer = utils.get_optimizer(model, config)
    scheduler = utils.get_scheduler(optimizer, config)
    loaders = {
        phase: utils.get_loader(config, phase)
        for phase in ["train", "valid3", "valid", "valid12"]
    }

    runner = SupervisedRunner(device=device,
                              input_key=["objects", "externals", "triplet"],
                              input_target_key="targets")
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 loaders=loaders,
                 scheduler=scheduler,