def main():
    args = get_args()
    dataset = get_dataset(args.dataset)
    data = dataset.data
    tb_writer = SummaryWriter()
    tb_writer.iteration = 0

    device = torch.device("cuda:" +
                          str(args.device)) if torch.cuda.is_available(
                          ) and args.device != 'cpu' else torch.device("cpu")
    model = get_model(dataset.data.num_features, dataset.num_classes)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    train_loader = DataLoader(
        [(x_, y_) for x_, y_, keep in zip(data.x, data.y, data.train_mask)
         if keep],
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=0,
        pin_memory=True,
    )

    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = 'num_minhash_funcs' if args.pruning_method == 'minhash_lsh' else 'random_pruning_prob'
        pruning_param = args.num_minhash_funcs if args.pruning_method == 'minhash_lsh' else args.random_pruning_prob
        tags.append(f'{pruning_param_name}: {pruning_param}')
        clearml_logger = get_clearml_logger(project_name="GNN_pruning",
                                            task_name=get_time_str(),
                                            tags=tags)

    for epoch in range(1, args.epochs + 1):
        loss, acc, f1 = train(epoch, dataset, train_loader, model, device,
                              optimizer, tb_writer)
        print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {f1:.4f}')

        train_acc, val_acc, test_acc = test(dataset, model, device)
        print(f'Train ACC: {train_acc:.4f}, Val ACC: {val_acc:.4f}, '
              f'Test ACC: {test_acc:.4f}')

        tb_writer.add_scalars('Accuracy', {
            'Train': train_acc,
            'Validation': val_acc,
            'Test': test_acc
        }, epoch)
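
The `train` and `test` helpers called above are project-specific and not part
of this listing. Below is a minimal sketch of what such a `train` helper might
look like, assuming the signature used at the call site; the logging tag, the
cross-entropy loss, and using accuracy as a stand-in for F1 are assumptions:

def train(epoch, dataset, train_loader, model, device, optimizer, tb_writer):
    # Hypothetical sketch; `epoch` and `dataset` are accepted only to match
    # the call site above.
    model.train()
    total_loss, correct, seen = 0.0, 0, 0
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        out = model(x)
        loss = torch.nn.functional.cross_entropy(out, y)
        loss.backward()
        optimizer.step()

        # Log per-batch loss against the writer's custom global step.
        tb_writer.add_scalar('Loss/train_batch', loss.item(),
                             tb_writer.iteration)
        tb_writer.iteration += 1

        total_loss += loss.item() * y.size(0)
        correct += int((out.argmax(dim=-1) == y).sum())
        seen += y.size(0)
    acc = correct / seen
    return total_loss / seen, acc, acc  # accuracy stands in for the F1 score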
Example #2
def register_logging_files(args):
    tb_writer = None
    best_results_file = None
    log_file = None
    if args.exps_dir is not None:
        exps_dir = (Path(args.exps_dir) / 'pyg_with_pruning' /
                    args.dataset / args.pruning_method)
        if args.pruning_method == 'random':
            exps_dir = exps_dir / str(args.random_pruning_prob)
        elif args.pruning_method == 'minhash_lsh':
            exps_dir = exps_dir / str(args.num_minhash_funcs)

        exps_dir = exps_dir / get_time_str()
        best_results_file = exps_dir / 'best_results.txt'
        log_file = exps_dir / 'log.log'
        tensorboard_dir = exps_dir / 'tensorboard'
        tensorboard_dir.mkdir(parents=True, exist_ok=True)

        tb_writer = SummaryWriter(log_dir=tensorboard_dir)
        tb_writer.iteration = 0

    register_logger(log_file=log_file, stdout=True)
    log_command()
    log_args_description(args)

    clearml_task = None

    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = 'num_minhash_funcs' if 'minhash_lsh' in args.pruning_method else 'random_pruning_prob'
        pruning_param = args.num_minhash_funcs if 'minhash_lsh' in args.pruning_method else args.random_pruning_prob
        tags.append(f'{pruning_param_name}: {pruning_param}')

        if pruning_param_name == 'num_minhash_funcs':
            tags.append(f'Sparsity: {args.sparsity}')
            tags.append(f'Complement: {args.complement}')

        clearml_task = get_clearml_logger(
            f"GNN_{args.dataset}_{args.target}_{args.gnn}",
            task_name=get_time_str(),
            tags=tags)

    return tb_writer, best_results_file, log_file, clearml_task
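
A plausible call site for this helper, mirroring the setup in the other
examples (hypothetical; the listing does not show where it is invoked):

tb_writer, best_results_file, log_file, clearml_task = register_logging_files(args)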
Example #3
# Full training
net = modelos.min2019()
net = net.to(device)

# optimizer = optim.Adam(net.parameters(), lr=0.0005)
optimizer = optim.SGD(net.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])
# loss_fn = torch.nn.BCELoss()
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {'fps': training.BatchTimer(), 'acc': training.accuracy}

# Train
writer = SummaryWriter()
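# training.pass_epoch (facenet-pytorch) is assumed to consume these custom
# attributes: it logs batch metrics every `interval` steps, using `iteration`
# as the TensorBoard global step.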
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
net.eval()
training.pass_epoch(net,
                    loss_fn,
                    val_loader,
                    batch_metrics=metrics,
                    show_running=True,
                    device=device,
                    writer=writer)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)
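
    # The snippet is truncated here. Following the pattern in Examples #4
    # and #5, the loop body would run a train pass and an eval pass; the
    # `train_loader` below is assumed to be defined in the omitted part of
    # the original script.
    net.train()
    training.pass_epoch(net,
                        loss_fn,
                        train_loader,
                        optimizer,
                        scheduler,
                        batch_metrics=metrics,
                        show_running=True,
                        device=device,
                        writer=writer)

    net.eval()
    training.pass_epoch(net,
                        loss_fn,
                        val_loader,
                        batch_metrics=metrics,
                        show_running=True,
                        device=device,
                        writer=writer)

writer.close()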
Example #4
    def train(self, save_model=True):
        batch_size = 32
        epochs = 100
        workers = 0 if os.name == 'nt' else 8

        optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        scheduler = MultiStepLR(optimizer, [5, 10])

        dataset = self.get_train_dataset()
        img_inds = np.arange(len(dataset))
        np.random.shuffle(img_inds)
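        # 80/20 train/val split over the shuffled indices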
        train_inds = img_inds[:int(0.8 * len(img_inds))]
        val_inds = img_inds[int(0.8 * len(img_inds)):]

        train_loader = DataLoader(dataset,
                                  num_workers=workers,
                                  batch_size=batch_size,
                                  sampler=SubsetRandomSampler(train_inds))
        val_loader = DataLoader(dataset,
                                num_workers=workers,
                                batch_size=batch_size,
                                sampler=SubsetRandomSampler(val_inds))

        loss_fn = torch.nn.CrossEntropyLoss()
        metrics = {'fps': training.BatchTimer(), 'acc': training.accuracy}

        writer = SummaryWriter()
        writer.iteration, writer.interval = 0, 10

        print('\n\nInitial')
        print('-' * 10)
        self.model.eval()
        training.pass_epoch(self.model,
                            loss_fn,
                            val_loader,
                            batch_metrics=metrics,
                            show_running=True,
                            writer=writer)

        for epoch in tqdm(range(epochs)):
            print('\nEpoch {}/{}'.format(epoch + 1, epochs))
            print('-' * 10)

            self.model.train()
            training.pass_epoch(self.model,
                                loss_fn,
                                train_loader,
                                optimizer,
                                scheduler,
                                batch_metrics=metrics,
                                show_running=True,
                                writer=writer)

            self.model.eval()
            training.pass_epoch(self.model,
                                loss_fn,
                                val_loader,
                                batch_metrics=metrics,
                                show_running=True,
                                writer=writer)

        writer.close()

        if save_model:
            self.save_model()
Example #5
def train_model(db_id):
    start_epoch = 0
    batch_size = 32
    epochs = 5
    workers = 2
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization,
    ])
    images, num_classes = get_dataset(db_id)
    dataset = MyCustomDataset(images, train_transform)
    train_loader = DataLoader(
        dataset,
        num_workers=workers,
        batch_size=batch_size,
    )
    model = InceptionResnetV1(
        classify=True,
        num_classes=num_classes,
    ).to(device)
    checkpoint_path, checkpoint_file, label_dict = get_saved_model(db_id)
    if checkpoint_path is not None and os.path.exists(checkpoint_path):
        checkpoint = torch.load(checkpoint_file)
        model.load_state_dict(checkpoint['net'])
        start_epoch = checkpoint['epoch']
    else:
        checkpoint_path = "./checkpoint"

    optimizer = optim.SGD(model.parameters(), lr=0.1)
    scheduler = MultiStepLR(optimizer, [60, 120, 180])
    loss_fn = torch.nn.CrossEntropyLoss()
    metrics = {
        'fps': training.BatchTimer(),
        'acc': training.accuracy,
    }

    writer = SummaryWriter(flush_secs=600, filename_suffix='face_rec_log_')
    writer.iteration, writer.interval = 1, 10

    checkpoint_save_name = 'face_rec_test'
    ckp_dir = checkpoint_path
    ckp_name = ''
    for epoch in range(epochs):
        training.pass_epoch(
              model, loss_fn, train_loader, optimizer, scheduler,
              batch_metrics=metrics, show_running=False, device=device,
              writer=writer
        )

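        # NOTE: with epochs = 5 this periodic checkpoint never triggers;
        # only the final save after the loop runs.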
        if (epoch+1) % 50 == 0:
            print('Saving..')
            state = {
               'net': model.state_dict(),
               'epoch': epoch,
               'is_final' : 0
            }
            ckp_name = checkpoint_save_name + '_' + str(epoch + 1)
            os.makedirs(ckp_dir, exist_ok=True)
            torch.save(state, os.path.join(ckp_dir, ckp_name + '.pth'))

    writer.close()

    state = {
        'net': model.state_dict(),
        'epoch': epochs,
        'is_final' : 1
    }
    ckp_name = checkpoint_save_name + '_final'
    os.makedirs(ckp_dir, exist_ok=True)
    save_path = os.path.join(ckp_dir, ckp_name + '.pth')
    torch.save(state, save_path)
    update_model(db_id, save_path)
Example #6
def main():
    args = get_args()
    dataset = get_dataset(args.dataset)
    data = dataset.data
    tb_writer = SummaryWriter()
    tb_writer.iteration = 0

    device = torch.device("cuda:" +
                          str(args.device)) if torch.cuda.is_available(
                          ) and args.device != 'cpu' else torch.device("cpu")
    model = get_model(dataset.data.num_features, dataset.num_classes, args.gnn)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    old_edge_count = data.edge_index.shape[1]

    # Pass the whole graph to the pruning mechanism, treating it as one sample.
    pruning_params, pruning_ratio = prune_dataset(
        [data], args, random=np.random.RandomState(0), pruning_params=None)
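    # prune_dataset is assumed to drop edges from data.edge_index in place
    # (the edge count is re-read below) and to return the fraction of edges
    # kept, logged later as 'keep edges'.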

    edge_count = data.edge_index.shape[1]
    print(f"Old number of edges: {old_edge_count}. New: {edge_count}. "
          f"Change: {(old_edge_count - edge_count) / old_edge_count * 100:.2f}%")

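    # NeighborSampler fan-outs: -1 keeps every neighbor at that hop, so
    # [-1, 10] takes the full first-hop neighborhood and samples 10
    # second-hop neighbors per node.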
    train_loader = NeighborSampler(
        data.edge_index,
        node_idx=data.train_mask,
        # sizes=[-1, -1],  # citations
        sizes=[-1, 10],  # reddit
        # sizes=[25, 10],  # default
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=12)
    subgraph_loader = NeighborSampler(data.edge_index,
                                      node_idx=None,
                                      sizes=[-1],
                                      batch_size=1024,
                                      shuffle=False,
                                      num_workers=12)

    clearml_task = None
    if args.enable_clearml_logger:
        tags = [
            f'Dataset: {args.dataset}',
            f'Pruning method: {args.pruning_method}',
            f'Architecture: {args.gnn}',
        ]
        pruning_param_name = 'num_minhash_funcs' if 'minhash_lsh' in args.pruning_method else 'random_pruning_prob'
        pruning_param = args.num_minhash_funcs if 'minhash_lsh' in args.pruning_method else args.random_pruning_prob
        tags.append(f'{pruning_param_name}: {pruning_param}')

        if pruning_param_name == 'num_minhash_funcs':
            tags.append(f'Sparsity: {args.sparsity}')
            tags.append(f'Complement: {args.complement}')

        clearml_task = get_clearml_logger(
            project_name=f"GNN_{args.dataset}_{args.gnn}",
            task_name=get_time_str(),
            tags=tags)

    train_times = []
    val_times = []
    max_train_acc = 0
    max_val_acc = 0
    max_test_acc = 0
    for epoch in range(1, args.epochs + 1):
        loss, acc, f1, avg_time_train = train(epoch, dataset, train_loader,
                                              model, device, optimizer,
                                              tb_writer)
        train_times.append(avg_time_train)
        print(f'Epoch {epoch:02d}, Loss: {loss:.4f}, Approx. Train: {f1:.4f}')

        train_acc, val_acc, test_acc, avg_time_test = test(
            dataset, subgraph_loader, model, device)
        val_times.append(avg_time_test)
        print(f'Train ACC: {train_acc:.4f}, Val ACC: {val_acc:.4f}, '
              f'Test ACC: {test_acc:.4f}')

        tb_writer.add_scalars('Accuracy', {
            'Train': train_acc,
            'Validation': val_acc,
            'Test': test_acc
        }, epoch)

        max_train_acc = max(max_train_acc, train_acc)
        max_val_acc = max(max_val_acc, val_acc)
        max_test_acc = max(max_test_acc, test_acc)

    tb_writer.add_scalar('time/train', np.mean(train_times))
    tb_writer.add_scalar('time/val', np.mean(val_times))

    # Guard against the logger being disabled, in which case clearml_task
    # is still None here.
    if clearml_task is not None:
        experiment_logs = dict()
        experiment_logs = clearml_task.connect(experiment_logs)
        experiment_logs['time/train'] = np.mean(train_times)
        experiment_logs['time/val'] = np.mean(val_times)
        experiment_logs['keep edges'] = pruning_ratio
        experiment_logs['max train accuracy'] = max_train_acc
        experiment_logs['max val accuracy'] = max_val_acc
        experiment_logs['max test accuracy'] = max_test_acc