def test_CRUD_dataset(capsys):
    datasets.create_dataset(
        service_account_json,
        api_key,
        project_id,
        cloud_region,
        dataset_id)

    datasets.get_dataset(
        service_account_json, api_key, project_id, cloud_region, dataset_id)

    datasets.list_datasets(
        service_account_json, api_key, project_id, cloud_region)

    # Test and also clean up
    datasets.delete_dataset(
        service_account_json, api_key, project_id, cloud_region, dataset_id)

    out, _ = capsys.readouterr()

    # Check that create/get/list/delete worked
    assert 'Created dataset' in out
    assert 'Time zone' in out
    assert 'Dataset' in out
    assert 'Deleted dataset' in out
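The test assumes module-level service_account_json, api_key, project_id, cloud_region and dataset_id values; a small sketch (not part of the original sample) that derives a collision-free dataset_id per test run:

# Sketch only: derive a unique dataset_id so repeated test runs do not collide.
import uuid

dataset_id = 'test-dataset-{}'.format(uuid.uuid4().hex[:12])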
Example #2
def main(argv=None):  # pylint: disable=unused-argument
    assert args.detect or args.segment, "Either detect or segment should be True"
    if args.trunk == 'resnet50':
        net = ResNet
        depth = 50
    if args.trunk == 'vgg16':
        net = VGG
        depth = 16

    net = net(config=net_config, depth=depth, training=True, weight_decay=args.weight_decay)

    if args.dataset == 'voc07':
        dataset = get_dataset('voc07_trainval')
    if args.dataset == 'voc12-trainval':
        dataset = get_dataset('voc12-train-segmentation', 'voc12-val')
    if args.dataset == 'voc12-train':
        dataset = get_dataset('voc12-train-segmentation')
    if args.dataset == 'voc12-val':
        dataset = get_dataset('voc12-val-segmentation')
    if args.dataset == 'voc07+12':
        dataset = get_dataset('voc07_trainval', 'voc12_train', 'voc12_val')
    if args.dataset == 'voc07+12-segfull':
        dataset = get_dataset('voc07-trainval-segmentation', 'voc12-train-segmentation', 'voc12-val')
    if args.dataset == 'voc07+12-segmentation':
        dataset = get_dataset('voc07-trainval-segmentation', 'voc12-train-segmentation')
    if args.dataset == 'coco':
        # support by default for coco trainval35k split
        dataset = get_dataset('coco-train2014-*', 'coco-valminusminival2014-*')
    if args.dataset == 'coco-seg':
        # support by default for coco trainval35k split
        dataset = get_dataset('coco-seg-train2014-*', 'coco-seg-valminusminival2014-*')

    train(dataset, net, net_config)
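The chain of if statements above maps the args.dataset value to one or more split names passed to get_dataset; a dict-based dispatch is a compact alternative (a sketch assuming the same variadic get_dataset(*split_names) call pattern shown above):

# Sketch: same mapping as the if-chain above, expressed as a lookup table.
DATASET_SPLITS = {
    'voc07': ('voc07_trainval',),
    'voc12-trainval': ('voc12-train-segmentation', 'voc12-val'),
    'voc12-train': ('voc12-train-segmentation',),
    'voc12-val': ('voc12-val-segmentation',),
    'voc07+12': ('voc07_trainval', 'voc12_train', 'voc12_val'),
    'voc07+12-segfull': ('voc07-trainval-segmentation', 'voc12-train-segmentation', 'voc12-val'),
    'voc07+12-segmentation': ('voc07-trainval-segmentation', 'voc12-train-segmentation'),
    'coco': ('coco-train2014-*', 'coco-valminusminival2014-*'),
    'coco-seg': ('coco-seg-train2014-*', 'coco-seg-valminusminival2014-*'),
}
dataset = get_dataset(*DATASET_SPLITS[args.dataset])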
Example #3
File: gan.py Project: ywcmaike/OCFGAN
 def __init__(self,
              dset_name,
              imsize,
              nc,
              data_root='./data',
              results_root='./results',
              noise_dim=100,
              dout_dim=1,
              batch_size=64,
              clip_disc=True,
              max_giters=50000,
              lr=1e-4,
              disc_size=64,
              batch_norm=True,
              disc_net='flexible-dcgan',
              gen_net='flexible-dcgan'):
     """Intializer for base GAN model.
     
     Arguments:
         dset_name {str} -- Name of the dataset.
         imsize {int} -- Size of the image.
         nc {int} -- Number of channels.
     
     Keyword Arguments:
         data_root {str} -- Directory where datasets are stored (default: {'./data'}).
         results_root {str} -- Directory where results will be saved (default: {'./results'}).
         noise_dim {int} -- Dimension of noise input to generator (default: {100}).
         dout_dim {int} -- Dimension of output from discriminator (default: {1}).
         batch_size {int} -- Batch size (default: {64}).
         clip_disc {bool} -- Whether to clip the parameters of discriminator in [-0.01, 0.01].
                             This should be True when gradient penalty is not used (default: {True}). 
         max_giters {int} -- Maximum number of generator iterations (default: {50000}).
         lr {float} -- Learning rate (default: {1e-4}).
         disc_size {int} -- Number of filters in the first Conv layer of critic. (default: {64})
         batch_norm {bool} -- Whether to use batch norm in discriminator. This should be
                              False when gradient penalty is used (default: {True}).
         disc_net {str} -- Discriminator network type. (default: {'flexible-dcgan'})
         gen_net {str} -- Generator network type. (default: {'flexible-dcgan'})
     """
     self.imsize = imsize
     self.nc = nc
     self.noise_dim = noise_dim
     self.dout_dim = dout_dim
     self.disc_size = disc_size
     self.batch_norm = batch_norm
     self.disc_net = disc_net
     self.gen_net = gen_net
     self._build_model()
     self.g_optim = torch.optim.RMSprop(self.generator.parameters(), lr=lr)
     self.d_optim = torch.optim.RMSprop(self.discriminator.parameters(),
                                        lr=lr)
     self.giters = 1
     self.diters = 5
     self.max_giters = max_giters
     self.data_root = data_root
     suffix = self.__class__.__name__.lower()
     suffix += '_' + str(self.disc_size) if self.disc_size != 64 else ''
     self.results_root = os.path.join(results_root, dset_name, suffix)
     self.clip_disc = clip_disc
     self.model_save_interval = 1000
     self.fixed_im_interval = 100
     self.fixed_noise = torch.cuda.FloatTensor(batch_size, self.noise_dim,
                                               1, 1).normal_(0, 1)
     self.noise_tensor = torch.cuda.FloatTensor(batch_size, self.noise_dim,
                                                1, 1)
     train_dataset = get_dataset(dset_name,
                                 data_root=self.data_root,
                                 imsize=self.imsize)
     self.train_dataloader = tdata.DataLoader(train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4,
                                              drop_last=True)
     self.real_data = self.get_real_batch()
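A usage sketch built from the documented arguments; the enclosing class name (GAN here) and the concrete values are assumptions, since only the initializer is shown:

# Illustrative only: class name and argument values are assumptions.
model = GAN(dset_name='cifar10', imsize=32, nc=3,
            batch_size=64, lr=1e-4, max_giters=50000)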
Example #4
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'image.jpg'), img)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'bbox.jpg'), img1)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'brec.jpg'), img2)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'post_process.jpg'), img3)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'label_mask.jpg'), label_mask)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'pred_mask.jpg'), pred_mask)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'result.jpg'), img4)
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'heat.jpg'), img5)
    for i, chip in enumerate(chip_list):
        cv2.imwrite(os.path.join(dirname, 'virtualization', 'chip_%d.jpg' % i), chip)

    
    plt.show()
    cv2.waitKey(0)

    
if __name__ == '__main__':
    args = parse_args()
    dataset = get_dataset(args.dataset)
    dest_datadir = dataset.region_voc_dir
    image_dir = dest_datadir + '/JPEGImages'
    segmentation_dir = dest_datadir + '/SegmentationClass'
    list_folder = dest_datadir + '/ImageSets'

    pred_mask_dir = '../pytorch-deeplab-xception/run/mask-%s-val' % args.dataset.lower()
    val_list = dataset.get_imglist('val')

    for img_path in val_list[15:]:
        print(img_path)
        _vis(img_path, dataset)
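cv2.imwrite does not raise when the target directory is missing (it typically just returns False), so here is a small sketch, not in the original, that creates the output directory before the write calls above:

# Sketch: ensure the output directory exists before the cv2.imwrite calls.
import os
os.makedirs(os.path.join(dirname, 'virtualization'), exist_ok=True)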
    
Example #5
]


def logger(info):
    fold, epoch = info['fold'] + 1, info['epoch']
    val_loss, test_acc = info['val_loss'], info['test_acc']
    print('{:02d}/{:03d}: Val Loss: {:.4f}, Test Accuracy: {:.3f}'.format(
        fold, epoch, val_loss, test_acc))


results = []
for dataset_name, Net in product(datasets, nets):
    best_result = (float('inf'), 0, 0)  # (loss, acc, std)
    print('-----\n{} - {}'.format(dataset_name, Net.__name__))
    for num_layers, hidden in product(layers, hiddens):
        dataset = get_dataset(dataset_name, sparse=Net != DiffPool)
        model = Net(dataset, num_layers, hidden)
        loss, acc, std = cross_validation_with_val_set(
            dataset,
            model,
            folds=10,
            epochs=args.epochs,
            batch_size=args.batch_size,
            lr=args.lr,
            lr_decay_factor=args.lr_decay_factor,
            lr_decay_step_size=args.lr_decay_step_size,
            weight_decay=0,
            logger=None,
        )
        if loss < best_result[0]:
            best_result = (loss, acc, std)
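The listing stops right after best_result is updated; a plausible continuation (a sketch, not the original script) reports the best configuration found for each dataset/model pair:

    # Sketch: report the best (loss, acc, std) found over the layer/hidden grid.
    loss, acc, std = best_result
    desc = '{:.3f} ± {:.3f}'.format(acc, std)
    print('Best result - {}'.format(desc))
    results += ['{} - {}: {}'.format(dataset_name, Net.__name__, desc)]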
Example #6
    def train(self):
        source_dataset, source_test_dataset = get_dataset(self.args, self.config.source)
        source_loader = DataLoader(source_dataset, batch_size=self.config.training.batch_size,
                                   shuffle=True, num_workers=self.config.source.data.num_workers, drop_last=True)
        source_batches = iter(source_loader)

        target_dataset, target_test_dataset = get_dataset(self.args, self.config.target)
        target_loader = DataLoader(target_dataset, batch_size=self.config.training.batch_size,
                                   shuffle=True, num_workers=self.config.target.data.num_workers, drop_last=True)
        target_batches = iter(target_loader)

        cpat = get_compatibility(self.config)
        cpat_opt = get_optimizer(self.config, cpat.parameters())

        if(self.args.resume_training):
            states = torch.load(os.path.join(self.args.log_path, 'checkpoint.pth'))
            cpat.load_state_dict(states[0])
            cpat_opt.load_state_dict(states[1])
            logging.info(f"Resuming training after {states[2]} steps.")


        logging.info("Optimizing the compatibility function.")
        with tqdm(total=self.config.training.n_iters) as progress:
            for d_step in range(self.config.training.n_iters):

                try:
                    (Xs, ys) = next(source_batches)
                    (Xt, yt) = next(target_batches)
                except StopIteration:
                    # Refresh after one epoch
                    source_batches = iter(source_loader)
                    target_batches = iter(target_loader)
                    (Xs, ys) = next(source_batches)
                    (Xt, yt) = next(target_batches)

                Xs = data_transform(self.config.source, Xs)
                Xs = Xs.to(self.config.device)

                Xt = data_transform(self.config.target, Xt)
                Xt = Xt.to(self.config.device)

                obj = cpat_opt.step(lambda: self._cpat_closure(Xs, Xt, cpat, cpat_opt))
                avg_density = torch.mean(cpat.forward(Xs, Xt))

                obj_val = round(obj.item(), 5)
                avg_density_val = round(avg_density.item(), 5)
                progress.update(1)
                progress.set_description_str(f"Average Density: {avg_density_val}")
                self.config.tb_logger.add_scalars('Optimization', {
                    'Objective': obj_val,
                    'Average Density': avg_density_val
                }, d_step)

                if(d_step % self.config.training.snapshot_freq == 0):
                    states = [
                        cpat.state_dict(),
                        cpat_opt.state_dict(),
                        d_step
                    ]

                    torch.save(states, os.path.join(self.args.log_path, f'checkpoint_{d_step}.pth'))
                    torch.save(states, os.path.join(self.args.log_path, f'checkpoint.pth'))
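cpat_opt.step is called with a closure, the pattern PyTorch optimizers such as LBFGS use to re-evaluate the objective; a generic sketch of such a closure follows (the project's actual _cpat_closure is not shown above, and the objective below is purely illustrative):

    # Generic closure sketch; the real objective used by _cpat_closure is not
    # shown in the snippet, so the line below is a placeholder only.
    def _cpat_closure(self, Xs, Xt, cpat, cpat_opt):
        cpat_opt.zero_grad()
        obj = -cpat(Xs, Xt).mean()  # placeholder objective
        obj.backward()
        return obj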
Example #7
File: main.py Project: yyht/SimSiam
def main(args):

    train_set = get_dataset(
        args.dataset, 
        args.data_dir, 
        transform=get_aug(args.model, args.image_size, True), 
        train=True, 
        download=args.download # default is False
    )
    
    if args.debug:
        args.batch_size = 2 
        args.num_epochs = 1 # train only one epoch
        args.num_workers = 0
        train_set = torch.utils.data.Subset(train_set, range(0, args.batch_size)) # take only one batch

    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )

    # define model
    model = get_model(args.model, args.backbone).to(args.device)
    model = torch.nn.DataParallel(model)
    if torch.cuda.device_count() > 1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
    
    # define optimizer
    optimizer = get_optimizer(
        args.optimizer, model, 
        lr=args.base_lr*args.batch_size/256, 
        momentum=args.momentum, 
        weight_decay=args.weight_decay)

    # TODO: linear lr warm up for byol simclr swav
    # args.warm_up_epochs

    # define lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs, eta_min=0)

    loss_meter = AverageMeter(name='Loss')

    # Start training
    for epoch in tqdm(range(0, args.num_epochs), desc=f'Training'):
        loss_meter.reset()
        model.train()
        p_bar=tqdm(train_loader, desc=f'Epoch {epoch}/{args.num_epochs}')
        for idx, ((images1, images2), labels) in enumerate(p_bar):
            # breakpoint()
            model.zero_grad()
            loss = model.forward(images1.to(args.device), images2.to(args.device))
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            p_bar.set_postfix({"loss":loss_meter.val, 'loss_avg':loss_meter.avg})

        lr_scheduler.step()


        # Save checkpoint
        os.makedirs(args.output_dir, exist_ok=True)
        model_path = os.path.join(args.output_dir, f'{args.model}-{args.dataset}-epoch{epoch+1}.pth')
        torch.save({
            'epoch': epoch+1,
            'state_dict':model.module.state_dict(),
            # 'optimizer':optimizer.state_dict(), # will double the checkpoint file size
            'lr_scheduler':lr_scheduler.state_dict(),
            'args':args,
            'loss_meter':loss_meter
        }, model_path)
    print(f"Model saved to {model_path}")
Example #8
def train_multi_task(param_file):
    with open('configs.json') as config_params:
        configs = json.load(config_params)

    with open(param_file) as json_params:
        params = json.load(json_params)

    exp_identifier = []
    for (key, val) in params.items():
        if 'tasks' in key:
            continue
        exp_identifier += ['{}={}'.format(key, val)]

    exp_identifier = '|'.join(exp_identifier)
    params['exp_id'] = exp_identifier

    writer = SummaryWriter(log_dir='runs/{}_{}'.format(
        params['exp_id'],
        datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")))

    train_loader, train_dst, val_loader, val_dst = datasets.get_dataset(
        params, configs)
    loss_fn = losses.get_loss(params)
    metric = metrics.get_metrics(params)

    model = model_selector.get_model(params)
    model_params = []
    for m in model:
        model_params += model[m].parameters()

    if 'RMSprop' in params['optimizer']:
        optimizer = torch.optim.RMSprop(model_params, lr=params['lr'])
    elif 'Adam' in params['optimizer']:
        optimizer = torch.optim.Adam(model_params, lr=params['lr'])
    elif 'SGD' in params['optimizer']:
        optimizer = torch.optim.SGD(model_params,
                                    lr=params['lr'],
                                    momentum=0.9)

    tasks = params['tasks']
    all_tasks = configs[params['dataset']]['all_tasks']
    print('Starting training with parameters \n \t{} \n'.format(str(params)))

    if 'mgda' in params['algorithm']:
        approximate_norm_solution = params['use_approximation']
        if approximate_norm_solution:
            print('Using approximate min-norm solver')
        else:
            print('Using full solver')
    n_iter = 0
    loss_init = {}
    for epoch in tqdm(range(NUM_EPOCHS)):
        start = timer()
        print('Epoch {} Started'.format(epoch))
        if (epoch + 1) % 10 == 0:
            # Every 10 epochs, decay the learning rate by a factor of 0.85
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.85
            print('Decayed the learning rate at iteration {}'.format(n_iter))

        for m in model:
            model[m].train()

        for batch in train_loader:
            n_iter += 1
            # First member is always images
            images = batch[0]
            images = Variable(images.cuda())

            labels = {}
            # Read all targets of all tasks
            for i, t in enumerate(all_tasks):
                if t not in tasks:
                    continue
                labels[t] = batch[i + 1]
                labels[t] = Variable(labels[t].cuda())

            # Scaling the loss functions based on the algorithm choice
            loss_data = {}
            grads = {}
            scale = {}
            mask = None
            masks = {}
            if 'mgda' in params['algorithm']:
                # Will use our MGDA_UB if approximate_norm_solution is True. Otherwise, will use MGDA

                if approximate_norm_solution:
                    optimizer.zero_grad()
                    # First compute representations (z)
                    images_volatile = Variable(images.data, volatile=True)
                    rep, mask = model['rep'](images_volatile, mask)
                    # As an approximate solution we only need gradients for input
                    if isinstance(rep, list):
                        # This is a hack to handle psp-net
                        rep = rep[0]
                        rep_variable = [
                            Variable(rep.data.clone(), requires_grad=True)
                        ]
                        list_rep = True
                    else:
                        rep_variable = Variable(rep.data.clone(),
                                                requires_grad=True)
                        list_rep = False

                    # Compute gradients of each loss function wrt z
                    for t in tasks:
                        optimizer.zero_grad()
                        out_t, masks[t] = model[t](rep_variable, None)
                        loss = loss_fn[t](out_t, labels[t])
                        loss_data[t] = loss.data[0]
                        loss.backward()
                        grads[t] = []
                        if list_rep:
                            grads[t].append(
                                Variable(rep_variable[0].grad.data.clone(),
                                         requires_grad=False))
                            rep_variable[0].grad.data.zero_()
                        else:
                            grads[t].append(
                                Variable(rep_variable.grad.data.clone(),
                                         requires_grad=False))
                            rep_variable.grad.data.zero_()
                else:
                    # This is MGDA
                    for t in tasks:
                        # Compute gradients of each loss function wrt parameters
                        optimizer.zero_grad()
                        rep, mask = model['rep'](images, mask)
                        out_t, masks[t] = model[t](rep, None)
                        loss = loss_fn[t](out_t, labels[t])
                        loss_data[t] = loss.data[0]
                        loss.backward()
                        grads[t] = []
                        for param in model['rep'].parameters():
                            if param.grad is not None:
                                grads[t].append(
                                    Variable(param.grad.data.clone(),
                                             requires_grad=False))

                # Normalize all gradients, this is optional and not included in the paper. See the notebook for details
                gn = gradient_normalizers(grads, loss_data,
                                          params['normalization_type'])
                for t in tasks:
                    for gr_i in range(len(grads[t])):
                        grads[t][gr_i] = grads[t][gr_i] / gn[t]

                # Frank-Wolfe iteration to compute scales.
                sol, min_norm = MinNormSolver.find_min_norm_element(
                    [grads[t] for t in tasks])
                for i, t in enumerate(tasks):
                    scale[t] = float(sol[i])
            else:
                for t in tasks:
                    masks[t] = None
                    scale[t] = float(params['scales'][t])

            # Scaled back-propagation
            optimizer.zero_grad()
            rep, _ = model['rep'](images, mask)
            for i, t in enumerate(tasks):
                out_t, _ = model[t](rep, masks[t])
                loss_t = loss_fn[t](out_t, labels[t])
                loss_data[t] = loss_t.data[0]
                if i > 0:
                    loss = loss + scale[t] * loss_t
                else:
                    loss = scale[t] * loss_t
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.data[0], n_iter)
            for t in tasks:
                writer.add_scalar('training_loss_{}'.format(t), loss_data[t],
                                  n_iter)

        for m in model:
            model[m].eval()

        tot_loss = {}
        tot_loss['all'] = 0.0
        met = {}
        for t in tasks:
            tot_loss[t] = 0.0
            met[t] = 0.0

        num_val_batches = 0
        for batch_val in val_loader:
            val_images = Variable(batch_val[0].cuda(), volatile=True)
            labels_val = {}

            for i, t in enumerate(all_tasks):
                if t not in tasks:
                    continue
                labels_val[t] = batch_val[i + 1]
                labels_val[t] = Variable(labels_val[t].cuda(), volatile=True)

            val_rep, _ = model['rep'](val_images, None)
            for t in tasks:
                out_t_val, _ = model[t](val_rep, None)
                loss_t = loss_fn[t](out_t_val, labels_val[t])
                tot_loss['all'] += loss_t.data[0]
                tot_loss[t] += loss_t.data[0]
                metric[t].update(out_t_val, labels_val[t])
            num_val_batches += 1

        for t in tasks:
            writer.add_scalar('validation_loss_{}'.format(t),
                              tot_loss[t] / num_val_batches, n_iter)
            metric_results = metric[t].get_result()
            for metric_key in metric_results:
                writer.add_scalar('metric_{}_{}'.format(metric_key, t),
                                  metric_results[metric_key], n_iter)
            metric[t].reset()
        writer.add_scalar('validation_loss', tot_loss['all'] / len(val_dst),
                          n_iter)

        if epoch % 3 == 0:
            # Save a checkpoint every 3 epochs
            state = {
                'epoch': epoch + 1,
                'model_rep': model['rep'].state_dict(),
                'optimizer_state': optimizer.state_dict()
            }
            for t in tasks:
                key_name = 'model_{}'.format(t)
                state[key_name] = model[t].state_dict()

            torch.save(
                state,
                "saved_models/{}_{}_model.pkl".format(params['exp_id'],
                                                      epoch + 1))

        end = timer()
        print('Epoch ended in {}s'.format(end - start))
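This example is written against pre-0.4 PyTorch (Variable, volatile=True, loss.data[0]); for readers adapting it, a rough sketch of the modern equivalents of those idioms:

# Rough PyTorch >= 0.4 equivalents of the legacy idioms used above.
images = batch[0].cuda()              # replaces Variable(images.cuda())
loss_value = loss.item()              # replaces loss.data[0]
with torch.no_grad():                 # replaces Variable(..., volatile=True)
    val_rep, _ = model['rep'](val_images, None)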
Example #9
from train import (
    get_trainer,
    loop,
)
from datasets import (
    get_dataset,
    get_gabe_planktons,
)

from pylearn2.models import mlp


warnings.filterwarnings("ignore")

if __name__ == '__main__':
    train, valid, test = get_dataset()
    trainer = get_trainer(train, valid, test)

    in_space = Conv2DSpace(
        shape=[IMG_SIZE, IMG_SIZE],
        num_channels=1,
        # axes=['c', 0, 1, 'b']
    )

    net = mlp.MLP(
        layers=[conv0, conv1, conv2, rect0, rect1, smax],
        input_space=in_space,
        # nvis=784,
    )

    net = loop(trainer, net)
Example #10
# parse arguments
args, model_args = parse_args()

# define logger
logdir = args.logdir
logger = Logger(logdir, read_only=args.test_only)
logger.log('args: %s' % str(args))
logger.log('model args: %s' % str(model_args))

# define model
model = models.get_model(args.model, model_args).cuda()
# logger.log('full-model FLOPs: %d' % measure(model, torch.zeros(1, 3, 32, 32).cuda(), k=-1)[0])

# define datasets - 0: train, 1: val, 2: test
datasets = get_dataset(args.dataset, val_size=args.valsize)
dataloaders = []
for d in datasets:
    dataloaders.append(
        DataLoader(d, batch_size=args.batch_size, shuffle=True, num_workers=4))

# define loss
criterion = nn.CrossEntropyLoss().cuda()

# define optimizer
optimizer = optim.SGD(model.parameters(),
                      lr=args.lr,
                      momentum=args.momentum,
                      weight_decay=args.wd,
                      nesterov=args.nesterov)
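The loop above shuffles every split; given the comment marks them as 0: train, 1: val, 2: test, a common variant (a sketch, not the author's code) shuffles only the training split:

# Sketch: shuffle only the training split; keep val/test order deterministic.
dataloaders = [
    DataLoader(d, batch_size=args.batch_size, shuffle=(i == 0), num_workers=4)
    for i, d in enumerate(datasets)
]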
Example #11
def main(opt):

    # make folder
    base_path = 'result'
    os.makedirs(base_path, exist_ok=True)
    result_path = make_folder(base_path, opt.save_folder)

    # Dataset
    print(f'Preparing Dataset....{opt.dataset}')
    transform = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
        ])
    }
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    train_set, test_set = get_dataset(opt.dataset, train_transform,
                                      test_transform)

    # Load Dataset
    train_loader = DataLoader(train_set,
                              batch_size=opt.train_batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_set,
                             batch_size=opt.test_batch_size,
                             shuffle=False)

    # GPU
    if torch.cuda.is_available() and opt.cuda:
        device = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        device = 'cpu'
    print(f'Using {device}')

    # model
    from torchvision.models import vgg16_bn
    print(f'Preparing Model....{opt.model}')
    model = get_model(opt.model, opt.num_classes)
    model.to(device)

    # resuming
    if opt.resume:
        print('Resuming from checkpoint')
        assert os.path.isdir(f'{opt.resume}')

        checkpoint = torch.load(f'{opt.resume}/{opt.model}_ckpt.pth')
        model.load_state_dict(checkpoint['model'])

        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        train_result = checkpoint['train_result']
        test_result = checkpoint['test_result']

    else:
        start_epoch = 0
        best_acc = 0
        train_result, test_result = [], []

    # optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Training
    start = time.time()

    for e in range(start_epoch, start_epoch + opt.epoch):
        train_result += train(model, train_loader, optimizer, loss_func,
                              device, start_epoch, scheduler, e)
        test_result += test(model, test_loader, loss_func, device, start_epoch,
                            e)
        scheduler.step()

        # Save checkpoint
        if test_result[1::2][-1] > best_acc:
            print(f'Saving Model....({result_path})')
            state = {
                'model': model.state_dict(),
                'epoch': e + 1,
                'acc': test_result[1::2][-1],
                'train_result': train_result,
                'test_result': test_result
            }
            torch.save(state, f'{result_path}/{opt.model}_ckpt.pth')
            best_acc = test_result[1::2][-1]

        # Save Result
        if opt.save_result:
            print(f'Saving Result....({result_path})')
            save_result(train_result, test_result, result_path)

    end = time.time()
    with open(f'{result_path}/time_log.txt', 'w') as f:
        f.write(str(datetime.timedelta(seconds=end - start)))
Example #12
def fit_predict_categorical_encoding(datasets, str_preprocess, encoders,
                                     classifiers, test_size, n_splits, n_jobs,
                                     results_path):
    '''
    Learning with dirty categorical variables.
    '''
    results_path = os.path.join(BENCHMARK_HOME, results_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            print('Dataset: %s' % dataset)
            data = get_dataset(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            print('Data shape: %d, %d' % data.df.shape)

            n_cats = len(np.unique(data.df[data.special_column]))
            if (n_cats > 5000) and 'OneHotEncoder' in encoder:
                print('Skipping this encoder, too many categories '
                      '({0})'.format(n_cats))
                continue

            cv = select_cross_val(data.clf_type, n_splits, test_size)
            scaler = preprocessing.StandardScaler(with_mean=False)

            # Define classifiers
            clfs = instantiate_estimators(data.clf_type,
                                          classifiers,
                                          clf_seed,
                                          y=data.df.loc[:, data.ycol].values)

            for i, clf in enumerate(clfs):
                # import pdb; pdb.set_trace()
                # print(
                #     '{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {}'.format(
                #         'Prediction column', data.ycol,
                #         'Task type', str(data.clf_type),
                #         'Classifier', clf,
                #         'Encoder', encoder))

                try:
                    try:
                        clf2 = clf.estimator
                    except AttributeError:
                        clf2 = clf
                    clf_name = clf2.__class__.__name__
                    results_dict = {
                        'dataset': data.name,
                        'n_splits': n_splits,
                        'test_size': test_size,
                        'n_rows': n_rows,
                        'encoder': encoder,
                        'str_preprocess': str_preprocess,
                        'clf': [classifiers[i], clf_name,
                                clf2.get_params()],
                        'ShuffleSplit': [cv.__class__.__name__],
                        'scaler':
                        [scaler.__class__.__name__,
                         scaler.get_params()],
                        'sample_seed': sample_seed,
                        'shuffleseed': shuffle_seed,
                        'col_action': data.col_action,
                        'clf_type': data.clf_type,
                    }

                    if verify_if_exists(results_path, results_dict):
                        print('Prediction already exists.\n')
                        continue

                    start = time.time()

                    column_action = get_column_action(data.col_action,
                                                      data.xcols, encoder,
                                                      data.clf_type)

                    pred = Parallel(n_jobs=n_jobs)(
                        delayed(fit_predict_fold)(
                            data, scaler, column_action, clf, encoder, fold,
                            cv.n_splits, train_index, test_index)
                        for fold, (train_index, test_index) in enumerate(
                            cv.split(data.df, data.df[data.ycol].values)))
                    pred = np.array(pred)
                    results = {
                        'fold': list(pred[:, 0]),
                        'n_train_samples': list(pred[:, 1]),
                        'n_train_features': list(pred[:, 2]),
                        'score': list(pred[:, 3]),
                        'train_score': list(pred[:, 4]),
                        'encoding_time': list(pred[:, 5]),
                        'training_time': list(pred[:, 6])
                    }
                    results_dict['results'] = results

                    # Saving results
                    pc_name = socket.gethostname()
                    now = ''.join([
                        c for c in str(datetime.datetime.now()) if c.isdigit()
                    ])
                    filename = (
                        '%s_%s_%s_%s_%s.json' %
                        (pc_name, data.name, classifiers[i], encoder, now))
                    results_file = os.path.join(results_path, filename)
                    results_dict = array2list(results_dict)
                    write_json(results_dict, results_file)
                    print('prediction time: %.1f s.' % (time.time() - start))
                    print('Saving results to: %s\n' % results_file)
                except Exception as e:  # noqa
                    print('Prediction failed: ', str(e))
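A usage sketch based on the signature above; every argument value here is illustrative rather than taken from the benchmark's real configuration:

# Illustrative call only; dataset, encoder and classifier names are assumptions.
fit_predict_categorical_encoding(
    datasets=['employee_salaries'],
    str_preprocess=True,
    encoders=['OneHotEncoder'],
    classifiers=['LogisticRegression'],
    test_size=0.2,
    n_splits=5,
    n_jobs=4,
    results_path='results')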
Example #13
from utils.utils import load_checkpoint, set_random_seed

P = parse_args()

### Set torch device ###
if torch.cuda.is_available():
    torch.cuda.set_device(P.local_rank)
device = torch.device(f"cuda" if torch.cuda.is_available() else "cpu")

P.n_gpus = torch.cuda.device_count()
assert P.n_gpus <= 1  # no multi GPU

set_random_seed(P.seed)

### Initialize dataset ###
train_set, test_set, image_size, n_classes = get_dataset(P, dataset=P.dataset, augment=P.augment_type)
P.image_size = image_size
P.n_classes = n_classes

### Define data loader ###
kwargs = {'pin_memory': True, 'num_workers': 8}
train_loader = DataLoader(train_set, shuffle=True, batch_size=P.batch_size, **kwargs)
test_loader = DataLoader(test_set, shuffle=False, batch_size=P.test_batch_size, **kwargs)

if P.augment_type == 'autoaug_sche':
    train_set_second, _, _, _ = get_dataset(P, dataset=P.dataset, augment='autoaug')
    P.train_second_loader = DataLoader(train_set_second, shuffle=True, batch_size=P.batch_size, **kwargs)

### Initialize model ###
model = C.get_classifier(P, n_classes=P.n_classes).to(device)
optimizer, lr_decay_gamma = get_optimizer(P, model)
Example #14
# load image
train_img_shape = tuple([int(x) for x in args.train_img_shape])
img_transform = Compose([
    Scale(train_img_shape, Image.BILINEAR),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225])
])
label_transform = Compose([
    Scale(train_img_shape, Image.NEAREST),
    ToLabel(),
    ReLabel(255, args.n_class - 1),  # convert label
])

source_dataset = get_dataset(dataset_name='source',
                             img_lists=args.source_list,
                             label_lists=args.source_label_list,
                             img_transform=img_transform,
                             label_transform=label_transform,
                             test=False)
target_dataset = get_dataset(dataset_name='target',
                             img_lists=args.target_list,
                             label_lists=None,
                             img_transform=img_transform,
                             label_transform=None,
                             test=False)

train_loader = torch.utils.data.DataLoader(ConcatDataset(
    source_dataset, target_dataset),
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           pin_memory=True)
Example #15
SAMPLING_MODE = args.sampling_mode
# Pre-computed weights to restore
CHECKPOINT = args.restore
# Learning rate for the SGD
LEARNING_RATE = args.lr
# Automated class balancing
CLASS_BALANCING = args.class_balancing
# Training ground truth file
TRAIN_GT = args.train_set
# Testing ground truth file
TEST_GT = args.test_set
TEST_STRIDE = args.test_stride

if args.download is not None and len(args.download) > 0:
    for dataset in args.download:
        get_dataset(dataset, target_folder=FOLDER)
    quit()

viz = visdom.Visdom(env=DATASET + ' ' + MODEL)
if not viz.check_connection():
    print("Visdom is not connected. Did you run 'python -m visdom.server' ?")

hyperparams = vars(args)
# Load the dataset
img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset(
    DATASET, FOLDER)
# Number of classes
N_CLASSES = len(LABEL_VALUES)
# Number of bands (last dimension of the image tensor)
N_BANDS = img.shape[-1]
Example #16
params = HParams(args.cfg_file)
pprint(params.dict)

os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu
np.random.seed(params.seed)
tf.set_random_seed(params.seed)

############################################################
logging.basicConfig(filename=params.exp_dir + '/train.log',
                    filemode='w',
                    level=logging.INFO,
                    format='%(message)s')
logging.info(pformat(params.dict))
############################################################

trainset = get_dataset('train', params)
validset = get_dataset('valid', params)
testset = get_dataset('test', params)
logging.info(f"trainset: {trainset.size} \
               validset: {validset.size} \
               testset: {testset.size}")

x_ph = tf.placeholder(tf.float32, [None, params.dimension])
y_ph = tf.placeholder(tf.float32, [None])
b_ph = tf.placeholder(tf.float32, [None, params.dimension])
m_ph = tf.placeholder(tf.float32, [None, params.dimension])

model = get_model(params)
model.build(x_ph, y_ph, b_ph, m_ph)

total_params = 0
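The placeholders above are filled at run time through a feed_dict; a minimal sketch follows (the batch variables, session and train op are assumptions, since the training loop is not part of the snippet):

# Sketch: feed one batch into the placeholders defined above.
# x_batch, y_batch, b_batch, m_batch, sess and train_op are assumed to exist.
feed = {x_ph: x_batch, y_ph: y_batch, b_ph: b_batch, m_ph: m_batch}
_ = sess.run(train_op, feed_dict=feed)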
Example #17
if args['display']:
    plt.ion()
else:
    plt.ioff()
    plt.switch_backend("agg")

if args['save']:
    if not os.path.exists(args['save_dir']):
        os.makedirs(args['save_dir'])

# set device
device = torch.device("cuda:0" if args['cuda'] else "cpu")

# dataloader
dataset = get_dataset(args['dataset']['name'], args['dataset']['kwargs'])
dataset_it = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    drop_last=False,
    num_workers=4,
    pin_memory=True if args['cuda'] else False)

# load model
model = get_model(args['model']['name'], args['model']['kwargs'])
model = torch.nn.DataParallel(model).to(device)

# load snapshot
if os.path.exists(args['checkpoint_path']):
    state = torch.load(args['checkpoint_path'])
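The snippet stops right after loading the snapshot; a plausible continuation follows (the checkpoint key name is an assumption, it is not shown in the original):

    # Sketch: restore the weights from the loaded snapshot; the key name
    # 'model_state_dict' is an assumption about the checkpoint layout.
    model.load_state_dict(state['model_state_dict'], strict=True)
    model.eval()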
Example #18
 def get_dataset_path(self, dset_name):
     return get_dataset(dset_name)
Example #19
def run_exp_lib(dataset_feat_net_triples,
                get_model=get_model_with_default_configs):
    results = []
    exp_nums = len(dataset_feat_net_triples)
    print("-----\nTotal %d experiments in this run:" % exp_nums)
    for exp_id, (dataset_name, feat_str, net) in enumerate(
            dataset_feat_net_triples):
        print('{}/{} - {} - {} - {}'.format(
            exp_id+1, exp_nums, dataset_name, feat_str, net))
    print("Here we go..")
    sys.stdout.flush()
    for exp_id, (dataset_name, feat_str, net) in enumerate(
            dataset_feat_net_triples):
        print('-----\n{}/{} - {} - {} - {}'.format(
            exp_id+1, exp_nums, dataset_name, feat_str, net))
        sys.stdout.flush()
        dataset = get_dataset(
            dataset_name, sparse=True, feat_str=feat_str, root=args.data_root)
        model_func = get_model(net)
        if 'MNIST' in dataset_name or 'CIFAR' in dataset_name:
            train_dataset, test_dataset = dataset
            train_acc, acc, duration = single_train_test(
                train_dataset,
                test_dataset,
                model_func,
                epochs=args.epochs,
                batch_size=args.batch_size,
                lr=args.lr,
                lr_decay_factor=args.lr_decay_factor,
                lr_decay_step_size=args.lr_decay_step_size,
                weight_decay=0,
                epoch_select=args.epoch_select,
                with_eval_mode=args.with_eval_mode)
            std = 0
        else:
            train_acc, acc, std, duration = cross_validation_with_val_set(
                dataset,
                model_func,
                folds=10,
                epochs=args.epochs,
                batch_size=args.batch_size,
                lr=args.lr,
                lr_decay_factor=args.lr_decay_factor,
                lr_decay_step_size=args.lr_decay_step_size,
                weight_decay=0,
                epoch_select=args.epoch_select,
                with_eval_mode=args.with_eval_mode,
                logger=logger, model_PATH=model_PATH, semi_split=args.semi_split)

        with open(log_PATH, "a+") as f:
            f.write(args.model_lr + " " + args.model_epoch + ": ")
            f.write(str(acc) + " " + str(std))
            f.write("\n")

        summary1 = 'data={}, model={}, feat={}, eval={}'.format(
            dataset_name, net, feat_str, args.epoch_select)
        summary2 = 'train_acc={:.2f}, test_acc={:.2f} ± {:.2f}, sec={}'.format(
            train_acc*100, acc*100, std*100, round(duration, 2))
        results += ['{}: {}, {}'.format('fin-result', summary1, summary2)]
        print('{}: {}, {}'.format('mid-result', summary1, summary2))
        sys.stdout.flush()
    print('-----\n{}'.format('\n'.join(results)))
    sys.stdout.flush()