Example #1
def main(args):
    seed = util.prepare(args)
    if not cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(seed)
    random.seed(seed)
    manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.deterministic = True
    log_config(args)
    start = time.time()
    if args.type == 'search':
        search(args)
    elif args.type == 'train':
        train(args)
    elif args.type == 'test':
        pass
    else:
        raise ValueError('unknown type: {:}'.format(args.type))
    tot_time = time.time() - start
    m, s = divmod(tot_time, 60)
    h, m = divmod(m, 60)
    logging.info("total time %d:%02d:%02d" % (h, m, s))
Example #2
def fix_random_seed(seed=1234):
    # Ref.: https://github.com/bentrevett/pytorch-image-classification/blob/master/5_resnet.ipynb
    random.seed(seed)
    np.random.seed(seed)
    manual_seed(seed)
    cuda.manual_seed(seed)
    backends.cudnn.deterministic = True
Example #3
    def activate(self):
        random.seed(self.seed)
        np_random.seed(self.seed)
        manual_seed(self.seed)
        cuda.manual_seed(self.seed)
        cudnn.deterministic = True
        cudnn.benchmark = False
Example #4
def set_seed(seed: int) -> None:
    torch.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    cudnn.benchmark = False
    cudnn.deterministic = True
Example #5
    def __init__(self, args, criterion: nn.CrossEntropyLoss, optimizer):
        seed = random.randint(0, 1000)
        random.seed(seed)
        torch.manual_seed(seed)
        cd.manual_seed(seed)  # cd is presumably torch.cuda imported under an alias
        cd.manual_seed_all(seed)
        self.args = args
        self.model_name = args.net
        self.config = self._parse_args(args.config)
        net_module = importlib.import_module(f"net.{self.model_name}")
        self.model_class = getattr(net_module, self.model_name)
        self.model = self.model_class(**self._parse_model_args())
        self.criterion = criterion
        self.base_lr = self.config.get("lr", 0.01)

        self.optimizer = self._get_optimizer(optimizer)
        self.iters = self.config.get("iter", 5000)
        self.power = self.config.get("power", 0.9)
        self.numclass = self.config['numclass']
        self.batch_size = self.config['batch_size']
        self.print_freq = self.config['print_freq']
        self.save_freq = self.config['save_freq']
        self.gpu = self.config.get('gpus')
        print(f"gpus: {self.gpu}")
        if self.gpu:
            self.gpu = [self.gpu] if isinstance(self.gpu, int) else list(
                self.gpu)
        else:
            self.device = torch.device("cpu")
        self.train_dataloader = get_data_loader(
            self.config['train_data_path'],
            self.config['train_annot_path'],
            self.numclass,
            img_size=self.config['img_size'],
            batch_size=self.batch_size,
            name=self.config['dataset_name'])
        self.val_dataloader = get_data_loader(self.config['val_data_path'],
                                              self.config['val_annot_path'],
                                              self.numclass,
                                              img_size=self.config['img_size'],
                                              batch_size=self.batch_size,
                                              name=self.config['dataset_name'],
                                              mode='eval')
        self.metricer = Metrics(self.numclass)
        logdir = self._get_log_dir()
        self.writer = SummaryWriter(log_dir=logdir)
        if self.gpu:
            print(torch.cuda.device_count())
            self.model = nn.DataParallel(self.model,
                                         device_ids=self.gpu).cuda(self.gpu[0])
            # self.criterion = self.criterion.cuda(self.gpu[0])
            cudnn.benchmark = False  # disabled for determinism (True would trade reproducibility for speed)
            cudnn.deterministic = True
Example #6
def fix_random_state(seed_value):
    """
    fix the random seed of each library
    """

    random.seed(seed_value)
    np.random.seed(seed_value)

    if torch.cuda.is_available():
        cuda.manual_seed(seed_value)
        cuda.manual_seed_all(seed_value)
    torch.manual_seed(seed_value)
    torch.random.manual_seed(seed_value)
Example #7
def set_seed(seed: int,
             cudnn_deterministic: bool = False,
             cudnn_benchmark: bool = True):
    """
    Set all relevant seeds for torch, numpy and python
    Args:
        seed: int seed
        cudnn_deterministic: set True for deterministic training.
        cudnn_benchmark: set False for deterministic training.
    """
    th.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    if cudnn_deterministic:
        cudnn.deterministic = True
    cudnn.benchmark = cudnn_benchmark
Example #8
def set_seed(seed: int, set_deterministic: bool = True):
    """
    Set all relevant seeds for torch, numpy and python

    Args:
        seed: int seed
        set_deterministic: Guarantee deterministic training, possibly at the cost of performance.
    """
    th.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    if set_deterministic:
        cudnn.benchmark = False
        cudnn.deterministic = True
    elif cudnn.benchmark or not cudnn.deterministic:
        print(
            f"WARNING: Despite fixed seed {seed}, training may not be deterministic with {cudnn.benchmark=} "
            f"(must be False for deterministic training) and {cudnn.deterministic=} (must be True for deterministic "
            f"training)")
Example #9
File: starts.py Project: MartrixG/CODES
def prepare_seed(rand_seed):
    random.seed(rand_seed)
    np.random.seed(rand_seed)
    torch.manual_seed(rand_seed)
    cuda.manual_seed(rand_seed)
    cuda.manual_seed_all(rand_seed)
Example #10
def main(args):

    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = json.load(file)
    config.update(vars(args))
    config = apply_dict(Dict, config)

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    global_rank = distributed.get_rank()
    local_rank = global_rank % cuda.device_count()

    np.random.seed(global_rank)
    torch.manual_seed(global_rank)
    cuda.manual_seed(global_rank)
    cuda.set_device(local_rank)

    train_dataset = datasets.CIFAR10(
        root=config.train_root,
        train=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.49139968, 0.48215827, 0.44653124),
                                 std=(0.24703233, 0.24348505, 0.26158768))
        ]),
        download=True)
    train_train_dataset, train_val_dataset = utils.data.random_split(
        dataset=train_dataset,
        lengths=[
            int(len(train_dataset) * config.split_ratio),
            int(len(train_dataset) * (1 - config.split_ratio))
        ])
    val_dataset = datasets.CIFAR10(
        root=config.val_root,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.49139968, 0.48215827, 0.44653124),
                                 std=(0.24703233, 0.24348505, 0.26158768))
        ]),
        download=True)

    train_train_sampler = utils.data.distributed.DistributedSampler(
        train_train_dataset)
    train_val_sampler = utils.data.distributed.DistributedSampler(
        train_val_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_train_data_loader = utils.data.DataLoader(
        dataset=train_train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_train_sampler,
        num_workers=config.num_workers,
        pin_memory=True)
    train_val_data_loader = utils.data.DataLoader(
        dataset=train_val_dataset,
        batch_size=config.local_batch_size,
        sampler=train_val_sampler,
        num_workers=config.num_workers,
        pin_memory=True)
    val_data_loader = utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=config.local_batch_size,
                                            sampler=val_sampler,
                                            num_workers=config.num_workers,
                                            pin_memory=True)

    generator = DARTSGenerator(
        latent_size=128,
        min_resolution=4,
        out_channels=3,
        operations=dict(
            sep_conv_3x3=functools.partial(SeparableConvTranspose2d,
                                           kernel_size=3,
                                           padding=1),
            sep_conv_5x5=functools.partial(SeparableConvTranspose2d,
                                           kernel_size=5,
                                           padding=2),
            dil_conv_3x3=functools.partial(DilatedConvTranspose2d,
                                           kernel_size=3,
                                           padding=2,
                                           dilation=2),
            dil_conv_5x5=functools.partial(DilatedConvTranspose2d,
                                           kernel_size=5,
                                           padding=4,
                                           dilation=2),
            identity=functools.partial(IdentityTranspose),
            # zero=functools.partial(ZeroTranspose)
        ),
        num_nodes=6,
        num_input_nodes=2,
        num_cells=9,
        reduction_cells=[2, 5, 8],
        num_predecessors=2,
        num_channels=16,
    ).cuda()

    discriminator = DARTSDiscriminator(
        in_channels=3,
        min_resolution=4,
        num_classes=10,
        operations=dict(
            sep_conv_3x3=functools.partial(SeparableConv2d,
                                           kernel_size=3,
                                           padding=1),
            sep_conv_5x5=functools.partial(SeparableConv2d,
                                           kernel_size=5,
                                           padding=2),
            dil_conv_3x3=functools.partial(DilatedConv2d,
                                           kernel_size=3,
                                           padding=2,
                                           dilation=2),
            dil_conv_5x5=functools.partial(DilatedConv2d,
                                           kernel_size=5,
                                           padding=4,
                                           dilation=2),
            identity=functools.partial(Identity),
            # zero=functools.partial(Zero)
        ),
        num_nodes=6,
        num_input_nodes=2,
        num_cells=9,
        reduction_cells=[2, 5, 8],
        num_predecessors=2,
        num_channels=128).cuda()

    criterion = CrossEntropyLoss(config.label_smoothing)

    config.global_batch_size = config.local_batch_size * distributed.get_world_size()
    config.network_optimizer.lr *= config.global_batch_size / config.global_batch_denom
    config.architecture_optimizer.lr *= config.global_batch_size / config.global_batch_denom

    generator_network_optimizer = optim.Adam(
        params=generator.network.parameters(),
        lr=config.generator_network_optimizer.lr,
        betas=config.generator_network_optimizer.betas,
        weight_decay=config.generator_network_optimizer.weight_decay)
    generator_architecture_optimizer = optim.Adam(
        params=generator.architecture.parameters(),
        lr=config.generator_architecture_optimizer.lr,
        betas=config.generator_architecture_optimizer.betas,
        weight_decay=config.generator_architecture_optimizer.weight_decay)
    discriminator_network_optimizer = optim.Adam(
        params=discriminator.network.parameters(),
        lr=config.discriminator_network_optimizer.lr,
        betas=config.discriminator_network_optimizer.betas,
        weight_decay=config.discriminator_network_optimizer.weight_decay)
    discriminator_architecture_optimizer = optim.Adam(
        params=discriminator.architecture.parameters(),
        lr=config.discriminator_architecture_optimizer.lr,
        betas=config.discriminator_architecture_optimizer.betas,
        weight_decay=config.discriminator_architecture_optimizer.weight_decay)

    trainer = DARTSGANTrainer(
        generator=generator,
        generator_networks=[generator.network],
        generator_architectures=[generator.architecture],
        discriminator=discriminator,
        discriminator_networks=[discriminator.network],
        discriminator_architectures=[discriminator.architecture],
        generator_network_optimizer=generator_network_optimizer,
        generator_architecture_optimizer=generator_architecture_optimizer,
        discriminator_network_optimizer=discriminator_network_optimizer,
        discriminator_architecture_optimizer=discriminator_architecture_optimizer,
        train_train_data_loader=train_train_data_loader,
        train_val_data_loader=train_val_data_loader,
        val_data_loader=val_data_loader,
        train_train_sampler=train_train_sampler,
        train_val_sampler=train_val_sampler,
        val_sampler=val_sampler,
        log_dir=os.path.join('log', config.name))

    if config.checkpoint:
        trainer.load(config.checkpoint)

    if config.training:
        for epoch in range(trainer.epoch, config.num_epochs):
            trainer.step(epoch)
            trainer.train()
            trainer.log_architectures()
            trainer.log_histograms()
            trainer.save()

    elif config.validation:
        trainer.validate()
Example #11
def main():
    seed = util.prepare(args)
    if not cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    CIFAR_CLASSES = 10
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info('hidden_layers:{:}'.format(args.hidden_layers))
    logging.info('first_neurons:{:}'.format(args.first_neurons))
    logging.info('change:{:}'.format(args.change))
    logging.info('activate_func:{:}'.format(args.activate_func))
    logging.info('opt:{:}'.format(args.opt))
    logging.info('cross_link:{:}'.format(args.cross_link))

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype, args)
    model = model.cuda()
    logging.info("param size = %fMB", util.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = util.get_data_transforms_cifar10(args)
    train_data = datasets.CIFAR10(root=args.data,
                                  train=True,
                                  download=False,
                                  transform=train_transform)
    valid_data = datasets.CIFAR10(root=args.data,
                                  train=False,
                                  download=False,
                                  transform=valid_transform)

    train_queue = DataLoader(train_data,
                             batch_size=args.batch_size,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=1)

    valid_queue = DataLoader(valid_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=1)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs)

    best_acc = 0
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %.6f', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epochs)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     epoch_str)
        logging.info('train_acc %.2f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch_str)
        logging.info('valid_acc %.2f', valid_acc)

        if valid_acc > best_acc:
            logging.info(
                'find the best model. Save it to {:}'.format(args.save +
                                                             'best.pt'))
            util.save(model, os.path.join(args.save, 'best.pt'))
            best_acc = valid_acc
        scheduler.step()
    logging.info('best acc is {:}'.format(best_acc))
Example #12
    use_cuda = cuda.is_available()
    best_accuracy = 0  # best testing accuracy
    best_epoch = 0  # epoch with the best testing accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch

    if args.env_name is None:
        args.env_name = "Log:%s-Train:%s-Test:%s" % (args.enable_log_transform, \
                                                     args.enable_disturb_illumination_train, \
                                                     args.enable_disturb_illumination_test)
        args.save_directory = osp.join(args.save_directory, args.env_name)

    # Init seed
    print('==> Init seed..')
    torch.manual_seed(args.seed)
    if use_cuda:
        cuda.manual_seed(args.seed)

    # Calculate mean and std
    print('==> Prepare mean and std..')
    print("\t Log : %s" % args.enable_log_transform)
    if not args.enable_log_transform:
        # mean_log, std_log = (0.50707543, 0.48655024, 0.44091907), (0.26733398, 0.25643876, 0.27615029)
        mean_log, std_log = calculate_mean_and_std(enable_log_transform=False)
    else:
        # mean_log, std_log = (6.69928741, 6.65900993, 6.40947819), (1.2056427,  1.15127575, 1.31597221)
        mean_log, std_log = calculate_mean_and_std(enable_log_transform=True)
    print('\tmean_log = ', mean_log)
    print('\tstd_log = ', std_log)

    data_mean = torch.FloatTensor(mean_log)
    data_std = torch.FloatTensor(std_log)
Example #13
def get_save_index(save_dir):
    save_index = 0
    while True:
        if Path(os.path.join(save_dir, 'run.%d' % (save_index, ))).exists():
            save_index += 1
        else:
            break
    return save_index


opt = parser.parse_args()
opt.save_path = os.path.join(opt.save_dir,
                             'run.%d' % (get_save_index(opt.save_dir), ))
Path(opt.save_path).mkdir_p()
print(opt.save_path)
torch.manual_seed(123)
cuda.set_device(opt.gpus[0])
cuda.manual_seed(123)

print(opt)
js.dump(opt.__dict__,
        open(os.path.join(opt.save_path, 'opt.json'), 'w'),
        sort_keys=True,
        indent=2)


def NMTCriterion(vocabSize):
    weight = torch.ones(vocabSize)
    weight[onmt.Constants.PAD] = 0
    crit = nn.NLLLoss(weight, size_average=False)
    if opt.gpus:
        crit.cuda()
    return crit
Example #14
def main(args):

    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = json.load(file)
    config.update(vars(args))
    config = apply_dict(Dict, config)

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    cuda.manual_seed(config.seed)
    cuda.set_device(distributed.get_rank() % cuda.device_count())

    train_dataset = datasets.CIFAR10(
        root=config.train_root,
        train=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=(0.49139968, 0.48215827, 0.44653124),
                std=(0.24703233, 0.24348505, 0.26158768)
            ),
            Cutout(size=(16, 16))
        ]),
        download=True
    )
    val_dataset = datasets.CIFAR10(
        root=config.val_root,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(
                mean=(0.49139968, 0.48215827, 0.44653124),
                std=(0.24703233, 0.24348505, 0.26158768)
            )
        ]),
        download=True
    )

    train_sampler = utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_data_loader = utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_sampler,
        num_workers=config.num_workers,
        pin_memory=True
    )
    val_data_loader = utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=config.local_batch_size,
        sampler=val_sampler,
        num_workers=config.num_workers,
        pin_memory=True
    )

    model = DARTS(
        operations=dict(
            sep_conv_3x3=functools.partial(SeparableConv2d, kernel_size=3, padding=1),
            sep_conv_5x5=functools.partial(SeparableConv2d, kernel_size=5, padding=2),
            dil_conv_3x3=functools.partial(DilatedConv2d, kernel_size=3, padding=2, dilation=2),
            dil_conv_5x5=functools.partial(DilatedConv2d, kernel_size=5, padding=4, dilation=2),
            avg_pool_3x3=functools.partial(AvgPool2d, kernel_size=3, padding=1, postnormalization=False),
            max_pool_3x3=functools.partial(MaxPool2d, kernel_size=3, padding=1, postnormalization=False),
            identity=functools.partial(Identity),
            # zero=functools.partial(Zero)
        ),
        stem=[
            functools.partial(Conv2d, kernel_size=3, padding=1, stride=1, affine=True, preactivation=False),
            functools.partial(Conv2d, kernel_size=3, padding=1, stride=1, affine=True, preactivation=True)
        ],
        num_nodes=6,
        num_input_nodes=2,
        num_cells=20,
        reduction_cells=[6, 13],
        num_predecessors=2,
        num_channels=36,
        num_classes=10,
        drop_prob_fn=lambda epoch: config.drop_prob * (epoch / config.num_epochs),
        temperature_fn=lambda epoch: config.temperature ** (epoch / config.num_epochs)
    )

    checkpoint = Dict(torch.load('log/checkpoints/epoch_0'))
    model.architecture.load_state_dict(checkpoint.architecture_state_dict)
    model.build_discrete_dag()
    model.build_discrete_network()

    for parameter in model.architecture.parameters():
        parameter.requires_grad_(False)

    criterion = CrossEntropyLoss(config.label_smoothing)

    config.global_batch_size = config.local_batch_size * distributed.get_world_size()
    config.lr *= config.global_batch_size / config.global_batch_denom

    optimizer = optim.SGD(
        params=model.parameters(),
        lr=config.lr,
        momentum=config.momentum,
        weight_decay=config.weight_decay
    )

    lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
        optimizer=optimizer,
        T_max=config.num_epochs
    )

    trainer = ClassifierTrainer(
        model=model,
        criterion=criterion,
        train_sampler=train_sampler,
        val_sampler=val_sampler,
        train_data_loader=train_data_loader,
        val_data_loader=val_data_loader,
        optimizer=optimizer,
        lr_scheduler=lr_scheduler,
        log_dir=os.path.join('log', config.name)
    )

    if config.checkpoint:
        trainer.load(config.checkpoint)

    if config.training:
        for epoch in range(trainer.epoch, config.num_epochs):
            trainer.train()
            trainer.validate()
            trainer.save()
            trainer.step()

    elif config.validation:
        trainer.validate()
Example #15
def run(config):
    """
    Run training and testing.
    """
    # wandb
    if config["wandb"]:
        wandb.init(config=config,
                   project=config["project"],
                   group=config["group"],
                   name=config["run_name"])

    # Set random seeds
    manual_seed(config["seed"])
    cuda.manual_seed(config["seed"])

    # override device
    use_cuda = cuda.is_available()
    dev = device("cuda" if use_cuda else "cpu")
    config["device"] = dev

    # load datasets
    transformations = transforms.Compose([transforms.ToTensor()])

    train_dataset = datasets.MNIST(config["data_path"],
                                   train=True,
                                   download=True,
                                   transform=transformations)

    test_dataset = datasets.MNIST(config["data_path"],
                                  train=False,
                                  download=True,
                                  transform=transformations)

    # filter single label
    if config["use_single_label"]:

        idx = train_dataset.targets==config["single_label"]
        train_dataset.targets = train_dataset.targets[idx]
        train_dataset.data = train_dataset.data[idx]

        # test dataset
        idx = test_dataset.targets==config["single_label"]
        test_dataset.targets = test_dataset.targets[idx]
        test_dataset.data = test_dataset.data[idx]


    # define batchers
    d_kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_batcher = DataLoader(train_dataset,
                               batch_size=config["batch_size"],
                               shuffle=True,
                               **d_kwargs)

    test_batcher = DataLoader(test_dataset,
                              batch_size=config["batch_size"],
                              shuffle=False,
                              **d_kwargs)

    config["train_batcher"] = train_batcher
    config["test_batcher"] = test_batcher

    # create encoder
    n_classes = config["n_classes"]
    enc_kwargs = {"input_dim": config["input_dim"],
                  "hidden_dim": config["encoder_hidden_dim"],
                  "z_dim": config["z_dim"],
                  "act_func": getattr(torch, config["encoder_act_func"]),
                  "n_classes": n_classes}

    encoder = EncoderFactory.create(config["encoder_name"])
    encoder = encoder(**enc_kwargs)

    # create decoder
    dec_kwargs = {"z_dim": config["z_dim"],
                  "hidden_dim": config["decoder_hidden_dim"],
                  "output_dim": config["input_dim"],
                  "act_func": getattr(torch, config["decoder_act_func"]),
                  "pred_func": getattr(torch, config["decoder_pred_func"]),
                  "n_classes": n_classes}

    decoder = DecoderFactory.create(config["decoder_name"])
    decoder = decoder(**dec_kwargs)

    # assemble VAE
    reconstruction_loss = partial(getattr(functional, config["rec_loss"]),
                                  reduction="sum")

    vae_kwargs = {"encoder": encoder,
                  "decoder": decoder,
                  "recon_loss_func": reconstruction_loss,
                  "beta": config["beta"]}

    # select the VAE model
    vae = VAEFactory.create(config["vae_name"])
    model = vae(**vae_kwargs)

    # send model to device and store it
    model = model.to(dev)
    config["model"] = model

    # print model summary
    print("----------------------------------------------------------------")
    print(f"Model: {model.name}")
    # summary(model, (1, config["input_dim"] + config["n_classes"]))

    # wandb
    if config["wandb"]:
        wandb.watch(model, log="all")

    # create the optimizer
    optimizer = getattr(optim, config["optimizer_name"])
    optimizer = optimizer(model.parameters(), lr=config["lr"])
    config["optimizer"] = optimizer

    # train and test
    print("Training...")
    print("----------------------------------------------------------------")

    # current date and time
    start = time()
    print(f"Start datetime: {datetime.now()}")
    print("----------------------------------------------------------------")

    # log control image
    test_losses = test(config)
    _, _, _, x, x_hat = test_losses
    num_img = config["test_num_img"]
    x_cat = cat((x[:num_img], x_hat[:num_img]), dim=0)
    grid = image_grid(x_cat, nrow=num_img)

    if config["wandb"]:
        wandb.log({"Test Example": wandb.Image(grid, caption="Epoch: 0")})


    for e in range(config["epochs"] + 1):

        train_losses = train(config)
        train_losses = [loss / len(train_batcher.dataset) for loss in train_losses]
        train_loss, train_recon_loss, train_kld_loss = train_losses

        # average
        test_losses = test(config)
        test_losses = [loss / len(test_batcher.dataset) for loss in test_losses]
        test_loss, test_recon_loss, test_kld_loss, x, x_hat = test_losses

        # print stuff
        print(f'Epoch {e}, Train Loss: {train_loss:.2f}, Test Loss: {test_loss:.2f}')

        # log images
        num_img = config["test_num_img"]
        x_cat = cat((x[:num_img], x_hat[:num_img]), dim=0)
        grid = image_grid(x_cat, nrow=num_img)
        # show_image_grid(x_cat, nrow=5)  # BUG: only works in test()?

        if not config["wandb"]:
            continue

        # wandb - merge into one logging operation
        wandb.log({"Train Loss - Total": train_loss,
                   "Train Loss - Reconstruction": train_recon_loss,
                   "Train Loss - KL Divergence": train_kld_loss,
                   "Test Loss - Total": test_loss,
                   "Test Loss - Reconstruction": test_recon_loss,
                   "Test Loss - KL Divergence": test_kld_loss,
                   "Test Example": wandb.Image(grid, caption=f"Epoch: {e}")})

    # save model with torch to wandb run dir (uploads after training is complete)
    # TODO: Save intermediary checkpoints instead
    if config["wandb"] and config["save_model"]:
        save({"model_name": model.name,
              "beta": config["beta"],
              "epoch": e,
              "model_state_dict": model.state_dict(),
              "optimizer_state_dict": optimizer.state_dict(),
              "z_dim": config["z_dim"],
              "train_loss": train_loss,
              "test_loss": test_loss},
             os.path.join(wandb.run.dir, "model_state.pt"))

    # current time
    print("----------------------------------------------------------------")
    print(f"End datetime: {datetime.now()}")
    print(f"Elapsed time: {round((time() - start) / 60.0, 2)} minutes")
    print("----------------------------------------------------------------")
Example #16
def reload_model(config):
    """
    Run training and testing.
    """
    # Set random seeds
    manual_seed(config["seed"])
    cuda.manual_seed(config["seed"])

    # override device
    use_cuda = cuda.is_available()
    dev = device("cuda" if use_cuda else "cpu")
    config["device"] = dev

    # create encoder
    n_classes = config["n_classes"]
    enc_kwargs = {"input_dim": config["input_dim"],
                  "hidden_dim": config["encoder_hidden_dim"],
                  "z_dim": config["z_dim"],
                  "act_func": getattr(torch, config["encoder_act_func"]),
                  "n_classes": n_classes}

    encoder = EncoderFactory.create(config["encoder_name"])
    encoder = encoder(**enc_kwargs)

    # create decoder
    dec_kwargs = {"z_dim": config["z_dim"],
                  "hidden_dim": config["decoder_hidden_dim"],
                  "output_dim": config["input_dim"],
                  "act_func": getattr(torch, config["decoder_act_func"]),
                  "pred_func": getattr(torch, config["decoder_pred_func"]),
                  "n_classes": n_classes}

    decoder = DecoderFactory.create(config["decoder_name"])
    decoder = decoder(**dec_kwargs)

    # assemble VAE
    reconstruction_loss = partial(getattr(functional, config["rec_loss"]),
                                  reduction="sum")

    vae_kwargs = {"encoder": encoder,
                  "decoder": decoder,
                  "recon_loss_func": reconstruction_loss,
                  "beta": config["beta"]}

    # select the VAE model
    vae = VAEFactory.create(config["vae_name"])
    model = vae(**vae_kwargs)

    # load checkpoint
    checkpoint = load(config["checkpoint_path"], map_location=dev)

    # load state dict
    model.load_state_dict(checkpoint["model_state_dict"])

    # send model to device and store it
    model = model.to(dev)

    # print model summary
    # summary(model, (1, config["input_dim"]))

    # print out
    print("----------------------------------------------------------------")
    print("Model loaded")
    print("----------------------------------------------------------------")

    return model
Example #17
def main(args):

    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = json.load(file)
    config.update(vars(args))
    config = apply_dict(Dict, config)

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    world_size = distributed.get_world_size()
    global_rank = distributed.get_rank()
    device_count = cuda.device_count()
    local_rank = global_rank % device_count

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    cuda.manual_seed(config.seed)
    cuda.set_device(local_rank)

    train_dataset = datasets.MNIST(
        root=config.train_root,
        train=True,
        transform=transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ]),
        download=True
    )
    val_dataset = datasets.MNIST(
        root=config.val_root,
        train=False,
        transform=transforms.Compose([
            transforms.Resize((32, 32)),
            transforms.ToTensor(),
            transforms.Normalize((0.5,), (0.5,))
        ]),
        download=True
    )

    train_sampler = utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_data_loader = utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_sampler,
        num_workers=config.num_workers,
        pin_memory=True
    )
    val_data_loader = utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=config.local_batch_size,
        sampler=val_sampler,
        num_workers=config.num_workers,
        pin_memory=True
    )

    generator = Generator(
        latent_size=128,
        mapping_layers=2,
        min_resolution=4,
        max_resolution=32,
        max_channels=128,
        min_channels=16,
        out_channels=1
    ).cuda()

    discriminator = Discriminator(
        in_channels=1,
        min_channels=16,
        max_channels=128,
        max_resolution=32,
        min_resolution=4,
        num_classes=1
    ).cuda()

    inverter = Discriminator(
        in_channels=1,
        min_channels=16,
        max_channels=128,
        max_resolution=32,
        min_resolution=4,
        num_classes=128
    ).cuda()

    config.global_batch_size = config.local_batch_size * distributed.get_world_size()
    config.generator_optimizer.lr *= config.global_batch_size / config.global_batch_denom
    config.discriminator_optimizer.lr *= config.global_batch_size / config.global_batch_denom
    config.inverter_optimizer.lr *= config.global_batch_size / config.global_batch_denom

    generator_optimizer = optim.Adam(generator.parameters(), **config.generator_optimizer)
    discriminator_optimizer = optim.Adam(discriminator.parameters(), **config.discriminator_optimizer)
    inverter_optimizer = optim.Adam(inverter.parameters(), **config.inverter_optimizer)

    trainer = GANTrainer(
        latent_size=128,
        generator=generator,
        discriminator=discriminator,
        inverter=inverter,
        generator_optimizer=generator_optimizer,
        discriminator_optimizer=discriminator_optimizer,
        inverter_optimizer=inverter_optimizer,
        train_data_loader=train_data_loader,
        val_data_loader=val_data_loader,
        train_sampler=train_sampler,
        val_sampler=val_sampler,
        log_dir=os.path.join('log', config.name)
    )

    if config.checkpoint:
        trainer.load(config.checkpoint)

    if config.training:
        for epoch in range(trainer.epoch, config.num_epochs):
            trainer.step(epoch)
            trainer.train()
            # trainer.validate()
            trainer.save()

    elif config.validation:
        trainer.validate()
Example #18
import torch
import torch.nn as nn
import torch.cuda as cuda
import extractinputs as ei
import support_functions as sf
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import csv
import time

torch.manual_seed(2020)
cuda.manual_seed(2020)
np.random.seed(2020)
os.chdir(os.getcwd())
path = os.getcwd()
device = torch.device("cuda:0" if cuda.is_available() else "cpu")

# Import the DMS dataset
data = pd.read_csv(path + '/Input/DMS_CCS_ML_Dataset_Class.csv')
data.head()

#Un-normalized data
inputs_raw = data.drop(labels=['Combined_CCS', 'Compound'], axis='columns')
target = data['Combined_CCS']
names = data['Compound']
Example #19
File: train.py Project: MartrixG/CODES
def main(arg):
    ##################################
    for key in arg:
        parse[key] = arg[key]
    global args
    args = SimpleNamespace(**parse)
    '''
    print('seed:{:}'.format(args.seed))
    print('dataset:{:}'.format(args.dataset))
    print('hidden_layers:{:}'.format(args.hidden_layers))
    print('first_neurons:{:}'.format(args.first_neurons))
    print('cross_link:{:}'.format(args.cross_link))
    print('fully_cross:{:}'.format(args.fully_cross))
    print()
    exit(0)
    '''
    ##################################
    seed = util.prepare(args)
    if not cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    cuda.manual_seed(seed)
    cuda.set_device(args.gpu)
    cudnn.benchmark = False
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    logging.info('hidden_layers:{:}'.format(args.hidden_layers))
    logging.info('first_neurons:{:}'.format(args.first_neurons))
    logging.info('change:{:}'.format(args.change))
    logging.info('activate_func:{:}'.format(args.activate_func))
    logging.info('opt:{:}'.format(args.opt))
    logging.info('cross_link:{:}'.format(args.cross_link))
    logging.info('fully_cross:{:}'.format(args.fully_cross))

    model = Network(args)
    model = model.cuda()
    logging.info("param size = %fMB", util.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.7)

    train_data, valid_data = dataset.get_dataset(args.data, args.dataset)
    train_queue, valid_queue = dataset.get_data_loader(train_data, valid_data,
                                                       2)

    early_stop = util.EarlyStop(patience=10,
                                delta=0.0001,
                                save_path=args.save + '/best.pt')
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %.6f', epoch, scheduler.get_lr()[0])

        epoch_str = '[{:03d}/{:03d}]'.format(epoch, args.epochs)
        train_acc, train_obj = train(train_queue, model, criterion, optimizer,
                                     epoch_str)
        logging.info('train_acc %.2f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion, epoch_str)
        logging.info('valid_acc %.2f', valid_acc)

        if early_stop.check(train_obj, valid_acc, model):
            logging.info('Early stopping at {:}'.format(epoch))
            break

        scheduler.step()
Example #20

def load_words_embed(pretrained_embed_model, vocab) -> torch.FloatTensor:
    n = len(vocab)
    embeds = torch.randn(n, 300)
    for i in range(n):
        try:
            embeds[i, :] = torch.from_numpy(
                pretrained_embed_model[vocab[i]]).view(1, 300)
        except KeyError:  # out-of-vocabulary word: keep its random embedding
            embeds[i, :] = torch.randn(1, 300)
    return embeds


if __name__ == '__main__':
    manual_seed(100)
    epochs = 1000
    data, vocab, tags = load_data("../data")
    fasttext = FastText.load("../data/wiki.ar.gensim")
    embeds = load_words_embed(fasttext, vocab)
    net = BiLSTMWithCRF(len(vocab), tags, 300, 8, preinit_embedding=embeds)
    # bilstmcrf = bilstmcrf.cuda()
    opt = Adam(net.parameters(), lr=0.01, weight_decay=1e-3)
    print("Begin training")
    for epoch in range(epochs):
        for sentence, tgs in data:
            opt.zero_grad()
            sentence_in = prepare_sequence(sentence, vocab)
            targets = torch.LongTensor([tags[t] for t in tgs])
            neg_log_likelihood = net.neg_log_likelihood(sentence_in, targets)
            neg_log_likelihood.backward()
            opt.step()
Example #21
def init_randseed(RANDOM_SEED=20190421):
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)
    if cuda.is_available():
        cuda.manual_seed(RANDOM_SEED)
Example #22
    from torchvision import transforms
    from torch.utils.data import DataLoader

    from torch import device
    from torch import cuda
    from torch import manual_seed

    from neu_vae.training import reload_model

    with open("../training/config.yaml") as f:
        config = yaml.safe_load(f)

    # Set random seeds
    if config["seed"]:
        manual_seed(config["seed"])
        cuda.manual_seed(config["seed"])

    # override device
    use_cuda = cuda.is_available()
    dev = device("cuda" if use_cuda else "cpu")
    config["device"] = dev

    # load test dataset
    transformations = transforms.Compose([transforms.ToTensor()])
    test_dataset = datasets.MNIST(config["data_path"],
                                  train=False,
                                  download=True,
                                  transform=transformations)

    idx = test_dataset.targets == config["single_label"]
    test_dataset.targets = test_dataset.targets[idx]
Example #23
def print_max_nbd(t):
    "Print values in a neighborhood of the max value in t"
    x, y = [v[0] for v in argmax(t)]
    print(f'max coords at {(x, y)}')
    print(t[max(0, x - 3):min(x + 3, t.size(0)),
            max(0, y - 3):min(y + 3, t.size(1))])


class Flatten(Module):
    "Simply flattens all but the batch (first) dimension of the input"

    def forward(self, i):
        return i.view(i.size(0), -1)


TP.manual_seed(0)  # Note: Messing with the current RNG state.

# Shape arguments passed to `InformationDropoutLayer`'s.
INFO_ARGS = [
    dict(output_size=(32, 38, 38),
         in_channels=2,
         out_channels=32,
         kernel_size=2,
         stride=2,
         max_alpha=0.),  # 0
    dict(output_size=(64, 10, 10),
         in_channels=32,
         out_channels=64,
         kernel_size=2,
         stride=2,
         max_alpha=0.),  # 1
Example #24
def main(args):

    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = apply_dict(Dict, json.load(file))
    config.update(vars(args))
    config.update(
        dict(world_size=distributed.get_world_size(),
             global_rank=distributed.get_rank(),
             device_count=cuda.device_count(),
             local_rank=distributed.get_rank() % cuda.device_count()))
    print(f'config: {config}')

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    cuda.manual_seed(config.seed)
    cuda.set_device(config.local_rank)

    train_dataset = ImageNet(root=config.train_root,
                             meta=config.train_meta,
                             transform=transforms.Compose([
                                 transforms.RandomResizedCrop(224),
                                 transforms.RandomHorizontalFlip(),
                                 transforms.ColorJitter(brightness=0.4,
                                                        contrast=0.4,
                                                        saturation=0.4,
                                                        hue=0.2),
                                 transforms.ToTensor(),
                                 transforms.Normalize(mean=(0.485, 0.456,
                                                            0.406),
                                                      std=(0.229, 0.224,
                                                           0.225))
                             ]))
    val_dataset = ImageNet(root=config.val_root,
                           meta=config.val_meta,
                           transform=transforms.Compose([
                               transforms.Resize(256),
                               transforms.CenterCrop(224),
                               transforms.ToTensor(),
                               transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                                    std=(0.229, 0.224, 0.225)),
                           ]))

    train_sampler = utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_data_loader = utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_sampler,
        num_workers=config.num_workers,
        pin_memory=True)
    val_data_loader = utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=config.local_batch_size,
                                            sampler=val_sampler,
                                            num_workers=config.num_workers,
                                            pin_memory=True)

    model = SuperMobileNetV2(first_conv_param=Dict(in_channels=3,
                                                   out_channels=32,
                                                   kernel_size=3,
                                                   stride=2),
                             middle_conv_params=[
                                 Dict(in_channels=32,
                                      out_channels=16,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=1,
                                      stride=1),
                                 Dict(in_channels=16,
                                      out_channels=24,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=2,
                                      stride=2),
                                 Dict(in_channels=24,
                                      out_channels=32,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=3,
                                      stride=2),
                                 Dict(in_channels=32,
                                      out_channels=64,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=4,
                                      stride=2),
                                 Dict(in_channels=64,
                                      out_channels=96,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=3,
                                      stride=1),
                                 Dict(in_channels=96,
                                      out_channels=160,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=3,
                                      stride=2),
                                 Dict(in_channels=160,
                                      out_channels=320,
                                      expand_ratio_list=[3, 6],
                                      kernel_size_list=[3, 5],
                                      blocks=1,
                                      stride=1),
                             ],
                             last_conv_param=Dict(in_channels=320,
                                                  out_channels=1280,
                                                  kernel_size=1,
                                                  stride=1),
                             drop_prob=config.drop_prob,
                             num_classes=1000).cuda()

    for tensor in model.state_dict().values():
        distributed.broadcast(tensor, 0)

    criterion = CrossEntropyLoss(config.label_smoothing)

    config.global_batch_size = config.local_batch_size * config.world_size
    config.lr = config.lr * config.global_batch_size / config.global_batch_denom

    optimizer = torch.optim.RMSprop(params=model.weights(),
                                    lr=config.lr,
                                    alpha=config.alpha,
                                    eps=config.eps,
                                    weight_decay=config.weight_decay,
                                    momentum=config.momentum)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer,
                                                  milestones=config.milestones,
                                                  gamma=config.gamma)

    last_epoch = -1
    global_step = 0
    if config.checkpoint:
        checkpoint = Dict(torch.load(config.checkpoint))
        model.load_state_dict(checkpoint.model_state_dict)
        optimizer.load_state_dict(checkpoint.optimizer_state_dict)
        last_epoch = checkpoint.last_epoch
        global_step = checkpoint.global_step
    elif config.global_rank == 0:
        if os.path.exists(config.checkpoint_directory):
            shutil.rmtree(config.checkpoint_directory)
        if os.path.exists(config.event_directory):
            shutil.rmtree(config.event_directory)
        os.makedirs(config.checkpoint_directory)
        os.makedirs(config.event_directory)

    if config.global_rank == 0:
        summary_writer = SummaryWriter(config.event_directory)

    if config.training:

        for epoch in range(last_epoch + 1, config.num_epochs):

            train_sampler.set_epoch(epoch)
            lr_scheduler.step(epoch)

            model.train()

            for local_step, (images, targets) in enumerate(train_data_loader):

                step_begin = time.time()

                images = images.cuda(non_blocking=True)
                targets = targets.cuda(non_blocking=True)

                logits = model(images)
                loss = criterion(logits, targets) / config.world_size

                optimizer.zero_grad()

                loss.backward()

                for parameter in model.parameters():
                    distributed.all_reduce(parameter.grad)

                optimizer.step()

                predictions = torch.argmax(logits, dim=1)
                accuracy = torch.mean(
                    (predictions == targets).float()) / config.world_size

                for tensor in [loss, accuracy]:
                    distributed.all_reduce(tensor)

                step_end = time.time()

                if config.global_rank == 0:
                    summary_writer.add_scalars(
                        main_tag='loss',
                        tag_scalar_dict=dict(train=loss),
                        global_step=global_step)
                    summary_writer.add_scalars(
                        main_tag='accuracy',
                        tag_scalar_dict=dict(train=accuracy),
                        global_step=global_step)
                    print(
                        f'[training] epoch: {epoch} global_step: {global_step} local_step: {local_step} '
                        f'loss: {loss:.4f} accuracy: {accuracy:.4f} [{step_end - step_begin:.4f}s]'
                    )

                global_step += 1

            if config.global_rank == 0:
                torch.save(
                    dict(model_state_dict=model.state_dict(),
                         optimizer_state_dict=optimizer.state_dict(),
                         last_epoch=epoch,
                         global_step=global_step),
                    f'{config.checkpoint_directory}/epoch_{epoch}')

            if config.validation:

                model.eval()

                with torch.no_grad():

                    average_loss = 0
                    average_accuracy = 0

                    for local_step, (images,
                                     targets) in enumerate(val_data_loader):

                        images = images.cuda(non_blocking=True)
                        targets = targets.cuda(non_blocking=True)

                        logits = model(images)
                        loss = criterion(logits, targets) / config.world_size

                        predictions = torch.argmax(logits, dim=1)
                        accuracy = torch.mean(
                            (predictions
                             == targets).float()) / config.world_size

                        for tensor in [loss, accuracy]:
                            distributed.all_reduce(tensor)

                        average_loss += loss
                        average_accuracy += accuracy

                    average_loss /= (local_step + 1)
                    average_accuracy /= (local_step + 1)

                if config.global_rank == 0:
                    summary_writer.add_scalars(
                        main_tag='loss',
                        tag_scalar_dict=dict(val=average_loss),
                        global_step=global_step)
                    summary_writer.add_scalars(
                        main_tag='accuracy',
                        tag_scalar_dict=dict(val=average_accuracy),
                        global_step=global_step)
                    print(
                        f'[validation] epoch: {epoch} loss: {average_loss:.4f} accuracy: {average_accuracy:.4f}'
                    )

    elif config.validation:

        model.eval()

        with torch.no_grad():

            average_loss = 0
            average_accuracy = 0

            for local_step, (images, targets) in enumerate(val_data_loader):

                images = images.cuda(non_blocking=True)
                targets = targets.cuda(non_blocking=True)

                logits = model(images)
                loss = criterion(logits, targets) / config.world_size

                predictions = torch.argmax(logits, dim=1)
                accuracy = torch.mean(
                    (predictions == targets).float()) / config.world_size

                for tensor in [loss, accuracy]:
                    distributed.all_reduce(tensor)

                average_loss += loss
                average_accuracy += accuracy

            average_loss /= (local_step + 1)
            average_accuracy /= (local_step + 1)

        if config.global_rank == 0:
            print(
                f'[validation] epoch: {last_epoch} loss: {average_loss:.4f} accuracy: {average_accuracy:.4f}'
            )

    if config.global_rank == 0:
        summary_writer.close()
Example #25
	print('==> Init variables..')
	use_cuda = cuda.is_available()
	best_accuracy = 0  # best testing accuracy
	best_epoch = 0  # epoch with the best testing accuracy
	start_epoch = 0  # start from epoch 0 or last checkpoint epoch
# 	netArch = "resnet34"
	netArch = "resnet50"
	save_directory = os.path.join("checkpoint", netArch)
	if not os.path.isdir(save_directory):
		os.makedirs(save_directory)

	# Init seed 
	print('==> Init seed..')
	torch.manual_seed(args.seed) # Sets the seed for generating random numbers
	if use_cuda:
		cuda.manual_seed(args.seed) # Sets the seed for generating random numbers for the current GPU

	# Calculate mean and std
	print('==> Prepare mean and std..')
#	data_mean, data_std = getMeanStdByBatch(datapath, args.train_batch_size)
# fengxi
	data_mean = [0.331948, 0.33171957, 0.29903654]
	data_std = [0.28179781, 0.27919075, 0.27801905]

	print('\tdata_mean = ', data_mean)
	print('\tdata_std = ', data_std)

	# Prepare training transform
	print('==> Prepare training transform..')
	training_transform = transforms.Compose([
# torchvision.transforms.RandomAffine(degrees, translate=None, scale=None, shear=None, resample=False, fillcolor=0)
Example #26
def main():
    # Control the random seeds
    torch.manual_seed(SEED)
    cuda.manual_seed(SEED)
    cuda.manual_seed_all(SEED)
    np.random.seed(SEED)  # Numpy module.
    random.seed(SEED)  # Python random module.
    backends.cudnn.benchmark = False
    backends.cudnn.deterministic = True
    print(">> Set random seed: {}".format(SEED))

    # Write data filenames and labels to a txt file.
    read_filenames_and_labels_to_txt(CANDIDATE_ROOT, "gt.txt")

    # Conduct data augmentation first
    create_patches(CANDIDATE_ROOT, PATCH_ROOT)

    # Get the size of the unlabeled data pool to build a list of indices
    indices = list(range(get_sample_num(PATCH_ROOT)))
    # Randomly select K samples in the first cycle
    random.shuffle(indices)
    labeled_indices = indices[:K]
    unlabeled_indices = indices[K:]

    # Load training and testing data
    filenames, labels = load_train_data(CANDIDATE_ROOT, PATCH_ROOT,
                                        labeled_indices)
    train_dataset = MyDataset(filenames, labels, transform=image_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH,
                              shuffle=True,
                              pin_memory=True)
    print("Current training dataset size: {}".format(len(train_dataset)))
    filenames, labels = load_test_data(TEST_ROOT)
    test_dataset = MyDataset(filenames, labels, transform=image_transform)
    test_loader = DataLoader(test_dataset,
                             batch_size=BATCH,
                             sampler=SequentialSampler(range(
                                 len(test_dataset))),
                             pin_memory=True)
    dataloaders = {'train': train_loader, 'test': test_loader}

    # Set the device for running the network
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build the network structure
    classifier_network = ResNet18(num_classes=23)
    classifier_network.to(device)
    loss_network = LossNet()
    loss_network.to(device)
    # Load pre-trained weight of the classifier network
    classifier_dict = classifier_network.state_dict()
    pretrained_dict = torch.load("resnet18.pth")
    parameter_dict = {
        k: v
        for k, v in pretrained_dict.items() if k in classifier_dict
    }
    classifier_dict.update(parameter_dict)
    classifier_network.load_state_dict(classifier_dict)
    # Integration
    model = {'classifier': classifier_network, 'module': loss_network}

    # Set the loss criterion of the training procedure
    criterion = nn.CrossEntropyLoss(reduction='none')

    print(">> Start active learning!")
    for cycle in range(CYCLES):
        # for each cycle, we need new optimizers and learning rate schedulers
        optim_classifier = optim.SGD(model['classifier'].parameters(),
                                     lr=LR_classifier,
                                     momentum=MOMENTUM,
                                     weight_decay=WDECAY)
        optim_loss = optim.SGD(model['module'].parameters(),
                               lr=LR_loss,
                               momentum=MOMENTUM,
                               weight_decay=WDECAY)
        optimizers = {'classifier': optim_classifier, 'loss': optim_loss}
        scheduler_classifier = lr_scheduler.MultiStepLR(optim_classifier,
                                                        milestones=MILESTONE)
        scheduler_loss = lr_scheduler.MultiStepLR(optim_loss,
                                                  milestones=MILESTONE)
        schedulers = {
            'classifier': scheduler_classifier,
            'module': scheduler_loss
        }

        # Training
        train(model, criterion, optimizers, schedulers, dataloaders, EPOCH,
              device)
        acc = test(model, dataloaders, device, mode='test')
        print('Cycle {}/{} || Label set size {}: Test acc {}'.format(
            cycle + 1, CYCLES, len(labeled_indices), acc))

        # Random subset sampling to explore the data pool
        random.shuffle(unlabeled_indices)
        subset_indices = unlabeled_indices[:SUBSET]

        # Choose the active learning strategy
        selected_indices = active_sampling(strategy="hybrid",
                                           model=model,
                                           indices=subset_indices)

        # Add new labeled samples to the labeled dataset
        labeled_indices.extend(selected_indices)
        # Remove labeled samples from the unlabeled data pool
        for i in selected_indices:
            unlabeled_indices.remove(i)

        # Update the training dataset
        filenames, labels = load_train_data(CANDIDATE_ROOT, PATCH_ROOT,
                                            labeled_indices)
        train_dataset = MyDataset(filenames, labels, transform=image_transform)
        print("Training data number: ", len(train_dataset))
        dataloaders['train'] = DataLoader(train_dataset,
                                          batch_size=BATCH,
                                          pin_memory=True,
                                          shuffle=True)

        # Save the model of the current cycle
        torch.save(model["classifier"].state_dict(),
                   'checkpoints/active_resnet18_cycle{}.pth'.format(cycle))