Example #1
def evolution_search():
    for exp_type in config_dict()['exp_order']:
        save_dir = f'{os.path.dirname(os.path.abspath(__file__))}/search-{args.save}-{exp_type}-{dataset}-{time.strftime("%Y%m%d-%H%M%S")}'
        utils.create_exp_dir(save_dir)
        fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(fh)

        np.random.seed(args.seed)
        logging.info("args = %s", args)

        # setup NAS search problem
        if exp_type == 'micro':  # NASNet search space
            n_var, lb, ub = set_micro_exp(args)
        elif exp_type == 'macro':  # modified GeneticCNN search space
            n_var, lb, ub = set_macro_exp(args)
        elif exp_type in ('micromacro', 'micro_garbage', 'macro_garbage'):  # combined micro + macro search space
            n_var_mac, lb_mac, ub_mac = set_macro_exp(args)
            n_var_mic, lb_mic, ub_mic = set_micro_exp(args)
            n_var = n_var_mic + n_var_mac
            lb = np.array([*lb_mac, *lb_mic])
            ub = np.array([*ub_mac, *ub_mic])
        else:
            raise NameError('Unknown search space type')

        problem = NAS(n_var=n_var, search_space=exp_type,
                      n_obj=2, n_constr=0, lb=lb, ub=ub,
                      init_channels=args.init_channels, layers=args.layers,
                      epochs=args.epochs, save_dir=save_dir, batch_size=args.batch_size)

        # configure the nsga-net method
        method = engine.nsganet(pop_size=args.pop_size,
                                n_offsprings=args.n_offspring,
                                eliminate_duplicates=True)

        if args.termination == 'ngens':
            termination = ('n_gen', args.n_gens)
        elif args.termination == 'time':
            termination = TimeTermination(time.time(), args.max_time)
        else:
            raise ValueError('Unknown termination criterion')

        res = minimize(problem,
                       method,
                       callback=do_every_generations,
                       termination=termination)

        # the first objective stores (100 - validation accuracy), so lower is better
        val_accs = res.pop.get('F')[:, 0]

        if exp_type == 'micromacro' or exp_type == 'micro':
            best_idx = np.where(val_accs == np.min(val_accs))[0][0]
            best_genome = res.pop[best_idx].X
            with open(f'{save_dir}/best_genome.pkl', 'wb') as pkl_file:
                pickle.dump(best_genome, pkl_file)
        if exp_type == 'micromacro':
            set_config('micro_creator', make_micro_creator(best_genome))

    return (100 - np.min(val_accs)) / 100  # best validation accuracy as a fraction
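
The search is driven by pymoo's `minimize` together with the `do_every_generations` callback, which is referenced above but not shown. A minimal sketch of such a callback, assuming pymoo's convention of passing the algorithm object once per generation (the objective layout, error in column 0, matches the `val_accs` indexing above):

def do_every_generations(algorithm):
    # pymoo exposes the generation counter and the current population
    gen = algorithm.n_gen
    pop_obj = algorithm.pop.get('F')
    logging.info('generation = %d', gen)
    # column 0 holds (100 - validation accuracy), so lower is better
    logging.info('population error: best = %.3f, worst = %.3f',
                 np.min(pop_obj[:, 0]), np.max(pop_obj[:, 0]))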
Example #2
    def __init__(self, save_to=None, continue_from=None, args=None, **kwargs):
        # prototype individual; rank and crowding distance are recomputed later
        # by non-dominated sorting during the survival step
        kwargs["individual"] = Individual(rank=np.inf, crowding=-1)
        super().__init__(**kwargs)

        logger.info("new version")
        self.tournament_type = "comp_by_dom_and_crowding"
        self.func_display_attrs = disp_multi_objective

        self.continue_from = continue_from
        self.args = args

        self.save_to = os.path.join(args.save, args.code)
        utils.create_exp_dir(self.save_to)
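
A hypothetical usage sketch of this constructor; the class name NSGANet and the pop_size/n_offsprings values are assumptions inferred from the surrounding examples, and `args` must carry the `save` and `code` fields the body reads:

# args.save and args.code determine where checkpoints land
method = NSGANet(pop_size=40,
                 n_offsprings=20,
                 eliminate_duplicates=True,
                 continue_from=None,
                 args=args)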
Example #3
        (`RankAndCrowdingSurvival` in this case, CHANGED)
    3. call the callback function (`do_every_generations` in this case)
    4. call `GeneticAlgorithm.selection` to make the selection;
        func_comp serves as the comparison function
        (`binary_tournament` in this case, same as NSGA-II;
        a sketch of it follows this listing)
    then go back to step 1.
    '''
    res = minimize(problem,
                   method,
                   callback=do_every_generations,
                   termination=('n_gen', args.n_gens))

    return


if __name__ == "__main__":
    args.save = 'search-{}-{}-{}'.format(args.save, args.search_space,
                                         time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save)

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    pop_hist = []  # keep track of every evaluated architecture
    main()
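
The docstring above names `binary_tournament` as the comparison function. A minimal sketch of the rank-then-crowding rule it implements, assuming pymoo's tournament-selection convention of an `(n_matings, 2)` index array `P` and the `rank`/`crowding` attributes set on each `Individual` (tie handling is simplified here):

def binary_tournament(pop, P, **kwargs):
    # P holds pairs of competitor indices; return one winner per pair
    S = np.full(P.shape[0], -1, dtype=int)
    for i, (a, b) in enumerate(P):
        if pop[a].rank != pop[b].rank:
            # lower non-domination rank wins
            S[i] = a if pop[a].rank < pop[b].rank else b
        else:
            # equal rank: larger crowding distance wins
            S[i] = a if pop[a].crowding > pop[b].crowding else b
    return S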
Example #4
def main(args):
    save_dir = f'{os.path.dirname(os.path.abspath(__file__))}/../train/train-{args.save}-{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(save_dir)
    data_root = '../data'
    CIFAR_CLASSES = config_dict()['n_classes']
    INPUT_CHANNELS = config_dict()['n_channels']

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    if args.auxiliary and args.net_type == 'macro':
        logging.info(
            'auxiliary head classifier not supported for macro search space models'
        )
        sys.exit(1)

    logging.info("args = %s", args)

    cudnn.enabled = True
    cudnn.benchmark = True
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    best_acc = 0  # initialize the best validation accuracy seen so far

    # Data
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = torchvision.datasets.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # valid_data = torchvision.datasets.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_data = my_cifar10.CIFAR10(root=data_root,
                                    train=True,
                                    download=False,
                                    transform=train_transform)
    valid_data = my_cifar10.CIFAR10(root=data_root,
                                    train=False,
                                    download=False,
                                    transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=1)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=128,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=1)

    # Model
    if args.net_type == 'micro':
        logging.info("==> Building micro search space encoded architectures")
        genotype = getattr(genotypes, args.arch)  # look up the genotype by name
        net = NetworkCIFAR(args.init_channels,
                           num_classes=CIFAR_CLASSES,
                           num_channels=INPUT_CHANNELS,
                           layers=args.layers,
                           auxiliary=args.auxiliary,
                           genotype=genotype,
                           SE=args.SE)
    elif args.net_type == 'macro':
        genome = getattr(macro_genotypes, args.arch)  # look up the genome by name
        channels = [(INPUT_CHANNELS, 128), (128, 128), (128, 128)]
        net = EvoNetwork(
            genome,
            channels,
            CIFAR_CLASSES,
            (config_dict()['INPUT_HEIGHT'], config_dict()['INPUT_WIDTH']),
            decoder='dense')
    else:
        raise NameError(
            "Unknown network type; only 'micro' and 'macro' are supported")

    # logging.info("{}".format(net))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(net))

    net = net.to(device)

    n_epochs = args.epochs

    parameters = filter(lambda p: p.requires_grad, net.parameters())

    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    optimizer = optim.SGD(parameters,
                          lr=args.learning_rate,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, n_epochs, eta_min=args.min_learning_rate)

    for epoch in range(n_epochs):
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        # linearly ramp up the drop-path rate over the course of training
        net.droprate = args.droprate * epoch / args.epochs

        train(args, train_queue, net, criterion, optimizer)
        _, valid_acc = infer(args, valid_queue, net, criterion)
        scheduler.step()  # step the LR schedule after the epoch's optimizer updates

        if valid_acc > best_acc:
            utils.save(net, os.path.join(save_dir, 'weights.pt'))
            best_acc = valid_acc

    return best_acc
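
The `train` and `infer` helpers are called above but not shown. A minimal sketch with matching signatures (hypothetical: it ignores the auxiliary head, which real DARTS-style models may return as a second output, and assumes `device` is defined at module level, as `net.to(device)` implies):

def train(args, train_queue, net, criterion, optimizer):
    net.train()
    for inputs, targets in train_queue:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        logits = net(inputs)  # auxiliary logits are ignored in this sketch
        loss = criterion(logits, targets)
        loss.backward()
        optimizer.step()

def infer(args, valid_queue, net, criterion):
    net.eval()
    loss_sum, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, targets in valid_queue:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = net(inputs)
            loss_sum += criterion(logits, targets).item() * targets.size(0)
            correct += (logits.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    return loss_sum / total, 100.0 * correct / total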
Example #5
def train_and_evaluate(
    genome: tuple,
    individual=None,
    args: argparse.Namespace = None,
    first_gen: bool = True,
    save: str = None,
    client_id: str = None,
):
    """
    Train and evaluate an individual on a TPU or GPU.

    Results are always saved in the save dir to make distributed data
    management easier.

    Args:
        genome: encoded architecture to train.
        individual: population member that owns this genome (must have an `id`).
        args: parsed command-line arguments.
        first_gen: whether this is the first generation (no parents have been
            assigned yet).
        save: name of the directory to save results into.
        client_id: identifier of the worker, used when uploading weights.

    Returns:
        A dict with the evaluation results (validation accuracy, parameter
        count, FLOPs, weight blob name).
    """

    if args.stream == "tpu":
        # must warm up the TPU runtime before building the model
        import torch_xla

    auxiliary = False

    assert hasattr(individual, "id")

    if not first_gen:
        # this is not the first generation, so mating should have occurred
        assert hasattr(individual, "parents")

    expr_root = ""

    save_pth = os.path.join(expr_root, "{}".format(save))
    utils.create_exp_dir(save_pth)

    CIFAR_CLASSES = 10
    learning_rate = 0.025
    momentum = 0.9
    weight_decay = 3e-4
    data_root = "../data"
    batch_size = args.batch_size
    auxiliary_weight = 0.4
    grad_clip = 5
    report_freq = 50
    train_params = {
        "auxiliary": auxiliary,
        "auxiliary_weight": auxiliary_weight,
        "grad_clip": grad_clip,
        "report_freq": report_freq,
    }

    if args.search_space == "micro":
        genotype = micro_encoding.decode(genome)
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                        auxiliary, genotype)

        if not first_gen:
            # change the way the weights are set up
            model = manage_weights(model, individual, expr_root, args)

    elif args.search_space == "macro":
        raise NotImplementedError("Not supported")
    else:
        raise NameError("Unknown search space type")

    logger.info("Architecture = %s", genotype)

    try:
        max_weight = args.max_weight
    except AttributeError:
        print("Could not determine maximum weight argument; defaulting to 1e20")
        max_weight = 1e20

    clip = weightClip(max_weight=max_weight, min_weight=-max_weight)

    if args.stream == "tpu":
        from projectcode.training.tpu import get_map_fn
        import torch_xla.distributed.xla_multiprocessing as xmp

        WRAPPED_MODEL = xmp.MpModelWrapper(model)

        logger.info("Executing TPU Training")
        map_fn = get_map_fn(model,
                            train_params,
                            data_root,
                            momentum,
                            weight_decay,
                            CIFAR_CLASSES,
                            learning_rate,
                            args.layers,
                            batch_size,
                            epochs=args.epochs,
                            save_pth=save_pth,
                            args=args,
                            WRAPPED_MODEL=WRAPPED_MODEL,
                            clip=clip)

        FLAGS = {}

        xmp.spawn(map_fn, args=(FLAGS, ), nprocs=1, start_method="fork")

        valid_acc, n_flops = torch.load("results.pt")
    elif args.stream == "gpu":
        from projectcode.training.gpu import train_gpu
        logger.info("Executing GPU Training")
        valid_acc, n_flops = train_gpu(model,
                                       train_params,
                                       data_root,
                                       momentum,
                                       weight_decay,
                                       CIFAR_CLASSES,
                                       learning_rate,
                                       args.layers,
                                       batch_size,
                                       epochs=args.epochs,
                                       save_pth=save_pth,
                                       args=args,
                                       clip=clip)

    else:
        raise NameError("Unrecognized client stream")

    n_params = sum(
        np.prod(v.size())
        for v in model.parameters() if v.requires_grad) / 1e6

    if main_config.distributed_cloud and args.weight_init == "lammarckian":
        wt_path = f"{args.code}_{client_id}_weights_{individual.id:05d}.pt"
        torch.save(model.state_dict(), wt_path)
        blob_name = upload_blob(wt_path)
    else:
        blob_name = None
        torch.save(model.state_dict(), os.path.join(save_pth, "weights.pt"))

    result_dict = {
        "id": individual.id,
        "save_path": save_pth,
        "valid_acc": valid_acc,
        "params": n_params,
        "flops": n_flops,
        "wt_blob_name": blob_name,
    }

    dump(result_dict, os.path.join(save_pth, "result.pkl"))

    return result_dict
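
The `weightClip` object built above bounds parameter values during training but is not shown. A hypothetical sketch consistent with the call site (applying it via `model.apply(clip)` after each optimizer step is an assumption, not confirmed by this snippet):

class weightClip:
    def __init__(self, max_weight, min_weight):
        self.max_weight = max_weight
        self.min_weight = min_weight

    def __call__(self, module):
        # clamp each module's weights into [min_weight, max_weight] in place
        if getattr(module, 'weight', None) is not None:
            module.weight.data.clamp_(self.min_weight, self.max_weight)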