Example #1
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
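    # Deterministic cuDNN kernels (benchmark disabled) for reproducibility.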
    cudnn.enabled = True
    cudnn.benchmark = False
    cudnn.deterministic = True

    args.steps = int(np.ceil(
        45000 / args.child_batch_size)) * args.child_epochs

    logging.info("args = %s", args)

    if args.child_arch_pool is not None:
        logging.info('Architecture pool is provided, loading')
        with open(args.child_arch_pool) as f:
            archs = f.read().splitlines()
            archs = list(map(utils.build_dag, archs))
            child_arch_pool = archs
    elif os.path.exists(os.path.join(args.output_dir, 'arch_pool')):
        logging.info('Architecture pool found, loading')
        with open(os.path.join(args.output_dir, 'arch_pool')) as f:
            archs = f.read().splitlines()
            archs = list(map(utils.build_dag, archs))
            child_arch_pool = archs
    else:
        child_arch_pool = None

    child_eval_epochs = eval(args.child_eval_epochs)
    build_fn = get_builder(args.dataset)
    train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler = build_fn(
        ratio=0.9, epoch=-1)

    nao = NAO(
        args.controller_encoder_layers,
        args.controller_encoder_vocab_size,
        args.controller_encoder_hidden_size,
        args.controller_encoder_dropout,
        args.controller_encoder_length,
        args.controller_source_length,
        args.controller_encoder_emb_size,
        args.controller_mlp_layers,
        args.controller_mlp_hidden_size,
        args.controller_mlp_dropout,
        args.controller_decoder_layers,
        args.controller_decoder_vocab_size,
        args.controller_decoder_hidden_size,
        args.controller_decoder_dropout,
        args.controller_decoder_length,
    )
    nao = nao.cuda()
    logging.info("Encoder-Predictor-Decoder param size = %fMB",
                 utils.count_parameters_in_MB(nao))

    # Train child model
    if child_arch_pool is None:
        logging.info(
            'Architecture pool is not provided, randomly generating now')
        child_arch_pool = utils.generate_arch(args.controller_seed_arch,
                                              args.child_nodes,
                                              5)  # [[[conv],[reduc]]]
    if args.child_sample_policy == 'params':
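        # Weight the sampling of architectures during child training by their
        # parameter count (in MB).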
        child_arch_pool_prob = []
        for arch in child_arch_pool:
            if args.dataset == 'cifar10':
                tmp_model = NASNetworkCIFAR(
                    args, 10, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            elif args.dataset == 'cifar100':
                tmp_model = NASNetworkCIFAR(
                    args, 100, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            else:
                tmp_model = NASNetworkImageNet(
                    args, 1000, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            child_arch_pool_prob.append(
                utils.count_parameters_in_MB(tmp_model))
            del tmp_model
    else:
        child_arch_pool_prob = None

    eval_points = utils.generate_eval_points(child_eval_epochs, 0,
                                             args.child_epochs)
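    # eval_points: the child-training epochs at which the whole architecture
    # pool is evaluated on the validation set.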
    step = 0
    for epoch in range(1, args.child_epochs + 1):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        # sample an arch to train
        train_acc, train_obj, step = child_train(train_queue, model, optimizer,
                                                 step, child_arch_pool,
                                                 child_arch_pool_prob,
                                                 train_criterion)
        logging.info('train_acc %f', train_acc)

        if epoch not in eval_points:
            continue
        # Evaluate seed archs
        valid_accuracy_list = child_valid(valid_queue, model, child_arch_pool,
                                          eval_criterion)

        # Output archs and evaluated error rate
        old_archs = child_arch_pool
        old_archs_perf = valid_accuracy_list

        old_archs_sorted_indices = np.argsort(old_archs_perf)[::-1]
        old_archs = [old_archs[i] for i in old_archs_sorted_indices]
        old_archs_perf = [old_archs_perf[i] for i in old_archs_sorted_indices]
        with open(os.path.join(args.output_dir, 'arch_pool.{}'.format(epoch)),
                  'w') as fa:
            with open(
                    os.path.join(args.output_dir,
                                 'arch_pool.perf.{}'.format(epoch)),
                    'w') as fp:
                with open(os.path.join(args.output_dir, 'arch_pool'),
                          'w') as fa_latest:
                    with open(os.path.join(args.output_dir, 'arch_pool.perf'),
                              'w') as fp_latest:
                        for arch, perf in zip(old_archs, old_archs_perf):
                            arch = ' '.join(map(str, arch[0] + arch[1]))
                            fa.write('{}\n'.format(arch))
                            fa_latest.write('{}\n'.format(arch))
                            fp.write('{}\n'.format(perf))
                            fp_latest.write('{}\n'.format(perf))

        if epoch == args.child_epochs:
            break

        # Train Encoder-Predictor-Decoder
        logging.info('Training Encoder-Predictor-Decoder')
        encoder_input = list(
            map(
                lambda x: utils.parse_arch_to_seq(x[0], 2) +
                utils.parse_arch_to_seq(x[1], 2), old_archs))
        # [[conv, reduc]]
        min_val = min(old_archs_perf)
        max_val = max(old_archs_perf)
        encoder_target = [(i - min_val) / (max_val - min_val)
                          for i in old_archs_perf]

        if args.controller_expand is not None:
            dataset = list(zip(encoder_input, encoder_target))
            n = len(dataset)
            ratio = 0.9
            split = int(n * ratio)
            np.random.shuffle(dataset)
            encoder_input, encoder_target = list(zip(*dataset))
            train_encoder_input = list(encoder_input[:split])
            train_encoder_target = list(encoder_target[:split])
            valid_encoder_input = list(encoder_input[split:])
            valid_encoder_target = list(encoder_target[split:])
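            # Augment the training pairs: with 4 tokens per node (2 per input
            # edge) and 5 nodes per cell, the conv cell spans positions 0-19
            # and the reduction cell 20-39; swapping the two input edges of a
            # random node in each cell yields an equivalent architecture with
            # a different sequence.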
            for _ in range(args.controller_expand - 1):
                for src, tgt in zip(encoder_input[:split],
                                    encoder_target[:split]):
                    a = np.random.randint(0, args.child_nodes)
                    b = np.random.randint(0, args.child_nodes)
                    src = src[:4 * a] + src[4 * a + 2:4 * a + 4] + \
                            src[4 * a:4 * a + 2] + src[4 * (a + 1):20 + 4 * b] + \
                            src[20 + 4 * b + 2:20 + 4 * b + 4] + src[20 + 4 * b:20 + 4 * b + 2] + \
                            src[20 + 4 * (b + 1):]
                    train_encoder_input.append(src)
                    train_encoder_target.append(tgt)
        else:
            train_encoder_input = encoder_input
            train_encoder_target = encoder_target
            valid_encoder_input = encoder_input
            valid_encoder_target = encoder_target
        logging.info('Train data: {}\tValid data: {}'.format(
            len(train_encoder_input), len(valid_encoder_input)))

        nao_train_dataset = utils.NAODataset(
            train_encoder_input,
            train_encoder_target,
            True,
            swap=True if args.controller_expand is None else False)
        nao_valid_dataset = utils.NAODataset(valid_encoder_input,
                                             valid_encoder_target, False)
        nao_train_queue = torch.utils.data.DataLoader(
            nao_train_dataset,
            batch_size=args.controller_batch_size,
            shuffle=True,
            pin_memory=True)
        nao_valid_queue = torch.utils.data.DataLoader(
            nao_valid_dataset,
            batch_size=args.controller_batch_size,
            shuffle=False,
            pin_memory=True)
        nao_optimizer = torch.optim.Adam(nao.parameters(),
                                         lr=args.controller_lr,
                                         weight_decay=args.controller_l2_reg)
        for nao_epoch in range(1, args.controller_epochs + 1):
            nao_loss, nao_mse, nao_ce = nao_train(nao_train_queue, nao,
                                                  nao_optimizer)
            logging.info("epoch %04d train loss %.6f mse %.6f ce %.6f",
                         nao_epoch, nao_loss, nao_mse, nao_ce)
            if nao_epoch % 100 == 0:
                pa, hs = nao_valid(nao_valid_queue, nao)
                logging.info("Evaluation on valid data")
                logging.info(
                    'epoch %04d pairwise accuracy %.6f hamming distance %.6f',
                    nao_epoch, pa, hs)

        # Generate new archs
        new_archs = []
        max_step_size = 50
        predict_step_size = 0
        top100_archs = list(
            map(
                lambda x: utils.parse_arch_to_seq(x[0], 2) +
                utils.parse_arch_to_seq(x[1], 2), old_archs[:100]))
        nao_infer_dataset = utils.NAODataset(top100_archs, None, False)
        nao_infer_queue = torch.utils.data.DataLoader(
            nao_infer_dataset,
            batch_size=len(nao_infer_dataset),
            shuffle=False,
            pin_memory=True)
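        # Move the embeddings of the current top architectures along the
        # predictor's gradient (direction '+'), increasing the step size until
        # enough previously unseen architectures are decoded.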
        while len(new_archs) < args.controller_new_arch:
            predict_step_size += 1
            logging.info('Generate new architectures with step size %d',
                         predict_step_size)
            new_arch = nao_infer(nao_infer_queue,
                                 nao,
                                 predict_step_size,
                                 direction='+')
            for arch in new_arch:
                if arch not in encoder_input and arch not in new_archs:
                    new_archs.append(arch)
                if len(new_archs) >= args.controller_new_arch:
                    break
            logging.info('%d new archs generated now', len(new_archs))
            if predict_step_size > max_step_size:
                break
        # new_archs: [[conv, reduc]] sequences
        new_archs = list(
            map(lambda x: utils.parse_seq_to_arch(x, 2),
                new_archs))  # [[[conv],[reduc]]]
        num_new_archs = len(new_archs)
        logging.info("Generate %d new archs", num_new_archs)
        # replace bottom archs
        if args.controller_replace:
            new_arch_pool = old_archs[:len(old_archs) - (num_new_archs + args.controller_random_arch)] + \
                            new_archs + utils.generate_arch(args.controller_random_arch, 5, 5)
        # discard all archs except top k
        elif args.controller_discard:
            new_arch_pool = old_archs[:100] + new_archs + utils.generate_arch(
                args.controller_random_arch, 5, 5)
        # use all
        else:
            new_arch_pool = old_archs + new_archs + utils.generate_arch(
                args.controller_random_arch, 5, 5)
        logging.info("Totally %d architectures now to train",
                     len(new_arch_pool))

        child_arch_pool = new_arch_pool
        with open(os.path.join(args.output_dir, 'arch_pool'), 'w') as f:
            for arch in new_arch_pool:
                arch = ' '.join(map(str, arch[0] + arch[1]))
                f.write('{}\n'.format(arch))

        if args.child_sample_policy == 'params':
            child_arch_pool_prob = []
            for arch in child_arch_pool:
                if args.dataset == 'cifar10':
                    tmp_model = NASNetworkCIFAR(
                        args, 10, args.child_layers, args.child_nodes,
                        args.child_channels, args.child_keep_prob,
                        args.child_drop_path_keep_prob,
                        args.child_use_aux_head, args.steps, arch)
                elif args.dataset == 'cifar100':
                    tmp_model = NASNetworkCIFAR(
                        args, 100, args.child_layers, args.child_nodes,
                        args.child_channels, args.child_keep_prob,
                        args.child_drop_path_keep_prob,
                        args.child_use_aux_head, args.steps, arch)
                else:
                    tmp_model = NASNetworkImageNet(
                        args, 1000, args.child_layers, args.child_nodes,
                        args.child_channels, args.child_keep_prob,
                        args.child_drop_path_keep_prob,
                        args.child_use_aux_head, args.steps, arch)
                child_arch_pool_prob.append(
                    utils.count_parameters_in_MB(tmp_model))
                del tmp_model
        else:
            child_arch_pool_prob = None
Example #2
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU found!')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = False
    cudnn.deterministic = True

    logging.info("Args = %s", args)

    nao = NAO(
        args.encoder_layers,
        args.encoder_vocab_size,
        args.encoder_hidden_size,
        args.encoder_dropout,
        args.encoder_length,
        args.source_length,
        args.encoder_emb_size,
        args.mlp_layers,
        args.mlp_hidden_size,
        args.mlp_dropout,
        args.decoder_layers,
        args.decoder_vocab_size,
        args.decoder_hidden_size,
        args.decoder_dropout,
        args.decoder_length,
    )
    logging.info("param size = %fMB", utils.count_parameters_in_MB(nao))
    nao = nao.cuda()

    with open(
            os.path.join(args.output_dir,
                         'arch_pool.{}'.format(args.iteration))) as f:
        arch_pool = f.read().splitlines()
        arch_pool = list(map(utils.build_dag, arch_pool))
    with open(
            os.path.join(args.output_dir,
                         'arch_pool.{}.perf'.format(args.iteration))) as f:
        arch_pool_valid_acc = f.read().splitlines()
        arch_pool_valid_acc = list(map(float, arch_pool_valid_acc))

    logging.info('Training Encoder-Predictor-Decoder')
    train_encoder_input = list(
        map(
            lambda x: utils.parse_arch_to_seq(x[0], 2) +
            utils.parse_arch_to_seq(x[1], 2), arch_pool))
    min_val = min(arch_pool_valid_acc)
    max_val = max(arch_pool_valid_acc)
    train_encoder_target = [(i - min_val) / (max_val - min_val)
                            for i in arch_pool_valid_acc]

    if args.expand is not None:
        buffer1, buffer2 = [], []
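        # Same edge-swap augmentation as in the previous example: exchange the
        # two input edges of a random node in the conv cell (positions 0-19)
        # and in the reduction cell (positions 20-39).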
        for _ in range(args.expand - 1):
            for src, tgt in zip(train_encoder_input, train_encoder_target):
                a = np.random.randint(0, 5)
                b = np.random.randint(0, 5)
                src = src[:4 * a] + src[4 * a + 2:4 * a + 4] + \
                        src[4 * a:4 * a + 2] + src[4 * (a + 1):20 + 4 * b] + \
                        src[20 + 4 * b + 2:20 + 4 * b + 4] + src[20 + 4 * b:20 + 4 * b + 2] + \
                        src[20 + 4 * (b + 1):]
                buffer1.append(src)
                buffer2.append(tgt)
        train_encoder_input += buffer1
        train_encoder_target += buffer2

    nao_train_dataset = utils.NAODataset(train_encoder_input,
                                         train_encoder_target,
                                         True,
                                         swap=True)
    nao_valid_dataset = utils.NAODataset(train_encoder_input,
                                         train_encoder_target, False)
    nao_train_queue = torch.utils.data.DataLoader(nao_train_dataset,
                                                  batch_size=args.batch_size,
                                                  shuffle=True,
                                                  pin_memory=True)
    nao_valid_queue = torch.utils.data.DataLoader(
        nao_valid_dataset,
        batch_size=len(nao_valid_dataset),
        shuffle=False,
        pin_memory=True)
    nao_optimizer = torch.optim.Adam(nao.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.l2_reg)
    for nao_epoch in range(1, args.epochs + 1):
        nao_loss, nao_mse, nao_ce = nao_train(nao_train_queue, nao,
                                              nao_optimizer)
        if nao_epoch % 10 == 0 or nao_epoch == 1:
            logging.info("epoch %04d train loss %.6f mse %.6f ce %.6f",
                         nao_epoch, nao_loss, nao_mse, nao_ce)
        if nao_epoch % 100 == 0 or nao_epoch == 1:
            mse, pa, hs = nao_valid(nao_train_queue, nao)
            logging.info("Evaluation on train data")
            logging.info(
                'epoch %04d mse %.6f pairwise accuracy %.6f hamming distance %.6f',
                nao_epoch, mse, pa, hs)
            mse, pa, hs = nao_valid(nao_valid_queue, nao)
            logging.info("Evaluation on valid data")
            logging.info(
                'epoch %04d mse %.6f pairwise accuracy %.6f hamming distance %.6f',
                nao_epoch, mse, pa, hs)

    new_archs = []
    predict_step_size = 0
    top_archs = list(
        map(
            lambda x: utils.parse_arch_to_seq(x[0], 2) +
            utils.parse_arch_to_seq(x[1], 2), arch_pool[:args.generate_topk]))
    nao_infer_dataset = utils.NAODataset(top_archs, None, False)
    nao_infer_queue = torch.utils.data.DataLoader(
        nao_infer_dataset,
        batch_size=len(nao_infer_dataset),
        shuffle=False,
        pin_memory=True)

    while len(new_archs) < args.new_arch:
        predict_step_size += 1
        logging.info('Generate new architectures with step size %d',
                     predict_step_size)
        new_arch = nao_infer(nao_infer_queue,
                             nao,
                             predict_step_size,
                             direction='+')
        for arch in new_arch:
            if arch not in train_encoder_input and arch not in new_archs:
                new_archs.append(arch)
            if len(new_archs) >= args.new_arch:
                break
        logging.info('%d new archs generated now', len(new_archs))
        if predict_step_size > args.max_step_size:
            break

    logging.info("Generate %d new archs", len(new_archs))
    new_arch_pool = list(
        map(lambda x: utils.parse_seq_to_arch(x, 2), new_archs))
    new_arch_pool = new_arch_pool + arch_pool[:args.remain_topk]
    with open(
            os.path.join(args.output_dir,
                         'new_arch_pool.{}'.format(args.iteration)), 'w') as f:
        for arch in new_arch_pool:
            arch = ' '.join(map(str, arch[0] + arch[1]))
            f.write('{}\n'.format(arch))
    logging.info('Finish training!')
Example #3
def main():
    arch_pool = utils.generate_arch(args.controller_seed_arch, 5, 5)
    valid_arch_pool = utils.generate_arch(100, 5, 5)
    train_encoder_input = list(
        map(
            lambda x: utils.parse_arch_to_seq(x[0], 2) +
            utils.parse_arch_to_seq(x[1], 2), arch_pool))
    valid_encoder_input = list(
        map(
            lambda x: utils.parse_arch_to_seq(x[0], 2) +
            utils.parse_arch_to_seq(x[1], 2), valid_arch_pool))
    train_encoder_target = [
        np.random.random() for i in range(args.controller_seed_arch)
    ]
    valid_encoder_target = [np.random.random() for i in range(100)]
    nao = NAO(
        args.controller_encoder_layers,
        args.controller_encoder_vocab_size,
        args.controller_encoder_hidden_size,
        args.controller_encoder_dropout,
        args.controller_encoder_length,
        args.controller_source_length,
        args.controller_encoder_emb_size,
        args.controller_mlp_layers,
        args.controller_mlp_hidden_size,
        args.controller_mlp_dropout,
        args.controller_decoder_layers,
        args.controller_decoder_vocab_size,
        args.controller_decoder_hidden_size,
        args.controller_decoder_dropout,
        args.controller_decoder_length,
    )
    logging.info("param size = %fMB", utils.count_parameters_in_MB(nao))
    nao = nao.cuda()
    nao_train_dataset = utils.NAODataset(train_encoder_input,
                                         train_encoder_target,
                                         True,
                                         swap=True)
    nao_valid_dataset = utils.NAODataset(valid_encoder_input,
                                         valid_encoder_target, False)
    nao_train_queue = torch.utils.data.DataLoader(
        nao_train_dataset,
        batch_size=args.controller_batch_size,
        shuffle=True,
        pin_memory=True)
    nao_valid_queue = torch.utils.data.DataLoader(
        nao_valid_dataset,
        batch_size=len(nao_valid_dataset),
        shuffle=False,
        pin_memory=True)
    nao_optimizer = torch.optim.Adam(nao.parameters(),
                                     lr=args.controller_lr,
                                     weight_decay=args.controller_l2_reg)
    for nao_epoch in range(1, args.controller_epochs + 1):
        nao_loss, nao_mse, nao_ce = nao_train(nao_train_queue, nao,
                                              nao_optimizer)
        if nao_epoch % 10 == 0:
            logging.info("epoch %04d train loss %.6f mse %.6f ce %.6f",
                         nao_epoch, nao_loss, nao_mse, nao_ce)
        if nao_epoch % 100 == 0:
            pa, hs = nao_valid(nao_valid_queue, nao)
            logging.info("Evaluation on training data")
            logging.info(
                'epoch %04d pairwise accuracy %.6f hamming distance %.6f',
                nao_epoch, pa, hs)

    new_archs = []
    max_step_size = 100
    predict_step_size = 0
    top100_archs = list(
        map(
            lambda x: utils.parse_arch_to_seq(x[0], 2) +
            utils.parse_arch_to_seq(x[1], 2), arch_pool[:100]))
    nao_infer_dataset = utils.NAODataset(top100_archs, None, False)
    nao_infer_queue = torch.utils.data.DataLoader(
        nao_infer_dataset,
        batch_size=len(nao_infer_dataset),
        shuffle=False,
        pin_memory=True)
    while len(new_archs) < args.controller_new_arch:
        predict_step_size += 1
        logging.info('Generate new architectures with step size %d',
                     predict_step_size)
        new_arch = nao_infer(nao_infer_queue, nao, predict_step_size)
        for arch in new_arch:
            if arch not in train_encoder_input and arch not in new_archs:
                new_archs.append(arch)
            if len(new_archs) >= args.controller_new_arch:
                break
        logging.info('%d new archs generated now', len(new_archs))
        if predict_step_size > max_step_size:
            break
    # new_archs: [[conv, reduc]] sequences
    new_archs = list(map(lambda x: utils.parse_seq_to_arch(x, 2),
                         new_archs))  # [[[conv],[reduc]]]
    num_new_archs = len(new_archs)
    logging.info("Generate %d new archs", num_new_archs)
    new_arch_pool = arch_pool + new_archs + utils.generate_arch(
        args.controller_random_arch, 5, 5)
    logging.info("Totally %d archs now to train", len(new_arch_pool))
    arch_pool = new_arch_pool
Example #4
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU found!')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True

    logging.info("Args = %s", args)

    args.source_length = args.encoder_length = args.decoder_length = (
        args.nodes + 2) * (args.nodes - 1) // 2

    nasbench = api.NASBench(os.path.join(args.data, 'nasbench_full.tfrecord'))

    controller = NAO(
        args.encoder_layers,
        args.decoder_layers,
        args.mlp_layers,
        args.mlp_hidden_size,
        args.hidden_size,
        args.vocab_size,
        args.dropout,
        args.source_length,
        args.encoder_length,
        args.decoder_length,
    )
    logging.info("param size = %d", utils.count_parameters(controller))
    controller = controller.cuda()

    child_arch_pool, child_seq_pool, child_arch_pool_valid_acc = utils.generate_arch(
        args.seed_arch, nasbench, need_perf=True)

    arch_pool = []
    seq_pool = []
    arch_pool_valid_acc = []
    for i in range(args.iteration + 1):
        logging.info('Iteration {}'.format(i + 1))
        if not child_arch_pool_valid_acc:
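            # Look up the recorded validation accuracy of each architecture in
            # the tabular NAS-Bench-101 benchmark.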
            for arch in child_arch_pool:
                data = nasbench.query(arch)
                child_arch_pool_valid_acc.append(data['validation_accuracy'])

        arch_pool += child_arch_pool
        arch_pool_valid_acc += child_arch_pool_valid_acc
        seq_pool += child_seq_pool

        arch_pool_valid_acc_sorted_indices = np.argsort(
            arch_pool_valid_acc)[::-1]
        arch_pool = [arch_pool[i] for i in arch_pool_valid_acc_sorted_indices]
        seq_pool = [seq_pool[i] for i in arch_pool_valid_acc_sorted_indices]
        arch_pool_valid_acc = [
            arch_pool_valid_acc[i] for i in arch_pool_valid_acc_sorted_indices
        ]
        with open(os.path.join(args.output_dir, 'arch_pool.{}'.format(i)),
                  'w') as fa:
            for arch, seq, valid_acc in zip(arch_pool, seq_pool,
                                            arch_pool_valid_acc):
                fa.write('{}\t{}\t{}\t{}\n'.format(arch.matrix, arch.ops, seq,
                                                   valid_acc))
        print('Top 10 architectures:')
        for arch_index in range(10):
            print('Architecture connection:{}'.format(
                arch_pool[arch_index].matrix))
            print('Architecture operations:{}'.format(
                arch_pool[arch_index].ops))
            print('Valid accuracy:{}'.format(arch_pool_valid_acc[arch_index]))

        if i == args.iteration:
            print('Final architectures:')
            for arch_index in range(10):
                print('Architecture connection:{}'.format(
                    arch_pool[arch_index].matrix))
                print('Architecture operations:{}'.format(
                    arch_pool[arch_index].ops))
                print('Valid accuracy:{}'.format(
                    arch_pool_valid_acc[arch_index]))
                fs, cs = nasbench.get_metrics_from_spec(arch_pool[arch_index])
                test_acc = np.mean(
                    [cs[108][j]['final_test_accuracy'] for j in range(3)])
                print('Mean test accuracy:{}'.format(test_acc))
            break

        train_encoder_input = seq_pool
        min_val = min(arch_pool_valid_acc)
        max_val = max(arch_pool_valid_acc)
        train_encoder_target = [(i - min_val) / (max_val - min_val)
                                for i in arch_pool_valid_acc]

        # Pre-train
        logging.info('Pre-train EPD')
        train_controller(controller, train_encoder_input, train_encoder_target,
                         args.pretrain_epochs)
        logging.info('Finish pre-training EPD')
        # Generate synthetic data
        logging.info('Generate synthetic data for EPD')
        synthetic_encoder_input, synthetic_encoder_target = generate_synthetic_controller_data(
            nasbench, controller, train_encoder_input, args.random_arch)
        if args.up_sample_ratio is None:
            up_sample_ratio = int(
                np.ceil(args.random_arch / len(train_encoder_input)))
        else:
            up_sample_ratio = args.up_sample_ratio
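        # Repeat the real (queried) pairs so they are not drowned out by the
        # synthetic pairs when the controller is retrained.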
        all_encoder_input = train_encoder_input * up_sample_ratio + synthetic_encoder_input
        all_encoder_target = train_encoder_target * up_sample_ratio + synthetic_encoder_target
        # Train
        logging.info('Train EPD')
        train_controller(controller, all_encoder_input, all_encoder_target,
                         args.epochs)
        logging.info('Finish training EPD')

        new_archs = []
        new_seqs = []
        predict_step_size = 0
        unique_input = train_encoder_input + synthetic_encoder_input
        unique_target = train_encoder_target + synthetic_encoder_target
        unique_indices = np.argsort(unique_target)[::-1]
        unique_input = [unique_input[i] for i in unique_indices]
        topk_archs = unique_input[:args.k]
        controller_infer_dataset = utils.ControllerDataset(
            topk_archs, None, False)
        controller_infer_queue = torch.utils.data.DataLoader(
            controller_infer_dataset,
            batch_size=len(controller_infer_dataset),
            shuffle=False,
            pin_memory=True)

        while len(new_archs) < args.new_arch:
            predict_step_size += 1
            logging.info('Generate new architectures with step size %d',
                         predict_step_size)
            new_seq, new_perfs = controller_infer(controller_infer_queue,
                                                  controller,
                                                  predict_step_size,
                                                  direction='+')
            for seq in new_seq:
                matrix, ops = utils.convert_seq_to_arch(seq)
                arch = api.ModelSpec(matrix=matrix, ops=ops)
                if nasbench.is_valid(arch) and len(
                        arch.ops
                ) == 7 and seq not in train_encoder_input and seq not in new_seqs:
                    new_archs.append(arch)
                    new_seqs.append(seq)
                if len(new_seqs) >= args.new_arch:
                    break
            logging.info('%d new archs generated now', len(new_archs))
            if predict_step_size > args.max_step_size:
                break

        child_arch_pool = new_archs
        child_seq_pool = new_seqs
        child_arch_pool_valid_acc = []
        child_arch_pool_test_acc = []
        logging.info("Generate %d new archs", len(child_arch_pool))

    print(nasbench.get_budget_counters())
Example #5
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True

    if args.dataset == 'cifar10':
        args.num_class = 10
    elif args.dataset == 'cifar100':
        args.num_class = 100
    else:
        args.num_class = 10

    if args.search_space == 'small':
        OPERATIONS = OPERATIONS_search_small
    elif args.search_space == 'middle':
        OPERATIONS = OPERATIONS_search_middle
    args.child_num_ops = len(OPERATIONS)
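    # Vocabulary: one reserved token plus (child_nodes + 1) possible input
    # indices per edge plus one token per candidate operation (encoding
    # inferred from the sequence construction below).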
    args.controller_encoder_vocab_size = 1 + (args.child_nodes + 2 -
                                              1) + args.child_num_ops
    args.controller_decoder_vocab_size = args.controller_encoder_vocab_size
    args.steps = int(np.ceil(
        45000 / args.child_batch_size)) * args.child_epochs

    logging.info("args = %s", args)

    if args.child_arch_pool is not None:
        logging.info('Architecture pool is provided, loading')
        with open(args.child_arch_pool) as f:
            archs = f.read().splitlines()
            archs = list(map(utils.build_dag, archs))
            child_arch_pool = archs
    elif os.path.exists(os.path.join(args.output_dir, 'arch_pool')):
        logging.info('Architecture pool found, loading')
        with open(os.path.join(args.output_dir, 'arch_pool')) as f:
            archs = f.read().splitlines()
            archs = list(map(utils.build_dag, archs))
            child_arch_pool = archs
    else:
        child_arch_pool = None

    build_fn = get_builder(args.dataset)
    train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler = build_fn(
        ratio=0.9, epoch=-1)

    nao = NAO(
        args.controller_encoder_layers,
        args.controller_encoder_vocab_size,
        args.controller_encoder_hidden_size,
        args.controller_encoder_dropout,
        args.controller_encoder_length,
        args.controller_source_length,
        args.controller_encoder_emb_size,
        args.controller_mlp_layers,
        args.controller_mlp_hidden_size,
        args.controller_mlp_dropout,
        args.controller_decoder_layers,
        args.controller_decoder_vocab_size,
        args.controller_decoder_hidden_size,
        args.controller_decoder_dropout,
        args.controller_decoder_length,
    )
    nao = nao.cuda()
    logging.info("Encoder-Predictor-Decoder param size = %fMB",
                 utils.count_parameters_in_MB(nao))

    if child_arch_pool is None:
        logging.info(
            'Architecture pool is not provided, randomly generating now')
        child_arch_pool = utils.generate_arch(
            args.controller_seed_arch, args.child_nodes,
            args.child_num_ops)  # [[[conv],[reduc]]]
    arch_pool = []
    arch_pool_valid_acc = []
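    # Alternate four rounds of: train the weight-sharing child model on the
    # current pool, evaluate every architecture in it, retrain the
    # Encoder-Predictor-Decoder, and decode a new pool.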
    for i in range(4):
        logging.info('Iteration %d', i)

        child_arch_pool_prob = []
        for arch in child_arch_pool:
            if args.dataset == 'cifar10':
                tmp_model = NASNetworkCIFAR(
                    args, args.num_class, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            elif args.dataset == 'cifar100':
                tmp_model = NASNetworkCIFAR(
                    args, args.num_class, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            else:
                tmp_model = NASNetworkImageNet(
                    args, args.num_class, args.child_layers, args.child_nodes,
                    args.child_channels, args.child_keep_prob,
                    args.child_drop_path_keep_prob, args.child_use_aux_head,
                    args.steps, arch)
            child_arch_pool_prob.append(
                utils.count_parameters_in_MB(tmp_model))
            del tmp_model

        step = 0
        scheduler = get_scheduler(optimizer, args.dataset)
        for epoch in range(1, args.child_epochs + 1):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)
            # sample an arch to train
            train_acc, train_obj, step = child_train(train_queue, model,
                                                     optimizer, step,
                                                     child_arch_pool,
                                                     child_arch_pool_prob,
                                                     train_criterion)
            logging.info('train_acc %f', train_acc)

        logging.info("Evaluate seed archs")
        arch_pool += child_arch_pool
        arch_pool_valid_acc = child_valid(valid_queue, model, arch_pool,
                                          eval_criterion)

        arch_pool_valid_acc_sorted_indices = np.argsort(
            arch_pool_valid_acc)[::-1]
        arch_pool = [arch_pool[i] for i in arch_pool_valid_acc_sorted_indices]
        arch_pool_valid_acc = [
            arch_pool_valid_acc[i] for i in arch_pool_valid_acc_sorted_indices
        ]
        with open(os.path.join(args.output_dir, 'arch_pool.{}'.format(i)),
                  'w') as fa:
            with open(
                    os.path.join(args.output_dir,
                                 'arch_pool.perf.{}'.format(i)), 'w') as fp:
                for arch, perf in zip(arch_pool, arch_pool_valid_acc):
                    arch = ' '.join(map(str, arch[0] + arch[1]))
                    fa.write('{}\n'.format(arch))
                    fp.write('{}\n'.format(perf))
        if i == 3:
            break

        # Train Encoder-Predictor-Decoder
        logging.info('Train Encoder-Predictor-Decoder')
        encoder_input = list(
            map(
                lambda x: utils.parse_arch_to_seq(x[0]) +
                utils.parse_arch_to_seq(x[1]), arch_pool))
        # [[conv, reduc]]
        min_val = min(arch_pool_valid_acc)
        max_val = max(arch_pool_valid_acc)
        encoder_target = [(i - min_val) / (max_val - min_val)
                          for i in arch_pool_valid_acc]

        if args.controller_expand:
            dataset = list(zip(encoder_input, encoder_target))
            n = len(dataset)
            ratio = 0.9
            split = int(n * ratio)
            np.random.shuffle(dataset)
            encoder_input, encoder_target = list(zip(*dataset))
            train_encoder_input = list(encoder_input[:split])
            train_encoder_target = list(encoder_target[:split])
            valid_encoder_input = list(encoder_input[split:])
            valid_encoder_target = list(encoder_target[split:])
            for _ in range(args.controller_expand - 1):
                for src, tgt in zip(encoder_input[:split],
                                    encoder_target[:split]):
                    a = np.random.randint(0, args.child_nodes)
                    b = np.random.randint(0, args.child_nodes)
                    src = src[:4 * a] + src[4 * a + 2:4 * a + 4] + \
                            src[4 * a:4 * a + 2] + src[4 * (a + 1):20 + 4 * b] + \
                            src[20 + 4 * b + 2:20 + 4 * b + 4] + src[20 + 4 * b:20 + 4 * b + 2] + \
                            src[20 + 4 * (b + 1):]
                    train_encoder_input.append(src)
                    train_encoder_target.append(tgt)
        else:
            train_encoder_input = encoder_input
            train_encoder_target = encoder_target
            valid_encoder_input = encoder_input
            valid_encoder_target = encoder_target
        logging.info('Train data: {}\tValid data: {}'.format(
            len(train_encoder_input), len(valid_encoder_input)))

        nao_train_dataset = utils.NAODataset(
            train_encoder_input,
            train_encoder_target,
            True,
            swap=True if args.controller_expand is None else False)
        nao_valid_dataset = utils.NAODataset(valid_encoder_input,
                                             valid_encoder_target, False)
        nao_train_queue = torch.utils.data.DataLoader(
            nao_train_dataset,
            batch_size=args.controller_batch_size,
            shuffle=True,
            pin_memory=True)
        nao_valid_queue = torch.utils.data.DataLoader(
            nao_valid_dataset,
            batch_size=args.controller_batch_size,
            shuffle=False,
            pin_memory=True)
        nao_optimizer = torch.optim.Adam(nao.parameters(),
                                         lr=args.controller_lr,
                                         weight_decay=args.controller_l2_reg)
        for nao_epoch in range(1, args.controller_epochs + 1):
            nao_loss, nao_mse, nao_ce = nao_train(nao_train_queue, nao,
                                                  nao_optimizer)
            logging.info("epoch %04d train loss %.6f mse %.6f ce %.6f",
                         nao_epoch, nao_loss, nao_mse, nao_ce)
            if nao_epoch % 100 == 0:
                pa, hs = nao_valid(nao_valid_queue, nao)
                logging.info("Evaluation on valid data")
                logging.info(
                    'epoch %04d pairwise accuracy %.6f hamming distance %.6f',
                    nao_epoch, pa, hs)

        # Generate new archs
        new_archs = []
        max_step_size = 50
        predict_step_size = 0
        top100_archs = list(
            map(
                lambda x: utils.parse_arch_to_seq(x[0]) +
                utils.parse_arch_to_seq(x[1]), arch_pool[:100]))
        nao_infer_dataset = utils.NAODataset(top100_archs, None, False)
        nao_infer_queue = torch.utils.data.DataLoader(
            nao_infer_dataset,
            batch_size=len(nao_infer_dataset),
            shuffle=False,
            pin_memory=True)
        while len(new_archs) < args.controller_new_arch:
            predict_step_size += 1
            logging.info('Generate new architectures with step size %d',
                         predict_step_size)
            new_arch = nao_infer(nao_infer_queue,
                                 nao,
                                 predict_step_size,
                                 direction='+')
            for arch in new_arch:
                if arch not in encoder_input and arch not in new_archs:
                    new_archs.append(arch)
                if len(new_archs) >= args.controller_new_arch:
                    break
            logging.info('%d new archs generated now', len(new_archs))
            if predict_step_size > max_step_size:
                break

        child_arch_pool = list(
            map(lambda x: utils.parse_seq_to_arch(x),
                new_archs))  # [[[conv],[reduc]]]
        logging.info("Generate %d new archs", len(child_arch_pool))

    logging.info('Finish Searching')
    logging.info('Reranking top 5 architectures')
    # reranking top 5
    top_archs = arch_pool[:5]
    if args.dataset == 'cifar10':
        top_archs_perf = train_and_evaluate_top_on_cifar10(
            top_archs, train_queue, valid_queue)
    elif args.dataset == 'cifar100':
        top_archs_perf = train_and_evaluate_top_on_cifar100(
            top_archs, train_queue, valid_queue)
    else:
        top_archs_perf = train_and_evaluate_top_on_imagenet(
            top_archs, train_queue, valid_queue)
    top_archs_sorted_indices = np.argsort(top_archs_perf)[::-1]
    top_archs = [top_archs[i] for i in top_archs_sorted_indices]
    top_archs_perf = [top_archs_perf[i] for i in top_archs_sorted_indices]
    with open(os.path.join(args.output_dir, 'arch_pool.final'), 'w') as fa:
        with open(os.path.join(args.output_dir, 'arch_pool.perf.final'),
                  'w') as fp:
            for arch, perf in zip(top_archs, top_archs_perf):
                arch = ' '.join(map(str, arch[0] + arch[1]))
                fa.write('{}\n'.format(arch))
                fp.write('{}\n'.format(perf))
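
Several of the examples above map validation accuracy onto the predictor target with (acc - min_val) / (max_val - min_val). If every architecture in the pool happens to score exactly the same, that expression divides by zero. Below is a minimal guarded variant; normalize_target is a hypothetical helper, not part of the original utils module.

def normalize_target(accs):
    # Min-max normalize a list of accuracies to [0, 1]; fall back to a
    # constant 0.5 when the pool has no spread (max == min).
    min_val, max_val = min(accs), max(accs)
    if max_val == min_val:
        return [0.5 for _ in accs]
    return [(a - min_val) / (max_val - min_val) for a in accs]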