Exemplo n.º 1
0
Arquivo: main.py Projeto: abstatic/mos
# Final CLI option (the rest of the parser setup is defined above this chunk).
parser.add_argument('--single_gpu',
                    default=False,
                    action='store_true',
                    help='use single GPU')
args = parser.parse_args()

# Negative values act as "inherit from the related option" sentinels.
if args.nhidlast < 0:
    args.nhidlast = args.emsize
if args.dropoutl < 0:
    args.dropoutl = args.dropouth
if args.small_batch_size < 0:
    args.small_batch_size = args.batch_size

# Fresh runs get a timestamped experiment directory with a snapshot of the
# scripts; resumed runs (--continue_train) reuse the existing one.
if not args.continue_train:
    args.save = '{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    create_exp_dir(args.save, scripts_to_save=['main.py', 'model.py'])


def logging(s, print_=True, log_=True):
    """Print *s* to stdout and/or append it to ``<args.save>/log.txt``."""
    if print_:
        print(s)
    if log_:
        log_path = os.path.join(args.save, 'log.txt')
        with open(log_path, 'a+') as handle:
            handle.write('{}\n'.format(s))


# Set the random seed manually for reproducibility.
# Seed both NumPy and PyTorch RNGs so data order and weight init repeat.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
Exemplo n.º 2
0
                        help='rank of process')
    parser.add_argument('--world_size',
                        type=int,
                        default=1,
                        help='number of gpus')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        help='seed used for initialization')
    parser.add_argument('--master_address',
                        type=str,
                        default='127.0.0.1',
                        help='address for master')

    args = parser.parse_args()
    utils.create_exp_dir(args.save)

    # One worker process per GPU; world_size == 1 falls through to the
    # single-process branch (outside this chunk).
    size = args.world_size

    if size > 1:
        args.distributed = True
        processes = []
        for rank in range(size):
            # NOTE(review): all Processes receive the same mutated `args`
            # object; presumably init_processes reads local_rank before the
            # next iteration overwrites it — confirm.
            args.local_rank = rank
            p = Process(target=init_processes, args=(rank, size, main, args))
            p.start()
            processes.append(p)

        # Block until every worker finishes.
        for p in processes:
            p.join()
    else:
Exemplo n.º 3
0
    def __init__(self, args, sub_dir_path=None):
        """Set up experiment directories, logging, RNG seeding and the
        training/eval procedures (``train_fn``/``eval_fn``) for a CNN
        supernet search policy.

        Args:
            args: parsed experiment options (paths, seed, training method...).
            sub_dir_path: optional explicit experiment sub-directory name;
                defaults to ``<method>_SEED_<seed>``.
        """
        super(CNNSearchPolicy, self).__init__()

        self.args = args

        # initialize path and logger
        # NOTE(review): when continue_train is True, sub_directory_path and
        # exp_dir are never assigned here but are read below (visualize,
        # tensorboard, logger) — presumably restored elsewhere; confirm.
        if not self.args.continue_train:
            self.sub_directory_path = sub_dir_path or '{}_SEED_{}'.format(
                self.args.supernet_train_method, self.args.seed)
            self.exp_dir = os.path.join(self.args.main_path,
                                        self.sub_directory_path)
            utils.create_exp_dir(self.exp_dir)
            utils.save_json(args, self.exp_dir + '/args.json')
        if self.args.visualize:
            self.viz_dir_path = utils.create_viz_dir(self.exp_dir)

        if self.args.tensorboard:
            self.tb_dir = self.exp_dir
            tboard_dir = os.path.join(self.args.tboard_dir,
                                      self.sub_directory_path)
            self.writer = SummaryWriter(tboard_dir)

        if self.args.debug:
            # Slower, but pinpoints the op that produced NaN/Inf gradients.
            torch.autograd.set_detect_anomaly(True)

        # Set logger and directory.
        self.logger = utils.get_logger(
            "train_search",
            file_handler=utils.get_file_handler(
                os.path.join(self.exp_dir, 'log.txt')),
            level=logging.INFO if not args.debug else logging.DEBUG)

        # Random seed should be set once the Policy is created.
        logging.info(f"setting random seed as {args.seed}")
        utils.torch_random_seed(args.seed)
        logging.info('gpu number = %d' % args.gpus)
        logging.info("args = %s", args)

        # metrics to track
        # self.ranking_per_epoch = OrderedDict()
        self.search_space = None  # store the search space.
        self.model = None  # store the model
        self.model_fn = None
        self.running_stats = OrderedDict()  # store all running status.

        # to log the training results.
        self.logging_fn = self.logging_at_epoch
        # Bind the chosen supernet train/eval procedures with their fixed
        # arguments so later code can call self.train_fn(...) uniformly.
        if args.supernet_train_method in ['darts', 'spos']:
            """ Fundamental baseline training methods
            sample 1 architecture per batch
            train supernet
            Conv op has maximum possible filter channels (== output size of cell)
            Random a chunk of it.
            """
            train_fn = procedure_ops.darts_train_model
            self.train_fn = partial(train_fn,
                                    args=self.args,
                                    architect=None,
                                    sampler=self.random_sampler)
            self.eval_fn = partial(procedure_ops.darts_model_validation,
                                   args=self.args)
        elif args.supernet_train_method == 'fairnas':
            """
            Extend darts training method with FairNas strategy. It is not possible to use directly the FairNAS,
            but we can extend it into 2 method.
            """
            train_fn = procedure_ops.fairnas_train_model_v1
            self.train_fn = partial(train_fn,
                                    args=self.args,
                                    architect=None,
                                    topology_sampler=self.random_sampler,
                                    op_sampler=self.op_sampler)
            self.eval_fn = partial(procedure_ops.darts_model_validation,
                                   args=self.args)
        else:
            # NOTE(review): unknown methods silently leave train_fn/eval_fn
            # unset — a later call would raise AttributeError; confirm intended.
            pass
Exemplo n.º 4
0
def run(net,
        init_ch=32,
        layers=20,
        auxiliary=True,
        lr=0.025,
        momentum=0.9,
        wd=3e-4,
        cutout=True,
        cutout_length=16,
        data='../data',
        batch_size=96,
        epochs=600,
        drop_path_prob=0.2,
        auxiliary_weight=0.4):
    """Train the network described by genotype ``net`` on CIFAR-10.

    Args:
        net: JSON-serialisable genotype describing the architecture.
        init_ch: initial channel count of the network.
        layers: number of cells.
        auxiliary: whether to add the auxiliary classification head.
        lr, momentum, wd: SGD hyper-parameters.
        cutout, cutout_length: cutout data-augmentation settings.
        data: CIFAR-10 root directory.
        batch_size, epochs: training schedule.
        drop_path_prob: final drop-path probability (annealed linearly).
        auxiliary_weight: weight of the auxiliary loss.

    Returns:
        Best validation accuracy observed during training.
    """
    # Checkpoint directory is keyed by the md5 of the genotype JSON, so the
    # same architecture always maps to the same path.
    save = '/checkpoint/linnanwang/nasnet/' + hashlib.md5(
        json.dumps(net).encode()).hexdigest()
    utils.create_exp_dir(save, scripts_to_save=glob.glob('*.py'))

    # Mirror log output to stdout and <save>/log.txt.
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    # Fixed seed 0 and GPU 0 — this worker assumes exclusive use of device 0.
    np.random.seed(0)
    torch.cuda.set_device(0)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(0)
    logging.info('gpu device = %d' % 0)
    # logging.info("args = %s", args)

    genotype = net
    model = Network(init_ch, 10, layers, auxiliary, genotype).cuda()

    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr,
                                momentum=momentum,
                                weight_decay=wd)
    # O3 = full fp16 training via apex AMP.
    model, optimizer = apex.amp.initialize(model, optimizer, opt_level="O3")

    train_transform, valid_transform = utils._data_transforms_cifar10(
        cutout, cutout_length)
    train_data = dset.CIFAR10(root=data,
                              train=True,
                              download=True,
                              transform=train_transform)
    valid_data = dset.CIFAR10(root=data,
                              train=False,
                              download=True,
                              transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=2)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(epochs))

    best_acc = 0.0

    for epoch in range(epochs):
        # NOTE(review): scheduler.step() before training is the pre-1.1
        # PyTorch ordering; newer versions expect it after optimizer.step().
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        # Linearly anneal drop-path probability over the full schedule.
        model.drop_path_prob = drop_path_prob * epoch / epochs

        train_acc, train_obj = train(train_queue,
                                     model,
                                     criterion,
                                     optimizer,
                                     auxiliary=auxiliary,
                                     auxiliary_weight=auxiliary_weight)
        logging.info('train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc: %f', valid_acc)

        # Checkpoint only once past the warm-up epochs (>= 50) and only on
        # improvement; best_acc itself is updated unconditionally below.
        if valid_acc > best_acc and epoch >= 50:
            print('this model is the best')
            torch.save(model.state_dict(), os.path.join(save, 'model.pt'))
        if valid_acc > best_acc:
            best_acc = valid_acc
        print('current best acc is', best_acc)

        # NOTE(review): hard stop at 100 epochs regardless of `epochs` —
        # looks like a leftover early-exit for architecture search; confirm.
        if epoch == 100:
            break

        # utils.save(model, os.path.join(args.save, 'trained.pt'))
        print('saved to: model.pt')

    return best_acc
Exemplo n.º 5
0
# Final CLI options (the rest of the parser setup is defined above this chunk).
parser.add_argument('--max_seq_len_delta', type=int, default=40,
                    help='max sequence length')
parser.add_argument('--single_gpu', default=False, action='store_true', 
                    help='use single GPU')
args = parser.parse_args()

# Negative values act as "inherit from the related option" sentinels.
if args.nhidlast < 0:
    args.nhidlast = args.emsize
if args.dropoutl < 0:
    args.dropoutl = args.dropouth
if args.small_batch_size < 0:
    args.small_batch_size = args.batch_size

# Fresh runs get a timestamped experiment directory with a snapshot of the
# scripts; resumed runs (--continue_train) reuse the existing one.
if not args.continue_train:
    args.save = '{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    create_exp_dir(args.save, scripts_to_save=['main.py', 'model.py'])

def logging(s, print_=True, log_=True):
    """Echo *s* to the console and/or append it to ``<args.save>/log.txt``."""
    if print_:
        print(s)
    if not log_:
        return
    with open(os.path.join(args.save, 'log.txt'), 'a+') as out:
        out.write(s)
        out.write('\n')

# Set the random seed manually for reproducibility.
# Seed both NumPy and PyTorch RNGs so data order and weight init repeat.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
Exemplo n.º 6
0
# Two network choices for the paired-training setup (see --lambda_kd below).
parser.add_argument('--net1_name',
                    type=str,
                    required=True,
                    help='name of net1')  # resnet20/resnet110
parser.add_argument('--net2_name',
                    type=str,
                    required=True,
                    help='name of net2')  # resnet20/resnet110

# hyperparameter lambda (weight of the knowledge-distillation term)
parser.add_argument('--lambda_kd', type=float, default=1.0)

# parse_known_args tolerates unrecognised flags instead of erroring out.
args, unparsed = parser.parse_known_args()

args.save_root = os.path.join(args.save_root, args.note)
create_exp_dir(args.save_root)

# Mirror log output to stdout and <save_root>/log.txt with a bare format.
log_format = '%(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format)
fh = logging.FileHandler(os.path.join(args.save_root, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)


def main():
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        cudnn.enabled = True
        cudnn.benchmark = True
Exemplo n.º 7
0
    def __init__(self, args):
        """Initialise distributed/single-GPU state, RNG seeding, the
        experiment directory, logging, and the model/optimizer/data loaders
        for a search run.

        Args:
            args: parsed experiment options.
        """
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        if self.args.distributed:
            # Init distributed environment
            self.rank, self.world_size, self.device = init_dist(
                port=self.args.port)
            # Per-rank seed so workers draw different random streams.
            # NOTE(review): rank 0 always gets seed 0 regardless of
            # args.seed — confirm this is intended.
            self.seed = self.rank * self.args.seed
        else:
            torch.cuda.set_device(self.args.gpu)
            self.device = torch.device("cuda")
            self.rank = 0
            self.seed = self.args.seed
            self.world_size = 1

        if self.args.fix_seedcudnn:
            # Deterministic mode: fixed cuDNN kernels, no benchmark
            # autotuning, and every RNG (python/numpy/torch/cuda) seeded.
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            # Faster non-deterministic mode: cuDNN autotunes its kernels.
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        # Only rank 0 creates directories, writes logs and tensorboard events.
        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date,
                                 self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter(
                './runs/' + generate_date + '/nas_{}'.format(self.args.remark))
        else:
            self.logger = None

        # set default resource_lambda for different methods
        if self.args.resource_efficient:
            if self.args.method == 'policy_gradient':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-4
                else:
                    default_resource_lambda = 1e-5
            if self.args.method == 'reparametrization':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-2
                else:
                    default_resource_lambda = 1e-5
            if self.args.method == 'discrete':
                if self.args.log_penalty:
                    default_resource_lambda = 1e-2
                else:
                    default_resource_lambda = 1e-4
            # NOTE(review): `default_lambda` is not defined in this method
            # (module-level constant?); also, if args.method matches none of
            # the cases above, `default_resource_lambda` is unbound here and
            # the assignment would raise NameError — confirm.
            if self.args.resource_lambda == default_lambda:
                self.args.resource_lambda = default_resource_lambda

        # initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        # initialize model
        self.init_model()

        # calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
            # Hand the model handles for tensorboard/console logging.
            self.model._logger = self.logger
            self.model._logging = logging

        # initialize optimizer
        self.init_optimizer()

        # initialize dataset loader
        self.init_loaddata()

        # Flags toggled during alternating weight/architecture updates.
        self.update_theta = True
        self.update_alpha = True
Exemplo n.º 8
0
def main(args):
    """Main training function: fine-tune a FunnelTFM classifier.

    Sets up (optionally distributed) devices and seeding, loads data,
    builds or resumes the model/optimizer, runs the training loop with
    periodic logging and validation, and finally writes predictions.

    Args:
        args: parsed experiment options (paths, batch sizes, fp16, ...).
    """
    torch.cuda.set_device(args.device_id)
    if args.distributed:
        args.distributed_rank = args.device_id
        distributed_init(args)
    if args.seed is not None:
        # Seed every RNG so runs repeat (numpy, torch CPU, current GPU, all GPUs).
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)
    options.setup_device(args)

    ############################################################################
    # Experiment & Logging
    ############################################################################
    # NOTE: `logging` below is a callable returned by utils (it shadows the
    # stdlib logging module inside this function).
    if is_master(args):
        if args.resume:
            # rank-0 device creates experiment dir and log to the file
            logging = utils.get_logger(os.path.join(args.model_dir, "log.txt"),
                                       log_=not args.debug)
        else:
            # rank-0 device creates experiment dir and log to the file
            logging = utils.create_exp_dir(args.model_dir, debug=args.debug)
    else:
        # other devices only log to console (print) but not the file
        logging = utils.get_logger(log_path=None, log_=False)

    ############################################################################
    # Load data
    ############################################################################
    logging("Loading data..")
    loaded_data, label_dict = data.load_data(args)
    args.num_class = len(label_dict)
    logging("Loading finish")
    tr_data, va_data, te_data = loaded_data
    va_loader = data.BucketIterator(va_data, args.valid_bsz, args.pad_id,
                                    args.seg_id_pad, args.device,
                                    args.max_length)
    te_loader = data.BucketIterator(te_data, args.test_bsz, args.pad_id,
                                    args.seg_id_pad, args.device,
                                    args.max_length)

    options.setup_device(args)

    args.model_path = os.path.join(args.model_dir, "model.pt")
    args.var_path = os.path.join(args.model_dir, "var.pt")
    args.config_path = os.path.join(args.model_dir, "net_config.json")
    train_step = 0
    best_accuracy = -float("inf")

    # create model
    if args.resume:
        # Restore model weights, optimizer (with states moved to the target
        # device) and the (best_accuracy, train_step) bookkeeping.
        logging("Resuming from {}...".format(args.model_dir))
        net_config = modeling.ModelConfig.init_from_json(
            args.config_path, args)
        model = modeling.FunnelTFM(net_config, args)
        model_param, optimizer = torch.load(args.model_path,
                                            map_location="cpu")
        logging(model.load_state_dict(model_param, strict=False))
        model = model.to(args.device)
        for state in optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(args.device)
        best_accuracy, train_step = torch.load(args.var_path)
    else:
        # create new model
        if args.init_ckpt:
            logging("Init from ckpt {}".format(args.init_ckpt))
            net_config = modeling.ModelConfig.init_from_json(
                args.init_ckpt_config, args)
            model = modeling.FunnelTFM(net_config, args)
            print(
                model.load_state_dict(torch.load(args.init_ckpt),
                                      strict=False))
        else:
            logging("init model")
            net_config = modeling.ModelConfig.init_from_args(args)
            model = modeling.FunnelTFM(net_config, args)
        net_config.to_json(args.config_path)
        model = model.to(args.device)

    # create new optimizer
    # NOTE(review): when resuming, the loaded optimizer is replaced by a
    # fresh one below — confirm that dropping its state is intended.
    if args.fp16:
        from apex.optimizers import FusedAdam
        import apex.amp as amp
        optimizer = FusedAdam(model.parameters(),
                              lr=args.lr,
                              weight_decay=args.weight_decay)
        amp_model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.amp_opt)
    else:
        try:
            from apex.optimizers import FusedAdam
            optimizer = FusedAdam(model.parameters(),
                                  lr=args.lr,
                                  betas=(0.9, 0.99),
                                  eps=1e-6,
                                  weight_decay=args.weight_decay)
        except ImportError as e:
            # Fall back to stock AdamW when apex is not installed.
            logging("use pytorch optimizer")
            optimizer = torch.optim.AdamW(model.parameters(),
                                          lr=args.lr,
                                          betas=(0.9, 0.99),
                                          eps=1e-6,
                                          weight_decay=args.weight_decay)
        amp_model = model

    if args.distributed:
        if args.ddp_backend == "apex":
            from apex.parallel import DistributedDataParallel as DDP
            para_model = DDP(amp_model)
        else:
            from torch.nn.parallel import DistributedDataParallel as DDP
            para_model = DDP(amp_model,
                             device_ids=[args.device_id],
                             find_unused_parameters=True)
    else:
        para_model = amp_model

    ############################################################################
    # Log args
    ############################################################################
    logging("=" * 100)
    for k, v in args.__dict__.items():
        logging("  - {} : {}".format(k, v))
    logging("=" * 100)

    ############################################################################
    # Training
    ############################################################################
    if not args.test_only:
        tr_loader = data.BucketIterator(tr_data, args.train_bsz, args.pad_id,
                                        args.seg_id_pad, args.device,
                                        args.max_length)

        if args.distributed:
            num_data = len(tr_data) // args.distributed_world_size
        else:
            num_data = len(tr_data)
        # Ceil-divide to count batches; derive total and warmup steps.
        num_tr_batch = (num_data + args.train_bsz - 1) // args.train_bsz
        args.train_steps = num_tr_batch * args.epochs
        args.warmup_steps = int(args.train_steps * args.warmup_prop)

        # Running counters, kept as device tensors so they can be all-reduced.
        num_example = torch.Tensor([0]).to(args.device)
        num_correct = torch.Tensor([0]).to(args.device)

        if args.dataset in ["CoLA"]:
            # CoLA is scored by correlation, so track the confusion matrix.
            num_tp = torch.Tensor([0]).to(args.device)
            num_fp = torch.Tensor([0]).to(args.device)
            num_tn = torch.Tensor([0]).to(args.device)
            num_fn = torch.Tensor([0]).to(args.device)

        for epoch in range(args.epochs):
            #### One epoch
            for i, (sent, seg_id, label) in enumerate(
                    tr_loader.get_iter(epoch, distributed=args.distributed)):
                optimizer.zero_grad()
                _, ret_dict = para_model(sent, seg_id=seg_id, cls_target=label)
                cls_loss = ret_dict["cls_loss"]
                cls_corr = ret_dict["cls_corr"]

                if args.fp16:
                    # AMP scales the loss to avoid fp16 gradient underflow.
                    with amp.scale_loss(cls_loss, optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    cls_loss.backward()
                num_correct += cls_corr.detach()
                num_example += len(sent)
                if args.dataset in ["CoLA"]:
                    tp, fp, tn, fn = confusion_matrix(ret_dict["cls_pred"],
                                                      label)
                    num_tp = num_tp + tp
                    num_fp = num_fp + fp
                    num_tn = num_tn + tn
                    num_fn = num_fn + fn

                if args.clip > 0:
                    if args.fp16:
                        gnorm = torch.nn.utils.clip_grad_norm_(
                            amp.master_params(optimizer), args.clip)
                    else:
                        gnorm = torch.nn.utils.clip_grad_norm_(
                            model.parameters(), args.clip)
                else:
                    # No clipping: compute the global grad norm only for logging.
                    gnorm = 0
                    for p in model.parameters():
                        if p.grad is not None:
                            param_gnorm = p.grad.data.norm(2)
                            gnorm += param_gnorm.item()**2
                    gnorm = gnorm**(1. / 2)
                train_step += 1
                adjust_lr(args, train_step, optimizer)
                optimizer.step()

                ##### training stat
                # Log n_log_epoch times per epoch.
                # NOTE(review): raises ZeroDivisionError when num_tr_batch <
                # args.n_log_epoch — confirm inputs rule that out.
                if (i + 1) % (num_tr_batch // args.n_log_epoch) == 0:
                    if args.distributed:
                        # Aggregate counters across workers before reporting.
                        torch.distributed.all_reduce(
                            num_correct, op=torch.distributed.ReduceOp.SUM)
                        torch.distributed.all_reduce(
                            num_example, op=torch.distributed.ReduceOp.SUM)
                        if args.dataset in ["CoLA"]:
                            torch.distributed.all_reduce(
                                num_tp, op=torch.distributed.ReduceOp.SUM)
                            torch.distributed.all_reduce(
                                num_fp, op=torch.distributed.ReduceOp.SUM)
                            torch.distributed.all_reduce(
                                num_tn, op=torch.distributed.ReduceOp.SUM)
                            torch.distributed.all_reduce(
                                num_fn, op=torch.distributed.ReduceOp.SUM)

                    if is_master(args):
                        if args.dataset in ["CoLA"]:
                            corref = _compute_metric_based_on_keys(
                                "corr", num_tp.item(), num_fp.item(),
                                num_tn.item(), num_fn.item())
                            logging(
                                "[{:>02d}/{:>08d}] Train | corref {:.4f} | gnorm {:.2f} "
                                "| lr {:.6f}".format(
                                    epoch, train_step, corref, gnorm,
                                    optimizer.param_groups[0]["lr"]))
                        else:
                            accuracy = num_correct.item() / num_example.item()
                            logging(
                                "[{:>02d}/{:>08d}] Train | accu {:.4f} | gnorm {:.2f} "
                                "| lr {:.6f}".format(
                                    epoch, train_step, accuracy, gnorm,
                                    optimizer.param_groups[0]["lr"]))
                    # Reset window counters after each report.
                    num_example.zero_()
                    num_correct.zero_()
                    if args.dataset in ["CoLA"]:
                        num_tp.zero_()
                        num_fp.zero_()
                        num_tn.zero_()
                        num_fn.zero_()

                ##### validation
                # Validate 10 times over the whole run.
                if train_step % (args.train_steps // 10) == 0:
                    accuracy = evaluate(args, model, va_loader)
                    if is_master(args):
                        # NOTE(review): var.pt is written before best_accuracy
                        # is updated, so it stores the *previous* best — confirm.
                        if accuracy > best_accuracy:
                            torch.save([model.state_dict(), optimizer],
                                       args.model_path)
                            torch.save([best_accuracy, train_step],
                                       args.var_path)
                        best_accuracy = max(accuracy, best_accuracy)
                        logging(
                            "[{}] Valid | curr accu {:.4f} | best accu {:.4f}".
                            format(train_step // (args.train_steps // 10),
                                   accuracy, best_accuracy))

    ##### make prediction
    if is_master(args) and args.write_prediction:
        # Reload the best checkpoint and write test/valid predictions.
        rev_label_dict = dict((v, k) for k, v in label_dict.items())
        model.load_state_dict(torch.load(args.model_path,
                                         map_location="cpu")[0],
                              strict=False)
        model = model.to(args.device)
        predict(args, model, te_loader,
                os.path.join(args.model_dir, "test_results.txt"),
                rev_label_dict)
        predict(args, model, va_loader,
                os.path.join(args.model_dir, "valid_results.txt"),
                rev_label_dict)
Exemplo n.º 9
0
# Build the log directory name from the dataset/sampling configuration;
# throwaway runs go under log/trash so the logging below still works.
if args.save:
    if args.dataset in ["CoraFull", "Computers", "Photo", "CS"]:
        nsave = "log/{}-{}/sample-{}/{}".format(args.dataset, args.train_num,
                                                args.sample, args.complete)
    else:
        if not args.keep_train_num:
            nsave = "log/{}/sample-{}/{}".format(args.dataset, args.sample,
                                                 args.complete)
        else:
            nsave = "log/{}-keep/sample-{}/{}".format(args.dataset,
                                                      args.sample,
                                                      args.complete)
else:
    print("not saving file")
    nsave = "log/trash/{}".format(args.complete)
create_exp_dir(nsave)  #, scripts_to_save=glob.glob('*.py'))
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p',
                    filemode="w")
# The log file name encodes every hyperparameter so runs don't collide.
nfile = "para{}-nhid{}-lr{}-lrg{}-hidg{}-wd{}-dr{}-layer{}-norm{}-seed{}-{}".format(
    args.compl_param, args.nhid, args.lr, args.lr_graph, args.hid_graph,
    args.wd, args.dropout, args.layertype, args.normalize, args.seed,
    args.dataseed)
fh = logging.FileHandler(os.path.join(nsave, nfile + ".txt"), "w")
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

dataset = load_dataset(args.dataset)
    def __init__(self, args):
        """Initialise single-GPU state, RNG seeding, experiment directory,
        logging and the model/optimizer/data loaders for a search run.

        Args:
            args: parsed experiment options.
        """
        self.args = args

        if not torch.cuda.is_available():
            logging.info('no gpu device available')
            sys.exit(1)

        # Single-process setup: rank 0, one GPU, one world.
        torch.cuda.set_device(self.args.gpu)
        self.device = torch.device("cuda")
        self.rank = 0
        self.seed = self.args.seed
        self.world_size = 1

        if self.args.fix_cudnn:
            # Deterministic mode: fixed cuDNN kernels, no benchmark
            # autotuning, and every RNG (python/numpy/torch/cuda) seeded.
            random.seed(self.seed)
            torch.backends.cudnn.deterministic = True
            np.random.seed(self.seed)
            cudnn.benchmark = False
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)
        else:
            # Faster non-deterministic mode: cuDNN autotunes its kernels.
            np.random.seed(self.seed)
            cudnn.benchmark = True
            torch.manual_seed(self.seed)
            cudnn.enabled = True
            torch.cuda.manual_seed(self.seed)
            torch.cuda.manual_seed_all(self.seed)

        # rank is always 0 here, so this branch always runs; kept for
        # symmetry with the distributed variant of this class.
        self.path = os.path.join(generate_date, self.args.save)
        if self.rank == 0:
            utils.create_exp_dir(generate_date,
                                 self.path,
                                 scripts_to_save=glob.glob('*.py'))
            logging.basicConfig(stream=sys.stdout,
                                level=logging.INFO,
                                format=log_format,
                                datefmt='%m/%d %I:%M:%S %p')
            fh = logging.FileHandler(os.path.join(self.path, 'log.txt'))
            fh.setFormatter(logging.Formatter(log_format))
            logging.getLogger().addHandler(fh)
            logging.info("self.args = %s", self.args)
            self.logger = tensorboardX.SummaryWriter('./runs/' +
                                                     generate_date + '/' +
                                                     self.args.save_log)
        else:
            self.logger = None

        # initialize loss function
        self.criterion = nn.CrossEntropyLoss().to(self.device)

        # initialize model (optionally restored from a checkpoint)
        self.init_model()
        if self.args.resume:
            self.reload_model()

        # calculate model param size
        if self.rank == 0:
            logging.info("param size = %fMB",
                         utils.count_parameters_in_MB(self.model))
            # Hand the model handles for tensorboard/console logging.
            self.model._logger = self.logger
            self.model._logging = logging

        # initialize optimizer
        self.init_optimizer()

        # initialize dataset loader
        self.init_loaddata()

        # Flags toggled during alternating weight/architecture updates.
        self.update_theta = True
        self.update_alpha = True
Exemplo n.º 11
0
                u = "c_{k-1}"
            else:
                u = str(j - 2)
            v = str(i)
            g.edge(u, v, label=op, fillcolor="gray")

    g.node("c_{k}", fillcolor='palegoldenrod')
    for i in range(steps):
        g.edge(str(i), "c_{k}", fillcolor="gray")

    g.render(filename, view=True)


import os
from utils import create_exp_dir
if __name__ == '__main__':
    # Visualize a named genotype: renders its normal and reduction cells
    # into ./vis/<ARCH_NAME>/.
    if len(sys.argv) < 2:
        print("usage:\n python {} ARCH_NAME".format(sys.argv[0]))
        sys.exit(1)
    genotype_name = sys.argv[1]
    file_path = './vis/' + genotype_name
    create_exp_dir(file_path)
    try:
        # getattr instead of eval: same AttributeError on an unknown name,
        # but does not execute arbitrary code supplied on the command line.
        genotype = getattr(genotypes, genotype_name)
    except AttributeError:
        print("{} is not specified in genotypes.py".format(genotype_name))
        sys.exit(1)

    plot1(genotype.normal, os.path.join(file_path, "normal"))
    plot1(genotype.reduce, os.path.join(file_path, "reduction"))
Exemplo n.º 12
0
                    default=0.9,
                    help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay',
                    type=float,
                    default=1e-3,
                    help='weight decay for arch encoding')
parser.add_argument('--resume',
                    default='',
                    type=str,
                    metavar='PATH',
                    help='path to latest checkpoint (default: none)')
args = parser.parse_args()

# Timestamped experiment directory so repeated runs never collide.
args.save = './logs/search/search-{}-{}'.format(args.save,
                                                time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=None)

# Mirror log output to stdout and to <save>/log.txt with the same format.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10


def main():
    if not torch.cuda.is_available():
Exemplo n.º 13
0
########################

# Sanity-check the split configuration before any training starts.
assert args.training_split_num >= args.valid_per_epoch

# A negative small_batch_size means "default to the full batch_size".
if args.small_batch_size < 0:
    args.small_batch_size = args.batch_size
assert args.batch_size % args.small_batch_size == 0, 'batch_size must be divisible by small_batch_size'

# 'maxlc' is treated as 'max' here; any other value passes through unchanged.
if args.coeff_opt == 'maxlc':
    current_coeff_opt = 'max'
else:
    current_coeff_opt = args.coeff_opt

# Timestamp the save dir only for fresh runs (a resumed run keeps its dir).
if not args.continue_train:
    args.save = '{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
# NOTE(review): create_exp_dir runs unconditionally (outside the `if` above),
# so it is also invoked when --continue_train resumes into an existing
# directory -- confirm that is intended.
create_exp_dir(args.save, scripts_to_save=['./src/main_train_topics.py', './src/model.py', './src/nsd_loss.py'])


def logging(s, print_=True, log_=True):
    """Write message *s* to stdout and/or append it to <args.save>/log.txt.

    Args:
        s: the (already formatted) message string.
        print_: when True, echo to stdout and flush immediately.
        log_: when True, append the message to the run's log file.
    """
    if print_:
        print(s)
        # Flush so progress is visible promptly when output is piped.
        sys.stdout.flush()
    if log_:
        log_path = os.path.join(args.save, 'log.txt')
        with open(log_path, 'a+') as log_file:
            log_file.write(s + '\n')

# Set the random seed manually for reproducibility.
seed_all_randomness(args.seed,args.cuda)

# Record the full configuration at the top of the run log.
logging('Args: {}'.format(args))
Exemplo n.º 14
0
def main():
    """Train a fixed (already-searched) DARTS genotype on CIFAR-10 and save weights."""
    # Timestamped eval directory; also snapshots all *.py scripts for reproducibility.
    args.save = 'eval-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    # Mirror log output to stdout and to <save>/log.txt.
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CIFAR_CLASSES = 10

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed NumPy and both CPU/GPU RNGs for reproducibility.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    # Look up the architecture by name from genotypes.py and build the network.
    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay
    )

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.epochs)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # Linearly ramp the drop-path probability over the course of training.
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        # training
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # Advance the LR scheduler (after the optimizer steps taken inside train()).
        scheduler.step()

        # Overwrite the checkpoint with the latest weights every epoch.
        utils.save(model, os.path.join(args.save, 'weights.pt'))
Exemplo n.º 15
0
        help='Dataset to use if you are using without warmstarting')

    args, unknowns = cmdline_parser.parse_known_args()

    log_lvl = logging.INFO if args.verbose == 'INFO' else logging.DEBUG
    logging.basicConfig(level=log_lvl, stream=sys.stdout)

    if unknowns:
        logging.warning('Found unknown arguments!')
        logging.warning(str(unknowns))
        logging.warning('These will be ignored')

    exp_dir = 'experiment-{}-{}'.format(
        args.methods,
        datetime.now().strftime("%Y%m%d-%H%M%S%f"))
    utils.create_exp_dir(exp_dir)

    genotype = config = None

    if (args.methods == 'DARTS' or args.methods == 'BOTH'):
        logging.info('\n###### NAS w/ DARTS ######\n')
        start = time.time()
        darts.main(exp_dir)
        architecture_res = exp_dir + '/arch'
        with open(architecture_res, 'rb') as f:
            genotype = pickle.load(f)
        end = time.time()
        logging.info('\nTime elapsed for DARTS: %.0f sec\n', (end - start))
    else:
        genotype = eval(str("genotypes." + args.genotype))
Exemplo n.º 16
0
def main():
    """One-shot NAS supernet training on ImageNet (single-path sampling).

    Parses a YAML config onto the argparse namespace, builds the
    ShuffleNetV2 one-shot supernet together with its architecture logits
    (``log_alpha``), then runs the train/validate/checkpoint loop.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Flatten the two-level YAML config onto the argparse namespace
    # (config values override CLI defaults).
    with open(args.config) as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted files -- consider yaml.safe_load. TODO confirm.
        config = yaml.load(f)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    print('Enabled distributed training.')

    # rank, world_size = init_dist(
    #     backend='nccl', port=args.port)
    # args.rank = rank
    # args.world_size = world_size

    # Distributed init is disabled above; rank/world_size are hard-coded.
    # NOTE(review): world_size=8 looks machine-specific -- verify before reuse.
    args.rank = 0
    args.world_size = 8

    # Per-rank seeding (with rank 0 the product is always 0).
    np.random.seed(args.seed*args.rank)
    torch.manual_seed(args.seed*args.rank)
    torch.cuda.manual_seed(args.seed*args.rank)
    torch.cuda.manual_seed_all(args.seed*args.rank)
    print('random seed: ', args.seed*args.rank)

    # create model
    print("=> creating model '{}'".format(args.model))
    if args.SinglePath:
        architecture = 20*[0]
        channels_scales = 20*[1.0]
        model = ShuffleNetV2_OneShot(args=args, architecture=architecture, channels_scales=channels_scales)
        model.cuda()
        #broadcast_params(model)
        # Pre-allocate zero gradients so the optimizers can be constructed and
        # stepped before the first backward pass.
        for v in model.parameters():
            if v.requires_grad:
                if v.grad is None:
                    v.grad = torch.zeros_like(v)
        model.log_alpha.grad = torch.zeros_like(model.log_alpha)

    # Label-smoothed cross entropy over the 1000 ImageNet classes.
    criterion = CrossEntropyLoss(smooth_eps=0.1, smooth_dist=(torch.ones(1000)*0.001).cuda()).cuda()


    # Split parameters: BatchNorm2d parameters get no weight decay.
    wo_wd_params = []
    wo_wd_param_names = []
    network_params = []
    network_param_names = []

    for name, mod in model.named_modules():
        if isinstance(mod, nn.BatchNorm2d):
            for key, value in mod.named_parameters():
                wo_wd_param_names.append(name+'.'+key)

    for key, value in model.named_parameters():
        if key != 'log_alpha':  # log_alpha is trained by the separate arch optimizer
            if value.requires_grad:
                if key in wo_wd_param_names:
                    wo_wd_params.append(value)
                else:
                    network_params.append(value)
                    network_param_names.append(key)

    params = [
        {'params': network_params,
         'lr': args.base_lr,
         'weight_decay': args.weight_decay },
        {'params': wo_wd_params,
         'lr': args.base_lr,
         'weight_decay': 0.},
    ]
    param_names = [network_param_names, wo_wd_param_names]
    if args.rank == 0:
        print('>>> params w/o weight decay: ', wo_wd_param_names)

    # SGD for the network weights; Adam for the architecture logits.
    optimizer = torch.optim.SGD(params, momentum=args.momentum)
    if args.SinglePath:
        arch_optimizer = torch.optim.Adam(
            [param for name, param in model.named_parameters() if name == 'log_alpha'],
            lr=args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=args.arch_weight_decay
        )

    # auto resume from a checkpoint
    # Build a human-readable run tag encoding the key hyper-parameters.
    remark = 'imagenet_'
    remark += 'epo_' + str(args.epochs) + '_layer_' + str(args.layers) + '_batch_' + str(args.batch_size) + '_lr_' + str(args.base_lr)  + '_seed_' + str(args.seed)

    if args.early_fix_arch:
        remark += '_early_fix_arch'

    if args.flops_loss:
        remark += '_flops_loss_' + str(args.flops_loss_coef)

    if args.remark != 'none':
        remark += '_'+args.remark

    args.save = 'search-{}-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"), remark)
    args.save_log = 'nas-{}-{}'.format(time.strftime("%Y%m%d-%H%M%S"), remark)
    generate_date = str(datetime.now().date())

    # Only rank 0 creates directories, file logging, and TensorBoard output.
    path = os.path.join(generate_date, args.save)
    if args.rank == 0:
        log_format = '%(asctime)s %(message)s'
        utils.create_exp_dir(generate_date, path, scripts_to_save=glob.glob('*.py'))
        logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                            format=log_format, datefmt='%m/%d %I:%M:%S %p')
        fh = logging.FileHandler(os.path.join(path, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(fh)
        logging.info("args = %s", args)
        writer = SummaryWriter('./runs/' + generate_date + '/' + args.save_log)
    else:
        writer = None

    model_dir = path
    start_epoch = 0

    # Either evaluate a fixed checkpoint or resume training state from model_dir.
    if args.evaluate:
        load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = load_state(model_dir, model, optimizer=optimizer)

    cudnn.benchmark = True
    cudnn.enabled = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training pipeline with random resized crops.
    transform = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize])
    train_dataset = datasets.ImageNet(split='train', transform=transform)

    # Training pipeline WITHOUT multi-scale crops (used near the end of
    # training when --off_ms is set, see the epoch loop below).
    transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize])
    train_dataset_wo_ms = datasets.ImageNet(split='train', transform=transform)

    # Deterministic validation pipeline.
    transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize])
    val_dataset = datasets.ImageNet(split='val', transform=transform)

    # train_sampler = DistributedSampler(train_dataset)
    # val_sampler = DistributedSampler(val_dataset)
    #
    # train_loader = DataLoader(
    #     train_dataset, batch_size=args.batch_size//args.world_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=False, sampler=train_sampler)
    #
    # train_loader_wo_ms = DataLoader(
    #     train_dataset_wo_ms, batch_size=args.batch_size//args.world_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=False, sampler=train_sampler)
    #
    # val_loader = DataLoader(
    #     val_dataset, batch_size=50, shuffle=False,
    #     num_workers=args.workers, pin_memory=False, sampler=val_sampler)

    train_loader = DataLoader(
        train_dataset, batch_size=args.batch_size//args.world_size, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    train_loader_wo_ms = DataLoader(
        train_dataset_wo_ms, batch_size=args.batch_size//args.world_size, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    val_loader = DataLoader(
        val_dataset, batch_size=50, shuffle=False,
        num_workers=args.workers, pin_memory=False)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer, logging)
        return

    niters = len(train_loader)

    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):
        #train_sampler.set_epoch(epoch)

        # Optionally freeze architecture choices whose top-2 softmax margin
        # reaches 0.3; frozen rows of log_alpha are re-pinned every epoch.
        if args.early_fix_arch:
            if len(model.fix_arch_index.keys()) > 0:
                for key, value_lst in model.fix_arch_index.items():
                    model.log_alpha.data[key, :] = value_lst[1]
            sort_log_alpha = torch.topk(F.softmax(model.log_alpha.data, dim=-1), 2)
            argmax_index = (sort_log_alpha[0][:,0] - sort_log_alpha[0][:,1] >= 0.3)
            for id in range(argmax_index.size(0)):
                if argmax_index[id] == 1 and id not in model.fix_arch_index.keys():
                    model.fix_arch_index[id] = [sort_log_alpha[1][id,0].item(), model.log_alpha.detach().clone()[id, :]]

        if args.rank == 0 and args.SinglePath:
            logging.info('epoch %d', epoch)
            logging.info(model.log_alpha)
            logging.info(F.softmax(model.log_alpha, dim=-1))
            logging.info('flops %fM', model.cal_flops())

        # train for one epoch
        # Drop multi-scale augmentation for the final 5 epochs in step-LR mode.
        if epoch >= args.epochs - 5 and args.lr_mode == 'step' and args.off_ms:
            train(train_loader_wo_ms, model, criterion, optimizer, arch_optimizer, lr_scheduler, epoch, writer, logging)
        else:
            train(train_loader, model, criterion, optimizer, arch_optimizer, lr_scheduler, epoch, writer, logging)


        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer, logging)
        # Optionally also evaluate the argmax ("max child") network.
        if args.gen_max_child:
            args.gen_max_child_flag = True
            prec1 = validate(val_loader, model, criterion, epoch, writer, logging)
            args.gen_max_child_flag = False

        if args.rank == 0:
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(model_dir, {
                'epoch': epoch + 1,
                'model': args.model,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Exemplo n.º 17
0
def train_model(args):
    """Train a DARTS genotype on CIFAR-10 or CIFAR-100 and checkpoint every epoch.

    Builds the network described by ``args.arch`` (either a name defined in
    ``genotypes`` or a literal genotype expression), trains it for
    ``args.epochs`` epochs with SGD and a cosine-annealed learning rate, and
    saves the DataParallel-unwrapped weights to ``<save_dir>/weights.pt``
    after each epoch.

    Args:
        args: parsed command-line namespace (save dir, seed, dataset flags,
            model and optimizer hyper-parameters).
    """
    # exist_ok=True replaces the old `isdir() == False` check: same effect, race-free.
    os.makedirs(args.save, exist_ok=True)
    save_dir = '{}eval-{}-{}'.format(args.save, args.note,
                                     time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py'))

    # Mirror log output to stdout and <save_dir>/log.txt.
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CIFAR_CLASSES = 100 if args.cifar100 else 10

    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    # Seed all RNGs for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)  # module-level leftover CLI args

    # Accept either a named genotype from genotypes.py or a literal genotype string.
    if args.arch in genotypes.__dict__.keys():
        genotype = eval("genotypes.%s" % args.arch)
    else:
        genotype = eval(args.arch)

    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Dataset-specific augmentation pipelines and datasets (single branch
    # instead of the original duplicated `if args.cifar100` checks).
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir,
                                   train=False,
                                   download=True,
                                   transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir,
                                  train=False,
                                  download=True,
                                  transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=args.workers)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs))
    best_acc = 0.0
    for epoch in range(args.epochs):
        # get_last_lr() replaces the deprecated get_lr(); the scheduler is now
        # stepped at the END of the epoch (after the optimizer updates), per
        # the PyTorch >=1.1 contract -- the old step-at-loop-top skipped the
        # schedule's first learning-rate value.
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_last_lr()[0])
        # Linearly ramp drop-path probability; set on both the DataParallel
        # wrapper and the wrapped module so either access path sees it.
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('Best_acc: %f', best_acc)
        scheduler.step()
        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        # Save the unwrapped module so the checkpoint loads without DataParallel.
        utils.save(model.module, os.path.join(save_dir, 'weights.pt'))
Exemplo n.º 18
0
def main():
    """Adversarial training / evaluation driven by an mmcv-style config file.

    Builds the network from ``architecture_code``, optionally initializes
    slurm-based distributed training, wraps the net in a PGD attacker, and
    runs the train/test loop, checkpointing on best adversarial accuracy.
    """
    global cfg, rank, world_size

    cfg = Config.fromfile(args.config)

    # Set seed
    np.random.seed(cfg.seed)
    cudnn.benchmark = True
    torch.manual_seed(cfg.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(cfg.seed)

    # Model
    print('==> Building model..')
    # Look up the architecture encoding named by cfg.model in architecture_code.
    arch_code = eval('architecture_code.{}'.format(cfg.model))
    net = models.model_entry(cfg, arch_code)
    rank = 0  # for non-distributed
    world_size = 1  # for non-distributed
    if args.distributed:
        print('==> Initializing distributed training..')
        init_dist(
            launcher='slurm', backend='nccl'
        )  # Only support slurm for now, if you would like to personalize your launcher, please refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py
        rank, world_size = get_dist_info()
    net = net.cuda()

    # Parameter count in millions, stored on the config so it gets logged below.
    cfg.netpara = sum(p.numel() for p in net.parameters()) / 1e6

    start_epoch = 0
    best_acc = 0
    # Load checkpoint.
    if cfg.get('resume_path', False):
        print('==> Resuming from {}checkpoint {}..'.format(
            ('original ' if cfg.resume_path.origin_ckpt else ''),
            cfg.resume_path.path))
        if cfg.resume_path.origin_ckpt:
            # "original" checkpoints load into the bare (non-DDP) module.
            utils.load_state(cfg.resume_path.path, net, rank=rank)
        else:
            # Otherwise wrap in DDP first so the state-dict keys line up.
            if args.distributed:
                net = torch.nn.parallel.DistributedDataParallel(
                    net,
                    device_ids=[torch.cuda.current_device()],
                    output_device=torch.cuda.current_device())
            utils.load_state(cfg.resume_path.path, net, rank=rank)

    # Data
    print('==> Preparing data..')
    trainloader, testloader, train_sampler, test_sampler = dataset_entry(
        cfg, args.distributed)
    criterion = nn.CrossEntropyLoss()
    if not args.eval_only:
        # Use a cheaper 7-step PGD attack during training; eval-only runs
        # keep whatever num_steps the config specifies.
        cfg.attack_param.num_steps = 7

    net_adv = AttackPGD(net, cfg.attack_param)
    # Train params
    print('==> Setting train parameters..')
    train_param = cfg.train_param
    epochs = train_param.epochs
    init_lr = train_param.learning_rate
    # Warm-up (when configured) starts from a lower base LR and adds epochs.
    if train_param.get('warm_up_param', False):
        warm_up_param = train_param.warm_up_param
        init_lr = warm_up_param.warm_up_base_lr
        epochs += warm_up_param.warm_up_epochs
    if train_param.get('no_wd', False):
        # Split parameters into weight-decay / no-weight-decay groups
        # (presumably norm/bias params -- see utils.param_group_no_wd).
        param_group, type2num, _, _ = utils.param_group_no_wd(net)
        cfg.param_group_no_wd = type2num
        optimizer = torch.optim.SGD(param_group,
                                    lr=init_lr,
                                    momentum=train_param.momentum,
                                    weight_decay=train_param.weight_decay)
    else:
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=init_lr,
                                    momentum=train_param.momentum,
                                    weight_decay=train_param.weight_decay)

    # Project-local cosine scheduler with optional warm-up epochs.
    scheduler = lr_scheduler.CosineLRScheduler(
        optimizer, epochs, train_param.learning_rate_min, init_lr,
        train_param.learning_rate,
        (warm_up_param.warm_up_epochs if train_param.get(
            'warm_up_param', False) else 0))
    # Log
    print('==> Writing log..')
    # Only rank 0 creates the experiment directory and the file logger.
    if rank == 0:
        cfg.save = '{}/{}-{}-{}'.format(cfg.save_path, cfg.model, cfg.dataset,
                                        time.strftime("%Y%m%d-%H%M%S"))
        utils.create_exp_dir(cfg.save)
        logger = utils.create_logger('global_logger', cfg.save + '/log.txt')
        logger.info('config: {}'.format(pprint.pformat(cfg)))

    # Evaluation only
    if args.eval_only:
        assert cfg.get(
            'resume_path',
            False), 'Should set the resume path for the eval_only mode'
        print('==> Testing on Clean Data..')
        test(net, testloader, criterion)
        print('==> Testing on Adversarial Data..')
        test(net_adv, testloader, criterion, adv=True)
        return

    # Training process
    for epoch in range(start_epoch, epochs):
        # Reshuffle the distributed samplers deterministically per epoch.
        train_sampler.set_epoch(epoch)
        test_sampler.set_epoch(epoch)
        scheduler.step()
        if rank == 0:
            logger.info('Epoch %d learning rate %e', epoch,
                        scheduler.get_lr()[0])

        # Train for one epoch
        train(net_adv, trainloader, criterion, optimizer)

        # Validate for one epoch
        valid_acc = test(net_adv, testloader, criterion, adv=True)

        if rank == 0:
            logger.info('Validation Accuracy: {}'.format(valid_acc))
            is_best = valid_acc > best_acc
            best_acc = max(valid_acc, best_acc)
            print('==> Saving')
            state = {
                'epoch': epoch,
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict(),
                'state_dict': net.state_dict(),
                'scheduler': scheduler
            }
            utils.save_checkpoint(state, is_best, os.path.join(cfg.save))
Exemplo n.º 19
0
parser.add_argument('--channels_last', type=str, default='False')

# others
parser.add_argument('--seed', type=int, default=2, help='random seed')
parser.add_argument('--note',
                    type=str,
                    default='try',
                    help='note for this run')

args, unparsed = parser.parse_known_args()
# NOTE(review): eval() on a CLI-supplied string; it works for 'True'/'False'
# but will execute arbitrary code -- ast.literal_eval would be safer. TODO confirm.
args.channels_last = eval(args.channels_last)

# Timestamped save directory tagged with the run note.
args.save = os.path.join(
    args.save, '{}-{}'.format(time.strftime("%Y%m%d-%H%M%S"), args.note))
# Only local rank 0 creates the experiment directory and the file logger.
if args.local_rank == 0:
    create_exp_dir(args.save,
                   scripts_to_save=glob.glob('*.py') + glob.glob('*.sh'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

# Pick the tensor memory layout when the installed torch version supports it.
if hasattr(torch, 'channels_last') and hasattr(torch, 'contiguous_format'):
    if args.channels_last:
        memory_format = torch.channels_last
    else:
        memory_format = torch.contiguous_format
                    action='store_true',
                    default=False,
                    help='use one-step unrolled validation loss')
parser.add_argument('--arch_learning_rate',
                    type=float,
                    default=3e-4,
                    help='learning rate for arch encoding')
parser.add_argument('--arch_weight_decay',
                    type=float,
                    default=1e-3,
                    help='weight decay for arch encoding')
args = parser.parse_args()

args.save = 'search-{}-{}'.format(args.save,
                                  time.strftime("%Y%m%d-%H%M%S"))  # timestamped name for the search directory
utils.create_exp_dir(
    args.save, scripts_to_save=glob.glob('*.py'))  # copy every .py script into the search directory
# glob.glob() finds all file paths matching a pattern
'''
log
'''
log_format = '%(asctime)s %(message)s'  # %(asctime)s = current time, %(message)s = the logged message
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10
Exemplo n.º 21
0
def main():
    """Distributed ImageNet train/eval entry point for a single-path
    ShuffleNetV2 one-shot child network.

    Reads a YAML config into ``args``, seeds RNGs per rank, builds the
    child model from the argmax architecture stored in
    ``args.checkpoint_path``, then runs the train/validate loop,
    checkpointing on rank 0.
    """
    global args, best_prec1
    args = parser.parse_args()

    # Merge the YAML config into args: every key in every top-level
    # section becomes an attribute, overriding the CLI defaults.
    with open(args.config) as f:
        # BUGFIX: PyYAML >= 6 requires an explicit Loader argument;
        # FullLoader reproduces the historical bare yaml.load(f) behavior.
        # (Config files are trusted local input here.)
        config = yaml.load(f, Loader=yaml.FullLoader)

    for key in config:
        for k, v in config[key].items():
            setattr(args, k, v)

    print('Enabled distributed training.')

    rank, world_size = init_dist(
        backend='nccl', port=args.port)
    args.rank = rank
    args.world_size = world_size

    # Per-rank seeding so data ordering differs across workers while
    # staying reproducible. NOTE(review): rank 0 always gets seed 0
    # (seed * 0) regardless of args.seed -- confirm this is intended.
    np.random.seed(args.seed*args.rank)
    torch.manual_seed(args.seed*args.rank)
    torch.cuda.manual_seed(args.seed*args.rank)
    torch.cuda.manual_seed_all(args.seed*args.rank)

    # create model
    print("=> creating model '{}'".format(args.model))
    if args.SinglePath:
        architecture = 20*[0]
        channels_scales = 20*[1.0]
        # Load the derived child network: argmax over the searched
        # log_alpha distribution gives a one-hot op selection per layer.
        log_alpha = torch.load(args.checkpoint_path, map_location='cuda:{}'.format(torch.cuda.current_device()))['state_dict']['log_alpha']
        weights = torch.zeros_like(log_alpha).scatter_(1, torch.argmax(log_alpha, dim = -1).view(-1,1), 1)
        model = ShuffleNetV2_OneShot(args=args, architecture=architecture, channels_scales=channels_scales, weights=weights)
        model.cuda()
        broadcast_params(model)
        # Pre-allocate zero gradients so the first gradient all-reduce
        # has buffers on every parameter.
        for v in model.parameters():
            if v.requires_grad:
                if v.grad is None:
                    v.grad = torch.zeros_like(v)
        model.log_alpha.grad = torch.zeros_like(model.log_alpha)
        if not args.retrain:
            # Resume weights and the last learning rate from the search
            # checkpoint.
            load_state_ckpt(args.checkpoint_path, model)
            checkpoint = torch.load(args.checkpoint_path, map_location='cuda:{}'.format(torch.cuda.current_device()))
            args.base_lr = checkpoint['optimizer']['param_groups'][0]['lr']
        if args.reset_bn_stat:
            model._reset_bn_running_stats()

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss(smooth_eps=0.1, smooth_dist=(torch.ones(1000)*0.001).cuda()).cuda()

    # Split parameters: BatchNorm params get no weight decay.
    wo_wd_params = []
    wo_wd_param_names = []
    network_params = []
    network_param_names = []

    for name, mod in model.named_modules():
        #if isinstance(mod, (nn.BatchNorm2d, SwitchNorm2d)):
        if isinstance(mod, nn.BatchNorm2d):
            for key, value in mod.named_parameters():
                wo_wd_param_names.append(name+'.'+key)

    for key, value in model.named_parameters():
        if key != 'log_alpha':
            if value.requires_grad:
                if key in wo_wd_param_names:
                    wo_wd_params.append(value)
                else:
                    network_params.append(value)
                    network_param_names.append(key)

    params = [
        {'params': network_params,
         'lr': args.base_lr,
         'weight_decay': args.weight_decay },
        {'params': wo_wd_params,
         'lr': args.base_lr,
         'weight_decay': 0.},
    ]
    param_names = [network_param_names, wo_wd_param_names]
    if args.rank == 0:
        print('>>> params w/o weight decay: ', wo_wd_param_names)
    optimizer = torch.optim.SGD(params, momentum=args.momentum)
    arch_optimizer=None

    # auto resume from a checkpoint
    remark = 'imagenet_'
    # BUGFIX: this line previously read "args.base_    lr" (identifier split
    # by stray whitespace), a SyntaxError; restored to args.base_lr.
    remark += 'epo_' + str(args.epochs) + '_layer_' + str(args.layers) + '_batch_' + str(args.batch_size) + '_lr_' + str(float("{0:.2f}".format(args.base_lr))) + '_seed_' + str(args.seed)

    if args.remark != 'none':
        remark += '_'+args.remark

    args.save = 'search-{}-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"), remark)
    args.save_log = 'nas-{}-{}'.format(time.strftime("%Y%m%d-%H%M%S"), remark)
    generate_date = str(datetime.now().date())

    path = os.path.join(generate_date, args.save)
    if args.rank == 0:
        # Rank 0 owns the experiment dir, file logging and TensorBoard.
        log_format = '%(asctime)s %(message)s'
        utils.create_exp_dir(generate_date, path, scripts_to_save=glob.glob('*.py'))
        logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                            format=log_format, datefmt='%m/%d %I:%M:%S %p')
        fh = logging.FileHandler(os.path.join(path, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(fh)
        logging.info("args = %s", args)
        writer = SummaryWriter('./runs/' + generate_date + '/' + args.save_log)
    else:
        writer = None

    #model_dir = args.model_dir
    model_dir = path
    start_epoch = 0

    if args.evaluate:
        load_state_ckpt(args.checkpoint_path, model)
    else:
        best_prec1, start_epoch = load_state(model_dir, model, optimizer=optimizer)

    cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Standard ImageNet pipelines; the *_wo_ms variant drops the
    # multi-scale RandomResizedCrop for the final epochs of retraining.
    train_dataset = ImagenetDataset(
        args.train_root,
        args.train_source,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_dataset_wo_ms = ImagenetDataset(
        args.train_root,
        args.train_source,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    val_dataset = ImagenetDataset(
        args.val_root,
        args.val_source,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]))

    train_sampler = DistributedSampler(train_dataset)
    val_sampler = DistributedSampler(val_dataset)

    # The global batch size is split evenly across ranks.
    train_loader = DataLoader(
        train_dataset, batch_size=args.batch_size//args.world_size, shuffle=False,
        num_workers=args.workers, pin_memory=False, sampler=train_sampler)

    train_loader_wo_ms = DataLoader(
        train_dataset_wo_ms, batch_size=args.batch_size//args.world_size, shuffle=False,
        num_workers=args.workers, pin_memory=False, sampler=train_sampler)

    val_loader = DataLoader(
        val_dataset, batch_size=50, shuffle=False,
        num_workers=args.workers, pin_memory=False, sampler=val_sampler)

    if args.evaluate:
        validate(val_loader, model, criterion, 0, writer, logging)
        return

    niters = len(train_loader)

    lr_scheduler = LRScheduler(optimizer, niters, args)

    for epoch in range(start_epoch, args.epochs):
        # Reshuffle the per-rank data shards for this epoch.
        train_sampler.set_epoch(epoch)

        if args.rank == 0 and args.SinglePath:
            logging.info('epoch %d', epoch)

        # evaluate on validation set after loading the model
        if epoch == 0 and not args.reset_bn_stat:
            prec1 = validate(val_loader, model, criterion, epoch, writer, logging)

        # train for one epoch (multi-scale aug is switched off for the
        # last 5 epochs of a step-lr retraining run)
        if epoch >= args.epochs - 5 and args.lr_mode == 'step' and args.off_ms and args.retrain:
            train(train_loader_wo_ms, model, criterion, optimizer, arch_optimizer, lr_scheduler, epoch, writer, logging)
        else:
            train(train_loader, model, criterion, optimizer, arch_optimizer, lr_scheduler, epoch, writer, logging)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch, writer, logging)

        if rank == 0:
            # remember best prec@1 and save checkpoint
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            save_checkpoint(model_dir, {
                'epoch': epoch + 1,
                'model': args.model,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
Exemplo n.º 22
0
 def save_table(self, path):
     """Persist the recorded sap_time array to *path* as an .npz archive."""
     target_dir = os.path.dirname(path)
     create_exp_dir(target_dir)
     np.savez(path, sap_time=self.sap_time)
Exemplo n.º 23
0
parser.add_argument('--grad_clip', type=float, default=5., help='gradient clipping')
parser.add_argument('--label_smooth', type=float, default=0.1, help='label smoothing')
parser.add_argument('--gamma', type=float, default=0.97, help='learning rate decay')
parser.add_argument('--decay_period', type=int, default=1, help='epochs between two learning rate decays')
parser.add_argument('--parallel', action='store_true', default=False, help='data parallelism')
args = parser.parse_args()

# Expand '~' in the dataset path and make sure it exists.
args.data = os.path.expanduser(args.data)
os.makedirs(args.data, exist_ok=True)

# PT_OUTPUT_DIR is presumably a cluster-provided output root; when set it
# overrides the CLI experiment path -- TODO confirm against the job platform.
pt_output_dir = os.environ.get('PT_OUTPUT_DIR', '')
if pt_output_dir:
    args.exp_path = pt_output_dir
# The search phase wrote genotype.txt under its own subdirectory; the eval
# phase reads it back from there.
geno_path = os.path.join(os.path.expanduser(args.exp_path), 'darts_pytorch_imagenet_orig_search', 'genotype.txt')
args.exp_path = os.path.join(os.path.expanduser(args.exp_path), 'darts_pytorch_imagenet_orig_eval')
# NOTE(review): this assumes create_exp_dir returns the final path (other
# variants in this codebase return None) -- verify against utils.
args.exp_path = utils.create_exp_dir(args.exp_path, scripts_to_save=glob.glob('*.py'))

args.seed = int(args.seed)

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                    format=log_format, datefmt='%m/%d %I:%M:%S %p')
# Mirror the console log into <exp_path>/log.txt.
fh = logging.FileHandler(os.path.join(args.exp_path, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CLASSES = 1000
class CrossEntropyLabelSmooth(nn.Module):
Exemplo n.º 24
0
parser.add_argument('--batch_size', type=int, default=128, help='batch_size')
# BUGFIX: the learning rate is a float; the old `type=int` rejected CLI
# values like "--lr 0.1" (the 0.1 default only worked because argparse
# never coerces defaults).
parser.add_argument('--lr', type=float, default=0.1, help='learning rate')
parser.add_argument('--gpu', type=int, default=3, help='GPU device to use')

args = parser.parse_args()

# if torch.cuda.is_available():
torch.cuda.set_device(args.gpu)
# cudnn.benchmark = True
# cudnn.enabled=True
# torch.cuda.manual_seed_all(args.seed)

# device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")

# Timestamped experiment directory; all *.py sources are snapshotted into it.
save_name = 'main-{}-{}'.format('EXP', time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(save_name, scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
# BUGFIX: the handler previously pointed at results/<save_name>/log.txt,
# but only <save_name>/ is created above, so FileHandler raised
# FileNotFoundError unless an unrelated 'results' tree already existed;
# log into the directory that was actually created.
fh = logging.FileHandler(os.path.join(save_name, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

logging.info('Args: {}'.format(args))

# logging.info(f"Using computation device: {device}")

Exemplo n.º 25
0
                    type=str,
                    default='EvNASA',
                    help='which architecture to use')
parser.add_argument('--grad_clip',
                    type=float,
                    default=5,
                    help='gradient clipping')
args = parser.parse_args()

args.save = 'eval-cifar100-{}-{}'.format(args.save,
                                         time.strftime("%Y%m%d-%H%M%S"))
#utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
# NOTE(review): for 100-epoch runs the timestamped name above is discarded
# and replaced by a fixed directory name -- confirm the overwrite is intended.
if args.epochs == 100:
    args.save = 'eval-{}epochs'.format(args.epochs)
if args.dir is not None:
    # Nest the run under <dir>/cifar100/ when an output root is given.
    utils.create_exp_dir(os.path.join(args.dir, 'cifar100'))
    args.save = os.path.join(args.dir, 'cifar100', args.save)
utils.create_exp_dir(args.save)

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
# Mirror the console log into <save>/log.txt.
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

logging.info('[INFO] torch version: {}, torchvision version: {}'.format(
    torch.__version__, torchvision.__version__))
Exemplo n.º 26
0
def model_compress(args):
    """Compress/search a cell architecture on CIFAR-10/100.

    Warm-up: train only network weights for ``args.eps_no_archs`` epochs.
    Afterwards also train the architecture parameters, and stop early once
    the operation-probability ranking stays unchanged for
    ``args.stable_arch`` consecutive epochs.

    Args:
        args: parsed argparse namespace of training/search hyperparameters.

    Returns:
        tuple: ``(result_geno, best_arch_stable)`` -- the selected Genotype
        (the last parsed one if no stable ranking emerged) and the longest
        stability streak observed.
    """
    # Ensure the base save directory exists (replaces the old
    # `if os.path.isdir(...) == False` check; exist_ok makes it idempotent).
    os.makedirs(args.save, exist_ok=True)
    save_dir = '{}compress-{}-{}'.format(args.save, args.note,
                                         time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.cifar100:
        CIFAR_CLASSES = 100
    else:
        CIFAR_CLASSES = 10

    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    # Seed all RNG sources for reproducibility.
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    #  prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(
            args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.train_data_dir,
                                   train=True,
                                   download=True,
                                   transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.train_data_dir,
                                  train=True,
                                  download=True,
                                  transform=train_transform)

    # Guarantee at least ~100 optimizer steps per epoch by replicating the
    # training split when dataset size / batch size would give fewer.
    num_train = len(train_data)
    iter_per_one_epoch = num_train // (2 * args.batch_size)
    if iter_per_one_epoch >= 100:
        train_extend_rate = 1
    else:
        train_extend_rate = (100 // iter_per_one_epoch) + 1

    iter_per_one_epoch = iter_per_one_epoch * train_extend_rate
    logging.info('num original train data: %d', num_train)
    logging.info('iter per one epoch: %d', iter_per_one_epoch)

    # Random train/validation split of the official training set.
    indices = list(range(num_train))
    random.shuffle(indices)
    split = int(np.floor(args.train_portion * num_train))
    train_set = torch.utils.data.Subset(train_data, indices[:split])
    valid_set = torch.utils.data.Subset(train_data, indices[split:num_train])

    train_set = torch.utils.data.ConcatDataset([train_set] * train_extend_rate)
    # valid_set = torch.utils.data.ConcatDataset([valid_set]*train_extend_rate)

    train_queue = torch.utils.data.DataLoader(
        train_set,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.RandomSampler(train_set),
        pin_memory=True,
        num_workers=args.workers)

    valid_queue = torch.utils.data.DataLoader(
        valid_set,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.RandomSampler(valid_set),
        pin_memory=True,
        num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    eps_no_arch = args.eps_no_archs
    epochs = args.epochs

    # Accept either a genotype name from genotypes.py or a literal genotype
    # expression. NOTE: eval() of a CLI string -- trusted input only.
    if args.arch in genotypes.__dict__.keys():
        genotype = eval("genotypes.%s" % args.arch)
    else:
        genotype = eval(args.arch)

    model = Network(genotype,
                    args.init_channels,
                    CIFAR_CLASSES,
                    args.layers,
                    criterion,
                    steps=args.inter_nodes,
                    multiplier=args.inter_nodes,
                    stem_multiplier=args.stem_multiplier,
                    residual_connection=args.residual_connection)
    model = nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    # Network weights = every parameter except the architecture alphas.
    network_params = []
    for k, v in model.named_parameters():
        if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
            network_params.append(v)

    optimizer = torch.optim.SGD(network_params,
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                   lr=args.arch_learning_rate,
                                   betas=(0.5, 0.999),
                                   weight_decay=args.arch_weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(epochs), eta_min=args.learning_rate_min)

    scheduler_a = torch.optim.lr_scheduler.StepLR(optimizer_a, 30, gamma=0.2)

    train_epoch_record = -1
    arch_train_count = 0
    prev_geno = ''
    prev_rank = None
    rank_geno = None
    result_geno = None
    arch_stable = 0
    best_arch_stable = 0

    for epoch in range(epochs):

        # get_last_lr() replaces the deprecated get_lr(), which returns
        # incorrect values when called outside scheduler.step() for
        # chainable schedulers such as CosineAnnealingLR.
        lr = scheduler.get_last_lr()[0]
        logging.info('Epoch: %d lr: %e', epoch, lr)
        epoch_start = time.time()
        # training
        if epoch < eps_no_arch:
            # Warm-up phase: weights only, architecture frozen.
            train_acc, train_obj = train(train_queue,
                                         valid_queue,
                                         model,
                                         network_params,
                                         criterion,
                                         optimizer,
                                         optimizer_a,
                                         lr,
                                         train_arch=False)

        else:
            ops, probs = compressing_parse(model)
            concat = range(2, 2 + model.module._steps)
            genotype = Genotype(
                normal=ops[0],
                normal_concat=concat,
                reduce=ops[1],
                reduce_concat=concat,
            )

            # Log the genotype only when it changed since last epoch.
            if str(prev_geno) != str(genotype):
                prev_geno = genotype
                logging.info(genotype)

            # early stopping

            # Track the ranking of op probabilities; any change resets the
            # stability counter.
            stable_cond = True
            rank = []
            for i in range(len(probs)):
                rank_tmp = ranking(probs[i])
                rank.append(rank_tmp)

            if prev_rank != rank:
                stable_cond = False
                arch_stable = 0
                prev_rank = rank
                rank_geno = genotype
                logging.info('rank: %s', rank)

            if stable_cond:
                arch_stable += 1

            if arch_stable > best_arch_stable:
                best_arch_stable = arch_stable
                result_geno = rank_geno
                logging.info('arch_stable: %d', arch_stable)
                logging.info('best genotype: %s', rank_geno)

            # Stop once the ranking stayed unchanged long enough.
            if arch_stable >= args.stable_arch - 1:
                logging.info('stable genotype: %s', rank_geno)
                result_geno = rank_geno
                break

            train_acc, train_obj = train(train_queue,
                                         valid_queue,
                                         model,
                                         network_params,
                                         criterion,
                                         optimizer,
                                         optimizer_a,
                                         lr,
                                         train_arch=True)
            arch_train_count += 1

            scheduler_a.step()

        scheduler.step()
        logging.info('Train_acc %f, Objs: %e', train_acc, train_obj)
        epoch_duration = time.time() - epoch_start
        logging.info('Epoch time: %ds', epoch_duration)

        # validation
        if epoch >= eps_no_arch:
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('Valid_acc %f, Objs: %e', valid_acc, valid_obj)

        # # early arch training
        # if train_epoch_record == -1:
        #     if train_acc > 70:
        #         arch_train_num = args.epochs - args.eps_no_archs
        #         eps_no_arch = 0
        #         train_epoch_record = epoch
        # else:
        #     if epoch >= train_epoch_record + arch_train_num:
        #         break

        utils.save(model, os.path.join(save_dir, 'weights.pt'))

    # last geno parser
    ops, probs = compressing_parse(model)
    concat = range(2, 2 + model.module._steps)
    genotype = Genotype(
        normal=ops[0],
        normal_concat=concat,
        reduce=ops[1],
        reduce_concat=concat,
    )
    logging.info('Last geno: %s', genotype)

    if result_geno is None:
        result_geno = genotype

    return result_geno, best_arch_stable
Exemplo n.º 27
0
def main():
    """Run DARTS architecture search on CIFAR-10.

    Builds the over-parameterized search network, then alternates weight
    training (``train``) and architecture updates (``Architect``) for
    ``args.epochs`` epochs, logging the current genotype every epoch.
    """
    args.save = 'search-{}-{}'.format(args.save,
                                      time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout,
                        level=logging.INFO,
                        format=log_format,
                        datefmt='%m/%d %I:%M:%S %p')
    # Mirror the console log into <save>/log.txt.
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    CIFAR_CLASSES = 10

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed every RNG source for reproducibility.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    train_data = dset.CIFAR10(root=args.data,
                              train=True,
                              download=True,
                              transform=train_transform)

    # Split the training set: the first portion trains the weights, the
    # remainder serves as validation data for the architecture updates.
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True,
        num_workers=2)

    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:num_train]),
        pin_memory=True,
        num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        # Log the genotype implied by the current architecture weights.
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)

        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(train_queue, valid_queue, model,
                                     architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # advance the learning-rate scheduler
        scheduler.step()

        utils.save(model, os.path.join(args.save, 'weights.pt'))
Exemplo n.º 28
0
#                   maxlen=args.maxlen,
#                   fields=args.fields,
#                   token_level=args.token_level,
#                   vocab_size=args.vocab_size,
#                   lowercase=args.lowercase,
#                   cut_by_cnt=False, create_dict=False,
#                   if_tokenize=False, if_vetorize=False)


# save arguments
# Vocabulary size is taken from the corpus dictionary built above.
ntokens = len(corpus.dictionary.word2idx)
print("Vocabulary Size: {}".format(ntokens))
args.ntokens = ntokens

# exp dir
# This create_exp_dir variant snapshots the listed scripts and also stores
# the vocabulary and run options next to the experiment.
create_exp_dir(os.path.join(args.save), ['train_ae.py', 'models.py', 'utils.py'],
               dict=corpus.dictionary.word2idx, options=args)

def logging(s, to_stdout=True):
    """Append *s* to <args.save>/log.txt and optionally echo it to stdout.

    The first parameter was renamed from ``str`` to ``s``: it shadowed the
    builtin ``str`` type inside the function body (the only visible caller
    passes it positionally, so the rename is safe).
    """
    with open(os.path.join(args.save, 'log.txt'), 'a') as f:
        f.write(s + '\n')
    if to_stdout:
        print(s)


logging(str(vars(args)))  # record the full run configuration at startup

eval_batch_size = 32

# Whole batches only; the ragged final batch is dropped.
train_batches_num = math.floor(corpus.train_num / args.batch_size)
test_batches_num = math.floor(corpus.test_num / eval_batch_size)
Exemplo n.º 29
0
def main():
    """Evolve flocking parameters with a genetic algorithm while fine-tuning
    the pretrained network online.

    Sets up a timestamped experiment directory, copies the pretrained
    weights and helper scripts into it, then alternates PyGMO SGA
    generations with online model updates for 29 iterations.
    """
    import shutil  # local import: only needed for the file copies below

    seed = args.seed
    np.random.seed(seed)
    cudnn.benchmark = True
    torch.manual_seed(seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(seed)
    timestamp = str(utils.get_unix_timestamp())
    utils.makedirs(args.save)
    path = os.path.join(args.save, timestamp)
    utils.create_exp_dir(path, scripts_to_save=glob.glob('../*.py'))
    logger = utils.get_logger(args.save, timestamp, file_type='txt')
    utils.makedirs(os.path.join(path, 'logs'))
    logger.info("time = %s, args = %s", str(utils.get_unix_timestamp()), args)

    input_shape = [
        11, 9, 3
    ]  # MANUALLY SET NUMBER OF CHANNELS (11) ACCORDING TO PRETRAINING

    # Copy the pretrained weights and helper scripts into the run directory.
    # shutil.copyfile replaces the previous os.system('cp -f ...') calls:
    # no shell string is built (safe with spaces/special characters in
    # paths) and a missing source now raises instead of failing silently.
    shutil.copyfile('../pretrain-weights.pt', os.path.join(path, 'weights.pt'))
    utils.makedirs(os.path.join(path, 'scripts'))
    shutil.copyfile('./for-copy/parse-ga.py',
                    os.path.join(path, 'scripts', 'parse-ga.py'))
    # TODO(review): the original also copied parse-ga.py to parse-log.py --
    # this looks like a copy/paste slip (should the source be
    # ./for-copy/parse-log.py?); behavior kept as-is pending confirmation.
    shutil.copyfile('./for-copy/parse-ga.py',
                    os.path.join(path, 'scripts', 'parse-log.py'))
    shutil.copyfile('./for-copy/parse_data.py',
                    os.path.join(path, 'scripts', 'parse_data.py'))
    shutil.copyfile('./for-copy/optimization-plots.sh',
                    os.path.join(path, 'scripts', '1_optimization-plots.sh'))

    # PyTorch
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)
    model = Network(input_shape, args.num_drones, criterion, path)
    model = model.to(device)
    utils.load(model, os.path.join(path, 'weights.pt'))
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # PyGMO: single-objective genetic algorithm over the flocking problem.
    prob = pg.problem(genetic_algo.Flocking(path, timestamp, model))
    pop = pg.population(prob, size=10, seed=24601)
    algo = pg.algorithm(
        pg.sga(gen=1,
               cr=.90,
               m=0.02,
               param_s=3,
               crossover="single",
               mutation="uniform",
               selection="truncated"))
    algo.set_verbosity(1)

    for i in range(29):
        # Log the champion and the full population before evolving.
        logger.info(
            "time = %s gen = %d \n champ_f = %s \n champ_x = %s \n f_s = %s \n x_s = %s \n id_s = %s",
            str(utils.get_unix_timestamp()), i + 1,
            str(np.array(pop.champion_f).tolist()),
            str(np.array(pop.champion_x).tolist()),
            str(np.array(pop.get_f()).tolist()),
            str(np.array(pop.get_x()).tolist()),
            str(np.array(pop.get_ID()).tolist()))
        pop = algo.evolve(pop)
        # Fine-tune the network on the most recent rollouts, then persist.
        model.online_update(path, genetic_algo.TS_LIST[-100:], input_shape,
                            criterion, optimizer, logger, i)
        utils.save(model, os.path.join(path, 'weights.pt'))
Exemplo n.º 30
0
                    default=0.2,
                    help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch',
                    type=str,
                    default='DARTS',
                    help='which architecture to use')
parser.add_argument('--grad_clip',
                    type=float,
                    default=5,
                    help='gradient clipping')
args = parser.parse_args()

# Timestamped evaluation directory; all *.py sources are snapshotted into it.
args.save = 'eval-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
# Mirror the console log into <save>/log.txt.
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10


def main():
    if not torch.cuda.is_available():
Exemplo n.º 31
0
parser.add_argument('--seed', type=int, default=0, help='random seed')
parser.add_argument('--arch',
                    type=str,
                    default='SNAS_edge_all',
                    help='which architecture to use')
parser.add_argument('--grad_clip',
                    type=float,
                    default=5,
                    help='gradient clipping')
args = parser.parse_args()
print(args.arch)
# Evaluation directory named by experiment, timestamp and architecture.
args.save = 'eval-{}-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"),
                                   args.arch)
generate_date = str(datetime.now().date())
# NOTE(review): this create_exp_dir variant takes the date as a first
# argument (unlike the single-argument variant used elsewhere); it
# presumably nests the run under a per-date folder -- verify against utils.
utils.create_exp_dir(generate_date,
                     args.save,
                     scripts_to_save=glob.glob('*.py'))

log_format = '%(asctime)s %(message)s'
logging.basicConfig(stream=sys.stdout,
                    level=logging.INFO,
                    format=log_format,
                    datefmt='%m/%d %I:%M:%S %p')
# Mirror the console log into <save>/log.txt.
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

CIFAR_CLASSES = 10

# TensorBoard writer for this architecture's evaluation curves.
logger = tensorboardX.SummaryWriter('./runs/eval_{}'.format(args.arch))