Example #1
File: exp.py  Project: jalynnliu/Nasexp
def random_test(nn=NetworkUnit()):
    """Fix a network structure, give a setting randomly and get its score"""
    spl = Sampler()
    eva = Evaluater()
    spl.renewp(CONFIG_RAND_VECTOR)
    scores = []

    for i in range(TEST_MAX_EPOCH):
        nn.set_cell(spl.sample(len(nn.graph_part)))
        score = eva.evaluate(nn)
        scores.append(score)
    return scores
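
# A hypothetical invocation (not part of exp.py): NetworkUnit, Sampler,
# Evaluater, CONFIG_RAND_VECTOR, and TEST_MAX_EPOCH all come from the
# surrounding Nasexp project.
#
#   scores = random_test(NetworkUnit())
#   print('best score:', max(scores))
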
class Experiment_struct:

    def __init__(self, nn, sample_size=5, budget=20, positive_num=2, r_p=0.99, uncertain_bit=3, add_num=20000):
        self.nn = nn
        self.spl = Sampler_struct(nn)
        self.opt = Optimizer(self.spl.get_dim(), self.spl.get_parametets_subscript())
        # sample_size = 5  # number of instances sampled per iteration
        # budget = 20  # budget in online style
        # positive_num = 2  # size of the PosPop set
        # r_p = 0.99  # probability of sampling from the model
        # uncertain_bit = 3  # number of dimensions sampled randomly
        # set hyperparameters for optimization; budget is unused for single-step optimization
        self.opt.set_parameters(ss=sample_size, bud=budget, pn=positive_num, rp=r_p, ub=uncertain_bit)
        # clear optimization model
        self.opt.clear()
        self.budget = budget
        pros = self.opt.sample()
        self.spl.renewp(pros)
        self.eva = Evaluater()
        self.eva.add_data(add_num)
        self.opt_p_log = []
        print(self.eva.max_steps)
        print(len(pros))

        for i in range(self.budget):
            self.opt_p_log.append(pros)
            spl_list = self.spl.sample()
            self.nn.cell_list.append(spl_list)
            # score = np.random.random()
            # time_tmp = time.time()
            # score = self.eva.evaluate(self.nn, i, time_tmp)
            score = self.eva.evaluate(self.nn)
            # update the optimization model with the obtained score
            # update pros in spl
            self.opt.update_model(pros, -score)
            pros = self.opt.sample()
            self.spl.renewp(pros)

        self.res_fea = self.opt.get_optimal().get_features()
        self.res_fit = self.opt.get_optimal().get_fitness()
        print('best:')
        print('features', self.res_fea)  # pros
        print('fitness', self.res_fit)  # scores
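
    # The loop above is a standard derivative-free optimization round:
    # sample a candidate (pros), evaluate the resulting network, and feed
    # back -score so the minimizing optimizer rewards high scores. One
    # hypothetical extra round, reusing the same objects:
    #
    #   pros = self.opt.sample()                 # draw a candidate
    #   self.spl.renewp(pros)                    # install it in the sampler
    #   self.nn.cell_list.append(self.spl.sample())
    #   self.opt.update_model(pros, -self.eva.evaluate(self.nn))
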
    def test_word_count(self):
        file_name = 'data/01-train-input.txt'
        reader = Reader(file_name)
        reader.read_file()
        unigram = TrainUnigram(reader.word_list, reader.total_word_count)
        unigram.train_unigram()

        file_name = 'data/unigram_model'
        write_model = Writer(file_name, unigram.word_dict)
        write_model.write_file()
        model = Model(file_name)
        model.read_model()

        file_name = 'data/01-test-input.txt'
        test = Evaluater(file_name, model.word_dict)
        test.evaluate_model()

        print('entropy is ' + str(test.H / test.total_word_count))
        print('coverage is ' + str(1.0 * (test.total_word_count - test.unknown_word_count) / test.total_word_count))
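
Evaluater.evaluate_model is not shown above; the sketch below reconstructs the entropy and coverage it reports, assuming the usual interpolation with a uniform unknown-word distribution (lambda_1 and vocab_size are illustrative assumptions, not the project's values).

import math

def unigram_entropy_and_coverage(test_words, word_dict,
                                 lambda_1=0.95, vocab_size=10**6):
    H = 0.0        # accumulated negative log2-likelihood
    unknown = 0    # test words never seen in training
    for w in test_words:
        # interpolate the unigram probability with a uniform unknown-word model
        p = lambda_1 * word_dict.get(w, 0.0) + (1 - lambda_1) / vocab_size
        H += -math.log2(p)
        if w not in word_dict:
            unknown += 1
    n = len(test_words)
    return H / n, (n - unknown) / n

# e.g. unigram_entropy_and_coverage('a b a'.split(), {'a': 2/3, 'b': 1/3})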
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.valid_data_loader
    test_data_loader = data_loader.test_data_loader

    # build model architecture, then print to console
    global model
    for file, types in files_models.items():
        for arch_type in types:
            if config["arch"]["type"] == arch_type:
                model = config.init_obj('arch', eval("module_arch_" + file))
                logger.info(model)

    # get function handles of loss and metrics
    if config['loss']['type'] == 'FocalLoss2d':
        count = data_loader.count
        indices = data_loader.indices
        w = np.max(count[indices]) / count
        w[indices] = 0

        only_scored_classes = config['trainer'].get('only_scored_class', False)
        if only_scored_classes:
            w = w[indices]

        weight = config['loss'].get('args', w)
        criterion = getattr(module_loss, 'FocalLoss2d')(weight=weight)
    else:
        criterion = getattr(module_loss, config['loss']['type'])

    # get function handles of metrics

    challenge_metrics = ChallengeMetric(config['data_loader']['args']['label_dir'])
    # challenge_metrics = ChallengeMetric2(num_classes=9)

    metrics = [getattr(challenge_metrics, met) for met in config['metrics']]

    # prepare model for testing
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    # Evaluater
    checkpoint_dir = config["arch"]["checkpoint_dir"]
    result_dir = config["arch"]["result_dir"]
    evaluater = Evaluater(model, criterion, metrics,
                          config=config,
                          test_data_loader=test_data_loader,
                          checkpoint_dir=checkpoint_dir,
                          result_dir=result_dir)

    evaluater.evaluate()

    challenge_metrics.return_metric_list()

    evaluater.analyze(challenge_metrics)
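
A toy numpy illustration of the FocalLoss2d weighting above (the counts and indices are made up): rarer classes receive larger inverse-frequency weights, scaled by the largest scored-class count, and the scored classes themselves are then zeroed exactly as in the code.

import numpy as np

count = np.array([100., 10., 1000., 50.])  # per-class sample counts (made up)
indices = np.array([0, 2])                 # scored classes (made up)
w = np.max(count[indices]) / count         # inverse-frequency weights
w[indices] = 0                             # zeroed, as in the snippet above
print(w)                                   # -> [0., 100., 0., 20.]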
def main(config):
    logger = config.get_logger('train')

    # setup data_loader instances
    data_loader = config.init_obj('data_loader', module_data)
    valid_data_loader = data_loader.valid_data_loader
    test_data_loader = data_loader.test_data_loader

    # build model architecture, then print to console
    model = config.init_obj('arch', module_arch)
    logger.info(model)

    # get function handles of loss and metrics
    criterion = getattr(module_loss, config['loss'])
    metrics = [getattr(module_metric, met) for met in config['metrics']]

    # build optimizer and learning-rate scheduler; delete every line containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = config.init_obj('optimizer', torch.optim, trainable_params)

    lr_scheduler = config.init_obj('lr_scheduler', torch.optim.lr_scheduler, optimizer)

    if config.config['trainer']['type'] == 'MC_dropout':
        trainer = TrainerMc(model, criterion, metrics, optimizer,
                          config=config,
                          data_loader=data_loader,
                          valid_data_loader=valid_data_loader,
                          lr_scheduler=lr_scheduler)
    elif config.config['trainer']['type'] == 'Quality_driven_PI':
        trainer = TrainerQd(model, criterion, metrics, optimizer,
                          config=config,
                          data_loader=data_loader,
                          valid_data_loader=valid_data_loader,
                          lr_scheduler=lr_scheduler)
    elif config.config['trainer']['type'] == 'Deep_Ensemble':
        trainer = TrainerDe(model, criterion, metrics, optimizer,
                            config=config,
                            data_loader=data_loader,
                            valid_data_loader=valid_data_loader,
                            lr_scheduler=lr_scheduler)
    else:
        trainer = Trainer(model, criterion, metrics, optimizer,
                            config=config,
                            data_loader=data_loader,
                            valid_data_loader=valid_data_loader,
                            lr_scheduler=lr_scheduler)
    trainer.train()

    if config.config['trainer']['type'] == 'MC_dropout':
        evaluater = EvaluaterMC(model, criterion, metrics,
                                config=config,
                                test_data_loader=test_data_loader)
    elif config.config['trainer']['type'] == 'Quality_driven_PI':
        evaluater = EvaluaterQd(model, criterion, metrics,
                              config=config,
                              test_data_loader=test_data_loader)
    elif config.config['trainer']['type'] == 'Deep_Ensemble':
        evaluater = EvaluaterDE(model, criterion, metrics,
                              config=config,
                              test_data_loader=test_data_loader)
    else:
        evaluater = Evaluater(model, criterion, metrics,
                                config=config,
                                test_data_loader=test_data_loader)
    evaluater.evaluate()
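
config.init_obj is not shown in these snippets; in templates of this style it typically resolves a class named in the config and forwards the configured kwargs. A hypothetical minimal version:

def init_obj(config, name, module, *args, **kwargs):
    # e.g. config['optimizer'] == {'type': 'Adam', 'args': {'lr': 1e-3}}
    cls_name = config[name]['type']
    cls_args = dict(config[name]['args'])
    cls_args.update(kwargs)
    return getattr(module, cls_name)(*args, **cls_args)

# e.g. optimizer = init_obj(config, 'optimizer', torch.optim, trainable_params)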
def main(argv):
    conf_file = './imagenet_train.yaml' if len(sys.argv) == 1 else sys.argv[1]

    # 1. Load the config; hyperparameters should be defined in the YAML config
    config = Configure(conf_file)
    config.reconfigure()
    param = config.param

    # 2. Select a model, dataset, lr, opt, and so on, as these can be enumerated.
    create_model_func = official_model.ImageNetModel(param.resnet_layer, param.class_num) 
    
    if param.validation_enable:
        evaluater = Evaluater(param, eval_dataset, 
                          modelfun = lambda image : create_model_func(image, False),
                          accuracyfun = lambda labels, predicts: accuracy(labels, predicts, 1))
    else:
        evaluater = None

    pipe = multipipeline.Pipeline(param)
    
    with tf.device('/device:CPU:0'), tf.name_scope('cpu_0') as op_scope:
        global_step = tf.train.get_or_create_global_step()
        lr = PiecewiseLR(param)
    
    opt = tf.train.MomentumOptimizer(lr, param.momentum)    
    
    # 3. Set up the graph; these parts should not need modification
    # 3.1 set up the dataset
    train_set = train_dataset(param)
    device_dataset = pipe.setup_dataset(train_set)

    # 3.2 set up the model and loss
    device_labels, device_predicts = pipe.setup_model(device_dataset, create_model_func)
    
    def compute_loss_func(labels, logits):
        loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
        return tf.reduce_mean(loss)
    device_losses, train_loss, l2_loss = pipe.setup_loss(device_labels, device_predicts, compute_loss_func, param.weight_decay)

    top1 = pipe.setup_reduce(device_labels, device_predicts, lambda x, y: accuracy(x, y, 1), use_mean=True)
    top5 = pipe.setup_reduce(device_labels, device_predicts, lambda x, y: accuracy(x, y, 5), use_mean=True)
    
    tf.summary.scalar('top1', top1)
    tf.summary.scalar('top5', top5)

    
    # 3.3 set up gradient computation and update
    train_op = pipe.setup_train(device_losses, opt)
    
    hooks = pipe.get_hook() + [
        tf.train.StopAtStepHook(last_step=param.all_step),
        train_hook.SaverHook(param, save_every_n_steps=10000, evaluater=evaluater),
        train_hook.TrainStateHook(param, lr, train_loss, l2_loss,
                                  {'batch_top1': top1, 'batch_top5': top5},
                                  every_sec=15),
        train_hook.ProfilerHook(save_steps=200, output_dir=param.checkpoint)
        #train_hook.SummaryHook(path=param.checkpoint)
    ]
    logging.info("set up hooks done")

    # session config; see tensorflow/core/protobuf/config.proto
    # (note: rebinds `config`, shadowing the Configure object above)
    config = tf.ConfigProto()

    config.allow_soft_placement=True
    config.log_device_placement=False
    
    # CPU thread counts auto-set (0 = let TF decide)
    #config.intra_op_parallelism_threads=0
    #config.inter_op_parallelism_threads=0
    #config.session_inter_op_thread_pool.num_threads=0
    #config.session_inter_op_thread_pool.global_name='train'

    # session
    config.use_per_session_threads = True
    config.isolate_session_state = True
    
    # GPU
    config.gpu_options.allow_growth = True
    config.gpu_options.force_gpu_compatible = False
    # does this disable NCCL?
    #config.gpu_options.experimental.use_unified_memory=True
    #config.gpu_options.per_process_gpu_memory_fraction = 1.0

    # graph
    #config.graph_options.enable_recv_scheduling = True # not used
    #config.graph_options.build_cost_model=1
    #config.graph_options.build_cost_model_after=1
    #config.graph_options.infer_shapes=True
    #config.graph_options.enable_bfloat16_sendrecv=False
    config.graph_options.optimizer_options.do_common_subexpression_elimination = True
    config.graph_options.optimizer_options.max_folded_constant_in_bytes = 0 # default 10MB
    #config.graph_options.optimizer_options.do_function_inlining = True

    #config.graph_options.optimizer_options.opt_level = config_pb2.OptimizerOptions.L1
    #config.graph_options.optimizer_options.global_jit_level = config_pb2.OptimizerOptions.ON_1
   
    # default on
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.ON
    #config.graph_options.rewrite_options.optimizers='autoparallel'
    
    # start the training loop
    scaffold = InitScaffold(param)
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           scaffold=scaffold,
                                           config=config) as mon_sess:
        
        #pipe.vgr.debug_cross_device_op()
                                      
        while not mon_sess.should_stop():
            mon_sess.run([train_op])
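
PiecewiseLR is project-specific and not shown above; a plausible TF1 stand-in is a piecewise-constant schedule keyed on the global step. A minimal sketch, with illustrative boundaries and rates rather than the project's values:

import tensorflow as tf

def piecewise_lr(global_step):
    # step boundaries and learning rates below are made-up examples
    boundaries = [150000, 300000, 450000]
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)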