Example #1
def run_no_expert():
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('Running: no experts, pure Racos')
    log_buffer.append('+++++++++++++++++++++++++++++++')
    print('+++++++++++++++++++++++++++++++')
    print('Running: no experts, pure Racos')
    print('+++++++++++++++++++++++++++++++')

    # optimization
    racos = RacosOptimization(dimension)
    opt_error_list = []

    for i in range(opt_repeat):
        start_t = time.time()
        racos.mix_opt(prob_fct,
                      ss=sample_size,
                      bud=budget,
                      pn=positive_num,
                      rp=rand_probability,
                      ub=uncertain_bit)
        end_t = time.time()

        optimal = racos.get_optimal()
        opt_error = optimal.get_fitness()

        hour, minute, second = time_formulate(start_t, end_t)

        print('spending time: ', hour, ' hours ', minute, ' minutes ', second,
              ' seconds')
        print('optimal value: ', opt_error)
        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))

    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    print('--------------------------------------------------')
    print('optimization result for ' + str(opt_repeat) + ' times average: ',
          opt_mean, ', standard deviation is: ', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result for ' + str(opt_repeat) +
                      ' times average: ' + str(opt_mean) +
                      ', standard deviation is: ' + str(opt_std))

    return opt_mean, opt_std
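
# Usage sketch (assumption): run_no_expert() reads its configuration from
# module-level globals rather than arguments. The names below mirror the ones
# used inside the function; the values are illustrative, not prescribed.
if __name__ == '__main__':
    log_buffer = []
    opt_repeat = 10
    sample_size, budget = 10, 500
    positive_num, rand_probability, uncertain_bit = 2, 0.99, 1
    dimension = Dimension()
    dimension.set_dimension_size(10)
    dimension.set_regions([[-1.0, 1.0] for _ in range(10)],
                          [0 for _ in range(10)])
    func = DistributedFunction(dimension, bias_region=[-0.5, 0.5])
    prob_fct = func.DisSphere
    opt_mean, opt_std = run_no_expert()
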
def run_exp_racos_for_synthetic_problem_analysis():

    # parameters
    sample_size = 10  # number of solutions sampled per iteration
    budget = 500  # total number of objective evaluations
    positive_num = 2  # size of the positive set (PosPop)
    rand_probability = 0.99  # probability of sampling from the learned model
    uncertain_bit = 1  # number of dimensions resampled uniformly at random
    adv_threshold = 10  # advance sample size

    opt_repeat = 10

    dimension_size = 10
    problem_name = 'sphere'
    problem_num = 200
    start_index = 0
    bias_region = 0.2

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    log_buffer = []

    # logging
    learner_path = './ExpLearner/SyntheticProbsLearner/' + problem_name + '/dimension' + str(dimension_size)\
                   + '/DirectionalModel/' + 'learner-' + problem_name + '-' + 'dim' + str(dimension_size) + '-'\
                   + 'bias' + str(bias_region) + '-'
    problem_path = './ExpLog/SyntheticProbsLog/' + problem_name + '/dimension' + str(dimension_size)\
                   + '/DirectionalModel/' + 'bias-' + problem_name + '-' + 'dim' + str(dimension_size) + '-'\
                   + 'bias' + str(bias_region) + '-'

    func = DistributedFunction(dimension, bias_region=[-0.5, 0.5])
    target_bias = [0.1 for _ in range(dimension_size)]
    func.setBias(target_bias)

    if problem_name == 'ackley':
        prob_fct = func.DisAckley
    else:
        prob_fct = func.DisSphere

    relate_error_list = []

    for prob_i in range(problem_num):

        print(
            start_index + prob_i,
            '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
        log_buffer.append(
            str(start_index + prob_i) +
            '++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')

        log_buffer.append('+++++++++++++++++++++++++++++++')
        log_buffer.append('optimization parameters')
        log_buffer.append('sample size: ' + str(sample_size))
        log_buffer.append('budget: ' + str(budget))
        log_buffer.append('positive num: ' + str(positive_num))
        log_buffer.append('random probability: ' + str(rand_probability))
        log_buffer.append('uncertain bits: ' + str(uncertain_bit))
        log_buffer.append('advance num: ' + str(adv_threshold))
        log_buffer.append('+++++++++++++++++++++++++++++++')
        log_buffer.append('problem parameters')
        log_buffer.append('dimension size: ' + str(dimension_size))
        log_buffer.append('problem name: ' + problem_name)
        log_buffer.append('bias_region: ' + str(bias_region))
        log_buffer.append('+++++++++++++++++++++++++++++++')

        problem_file = problem_path + str(start_index + prob_i) + '.txt'
        problem_str = fo.FileReader(problem_file)[0].split(',')
        problem_index = int(problem_str[0])
        problem_bias = string2list(problem_str[1])
        if problem_index != (start_index + prob_i):
            print('problem error!')
            exit(0)
        print('source bias: ', problem_bias)
        log_buffer.append('source bias: ' + list2string(problem_bias))

        # squared Euclidean distance between the source bias and the target bias
        residual = np.array(target_bias) - np.array(problem_bias)
        this_distance = np.sum(residual * residual)

        learner_file = learner_path + str(start_index + prob_i) + '.pkl'
        log_buffer.append('learner file: ' + learner_file)
        print('learner file: ', learner_file)

        net = torch.load(learner_file)

        net_list = [net]

        opt_error_list = []

        for i in range(opt_repeat):

            print('optimize ', i,
                  '===================================================')
            log_buffer.append(
                'optimize ' + str(i) +
                '===================================================')

            exp_racos = ExpRacosOptimization(dimension, net_list)

            start_t = time.time()
            exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
            end_t = time.time()

            print('total budget is ', budget)
            log_buffer.append('total budget is ' + str(budget))

            hour, minute, second = time_formulate(start_t, end_t)
            print('spending time: ', hour, ':', minute, ':', second)
            log_buffer.append('spending time: ' + str(hour) + '+' +
                              str(minute) + '+' + str(second))

            optimal = exp_racos.get_optimal()
            opt_error = optimal.get_fitness()
            optimal_x = optimal.get_features()

            opt_error_list.append(opt_error)
            print('validation optimal value: ', opt_error)
            log_buffer.append('validation optimal value: ' + str(opt_error))
            print('optimal x: ', optimal_x)
            log_buffer.append('optimal nn structure: ' +
                              list2string(optimal_x))

        opt_mean = np.mean(np.array(opt_error_list))
        relate_error_list.append([this_distance, opt_mean])
        opt_std = np.std(np.array(opt_error_list))
        print('--------------------------------------------------')
        print('optimization result: ', opt_mean, '#', opt_std)
        log_buffer.append('--------------------------------------------------')
        log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                          str(opt_std))

    result_path = './Results/SyntheticProbs/' + problem_name + '/dimension' + str(
        dimension_size) + '/'
    relate_error_file = result_path + 'relate-error-' + problem_name + '-dim' + str(dimension_size) + '-bias'\
                            + str(bias_region) + '.txt'
    temp_buffer = []
    for relate, error in relate_error_list:
        temp_buffer.append(str(relate) + ',' + str(error))
    print('relate error logging: ', relate_error_file)
    log_buffer.append('relate error logging: ' + relate_error_file)
    fo.FileWriter(relate_error_file, temp_buffer, style='w')

    optimization_log_file = result_path + 'opt-log-' + problem_name + '-dim' + str(dimension_size) + '-bias'\
                            + str(bias_region) + '.txt'
    print('optimization logging: ', optimization_log_file)
    fo.FileWriter(optimization_log_file, log_buffer, style='w')
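
# Note: each line of the relate-error file pairs the squared distance between a
# source problem's bias and the target bias with the mean optimization error
# obtained when reusing that problem's learner, i.e. one 'distance,error' entry
# per problem.
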
Example #3
def run_for_real_problem(problem_name, type):
    dtrain, dtest, dvalid = mlbp.get_train_test_data(problem_name)
    opt_error_list = []
    gen_error_list = []
    print(type, ' optimize ', problem_name,
          '===================================================')
    log_buffer.append(type + ' optimize ' + problem_name +
                      '===================================================')

    for j in range(opt_repeat):
        print(j)
        log_buffer.append(str(j))
        model = lgb.LGBMClassifier()
        start_t = time.time()

        def score_fun(x):
            # score function: decode x into hyper-parameters, fit on the train
            # split and return the negated validation macro-F1 (minimized)
            hyper_param = (sample_codec.sample_decode(x))
            model.set_params(**hyper_param)
            bst = model.fit(dtrain[:, :-1], dtrain[:, -1])
            pred = bst.predict(dvalid[:, :-1])
            fitness = -f1_score(dvalid[:, -1], pred, average='macro')
            return fitness

        if type == 'racos':
            optimizer = RacosOptimization(dimension)
            optimizer.clear()
            optimizer.mix_opt(obj_fct=score_fun,
                              ss=sample_size,
                              bud=budget,
                              pn=positive_num,
                              rp=rand_probability,
                              ub=uncertain_bit)
        elif type == 'ave':
            optimizer = ExpRacosOptimization(dimension, nets)
            log = optimizer.exp_mix_opt(obj_fct=score_fun,
                                        ss=sample_size,
                                        bud=budget,
                                        pn=positive_num,
                                        rp=rand_probability,
                                        ub=uncertain_bit,
                                        at=adv_threshold)
            for line in log:
                log_buffer.append(line)
        elif type == 'ada':
            optimizer = ExpAdaRacosOptimization(dimension, expert)
            optimizer.clear()
            log = optimizer.exp_ada_mix_opt(obj_fct=score_fun,
                                            ss=sample_size,
                                            bud=budget,
                                            pn=positive_num,
                                            rp=rand_probability,
                                            ub=uncertain_bit,
                                            at=adv_threshold,
                                            step=step)
            for line in log:
                log_buffer.append(line)
        else:
            print('Wrong type!')
            return

        end_t = time.time()

        print('total budget is ', budget)
        log_buffer.append('total budget is ' + str(budget))

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

        optimal = optimizer.get_optimal()
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()
        hyper_param = (sample_codec.sample_decode(optimal_x))
        model = lgb.LGBMClassifier()
        model.set_params(**hyper_param)
        train = np.concatenate((dtrain, dvalid), axis=0)
        bst = model.fit(train[:, :-1], train[:, -1])
        pred = bst.predict(dtest[:, :-1])
        gen_error = -f1_score(dtest[:, -1], pred, average='macro')

        gen_error_list.append(gen_error)
        opt_error_list.append(opt_error)
        print('***********validation optimal value: ', opt_error)
        log_buffer.append('***********validation optimal value: ' +
                          str(opt_error))
        print('***********generalize optimal value: ', gen_error)
        log_buffer.append('***********generalize optimal value: ' +
                          str(gen_error))
        print('optimal x: ', optimal_x)
        # log_buffer.append('optimal nn structure: ' + list2string(optimal_x))

    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    gen_mean = np.mean(np.array(gen_error_list))
    gen_std = np.std(np.array(gen_error_list))

    return -opt_mean, opt_std, -gen_mean, gen_std
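
# Usage sketch (assumption): run_for_real_problem() depends on module-level
# state (mlbp, sample_codec, dimension, log_buffer, opt_repeat, nets/expert and
# the Racos hyper-parameters). Because score_fun minimizes the negated
# macro-F1, the returned means are flipped back to positive scores, e.g.:
#
#     opt_f1, opt_std, gen_f1, gen_std = run_for_real_problem('some_dataset',
#                                                             'racos')
#     print('validation macro-F1: ', opt_f1, ' test macro-F1: ', gen_f1)
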
def synthetic_problems_sample(budget=500,
                              problem_name='sphere',
                              problem_size=5,
                              max_bias=0.5,
                              bias_step=0):
    sample_size = 10  # number of solutions sampled per iteration
    positive_num = 2  # size of the positive set (PosPop)
    rand_probability = 0.99  # probability of sampling from the learned model
    uncertain_bits = 2  # number of dimensions resampled uniformly at random

    start_index = 0

    repeat_num = 10

    exp_path = path + '/ExpLog/SyntheticProbsLog/'

    bias = 0

    dimension_size = 10

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    if bias_step > 0:
        problem_name += '_group-sample'

    for prob_i in range(problem_size):

        if bias_step > 0:
            # group sampling: raise the bias by one step at each group
            # boundary; each group holds problem_size / (max_bias / bias_step)
            # problems
            if prob_i % int(problem_size / max_bias * bias_step) == 0:
                bias += bias_step
        else:
            bias = max_bias

        # bias log format: 'index,bias_list: dim1 dim2 dim3...'
        bias_log = []
        running_log = []
        running_log.append('+++++++++++++++++++++++++++++++++')
        running_log.append('optimization setting: ')
        running_log.append('sample_size: ' + str(sample_size))
        running_log.append('positive_num: ' + str(positive_num))
        running_log.append('rand_probability: ' + str(rand_probability))
        running_log.append('uncertain_bits: ' + str(uncertain_bits))
        running_log.append('budget: ' + str(budget))
        running_log.append('group sample step: ' + str(bias_step))
        running_log.append('+++++++++++++++++++++++++++++++++')

        print(problem_name, ': ', start_index + prob_i,
              ' ==============================================')
        running_log.append(problem_name + ': ' + str(start_index + prob_i) +
                           ' ==============================================')

        # problem setting
        func = DistributedFunction(dim=dimension, bias_region=[-bias, bias])
        if 'ackley' in problem_name:
            prob = func.DisAckley
        elif 'sphere' in problem_name:
            prob = func.DisSphere
        elif 'rosenbrock' in problem_name:
            prob = func.DisRosenbrock
        else:
            print('Wrong function!')
            return

        # bias log
        bias_log.append(str(prob_i) + ',' + list2string(func.getBias()))
        print('function: ', problem_name, ', this bias: ', func.getBias())
        running_log.append('function: ' + problem_name + ', this bias: ' +
                           list2string(func.getBias()))

        # optimization setting
        optimizer = RacosOptimization(dimension)

        positive_set = []
        negative_set = []
        new_sample_set = []
        label_set = []

        for repeat_i in range(repeat_num):
            print('repeat ', repeat_i,
                  ' ----------------------------------------')
            running_log.append('repeat ' + str(repeat_i) +
                               ' ----------------------------------------')

            # optimization process
            start_t = time.time()
            optimizer.mix_opt(obj_fct=prob,
                              ss=sample_size,
                              bud=budget,
                              pn=positive_num,
                              rp=rand_probability,
                              ub=uncertain_bits)
            end_t = time.time()
            hour, minute, second = time_formulate(start_t, end_t)

            # optimization results
            optimal = optimizer.get_optimal()
            print('optimal v: ', optimal.get_fitness(), ' - ',
                  optimal.get_features())
            running_log.append('optimal v: ' + str(optimal.get_fitness()) +
                               ' - ' + list2string(optimal.get_features()))
            print('spent time: ', hour, ':', minute, ':', second)
            running_log.append('spent time: ' + str(hour) + ':' + str(minute) +
                               ':' + str(second))

            # log samples
            this_positive, this_negative, this_new, this_label = optimizer.get_log()

            print('sample number: ', len(this_positive), ':', len(this_label))
            running_log.append('sample number: ' + str(len(this_positive)) +
                               ':' + str(len(this_label)))

            positive_set.extend(this_positive)
            negative_set.extend(this_negative)
            new_sample_set.extend(this_new)
            label_set.extend(this_label)
        print('----------------------------------------------')
        print('sample finish!')
        print('all sample number: ', len(positive_set), '-', len(negative_set),
              '-', len(new_sample_set), '-', len(label_set))
        running_log.append('----------------------------------------------')
        running_log.append('all sample number: ' + str(len(positive_set)) +
                           '-' + str(len(negative_set)) + '-' +
                           str(len(new_sample_set)) + '-' +
                           str(len(label_set)))

        data_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/DataLog/' + \
                        'data-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                        + str(bias) + '-' + str(start_index + prob_i) + '.pkl'
        bias_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + 'bias-' \
                        + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' + str(bias) \
                        + '-' + str(start_index + prob_i) + '.txt'
        running_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + \
                           'running-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                           + str(bias) + '-' + str(start_index + prob_i) + '.txt'

        print('data logging: ', data_log_file)
        running_log.append('data log path: ' + data_log_file)
        save_log(positive_set, negative_set, new_sample_set, label_set,
                 data_log_file)

        print('bias logging: ', bias_log_file)
        running_log.append('bias log path: ' + bias_log_file)
        fo.FileWriter(bias_log_file, bias_log, style='w')

        print('running logging: ', running_log_file)
        fo.FileWriter(running_log_file, running_log, style='w')

    return
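
# Usage sketch: a fixed-bias sampling run (bias_step=0 keeps bias at max_bias
# for every problem), assuming `path`, fo.FileWriter and save_log are available
# at module level:
if __name__ == '__main__':
    synthetic_problems_sample(budget=500, problem_name='sphere',
                              problem_size=5, max_bias=0.5, bias_step=0)
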
def run(type):
    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    # problem define
    func = DistributedFunction(dimension,
                               bias_region=[-bias_region, bias_region])
    target_bias = [0.1 for _ in range(dimension_size)]
    func.setBias(target_bias)

    if problem_name == 'ackley':
        prob_fct = func.DisAckley
    elif problem_name == 'sphere':
        prob_fct = func.DisSphere
    elif problem_name == 'rosenbrock':
        prob_fct = func.DisRosenbrock
    else:
        print('Wrong function!')
        exit()
    opt_error_list = []
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('Running: ' + type)
    log_buffer.append('+++++++++++++++++++++++++++++++')
    print('+++++++++++++++++++++++++++++++')
    print('Running: ' + type)
    print('+++++++++++++++++++++++++++++++')
    if type == 'ada':
        # pre=sorted(predictors,key=lambda a:a.dist)
        expert = Experts(predictors=predictors, eta=eta, bg=budget)

    for i in range(opt_repeat):
        print('optimize ', i,
              '===================================================')
        log_buffer.append(
            'optimize ' + str(i) +
            '===================================================')
        start_t = time.time()
        if type == 'exp':
            exp_racos = ExpRacosOptimization(dimension, nets)
            opt_error = exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                              ss=sample_size,
                                              bud=budget,
                                              pn=positive_num,
                                              rp=rand_probability,
                                              ub=uncertain_bit,
                                              at=adv_threshold)
        elif type == 'ada':
            exp_racos = ExpAdaRacosOptimization(dimension, expert)
            opt_error = exp_racos.exp_ada_mix_opt(obj_fct=prob_fct,
                                                  ss=sample_size,
                                                  bud=budget,
                                                  pn=positive_num,
                                                  rp=rand_probability,
                                                  ub=uncertain_bit,
                                                  at=adv_threshold)
        else:
            print('Wrong type!')
            return

        end_t = time.time()

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))

    opt_mean = np.mean(np.array(opt_error_list), axis=0)
    opt_std = np.std(np.array(opt_error_list), axis=0)
    print('--------------------------------------------------')
    print('optimization result for ' + str(opt_repeat) + ' times average: ',
          opt_mean, ', standard deviation is: ', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result for ' + str(opt_repeat) +
                      ' times average: ' + str(opt_mean) +
                      ', standard deviation is: ' + str(opt_std))

    return opt_mean
def run_racos():
    # parameters
    sample_size = 10  # number of solutions sampled per iteration
    budget = 500  # total number of objective evaluations
    positive_num = 2  # size of the positive set (PosPop)
    rand_probability = 0.99  # probability of sampling from the learned model
    uncertain_bit = 1  # number of dimensions resampled uniformly at random
    bias_region = 0.5

    repeat = 10

    # dimension setting
    dimension_size = 10

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    func = DistributedFunction(dim=dimension,
                               bias_region=[-bias_region, bias_region])
    if problem_name == 'rosenbrock':
        prob = func.DisRosenbrock
    else:
        prob = func.DisSphere

    # optimization
    racos = RacosOptimization(dimension)
    opt_error_list = []

    for i in range(repeat):
        start_t = time.time()
        racos.mix_opt(prob,
                      ss=sample_size,
                      bud=budget,
                      pn=positive_num,
                      rp=rand_probability,
                      ub=uncertain_bit)
        end_t = time.time()

        optimal = racos.get_optimal()

        hour, minute, second = time_formulate(start_t, end_t)

        print('total budget is ', budget, '------------------------------')
        print('spending time: ', hour, ' hours ', minute, ' minutes ', second,
              ' seconds')
        print('optimal value: ', optimal.get_fitness())
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))
    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    print('--------------------------------------------------')
    print('optimization result: ', opt_mean, '#', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                      str(opt_std))

    return opt_mean
Example #7
def learning_exp_ensemble():
    random.seed(1)

    # training parameters
    epoch_size = 50
    batch_size = 32
    vali_rate = 0.1
    learn_rate = 0.0005
    categorical_size = 1
    validation_switch = True

    # exp data parameters
    dim_size = 10
    problem_name = 'sphere'
    start_index = 2000
    bias_region = 0.5
    problem_num = 2000

    learner_path = path + '/ExpLearner/SyntheticProbsLearner/'
    data_path = path + '/ExpLog/SyntheticProbsLog/'

    log_buffer = []
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('training parameter')
    log_buffer.append('epoch size: ' + str(epoch_size))
    log_buffer.append('batch size: ' + str(batch_size))
    log_buffer.append('validation rate: ' + str(vali_rate))
    log_buffer.append('learning rate: ' + str(learn_rate))
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('experience data parameter')
    log_buffer.append('dimension size: ' + str(dim_size))
    log_buffer.append('problem name: ' + problem_name)
    log_buffer.append('problem index: ' + str(start_index + 1))
    log_buffer.append('+++++++++++++++++++++++++++++++')

    log_name = learner_path + problem_name + '/dimension' + str(dim_size) + '/TrainingLog/' + 'learning-log-' \
               + problem_name + '-' + 'dim' + str(dim_size) + '-' + 'bias' + str(bias_region) \
               + '-' + str(start_index + 1) + '.txt'
    train_data = []
    train_label = []

    for prob_i in range(problem_num):
        data_file = data_path + problem_name + '/dimension' + str(dim_size) + '/LearningData/' + 'learning-data-' \
                    + problem_name + '-' + 'dim' + str(dim_size) + '-' + 'bias' + str(bias_region) + '-' \
                    + str(prob_i) + '.pkl'

        print('data loading: ', data_file)
        data_inf, bias, ori_data, blc_data = learning_data_load(
            file_path=data_file)

        train_data_, train_label_ = blc_data
        # test_data_, test_label_ = ori_data

        train_data.append(train_data_)
        train_label.append(train_label_)

    print('data transfer...')
    train_data = learning_data_transfer(instance_set=train_data)

    print('train data formulation: ', len(train_data), '*', len(train_data[0]),
          '*', len(train_data[0][0]), '*', len(train_data[0][0][0]))
    log_buffer.append('--' + 'train data formulation: ' +
                      str(len(train_data)) + '*' + str(len(train_data[0])) +
                      '*' + str(len(train_data[0][0])) + '*' +
                      str(len(train_data[0][0][0])) + '--')
    print('train label size: ', len(train_label))
    log_buffer.append('--' + 'train label size: ' + str(len(train_label)))

    # train new models
    print(
        'training model...---------------------------------------------------------------------------'
    )

    print('split train and validation data...')
    trainloader, validationloader = split_data([train_data, train_label],
                                               batch_size=batch_size,
                                               validation_rate=vali_rate)
    log_buffer.append('--split data: train data size: ' +
                      str(len(trainloader)) + ' validation data size: ' +
                      str(len(validationloader)))

    print(
        'training net ================================================================'
    )
    # data = mix_data(minority_data, majority_data[net_i])

    net_start = time.time()

    net = ImageNet(middle_input_size=dim_size, output_size=categorical_size)
    net.cuda()
    # criterion = nn.CrossEntropyLoss()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=learn_rate)

    log_buffer.append('criterion: BCELoss, optimizer: Adam')
    log_buffer.append('--net train--')

    for epoch in range(epoch_size):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data

            # wrap them in Variable
            inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)

            labels = labels.float()  # BCELoss used

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 50 == 49:  # print every 50 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 50))
                log_buffer.append('[%d, %5d] loss: %.3f' %
                                  (epoch + 1, i + 1, running_loss / 50))
                running_loss = 0.0

                # validation
                if validation_switch:
                    all_predictions, all_labels = [], []
                    for data in validationloader:
                        images, labels = data
                        images = Variable(images.cuda())

                        outputs = net(images)
                        outputs = outputs.cpu()

                        # for BCELoss: threshold the sigmoid outputs at 0.5,
                        # mapping them to {0, 1} class predictions
                        predicted = (torch.sign(outputs * 2 - 1) + 1) / 2
                        predicted = predicted.data.numpy()
                        predicted = predicted.reshape(
                            predicted.size).astype(int).tolist()

                        all_predictions.extend(predicted)
                        all_labels.extend(labels.numpy().tolist())

                    accuracy = accuracy_score(all_labels, all_predictions)
                    recall = recall_score(all_labels, all_predictions)
                    precision = precision_score(all_labels, all_predictions)

                    print('accuracy: ', accuracy, ', recall rate: ', recall,
                          ', precision rate: ', precision)
                    log_buffer.append('accuracy: ' + str(accuracy) +
                                      ', recall rate: ' + str(recall) +
                                      ', precision rate: ' + str(precision))
    net_end = time.time()
    hour, minute, second = time_formulate(net_start, net_end)
    print('train net time: ', hour, ':', minute, ':', second)
    log_buffer.append('train net time: ' + str(hour) + ':' + str(minute) +
                      ':' + str(second))

    net_file = learner_path + problem_name + '/dimension' + str(dim_size) + '/DirectionalModel/' + 'learner-' \
               + problem_name + '-' + 'dim' + str(dim_size) + '-' + 'bias' + str(bias_region) \
               + '-' + str(start_index + 1) + 'alldata.pkl'
    print('net saving...')
    torch.save(net, net_file)
    log_buffer.append('--net save: ' + net_file + '--')
    print('net saved!')

    fo.FileWriter(log_name, log_buffer, style='w')

    return
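
# Note (assumption): the learner saved here is the kind of expert consumed by
# the optimization examples above, e.g. net = torch.load(learner_file) followed
# by ExpRacosOptimization(dimension, [net]).
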
def run_for_synthetic_problem():

    sample_size = 10  # number of solutions sampled per iteration
    budget = 50  # total number of objective evaluations
    positive_num = 2  # size of the positive set (PosPop)
    rand_probability = 0.99  # probability of sampling from the learned model
    uncertain_bit = 1  # number of dimensions resampled uniformly at random
    adv_threshold = 10  # advance sample size

    opt_repeat = 10

    dimension_size = 10
    problem_name = 'sphere'
    bias_region = 0.5

    eta = 0.9
    step = 100

    dimension = Dimension()
    dimension.set_dimension_size(dimension_size)
    dimension.set_regions([[-1.0, 1.0] for _ in range(dimension_size)],
                          [0 for _ in range(dimension_size)])

    log_buffer = []

    # problem define
    func = DistributedFunction(dimension, bias_region=[-0.5, 0.5])
    target_bias = [0.2 for _ in range(dimension_size)]
    func.setBias(target_bias)

    if problem_name == 'ackley':
        prob_fct = func.DisAckley
    else:
        prob_fct = func.DisSphere

    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('optimization parameters')
    log_buffer.append('sample size: ' + str(sample_size))
    log_buffer.append('budget: ' + str(budget))
    log_buffer.append('positive num: ' + str(positive_num))
    log_buffer.append('random probability: ' + str(rand_probability))
    log_buffer.append('uncertain bits: ' + str(uncertain_bit))
    log_buffer.append('advance num: ' + str(adv_threshold))
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('problem parameters')
    log_buffer.append('dimension size: ' + str(dimension_size))
    log_buffer.append('problem name: ' + problem_name)
    log_buffer.append('bias: ' + list2string(target_bias))
    log_buffer.append('+++++++++++++++++++++++++++++++')

    predictors, load_buffer = get_predicotrs()
    expert = Experts(predictors=predictors, eta=eta, step=step)
    log_buffer.extend(load_buffer)

    opt_error_list = []

    for i in range(opt_repeat):
        print('optimize ', i,
              '===================================================')
        log_buffer.append(
            'optimize ' + str(i) +
            '===================================================')

        exp_racos = ExpAdaRacosOptimization(dimension, expert)

        start_t = time.time()
        exp_racos.exp_ada_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
        end_t = time.time()

        print('total budget is ', budget)
        log_buffer.append('total budget is ' + str(budget))

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

        optimal = exp_racos.get_optimal()
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))

    opt_mean = np.mean(np.array(opt_error_list))
    opt_std = np.std(np.array(opt_error_list))
    print('--------------------------------------------------')
    print('optimization result: ', opt_mean, '#', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result: ' + str(opt_mean) + '#' +
                      str(opt_std))

    result_path = path + '/Results/Ada/' + problem_name + '/dimension' + str(
        dimension_size) + '/'

    optimization_log_file = result_path + 'opt-log-' + problem_name + '-dim' + str(dimension_size) + '-bias' \
                            + str(bias_region) + '.txt'
    print('optimization logging: ', optimization_log_file)
    fo.FileWriter(optimization_log_file, log_buffer, style='w')

    return
Example #9
def run(type):
    opt_error_list = []
    log_buffer.append('+++++++++++++++++++++++++++++++')
    log_buffer.append('Running: ' + type)
    log_buffer.append('+++++++++++++++++++++++++++++++')
    print('+++++++++++++++++++++++++++++++')
    print('Running: ' + type)
    print('+++++++++++++++++++++++++++++++')
    if type == 'ada':
        # pre=sorted(predictors,key=lambda a:a.dist)
        expert = Experts(predictors=predictors, eta=eta, bg=budget)

    for i in range(opt_repeat):
        print('optimize ', i,
              '===================================================')
        log_buffer.append(
            'optimize ' + str(i) +
            '===================================================')
        start_t = time.time()
        if type == 'ave':
            exp_racos = ExpRacosOptimization(dimension, nets)
            opt_error = exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                              ss=sample_size,
                                              bud=budget,
                                              pn=positive_num,
                                              rp=rand_probability,
                                              ub=uncertain_bit,
                                              at=adv_threshold)
        elif type == 'ada':
            exp_racos = ExpAdaRacosOptimization(dimension, expert)
            opt_error = exp_racos.exp_ada_mix_opt(obj_fct=prob_fct,
                                                  ss=sample_size,
                                                  bud=budget,
                                                  pn=positive_num,
                                                  rp=rand_probability,
                                                  ub=uncertain_bit,
                                                  at=adv_threshold,
                                                  step=step)
        elif type == 'ground truth':
            exp_racos = ExpRacosOptimization(dimension, nets[:step])
            exp_racos.exp_mix_opt(obj_fct=prob_fct,
                                  ss=sample_size,
                                  bud=budget,
                                  pn=positive_num,
                                  rp=rand_probability,
                                  ub=uncertain_bit,
                                  at=adv_threshold)
        else:
            print('Wrong type!')
            return

        end_t = time.time()

        hour, minute, second = time_formulate(start_t, end_t)
        print('spending time: ', hour, ':', minute, ':', second)
        log_buffer.append('spending time: ' + str(hour) + '+' + str(minute) +
                          '+' + str(second))

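        # note: get_fitness() below overwrites the opt_error returned by the
        # 'ave' and 'ada' optimizer calls above; the optimizer's stored optimum
        # is what gets logged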
        optimal = exp_racos.get_optimal()
        opt_error = optimal.get_fitness()
        optimal_x = optimal.get_features()

        opt_error_list.append(opt_error)
        print('validation optimal value: ', opt_error)
        log_buffer.append('validation optimal value: ' + str(opt_error))
        print('optimal x: ', optimal_x)
        log_buffer.append('optimal nn structure: ' + list2string(optimal_x))

    opt_mean = np.mean(np.array(opt_error_list), axis=0)
    opt_std = np.std(np.array(opt_error_list), axis=0)
    print('--------------------------------------------------')
    print('optimization result for ' + str(opt_repeat) + ' times average: ',
          opt_mean, ', standard deviation is: ', opt_std)
    log_buffer.append('--------------------------------------------------')
    log_buffer.append('optimization result for ' + str(opt_repeat) +
                      ' times average: ' + str(opt_mean) +
                      ', standard deviation is: ' + str(opt_std))

    return opt_mean, opt_std
def synthetic_problems_sample(prob_i):
    # bias log format: 'index,bias_list: dim1 dim2 dim3...'
    bias_log = []
    running_log = []
    running_log.append('+++++++++++++++++++++++++++++++++')
    running_log.append('optimization setting: ')
    running_log.append('sample_size: ' + str(sample_size))
    running_log.append('positive_num: ' + str(positive_num))
    running_log.append('rand_probability: ' + str(rand_probability))
    running_log.append('uncertain_bits: ' + str(uncertain_bits))
    running_log.append('budget: ' + str(budget))
    running_log.append('+++++++++++++++++++++++++++++++++')

    print(problem_name, ': ', start_index + prob_i,
          ' ==============================================')
    running_log.append(problem_name + ': ' + str(start_index + prob_i) +
                       ' ==============================================')

    # bias setting
    group_num = 10
    group_size = problem_num / group_num
    bias_step = bias_region / group_num
    new_bias_region = int(prob_i / group_size) * bias_step
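    # worked example (illustrative values): with problem_num = 2000 and
    # bias_region = 0.5, group_size = 200 and bias_step = 0.05, so prob_i = 450
    # falls in group 2 and gets new_bias_region = 0.1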

    # problem setting
    func = DistributedFunction(dim=dimension,
                               bias_region=[-new_bias_region, new_bias_region])
    if 'ackley' in problem_name:
        prob_fct = func.DisAckley
    elif 'sphere' in problem_name:
        prob_fct = func.DisSphere
    elif 'rosenbrock' in problem_name:
        prob_fct = func.DisRosenbrock
    else:
        print('Wrong Function!')
        exit()

    # bias log
    bias_log.append(
        str(prob_i + start_index) + ',' + list2string(func.getBias()))
    print('function: ', problem_name, ', this bias: ', func.getBias())
    running_log.append('function: ' + problem_name + ', this bias: ' +
                       list2string(func.getBias()))

    # optimization setting
    optimizer = RacosOptimization(dimension)

    positive_set = []
    negative_set = []
    new_sample_set = []
    label_set = []

    for repeat_i in range(repeat_num):
        print('repeat ', repeat_i, ' ----------------------------------------')
        running_log.append('repeat ' + str(repeat_i) +
                           ' ----------------------------------------')

        # optimization process
        start_t = time.time()
        optimizer.mix_opt(obj_fct=prob_fct,
                          ss=sample_size,
                          bud=budget,
                          pn=positive_num,
                          rp=rand_probability,
                          ub=uncertain_bits)
        end_t = time.time()
        hour, minute, second = time_formulate(start_t, end_t)

        # optimization results
        optimal = optimizer.get_optimal()
        print('optimal v: ', optimal.get_fitness(), ' - ',
              optimal.get_features())
        running_log.append('optimal v: ' + str(optimal.get_fitness()) + ' - ' +
                           list2string(optimal.get_features()))
        print('spent time: ', hour, ':', minute, ':', second)
        running_log.append('spent time: ' + str(hour) + ':' + str(minute) +
                           ':' + str(second))

        # log samples
        this_positive, this_negative, this_new, this_label = optimizer.get_log()

        print('sample number: ', len(this_positive), ':', len(this_label))
        running_log.append('sample number: ' + str(len(this_positive)) + ':' +
                           str(len(this_label)))

        positive_set.extend(this_positive)
        negative_set.extend(this_negative)
        new_sample_set.extend(this_new)
        label_set.extend(this_label)
    print('----------------------------------------------')
    print('sample finish!')
    print('all sample number: ', len(positive_set), '-', len(negative_set),
          '-', len(new_sample_set), '-', len(label_set))
    running_log.append('----------------------------------------------')
    running_log.append('all sample number: ' + str(len(positive_set)) + '-' +
                       str(len(negative_set)) + '-' +
                       str(len(new_sample_set)) + '-' + str(len(label_set)))

    data_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/DataLog/' + \
                    'data-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                    + str(bias_region) + '-' + str(start_index + prob_i) + '.pkl'
    bias_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + 'bias-' \
                    + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' + str(bias_region) \
                    + '-' + str(start_index + prob_i) + '.txt'
    running_log_file = exp_path + str(problem_name) + '/dimension' + str(dimension_size) + '/RecordLog/' + \
                       'running-' + problem_name + '-' + 'dim' + str(dimension_size) + '-' + 'bias' \
                       + str(bias_region) + '-' + str(start_index + prob_i) + '.txt'

    print('data logging: ', data_log_file)
    running_log.append('data log path: ' + data_log_file)
    save_log(positive_set, negative_set, new_sample_set, label_set,
             data_log_file)

    print('bias logging: ', bias_log_file)
    running_log.append('bias log path: ' + bias_log_file)
    fo.FileWriter(bias_log_file, bias_log, style='w')

    print('running logging: ', running_log_file)
    fo.FileWriter(running_log_file, running_log, style='w')

    return