Example #1
File: example.py Project: tmadl/ZOOpt
# Assumed imports for this listing: zoopt's public API plus the GymTask helper
# defined alongside this example in the project (not shown in the snippet).
from zoopt import Dimension, Objective, Parameter, Opt, ExpOpt
from gym_task import GymTask


def run_ss_test(task_name, layers, in_budget, max_step, repeat,
                terminal_value):
    """
    Example of running direct policy search for a gym task, handling noise by
    re-sampling and value suppression; the parameters have the same meaning as
    in run_test_handlingnoise below.
    """
    gym_task = GymTask(task_name)  # choose a task by name
    gym_task.new_nnmodel(layers)  # construct a neural network
    gym_task.set_max_step(max_step)  # set max step in gym

    budget = in_budget  # number of calls to the objective function
    rand_probability = 0.95  # probability of sampling from the learned model

    # set dimension
    dim_size = gym_task.get_w_size()
    dim_regs = [[-10, 10]] * dim_size
    dim_tys = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys)

    def resample_function(solution, iteration_num):
        # average the objective over iteration_num evaluations to reduce noise
        eval_list = []
        for i in range(iteration_num):
            eval_list.append(gym_task.sum_reward(solution))
        return sum(eval_list) * 1.0 / len(eval_list)

    # form up the objective function
    objective = Objective(gym_task.sum_reward,
                          dim,
                          re_sample_func=resample_function)
    # by default, the algorithm is sequential RACOS
    parameter = Parameter(budget=budget,
                          autoset=True,
                          suppression=True,
                          terminal_value=terminal_value)
    parameter.set_resample_times(70)
    parameter.set_probability(rand_probability)

    result = []
    total_sum = 0
    total_step = []
    print('solved solution is:')
    for i in range(repeat):
        ins = Opt.min(objective, parameter)
        result.append(ins.get_value())
        total_sum += ins.get_value()
        ins.print_solution()
        print("total step %s" % gym_task.total_step)
        total_step.append(gym_task.total_step)
        gym_task.total_step = 0
    print(result)  # results in repeat times
    print(total_sum / len(result))  # average result
    print(total_step)
    print("------------------------avg total step %s" %
          (sum(total_step) / len(total_step)))
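
A call to run_ss_test might look like the sketch below; the environment name, layer sizes, budget, and terminal value are illustrative placeholders rather than settings taken from the project.

# Hypothetical invocation; every argument value here is illustrative.
if __name__ == '__main__':
    run_ss_test(task_name='CartPole-v0',  # gym environment to solve
                layers=[4, 5, 1],         # 4 inputs, one hidden layer of 5, 1 output
                in_budget=2000,           # number of objective-function evaluations
                max_step=500,             # max steps per gym episode
                repeat=5,                 # number of independent optimization runs
                terminal_value=-500)      # illustrative early-stop threshold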
Example #2
def run_test_handlingnoise(task_name, layers, in_budget, max_step, repeat,
                           terminal_value):
    """
    Example of running direct policy search for a gym task with noise handling.

    :param task_name: gym task name
    :param layers:
        layer information of the neural network
        e.g., [2, 5, 1] means the input layer has 2 neurons, the (single) hidden layer has 5, and the output layer has 1
    :param in_budget: number of calls to the objective function
    :param max_step: max step in gym
    :param repeat: number of times the optimization is repeated (passed to ExpOpt.min)
    :param terminal_value: early-stopping value; the algorithm stops once this value is reached
    :return: no return value
    """
    gym_task = GymTask(task_name)  # choose a task by name
    gym_task.new_nnmodel(layers)  # construct a neural network
    gym_task.set_max_step(max_step)  # set max step in gym

    budget = in_budget  # number of calls to the objective function
    rand_probability = 0.95  # probability of sampling from the learned model

    # set dimension
    dim_size = gym_task.get_w_size()
    dim_regs = [[-10, 10]] * dim_size
    dim_tys = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys)
    # form up the objective function
    objective = Objective(gym_task.sum_reward, dim)
    # by default, the algorithm is sequential RACOS
    parameter = Parameter(budget=budget,
                          autoset=True,
                          suppression=True,
                          terminal_value=terminal_value)
    parameter.set_resample_times(70)
    parameter.set_probability(rand_probability)

    # repeat the whole optimization 'repeat' times; ExpOpt.min returns one solution per run
    solution_list = ExpOpt.min(objective, parameter, repeat=repeat)
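
run_test_handlingnoise assigns the result of ExpOpt.min to solution_list but never uses it; if the function were modified to end with "return solution_list", a caller could summarize the repeated runs as sketched below. The return statement and all argument values are assumptions for illustration; Solution.get_value() is the same accessor already used in Example #1.

# Assumes run_test_handlingnoise is modified to end with: return solution_list
solutions = run_test_handlingnoise(
    'CartPole-v0', [4, 5, 1], 2000, 500, repeat=5, terminal_value=-500)
values = [sol.get_value() for sol in solutions]  # best objective value of each run
print(values)                                    # per-run results
print(sum(values) / len(values))                 # average over the repeated runs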