def test_high_dim(self):
    dim_size = 10000  # dimensions
    dim_regs = [[-1, 1]] * dim_size  # dimension range
    dim_tys = [True] * dim_size  # dimension type: real
    dim = Dimension(dim_size, dim_regs, dim_tys)  # form up the dimension object
    objective = Objective(sphere_sre, dim)  # form up the objective function
    # setup algorithm parameters
    budget = 2000  # number of calls to the objective function
    parameter = Parameter(budget=budget, high_dim_handling=True, reducedim=True, num_sre=5,
                          low_dimension=Dimension(10, [[-1, 1]] * 10, [True] * 10), seed=1)
    sol1 = Opt.min(objective, parameter)
    sol2 = Opt.min(objective, parameter)
    assert sol1.get_value() == sol2.get_value()
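# The test above exercises ZOOpt's sequential random embedding (SRE) for
# high-dimensional problems: reducedim=True searches the 10000-d space through
# the 10-d low_dimension embedding, and num_sre=5 runs five embedding rounds.
# Below is a minimal, self-contained sketch of the same mechanism on a plain
# sphere function; sphere_sketch here is illustrative and is not the sphere_sre
# helper used by the test.
from zoopt import Dimension, Objective, Parameter, Opt


def sphere_sketch(solution):
    # sum of squares; minimum 0 at the origin
    return sum(x * x for x in solution.get_x())


def run_sre_sketch():
    size = 10000
    dim = Dimension(size, [[-1, 1]] * size, [True] * size)
    parameter = Parameter(budget=2000, high_dim_handling=True, reducedim=True, num_sre=5,
                          low_dimension=Dimension(10, [[-1, 1]] * 10, [True] * 10))
    return Opt.min(Objective(sphere_sketch, dim), parameter)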
def test_noisy(self):
    ackley_noise_func = ackley_noise_creator(0, 0.1)
    dim_size = 100  # dimensions
    dim_regs = [[-1, 1]] * dim_size  # dimension range
    dim_tys = [True] * dim_size  # dimension type: real
    dim = Dimension(dim_size, dim_regs, dim_tys)  # form up the dimension object
    objective = Objective(ackley_noise_func, dim)  # form up the objective function
    budget = 20000  # 20*dim_size; number of calls to the objective function
    parameter = Parameter(budget=budget, noise_handling=True, suppression=True,
                          non_update_allowed=200, resample_times=50, balance_rate=0.5, seed=1)
    # parameter = Parameter(budget=budget, noise_handling=True, resampling=True, resample_times=10)
    parameter.set_positive_size(5)
    sol1 = Opt.min(objective, parameter)
    sol2 = Opt.min(objective, parameter)
    assert sol1.get_value() == sol2.get_value()
def run_test(task_name, layers, in_budget, max_step, repeat):
    gym_task = GymTask(task_name)  # choose a task by name
    gym_task.new_nnmodel(layers)  # construct a neural network
    gym_task.set_max_step(max_step)  # set max step in gym
    budget = in_budget  # number of calls to the objective function
    rand_probability = 0.95  # the probability of sampling from the learned model
    # set dimension
    dim_size = gym_task.get_w_size()
    dim_regs = [[-10, 10]] * dim_size
    dim_tys = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys)
    objective = Objective(gym_task.sum_reward, dim)  # form up the objective function
    parameter = Parameter(budget=budget, autoset=True)  # by default, the algorithm is sequential RACOS
    parameter.set_probability(rand_probability)
    result = []
    total = 0
    print('solved solution is:')
    for i in range(repeat):
        ins = Opt.min(objective, parameter)
        result.append(ins.get_value())
        total += ins.get_value()
        ins.print_solution()
    print(result)  # results over the repeats
    print(total / len(result))  # average result
def search(_dataset):
    """
    Search for the best hyper-parameters for the given dataset using ZOOpt.

    :param _dataset: the given dataset
    :return: (best hyper-parameters, performance of the best hyper-parameters)
    """
    global dataset
    dataset = _dataset
    dim = Dimension(
        19,
        [[16, 32], [1, 8], [1, 1], [1, 1], [16, 32], [1, 8], [1, 1], [1, 1],
         [0, 1], [1, 8], [1, 10], [0, 1], [1, 8], [1, 10], [40, 50], [30, 40],
         [20, 30], [10, 20], [0.0001, 0.001]],
        [False] * 18 + [True])  # all dimensions discrete except the last (learning rate)
    obj = Objective(eval, dim)
    # perform optimization
    global round
    round = 0
    solution = Opt.min(obj, Parameter(budget=BUDGET))
    # print result
    solution.print_solution()
    plt.plot(obj.get_history_bestsofar())
    plt.savefig('figure.png')
    return (solution.get_x(), solution.get_value())
def test_performance(self):
    ackley_noise_func = ackley_noise_creator(0, 0.1)
    dim_size = 100  # dimensions
    one_dim = (ValueType.CONTINUOUS, [-1, 1], 1e-6)
    dim_list = [one_dim] * dim_size
    dim = Dimension2(dim_list)  # form up the dimension object
    objective = Objective(ackley_noise_func, dim)  # form up the objective function
    budget = 20000  # 20*dim_size; number of calls to the objective function
    # suppression=True means optimize with value suppression, one noise-handling method.
    # resampling=True means optimize with re-sampling, another commonly used noise-handling method.
    # non_update_allowed=200 and resample_times=50 mean that if the best solution does not change
    # for 200 budgets, it is re-evaluated 50 times.
    # balance_rate is the coefficient of the exponential weighted average over several
    # evaluations of one sample.
    parameter = Parameter(budget=budget, noise_handling=True, suppression=True,
                          non_update_allowed=200, resample_times=50, balance_rate=0.5)
    # parameter = Parameter(budget=budget, noise_handling=True, resampling=True, resample_times=10)
    parameter.set_positive_size(5)
    sol = Opt.min(objective, parameter)
    assert sol.get_value() < 4
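# The commented-out Parameter above shows the re-sampling alternative to value
# suppression: each solution is evaluated resample_times times and the average
# is used, trading budget for lower variance. A minimal sketch of that variant,
# reusing the same ackley_noise_creator, ValueType, and Dimension2 names as the
# test above:
def resampling_sketch():
    noisy_ackley = ackley_noise_creator(0, 0.1)
    dim = Dimension2([(ValueType.CONTINUOUS, [-1, 1], 1e-6)] * 100)
    parameter = Parameter(budget=20000, noise_handling=True,
                          resampling=True, resample_times=10)
    return Opt.min(Objective(noisy_ackley, dim), parameter)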
def test_sracos_performance(self):
    dim = 100  # dimension
    objective = Objective(ackley, Dimension(dim, [[-1, 1]] * dim, [True] * dim))  # setup objective
    parameter = Parameter(budget=100 * dim)
    solution = Opt.min(objective, parameter)
    assert solution.get_value() < 0.2
def opt_var_ids(exs, maps):
    size = len(flatten(exs))
    dim = Dimension(size, [[0, 1]] * size, [False] * size)
    obj = Objective(lambda v: -consistent_score(exs, v.get_x(), maps).score, dim)
    param = Parameter(budget=100, autoset=True)
    solution = Opt.min(obj, param)
    return solution
def test_racos_performance2(self):
    # continuous
    dim = 100  # dimension
    one_dim = (ValueType.CONTINUOUS, [-1, 1], 1e-6)
    dim_list = [one_dim] * dim
    objective = Objective(ackley, Dimension2(dim_list))  # setup objective
    parameter = Parameter(budget=100 * dim, sequential=False, seed=1)
    solution = ExpOpt.min(objective, parameter)[0]
    assert solution.get_value() < 0.2

    dim = 500
    dim_list = [one_dim] * dim
    objective = Objective(ackley, Dimension2(dim_list))  # setup objective
    parameter = Parameter(budget=10000, sequential=False, seed=1)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 2

    # discrete
    # set cover
    problem = SetCover()
    dim_size = 20
    one_dim = (ValueType.DISCRETE, [0, 1], False)
    dim_list = [one_dim] * dim_size
    dim = Dimension2(dim_list)  # form up the dimension object
    objective = Objective(problem.fx, dim)  # form up the objective function
    budget = 100 * dim.get_size()  # number of calls to the objective function
    parameter = Parameter(budget=budget, sequential=False, seed=777)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 2

    # sphere
    dim_size = 100  # dimensions
    one_dim = (ValueType.DISCRETE, [-10, 10], True)
    dim_list = [one_dim] * dim_size
    dim = Dimension2(dim_list)  # form up the dimension object
    objective = Objective(sphere_discrete_order, dim)  # form up the objective function
    parameter = Parameter(budget=10000, sequential=False, seed=77)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 200
def test_racos_performance(self):
    # continuous
    dim = 100  # dimension
    objective = Objective(ackley, Dimension(dim, [[-1, 1]] * dim, [True] * dim))  # setup objective
    parameter = Parameter(budget=100 * dim, sequential=False, seed=1)
    solution = ExpOpt.min(objective, parameter)[0]
    assert solution.get_value() < 0.2

    dim = 500
    objective = Objective(ackley, Dimension(dim, [[-1, 1]] * dim, [True] * dim))  # setup objective
    parameter = Parameter(budget=10000, sequential=False, seed=1)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 2

    # discrete
    # set cover
    problem = SetCover()
    dim = problem.dim  # the dim is prepared by the problem class
    objective = Objective(problem.fx, dim)  # form up the objective function
    budget = 100 * dim.get_size()  # number of calls to the objective function
    parameter = Parameter(budget=budget, sequential=False, seed=777)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 2

    # sphere
    dim_size = 100  # dimensions
    dim_regs = [[-10, 10]] * dim_size  # dimension range
    dim_tys = [False] * dim_size  # dimension type: integer
    dim_order = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys, order=dim_order)  # form up the dimension object
    objective = Objective(sphere_discrete_order, dim)  # form up the objective function
    parameter = Parameter(budget=10000, sequential=False, seed=77)
    sol = Opt.min(objective, parameter)
    sol.print_solution()
    assert sol.get_value() < 200
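# Dimension and Dimension2 express the same search spaces: the ordered discrete
# block above (tys=[False], order=[True]) corresponds to the tuple form used in
# test_racos_performance2, where the third element of a DISCRETE tuple marks
# whether the dimension is ordered. A small sketch of the equivalence for the
# sphere_discrete_order setting (assumes ValueType and Dimension2 are imported
# from zoopt, as in the other test):
def ordered_discrete_dims(dim_size=100):
    dim_v1 = Dimension(dim_size, [[-10, 10]] * dim_size,
                       [False] * dim_size, order=[True] * dim_size)
    dim_v2 = Dimension2([(ValueType.DISCRETE, [-10, 10], True)] * dim_size)
    return dim_v1, dim_v2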
def opt_var_ids_sets_chess_constraint(exs, mapping, constraint):
    num_chess = num_of_chess(exs)
    dim = Dimension(num_chess, [[0, 1]] * num_chess, [False] * num_chess)
    obj = Objective(
        lambda v: -consistent_score_sets_chess(exs, [int(i) for i in v.get_x()], mapping)[0],
        dim=dim, constraint=constraint)
    param = Parameter(budget=100, autoset=True)
    solution = Opt.min(obj, param)
    return solution
def opt_var_ids_sets_constraint(exs, mapping, constraint):
    size = len(flatten(exs))
    dim = Dimension(size=size, regs=[[0, 1]] * size, tys=[False] * size)
    obj = Objective(
        lambda v: -consistent_score_sets(exs, [int(i) for i in v.get_x()], mapping)[0],
        dim=dim, constraint=constraint)
    param = Parameter(budget=100, autoset=True)
    solution = Opt.min(obj, param)
    return solution
def run_ss_test(task_name, layers, in_budget, max_step, repeat, terminal_value):
    gym_task = GymTask(task_name)  # choose a task by name
    gym_task.new_nnmodel(layers)  # construct a neural network
    gym_task.set_max_step(max_step)  # set max step in gym
    budget = in_budget  # number of calls to the objective function
    rand_probability = 0.95  # the probability of sampling from the learned model
    # set dimension
    dim_size = gym_task.get_w_size()
    dim_regs = [[-10, 10]] * dim_size
    dim_tys = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys)

    def resample_function(solution, iteration_num):
        # average the noisy reward over iteration_num evaluations
        eval_list = []
        for i in range(iteration_num):
            eval_list.append(gym_task.sum_reward(solution))
        return sum(eval_list) * 1.0 / len(eval_list)

    # form up the objective function
    objective = Objective(gym_task.sum_reward, dim, re_sample_func=resample_function)
    # by default, the algorithm is sequential RACOS
    parameter = Parameter(budget=budget, autoset=True, suppression=True,
                          terminal_value=terminal_value)
    parameter.set_resample_times(70)
    parameter.set_probability(rand_probability)
    result = []
    total_sum = 0
    total_step = []
    print('solved solution is:')
    for i in range(repeat):
        ins = Opt.min(objective, parameter)
        result.append(ins.get_value())
        total_sum += ins.get_value()
        ins.print_solution()
        print("total step %s" % gym_task.total_step)
        total_step.append(gym_task.total_step)
        gym_task.total_step = 0
    print(result)  # results over the repeats
    print(total_sum / len(result))  # average result
    print(total_step)
    print("------------------------avg total step %s" % (sum(total_step) / len(total_step)))
def generate_negative_data(self, dim_range):
    self.__negative_dataset = []
    dim_size = self.__dim_size  # dimensions
    dim_regs = [dim_range] * dim_size  # dimension range
    dim_tys = [True] * dim_size  # dimension type: real
    dim = Dimension(dim_size, dim_regs, dim_tys)  # form up the dimension object
    budget = self.__Budget  # number of calls to the objective function
    # with autoset=True the algorithm parameters are set by default;
    # set autoset=False to tune them manually, e.g.:
    parameter = Parameter(algorithm="sracos", budget=budget, autoset=True)
    # parameter.set_train_size(6)
    # parameter.set_probability(0.95)
    # parameter.set_uncertain_bits(2)
    # parameter.set_positive_size(1)
    # parameter.set_negative_size(5)
    print("generate negative sample of class: %s" % self.__class_num)
    for i in range(self.__generate_size):
        # initialize SRACOS with randomly drawn samples
        sample_list = random.sample(range(self.__original_data.shape[0]), self.__init_num)
        init_data = self.__original_data[sample_list]
        parameter.set_init_samples(init_data)
        objective = Objective(self.train_Dminus, dim)
        solution = Opt.min(objective, parameter)
        x_minus = solution.get_x()
        self.__negative_dataset.append(x_minus)
        print(x_minus)
        print("[ASG] class %s: Generating negative data, data size: %d"
              % (self.__class_num, len(self.__negative_dataset)))
        print("**************************************************")
    # store the generated data
    if not os.path.exists(self.__gendir):
        os.mkdir(self.__gendir)
    with open(self.__negative_filename, "w") as f:
        for row in self.__negative_dataset:
            for value in row:
                f.write(str(value) + ' ')
            f.write("\n")
    return
def test_performance(self):
    mse = SparseMSE('example/sparse_regression/sonar.arff')
    mse.set_sparsity(8)
    # setup objective
    objective = Objective(func=mse.loss, dim=mse.get_dim(), constraint=mse.constraint)
    parameter = Parameter(algorithm='poss',
                          budget=2 * exp(1) * (mse.get_sparsity() ** 2) * mse.get_dim().get_size())
    # perform sparse regression with constraint |w|_0 <= k
    solution = Opt.min(objective, parameter)
    assert solution.get_value()[0] < 0.6
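# POSS runs Pareto optimization for subset selection; the budget expression
# above encodes the 2*e*k^2*n evaluations suggested by its theoretical analysis
# (k = sparsity, n = number of variables). Its solutions carry a two-component
# value, which is why the test reads get_value()[0] for the loss; the second
# component relates to the |w|_0 <= k constraint. A small inspection helper
# under that assumption (the component layout is inferred from the test above):
def inspect_poss_solution(solution):
    loss = solution.get_value()[0]  # first component: regression loss
    selected = [i for i, bit in enumerate(solution.get_x()) if bit == 1]
    print("loss:", loss)
    print("selected variable indices:", selected)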
def run_test(task_name, layers, in_budget, max_step, repeat):
    gym_task = GymTask(task_name)  # choose a task by name
    gym_task.new_nnmodel(layers)  # construct a neural network
    gym_task.set_max_step(max_step)  # set max step in gym
    budget = in_budget  # number of calls to the objective function
    rand_probability = 0.95  # the probability of sampling from the learned model
    # set dimension
    dim_size = gym_task.get_w_size()
    dim_regs = [[-10, 10]] * dim_size
    dim_tys = [True] * dim_size
    dim = Dimension(dim_size, dim_regs, dim_tys)
    result = []
    total = 0
    print('solved solution is:')
    for i in range(repeat):
        objective = Objective(gym_task.sum_reward, dim)  # form up the objective function
        parameter = Parameter(budget=budget, autoset=True)  # by default, the algorithm is sequential RACOS
        parameter.set_probability(rand_probability)
        ins = Opt.min(objective, parameter)
        result.append(ins.get_value())
        best_stable_ins = objective.get_best_stable_ins()
        if best_stable_ins is not None:
            best_stable_ins_val = best_stable_ins.get_value()
        else:
            best_stable_ins_val = float("inf")
        # re-evaluate the returned solution 100 times and report whichever of
        # the last solution and the best stable solution is better
        ins_rewards = []
        for j in range(100):
            ins_rewards.append(gym_task.sum_reward(ins))
        if np.mean(ins_rewards) < best_stable_ins_val:
            print("last mean", np.mean(ins_rewards))
            print("last std", np.std(ins_rewards))
        else:
            print("stable mean", best_stable_ins.get_value())
            print("stable std", best_stable_ins.get_std())
        total += ins.get_value()
        # ins.print_solution()
    print(result)  # results over the repeats
    print(total / len(result))  # average result
def test_performance(self):
    # load data file
    mse = SparseMSE('example/sparse_regression/sonar.arff')
    mse.set_sparsity(8)
    # setup objective
    objective = Objective(func=mse.loss, dim=mse.get_dim(), constraint=mse.constraint)
    # ponss_theta and ponss_b are parameters of the PONSS algorithm and must be
    # provided by the user: ponss_theta is the noise threshold and ponss_b limits
    # the number of solutions kept in the population set.
    parameter = Parameter(algorithm='poss', noise_handling=True, ponss=True,
                          ponss_theta=0.5, ponss_b=mse.get_k(),
                          budget=2 * exp(1) * (mse.get_sparsity() ** 2) * mse.get_dim().get_size())
    # perform sparse regression with constraint |w|_0 <= k
    solution = Opt.min(objective, parameter)
    assert solution.get_value()[0] < 0.7
def Policy_Search(self):
    def ackley(solution):
        # reshape the flat weight vector into the network's weight matrices,
        # train, and return the negated mean reward (ZOOpt minimizes)
        value = []
        if self.pre:
            w1 = self.Pre_Reg(self.policy_reserve)
        w_1_dim = (self.config.feature_size, self.config.hidden_size)
        w_2_dim = (self.config.hidden_size, self.config.action_size)
        w_flat = solution.get_x()
        if not self.pre:
            value.append(np.reshape(w_flat[0:w_1_dim[0] * w_1_dim[1]], w_1_dim))
            value.append(np.reshape(w_flat[w_1_dim[0] * w_1_dim[1]:], w_2_dim))
        else:
            value.append(w1)
            value.append(np.reshape(w_flat, w_2_dim))
        self.model._assign(value)
        self.model.train_model(None, None, None, None, None)
        mean_reward, max_reward, min_reward = self.Get_Data(self.policy_reserve)
        print("eval max_reward: {}, min_reward: {}, mean_reward: {}".format(
            max_reward, min_reward, mean_reward))
        return -mean_reward

    if not self.pre:
        dim = self.config.feature_size * self.config.hidden_size + \
              self.config.hidden_size * self.config.action_size
    else:
        dim = self.config.hidden_size * self.config.action_size
    obj = Objective(ackley, Dimension(dim, [[-0.01, 0.01]] * dim, [True] * dim))
    solution = Opt.min(obj, Parameter(budget=100 * dim, uncertain_bits=100,
                                      intermediate_result=False, intermediate_freq=1))
    solution.print_solution()
def ackley(sol):
    """Ackley function."""
    x = sol.get_x()
    x_len = len(x)
    seq = 0
    cos = 0
    for i in range(x_len):
        seq += (x[i] - optimal_position[i]) * (x[i] - optimal_position[i])
        cos += np.cos(2.0 * np.pi * (x[i] - optimal_position[i]))
    ave_seq = seq / x_len
    ave_cos = cos / x_len
    value = -20 * np.exp(-0.2 * np.sqrt(ave_seq)) - np.exp(ave_cos) + 20.0 + np.e
    return value


dim_size = 20  # dimension size
dim = Dimension(dim_size, [[-1, 1]] * dim_size, [True] * dim_size)
# dim = Dimension2([(ValueType.CONTINUOUS, [-1, 1], 1e-6)] * dim_size)
obj = Objective(ackley, dim)
# perform optimization
solution = Opt.min(obj, Parameter(budget=100 * dim_size, intermediate_result=True))
# print the solution
print(solution.get_x(), solution.get_value())
# parallel optimization for time-consuming tasks
# solution = Opt.min(obj, Parameter(budget=100*dim_size, parallel=True, server_num=3))
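# The commented-out call above enables ZOOpt's parallel evaluation: with
# parallel=True, server_num worker processes evaluate sampled solutions
# concurrently, which pays off when one objective call is expensive (e.g. a
# simulation). A sketch of the same run in parallel, guarded for
# multiprocessing-safe module import:
if __name__ == '__main__':
    par_solution = Opt.min(obj, Parameter(budget=100 * dim_size,
                                          parallel=True, server_num=3))
    print(par_solution.get_x(), par_solution.get_value())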
def main():
    global env, agent_list
    # the three tasks differ only in their base-policy directory and env prefix
    task_names = {"pusher": "Pusher", "striker": "Striker", "thrower": "Thrower"}
    if args.data not in task_names:
        print("Invalid argument")
        return
    task = task_names[args.data]
    base_dir = '/home/zhangc/POSEC/base_policy/%s' % task
    agent_list = []
    best_w_list = []
    for i in xrange(100, 120):
        # load the 100 base-policy agents
        for j in range(100):
            hdf = h5py.File('%s/%s%d' % (base_dir, task, j), 'r')
            snapnames = hdf['agent_snapshots'].keys()
            snapname = snapnames[-1]
            agent = cPickle.loads(hdf['agent_snapshots'][snapname].value)
            agent.stochastic = False
            timestep_limit = 200
            if hasattr(agent, "reset"):
                agent.reset()
            agent_list.append(agent)
        env = gym.make("%s-v%d" % (task, i))
        ob = env.reset()
        dim = 100
        obj = Objective(ensembel, Dimension(dim, [[-1, 1]] * dim, [True] * dim))  # setup objective
        # perform optimization
        solution = Opt.min(obj, Parameter(budget=250))
        # print result
        best_w, reward = solution.print_solution()
        with open('%s/%s%d_best_w.txt' % (base_dir, task, i), 'a') as f:
            f.write(str(best_w))
        best_w_list.append(best_w)
    # store the best ensemble weights of every environment, one comma-separated line each
    with open('%s/all_best_w.txt' % base_dir, 'w') as outfile:
        for l in best_w_list:
            outfile.write(",".join(str(w) for w in l))
            outfile.write("\n")
import numpy as np
import matplotlib.pyplot as plt
from zoopt import Dimension, Objective, Parameter, Opt


def ackley(solution):
    x = solution.get_x()
    bias = 0.2
    value = -20 * np.exp(-0.2 * np.sqrt(sum([(i - bias) * (i - bias) for i in x]) / len(x))) - \
        np.exp(sum([np.cos(2.0 * np.pi * (i - bias)) for i in x]) / len(x)) + 20.0 + np.e
    return value


dim = 100  # dimension
obj = Objective(ackley, Dimension(dim, [[-1, 1]] * dim, [True] * dim))
# perform optimization
solution = Opt.min(obj, Parameter(budget=100 * dim))
# print result
solution.print_solution()
plt.plot(obj.get_history_bestsofar())
plt.savefig('figure.png')
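# For repeated experiments, ZOOpt also provides ExpOpt (used by the test
# functions above), which runs the optimization several times and can plot the
# averaged convergence curve itself; repeat/plot/plot_file follow ZOOpt's
# documented interface. A sketch on the same objective:
from zoopt import ExpOpt

solution_list = ExpOpt.min(obj, Parameter(budget=100 * dim),
                           repeat=3, plot=True, plot_file='ackley_exp.png')
for s in solution_list:
    print(s.get_x(), s.get_value())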
for i in range(repeat):
    for j in range(len(budget_list)):
        dim = Dimension(dim_size, [dim_regs] * dim_size, [True] * dim_size)
        # form the objective function
        objective = Objective(lambda sol: noisy_function_dict[obj_name](sol.get_x()), dim)
        if args.noise_handling:
            parameter = Parameter(budget=budget_list[j], noise_handling=True, suppression=True,
                                  non_update_allowed=100, resample_times=20, balance_rate=0.5)
        else:
            parameter = Parameter(budget=budget_list[j], intermediate_result=True,
                                  intermediate_freq=1000)
        parameter.set_positive_size(5)
        sol = Opt.min(objective, parameter)
        # record the noise-free value of the returned solution
        real_value[i][j] = base_function_dict[args.objective](sol.get_x())

log_address = os.path.join(project_dir, 'ZOOpt_exp/log/noisy/')
if args.noise_handling:
    file_name = os.path.join(log_address, '{}_nh_{}.txt'.format(obj_name, dim_size))
else:
    file_name = os.path.join(log_address, '{}_{}.txt'.format(obj_name, dim_size))
os.makedirs(log_address, exist_ok=True)
np.savetxt(file_name, np.array(real_value))
print(real_value.shape)
def main():
    global agent_list, theta_best, action_sample
    # the three tasks differ only in their base-policy directory and env prefix
    task_names = {"pusher": "Pusher", "striker": "Striker", "thrower": "Thrower"}
    if args.data not in task_names:
        print("Invalid argument")
        return
    task = task_names[args.data]
    base_dir = '/home/zhangc/POSEC/base_policy/%s' % task
    env_list = []
    agent_list = []
    observation_list = []
    for i in xrange(100, 120):
        # load the 100 base-policy agents
        for j in range(100):
            hdf = h5py.File('%s/%s%d' % (base_dir, task, j), 'r')
            snapnames = hdf['agent_snapshots'].keys()
            snapname = snapnames[-1]
            agent = cPickle.loads(hdf['agent_snapshots'][snapname].value)
            agent.stochastic = False
            agent_list.append(agent)
        env = gym.make("%s-v%d" % (task, i))
        env_list.append(env)
        observation = env.reset()
        observation_list.append(observation)
    # collect a 15-d feature per environment from 5 random steps
    feature = np.zeros((20, 15))
    action_sample = []
    for t in range(5):
        for m in range(20):
            a = env.action_space.sample()
            observation, reward, done, info = env_list[m].step(a)
            feature[m, 3 * t:3 * t + 3] = observation[14:17]
            action_sample.append(a)
    X = feature
    theta_best = []
    Y = loadfile('%s/all_best_w.txt' % base_dir)
    m, n = np.shape(X)
    numIterations = 1000
    alpha = 0.05
    theta = np.ones(n)
    # regress each of the 100 ensemble weights on the environment features
    for i in range(100):
        YY = Y[:, i]
        theta_op = Regression(X, YY, theta, alpha, m, numIterations)
        theta_best.append(theta_op)
        with open('%s/best_theta.txt' % base_dir, 'a') as f:
            f.write(("theta%d:" + str(theta_op) + "\n") % i)
    dim = 35  # dimension
    obj = Objective(max_a_w, Dimension(dim, [[-2, 2]] * dim, [True] * dim))
    solution = Opt.min(obj, Parameter(budget=250))
    best_action, reward = solution.print_solution()
    with open('%s/best_action.txt' % base_dir, 'a') as f:
        f.write("best_action:" + str(best_action) + "\n")
        f.write("reward:" + str(reward))
# setup optimization problem
dim_size = 100  # dimensions
dim_regs = [[-1, 1]] * dim_size  # dimension range
dim_tys = [True] * dim_size  # dimension type: real
dim = Dimension(dim_size, dim_regs, dim_tys)  # form up the dimension object
objective = Objective(sphere, dim)  # form up the objective function
# setup algorithm parameters
budget = 1000  # number of calls to the objective function
parameter = Parameter(budget=budget, sequential=True)  # by default, the algorithm is sequential RACOS
# perform the optimization
solution = Opt.min(objective, parameter)
# store the optimization result
print('solved solution is:')
solution.print_solution()
result.append(solution.get_value())
# to plot the optimization history, uncomment the following code (matplotlib is required)
# plt.plot(objective.get_history_bestsofar())
# plt.savefig("figure.png")
result_analysis(result, 1)
t2 = time.clock()
print('time cost: %f seconds' % (t2 - t1))