def run(self):
    while True:
        windowSurfaceObj.fill(whiteColor)
        # The environment's food set changes while this loop runs, so hold the
        # lock to avoid iterating over a mutating set.
        self.environment.lock.acquire()
        for food in self.environment.food_set:
            x, y = convert_to_display_loc(food.pos)
            pygame.gfxdraw.aacircle(windowSurfaceObj, x, y, int(0.01 * display_width), redColor)
            pygame.gfxdraw.filled_circle(windowSurfaceObj, x, y, int(0.01 * display_width), redColor)
        for cell in self.environment.cell_list:
            self.draw_wrapping_circle(cell, cell.radius, cell.color)
        self.environment.lock.release()
        for event in pygame.event.get():
            if event.type == QUIT:
                pygame.quit()
                return
            elif event.type == KEYDOWN:
                if event.key == K_u:
                    environment.Environment().resistance += 100
                elif event.key == K_d:
                    environment.Environment().resistance -= 100
        pygame.display.update()
        fpsClock.tick(60)
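# Hypothetical helper (not shown in the original source): convert_to_display_loc
# is assumed to map a world-space food position to integer pixel coordinates.
# A minimal sketch, assuming a square world of side world_size (an assumed global):
#
# def convert_to_display_loc(pos):
#     scale = display_width / world_size
#     return int(pos[0] * scale), int(pos[1] * scale)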
def __init__(self):
    self._typenv = environment.Environment()
    for type_name, kind in primitive_types():
        self._typenv.define(type_name, kind)
    self._env = environment.Environment()
    for value_name, type in primitive_values():
        self._env.define(value_name, type)
def test_agent(agent, x_start, y_start, epsilon, goal, pit, labyrinth, plots):
    # initialize and plot the environment (x, y, and walls come from module scope)
    state = [x_start, y_start]
    env = environment.Environment(x, y, state, goal, pit, labyrinth)
    if plots:
        plot_map(x, y, state, goal, pit, labyrinth, walls, 0)
    reward = 0
    # run episodes
    for step in range(1, 30):
        # find state index
        state_index = state[0] * y + state[1]
        # choose an action
        action = agent.select_action(state_index, epsilon)
        # the agent moves in the environment
        result = env.move(action)
        # update state
        state = result[0]
        reward += result[1]
        # plot the environment in the current state
        if plots:
            plot_map(x, y, state, goal, pit, labyrinth, walls, step)
        if state[0] == goal[0] and state[1] == goal[1]:
            print('The agent reached the goal starting from x:', x_start, 'y:', y_start, 'in', step, 'steps')
            break
def train_val_agent(learner, epsilon, alpha, episodes, episode_length, goal, pit, labyrinth, walls, x, y, validation):
    cumulative = 0
    tot_reward = []
    # perform the training
    for index in range(0, episodes):
        if labyrinth:
            # start from a random state not on the walls
            initial = [np.random.randint(0, x), np.random.randint(0, y)]
            for i in walls:
                yy = i % 10
                xx = i // 10
                if initial[0] == xx and initial[1] == yy:
                    initial = start_walls(initial, walls, x, y)
        else:
            # start from a random state
            initial = [np.random.randint(0, x), np.random.randint(0, y)]
        # initialize environment
        state = initial
        env = environment.Environment(x, y, state, goal, pit, labyrinth)
        reward = 0
        # run episode
        for step in range(0, episode_length):
            # find state index
            state_index = state[0] * y + state[1]
            # choose an action
            action = learner.select_action(state_index, epsilon[index])
            # the agent moves in the environment
            result = env.move(action)
            # Q-learning update
            next_index = result[0][0] * y + result[0][1]
            learner.update(state_index, action, result[1], next_index, alpha[index], epsilon[index])
            # update state and reward
            reward += result[1]
            state = result[0]
        reward /= episode_length
        cumulative += reward
        tot_reward.append(cumulative)
    # save the agent
    with open('agent.obj', 'wb') as agent_file:
        dill.dump(learner, agent_file)
    if validation:
        val_reward = validation_agent(learner, epsilon[episodes - 1], episode_length, goal, pit, labyrinth, walls, x, y)
        print('validation reward:', val_reward)
        return val_reward
    return tot_reward
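# Usage sketch (illustrative, not from the original source): epsilon and alpha
# are indexed per episode above, so they are naturally per-episode decay
# schedules. The grid size, episode counts, and decay shapes are assumptions.
#
# episodes = 500
# eps_schedule = np.linspace(0.8, 0.05, episodes)    # exploration decays
# alpha_schedule = np.linspace(0.5, 0.05, episodes)  # learning rate decays
# curve = train_val_agent(learner, eps_schedule, alpha_schedule, episodes,
#                         episode_length=50, goal=[9, 9], pit=[4, 4],
#                         labyrinth=False, walls=[], x=10, y=10,
#                         validation=False)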
def generate(self):
    # 'options' and 'app' appear to be module-level state in this codebase
    env = environment.Environment(self.source_dir, self.build_dir, self.meson_script_file, options)
    mlog.initialize(env.get_log_dir())
    mlog.log(mlog.bold('The Meson build system'))
    mlog.log('Version:', coredata.version)
    mlog.log('Source dir:', mlog.bold(app.source_dir))
    mlog.log('Build dir:', mlog.bold(app.build_dir))
    if env.is_cross_build():
        mlog.log('Build type:', mlog.bold('cross build'))
    else:
        mlog.log('Build type:', mlog.bold('native build'))
    b = build.Build(env)
    intr = interpreter.Interpreter(b)
    intr.run()
    if options.backend == 'ninja':
        import ninjabackend
        g = ninjabackend.NinjaBackend(b, intr)
    elif options.backend == 'vs2010':
        import vs2010backend
        g = vs2010backend.Vs2010Backend(b, intr)
    elif options.backend == 'xcode':
        import xcodebackend
        g = xcodebackend.XCodeBackend(b, intr)
    else:
        raise RuntimeError('Unknown backend "%s".' % options.backend)
    g.generate()
    env.generating_finished()
    dumpfile = os.path.join(env.get_scratch_dir(), 'build.dat')
    with open(dumpfile, 'wb') as f:
        pickle.dump(b, f)
def read_argument():
    parser = argparse.ArgumentParser('Reinforcement Learning')
    parser.add_argument('goal_state_reward', type=float, help='The reward for reaching the goal state')
    parser.add_argument('pit_fall_reward', type=float, help='The reward for falling into a pit')
    parser.add_argument('move_reward', type=float, help='The reward for moving')
    parser.add_argument('give_up_reward', type=float, help='The reward for giving up')
    parser.add_argument('number_of_trials', type=int, help='The number of learning trials to run')
    parser.add_argument('exploration_epsilon', type=float, help='The weight for exploration')
    args = vars(parser.parse_args())
    env = environment.Environment(
        args['goal_state_reward'],
        args['pit_fall_reward'],
        args['move_reward'],
        args['give_up_reward'])
    sarsa = SARSA.SARSA(env, args['number_of_trials'], args['exploration_epsilon'])
    return env, sarsa
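# Example invocation (illustrative values; the script name is hypothetical):
#   python sarsa_main.py 10.0 -100.0 -1.0 -5.0 1000 0.1
# The positionals map, in order, to goal_state_reward, pit_fall_reward,
# move_reward, give_up_reward, number_of_trials, exploration_epsilon.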
def EVAL(ast, env):
    # print("evaluating", ast)
    if not isinstance(ast, list):
        return eval_ast(ast, env)
    if len(ast) == 0:
        return ast
    op = ast[0]
    if isinstance(op, parser.StrSymbol) and op.val == 'def!':
        # Call the set method of the current environment (the env parameter of
        # EVAL) using the unevaluated first parameter (second list element) as
        # the symbol key and the evaluated second parameter as the value.
        val = EVAL(ast[2], env)
        env.set(ast[1].val, val)
        return val
    elif isinstance(op, parser.StrSymbol) and op.val == 'let*':
        # Evaluate the binding list in a fresh child environment, then evaluate
        # the body in that environment.
        newenv = environment.Environment(env)
        for i in range(0, len(ast[1]), 2):
            name = ast[1][i].val
            val = EVAL(ast[1][i + 1], newenv)
            newenv.set(name, val)
        val = EVAL(ast[2], newenv)
        return val
    else:
        ast_list = eval_ast(ast, env)
        return ast_list[0](*ast_list[1:])
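# Examples of the two special forms handled above (results assume '+' is bound
# in the surrounding environment):
#   (def! a 6)                      ;=> 6, and binds a in the current env
#   (let* (b 2 c (+ b 1)) (+ b c))  ;=> 5, bindings are local to the let* env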
def evaluate_agent(ag_obj):
    env = environment.Environment(buoys, steps_between_actions, vessel_id, rudder_id,
                                  thruster_id, scenario, goal, goal_heading_e_ccw,
                                  goal_vel_lon, True)
    env.set_up()
    agent = learner.Learner(load_saved_regression=ag_obj, action_space_name='large_action_space')
    env.set_single_start_pos_mode([8000, 4600, -103.5, 3, 0, 0])
    # env.set_single_start_pos_mode([6600, 4200, -102, 3, 0, 0])
    env.new_episode()
    final_flag = 0
    with open('debug.txt', 'w') as outfile:
        for step in range(evaluation_steps):
            state = env.get_state()
            print(state, file=outfile)
            action = agent.select_action(state)
            print(action, file=outfile)
            state_prime, reward = env.step(action[0], action[1])
            print(state_prime, file=outfile)
            print(reward, file=outfile)
            print('\n', file=outfile)
            final_flag = env.is_final()
            print("***Evaluation step " + str(step + 1) + " Completed")
            if final_flag != 0:
                break
def generate_one_seq_q(self):
    env = environment.Environment(size)
    agent = q_agent.QAgent(env)
    env.maze.display_cui()
    image = []
    directions = []
    coordinates = []
    image.append(self.visual_image())
    coordinates.append(0)
    while not env.get_goal():
        s, a, next_s = agent.choose_action()
        directions.append(a)
        image.append(self.visual_image(s))
        coordinates.append(s)
    env.reset()
    inputs = []
    for i in range(len(directions)):
        inputs.append(directions[i] + image[i].tolist())
    return {'input': inputs, 'output': image[1:], 'coordinates': coordinates[1:]}
def train_from_single_episode(episodes, pickle_vars, ep_number):
    env = environment.Environment(buoys, steps_between_actions, vessel_id, rudder_id,
                                  thruster_id, scenario, goal, goal_heading_e_ccw,
                                  goal_vel_lon, False)
    replace_reward = reward.RewardMapper(plot_flag=False)
    replace_reward.set_boundary_points(buoys)
    replace_reward.set_goal(goal, goal_heading_e_ccw, goal_vel_lon)
    batch_learner = learner.Learner(file_to_save=learner_file,
                                    action_space_name=pickle_vars['action_space'],
                                    r_m_=replace_reward)
    episode = episodes[ep_number]
    with open('debug_ep.txt', 'w') as outfile:
        for transition in episode['transitions_list']:
            print(transition[0], file=outfile)
            print(list(transition[1]), file=outfile)
            print(transition[2], file=outfile)
            print(transition[3], file=outfile)
            print('\n', file=outfile)
    batch_learner.add_to_batch(episode['transitions_list'], episode['final_flag'])
    batch_learner.set_up_agent()
    for it in range(max_fit_iterations):
        if it % 10 == 0:
            batch_learner.fqi_step(1, debug=True)
        else:
            batch_learner.fqi_step(1, debug=False)
def get_env_from_cfg(cfg, real_env=False, **kwargs):
    kwarg_list = [
        'room_length', 'room_width', 'num_cubes', 'obstacle_config',
        'use_distance_to_receptacle_channel', 'distance_to_receptacle_channel_scale',
        'use_shortest_path_to_receptacle_channel', 'use_shortest_path_channel',
        'shortest_path_channel_scale', 'use_position_channel', 'position_channel_scale',
        'partial_rewards_scale', 'use_shortest_path_partial_rewards',
        'collision_penalty', 'nonmovement_penalty', 'use_shortest_path_movement',
        'fixed_step_size', 'use_steering_commands', 'steering_commands_num_turns',
        'ministep_size', 'inactivity_cutoff', 'random_seed',
    ]
    original_kwargs = {}
    for kwarg_name in kwarg_list:
        original_kwargs[kwarg_name] = cfg[kwarg_name]
    original_kwargs.update(kwargs)
    if real_env:
        return environment.RealEnvironment(**original_kwargs)
    return environment.Environment(**original_kwargs)
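# Usage sketch (not from the original source): cfg is assumed to be a dict-like
# config object; explicit keyword arguments override its values, e.g. pinning
# the seed for a reproducible evaluation run:
#   sim_env = get_env_from_cfg(cfg, random_seed=0)
#   real = get_env_from_cfg(cfg, real_env=True)  # physical-environment variant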
def History(n_episodes=1000):
    model = mrp.MRP(mrp.RNode([[1, 2, 3], [], []]))
    # set all rewards to -1
    model.reward_matrix.fill(-1)
    # make nodes 7, 18, and 59 terminal: reward 1 and no outgoing neighbours
    model.nodes[7].reward = 1
    model.nodes[18].reward = 1
    model.nodes[59].reward = 1
    model.nodes[7].neighbours = []
    model.nodes[18].neighbours = []
    model.nodes[59].neighbours = []
    agent = TDAgent()
    env = environment.Environment(model)
    history = []
    for i in range(n_episodes):
        # pick a random non-terminal start state
        while True:
            rnd_state = np.random.randint(0, len(model.nodes))
            if len(model.nodes[rnd_state].neighbours) != 0:
                break
        env.take_sample(rnd_state, agent.state_action_map)
        history.append((i, agent.play_counter))
        # print('Episode:' + str(agent.n_episodes) + ', Episode Length Mean: ' + str(agent.mean_episode_length))
    return (np.round(agent.mean_episode_length, 2), history)
def init_new_map(walls, init_position):
    environment = env.Environment(screen, COLOR_ENVIROMENT, walls)
    robot = rb.Robot(screen, 2 * ROBOT_RADIUS, MAX_VELOCITY, MAX_DISTANCE_SENSOR)
    robot.position = init_position
    robot.use_sensors(walls)
    return environment, robot
def __init__(self, bounds, factions):
    # Create the environment
    self.field = Env.Environment(bounds[0], 0, bounds[1], 0)
    self.targets = []
    self.agents = []
    self.private_links = {}
    self.broadcast_channel = Comm.PublicLink()
    # Create factions, with 6 bodies each
    for faction in factions:
        faction_bodies = []
        while len(faction_bodies) < 6:
            # Randomly generate a map coordinate and create a body at that location
            x_coord = Rnd.randrange(self.field.x_lower, self.field.x_upper)
            y_coord = Rnd.randrange(self.field.y_lower, self.field.y_upper)
            body = Bod.Body(self.field, x_coord, y_coord)
            # If the location is valid, add it to the list; otherwise re-roll
            if self.field.registerAgent(body):
                faction_bodies.append(body)
        # Insert target controllers (bodies 1..5 are targets; body 0 is the agent)
        for i in range(1, len(faction_bodies)):
            self.targets.append(Con.TargetController(faction, faction_bodies[i], None))
        # Insert agent controller
        agent_stats = {
            "faction": faction,
            "controller": self.createAgent(faction, faction_bodies[0], self.broadcast_channel.send),
            "collected_targets": 0,
            "steps_taken": 0,
            "happiness": [],
        }
        self.private_links[faction] = Comm.PrivateLink(agent_stats["controller"].perceiveMessage, faction)
        self.broadcast_channel.registerChannel(self.private_links[faction])
        self.agents.append(agent_stats)
def default(self, inp):
    if inp == 'x' or inp == 'q':
        return self.do_exit(inp)
    if inp == 'c' or inp == 'start':
        # initialize dataframe to keep the results of each run
        run_results_df = pd.DataFrame()
        for run in range(1, args['runs'] + 1):
            print("-Run {0}/{1}".format(run, args['runs']))
            network = environment.Environment(
                args['n_agents'], args['n_liars'], args['n_experts'],
                args['n_connections'], args['cluster_distance'], args['n_news'],
                args['n_steps'], args['connectivity_type'],
                args['communication_protocol'], args['conversation_protocol'])
            # combine run results with existing ones
            run_results_df = pd.concat([run_results_df, network.run_simulation()])
        # export results dataframe
        io_utils.export_results(run_results_df)
    if inp == 'show_values':
        self.do_show_values()
    if inp == 'show_description':
        self.do_show_description()
    if inp == 'run_stepwise':
        self.run_stepwise()
def main():
    starting_food_count = 100  # input('Enter starting amount of food: ')
    starting_cell_count = 100  # input('Enter starting amount of cells: ')
    environment.e = World = environment.Environment(starting_food_count, starting_cell_count)
    old_food_list_length = len(World.food_set)
    number_of_test_ticks = 10000  # input('Enter number of test ticks: ')
    t1 = time()
    sum_runs = 0
    count_runs = 0
    max_run = 0
    for i in range(number_of_test_ticks):
        World.tick()
        t2 = time()
        this_run = 1 / (t2 - t1)  # ticks per second for this tick
        sum_runs += this_run
        count_runs += 1
        # if this_run > max_run: max_run = this_run
        print('food:', len(World.food_set), '\t\tTick:', i, this_run, ', avg:', sum_runs / count_runs)
        t1 = time()
        if len(World.food_set) == 0:
            break
def evaluate_model(graph, rqs, inventory, degree_constraint, edge_cap, t0, cooling,
                   iterations, seed, routing_order_rnd=False, subsample=None):
    env = environment.Environment(graph, degree_limit=degree_constraint)
    myalgo = SimulatedAnnealing(env=env, reqs=rqs, num_extra_edges=inventory,
                                num_steps=iterations, temperature0=t0,
                                cooling_factor=cooling, seed=seed,
                                subsample=subsample,
                                rnd_routing_order=routing_order_rnd,
                                outpath="data/", edge_capacity=edge_cap)
    res_num_routed, res_env, res_sequence = myalgo.run()
    return res_env.topology, res_sequence
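# Usage sketch (illustrative values; not from the original source). temperature0
# and cooling_factor suggest a geometric schedule T_k = t0 * cooling**k, but the
# actual schedule lives inside SimulatedAnnealing.
#   topology, sequence = evaluate_model(graph, rqs, inventory=10,
#                                       degree_constraint=4, edge_cap=100,
#                                       t0=100.0, cooling=0.95,
#                                       iterations=2000, seed=0)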
def main():
    # Obstacle config
    obstacle_config = 'small_empty'
    # obstacle_config = 'small_columns'
    # obstacle_config = 'large_columns'
    # obstacle_config = 'large_divider'

    # Room config
    kwargs = {}
    kwargs['room_width'] = 1 if obstacle_config.startswith('large') else 0.5
    kwargs['num_cubes'] = 20 if obstacle_config.startswith('large') else 10
    kwargs['obstacle_config'] = obstacle_config
    # kwargs['random_seed'] = 0

    # Visualization
    kwargs['use_gui'] = True
    kwargs['show_debug_annotations'] = True
    # kwargs['show_occupancy_map'] = True

    # Shortest path components
    # kwargs['use_distance_to_receptacle_channel'] = False
    # kwargs['use_shortest_path_to_receptacle_channel'] = True
    # kwargs['use_shortest_path_channel'] = True
    # kwargs['use_shortest_path_partial_rewards'] = True
    # kwargs['use_shortest_path_movement'] = True

    env = environment.Environment(**kwargs)
    agent = ClickAgent(env)
    agent.run()
    env.close()
def rust_comparison_test(raw_params, out_dir, name, log_level, box_width=2, box_height=1):
    params = parameters.refine_raw_params(raw_params)
    cell_d = 2 * params["cell_r"]
    box_height = box_height * cell_d
    box_width = box_width * cell_d
    box_x_offset = 0.0
    box_y_offset = 0.0
    cell_group_bbox = np.array([
        box_x_offset,
        box_x_offset + box_width,
        box_y_offset,
        box_height + box_y_offset,
    ])
    params["cell_group_bbox"] = cell_group_bbox
    an_environment = environment.Environment(out_dir, name, log_level, params)
    an_environment.execute_system_dynamics()
    print("Done.")
    return params
def History(n_episodes=1000):
    model = mrp.MRP(mrp.RNode([[1, 2, 3], [], []]))
    # set all rewards to -1
    model.reward_matrix.fill(-1)
    # set termination states: nodes 7, 18, and 59 get reward 1 and no neighbours
    model.nodes[7].reward = 1
    model.nodes[18].reward = 1
    model.nodes[59].reward = 1
    model.nodes[7].neighbours = []
    model.nodes[18].neighbours = []
    model.nodes[59].neighbours = []
    agent = EveryVisitExploringStart()
    env = environment.Environment(model)
    history = []
    for i in range(n_episodes):
        # pick a random non-terminal start state (exploring starts)
        while True:
            rnd_state = np.random.randint(0, len(model.nodes))
            if len(model.nodes[rnd_state].neighbours) != 0:
                break
        # '% 1' is always 0, so the policy is updated every episode
        if agent.n_episodes % 1 == 0:
            agent.update_policy()
        env.take_sample(rnd_state, agent.state_action_map)
        history.append((i, agent.last_sample_size))
    return (np.round(agent.mean_episode_length, 2), history)
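# Note (an assumption about these agent classes, not confirmed by the snippet):
# this is the Monte Carlo counterpart of the TDAgent History above — same MRP,
# same terminal nodes 7/18/59 — but with exploring starts and a policy update
# once per sampled episode rather than per-step TD updates.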
def test(global_model):
    env = environment.Environment()
    scores = []
    print_interval = 5
    for n_epi in range(max_test_ep):
        done = False
        s = env.reset()
        while not done:
            df_target = s[s[KEY_DATA_TARGET_NUM] == num_target]
            df_target = df_target.drop([KEY_DATA_TARGET_NUM], axis=1)
            if len(list(df_target.values)) == 0:
                break
            inputs = np.array(list(df_target.values)[0])
            day_target = list(df_target.index)[0]
            prob = global_model.pi(torch.from_numpy(inputs).float())
            a = Categorical(prob).sample().item()
            action = [{KEY_ACTION_DAY: day_target,
                       KEY_ACTION_NUM: num_target,
                       KEY_ACTION_VALUE: a}]
            s_prime, r, done = env.step(action)
            # if n_epi % print_interval == 0 and n_epi != 0:
            #     print(day_target, ': ', action, ', Reward: ', r)
            scores.append(r)
            s = s_prime
        if n_epi % print_interval == 0 and n_epi != 0:
            print("# of episode :{}, avg score : {:.1f}".format(n_epi, np.mean(scores)))
            scores = []
def __init__(self, train_featurizer=False):
    self.featurizer = Featurizer(train_featurizer)
    self.env = environment.Environment(32, 32)
    self.MAX_ITER = 25
    self.NUM_GAMES_PER_WEIGHTSET = 4
    self.FITNESS_THRESH = 10000
    self.dim = self.featurizer.bottleneck_dim * 2
    self.init_weightset = np.zeros(self.dim)
    # Calculate CMA-ES hyperparameters from the dimension
    self.lam = int(4 + np.floor(3 * np.log(self.dim)))
    self.mu = int(np.floor(self.lam / 2))
    # Log-rank recombination weights (they sum to 1; see the check below)
    self.w = np.zeros(self.mu)
    for i in range(1, self.mu + 1):
        self.w[i - 1] = ((np.log(self.mu + 1) - np.log(i))
                         / (self.mu * np.log(self.mu + 1)
                            - np.sum([np.log(j) for j in range(1, self.mu + 1)])))
    self.mu_eff = 1 / (self.w @ self.w)
    self.c_sig = (self.mu_eff + 2) / (self.dim + self.mu_eff + 3)
    self.d_sig = (1 + 2 * np.max([0.0, np.sqrt((self.mu_eff - 1) / (self.dim + 1)) - 1])
                  + self.c_sig)
    self.c_c = 4 / (self.dim + 4)
    self.mu_co = self.mu_eff
    self.c_co = ((1 / self.mu_co) * (2 / (self.dim + np.sqrt(2)) ** 2)
                 + (1 - 1 / self.mu_co)
                 * np.min([1.0, (2 * self.mu_eff - 1) / ((self.dim + 2) ** 2 + self.mu_eff)]))
    # Expected norm of an N(0, I) vector: sqrt(n) * (1 - 1/(4n) + 1/(21n^2))
    self.gauss_dist_norm = np.sqrt(self.dim) * (1 - 1 / (4 * self.dim)
                                                + 1 / (21 * self.dim ** 2))
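# Added check (not from the original source): the log-rank weights above are the
# standard CMA-ES recombination weights. The normalizer equals the sum of the
# numerators, so the weights sum to 1. A self-contained verification, with
# mu = 7 as an illustrative value:
import numpy as np

def cma_weights(mu):
    # w_i = (ln(mu+1) - ln i) / (mu * ln(mu+1) - sum_j ln j), for i = 1..mu
    num = np.log(mu + 1) - np.log(np.arange(1, mu + 1))
    return num / (mu * np.log(mu + 1) - np.sum(np.log(np.arange(1, mu + 1))))

assert np.isclose(cma_weights(7).sum(), 1.0)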
def main():
    env = environment.Environment()
    q = Qnet().to(device)
    q_target = Qnet().to(device)
    q_target.load_state_dict(q.state_dict())
    memory = ReplayBuffer()
    print_interval = 20
    score = 0.0
    optimizer = optim.Adam(q.parameters(), lr=learning_rate)
    for n_epi in range(10000):
        # epsilon decays linearly from 0.08 to the 0.01 floor
        epsilon = max(0.01, 0.08 - 0.01 * (n_epi / 200))
        state = env.reset()
        done = False
        actions = []
        while not done:
            day_target, inputs = makeInput(state)
            a = q.sampleAction(inputs, epsilon)
            actions.append({KEY_ACTION_DAY: day_target,
                            KEY_ACTION_NUM: num_target,
                            KEY_ACTION_VALUE: a.item()})
            next_state, reward, modified_r, done = env.step(actions)
            done_mask = 0.0 if done else 1.0
            memory.put((state, a, reward / 100.0, next_state, done_mask))
            score += modified_r
            state = next_state
            if done:
                break
        if memory.size() > 2000:
            train(q, q_target, memory, optimizer)
        if n_epi % print_interval == 0 and n_epi != 0:
            print('# of episode: {}, avg score: {}, loss: {}'.format(
                n_epi, round(score / print_interval, 3), LOSS))
            score = 0
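# Added note (an assumption, not confirmed by this snippet): q_target is only
# synchronized at construction. The usual DQN pattern also refreshes it
# periodically, e.g. inside the episode loop:
#   if n_epi % print_interval == 0 and n_epi != 0:
#       q_target.load_state_dict(q.state_dict())
# The train() used here may already do this internally.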
def reset_game():
    run_ = True
    # Init Player
    p_ = player.Player()
    # Init Enemy
    e_ = enemy.Enemy()
    # Group for bullets
    bullets_ = pygame.sprite.Group()
    # Init Environment for the AI to learn in
    env_ = environment.Environment(e_, p_, bullets_)
    score_ = 0
    # Normalize the enemy's x position from [0, 1000] into [0, 1]
    init_x_enemy_norm_ = np.interp(e_.x, [0, 1000], [0, 1])
    # Normalize the player's x position from [0, 1000] into [0, 1]
    init_x_player_norm_ = np.interp(p_.x, [0, 1000], [0, 1])
    # Set the state to the x positions of the enemy and player
    my_tensor_ = tf.constant([[init_x_enemy_norm_, init_x_player_norm_]])
    my_variable_ = tf.Variable(my_tensor_, dtype=np.float64)
    config.level = 1
    return run_, p_, e_, bullets_, env_, score_, my_variable_
def __init__(self, n):
    self.env = environment.Environment(n)
    self.size = n
    self.current_state = ()
    self.cost_map = {}
    random.seed(time.time())
def initialize_from_info(self):
    """Initialize objects specified in info.json"""
    self.environment_name = self.info['environment']
    self.env = environment.Environment(self.environment_name, self.main_group)
    self.walkpath = walkpath.WalkPath(dict_repr=self.info['walkpath'])
    self.camera = camera.Camera(dict_repr=self.info['camera_points'])
def main():
    print("test")
    env = environment.Environment(["input"], "output")
    translationUnitRootFile = env.resolveFile("gcamera.h")
    rootDAst = dast.Factory(translationUnitRootFile, env)
    print(rootDAst)
def __init__(self, render, xml_file):
    if render:
        self.renderer = renderer.Renderer(600, 500, 'Maze Simulator')
    self.env = env.Environment(xml_file)
    self.pois_reached = [False] * len(self.env.pois)
    self.reached_goal = False
def do_one(config):
    my_env = environment.Environment(config)
    my_env.setup_environment()
    my_run = run.Run(my_env)
    my_run.prefix = "simulation"
    my_run.run_many()
    my_run.prefix = "reconstruction"
    my_run.run_many()
def run_stepwise(self):
    network = environment.Environment(
        args['n_agents'], args['n_liars'], args['n_experts'],
        args['n_connections'], args['cluster_distance'], args['n_news'],
        args['n_steps'], args['connectivity_type'],
        args['communication_protocol'], args['conversation_protocol'])
    # export the stepwise results dataframe
    io_utils.export_results(network.run_simulation(stepwise=True))