def __init__(self, policy, optimizer, env, test_envs, pretrained_lm, writer, out_path, gamma=1., lr=1e-2,
             grad_clip=None, scheduler=None, pretrain=False, update_every=50, num_truncated=10, p_th=None,
             truncate_mode="top_k", log_interval=10, eval_no_trunc=0, alpha_logits=0., alpha_decay_rate=0.,
             epsilon_truncated=0., train_seed=0, epsilon_truncated_rate=1., is_loss_correction=1,
             train_metrics=[], test_metrics=[], top_p=1., temperature=1., temperature_step=1, temp_factor=1.,
             temperature_min=1., temperature_max=10, s_min=10, s_max=200, inv_schedule_step=0,
             schedule_start=1, curriculum=0, KL_coeff=0., truncation_optim=0):
    Agent.__init__(self, policy=policy, optimizer=optimizer, env=env, writer=writer, out_path=out_path,
                   gamma=gamma, lr=lr, grad_clip=grad_clip, scheduler=scheduler, pretrained_lm=pretrained_lm,
                   pretrain=pretrain, update_every=update_every, num_truncated=num_truncated, p_th=p_th,
                   truncate_mode=truncate_mode, log_interval=log_interval, test_envs=test_envs,
                   eval_no_trunc=eval_no_trunc, alpha_logits=alpha_logits, alpha_decay_rate=alpha_decay_rate,
                   epsilon_truncated=epsilon_truncated, train_seed=train_seed,
                   epsilon_truncated_rate=epsilon_truncated_rate, is_loss_correction=is_loss_correction,
                   train_metrics=train_metrics, test_metrics=test_metrics, top_p=top_p,
                   temperature=temperature, temperature_step=temperature_step, temp_factor=temp_factor,
                   temperature_min=temperature_min, temperature_max=temperature_max, s_min=s_min, s_max=s_max,
                   inv_schedule_step=inv_schedule_step, schedule_start=schedule_start, curriculum=curriculum,
                   KL_coeff=KL_coeff, truncation_optim=truncation_optim)
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.grad_clip = grad_clip
    self.update_mode = "episode"
    self.writer_iteration = 0
def run():
    if len(sys.argv) < 3:
        print("usage: %s <prover9 location> <the world filename> ...\n" % sys.argv[0])
        sys.exit(1)
    prover9_dir = sys.argv[1]
    world_filename = sys.argv[2]
    # the world
    wumpus_world = Wumpus_World(world_filename)
    # the knowledge base
    kb = KB(prover9_dir)
    # the agent
    agent = Agent(wumpus_world, kb)
    # before the game we should reset the agent, kb and the wumpus world
    wumpus_world.reset()
    kb.reset(prover9_dir)
    agent.reset(wumpus_world, kb)
    # the agent won't stop until it finds the gold and returns to the start position
    while True:
        # show the current position of the agent
        print("***********************************************************************")
        print("current position:", agent.pos)
        print("arrow:", agent.arrow, "gold:", agent.gold, "mark:", agent.mark)
        wumpus_world.draw_board()
        # action_process() returns 1 once the agent has finished the task,
        # so we exit the game successfully
        if agent.action_process() == 1:
            break
    print("all the steps are:", agent.steps)
def __init__(self, policy, env, writer, gamma=1., lr=1e-2, pretrained_lm=None, word_emb_size=8,
             hidden_size=24, pretrain=False, kernel_size=1, stride=2, num_filters=3, num_truncated=10,
             update_every=30):
    Agent.__init__(self, policy, env, gamma=gamma, lr=lr, pretrained_lm=pretrained_lm,
                   word_emb_size=word_emb_size, hidden_size=hidden_size, pretrain=pretrain,
                   update_every=update_every, kernel_size=kernel_size, stride=stride,
                   num_filters=num_filters, num_truncated=num_truncated, writer=writer)
    self.update_every = 1  # note: overrides the update_every passed to Agent.__init__ above
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.update_mode = "episode"
    self.writer_iteration = 0
def objective(args):
    NUM_TESTS_FOR_NOISE = 1
    env = gym.make('LunarLander-v2')
    learningRate = args
    # numIntermediateLayers = int(numIntermediateLayers)
    # intermediateLayerSize = int(intermediateLayerSize)
    # finalLayerSize = int(finalLayerSize)
    # layers = []
    # for i in range(numIntermediateLayers):
    #     layers.append(intermediateLayerSize)
    # layers.append(finalLayerSize)
    # print("Layers: ", layers)
    # print("Priority: ", priorityExponent)
    # print("LR: ", learningRate)
    totalResult = 0
    for i in range(NUM_TESTS_FOR_NOISE):
        sess = tf.Session()
        a = Agent(
            sess=sess,
            env=env,
            numAvailableActions=4,
            numObservations=8,
            rewardsMovingAverageSampleLength=20,
            gamma=1,
            nStepUpdate=1,
            includeIntermediatePairs=False,
            maxRunningMinutes=30,
            # test parameters
            episodesPerTest=1,
            numTestPeriods=40000,
            numTestsPerTestPeriod=30,
            episodeStepLimit=1024,
            intermediateTests=False,
            render=False,
            showGraph=False,
            # hyperparameters
            valueMin=-400.0,
            valueMax=300.0,
            numAtoms=14,
            maxMemoryLength=100000,
            batchSize=256,
            networkSize=[128, 128, 256],
            learningRate=learningRate,
            priorityExponent=0,
            epsilonInitial=2,
            epsilonDecay=.9987,
            minFramesForTraining=2048,
            noisyLayers=False,
            maxGradientNorm=4,
            minExploration=.15,
        )
        testResults = np.array(a.execute())
        # average the four best test-period scores
        performance = np.mean(testResults[np.argpartition(-testResults, range(4))[:4]])
        totalResult = totalResult + performance
        print(str(learningRate) + "," + str(performance))
    return -totalResult
def random_scene(cls, map_size, input_data, difficulty=None):
    agent_num = input_data['ag']
    ob_num = input_data['ob']
    check_num = input_data['ch']
    blocks = pg.sprite.Group()
    blocks.empty()
    element_dict = {'ag': [], 'ob': [], 'ch': []}
    # For obstacle generation:
    for i in range(ob_num):
        while True:
            i_size = random.choice(OB_SCALE_SIZE_LIST)
            pos_x, pos_y, tmp_vector = Obstacle.random(map_size, i_size)
            tmp = Obstacle(i, pos_x, pos_y, i_size, tmp_vector)
            collision = False
            for j in blocks:
                if pg.sprite.collide_rect(tmp, j):
                    collision = True
                    break
            if not collision:
                blocks.add(tmp)
                element_dict['ob'].append([i, pos_x, pos_y, i_size, tmp_vector])
                break
    # For checkpoint generation:
    for i in range(check_num):
        while True:
            pos_x, pos_y = Checkpoint.random(map_size, CHECKPOINT_SIZE)
            tmp = Checkpoint(i, pos_x, pos_y, CHECKPOINT_SIZE)
            collision = False
            for j in blocks:
                if pg.sprite.collide_rect(tmp, j):
                    collision = True
                    break
            if not collision:
                blocks.add(tmp)
                element_dict['ch'].append([i, pos_x, pos_y, CHECKPOINT_SIZE])
                break
    # For agent generation:
    for i in range(agent_num):
        ch_pos = (element_dict['ch'][0][1], element_dict['ch'][0][2]) if difficulty is not None else None
        while True:
            pos_x, pos_y = Agent.random(map_size, AGENT_SIZE, difficulty, ch_pos)
            tmp = Agent(i, pos_x, pos_y, AGENT_SIZE)
            collision = False
            for j in blocks:
                if pg.sprite.collide_rect(tmp, j):
                    collision = True
                    break
            if not collision:
                blocks.add(tmp)
                element_dict['ag'].append([i, pos_x, pos_y, AGENT_SIZE])
                break
    return element_dict
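# Usage sketch (hedged): assumes random_scene is a classmethod on a scene/world
# class -- "Scene" below is a hypothetical name -- and that the constants used
# above (OB_SCALE_SIZE_LIST, CHECKPOINT_SIZE, AGENT_SIZE) are in scope.
# The map size and entity counts are illustrative only.
scene = Scene.random_scene(
    map_size=(800, 600),                     # assumed (width, height) in pixels
    input_data={'ag': 2, 'ob': 5, 'ch': 1},  # two agents, five obstacles, one checkpoint
    difficulty=None,                         # None => agents placed without checkpoint bias
)
# The returned dict stores the constructor arguments per entity, so the same
# layout can be replayed later, e.g. by the reset() method shown further down.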
def test_agent_registration(dispatcher: Dispatcher, agent: Agent):
    name = 'agent_test_name'
    agent.name = name
    agent.register()
    assert agent.id in dispatcher.agents, 'Agent ID mismatch'
    assert agent.name == dispatcher.agents[agent.id].name, \
        'Agent name mismatch'
    assert 0.01 > (agent.last_sync - dispatcher.agents[agent.id].last_sync).seconds, \
        'Request-Reply sync timestamp differs more than expected'
def test_agent_collect_info():
    vec_env, custom_draws = make_block_push_env(two_d=True)
    red_plan = red_skeleton()
    green_plan = green_skeleton()
    blue_plan = blue_skeleton()
    # plans = [blue_plan, green_plan, red_plan]
    plans = [red_plan]
    agent = Agent("test_collect_info")
    agent.collect_transition_data(vec_env, plans)
def collect_statistics(self, num_sessions):
    """Runs multiple dialog sessions between the user and the agent to
    collect statistics about the user's actions.

    Users are picked stochastically per session and the agent is created
    internally.

    Args:
        num_sessions (int): Number of dialog sessions to execute.
    """
    user_actions = [user_action for user_action in UserActionType]
    user_action_map = {action: i for i, action in enumerate(user_actions)}
    user_action_stats = {
        action_type: np.zeros(len(user_actions))
        for action_type in AgentActionType
    }
    agent_actions = [agent_action for agent_action in AgentActionType]
    agent_action_map = {
        action: i for i, action in enumerate(agent_actions)
    }
    agent_action_counts = np.zeros(len(agent_actions))
    agent = Agent()

    # Run multiple dialog sessions to gather the user's action statistics.
    for _ in range(num_sessions):
        # Reset the agent and the user.
        agent.reset()
        user = self._pick_user_stochastically()
        user.reset(reset_policy=False)  # Only reset state, not policy.
        # Create a new dialog session.
        session = DialogSession(user, agent)
        # Start the dialog session by having the dialog agent make the
        # first move.
        agent_action = session.ask_agent_to_start()
        user_action = None
        while not (agent_action is AgentActionType.CLOSE
                   and user_action is UserActionType.CLOSE):
            user_action, next_agent_action = session.execute_one_step()
            # Update action statistics.
            user_action_index = user_action_map[user_action]
            user_action_stats[agent_action][user_action_index] += 1
            agent_action_index = agent_action_map[agent_action]
            agent_action_counts[agent_action_index] += 1
            agent_action = next_agent_action

    print(user_action_stats)
    print(agent_action_counts)
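# Post-processing sketch (hedged): the raw counts in user_action_stats read
# more naturally as conditional probabilities P(user action | agent action).
# normalize_stats is a hypothetical helper, assuming the numpy arrays built
# by collect_statistics above.
import numpy as np

def normalize_stats(user_action_stats):
    """Row-normalize per-agent-action counts into probability vectors.

    Rows with no observations are left as all zeros to avoid dividing 0 by 0.
    """
    probs = {}
    for agent_action, counts in user_action_stats.items():
        total = counts.sum()
        probs[agent_action] = counts / total if total > 0 else counts
    return probs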
def test_one_push_planner():
    vec_env, custom_draws = make_block_push_env(two_d=True)
    start_state_str = vec_env.get_pillar_state()[0]
    start_state = State.create_from_serialized_string(start_state_str)
    goal_state = State.create_from_serialized_string(start_state_str)
    goal_pose = np.array(start_state.get_values_as_vec([block_pos_fqn]))
    goal_pose[0] -= 0.05
    goal_state.set_values_from_vec([block_pos_fqn], goal_pose.tolist())
    planner = Planner(vec_env.cfg)
    one_push_plan = planner.plan(start_state.get_serialized_string(),
                                 goal_state.get_serialized_string())
    plans = [one_push_plan]
    agent = Agent("test_planner")
    agent.collect_transition_data(vec_env, plans)
def main(args):
    env = UnityEnvironment(file_name=args.env)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)
    agent = Agent(state_size=state_size, action_size=action_size)
    agent.load_local_weights(args.checkpoint)
    scores = dqn_tester(agent, env, brain_name)
def __init__(self, args, display_size, saver):
    pygame.init()
    self.args = args
    self.surface = pygame.display.set_mode(display_size, 0, 24)
    pygame.display.set_caption('UNREAL')
    args.action_size = Environment.get_action_size(args.env_name)
    self.global_network = Agent(1, args)
    saver.restore(self.global_network)
    self.global_network.eval()
    self.environment = Environment.create_environment(args.env_name)
    self.font = pygame.font.SysFont(None, 20)
    self.value_history = ValueHistory()
    self.state_history = StateHistory()
    self.distribution = torch.distributions.Categorical
    self.episode_reward = 0
def run_single_session():
    """Executes a single dialog session."""
    user = User(policy_type=UserPolicyType.handcrafted)
    agent = Agent()
    session = DialogSession(user, agent)
    session.start()
def __init__(self, policy, env, writer, gamma=1., eps_clip=0.2, pretrained_lm=None, update_every=100,
             K_epochs=10, entropy_coeff=0.01, pretrain=False, word_emb_size=8, hidden_size=24,
             kernel_size=1, stride=2, num_filters=3, num_truncated=10):
    Agent.__init__(self, policy, env, writer, gamma=gamma, pretrained_lm=pretrained_lm, pretrain=pretrain,
                   update_every=update_every, word_emb_size=word_emb_size, hidden_size=hidden_size,
                   kernel_size=kernel_size, stride=stride, num_filters=num_filters,
                   num_truncated=num_truncated)
    self.policy_old = policy(env.clevr_dataset.len_vocab, word_emb_size, hidden_size,
                             kernel_size=kernel_size, stride=stride, num_filters=num_filters)
    self.policy_old.load_state_dict(self.policy.state_dict())
    self.policy_old.to(self.device)
    self.K_epochs = K_epochs
    self.MSE_loss = nn.MSELoss(reduction="none")
    self.eps_clip = eps_clip
    self.entropy_coeff = entropy_coeff
    self.update_mode = "episode"
    self.writer_iteration = 0
def reset(self, input_dict, static=False):
    self.step_counter = 0
    self.done = False
    if input_dict is None:
        raise ValueError('input_dict is None.')
    self.blocks.empty()
    self.agent_num = len(input_dict['ag'])
    self.ob_num = len(input_dict['ob'])
    self.check_num = len(input_dict['ch'])
    # For obstacle generation:
    for i in input_dict['ob']:
        if static:
            i[-1] = (0, 0)
        self.blocks.add(Obstacle(*i))
    for i in input_dict['ag']:
        self.blocks.add(Agent(*i))
    for i in input_dict['ch']:
        self.blocks.add(Checkpoint(*i))
    self.draw()
    self.event_loop()
    pg.display.update()
    # pg.time.delay(20)  # TODO
    if self.snapshot:
        pass  # TODO: snapshot
    state = self.get_state()
    return state
def create_agent(config, session):
    logging.info(
        "Create agent : ===================================================================="
    )
    model = Model(config=config, sess=session)
    replay_buffer = Replay_Buffer(config)
    ou_process = OU_Process(config)
    record = create_df("data/temp/user_15330397.csv", "record")
    item_set = create_df(
        "data/fresh_comp_offline/tianchi_fresh_comp_train_item.csv", "item_set")
    user_item_data = create_df(
        "data/fresh_comp_offline/tianchi_fresh_comp_train_user.csv", "user_set")
    agent = Agent(config=config,
                  model=model,
                  replay_buffer=replay_buffer,
                  noise=ou_process,
                  record=record,
                  item_set=item_set,
                  user_item_data=user_item_data,
                  verbose=1)
    logging.info(
        "End creating agent : ===================================================================="
    )
    return agent
def solve_qp(self):
    num_users = len(self.users)
    # Calculate feature expectations of all simulated users.
    fe = []
    for user in self.users:
        fe.append(IRL.calc_feature_expectation(user, Agent()))
    # Calculate feature expectation of the expert user.
    fe_expert = IRL.calc_feature_expectation(self.real_user, Agent())
    # Calculate matrix P in the QP formulation for cvxopt.
    P = np.zeros((num_users, num_users))
    for i in range(num_users):
        for j in range(i, num_users):
            product = np.dot(fe[i], fe[j])
            P[i][j] = product
            P[j][i] = product
    P = cvx.matrix(P)
    # print(P)
    q = np.zeros(num_users)
    for i in range(num_users):
        q[i] = -2 * np.dot(fe_expert, fe[i])
    q = cvx.matrix(q)
    # print(q)
    G = np.eye(num_users) * (-1)
    G = cvx.matrix(G)
    # print(G)
    h = np.zeros(num_users)
    h = cvx.matrix(h, (num_users, 1))
    # print(h)
    A = np.ones(num_users)
    A = cvx.matrix(A, (1, num_users))
    # print(A)
    b = cvx.matrix([1.], (1, 1))
    # print(b)
    sol = cvx.solvers.qp(P, q, G, h, A, b)
    print(sol)
    self.mixture_weights = np.array(sol['x']).reshape(num_users)
    utils.normalize_probabilities(self.mixture_weights)
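# For reference, the P and q above come from expanding the squared distance
# between the expert's feature expectation mu_E and a mixture of the
# simulated users' feature expectations (constant term dropped):
#
#   || mu_E - sum_i w_i mu_i ||^2
#     = sum_{i,j} w_i w_j (mu_i . mu_j) - 2 sum_i w_i (mu_E . mu_i) + const
#
# which matches P[i][j] = mu_i . mu_j and q[i] = -2 * (mu_E . mu_i).
# cvxopt.solvers.qp minimizes (1/2) x'Px + q'x subject to Gx <= h and Ax = b;
# here G = -I with h = 0 encodes w >= 0, and A = 1' with b = 1 encodes
# sum_i w_i = 1, i.e. the weights form a probability distribution.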
def main2():
    world_height, world_width = 15, 15
    world = World(world_width, world_height)
    agent = Agent(world)
    world.place_agent(agent)
    app = Application(world, agent)
    app.start()
def run_single_session(user):
    """Executes a single dialog session."""
    agent = Agent()
    user.reset(reset_policy=False)
    session = DialogSession(user, agent)
    session.start()
    return session.user_log
def main(stock_name, model_name):
    # if len(sys.argv) != 3:
    #     print("Usage: python evaluate.py [stock] [model]")
    #     exit()
    # stock_name, model_name = sys.argv[1], sys.argv[2]
    model = load_model("models/" + model_name)
    window_size = model.layers[0].input.shape.as_list()[1]
    agent = Agent(window_size, True, model_name)
    data = getStockDataVec(stock_name)
    l = len(data) - 1
    batch_size = 32
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)  # 0 = sit, 1 = buy, 2 = sell
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1:  # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))
        elif action == 2 and len(agent.inventory) > 0:  # sell
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) +
                  " | Profit: " + formatPrice(data[t] - bought_price))
        done = (t == l - 1)
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state
        if done:
            print("--------------------------------")
            print(stock_name + " Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
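# Invocation sketch (hedged): assumes a Keras model saved under models/ and
# the data conventions of getStockDataVec. The stock and model names below
# are illustrative, not real artifacts of this repo.
if __name__ == "__main__":
    main("GSPC", "model_ep10")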
def _create_agent(robot_driver, vae, torch_device):
    env = robot_driver()
    teleop = Teleoperator()
    agent = Agent(env, vae, teleop=teleop, device=torch_device, reward_callback=reward)
    return agent
def create_agent(name: Union[str, int]) -> Agent:
    agent = Agent(token=AGENT_TEST_TOKEN, dsp_port=DISPATCHER_PORT)
    agent.name = str(name)
    agent.socket.establish()
    agent.register()
    agent.init_broker()
    agent.broker._inactivity_timeout = 0.1
    return agent
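# Usage sketch (hedged): pairs the factory above with the registration test
# earlier in this section. The dispatcher fixture and the agent count are
# assumptions; test_multiple_registrations is a hypothetical test name.
def test_multiple_registrations(dispatcher: Dispatcher):
    agents = [create_agent(i) for i in range(3)]
    for agent in agents:
        assert agent.id in dispatcher.agents, 'Agent not registered'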
def init_agents(self, types=[('Black', BLACK), ('White', WHITE)], type_assignment='random'):
    """Initialize a dictionary of agent instances."""
    agents = {}
    # Repeat the (tag, color) pairs so each agent gets one
    # (assumes an even number of agents).
    type_list = round(self.number_agents / 2) * types
    for name in range(1, self.number_agents + 1):
        tag, color = type_list[name - 1]
        agents[name] = Agent(color=color, tag=tag, name=name)
    self.agents = agents
def eval_model(stock_name, model_name):
    # Agent
    window_size = get_window_size(model_name)
    agent = Agent(window_size, True, model_name)

    # Environment
    env = SimpleTradeEnv(stock_name, window_size, agent)

    # Main loop
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

    return env.total_profit
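# Batch-evaluation sketch (hedged): compares one trained model across several
# stocks using eval_model above. Tickers and the model name are illustrative.
for stock in ["GSPC", "AAPL", "MSFT"]:
    profit = eval_model(stock, "model_ep10")
    print("{}: total profit {:.2f}".format(stock, profit))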
def accept(self, conn, address):
    self._count += 1
    a = Agent(conn, conn)
    h = MessageHandler(a, ARIMAModel())
    a.handler = h
    logger.info("Starting Agent for connection %d", self._count)
    a.start()
    a.wait()
    logger.info("Agent finished connection %d", self._count)
def init(numSensors, numPrimitives, numActions, numFeatures):
    global world, agent, actions
    world = ShellWorld(numSensors, numPrimitives, numActions)
    # A unique identifying string for the agent, allowing specific saved
    # agents to be recalled.
    agent_name = "test"
    agent = Agent(world.num_sensors, world.num_primitives,
                  world.num_actions, numFeatures, agent_name)
    agent.display_state = False
    agent.REPORTING_PERIOD = 10 ** 4
    # Control how rapidly previous inputs are forgotten.
    agent.perceiver.INPUT_DECAY_RATE = 0.5  # real, 0 < x < 1
    # Control how rapidly the coactivity update plasticity changes.
    agent.perceiver.PLASTICITY_UPDATE_RATE = 4 * 10 ** (-1)
    agent.perceiver.NEW_GROUP_THRESHOLD = 0.25
    agent.perceiver.MAX_PLASTICITY = 0.1
    agent.actor.WORKING_MEMORY_DECAY_RATE = 0.5  # real, 0 < x <= 1
    # If uncommented, try to restore the agent from saved data.
    # If commented out, start fresh each time.
    # agent = agent.restore()
    actions = np.zeros(world.num_actions)
    # If configured to do so, the world sets some Becca parameters to modify
    # its behavior. This is a development hack, and should eventually be
    # removed as Becca matures and settles on a good, general-purpose set of
    # parameters.
    world.set_agent_parameters(agent)
    # Report the performance of the agent on the world.
def generate_dialog_corpus(num_sessions):
    """Generates a dialog corpus by executing multiple sessions successively.

    Args:
        num_sessions (int): Number of dialog sessions to be executed.
    """
    user = User(policy_type=UserPolicyType.handcrafted)
    agent = Agent()
    for _ in range(num_sessions):
        session = DialogSession(user, agent)
        session.start()
        print("----")
        session.clear_user_log()
class MultiTask(Task):
    def __init__(self, config=None):
        self.agent = Agent(config=config)

    def step(self, state):
        # 101 is used as a sentinel "no-op" action for terminal states.
        action = 101
        terminate = False
        # self.terminal_state is expected to come from the Task base class or
        # be set elsewhere; it is never assigned in this class.
        if state in self.terminal_state:
            terminate = True
        else:
            action = self.agent.egreedy_action(state)
        return action, terminate

    def nlg(self, action):
        return 'action is: {}'.format(action)
def main(args):
    env = UnityEnvironment(file_name=args.env)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]
    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)
    print('Action Space:', action_size)
    print('State Space: \n', state_size)
    agent = Agent(state_size=state_size, action_size=action_size)
    scores = dqn_trainer(agent, env, brain_name)
def main():
    # Check that a command line argument for the database path was passed
    if len(sys.argv) != 2:
        print("Error: Path of database expected")
        sys.exit(0)

    # Assign the path to a variable
    database_path = sys.argv[1]
    if os.path.isfile(database_path):
        connection = sqlite3.connect(database_path)
    else:
        print("Error: Database does not exist")
        sys.exit(0)

    # Instantiate a cursor
    cursor = connection.cursor()

    # Infinite loop while the program is running
    while True:
        # Process the login of the user
        userCreds = User.processLogin(cursor)
        # If login fails, go back to login processing
        if not userCreds:
            continue
        # uid is the first element in the userCreds list
        if userCreds[2] == 'a':
            # Instantiate an agent object
            user = Agent(userCreds[0], cursor, connection)
        else:
            # Instantiate an officer object
            user = Officer(userCreds[0], cursor, connection)
        # Loop through actions of the agent or officer
        while True:
            # Check if the user logged out
            if not user.isLoggedIn():
                break
            # Check if the user chose to exit
            if user.isExit():
                # Commit changes to the database
                connection.commit()
                return
            # Check what job the user wants to do
            user.processJobs()
            # Commit any changes made to the database
            connection.commit()
def initialize():
    app = FlaskAPI(__name__)
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent', help='Run agent', default=False, action="store_true")
    parser.add_argument('--agent-ip', help='The IP of the agent', default="localhost")
    parser.add_argument('--controller', help='Run Controller', action="store_true")
    args = parser.parse_args()
    cmd = None  # avoid an UnboundLocalError when neither flag is given
    if args.agent:
        cmd = Agent(args, app)
    if args.controller:
        cmd = Controller(args, app)
    return cmd
def main(*args, **kwargs):
    conf.save_dir = os.path.abspath(conf.save_dir)
    if not os.path.exists(conf.save_dir):
        os.makedirs(conf.save_dir)
    with tf.Session() as sess:
        env = Environment()
        agent = Agent(sess, conf, env, name='kindAgent')
        tf.global_variables_initializer().run()
        env.create()
        env.connect_client()
        if conf.is_train:
            agent.train()
        else:
            agent.competition()
import keras
from keras.models import load_model

from agent.agent import Agent
from functions import *
import sys

if len(sys.argv) != 3:
    print("Usage: python evaluate.py [stock] [model]")
    exit()

stock_name, model_name = sys.argv[1], sys.argv[2]
model = load_model("models/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32
state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for t in range(l):
    action = agent.act(state)  # sit
    next_state = getState(data, t + 1, window_size + 1)
    reward = 0
from agent.agent import Agent
from functions import *
import sys

if len(sys.argv) != 4:
    print("Usage: python train.py [stock] [window] [episodes]")
    exit()

stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)  # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0
        if action == 1:  # buy
            agent.inventory.append(data[t])