Example #1
    def __init__(self, policy, optimizer, env, test_envs, pretrained_lm, writer, out_path, gamma=1., lr=1e-2,
                 grad_clip=None, scheduler=None,
                 pretrain=False, update_every=50, num_truncated=10, p_th=None, truncate_mode="top_k", log_interval=10,
                 eval_no_trunc=0, alpha_logits=0., alpha_decay_rate=0., epsilon_truncated=0., train_seed=0,
                 epsilon_truncated_rate=1.,
                 is_loss_correction=1, train_metrics=[], test_metrics=[], top_p=1., temperature=1., temperature_step=1,
                 temp_factor=1., temperature_min=1., temperature_max=10, s_min=10, s_max=200, inv_schedule_step=0,
                 schedule_start=1, curriculum=0, KL_coeff=0., truncation_optim=0):
        Agent.__init__(self, policy=policy, optimizer=optimizer, env=env, writer=writer, out_path=out_path, gamma=gamma,
                       lr=lr,
                       grad_clip=grad_clip,
                       scheduler=scheduler,
                       pretrained_lm=pretrained_lm,
                       pretrain=pretrain, update_every=update_every,
                       num_truncated=num_truncated,
                       p_th=p_th,
                       truncate_mode=truncate_mode,
                       log_interval=log_interval, test_envs=test_envs, eval_no_trunc=eval_no_trunc,
                       alpha_logits=alpha_logits, alpha_decay_rate=alpha_decay_rate,
                       epsilon_truncated=epsilon_truncated,
                       train_seed=train_seed, epsilon_truncated_rate=epsilon_truncated_rate,
                       is_loss_correction=is_loss_correction, train_metrics=train_metrics, test_metrics=test_metrics,
                       top_p=top_p, temperature=temperature, temperature_step=temperature_step, temp_factor=temp_factor,
                       temperature_min=temperature_min, temperature_max=temperature_max, s_min=s_min, s_max=s_max,
                       inv_schedule_step=inv_schedule_step, schedule_start=schedule_start, curriculum=curriculum,
                       KL_coeff=KL_coeff, truncation_optim=truncation_optim)

        self.MSE_loss = nn.MSELoss(reduction="none")
        self.grad_clip = grad_clip
        self.update_mode = "episode"
        self.writer_iteration = 0
Example #2
def run():
    if len(sys.argv) < 3:
        print "usage: %s <prover9 location> <the world filename> ...\n" % (
            sys.argv[0])
        quit()
    else:
        prover9_dir = sys.argv[1]
        world_filename = sys.argv[2]
    # the world
    wumpus_world = Wumpus_World(world_filename)
    # the knowledge base
    kb = KB(prover9_dir)
    # the agent
    agent = Agent(wumpus_world, kb)

    # before the game we should reset the agent, the kb and the wumpus world
    wumpus_world.reset()
    kb.reset(prover9_dir)
    agent.reset(wumpus_world, kb)

    # the agent won't stop until it finds the gold and returns to the start position
    while True:
        # show the current position of the agent
        print("***********************************************************************")
        print("current position:", agent.pos)
        print("arrow:", agent.arrow, "gold:", agent.gold, "mark:", agent.mark)
        wumpus_world.draw_board()
        # if it returns 1, it means the agent has finished the task and we
        # should exit the game successfully
        if agent.action_process() == 1:
            break
    print "all the steps are:", agent.steps
Example #3
 def __init__(self,
              policy,
              env,
              writer,
              gamma=1.,
              lr=1e-2,
              pretrained_lm=None,
              word_emb_size=8,
              hidden_size=24,
              pretrain=False,
              kernel_size=1,
              stride=2,
              num_filters=3,
              num_truncated=10,
              update_every=30):
     Agent.__init__(self,
                    policy,
                    env,
                    gamma=gamma,
                    lr=lr,
                    pretrained_lm=pretrained_lm,
                    word_emb_size=word_emb_size,
                    hidden_size=hidden_size,
                    pretrain=pretrain,
                    update_every=update_every,
                    kernel_size=kernel_size,
                    stride=stride,
                    num_filters=num_filters,
                    num_truncated=num_truncated,
                    writer=writer)
     self.update_every = 1
     self.MSE_loss = nn.MSELoss(reduction="none")
     self.update_mode = "episode"
     self.writer_iteration = 0
Example #4
def objective(args):
    NUM_TESTS_FOR_NOISE = 1
    env = gym.make('LunarLander-v2')
    learningRate = args
    # numIntermediateLayers = int(numIntermediateLayers)
    # intermediateLayerSize = int(intermediateLayerSize)
    # finalLayerSize = int(finalLayerSize)
    # layers = []
    # for i in range(numIntermediateLayers):
    #     layers.append(intermediateLayerSize)
    # layers.append(finalLayerSize)
    # print("Layers: ",layers)
    # print("Priority: ",priorityExponent)
    # print("LR: ",learningRate)
    totalResult = 0
    for i in range(NUM_TESTS_FOR_NOISE):
        sess = tf.Session()
        a = Agent(
            sess=sess,
            env=env,
            numAvailableActions=4,
            numObservations=8,
            rewardsMovingAverageSampleLength=20,
            gamma=1,
            nStepUpdate=1,
            includeIntermediatePairs=False,
            maxRunningMinutes=30,

            # test parameters
            episodesPerTest=1,
            numTestPeriods=40000,
            numTestsPerTestPeriod=30,
            episodeStepLimit=1024,
            intermediateTests=False,

            render=False,
            showGraph=False,

            # hyperparameters
            valueMin=-400.0,
            valueMax=300.0,
            numAtoms=14,
            maxMemoryLength=100000,
            batchSize=256,
            networkSize=[128, 128, 256],
            learningRate=learningRate,
            priorityExponent=0,
            epsilonInitial=2,
            epsilonDecay=.9987,
            minFramesForTraining=2048,
            noisyLayers=False,
            maxGradientNorm=4,
            minExploration=.15,
        )
        testResults = np.array(a.execute())
        performance = np.mean(testResults[np.argpartition(-testResults,range(4))[:4]])
        totalResult = totalResult + performance
    print(str(learningRate)+","+str(performance))
    return -totalResult
Example #5
    def random_scene(cls, map_size, input_data, difficulty=None):
        agent_num = input_data['ag']
        ob_num = input_data['ob']
        check_num = input_data['ch']

        blocks = pg.sprite.Group()
        blocks.empty()

        element_dict = {'ag': [], 'ob': [], 'ch': []}

        # For obstacle generation:
        for i in range(ob_num):
            while True:
                i_size = random.choice(OB_SCALE_SIZE_LIST)
                pos_x, pos_y, tmp_vector = Obstacle.random(map_size, i_size)
                tmp = Obstacle(i, pos_x, pos_y, i_size, tmp_vector)
                collision = False
                for j in blocks:
                    if pg.sprite.collide_rect(tmp, j):
                        collision = True
                        break
                if not collision:
                    blocks.add(tmp)
                    element_dict['ob'].append([i, pos_x, pos_y, i_size, tmp_vector])
                    break

        # For checkpoint generation:
        for i in range(check_num):
            while True:
                pos_x, pos_y = Checkpoint.random(
                    map_size, CHECKPOINT_SIZE)
                tmp = Checkpoint(i, pos_x, pos_y, CHECKPOINT_SIZE)
                collision = False
                for j in blocks:
                    if pg.sprite.collide_rect(tmp, j):
                        collision = True
                        break
                if not collision:
                    blocks.add(tmp)
                    element_dict['ch'].append([i, pos_x, pos_y, CHECKPOINT_SIZE])
                    break

        # For agent generation:
        for i in range(agent_num):
            ch_pos = ((element_dict['ch'][0][1], element_dict['ch'][0][2])
                      if difficulty is not None else None)
            while True:
                pos_x, pos_y = Agent.random(map_size, AGENT_SIZE, difficulty, ch_pos)
                tmp = Agent(i, pos_x, pos_y, AGENT_SIZE)
                collision = False
                for j in blocks:
                    if pg.sprite.collide_rect(tmp, j):
                        collision = True
                        break
                if not collision:
                    blocks.add(tmp)
                    element_dict['ag'].append([i, pos_x, pos_y, AGENT_SIZE])
                    break

        return element_dict
Example #6
def test_agent_registration(dispatcher: Dispatcher, agent: Agent):
    name = 'agent_test_name'
    agent.name = name
    agent.register()
    assert agent.id in dispatcher.agents, 'Agent ID mismatch'
    assert agent.name == dispatcher.agents[agent.id].name, \
        'Agent name mismatch'
    assert 0.01 > (agent.last_sync - dispatcher.agents[agent.id].last_sync).seconds,\
        'Request-Reply sync timestamp differs more than expected'
Example #7
def test_agent_collect_info():
    vec_env, custom_draws = make_block_push_env(two_d=True)
    red_plan = red_skeleton()
    green_plan = green_skeleton()
    blue_plan = blue_skeleton()
    #plans = [blue_plan, green_plan, red_plan]
    plans = [red_plan]
    agent = Agent("test_collect_info")
    agent.collect_transition_data(vec_env, plans)
Example #8
    def collect_statistics(self, num_sessions):
        """Runs multiple dialog sessions between the user and the agent to collect
        statistics about user's actions.

        Args:
            user (:obj: User): The dialog user.
            agent (:obj: Agent): The dialog agent.
            dialog_session (DialogSession): The dialog session class
            num_sessions (int): Number of dialog sessions to execute.
        """
        user_actions = [user_action for user_action in UserActionType]
        user_action_map = {action: i for i, action in enumerate(user_actions)}
        user_action_stats = {
            action_type: np.zeros(len(user_actions))
            for action_type in AgentActionType
        }

        agent_actions = [agent_action for agent_action in AgentActionType]
        agent_action_map = {
            action: i
            for i, action in enumerate(agent_actions)
        }
        agent_action_counts = np.zeros(len(agent_actions))

        agent = Agent()
        # Run multiple dialog sessions to gather user's action statistics.
        for _ in range(num_sessions):
            # Reset the agent and the user.
            agent.reset()
            user = self._pick_user_stochastically()
            user.reset(reset_policy=False)  # Only reset state, not policy.
            # Create a new dialog session.
            session = DialogSession(user, agent)
            # Start the dialog session by having the dialog agent make the
            # first move.
            agent_action = session.ask_agent_to_start()
            user_action = None
            while not (agent_action is AgentActionType.CLOSE
                       and user_action is UserActionType.CLOSE):
                user_action, next_agent_action = session.execute_one_step()

                # Update action statistics
                user_action_index = user_action_map[user_action]
                user_action_stats[agent_action][user_action_index] += 1
                agent_action_index = agent_action_map[agent_action]
                agent_action_counts[agent_action_index] += 1

                agent_action = next_agent_action

        print(user_action_stats)
        print(agent_action_counts)
Example #9
def test_one_push_planner():
    vec_env, custom_draws = make_block_push_env(two_d=True)
    start_state_str = vec_env.get_pillar_state()[0]
    start_state = State.create_from_serialized_string(start_state_str)
    goal_state = State.create_from_serialized_string(start_state_str)
    goal_pose = np.array(start_state.get_values_as_vec([block_pos_fqn]))
    goal_pose[0] -= 0.05
    goal_state.set_values_from_vec([block_pos_fqn], goal_pose.tolist())
    planner = Planner(vec_env.cfg)
    one_push_plan = planner.plan(start_state.get_serialized_string(),
                                 goal_state.get_serialized_string())
    plans = [one_push_plan]
    agent = Agent("test_planner")
    agent.collect_transition_data(vec_env, plans)
Example #10
def main(args):
    env = UnityEnvironment(file_name=args.env)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)

    agent = Agent(state_size=state_size, action_size=action_size)
    agent.load_local_weights(args.checkpoint)

    scores = dqn_tester(agent, env, brain_name)
Example #11
 def __init__(self, args, display_size, saver):
     pygame.init()
     self.args = args
     self.surface = pygame.display.set_mode(display_size, 0, 24)
     pygame.display.set_caption('UNREAL')
     args.action_size = Environment.get_action_size(args.env_name)
     self.global_network = Agent(1, args)
     saver.restore(self.global_network)
     self.global_network.eval()
     self.environment = Environment.create_environment(args.env_name)
     self.font = pygame.font.SysFont(None, 20)
     self.value_history = ValueHistory()
     self.state_history = StateHistory()
     self.distribution = torch.distributions.Categorical
     self.episode_reward = 0
Example #12
def run_single_session():
    """Executes a single dialog session.
    """
    user = User(policy_type=UserPolicyType.handcrafted)
    agent = Agent()
    session = DialogSession(user, agent)
    session.start()
Example #13
 def __init__(self, policy, env, writer, gamma=1., eps_clip=0.2, pretrained_lm=None, update_every=100,
              K_epochs=10, entropy_coeff=0.01, pretrain=False, word_emb_size=8, hidden_size=24, kernel_size=1,
              stride=2, num_filters=3, num_truncated=10):
     Agent.__init__(self, policy, env, writer, gamma=gamma, pretrained_lm=pretrained_lm, pretrain=pretrain,
                    update_every=update_every, word_emb_size=word_emb_size, hidden_size=hidden_size,
                    kernel_size=kernel_size, stride=stride, num_filters=num_filters, num_truncated=num_truncated)
     self.policy_old = policy(env.clevr_dataset.len_vocab, word_emb_size, hidden_size, kernel_size=kernel_size,
                              stride=stride, num_filters=num_filters)
     self.policy_old.load_state_dict(self.policy.state_dict())
     self.policy_old.to(self.device)
     self.K_epochs = K_epochs
     self.MSE_loss = nn.MSELoss(reduction="none")
     self.eps_clip = eps_clip
     self.entropy_coeff = entropy_coeff
     self.update_mode = "episode"
     self.writer_iteration = 0
Example #14
    def reset(self, input_dict, static=False):
        self.step_counter = 0
        self.done = False
        if input_dict is None:
            raise ValueError('input_dict is None.')

        self.blocks.empty()
        self.agent_num = len(input_dict['ag'])
        self.ob_num = len(input_dict['ob'])
        self.check_num = len(input_dict['ch'])

        # For obstacle generation:
        for i in input_dict['ob']:
            if static:
                i[-1] = (0, 0)
            self.blocks.add(Obstacle(*i))
        for i in input_dict['ag']:
            self.blocks.add(Agent(*i))
        for i in input_dict['ch']:
            self.blocks.add(Checkpoint(*i))

        self.draw()
        self.event_loop()
        pg.display.update()
        # pg.time.delay(20)

        # TODO
        if self.snapshot:
            pass
            # TODO: snapshot
        state = self.get_state()
        return state
Example #15
def create_agent(config, session):
    logging.info(
        "Create agent : ===================================================================="
    )
    model = Model(config=config, sess=session)
    replay_buffer = Replay_Buffer(config)
    ou_process = OU_Process(config)
    record = create_df("data/temp/user_15330397.csv", "record")
    item_set = create_df(
        "data/fresh_comp_offline/tianchi_fresh_comp_train_item.csv",
        "item_set")
    user_item_data = create_df(
        "data/fresh_comp_offline/tianchi_fresh_comp_train_user.csv",
        "user_set")
    agent = Agent(config=config,
                  model=model,
                  replay_buffer=replay_buffer,
                  noise=ou_process,
                  record=record,
                  item_set=item_set,
                  user_item_data=user_item_data,
                  verbose=1)
    logging.info(
        "End creating agent : ===================================================================="
    )
    return agent
Example #16
    def solve_qp(self):
        num_users = len(self.users)

        # Calculate feature expectations of all simulated users.
        fe = []
        for user in self.users:
            fe.append(IRL.calc_feature_expectation(user, Agent()))

        # Calculate feature expectation of expert user.
        fe_expert = IRL.calc_feature_expectation(self.real_user, Agent())

        # Calculate matrix P in the QP formulation of cvxopt
        P = np.zeros((num_users, num_users))
        for i in range(num_users):
            for j in range(i, num_users):
                product = np.dot(fe[i], fe[j])
                P[i][j] = product
                P[j][i] = product
        P = cvx.matrix(P)
        # print P

        q = np.zeros(num_users)
        for i in range(num_users):
            q[i] = -2 * np.dot(fe_expert, fe[i])
        q = cvx.matrix(q)
        # print q

        G = np.eye(num_users) * (-1)
        G = cvx.matrix(G)
        # print G

        h = np.zeros(num_users)
        h = cvx.matrix(h, (num_users, 1))
        # print h

        A = np.ones(num_users)
        A = cvx.matrix(A, (1, num_users))
        # print A

        b = cvx.matrix([1.], (1, 1))
        # print b

        sol = cvx.solvers.qp(P, q, G, h, A, b)
        print(sol)

        self.mixture_weights = np.array(sol['x']).reshape(num_users)
        utils.normalize_probabilities(self.mixture_weights)
Example #17
def main2():
    world_height, world_width = 15, 15
    world = World(world_width, world_height)
    agent = Agent(world)
    world.place_agent(agent)

    app = Application(world, agent)
    app.start()
Example #18
def run_single_session(user):
    """Executes a single dialog session.
    """
    agent = Agent()
    user.reset(reset_policy=False)
    session = DialogSession(user, agent)
    session.start()
    return session.user_log
Example #19
def main(stock_name, model_name):
    # if len(sys.argv) != 3:
    # 	print("Usage: python evaluate.py [stock] [model]")
    # 	exit()

    # stock_name, model_name = sys.argv[1], sys.argv[2]

    model = load_model("models/" + model_name)
    window_size = model.layers[0].input.shape.as_list()[1]

    agent = Agent(window_size, True, model_name)
    data = getStockDataVec(stock_name)
    l = len(data) - 1
    batch_size = 32

    state = getState(data, 0, window_size + 1)
    total_profit = 0
    agent.inventory = []

    for t in range(l):
        action = agent.act(state)

        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        if action == 1:  # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))

        elif action == 2 and len(agent.inventory) > 0:  # sell
            bought_price = agent.inventory.pop(0)
            reward = max(data[t] - bought_price, 0)
            total_profit += data[t] - bought_price
            print("Sell: " + formatPrice(data[t]) + " | Profit: " +
                  formatPrice(data[t] - bought_price))

        done = (t == l - 1)
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print(stock_name + " Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
Example #20
def _create_agent(robot_driver, vae, torch_device):
    env = robot_driver()
    teleop = Teleoperator()
    agent = Agent(env,
                  vae,
                  teleop=teleop,
                  device=torch_device,
                  reward_callback=reward)
    return agent
Example #21
def create_agent(name: Union[str, int]) -> Agent:
    agent = Agent(token=AGENT_TEST_TOKEN, dsp_port=DISPATCHER_PORT)
    agent.name = str(name)
    agent.socket.establish()
    agent.register()
    agent.init_broker()
    agent.broker._inactivity_timeout = 0.1
    return agent
Example #22
 def init_agents(self,
                 types=[('Black', BLACK), ('White', WHITE)],
                 type_assignment='random'):
     """ Initialize a dicitionary of agent intstances """
     agents = {}
     type_list = round(self.number_agents / 2) * types
     for name in range(1, self.number_agents + 1):
         tag, color = type_list[name - 1]
         agents[name] = Agent(color=color, tag=tag, name=name)
     self.agents = agents
Example #23
def eval_model(stock_name, model_name):
    # Agent
    window_size = get_window_size(model_name)
    agent = Agent(window_size, True, model_name)

    # Environment
    env = SimpleTradeEnv(stock_name, window_size, agent)

    # Main loop
    state = env.reset()
    done = False

    while not done:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

    return env.total_profit
Example #24
 def accept(self, conn, address):
     self._count += 1
     a = Agent(conn, conn)
     h = MessageHandler(a, ARIMAModel())
     a.handler = h
     logger.info("Starting Agent for connection %d", self._count)
     a.start()
     a.wait()
     logger.info("Agent finished connection %d", self._count)
Example #25
def init(numSensors, numPrimitives, numActions, numFeatures):
    global world, agent, actions

    world = ShellWorld(numSensors, numPrimitives, numActions)
    
    """ A unique identifying string for the agent, allowing specific
    saved agents to be recalled. 
    """
    agent_name = "test";

    agent = Agent(world.num_sensors, world.num_primitives, 
                  world.num_actions, numFeatures, agent_name)
    
    agent.display_state = False
    agent.REPORTING_PERIOD = 10**4

    """ Control how rapidly previous inputs are forgotten """
    agent.perceiver.INPUT_DECAY_RATE = 0.5 # real, 0 < x < 1

    """ Control how rapidly the coactivity update platicity changes """
    agent.perceiver.PLASTICITY_UPDATE_RATE = 4 * 10 ** (-1)

    agent.perceiver.NEW_GROUP_THRESHOLD = 0.25
    agent.perceiver.MAX_PLASTICITY = 0.1

    agent.actor.WORKING_MEMORY_DECAY_RATE = 0.5      # real, 0 < x <= 1

    """ If uncommented, try to restore the agent from saved data.
    If commented out, start fresh each time.
    """
    #agent = agent.restore()
    actions = np.zeros(world.num_actions)
    
    """ If configured to do so, the world sets some Becca parameters to 
    modify its behavior. This is a development hack, and should eventually be 
    removed as Becca matures and settles on a good, general purpose
    set of parameters.
    """
    world.set_agent_parameters(agent)
         
    """ Report the performance of the agent on the world. """
Example #26
def generate_dialog_corpus(num_sessions):
    """Generates a dialog corpus by executing multiple sessions successively.

    Args:
        num_sessions (int): Number of dialog sessions to be executed.
    """
    user = User(policy_type=UserPolicyType.handcrafted)
    agent = Agent()
    for _ in range(num_sessions):
        session = DialogSession(user, agent)
        session.start()
        print("----")
        session.clear_user_log()
Example #27
class MultiTask(Task):
    def __init__(self, config=None):
        self.agent = Agent(config=config)

    def step(self, state):
        action = 101
        terminate = False
        if state in self.terminal_state:
            terminate = True
        else:
            action = self.agent.egreedy_action(state)
        return action, terminate

    def nlg(self, action):
        return 'action is: {}'.format(action)
Example #28
def main(args):
    env = UnityEnvironment(file_name=args.env)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    action_size = brain.vector_action_space_size
    env_info = env.reset(train_mode=True)[brain_name]
    state = env_info.vector_observations[0]
    state_size = len(state)

    print('Action Space:', action_size)
    print('State Space: \n', state_size)

    agent = Agent(state_size=state_size, action_size=action_size)

    scores = dqn_trainer(agent, env, brain_name)
Example #29
def main():
    # Check that a command line argument for the database path was passed
    if (len(sys.argv) != 2):
        print("Error: Path of database expected")
        sys.exit(0)

    # Assign the path to a variable
    database_path = sys.argv[1]
    if (os.path.isfile(database_path)):
        connection = sqlite3.connect(database_path)
    else:
        print("Error: Database does not exist")
        sys.exit(0)

    # Instantiate a cursor
    cursor = connection.cursor()

    # Infinite loop while the program is running
    while True:
        # Process the login of the user
        userCreds = User.processLogin(cursor)

        # If login fails, go back to login processing
        if not userCreds:
            continue

        # uid is the first element in the userCreds list; element 2 is the role
        if userCreds[2] == 'a':
            # Instantiate an agent object
            user = Agent(userCreds[0], cursor, connection)
        else:
            # Instantiate an officer object
            user = Officer(userCreds[0], cursor, connection)

        # Loop through actions of agent or officer
        while True:
            # Check if the user logged out
            if not user.isLoggedIn():
                break

            # Check if the user chose to exit
            if user.isExit():
                # Commit changes to the database
                connection.commit()
                return
            # Check what job the user wants to do.
            user.processJobs()
            # Commit any changes made to the database
            connection.commit()
Example #30
def initialize():
    app = FlaskAPI(__name__)
    parser = argparse.ArgumentParser()

    parser.add_argument('--agent', help='Run agent', default=False, action="store_true")
    parser.add_argument('--agent-ip', help='The IP of the agent', default="localhost")
    parser.add_argument('--controller', help='Run Controller', action="store_true")

    args = parser.parse_args()



    if args.agent:
        cmd = Agent(args, app)

    if args.controller:
        cmd = Controller(args, app)

    return cmd
Example #31
def main(*args, **kwargs):
    conf.save_dir = os.path.abspath(conf.save_dir)
    if not os.path.exists(conf.save_dir):
        os.makedirs(conf.save_dir)

    with tf.Session() as sess:
        env = Environment()
        agent = Agent(sess, conf, env, name='kindAgent')
        tf.global_variables_initializer().run()
        env.create()
        env.connect_client()
        if conf.is_train:
            agent.train()
        else:
            agent.competition()
Example #32
import keras
from keras.models import load_model

from agent.agent import Agent
from functions import *
import sys

if len(sys.argv) != 3:
	print "Usage: python evaluate.py [stock] [model]"
	exit()

stock_name, model_name = sys.argv[1], sys.argv[2]
model = load_model("models/" + model_name)
window_size = model.layers[0].input.shape.as_list()[1]

agent = Agent(window_size, True, model_name)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

state = getState(data, 0, window_size + 1)
total_profit = 0
agent.inventory = []

for t in range(l):
	action = agent.act(state)

	# sit
	next_state = getState(data, t + 1, window_size + 1)
	reward = 0
Example #33
from agent.agent import Agent
from functions import *
import sys

if len(sys.argv) != 4:
	print("Usage: python train.py [stock] [window] [episodes]")
	exit()

stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])

agent = Agent(window_size)
data = getStockDataVec(stock_name)
l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
	print("Episode " + str(e) + "/" + str(episode_count))
	state = getState(data, 0, window_size + 1)

	total_profit = 0
	agent.inventory = []

	for t in range(l):
		action = agent.act(state)

		# sit
		next_state = getState(data, t + 1, window_size + 1)
		reward = 0

		if action == 1: # buy
			agent.inventory.append(data[t])