def collect_stats(agent: DQNAgent, n_games=1000):
    """Play n_games greedy games and report average snake length and loop rate."""
    MAX_STEPS = 1000
    lengths = []
    looped = 0
    for i in range(1, n_games + 1):
        env = gym.make('snake-v0')
        # env.__init__(human_mode=False)
        observation = env.reset()
        done = False
        steps = 0
        agent.epsilon = 0.0  # evaluate greedily, without exploration
        state = agent.get_last_observations(observation)
        while not done and steps < MAX_STEPS:
            action = agent.act(state)
            next_observation, _, done, _ = env.step(action)
            state = agent.get_last_observations(next_observation)
            steps += 1
        if steps == MAX_STEPS:
            looped += 1  # the snake got stuck in a loop; skip its length
        else:
            lengths.append(len(env.game.snake.body))
        if i % (n_games // 10) == 0:
            print(f"Avg len: {sum(lengths) / len(lengths):.2f}, looped {looped}/{i}")
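
# A minimal usage sketch for collect_stats. The constructor arguments and the
# checkpoint path below are hypothetical; agent.load mirrors the load call
# used by DQNScheduler further down.
if __name__ == "__main__":
    agent = DQNAgent(state_size=32, action_size=3)  # hypothetical sizes
    agent.load("./save/snake-dqn.h5")               # hypothetical checkpoint
    collect_stats(agent, n_games=1000)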

def run_episode(environment: gym.Env, agent: DQNAgent, render: bool, max_length: int):
    """
    Run one episode in the given environment with the agent.

    Arguments:
        environment {`gym.Env`} -- Environment representing the Markov Decision Process
        agent {`DQNAgent`} -- Reinforcement learning agent that acts in the environment
        render {`bool`} -- Whether the frames of the episode should be rendered on the screen
        max_length {`int`} -- Maximum number of steps before the episode is terminated

    Returns:
        `float` -- Cumulative reward that the agent received during the episode
    """
    episode_reward = 0
    state = environment.reset()
    for _ in range(max_length):
        if render:
            environment.render()
        action = agent.act(state)
        next_state, reward, terminal, _ = environment.step(action)
        agent.observe(Transition(state, action, reward,
                                 None if terminal else next_state))
        episode_reward += reward
        if terminal:
            break
        else:
            state = next_state
    return episode_reward
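
# run_episode above assumes a Transition record type; a minimal sketch using
# a namedtuple, with field names inferred from the call site:
from collections import namedtuple

Transition = namedtuple("Transition", ["state", "action", "reward", "next_state"])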

class DQNScheduler:
    def __init__(self, simulator):
        self.agent = DQNAgent(25, 6)
        self.agent.load("./save/car-100-dqn.h5")
        self.simulator = simulator
        self.agent.epsilon = 0

    def schedule(self):
        action = self.agent.act(np.reshape(self.simulator.get_state(), [1, 25]))
        return action

def _run_agent_one_ep(env: BaseEnv, agent: DQNAgent, config: Config, eps: float,
                      behavior_name: str, train: Optional[bool] = True):
    # Get a starting state
    env.reset()
    decision_steps, terminal_steps = env.get_steps(behavior_name)
    state = decision_steps.obs[0]
    agent_id = decision_steps.agent_id[0]

    done = False
    did_win = False
    episode_reward = 0.0

    while not done:
        reward = 0.0

        # Get and perform an action
        action = agent.act(decision_steps.obs[0], eps)
        env.set_actions(behavior_name, np.expand_dims(action, 0).reshape(-1, 1))
        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        # Determine S', R, Done
        next_state = None
        if agent_id in decision_steps:
            reward += decision_steps.reward[0]
            next_state = decision_steps.obs[0]
        if agent_id in terminal_steps:
            terminal_reward = terminal_steps.reward[0]
            # A terminal reward of exactly 1.0 marks a win
            did_win = math.isclose(terminal_reward, 1.0)
            reward += terminal_reward
            next_state = terminal_steps.obs[0]
            done = True
        assert next_state is not None, \
            f"next_state cannot be None. Agent {agent_id} did not appear in decision or terminal steps"

        if train:
            # Learn from (S, A, R, S')
            experience = Experience(state, action, reward, next_state, done)
            agent.step(experience)

        # Set new state
        state = next_state
        episode_reward += reward

    return (episode_reward, did_win)
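
# _run_agent_one_ep assumes an Experience record type; a minimal sketch,
# again with field names inferred from the call site:
from collections import namedtuple

Experience = namedtuple("Experience",
                        ["state", "action", "reward", "next_state", "done"])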

def watch_agent(agent: DQNAgent):
    env = gym.make('snake-v0')
    env.__init__(human_mode=True)
    observation = env.reset()
    renderer = Renderer(env.game)
    try:
        done = False
        steps = 0
        agent.epsilon = 0
        state = agent.get_last_observations(observation)
        while not done:
            # time.sleep(0.001)
            renderer.render_frame()
            action = agent.act(state)
            next_observation, _, done, _ = env.step(action)
            state = agent.get_last_observations(next_observation)
            steps += 1
    finally:
        renderer.close_window()
    print(f"Snake length: {len(env.game.snake.body)}")
    print(f"Simulation ended after {steps} steps.")

def main(argv):
    args = parser.parse_args(argv[1:])
    if args.usage == 'help':
        return parser.print_help()
    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        all_registry = registry.all()
        registry_envs_name = [trim_env_spec_name(env.__repr__())
                              for env in all_registry]
        for environment in registry_envs_name:
            print(environment)
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            # Map the state-size preset name to a flattened observation size
            if args.pre_defined_state_size == 'nesgym':
                pre_state_size = 172032
            elif args.pre_defined_state_size == 'gym':
                pre_state_size = env.observation_space.shape[0]
            elif args.pre_defined_state_size == 'gym-atari':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-atari-extend':
                pre_state_size = 120000
            elif args.pre_defined_state_size == 'gym-atari-small':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-gomoku':
                pre_state_size = 361
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
        done = False
        batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present':
                        env.render()
                    if args.render == 'presented':
                        env.render(args.render)
                    if args.action_type == 'alternate':
                        action_choice = i_episodes * 2
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action_choice = i_episodes
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        reward = reward if not done else -10  # penalize termination
                        observation = np.reshape(observation, [1, pre_state_size])
                        agent.remember(state, action, reward, observation, done)
                        state = observation
                    collect_stat(reward, ['rewards'], stats)
                    if done:
                        max_episodes_range = i_episodes - 1
                        episode_timesteps_iteration_limit = max_episodes_range - 1
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_timesteps_iteration_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'], stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'], stats)
                        is_latest_episode_to_save_state = (
                            lambda args_cached: is_latest_episode
                            and args_cached.output_stats_filename)
                        if is_latest_episode_to_save_state(args):
                            filename = args.output_stats_filename
                            pre_df = {
                                'rewards': stats['rewards'],
                                'actions-input': stats['input']['actions']
                            }
                            df = pd.DataFrame(pre_df)
                            stamp = lambda: '%s' % int(datetime.now().timestamp())
                            with open('data/{}-{}.csv'.format(stamp(), filename),
                                      'w') as f:
                                f.write(df.to_csv())
                            print('Statistics file saved ({}.csv)!'.format(filename))
                            del df
                            del filename
                        print(check_output_env_label())
                        del is_latest_episode_to_save_state
                        del increased_timestep
                        del is_latest_episode
                        del episode_timesteps_iteration_limit
                        del max_episodes_range
                        break
                except Exception as e:
                    print('Rendering execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn', args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
    else:
        parser.print_help()
def main(): print "Creating DQN agent..." # env = gym.make("codegen-v0") set_debugger_org_frc() iters = 6300 n_goal = 0 n_goal_all = 0 time_stamp = 0 max_steps = 5 agent = DQNAgent(max_steps) agent.dqn.initial_exploration = 6000 * max_steps for iter in range(iters): print "\n********Iteration # ", iter, "***********\n" # 1 iteration env = gym.make("codegen-v0") num = random.randrange(1, 100) print "Goal Number : ", num + 1 env.my_input = num #env.goal = "['" + env.my_input + "']" env.goal = str(num + 1) code = env._reset() step_in_episode = 0 total_score = 0.0 reward = 0.0 mystate = [] my_state_new = [] # debug : the sys # sss = [] # for arg in sys.argv[1:]: # sss.append(arg) # print "sss = " , sss # while True: while step_in_episode < max_steps: # state = env.code_index_list + [-1]*(max_steps-len(env.code_index_list state = env.code_index_list[:] state += np.zeros([ max_steps - len(env.code_index_list), agent.dqn.code_idx_size ], dtype=int).tolist() # state = state.tolist() # state = 1; # print "env = ",env.code_index_list # print "state = ",state # raw_input() if step_in_episode == 0: action_idx = agent.start(code, state) else: action_idx = agent.act(code, state, reward) code, reward, terminal, info = env._step(action_idx, agent.dqn.actions) state_prime = env.code_index_list[:] state_prime += np.zeros([ max_steps - len(env.code_index_list), agent.dqn.code_idx_size ], dtype=int).tolist() # debug : the sys # sss = [] # for arg in sys.argv[1:]: # sss.append(arg) # print "sss = " , sss print "state : " print state print "state' : " print state_prime if step_in_episode == max_steps - 1: agent.dqn.stock_experience(agent.dqn.time_stamp, state, action_idx, reward, state_prime, 1) else: agent.dqn.stock_experience(agent.dqn.time_stamp, state, action_idx, reward, state_prime, 0) agent.dqn.experience_replay(agent.dqn.time_stamp) agent.dqn.target_model_update(agent.dqn.time_stamp, soft_update=False) total_score += reward if terminal: agent.dqn.goal_idx.append(agent.dqn.time_stamp) agent.end(reward) agent.dqn.stock_experience(agent.dqn.time_stamp, state, action_idx, reward, state_prime, 1) n_goal_all += 1 step_in_episode += 1 agent.dqn.time_stamp += 1 if iters - iter <= 100: n_goal += 1 break step_in_episode += 1 agent.dqn.time_stamp += 1 if iter == 1 + (agent.dqn.initial_exploration / max_steps): print "n_goal_all = ", n_goal_all print agent.dqn.goal_idx raw_input() print "n_goal : ", n_goal print "epsilon : ", agent.epsilon

class VideoStreamingTest(object):
    def __init__(self, host, port):
        # DQN parameters
        self.state_size = 3
        self.action_size = 7
        self.done = False
        self.batch_size = 32
        self.agent = DQNAgent(self.state_size, self.action_size)
        self.state_now = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                    [1, self.state_size])
        self.state_last = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                     [1, self.state_size])
        self.action_for_next = 0
        self.action_for_now = 0
        self.reward = 0
        # Serial command strings: T* sets the throttle, S* sets the steering
        self.forward = "T394"
        self.left = "S450"
        self.right = "S270"
        self.backward = "T330"
        self.stop = "T370"
        self.middle = "S360"
        self.server_socket = socket.socket()
        self.server_socket.bind((host, port))
        self.server_socket.listen(0)
        self.connection, self.client_address = self.server_socket.accept()
        self.connection = self.connection.makefile("rb")
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
        self.temp_result = None
        self.final_result = None
        self.RANGE = 350
        self.WIDTH = 720
        self.time_now = 0
        self.count = 0
        self.streaming()

    def dqn_loop(self):
        # Done once the car is farther than RANGE from the center (r > 1)
        self.done = self.final_result['me']['r'] > 1
        self.prepare_state()   # update the previous state and read the current one
        self.prepare_action()  # update the previous action and choose the next one
        if self.count == 1:
            self.prepare_reward()  # reward for the previous move
        else:
            self.count += 1
        self.act_move()  # update the car's motion
        if self.count == 1:
            self.remember_step()  # store this transition
        if len(self.agent.memory) > self.batch_size:
            self.agent.replay(self.batch_size)

    def prepare_state(self):
        self.state_last = self.state_now
        state_now_ = [self.final_result['me']['alpha_big'],
                      self.final_result['me']['alpha_small'],
                      self.final_result['me']['r']]
        self.state_now = np.reshape(state_now_, [1, self.state_size])

    def prepare_action(self):
        self.action_for_now = self.action_for_next
        self.action_for_next = self.agent.act(self.state_now)

    def prepare_reward(self):
        # Precondition: state_last has been set
        if self.done:
            self.reward = -10
        else:
            # Reward shrinking distance to the arena center
            self.reward = (self.state_last[0][2] - self.state_now[0][2]) * 100

    def remember_step(self):
        self.agent.remember(self.state_last, self.action_for_now, self.reward,
                            self.state_now, self.done)

    def act_move(self):
        if self.done:
            self.action_for_next = 0
        if self.action_for_next == 0:    # stop
            socket_tcp.send(self.middle.encode("utf-8"))
            socket_tcp.send(self.stop.encode("utf-8"))
        elif self.action_for_next == 1:  # forward
            socket_tcp.send(self.middle.encode("utf-8"))
            socket_tcp.send(self.forward.encode("utf-8"))
        elif self.action_for_next == 2:  # forward-left
            socket_tcp.send(self.left.encode("utf-8"))
            socket_tcp.send(self.forward.encode("utf-8"))
        elif self.action_for_next == 3:  # forward-right
            socket_tcp.send(self.right.encode("utf-8"))
            socket_tcp.send(self.forward.encode("utf-8"))
        elif self.action_for_next == 4:  # backward: pulse reverse, stop, reverse
            socket_tcp.send(self.middle.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))
            socket_tcp.send(self.middle.encode("utf-8"))
            socket_tcp.send(self.stop.encode("utf-8"))
            socket_tcp.send(self.middle.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))
        elif self.action_for_next == 5:  # backward-left, same reverse pulse
            # (the original prepared these commands but omitted the send
            # calls; added to match actions 4 and 6)
            socket_tcp.send(self.left.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))
            socket_tcp.send(self.left.encode("utf-8"))
            socket_tcp.send(self.stop.encode("utf-8"))
            socket_tcp.send(self.left.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))
        elif self.action_for_next == 6:  # backward-right, same reverse pulse
            socket_tcp.send(self.right.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))
            socket_tcp.send(self.right.encode("utf-8"))
            socket_tcp.send(self.stop.encode("utf-8"))
            socket_tcp.send(self.right.encode("utf-8"))
            socket_tcp.send(self.backward.encode("utf-8"))

    def get_one_car(self, x1, y1, x2, y2):
        x0 = (x1 + x2) / 2
        y0 = (y1 + y2) / 2
        detx = x1 - x2
        dety = y1 - y2
        temp_x0 = x0 - self.WIDTH / 2
        temp_y0 = y0 - self.WIDTH / 2
        # Heading angle of the car, from its two marker dots
        if detx > 0:
            alpha_small = math.atan(dety / detx)
        elif detx < 0:
            alpha_small = math.atan(dety / detx) + math.pi
        else:
            alpha_small = math.pi / 2 if dety > 0 else -math.pi / 2
        # Bearing of the car relative to the arena center
        if temp_x0 > 0:
            alpha_big = math.atan(temp_y0 / temp_x0)
        elif temp_x0 < 0:
            alpha_big = math.atan(temp_y0 / temp_x0) + math.pi
        else:
            alpha_big = math.pi / 2 if temp_y0 > 0 else -math.pi / 2
        # Normalize the angles and scale the radius by RANGE
        alpha_small = alpha_small / math.pi - 0.5
        alpha_big = alpha_big / math.pi - 0.5
        r = math.sqrt(temp_x0 ** 2 + temp_y0 ** 2) / self.RANGE
        return {
            "alpha_big": alpha_big,
            "alpha_small": alpha_small,
            "r": r,
            "x0": x0,
            "y0": y0
        }

    def get_final_result(self):
        red_x = self.temp_result["red"]["x"]
        red_y = self.temp_result["red"]["y"]
        green_x = self.temp_result["green"]["x"]
        green_y = self.temp_result["green"]["y"]
        blue_x = self.temp_result["blue"]["x"]
        blue_y = self.temp_result["blue"]["y"]
        yellow_x = self.temp_result["yellow"]["x"]
        yellow_y = self.temp_result["yellow"]["y"]
        # Our car is marked red/green, the enemy car blue/yellow
        final_temp = {}
        final_temp["me"] = self.get_one_car(red_x, red_y, green_x, green_y)
        final_temp["enemy"] = self.get_one_car(blue_x, blue_y, yellow_x, yellow_y)
        self.final_result = final_temp

    def draw(self, frame, lowerRGB, upperRGB, word):
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # Build a mask from the HSV thresholds
        mask = cv2.inRange(hsv, lowerRGB, upperRGB)
        # Erode, then dilate: an opening operation that removes noise
        mask = cv2.erode(mask, None, iterations=2)
        mask = cv2.dilate(mask, None, iterations=2)
        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        # Centroid of the detected circular marker
        center = None
        result = {}
        # If any contours exist
        if len(cnts) > 0:
            # Take the largest contour by area and its minimum enclosing circle
            c = max(cnts, key=cv2.contourArea)
            ((x, y), radius) = cv2.minEnclosingCircle(c)
            # Compute the centroid from the contour moments
            M = cv2.moments(c)
            center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
            # Only draw when the radius exceeds 10 pixels
            if radius > 10:
                cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255), 2)
                cv2.circle(frame, center, 5, (0, 0, 255), -1)
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, word, (int(x), int(y)), font, 1.2,
                            (255, 255, 255), 2)
            # Only report a position when a marker was actually found, so the
            # caller can treat an empty dict as a lost marker
            result["x"] = x
            result["y"] = y
        return result

    def streaming(self):
        try:
            print("Host: ", self.host_name + " " + self.host_ip)
            print("Connection from: ", self.client_address)
            print("Streaming...")
            print("Press 'q' to exit")
            # HSV thresholds for the four marker dots
            redLower = np.array([170, 100, 200])
            redUpper = np.array([179, 255, 255])
            greenLower = np.array([65, 100, 100])
            greenUpper = np.array([85, 255, 255])
            blueLower = np.array([95, 100, 100])
            blueUpper = np.array([115, 255, 255])
            yellowLower = np.array([5, 100, 100])
            yellowUpper = np.array([20, 255, 255])
            # Need bytes here
            stream_bytes = b" "
            while True:
                stream_bytes += self.connection.read(1024)
                first = stream_bytes.find(b"\xff\xd8")  # JPEG start marker
                last = stream_bytes.find(b"\xff\xd9")   # JPEG end marker
                if first != -1 and last != -1:
                    jpg = stream_bytes[first:last + 2]
                    stream_bytes = stream_bytes[last + 2:]
                    image = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8),
                                         cv2.IMREAD_COLOR)
                    frame = image
                    result_red = self.draw(frame, redLower, redUpper, "RED")
                    result_green = self.draw(frame, greenLower, greenUpper, "GREEN")
                    result_blue = self.draw(frame, blueLower, blueUpper, "BLUE")
                    result_yellow = self.draw(frame, yellowLower, yellowUpper,
                                              "YELLOW")
                    result = {
                        "red": result_red,
                        "green": result_green,
                        "blue": result_blue,
                        "yellow": result_yellow
                    }
                    self.temp_result = result
                    # Only run the DQN loop when all four markers were found
                    flag = all([result_red, result_green,
                                result_blue, result_yellow])
                    if flag:
                        self.get_final_result()
                        self.time_now = int((time.time() - start_time) * 1000)
                        self.dqn_loop()
                        # Draw both car axes and their bearings from the center
                        cv2.line(frame,
                                 (int(self.temp_result["red"]["x"]),
                                  int(self.temp_result["red"]["y"])),
                                 (int(self.temp_result["green"]["x"]),
                                  int(self.temp_result["green"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame,
                                 (int(self.temp_result["blue"]["x"]),
                                  int(self.temp_result["blue"]["y"])),
                                 (int(self.temp_result["yellow"]["x"]),
                                  int(self.temp_result["yellow"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame,
                                 (int(self.final_result["me"]["x0"]),
                                  int(self.final_result["me"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (0, 0, 255), 4, 4)
                        cv2.line(frame,
                                 (int(self.final_result["enemy"]["x0"]),
                                  int(self.final_result["enemy"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (255, 0, 0), 4, 4)
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cv2.putText(frame, str(self.final_result["me"]["alpha_big"]),
                                    (int(self.final_result["me"]["x0"]),
                                     int(self.final_result["me"]["y0"])),
                                    font, 1, (0, 255, 0), 1)
                        cv2.putText(frame,
                                    str(self.final_result["enemy"]["alpha_small"]),
                                    (int(self.final_result["enemy"]["x0"]),
                                     int(self.final_result["enemy"]["y0"])),
                                    font, 1, (0, 255, 0), 1)
                    else:
                        # Lost a marker: steer straight and stop the car
                        socket_tcp.send("S360".encode("utf-8"))
                        socket_tcp.send("T370".encode("utf-8"))
                    cv2.imshow("Frame", frame)
                    if cv2.waitKey(1) & 0xFF == ord("q"):
                        break
        finally:
            self.connection.close()
            self.server_socket.close()
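
# VideoStreamingTest sends drive commands through a module-level socket_tcp
# and measures elapsed time against a module-level start_time, neither of
# which is defined in this snippet. A minimal sketch of what they might look
# like (the controller address is hypothetical):
import socket
import time

start_time = time.time()
socket_tcp = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_tcp.connect(("192.168.1.100", 8001))  # hypothetical car controller address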

i = []
v = []
r = []
for e in range(EPISODES):
    WH = w.generateWind()
    hdg0_rand = random.choice(hdg0_rand_vec) * TORAD
    hdg0 = hdg0_rand * np.ones(10)

    mdp.simulator.hyst.reset()  # we reinitialize the memory of the flow
    state = mdp.initializeMDP(hdg0, WH)
    loss_sim_list = []
    for time in range(80):
        WH = w.generateWind()
        action = agent.act(state)
        next_state, reward = mdp.transition(action, WH)
        # Store the transition, including the flow state in the final state
        agent.remember(state, action, reward, next_state)
        state = next_state
        if len(agent.memory) >= batch_size:
            loss_sim_list.append(agent.replay(batch_size))

        # For data visualisation
        i.append(mdp.s[0, -1])
        v.append(mdp.s[1, -1])
        r.append(mdp.reward)

# Mean loss over the last simulation (loss_sim_list holds the losses of the
# final episode)
loss_over_simulation_time = float(np.mean(loss_sim_list))
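
# The remember/replay interface used above follows the common Keras-style DQN
# pattern; a minimal sketch of the memory behind it (the capacity and tuple
# layout are assumptions):
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=2000):
        self.buffer = deque(maxlen=capacity)  # drop the oldest transitions first

    def remember(self, state, action, reward, next_state):
        self.buffer.append((state, action, reward, next_state))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)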
def main(): print "Creating DQN agent..." iters = 10000 n_goal = 0 n_goal_all = 0 time_stamp = 0 ############################################################ # print x # max_steps = 3 # actions = ["print", " ", "x"] ############################################################ ############################################################ # print x+1 max_steps = 5 actions = ["print", " ", "x", "+", "1"] ############################################################ agent = DQNAgent(max_steps, actions) agent.dqn.initial_exploration = iters * 0.6 results = [] policy_frozen = False wins_file = "wins.txt" with io.FileIO(wins_file, "w") as file: file.write("Winning codes:\n") for iter in range(iters): print "\n\n::{}::".format(iter) if iter == 4300: # 2300: policy_frozen = True env = gym.make("codegen-v0") num = random.randrange(1, 100) env.my_input = num ############################################################ # print x # env.goal = str(num) ############################################################ ############################################################ # print x+1 env.goal = str(num + 1) ############################################################ code = env._reset() step_in_episode = 0 total_score = 0.0 reward = 0.0 mystate = [] my_state_new = [] while step_in_episode < max_steps: state = env.code_index_list[:] state += np.zeros([ max_steps - len(env.code_index_list), agent.dqn.code_idx_size ], dtype=int).tolist() if step_in_episode == 0: action_idx = agent.start(code, state, policy_frozen) else: action_idx = agent.act(code, state, reward) code, reward, terminal, info = env._step(action_idx, agent.dqn.actions) state_prime = env.code_index_list[:] state_prime += np.zeros([ max_steps - len(env.code_index_list), agent.dqn.code_idx_size ], dtype=int).tolist() agent.dqn.experience_replay(agent.dqn.time_stamp) if step_in_episode == max_steps - 1 or terminal: agent.dqn.stock_experience(agent.dqn.time_stamp, state, action_idx, reward, state_prime, True) if terminal: agent.dqn.goal_idx.append(agent.dqn.time_stamp) agent.dqn.time_stamp += 1 else: agent.dqn.stock_experience(agent.dqn.time_stamp, state, action_idx, reward, state_prime, False) total_score += reward if terminal: agent.end(reward) n_goal_all += 1 step_in_episode += 1 if iters - iter <= 100: n_goal += 1 step_in_episode += 1 if iter >= 100: results = results[1:] if reward >= 1: print "WIN" results.append(1.0) with io.FileIO(wins_file, "a") as f: f.write( "\n=====================\n{}\n=====================\n\n". format(code)) f.flush() os.fsync(f) else: results.append(0.0) total_iters = 100 if iter >= 100 else iter + 1 print "TOTAL {:.2f}% of wins in last {} iters, sum: {}, total good: {}".format( 100 * sum(results) / total_iters, total_iters, sum(results), len(agent.dqn.goal_idx)) if iter == 1 + agent.dqn.initial_exploration: print "n_goal_all = ", n_goal_all print agent.dqn.goal_idx raw_input() print "n_goal : ", n_goal print "epsilon : ", agent.epsilon

batch_size = 32
title = env.symbol.upper() + ' MDP Replay ' + os.path.basename(__file__).split('.')[0]
grapher = Grapher(title)

with open('./save/losses_' + stock_name + '.txt', 'w') as f:
    for e in range(EPISODES + 1):
        # Train
        state = env.reset()
        state = np.reshape(state, [1, state_size])
        for time in range(500):
            cash, nown, price = env.holdings[0], env.holdings[1], env.state[-1]
            action = agent.act(state, time)
            next_state, reward, done, _ = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.remember(state, action, reward, next_state, done)
            state = next_state  # advance the state (missing in the original loop)
            if len(agent.memory) > batch_size:
                agent.replay(batch_size)
            if e % 2 == 0:
                grapher.add(cash, nown, price, action, reward, loss=agent.loss)

agent = DQNAgent(  # assumed: the head of this call is missing in the source
    observation=obs,
    input_shape=[len(obs)],
    training=True,
    policy=policy)
agent.compile()

result = []
for episode in range(500):  # run 500 training episodes
    agent.reset()
    observation = env.reset()  # reset the environment
    observation = deepcopy(observation)
    agent.observe(observation)
    for t in range(250):  # run up to 250 steps per episode
        # env.render()  # display
        action = agent.act()
        # step returns the resulting state, the reward, whether the episode
        # is done, and extra info
        observation, reward, done, info = env.step(action)
        observation = deepcopy(observation)
        agent.observe(observation, reward, done)
        if done:
            break

# Test with a frozen (greedy) policy
agent.training = False
observation = env.reset()  # reset the environment
agent.observe(observation)
for t in range(250):
    # env.render()  # display
    action = agent.act()
    observation, reward, done, info = env.step(action)

class AgentTrainer(object):
    def __init__(self, config):
        # Create session to store trained parameters
        self.session = tf.Session()
        self.action_count = config["action_count"]
        # Create agent for training
        self.agent = DQNAgent(self.action_count)
        # Create memory to store observations
        self.memory = ExperienceMemory(config["replay_memory_size"])
        # Tools for saving and loading networks
        self.saver = tf.train.Saver()
        # Last action that the agent performed
        self.last_action_index = None
        # Deque to keep track of average reward and play time
        self.game_history = GameHistory(config["match_memory_size"])
        # Deque to store losses
        self.episode_history = EpisodeHistory(config["replay_memory_size"])
        self.INITIAL_EPSILON = config["initial_epsilon"]
        self.FINAL_EPSILON = config["final_epsilon"]
        self.OBSERVE = config["observe_step_count"]
        self.EXPLORE = config["explore_step_count"]
        self.FRAME_PER_ACTION = config["frame_per_action"]
        self.GAMMA = config["gamma"]
        self.LOG_PERIOD = config["log_period"]
        self.BATCH_SIZE = config["batch_size"]

    def init_training(self):
        # Initialize training parameters
        self.session.run(tf.global_variables_initializer())
        self.epsilon = self.INITIAL_EPSILON
        self.t = 0
        self.last_action_index = None

    def load_model(self, path):
        checkpoint = tf.train.get_checkpoint_state(path)
        if checkpoint and checkpoint.model_checkpoint_path:
            self.saver.restore(self.session, checkpoint.model_checkpoint_path)
            print("Successfully loaded: {}".format(checkpoint.model_checkpoint_path))
        else:
            print("Could not find old network weights")

    def save_model(self, path):
        self.saver.save(self.session, os.path.join(path, "dqn"), global_step=self.t)

    def reset_state(self, initial_state):
        # Get the first state by doing nothing and preprocess the image to 80x80x4
        x_t = transformImage(initial_state)
        self.s_t = np.concatenate((x_t, x_t, x_t, x_t), axis=2)
        self.match_reward = 0
        self.match_playtime = 0
        self.gamma_pow = 1

    def act(self):
        # Choose an action epsilon-greedily
        if self.t % self.FRAME_PER_ACTION == 0:
            if np.random.random() <= self.epsilon:
                action_index = np.random.randint(0, self.action_count)
            else:
                action_index = self.agent.act(self.session, self.s_t)
        else:
            action_index = self.last_action_index  # repeat the previous action
        self.last_action_index = action_index
        return action_index

    def process_frame(self, screen, reward, terminal):
        if self.last_action_index is None:
            self.reset_state(screen)
            return
        a_t = np.zeros([self.action_count])
        a_t[self.last_action_index] = 1
        # Scale down epsilon once the observation phase is over
        if self.epsilon > self.FINAL_EPSILON and self.t > self.OBSERVE:
            self.epsilon -= (self.INITIAL_EPSILON - self.FINAL_EPSILON) / self.EXPLORE
        # Run the selected action and observe the next state and reward
        x_t1, r_t = transformImage(screen), reward
        s_t1 = np.append(x_t1, self.s_t[:, :, :3], axis=2)
        # Store the transition in memory
        self.memory.add_experience((self.s_t, a_t, r_t, s_t1, terminal))
        # Only train once done observing
        if self.t > self.OBSERVE:
            loss = self.make_train_step()
            self.episode_history.add_episode(Episode(loss))
        # Update the old values
        self.s_t = s_t1
        self.t += 1
        # Print progress info
        if self.t % self.LOG_PERIOD == 0:
            print("TIMESTEP {}, EPSILON {}, EPISODE_STATS {}, MATCH_STATS {}".format(
                self.t, self.epsilon,
                self.episode_history.get_average_stats(),
                self.game_history.get_average_stats()))
            sys.stdout.flush()
        self.match_reward += r_t * self.gamma_pow
        self.match_playtime += 1
        self.gamma_pow *= self.GAMMA
        if terminal:
            self.game_history.add_match(
                MatchResults(self.match_reward, self.match_playtime, reward))
            self.reset_state(screen)

    def make_train_step(self):
        # Sample a minibatch to train on
        minibatch = self.memory.sample(self.BATCH_SIZE)
        # Unpack the batch variables
        s_j_batch = [d[0] for d in minibatch]
        a_batch = [d[1] for d in minibatch]
        r_batch = [d[2] for d in minibatch]
        s_j1_batch = [d[3] for d in minibatch]
        action_scores_batch = np.array(
            self.agent.score_actions(self.session, s_j1_batch))
        # Q-learning target: y = r for terminal transitions,
        # y = r + GAMMA * max_a' Q(s', a') otherwise
        y_batch = []
        for i in range(len(minibatch)):
            if minibatch[i][4]:
                y_batch.append(r_batch[i])
            else:
                y_batch.append(r_batch[i] + self.GAMMA * np.max(action_scores_batch[i]))
        return self.agent.train(self.session, y_batch, a_batch, s_j_batch)
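
# transformImage is not defined in this snippet; reset_state stacks four
# copies along the channel axis to get 80x80x4, so each frame must come out
# as 80x80x1. A minimal sketch using OpenCV under that assumption:
import cv2
import numpy as np

def transformImage(screen):
    gray = cv2.cvtColor(screen, cv2.COLOR_BGR2GRAY)  # drop the color channels
    small = cv2.resize(gray, (80, 80))               # match the 80x80 network input
    return small.reshape(80, 80, 1)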