Example #1
def run_episode(environment: gym.Env, agent: DQNAgent, render: bool,
                max_length: int):
    """
    Run one episode in the given environment with the agent.

    Arguments:
        environment {`gym.Env`} -- Environment representing the Markov Decision Process
        agent {`DQNAgent`} -- Reinforcement Learning agent that acts in the environment
        render {`bool`} -- Whether the frames of the episode should be rendered on the screen
        max_length {`int`} -- Maximum number of steps before the episode is terminated

    Returns:
        `float` -- Cumulative reward that the agent received during the episode
    """
    episode_reward = 0
    state = environment.reset()
    for _ in range(max_length):
        if render:
            environment.render()
        action = agent.act(state)
        next_state, reward, terminal, _ = environment.step(action)
        agent.observe(
            Transition(state, action, reward,
                       None if terminal else next_state))
        episode_reward += reward
        if terminal:
            break
        else:
            state = next_state
    return episode_reward
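A minimal sketch of how run_episode might be driven from a training loop, assuming a Transition record with the four fields passed to agent.observe above; the environment id, episode budget, and the Transition definition itself are placeholders, not part of the original example:

from collections import namedtuple

import gym

# Assumed container matching Transition(state, action, reward, next_state) above;
# next_state is None on terminal transitions.
Transition = namedtuple("Transition", ["state", "action", "reward", "next_state"])


def train_loop(agent: "DQNAgent", n_episodes: int = 100) -> None:
    # Hypothetical driver; 'CartPole-v0' is only a placeholder environment id.
    environment = gym.make("CartPole-v0")
    for episode in range(n_episodes):
        reward = run_episode(environment, agent, render=False, max_length=1000)
        print(f"Episode {episode}: cumulative reward {reward:.1f}")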
Example #2
def collect_stats(agent: DQNAgent, n_games=1000):
    MAX_STEPS = 1000
    lengths = []
    looped = 0
    for i in range(1, n_games+1):
        env = gym.make('snake-v0')
        # env.__init__(human_mode=False)
        observation = env.reset()
        done = False
        steps = 0
        agent.epsilon = 0.0
        state = agent.get_last_observations(observation)
        while not done and steps < MAX_STEPS:
            action = agent.act(state)
            next_observation, _, done, _ = env.step(action)
            state = agent.get_last_observations(next_observation)
            steps += 1

        if steps == MAX_STEPS:
            looped += 1
        else:
            lengths.append(len(env.game.snake.body))

        if i % (n_games//10) == 0:
            avg_len = sum(lengths) / len(lengths) if lengths else 0.0
            print(f"Avg len: {avg_len:.2f}, looped {looped}/{i}")
Example #3
    def __init__(self, config):
        # Create session to store trained parameters
        self.session = tf.Session()

        self.action_count = config["action_count"]

        # Create agent for training
        self.agent = DQNAgent(self.action_count)

        # Create memory to store observations
        self.memory = ExperienceMemory(config["replay_memory_size"])

        # Tools for saving and loading networks
        self.saver = tf.train.Saver()

        # Last action that agent performed
        self.last_action_index = None

        # Deque to keep track of average reward and play time
        self.game_history = GameHistory(config["match_memory_size"])

        # Deque to store losses
        self.episode_history = EpisodeHistory(config["replay_memory_size"])

        self.INITIAL_EPSILON = config["initial_epsilon"]
        self.FINAL_EPSILON = config["final_epsilon"]
        self.OBSERVE = config["observe_step_count"]
        self.EXPLORE = config["explore_step_count"]
        self.FRAME_PER_ACTION = config["frame_per_action"]
        self.GAMMA = config["gamma"]
        self.LOG_PERIOD = config["log_period"]
        self.BATCH_SIZE = config["batch_size"]
Example #4
 def __init__(self, host, port):
     self.state_size = 3
     self.action_size = 7
     self.done = False
     self.batch_size = 32
     self.agent = DQNAgent(self.state_size, self.action_size)
     self.state_now = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                 [1, self.state_size])
     self.state_last = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                  [1, self.state_size])
     self.action_for_next = 0
     self.action_for_now = 0
     self.reward = 0
     self.forward = "T394"
     self.left = "S450"
     self.right = "S270"
     self.backward = "T330"
     self.stop = "T370"
     self.middle = "S360"
     #dqn parameters
     self.server_socket = socket.socket()
     self.server_socket.bind((host, port))
     self.server_socket.listen(0)
     self.connection, self.client_address = self.server_socket.accept()
     self.connection = self.connection.makefile("rb")
     self.host_name = socket.gethostname()
     self.host_ip = socket.gethostbyname(self.host_name)
     self.temp_result = None
     self.finnal_result = None
     self.RANGE = 350
     self.WIDTH = 720
     self.time_now = 0
     self.count = 0
     self.streaming()
Example #5
 def __init__(self,
              model_class,
              model=None,
              env=None,
              exploration=None,
              gamma=0.99,
              memory_size=10000,
              batch_size=64,
              target_update_frequency=10,
              saving_dir=None):
     """
     base class for lstm dqn agent
     :param model_class: sub class of torch.nn.Module. class reference of the model
     :param model: initial model of the policy net. could be None if loading from checkpoint
     :param env: environment
     :param exploration: exploration object. Must have function value(step) which returns e
     :param gamma: gamma
     :param memory_size: size of the memory
     :param batch_size: size of the mini batch for one step update
     :param target_update_frequency: the frequency for updating target net (in episode)
     :param saving_dir: the directory for saving checkpoint
     """
     DQNAgent.__init__(self, model_class, model, env, exploration, gamma,
                       memory_size, batch_size, target_update_frequency,
                       saving_dir)
     self.memory = EpisodicReplayMemory(memory_size)
     self.hidden_size = 0
     if self.policy_net:
         self.hidden_size = self.policy_net.hidden_size
     self.hidden = None
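The exploration argument documented above only needs a value(step) method that returns the current epsilon; a minimal linear-annealing schedule satisfying that contract might look like this (class and parameter names are illustrative, not taken from the original code):

class LinearExploration:
    """Anneal epsilon linearly from eps_start to eps_end over n_steps, then hold it."""

    def __init__(self, eps_start: float = 1.0, eps_end: float = 0.1, n_steps: int = 100000):
        self.eps_start = eps_start
        self.eps_end = eps_end
        self.n_steps = n_steps

    def value(self, step: int) -> float:
        # Fraction of the schedule completed, clipped to [0, 1].
        fraction = min(max(step, 0) / self.n_steps, 1.0)
        return self.eps_start + fraction * (self.eps_end - self.eps_start)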
Example #6
    def predict_dqn(self):
        # get size of state and action from environment
        state_size = 4
        action_size = 2

        agent = DQNAgent(state_size, action_size, load_model=True)

        done = False
        score = 0

        self.reset()
        state, _, _, _ = self.step(-1)
        state = np.reshape(state, [1, state_size])

        while not done:
            # get action for the current state and go one step in environment
            action = agent.get_action(state)
            next_state, reward, done, info = self.step(action)
            next_state = np.reshape(next_state, [1, state_size])

            score += reward
            state = next_state

            if done or score >= 500:
                print("score:", score)
                break
Example #7
def play_it():
    #ENV_NAME = 'CartPole-v0'
    #ENV_NAME = 'MountainCar-v0'
    ENV_NAME = 'Single_virtual-v0'
    # Get the environment and extract the number of actions.
    env = make(ENV_NAME)
    env1 = make(ENV_NAME)
    np.random.seed(123)
    env.seed(123)
    nb_actions = env.action_space.n
    model = build_model(nb_actions,env.observation_space)
    # model = build_model1(nb_actions, env.observation_space)

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=50000, window_length=1)
    policy = BoltzmannQPolicy()
    dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
                   target_model_update=1e-2, policy=policy,)
    dqn.compile(Adam(lr=1e-3), metrics=['mae'])

    # Okay, now it's time to learn something! We visualize the training here for show, but this
    # slows down training quite a lot. You can always safely abort the training prematurely using
    # Ctrl + C.
    dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

    # After training is done, we save the final weights.
    dqn.save_weights(os.path.join(
        'models_weights_logs', 'dqn_{}_weights.h5f'.format(
            ENV_NAME + datetime.now().strftime("%Y%m%d-%H%M%S"))),
        overwrite=True)
    # dqn.load_weights(os.path.join('models_weights_logs','dqn_{}_weights.h5f'.format(ENV_NAME)))
    # Finally, evaluate our algorithm for 5 episodes.
    dqn.test(env1, nb_episodes=5, visualize=True)
Example #8
class DQNScheduler:
    def __init__(self, simulator):
        self.agent = DQNAgent(25, 6)
        self.agent.load("./save/car-100-dqn.h5")
        self.simulator = simulator
        self.agent.epsilon = 0

    def schedule(self):
        action = self.agent.act(np.reshape(self.simulator.get_state(),
                                           [1, 25]))
        return action
Example #9
def _run_agent_one_ep(env: BaseEnv,
                      agent: DQNAgent,
                      config: Config,
                      eps: float,
                      behavior_name: str,
                      train: Optional[bool] = True):
    # Get a starting state
    env.reset()

    decision_steps, terminal_steps = env.get_steps(behavior_name)
    state = decision_steps.obs[0]

    agent_id = decision_steps.agent_id[0]
    done = False
    did_win = False
    episode_reward = 0.0
    while not done:
        reward = 0.0
        # Get and perform an action
        action = agent.act(decision_steps.obs[0], eps)
        env.set_actions(behavior_name,
                        np.expand_dims(action, 0).reshape(-1, 1))
        env.step()

        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # Determine S', R, Done
        next_state = None
        if agent_id in decision_steps:
            reward += decision_steps.reward[0]
            next_state = decision_steps.obs[0]
        if agent_id in terminal_steps:
            terminal_reward = terminal_steps.reward[0]
            # Add win/loss
            did_win = math.isclose(terminal_reward, 1.0)
            reward += terminal_reward
            next_state = terminal_steps.obs[0]
            done = True

        assert next_state is not None, f"next_state cannot be None. Agent {agent_id} did not appear in decision or terminal steps"

        if train:
            # Learn from (S, A, R, S')
            experience = Experience(state, action, reward, next_state, done)
            agent.step(experience)

        # Set new state
        state = next_state

        episode_reward += reward

    return (episode_reward, did_win)
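Experience above is presumably a plain record of the (S, A, R, S', done) tuple handed to agent.step; a plausible sketch matching the positional arguments used in the loop (the field names are an assumption):

from typing import NamedTuple

import numpy as np


class Experience(NamedTuple):
    # Field order matches Experience(state, action, reward, next_state, done) above.
    state: np.ndarray
    action: int
    reward: float
    next_state: np.ndarray
    done: bool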
Example #10
def make_bot(un, pw, expected_opponent, team, challenge, trainer, epsilon=None, 
	model_path=None, target_model_path=None
):
	
	if trainer:
		if model_path:
			agent = DQNAgent(INPUT_SHAPE, training=False)
		else:
			agent = RandomAgent()
	else:
		agent = DQNAgent(
			INPUT_SHAPE, epsilon=epsilon, random_moves=True, training=False, 
			copy_target_model=False
		)
		agent.load_model(model_path)
		if target_model_path != None:
			agent.target_model = load_model(target_model_path)
		else:
			agent.target_model.set_weights(agent.model.get_weights())

	bot = BotClient(
		name=un, password=pw, expected_opponent=expected_opponent, team=team, 
		challenge=challenge, runType=RunType.Iterations, runTypeData=1, 
		agent=agent, trainer=trainer, save_model=False, 
		should_write_replay=(not trainer)
	)
	bot.start()
Example #11
    def get_agent(env, **kwargs):
        replay_capacity = 1e6
        n_episodes = 10e7

        return DQNAgent(env=env or gym.make('CartPole-v0'),
                        n_episodes=n_episodes,
                        replay_capacity=replay_capacity,
                        **kwargs)
Example #12
 async def on_challenge_update(self, challenge_data):
     incoming = challenge_data.get('challengesFrom', {})
     if self.expected_opponent.lower() in incoming:
         if self.trainer:
             model_paths = [
                 os.path.join(self.logs_dir, content)
                 for content in os.listdir(self.logs_dir) if
                 content.endswith('.model') and content.startswith('Epoch')
             ]
             if len(model_paths) > 0:
                 sorted_model_paths = sorted(
                     model_paths,
                     key=lambda x: int(
                         os.path.basename(x).lstrip('Epoch').rstrip('.model')))
                 model_to_load = sorted_model_paths[-1]
                 self.log(f'Loading model {model_to_load}')
                 self.agent = DQNAgent(INPUT_SHAPE, training=False)
                 self.agent.load_model(model_to_load)
         await self.accept_challenge(self.expected_opponent, self.team_text)
Example #13
    def __init__(self, player_name=None, letter=None):
        if player_name is None:
            self.player_name = common_utils.get_random_name()
        else:
            self.player_name = player_name

        if letter is not None:
            self.letter = letter
        else:
            pass
            # TODO: Handle this

        if letter == 'X':
            self.enemy_letter = 'O'
        else:
            self.enemy_letter = 'X'

        logger.debug("Initializing player {} with letter {} ...".format(
            self.player_name, self.letter))

        self.agent = DQNAgent()
Example #14
def watch_agent(agent: DQNAgent):
    env = gym.make('snake-v0')
    env.__init__(human_mode=True)
    observation = env.reset()
    renderer = Renderer(env.game)
    try:
        done = False
        steps = 0
        agent.epsilon = 0
        state = agent.get_last_observations(observation)
        while not done:
            # time.sleep(0.001)
            renderer.render_frame()
            action = agent.act(state)
            next_observation, _, done, _ = env.step(action)
            state = agent.get_last_observations(next_observation)
            steps += 1
    finally:
        renderer.close_window()
    print(f"Snake length: {len(env.game.snake.body)}")
    print(f"Simulation ended after {steps} steps.")
Example #15
def test_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    agent.load(args.save_dir)
    for _ in range(10):
        agent.test_one_episode(True)
Example #16
    def simulateDQNControl(self, hdg0):
        '''
        Plots the control law of the network over a simulation.

        :param hdg0: Initial heading of the boat for the simulation.
        :return: A plot of the angle of attack and velocity during the control.
        '''
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = hdg0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("i [°]")
        axarr[1].set_ylabel("v [m/s]")
        axarr[0].set_xlabel("t [s]")
        axarr[1].set_xlabel("t [s]")

        plt.show()
Example #17
def train_dqn():
    args = DQNArgs()
    env = gym.make(args.env_name)
    agent = DQNAgent(env, QNet, SimpleNormalizer, args)
    pre_best = -1e9
    for ep in range(args.max_ep):
        agent.train_one_episode()
        if ep % args.test_interval == 0:
            r = agent.test_model()
            if r > pre_best:
                pre_best = r
                agent.save(args.save_dir)
Example #18
    def simulateGustsControl(self):
        '''
        Simulate the response of the controller to gusts.

        :return: A plot of the simulation.
        '''
        self.sim_time = 100
        agent = DQNAgent(self.mdp.size, self.action_size)
        agent.load(self.src)
        WH = self.wh.generateWind()
        hdg0 = 0 * TORAD * np.ones(self.wh.samples)

        state = self.mdp.initializeMDP(hdg0, WH)

        i = np.ones(0)
        v = np.ones(0)
        wind_heading = np.ones(0)

        for time in range(self.sim_time):
            WH = self.wh.generateWind()
            if time == 20:
                WH = self.wh.generateGust(10 * TORAD)
            action = agent.actDeterministically(state)
            next_state, reward = self.mdp.transition(action, WH)
            state = next_state
            i = np.concatenate([i, self.mdp.extractSimulationData()[0, :]])
            v = np.concatenate([v, self.mdp.extractSimulationData()[1, :]])
            wind_heading = np.concatenate([wind_heading, WH[0:10]])

        time_vec = np.linspace(0, self.sim_time, int((self.sim_time) / self.mdp.dt))

        f, axarr = plt.subplots(2, sharex=True)
        axarr[0].plot(time_vec, i / TORAD)
        axarr[1].plot(time_vec, v)
        axarr[0].set_ylabel("angle of attack")
        axarr[1].set_ylabel("v")

        plt.show()
Example #19
def main():
    # parser = argparse.ArgumentParser(description='Run DQN on Atari SpaceInvaders')
    # parser.add_argument('--env', default='SpaceInvaders-v0', help='Atari env name')
    # parser.add_argument(
    #     '-o', '--output', default='SpaceInvaders-v0', help='Directory to save data to')
    # parser.add_argument('--seed', default=0, type=int, help='Random seed')
    # # parser.add_argument('--input_shape', default=(84, 84, 4), type=tuple, help='Size of each frame')
    #
    # args = parser.parse_args()
    #
    # args.output = get_output_folder(args.output, args.env)

    #vehicle_network
    veh_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=g1)
    #Attacker network
    att_network = create_lstm_model(nb_time_steps,
                                    nb_input_vector,
                                    num_actions=gym.make(
                                        args.env).action_space.n)
    veh_agent = DQNAgent(q_network=veh_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    att_agent = DQNAgent(q_network=att_network,
                         preprocessor=core.Preprocessor(),
                         memory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    veh_agent.compile('Adam', 'mse')
    att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env, 10**6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
Example #20
def build(args):
    # Params
    training = is_training(args)
    # Hack for switching number of DQN input features (see help)
    n_feats = {'all': 11, 'distance': 1}
    n_actions = 4  # we are ignoring action 0 (for now)

    # Maximum number of steps per episode
    max_steps = 8 * (args.dims[0] + args.dims[1]) - 1
    # Total feature dimension
    total_feats = n_feats[args.feats] * sum(
        [4**i for i in range(args.n_nodes + 1)])

    # Flatland Environment
    environment = FlatlandEnv(x_dim=args.dims[0],
                              y_dim=args.dims[1],
                              n_cars=args.n_agents,
                              n_acts=n_actions,
                              min_obs=-1.0,
                              max_obs=1.0,
                              n_nodes=args.n_nodes,
                              feats=args.feats)

    # Simple DQN agent
    agent = DQNAgent(alpha=0.0005,
                     gamma=0.99,
                     epsilon=1.0,
                     input_shape=total_feats,
                     sample_size=512,
                     batch_size=32,
                     n_actions=n_actions,
                     training=training)

    if not training:
        agent.load_model()

    return environment, agent, max_steps
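To make the total_feats computation above concrete, a quick check of the feature dimension it yields for a couple of illustrative settings (the chosen n_nodes values are examples, not defaults from the original code):

n_feats = {'all': 11, 'distance': 1}


def total_features(feats: str, n_nodes: int) -> int:
    # A tree observation with branching factor 4 has 4**0 + 4**1 + ... + 4**n_nodes nodes.
    return n_feats[feats] * sum(4 ** i for i in range(n_nodes + 1))


print(total_features('all', 2))       # 11 * (1 + 4 + 16) = 231
print(total_features('distance', 3))  # 1 * (1 + 4 + 16 + 64) = 85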
Example #21
def main():
    train_data, parameter[1]["episode_length"] = data_prepare(parameter)
    parameter[2]['action_size'], parameter[2][
        'state_size'], state, env = create_states(parameter, train_data)
    #create model
    agent = DQNAgent(parameter)
    #train model and save
    train(agent, parameter, state, env)
    caculation(agent, env)

    #test model
    parameter[0]["mode"] = 'test'
    test_data = data_prepare(parameter)[0]
    test_env = create_states(parameter, test_data)[3]
    caculation(agent, test_env)
Example #22
def load_model(MODEL_TYPE):
    curr_model = None
    if MODEL_TYPE == "SVM":
        print("LOADING SVM...")
        curr_model = load("svm.joblib")
    elif MODEL_TYPE == "LR":
        print("LOADING LR...")
        lr = LogReg(74)  #(env.matches.shape[1])
        lr.load_weights("weights/weights-improvement-100-0.31.hdf5")
        curr_model = lr
    elif MODEL_TYPE == "DT":
        print("LOADING DT...")
        curr_model = load("dt.joblib")
    elif MODEL_TYPE == "GB":
        print("LOADING GB...")
        curr_model = load("gb.joblib")
    elif MODEL_TYPE == "RF":
        print("LOADING RF...")
        curr_model = load("rfc.joblib")
    elif MODEL_TYPE == "NB":
        print("LOADING NB...")
        curr_model = load("nb.joblib")
    elif MODEL_TYPE == "AB":
        print("LOADING AB...")
        curr_model = load("ab.joblib")
    elif MODEL_TYPE == "DQN":
        print("LOADING DQN...")
        BetNet = DQNAgent(75)
        BetNet.load("weights/betnet-weights-dqn.h5")
        curr_model = BetNet
    else:
        print("LOADING NN...")
        BetNet = Network(74)  #(env.matches.shape[1])
        BetNet.load_weights(
            'weights/Adadelta/test9_400_Best/weights-improvement-400-0.48.hdf5'
        )  #PCA("weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5")  # Most recent weights
        curr_model = BetNet
    return curr_model
Example #23
def main():
    # vehicle_network
    veh_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    # Attacker network
    # att_network = create_lstm_model(nb_time_steps, nb_input_vector, num_actions=4)
    veh_agent = DQNAgent(q_network=veh_network,
                         q_network2=veh_network,
                         preprocessor=core.Preprocessor(),
                         RLmemory=core.ReplayMemory(),
                         SLmemory=core.ReplayMemory(),
                         policy=1,
                         gamma=0.1,
                         target_update_freq=100,
                         num_burn_in=100,
                         train_freq=20,
                         batch_size=32)
    # att_agent = DQNAgent(q_network=att_network,
    #                      q_network2=att_network,
    #                      preprocessor=core.Preprocessor(),
    #                      memory=core.ReplayMemory(),
    #                      policy=1,
    #                      gamma=0.1,
    #                      target_update_freq=100,
    #                      num_burn_in=100,
    #                      train_freq=20,
    #                      batch_size=32)
    veh_agent.compile('Adam', 'mse')
    # att_agent.compile('Adam', 'mse')
    env = VehicleFollowingENV()
    for i_episode in range(20):
        veh_agent.fit(env=env, num_iterations=10 ** 6)
        # att_agent.fit(env, 10 ** 6)
    # env.close()
    model_json = veh_network.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
Example #24
					debug_log(f'file content with syntax error\n{s}')
					debug_log('')

				for i in range(5):
					try:
						os.remove(file_path)
						break
					except PermissionError:
						debug_log('Permission error when removing the file')
						time.sleep(1)

			#NOTE: train
			#NOTE: create/load DQN and target DQN in main thread
			keras.backend.clear_session()
			agent = DQNAgent(INPUT_SHAPE, training=True, 
				replay_memory=minibatch, copy_target_model=False
			)
			agent.target_model = load_model(target_model_path)
			#NOTE: train newly loaded model on new data
			if len(minibatch) > 0:
				minibatch_history = agent.train_only(len(minibatch), len(minibatch))
				if minibatch_history == None:
					debug_log('ERROR: Unable to train on iteration\'s data')
				replay_memory.extend(minibatch)
			else:
				debug_log('WARNING: Skipping minibatch training since no new data was found')

			#NOTE: train newly loaded model on random selection of old data
			agent.replay_memory = replay_memory
			sum_loss = 0
			if len(replay_memory) > MIN_REPLAY_MEMORY_SIZE: 
Example #25
# Select a policy. We use eps-greedy action selection, which means that a random action is selected
# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
# the agent initially explores the environment (high eps) and then gradually sticks to what it knows
# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
# so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)

# The trade-off between exploration and exploitation is difficult and an on-going research topic.
# If you want, you can experiment with the parameters or use a different policy. Another popular one
# is Boltzmann-style exploration:
# policy = BoltzmannQPolicy(tau=1.)
# Feel free to give it a try!

dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, window_length=WINDOW_LENGTH, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, delta_range=(-1., 1.),
               target_model_update=10000, train_interval=4)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(args.env_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)
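The args.mode == 'train' branch above implies a companion evaluation branch; a hedged sketch of what it would typically look like with keras-rl (the optional args.weights override is an assumption):

elif args.mode == 'test':
    # Load previously saved weights (or an explicit file) and run a few evaluation episodes.
    weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)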
Example #26
def main(argv):
    args = parser.parse_args(argv[1:])

    if args.usage == 'help':
        return parser.print_help()

    if is_environments_gen(args):
        _write_env_file(args)
    elif is_environments_list(args):
        all_registry = registry.all()
        registry_envs_name = [
            trim_env_spec_name(env.__repr__()) for env in all_registry
        ]
        for environment in registry_envs_name:
            print(environment)
    elif is_environments_act(args):
        env = gym.make(args.environment_name)
        if is_action_type('dqn', args):
            if args.pre_defined_state_size == 'nesgym':
                pre_state_size = 172032
            elif args.pre_defined_state_size == 'gym':
                pre_state_size = env.observation_space.shape[0]
            elif args.pre_defined_state_size == 'gym-atari':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-atari-extend':
                pre_state_size = 120000
            elif args.pre_defined_state_size == 'gym-atari-small':
                pre_state_size = 100800
            elif args.pre_defined_state_size == 'gym-gomoku':
                pre_state_size = 361
            # state_size = (1,) + env.observation_space.shape
            state_size = pre_state_size
            action_size = env.action_space.n
            agent = DQNAgent(state_size, action_size)
            # try:
            #     agent.load('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
            #                                           args.i_episodes))
            # except Exception:
            #     pass
            done = False
            batch_size = 64
        i_episodes = args.i_episodes
        timesteps = args.timesteps
        factor = args.seed_factor
        for i_episode in range(i_episodes):
            state = env.reset()
            if is_action_type('dqn', args):
                state = np.reshape(state, [1, pre_state_size])
            for t in range(timesteps):
                try:
                    if args.render == 'present': env.render()
                    if args.render == 'presented': env.render(args.render)
                    if args.action_type == 'alternate':
                        action_choice = i_episodes * 2
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'specific':
                        action = env.action_space.sample()
                    elif args.action_type == 'conditional':
                        action_choice = i_episodes
                        action = random_action_space_sample_choice(
                            action_choice, env, factor)
                    elif args.action_type == 'numerical':
                        action = env.action_space.n
                    elif is_action_type('dqn', args) and len(state) == 5:
                        action = agent.act(state)
                    elif is_action_type('dqn', args) and len(state) != 5:
                        action = env.action_space.sample()
                    collect_stat(action, ['input', 'actions'], stats)
                    observation, reward, done, info = env.step(action)
                    if is_action_type('dqn', args):
                        reward = reward if not done else -10
                        observation = np.reshape(observation,
                                                 [1, pre_state_size])
                        agent.remember(state, action, reward, observation,
                                       done)
                        state = observation
                    # collect_stat(observation,['observation'],stats)
                    collect_stat(reward, ['rewards'], stats)
                    # collect_stat(done,['output','done'],stats)
                    # collect_stat(info,['output','info'],stats)
                    if done:
                        max_episodes_range = (i_episodes - 1)
                        episode_timesteps_iteration_limit = max_episodes_range - 1
                        is_latest_episode = is_filled_latest_episode_with_iteration(
                            i_episode, episode_timesteps_iteration_limit)
                        increased_timestep = increase_timestep(t)
                        print('i_episode {}'.format(i_episode))
                        print('Episode finished after {} timesteps'.format(
                            increased_timestep))
                        if is_action_type('dqn', args):
                            print('Episode: {}/{}, score: {}, e: {:.2}'.format(
                                i_episode, i_episodes, t, agent.epsilon))
                        collect_stat(t, ['output', 'timestep', 'iteration'],
                                     stats)
                        collect_stat(increased_timestep,
                                     ['output', 'timestep', 'increased'],
                                     stats)
                        is_latest_episode_to_save_state = lambda args_cached: is_latest_episode and args_cached.output_stats_filename
                        if is_latest_episode_to_save_state(args):
                            filename = args.output_stats_filename
                            pre_df = {
                                # 'observations': stats['observations'],
                                'rewards': stats['rewards'],
                                # 'done-output': stats['output']['done'],
                                # 'info-output': stats['output']['info'],
                                # 'iteration-timestep': stats['output']['timestep']['iteration'],
                                # 'increased-timestep': stats['output']['timestep']['increased'],
                                'actions-input': stats['input']['actions']
                            }
                            df = pd.DataFrame(pre_df)
                            stamp = lambda: '%s' % (int(datetime.now().
                                                        timestamp()))
                            with open(
                                    'data/{}-{}.csv'.format(stamp(), filename),
                                    'w') as f:
                                f.write(df.to_csv())
                                f.close()
                            print('Statistics file saved ({}.csv)!'.format(
                                filename))
                            del df
                            del filename
                        print(check_output_env_label())
                        del is_latest_episode_to_save_state
                        del increased_timestep
                        del is_latest_episode
                        del episode_timesteps_iteration_limit
                        del max_episodes_range
                        break
                except Exception as e:
                    print('Rendering execution ({})'.format(e))
                finally:
                    print('Execution of timestep done')
            if is_action_type('dqn',
                              args) and (len(agent.memory) > batch_size):
                agent.replay(batch_size)
        # agent.save('./weights/dqn_{}_{}_{}.h5'.format(args.environment_name.lower(), args.timesteps,
        #                                       args.i_episodes))
        # env.close()
    else:
        parser.print_help()
Example #27
File: pyrl.py Project: koeller21/ma_code
    def run(self):
        ### create TORCS environment
        env = TorcsEnv(vision=False, throttle=True)   

        ### start run according to supplied arguments
        if self.algorithm == "dqn" and self.modus == "train":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "dqn" and self.modus == "test":
            agent = DQNAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
        elif self.algorithm == "ddpg" and self.modus == "train":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.trainAgent()
        elif self.algorithm == "ddpg" and self.modus == "test":
            agent = DDPGAgent(env, self.track, self.numOfEpisodes)
            agent.testAgent()
Example #28
File: test.py Project: aaiteam/code_gen
def main():
    print "Creating DQN agent..."
    # env = gym.make("codegen-v0")
    set_debugger_org_frc()

    iters = 6300
    n_goal = 0
    n_goal_all = 0
    time_stamp = 0

    max_steps = 5
    agent = DQNAgent(max_steps)
    agent.dqn.initial_exploration = 6000 * max_steps

    for iter in range(iters):
        print "\n********Iteration # ", iter, "***********\n"
        # 1 iteration
        env = gym.make("codegen-v0")
        num = random.randrange(1, 100)
        print "Goal Number : ", num + 1
        env.my_input = num
        #env.goal = "['" + env.my_input + "']"
        env.goal = str(num + 1)

        code = env._reset()
        step_in_episode = 0
        total_score = 0.0
        reward = 0.0
        mystate = []
        my_state_new = []

        # debug : the sys
        # sss = []
        # for arg in sys.argv[1:]:
        #    sss.append(arg)
        # print "sss = " , sss

        # while True:
        while step_in_episode < max_steps:

            # state = env.code_index_list + [-1]*(max_steps-len(env.code_index_list
            state = env.code_index_list[:]
            state += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                              dtype=int).tolist()
            # state = state.tolist()
            # state = 1;
            # print "env = ",env.code_index_list
            # print "state = ",state
            # raw_input()

            if step_in_episode == 0:
                action_idx = agent.start(code, state)
            else:
                action_idx = agent.act(code, state, reward)

            code, reward, terminal, info = env._step(action_idx,
                                                     agent.dqn.actions)
            state_prime = env.code_index_list[:]
            state_prime += np.zeros([
                max_steps - len(env.code_index_list), agent.dqn.code_idx_size
            ],
                                    dtype=int).tolist()

            # debug : the sys
            # sss = []
            # for arg in sys.argv[1:]:
            #    sss.append(arg)
            # print "sss = " , sss

            print "state : "
            print state
            print "state' : "
            print state_prime

            if step_in_episode == max_steps - 1:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 1)
            else:
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 0)

            agent.dqn.experience_replay(agent.dqn.time_stamp)

            agent.dqn.target_model_update(agent.dqn.time_stamp,
                                          soft_update=False)

            total_score += reward

            if terminal:

                agent.dqn.goal_idx.append(agent.dqn.time_stamp)

                agent.end(reward)
                agent.dqn.stock_experience(agent.dqn.time_stamp, state,
                                           action_idx, reward, state_prime, 1)

                n_goal_all += 1
                step_in_episode += 1
                agent.dqn.time_stamp += 1

                if iters - iter <= 100:
                    n_goal += 1

                break

            step_in_episode += 1
            agent.dqn.time_stamp += 1

        if iter == 1 + (agent.dqn.initial_exploration / max_steps):
            print "n_goal_all = ", n_goal_all
            print agent.dqn.goal_idx
            raw_input()

    print "n_goal : ", n_goal
    print "epsilon : ", agent.epsilon
Example #29
class VideoStreamingTest(object):
    def __init__(self, host, port):
        self.state_size = 3
        self.action_size = 7
        self.done = False
        self.batch_size = 32
        self.agent = DQNAgent(self.state_size, self.action_size)
        self.state_now = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                    [1, self.state_size])
        self.state_last = np.reshape([0.10606659, -0.52737298, 0.47917915],
                                     [1, self.state_size])
        self.action_for_next = 0
        self.action_for_now = 0
        self.reward = 0
        self.forward = "T394"
        self.left = "S450"
        self.right = "S270"
        self.backward = "T330"
        self.stop = "T370"
        self.middle = "S360"
        #dqn parameters
        self.server_socket = socket.socket()
        self.server_socket.bind((host, port))
        self.server_socket.listen(0)
        self.connection, self.client_address = self.server_socket.accept()
        self.connection = self.connection.makefile("rb")
        self.host_name = socket.gethostname()
        self.host_ip = socket.gethostbyname(self.host_name)
        self.temp_result = None
        self.finnal_result = None
        self.RANGE = 350
        self.WIDTH = 720
        self.time_now = 0
        self.count = 0
        self.streaming()

    def dqn_loop(self):
        if self.finnal_result['me']['r'] > 1:
            self.done = True
        else:
            self.done = False
        if True:
            self.prepare_state()  # update the previous state and fetch the current one
            self.prepare_action()  # update the previous action and choose the next one

            if self.count == 1:
                self.prepare_reward()  # compute the reward for the previous action
            else:
                self.count += 1
            self.act_move()  # update the car's motion
            if self.count == 1:
                self.remember_step()  # store this step's transition
            if len(self.agent.memory) > self.batch_size:
                self.agent.replay(self.batch_size)

    def prepare_state(self):
        self.state_last = self.state_now
        state_now_ = [self.finnal_result['me']['alpha_big'], \
        self.finnal_result['me']['alpha_small'], \
        self.finnal_result['me']['r']]
        self.state_now = np.reshape(state_now_, [1, self.state_size])
        #self.state_now = state_now_

    def prepare_action(self):
        self.action_for_now = self.action_for_next
        self.action_for_next = self.agent.act(self.state_now)

    def prepare_reward(self):  # precondition: state_last is not empty
        if self.done:
            self.reward = -10
        else:
            self.reward = (self.state_last[0][2] - self.state_now[0][2]) * 100
            #self.reward = (self.state_last[2] - self.state_now[2])*100
    def remember_step(self):
        self.agent.remember(self.state_last, self.action_for_now, self.reward,
                            self.state_now, self.done)

    def act_move(self):
        if self.done:
            self.action_for_next = 0

        if self.action_for_next == 0:  # stop
            str_S = self.middle
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 1:  # forward
            str_S = self.middle
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 2:  # turn left, forward
            str_S = self.left
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 3:  # turn right, forward
            str_S = self.right
            str_T = self.forward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 4:  # backward
            str_S = self.middle
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.middle
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.middle
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

        elif self.action_for_next == 5:  # turn left, backward
            str_S = self.left
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.left
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            # send the stop and backward commands, mirroring actions 4 and 6
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.left
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
        elif self.action_for_next == 6:  # turn right, backward
            str_S = self.right
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.right
            str_T = self.stop
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)
            str_S = self.right
            str_T = self.backward
            str_S = str_S.encode("utf-8")
            str_T = str_T.encode("utf-8")
            socket_tcp.send(str_S)
            socket_tcp.send(str_T)

    def get_one_car(self, x1, y1, x2, y2):
        x0 = (x1 + x2) / 2
        y0 = (y1 + y2) / 2
        detx = x1 - x2
        dety = y1 - y2
        temp_x0 = x0 - self.WIDTH / 2
        temp_y0 = y0 - self.WIDTH / 2
        if detx > 0:
            alpha_small = math.atan(dety / detx)
        elif detx < 0:
            alpha_small = math.atan(dety / detx) + math.pi
        else:
            if dety > 0:
                alpha_small = math.pi / 2
            else:
                alpha_small = 0 - math.pi / 2

        if temp_x0 > 0:
            alpha_big = math.atan(temp_y0 / temp_x0)
        elif temp_x0 < 0:
            alpha_big = math.atan(temp_y0 / temp_x0) + math.pi
        else:
            if temp_y0 > 0:
                alpha_big = math.pi / 2
            else:
                alpha_big = 0 - math.pi / 2

        alpha_small = alpha_small / math.pi - 0.5
        alpha_big = alpha_big / math.pi - 0.5
        r = math.sqrt(temp_x0**2 + temp_y0**2) / self.RANGE
        return {
            "alpha_big": alpha_big,
            "alpha_small": alpha_small,
            "r": r,
            "x0": x0,
            "y0": y0
        }

    def get_finnal_result(self):
        red_x = self.temp_result["red"]["x"]
        red_y = self.temp_result["red"]["y"]
        green_x = self.temp_result["green"]["x"]
        green_y = self.temp_result["green"]["y"]
        blue_x = self.temp_result["blue"]["x"]
        blue_y = self.temp_result["blue"]["y"]
        yellow_x = self.temp_result["yellow"]["x"]
        yellow_y = self.temp_result["yellow"]["y"]
        finnal_temp = {}
        me_temp = self.get_one_car(red_x, red_y, green_x, green_y)
        enemy_temp = self.get_one_car(blue_x, blue_y, yellow_x, yellow_y)
        finnal_temp["me"] = me_temp
        finnal_temp["enemy"] = enemy_temp
        self.finnal_result = finnal_temp

    def draw(self, frame, lowerRGB, upperRGB, word):

        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
        # build a mask from the colour thresholds
        mask = cv2.inRange(hsv, lowerRGB, upperRGB)
        # erosion
        mask = cv2.erode(mask, None, iterations=2)
        # dilation; erosion followed by dilation is an opening, which removes noise
        mask = cv2.dilate(mask, None, iterations=2)
        cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
        # initialize the centroid of the circular contour
        center = None
        # if any contours were found
        if len(cnts) > 0:
            # find the contour with the largest area
            c = max(cnts, key=cv2.contourArea)
            # compute the minimum enclosing circle of that contour
            ((x, y), radius) = cv2.minEnclosingCircle(c)
            # compute the moments of the contour
            M = cv2.moments(c)
            # compute the centroid
            center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
            # only draw when the radius is greater than 10
            if radius > 10:
                cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255),
                           2)
                cv2.circle(frame, center, 5, (0, 0, 255), -1)

                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(frame, word, (int(x), int(y)), font, 1.2,
                            (255, 255, 255), 2)
                result = {}
                result["x"] = x
                result["y"] = y

                return result

    def streaming(self):

        try:
            print("Host: ", self.host_name + " " + self.host_ip)
            print("Connection from: ", self.client_address)
            print("Streaming...")
            print("Press 'q' to exit")

            redLower = np.array([170, 100, 200])
            redUpper = np.array([179, 255, 255])

            greenLower = np.array([65, 100, 100])
            greenUpper = np.array([85, 255, 255])

            #blueLower = np.array([0, 0, 150])
            #blueUpper = np.array([100, 100, 255])
            blueLower = np.array([95, 100, 100])
            blueUpper = np.array([115, 255, 255])
            yellowLower = np.array([5, 100, 100])
            yellowUpper = np.array([20, 255, 255])
            # need bytes here
            stream_bytes = b" "
            while True:
                stream_bytes += self.connection.read(1024)
                first = stream_bytes.find(b"\xff\xd8")
                last = stream_bytes.find(b"\xff\xd9")
                #str_ = 'S270'
                #str_ = str_.encode("utf-8")
                #socket_tcp.send(str_)

                #f = open('record_' + str(self.count) + '.json', 'w')
                #json.dump(dic_dump, f)
                #f.close()

                if first != -1 and last != -1:
                    jpg = stream_bytes[first:last + 2]
                    stream_bytes = stream_bytes[last + 2:]
                    image = cv2.imdecode(np.frombuffer(jpg, dtype=np.uint8),
                                         cv2.IMREAD_COLOR)
                    frame = image
                    result_red = self.draw(frame, redLower, redUpper, "RED")
                    result_green = self.draw(frame, greenLower, greenUpper,
                                             "GREEN")
                    result_blue = self.draw(frame, blueLower, blueUpper,
                                            "blue")
                    result_yellow = self.draw(frame, yellowLower, yellowUpper,
                                              "YELLOW")
                    result = {}
                    result["red"] = result_red
                    result["green"] = result_green
                    result["blue"] = result_blue
                    result["yellow"] = result_yellow

                    self.temp_result = result
                    flag = True
                    if not result_red:
                        flag = False
                    if not result_green:
                        flag = False
                    if not result_blue:
                        flag = False
                    if not result_yellow:
                        flag = False
                    if flag:
                        self.get_finnal_result()
                        self.time_now = int((time.time() - start_time) * 1000)
                        self.dqn_loop()
                        '''
                        dic_dump = {'data': self.finnal_result, 'time' : self.time_now}
                        f = open('./test_1/record_' + str(self.count) + '.json', 'w')
                        json.dump(dic_dump, f)
                        f.close()
                        self.count +=1
                        '''
                        cv2.line(frame, (int(self.temp_result["red"]["x"]),
                                         int(self.temp_result["red"]["y"])),
                                 (int(self.temp_result["green"]["x"]),
                                  int(self.temp_result["green"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame, (int(self.temp_result["blue"]["x"]),
                                         int(self.temp_result["blue"]["y"])),
                                 (int(self.temp_result["yellow"]["x"]),
                                  int(self.temp_result["yellow"]["y"])),
                                 (0, 255, 0), 1, 4)
                        cv2.line(frame, (int(self.finnal_result["me"]["x0"]),
                                         int(self.finnal_result["me"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (0, 0, 255), 4, 4)
                        cv2.line(frame,
                                 (int(self.finnal_result["enemy"]["x0"]),
                                  int(self.finnal_result["enemy"]["y0"])),
                                 (int(self.WIDTH / 2), int(self.WIDTH / 2)),
                                 (255, 0, 0), 4, 4)
                        font = cv2.FONT_HERSHEY_SIMPLEX
                        cv2.putText(frame,
                                    str(self.finnal_result["me"]["alpha_big"]),
                                    (int(self.finnal_result["me"]["x0"]),
                                     int(self.finnal_result["me"]["y0"])),
                                    font, 1, (0, 255, 0), 1)
                        cv2.putText(
                            frame,
                            str(self.finnal_result["enemy"]["alpha_small"]),
                            (int(self.finnal_result["enemy"]["x0"]),
                             int(self.finnal_result["enemy"]["y0"])), font, 1,
                            (0, 255, 0), 1)
                    else:
                        str_S = "S360"
                        str_T = "T370"
                        str_S = str_S.encode("utf-8")
                        str_T = str_T.encode("utf-8")
                        socket_tcp.send(str_S)
                        socket_tcp.send(str_T)
                    #print(self.finnal_result)
                    cv2.imshow("Frame", frame)

                    if cv2.waitKey(1) & 0xFF == ord("q"):
                        break
        finally:
            self.connection.close()
            self.server_socket.close()
Example #30
    curr_model = load("dt.joblib")
elif MODEL_TYPE == "GB":
    print("LOADING GB...")
    curr_model = load("gb.joblib")
elif MODEL_TYPE == "RF":
    print("LOADING RF...")
    curr_model = load("rfc.joblib")
elif MODEL_TYPE == "NB":
    print("LOADING NB...")
    curr_model = load("nb.joblib")
elif MODEL_TYPE == "AB":
    print("LOADING AB...")
    curr_model = load("ab.joblib")
elif MODEL_TYPE == "DQN":
    print("LOADING DQN...")
    BetNet = DQNAgent(75)
    BetNet.load("weights/betnet-weights-dqn.h5")
    curr_model = BetNet
else:
    print("LOADING NN...")
    BetNet = Network(env.matches.shape[1])
    BetNet.load_weights(
        "weights/Adadelta/test13_100iter_reglast2/weights-improvement-100-0.52.hdf5"
    )  # Most recent weights
    curr_model = BetNet

###############################################################################


#GETS THE PREDICTION VEC GIVEN MODEL
def generatePrediction(mt, curr_model, to_process):
Example #31
import random

import numpy as np

from mdp import MDP
'''
MDP Parameters
'''
mdp = MDP(duration_history=3, duration_simulation=1, delta_t=0.1)
'''
Environment Parameters
'''
w = wind(mean=45 * TORAD, std=0 * TORAD, samples=10)
WH = w.generateWind()

hdg0 = 0 * np.ones(10)
mdp.initializeMDP(hdg0, WH)

agent = DQNAgent(mdp.size, action_size=2)
#agent.load("../Networks/lighter_archi")
batch_size = 50

EPISODES = 500
hdg0_rand_vec = [-3, 0, 3, 6, 9, 12, 15, 18, 21]

loss_of_episode = []
i = []
v = []
r = []
for e in range(EPISODES):
    WH = w.generateWind()
    hdg0_rand = random.choice(hdg0_rand_vec) * TORAD
    hdg0 = hdg0_rand * np.ones(10)
Example #32
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
dqn.fit(env, nb_steps=50000, visualize=True, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes.
dqn.test(env, nb_episodes=5, visualize=True)