Example #1
    def train(self):
        self.reset()
        score_logger = ScoreLogger(self.env_name)

        for run in range(self.max_iter):
            state = self.env.reset(self.num)
            if self.log_state:
                print("STATE")
                print(state)
            state = np.reshape(state, [1, self.observation_space])
            step = 0
            while self.num < self.max_examples:
                step += 1
                #self.env.render()
                action = self.act(state)
                state_next, reward, terminal, info = self.env.step(action, self.num)
                if self.log_state:
                    print(action, reward, terminal)
                #reward = reward if not terminal else -reward
                state_next = np.reshape(state_next, [1, self.observation_space])
                self.remember(state, action, reward, state_next, terminal)
                if self.num >= self.max_examples:
                    terminal = True
                state = state_next
                if terminal:
                    msg = "Run: {:d}, exploration: {:.5f}, score: {:d}, memory: {:d}, balance: {:.2f}"
                    print(msg.format(run + 1, self.exploration_rate, step, len(self.memory), self.env.getBalance()))
                    score_logger.add_score(step, run+1)
                    break
                self.experience_replay()

            if self.num >= self.max_examples:
                break
Example #2
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])  # reshape to row array
        step = 0
        while True:  # run the Deep-Q Net
            step += 1
            env.render()
            action = dqn_solver.act(state)  # make action according to e-greedy
            state_next, reward, terminal, info = env.step(action)  # observe SARS from env
            reward = reward if not terminal else -reward  # penalize the transition that ends the episode
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)  # store transition
            state = state_next  # advance to next state
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
        if run % SAVE_FREQUENCY == 0:
            save_model(dqn_solver.model, "saved_model")  # save the model parameters every once in a while
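All of these training loops call into a DQNSolver class that is not reproduced on this page. For orientation only, here is a minimal sketch of the interface they rely on (act, remember, experience_replay); the Keras network layout and every hyperparameter below are assumptions, not the original implementation.

import random
from collections import deque

import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam


class DQNSolver:
    # Illustrative epsilon-greedy DQN agent; layer sizes and hyperparameters are assumptions.

    def __init__(self, observation_space, action_space):
        self.action_space = action_space
        self.memory = deque(maxlen=1000000)
        self.exploration_rate = 1.0
        self.gamma = 0.95
        self.model = Sequential([
            Dense(24, input_dim=observation_space, activation="relu"),
            Dense(24, activation="relu"),
            Dense(action_space, activation="linear"),
        ])
        self.model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))

    def remember(self, state, action, reward, state_next, terminal):
        # Store the transition for later experience replay.
        self.memory.append((state, action, reward, state_next, terminal))

    def act(self, state):
        # Epsilon-greedy: explore with probability exploration_rate, otherwise exploit the Q-network.
        if np.random.rand() < self.exploration_rate:
            return random.randrange(self.action_space)
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def experience_replay(self, batch_size=20):
        # Sample a minibatch of stored transitions and fit the network toward the Bellman targets.
        if len(self.memory) < batch_size:
            return
        for state, action, reward, state_next, terminal in random.sample(self.memory, batch_size):
            q_update = reward
            if not terminal:
                q_update = reward + self.gamma * np.amax(self.model.predict(state_next, verbose=0)[0])
            q_values = self.model.predict(state, verbose=0)
            q_values[0][action] = q_update
            self.model.fit(state, q_values, verbose=0)
        self.exploration_rate = max(0.01, self.exploration_rate * 0.995)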
Example #3
def cartpole(iteration=0, params=None):
    if params is None:
        params = DefaultParams()
    env = gym.make(params.ENV_NAME)
    score_logger = ScoreLogger(params.ENV_NAME, iteration, params.EXP_NAME,
                               params.FIXED_NB_RUNS)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space, params)
    run = 0
    done = False
    while not done:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                done = score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #4
def cartpole():
    env = gym.make(ENV_NAME)
    env.spec.reward_threshold = 1000
    env.spec.max_episode_steps = 1000
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    if LOAD and os.path.exists(LOAD_MODULE):
        dqn_solver.load_model()
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print(
                    "Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                if len(score_logger.scores) > 0 and step > max(score_logger.scores) > 450:
                    dqn_solver.save_model(step)
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #5
def cartpole():
    env = Env('localhost:32822')
    env.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.box.shape[0]
    action_space = env.action_space.discrete.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = env.reset()
        # print(state)
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            # env.render()
            print("acting on state: ", state)
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                plt.plot(dqn_solver.loss)
                plt.title('Model loss')
                plt.ylabel('Loss')
                plt.xlabel('Episode')
                plt.savefig("loss.png")
                break
            dqn_solver.experience_replay()
Example #6
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(action_space, is_partial_fit=False)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            # comment out the next line for faster training, without pausing to render the GUI
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #7
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    # Needed to run the code only on the CPU on the DGX
    config = tf.ConfigProto(
        device_count={'GPU': 0}
    )
    sess = tf.Session(config=config)
    set_session(sess)

    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
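For newer TensorFlow 2.x setups, where ConfigProto and Session are not part of the default API, a comparable CPU-only configuration might look like the sketch below; this is an alternative illustration, not part of the example above.

import tensorflow as tf

# Hide all GPUs from TensorFlow so training runs on the CPU only.
tf.config.set_visible_devices([], 'GPU')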
Example #8
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    #dqn_solver.load("./save/cartpole-dqn.h5")
    run = 0
    for e in range(EPISODES):
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
            if step % 5 == 0:
                dqn_solver.save("./save/cartpole-dqn_step_{}.h5".format(step))
Example #9
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space, False)
    run = 0
    # while True:
    for i in range(200):
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            print(state_next)
            state_next = np.reshape(state_next, [1, observation_space])
            print(state_next)
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
    dqn_solver.save_model_and_table()
Example #10
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                break
            if step % TRAINING_STEP == 1:
                dqn_solver.experience_replay()
Example #11
def mountain_car():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(action_space, is_partial_fit=True)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        max_position = -1.2
        while True:
            step += 1
            # uncomment the next line to render the GUI (training is faster without it)
            #env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            # if max_position < state_next[0]:
            #     reward = 0
            # if terminal and reward > 0:
            #     reward = 100
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            max_position = max(max_position, state[0][0])
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step) + ", max: " + str(max_position))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #12
def cartpole2():
    env = gym.make(ENV_NAME)
    env._max_episode_steps = 700
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    score = [[0, 0, 0]]
    agents_weights = []
    check = [50, 100, 200, 400, 500, 1000, 2000, 3000]
    while mean_score_check(np.mean(score, axis=1)) < 500:
        run += 1
        score_times = []
        for i in range(n_times):
            step = 0
            state = env.reset()
            state = np.reshape(state, [1, observation_space])
            done = False
            if i == 0:
                while (not done):
                    step += 1
                    #env.render()
                    action = dqn_solver.act(state)
                    agents_weights.append(dqn_solver.model_q.get_weights())
                    state_next, reward, done, info = env.step(action)
                    reward = reward if not done else -reward
                    state_next = np.reshape(state_next, [1, observation_space])
                    dqn_solver.remember(state, action, reward, state_next,
                                        done)
                    state = state_next
                    dqn_solver.experience_replay()
            else:
                while (not done):
                    step += 1
                    # env.render()
                    action = dqn_solver.act_eval(state)
                    #agents_weights.append(dqn_solver.model_q.get_weights())
                    state_next, reward, done, info = env.step(action)
                    reward = reward if not done else -reward
                    state_next = np.reshape(state_next, [1, observation_space])
                    state = state_next

                    if done:
                        score_times.append(step)

        print("Run: " + str(run) + ", exploration: " +
              str(dqn_solver.exploration_rate) + ", score: " +
              str(np.mean(score_times)))
        score_logger.add_score(int(np.mean(score_times)), run)
        score.append(score_times)
        #print(score_times)

        if run in check:
            np.save('weights', np.array(agents_weights))
            np.save('scores', np.array(score))
    np.save('weights', np.array(agents_weights))
    np.save('scores', np.array(score))
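The mean_score_check helper used in the loop condition above is not shown. A plausible sketch, assuming it simply averages the most recent per-run evaluation means (the window size is an assumption):

import numpy as np

def mean_score_check(mean_scores, window=10):
    # Average of the last `window` per-run evaluation means; training stops
    # once this running average reaches 500.
    recent = np.asarray(mean_scores)[-window:]
    return float(np.mean(recent)) if recent.size > 0 else 0.0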
Example #13
def cartpole():
    #Creating the gym environment
    env = gym.make(ENV_NAME)

    env = gym.wrappers.Monitor(env, "./vid", video_callable=lambda episode_id: True, force=True)

    #initializing the score logger to visualize later on the performance of the AI
    score_logger = ScoreLogger(ENV_NAME)

    """
        Observation: 
        Type: Box(4)
        Num	Observation                 Min         Max
        0	Cart Position             -4.8            4.8
        1	Cart Velocity             -Inf            Inf
        2	Pole Angle                 -24 deg        24 deg
        3	Pole Velocity At Tip      -Inf            Inf
        
    Actions:
        Type: Discrete(2)
        Num	Action
        0	Push cart to the left
        1	Push cart to the right
        
    """
    #The observation has 4 components in this environment, so observation_space will be equal to 4
    observation_space = env.observation_space.shape[0]
    #There are 2 possible actions, moving to the left and moving to the right as seen above
    action_space = env.action_space.n
    #DQNSolver is the "AI", the agent that from the list of observations and actions will try to determine the best actions for given circumstances (observation)
    #Initializing the dqn_solver
    dqn_solver = DQNSolver(observation_space, action_space)
    #Run is a variable to track how many runs it has been
    run = 0
    while True:
        run += 1
        #When you call env.reset(), it returns the initial state as a np.ndarray of shape (4,) since there are 4 observations
        state = env.reset()
        #Reshaping the state into a 2d array of (1,4)
        state = state.reshape(1, observation_space)
        step = 0
        while True:
            #Each step is a new action undertaken by the agent
            step += 1
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()

    env.close()
Example #14
def cartpole(GAMMA, LEARNING_RATE, EXPLORATION_DECAY):
    #environment - cartpole
    env = gym.make(ENV_NAME)


    score_logger = ScoreLogger(ENV_NAME)

    #observational space - possible state values
    observation_space = env.observation_space.shape[0]

    #action space - possible actions that can be performed
    action_space = env.action_space.n

    #agent - object of DQN Solver class, see below
    dqn_solver = DQNSolver(observation_space, action_space, GAMMA, LEARNING_RATE, EXPLORATION_DECAY)


    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1

            #visualize environment 
            #env.render()
            
            #determine action 
            action = dqn_solver.act(state)

            #determine new state and corresponding reward
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])

            #remember to learn - used in experience replay 
            dqn_solver.remember(state, action, reward, state_next, terminal)

            #set future state as current state
            state = state_next
            if terminal:
                #Add the score to the score logger once the pole falls
                score_logger.add_score(step, run)
                
                break
            #calling experience replay to update Q value    
            dqn_solver.experience_replay(GAMMA, EXPLORATION_DECAY)
            
            #once solved, return the number of runs it took to solve
            if score_logger.solve_score != 0:
                return score_logger.solve_score
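Because this version of cartpole() takes GAMMA, LEARNING_RATE and EXPLORATION_DECAY as arguments and returns the number of runs needed to solve, it lends itself to a simple parameter sweep. A hedged usage sketch; the candidate values are illustrative only, not from the original code.

for gamma in (0.95, 0.99):
    for learning_rate in (1e-3, 5e-4):
        for exploration_decay in (0.995, 0.999):
            # Train one agent per hyperparameter combination and report how long it took to solve.
            runs = cartpole(gamma, learning_rate, exploration_decay)
            print("gamma=%.2f lr=%.4f decay=%.3f -> solved in %d runs"
                  % (gamma, learning_rate, exploration_decay, runs))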
Example #15
def cartpole():

    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port = 12345

    # connection to hostname on the port.
    s.connect(('192.168.43.110', port))

    print("Connected to environment")

    score_logger = ScoreLogger(ENV_NAME)
    observation_space = 4
    action_space = 4
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0

    for _ in range(10):

        run += 1
        func = dict()
        func["function"] = "render"

        s.send(pickle.dumps(func))
        state = pickle.loads(s.recv(1024))["state"]

        step = 0
        while True:
            startTime = time.time()
            step += 1
            state = np.reshape(state, [1, observation_space])
            action = dqn_solver.act(state)
            # action = 0
            func["function"] = "step"
            func["action"] = action
            s.send(pickle.dumps(func))
            received = pickle.loads(s.recv(1024))
            state_next, reward, terminal = (received["state"],
                                            received["reward"],
                                            received["terminal"])
            print(state, reward, terminal)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            print(state_next.shape)
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal or time.time() - startTime > 30:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
    dqn_solver.save_model_and_table()
Example #16
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(action_space, is_partial_fit=True)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        # run = 0
        while True:
            step += 1
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward

            # Gives a new shape to an array without changing its data.
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration_rate: " +
                      str(dqn_solver.exploration_rate) + ", episodes: " +
                      str(step))
                # score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #17
def mountaincar():
    # initialize game and score logger
    env = gym.make(ENV_NAME)
    # tool created to display 'score'
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    score = 0
    numTries = 200

    # train for up to numTries runs to improve the model
    while run < numTries:
        run += 1
        # a new frame
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0

        # keep stepping til you fall out of frame
        while True:
            # more steps = more successful (even if it is moving)
            env.render()

            # choose what to do using DQN
            action = dqn_solver.act(state)
            # analyze what happened
            state_next, reward, dead, info = env.step(action)

            # reinforce positive outcome, penalize bad outcome
            reward = get_reward(state_next)
            step += reward

            state_next = np.reshape(state_next, [1, observation_space])
            # memorize this iteration to shape the following
            dqn_solver.memorize(state, action, reward, state_next, dead)
            state = state_next
            if dead:
                # score = accumulated shaped reward for this run
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.replay()
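The get_reward function used above to shape the MountainCar reward is not shown. A minimal sketch, assuming a position-based shaping where state[0] is the cart position and the flag sits at x = 0.5; this is an illustration, not the original helper.

def get_reward(state):
    # Hypothetical shaping: the reward grows with the cart's horizontal position,
    # with a bonus once the flag at x = 0.5 is reached.
    position = state[0]
    if position >= 0.5:
        return 10.0
    return position + 0.5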
Example #18
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    # Size of observations
    observation_space = env.observation_space.shape[0]
    # Size of actions
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    game_nb = 0
    while True:
        game_nb += 1
        # Game creation/reset
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        finished = False
        # While game does not end
        while not finished:
            step += 1
            env.render()

            # Decide which move
            action = dqn_solver.act(state)

            # Perform the move
            state_next, reward, finished, info = env.step(action)
            state_next = np.reshape(state_next, [1, observation_space])

            # If the game has finished (we failed) invert the reward
            reward = reward if not finished else -reward

            # Save the movement and its reward
            dqn_solver.remember(state, action, reward, state_next, finished)

            state = state_next

            # Learn from experience
            dqn_solver.experience_replay()

            if finished:
                # Save result
                print ("Game number: " + str(game_nb) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, game_nb)
Example #19
def training():
    env = gym.make(ENV_NAME)
    # If the user chooses an environment with a non-discrete action space, return an error because DQN only works with discrete action spaces
    if (type(env.action_space) != gym.spaces.discrete.Discrete):
        raise ActionSpaceError(
            'This environment uses an action space that is not discrete. DQN can only be trained using discrete action spaces. Please select an envionment with a discrete action space.'
        )

    act_space = env.action_space.n

    score_logger = ScoreLogger(ENV_NAME)
    observation_input = find_input_shape(env)

    dims = reshape_dims(env.observation_space)

    dqn_solver = DQNSolver(observation_input, act_space)
    for i in range(NUM_EPISODES):
        state = env.reset()
        #reshape state array if it has more than one dimension
        if (len(dims) > 1):
            state = state.reshape(dims)
        step = 0
        while True:
            step += 1
            if (WATCH_TRAINING):
                env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            #reshape state array if it has more than one dimension
            if (len(dims) > 1):
                state_next = state_next.reshape(dims)
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            if terminal:
                print("Run: " + str(i + 1) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, i + 1)
                break
            dqn_solver.experience_replay()
    return dqn_solver
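find_input_shape and reshape_dims are project helpers that this example imports from elsewhere. A rough sketch of what they might do for Box observation spaces, inferred from how they are called rather than taken from the original code:

import numpy as np

def find_input_shape(env):
    # Flattened observation size, used as the network's input width.
    return int(np.prod(env.observation_space.shape))

def reshape_dims(observation_space):
    # Shape used to flatten multi-dimensional observations into a single row;
    # one-dimensional observations are left as-is by the training loop above.
    shape = observation_space.shape
    if len(shape) > 1:
        return (1, int(np.prod(shape)))
    return shape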
Example #20
def main():
    # Import the environment
    env = gym.make(ENV_NAME)
    #  Score logging
    score_logger = ScoreLogger(ENV_NAME)
    #  Define the observation space and the action space
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    #  RL model
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0

    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            env.render()

            action = dqn_solver.act(state)

            state_next, reward, terminal, info = env.step(action)

            reward = reward if not terminal else -reward

            state_next = np.reshape(state_next, [1, observation_space])

            dqn_solver.add_to_memory(state, action, reward, state_next,
                                     terminal)

            state = state_next
            if terminal:
                result = "Run: " + str(run) + ", exploration: " + str(
                    dqn_solver.exploration_rate) + ", score: " + str(step)
                print(result)
                score_logger.add_score(step, run)
                break
            dqn_solver.experience()
Example #21
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(action_space, is_partial_fit=IS_PARTIAL_FIT)
    run = 0
    current_max = 0
    while run < NO_ITERATIONS:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        # run = 0
        while True:
            step += 1
            if RENDER:
                env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)

            if MY_LOGS:
                print(state_next, reward, terminal)

            reward = reward if not terminal else -reward

            # Gives a new shape to an array without changing its data.
            state_next = np.reshape(state_next, [1, observation_space])
            dqn_solver.remember(state, action, reward, state_next, terminal)
            state = state_next
            current_max = max(current_max, step)
            if terminal:
                print("Run: " + str(run) + ", exploration_rate: " +
                      str(dqn_solver.exploration_rate) + ", episodes: " +
                      str(step) + ", max episodes reached: " +
                      str(current_max))
                if (SCORE_LOGGER):
                    score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()
Example #22
def main():
    n_episode = 50
    for method in ['DoubleDQN']:
        env = gym.make(ENV_NAME)
        agent = None
        if method == 'DDPG':
            agent = DDPG(env)
        elif method == 'QLeaning':
            agent = QLearningTabular(env)
        elif method == 'DQN':
            agent = DQN(env)
        elif method == 'DoubleDQN':
            agent = DoubleDQN(env)
        else:
            raise NotImplementedError
        score_logger = ScoreLogger(ENV_NAME, method)
        print('Algorithm:', method)

        i_episode = 0
        while i_episode < n_episode:
            i_episode += 1
            agent.run_begin()
            state = env.reset()
            i_step = 0
            while True:
                i_step += 1
                # env.render()
                action = agent.act(state)
                state_next, reward, terminal, info = env.step(action)
                agent.remember(state, action, reward, state_next, terminal)
                agent.experience_replay()
                state = state_next
                if terminal:
                    agent.run_finish()
                    print(f"Run {i_episode} finished after {i_step} steps")
                    score_logger.add_score(i_step, i_episode)
                    break
        env.close()
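The DDPG, QLearningTabular, DQN and DoubleDQN classes above are assumed to share a small common interface. A hedged outline of that interface, inferred from the calls in the loop rather than taken from the original code:

class BaseAgent:
    # Interface the training loop above relies on; method names inferred from the calls it makes.

    def __init__(self, env):
        self.env = env

    def run_begin(self):
        # Per-episode setup, e.g. decaying the exploration rate.
        pass

    def run_finish(self):
        # Per-episode teardown, e.g. syncing a target network.
        pass

    def act(self, state):
        raise NotImplementedError

    def remember(self, state, action, reward, state_next, terminal):
        raise NotImplementedError

    def experience_replay(self):
        raise NotImplementedError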
Example #23
def cartpole():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space
    action_space = env.action_space
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space.shape[0]])
        step = 0
        over2 = False
        over0 = False
        over3 = False
        while True:
            step += 1
            action = dqn_solver.act(state)
            env.render()
            state_next, reward, terminal, info = env.step(action)
            # reward = reward if not terminal else -20
            if state_next[0] > -0.2 and not over2:
                over2 = True
                reward = 1
            if state_next[0] > 0 and not over0:
                over0 = True
                reward = 1
            if state_next[0] > 0.2 and not over3:
                over3 = True
                reward = 1
            if state_next[0] >= 0.5:
                reward = 20

            # reward = -reward
            state_next = np.reshape(state_next,
                                    [1, observation_space.shape[0]])
            dqn_solver.remember(state, action, reward, state_next, terminal)

            dqn_solver.experience_replay()
            dqn_solver.target_train()

            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(reward))
                print("state: " + str(state_next[0, 0]))
                # score_logger.add_score(reward, run)
                break
Example #24
def cartpole():
    env = gym.make("CartPole-v1")
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    dqn_solver.build_fresh()
    # dqn_solver.load_model()
    run = 0
    while True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            step += 1
            env.render()
            action = dqn_solver.act(state)
            state_next, reward, terminal, info = env.step(action)
            reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            if dqn_solver.train_mode:
                dqn_solver.remember(state, action, reward, state_next,
                                    terminal)
            state = state_next
            if terminal:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                is_solved = score_logger.add_score(step, run)
                if is_solved and dqn_solver.train_mode:
                    dqn_solver.save_model()
                    exit()
                break
            if dqn_solver.train_mode:
                dqn_solver.experience_replay()
Example #25
def traffic():
    env = gym.make(ENV_NAME)
    #env = gym.wrappers.Monitor(env, "dqn")
    env.seed(1)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    while run < 1:
        run += 1
        step = 0
        state = env._reset()
        obs0, reward_previous, done, signal = state

        reward_current = 0
        total_reward = reward_previous - reward_current
        print("STATUS")
        print(signal)
        if (signal == 0):
            status = 0
        elif (signal == 1):
            status = 1
        next_state = obs0
        # act on the reshaped observation rather than the raw reset tuple
        state = np.reshape(obs0, [1, OBS_SPACE])

        while step < 1000:
            #env.render()
            step += 1
            action = dqn_solver.act(state)
            #print(next_state)
            #action = env.action_space.sample()
            #obs1, reward_previous, done, _ = env.step(action)

            if (status == 0 and action == 0):
                print("Status is: 0. Action is 0.")
                status = 0
                next_state, reward_current, done, _, t_step = phase(env, 0, 15)

                step += t_step

            elif (status == 0 and action == 1):
                print("Status is 0. Action is now 1. Switching to Status 1.")
                phase(env, 2, 25)
                #print("Action is 1. Status is 0. Lights are H-Y, V-R -> H-R, V-G")
                status = 1
                next_state, reward_current, done, _, t_step = phase(env, 1, 45)
                step += t_step

            elif (status == 1 and action == 1):
                print("Status is 1. Action is 1.")
                status = 1
                next_state, reward_current, done, _, t_step = phase(env, 1, 15)
                step += t_step

            elif (status == 1 and action == 0):
                print("Status is 1. Action is now 0. Switching to Status 0.")
                phase(env, 4, 25)
                status = 0
                next_state, reward_current, done, _, t_step = phase(env, 0, 45)
                step += t_step

            total_reward = reward_previous - reward_current
            state_next = np.reshape(next_state, [1, OBS_SPACE])
            dqn_solver.remember(state, action, total_reward, state_next, done)
            state = state_next
            score_logger.add_score(step, run)

            print(step)
            if done:
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                #score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()

    print("Episode done in %d steps, total reward %.2f" % (step, total_reward))
    env.close()
Example #26
def cartpole():
    #Creating the gym environment
    env = gym.make(ENV_NAME)

    #saving all the episodes to a folder vid in a video format
    recorder = gym.wrappers.monitoring.video_recorder.VideoRecorder(env, base_path="./every_100")

    #initializing the score logger to visualize later on the performance of the AI
    score_logger = ScoreLogger(ENV_NAME)

    """
        Observation: 
        Type: Box(4)
        Num	Observation                 Min         Max
        0	Cart Position             -4.8            4.8
        1	Cart Velocity             -Inf            Inf
        2	Pole Angle                 -24 deg        24 deg
        3	Pole Velocity At Tip      -Inf            Inf
        
    Actions:
        Type: Discrete(2)
        Num	Action
        0	Push cart to the left
        1	Push cart to the right
        
    """
    #The observation has 4 components in this environment, so observation_space will be equal to 4. These are game-specific inputs
    observation_space = env.observation_space.shape[0]
    #There are 2 possible actions, moving to the left and moving to the right as seen above
    action_space = env.action_space.n
    #DQNSolver is the "AI", the agent that from the list of observations and actions will try to determine the best actions for given circumstances (observation)
    #Initializing the dqn_solver
    dqn_solver = DQNSolver(observation_space, action_space)
    #Run is a variable to track how many runs it has been
    run = 0
    while True:

        run += 1
        #When you call env.reset(), it returns the initial state as a np.ndarray of shape (4,) since there are 4 observations
        state = env.reset()
        #Reshaping the state into a 2d array of (1,4)
        state = state.reshape(1, observation_space)
        step = 0
        while True:
            #Each step is a new action undertaken by the agent
            step += 1
            #Comment out the block below if you don't want to see the animation rendered live; it will make the computation quicker
            if run % RECORD_EVERY == 0:
                # env.render()
                recorder.capture_frame()

            #Return an action based on the state
            action = dqn_solver.act(state)

            #Environment returns informations based on the action decided by the dqn_solver
            state_next, reward, terminal, info = env.step(action)
            #Positive or negative reward
            reward = reward if not terminal else -reward
            #The next state
            state_next = state_next.reshape(1, observation_space)
            #This is how our AI has "memory"
            dqn_solver.remember(state, action, reward, state_next, terminal)
            #update the state from the past to present
            state = state_next
            #Terminal means the episode/run has finished
            if terminal:
                print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                score_logger.add_score(step, run)
                break
            dqn_solver.experience_replay()



    env.close()
Example #27
def runCat3D():
    env = gym.make(ENV_NAME)
    score_logger = ScoreLogger(ENV_NAME)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    '''
    method = 1: use DQNSolver
    method = 2: use cyclic_action, which makes the cat land on its feet (under the current initial conditions and parameters)
    method = 3: user inputs the actions
    '''
    method = 1

    if method == 1:
        print("Using DQNSolver.")
        dqn_solver = DQNSolver(observation_space, action_space)
        fileWeights = "weights3D_simpler.h5"
        #uncomment to start off with saved weights
        #dqn_solver.load_weights(fileWeights)
    elif method == 2:
        print("Using cyclic method.")
    else:
        print("User inputs.")

    run = 0
    average_score = 0
    while run < 100:  #True:
        run += 1
        state = env.reset()
        state = np.reshape(state, [1, observation_space])
        step = 0
        while True:
            env.render()

            if method == 1:
                action = dqn_solver.act(state)
            elif method == 2:
                action = cyclic_action(step, env)
            else:
                action = input("Enter action")

            action = int(action)
            state_next, reward, terminal, info = env.step(action)
            reward = -reward
            #reward = reward if not terminal else -reward
            state_next = np.reshape(state_next, [1, observation_space])
            if method == 1:
                dqn_solver.remember(state, action, reward, state_next,
                                    terminal)

            state = state_next

            if terminal:
                average_score += reward
                if method == 1:
                    print("Run: " + str(run) + ", exploration: " +
                          str(round(dqn_solver.exploration_rate, 4)) +
                          ", score: " + str(round(reward, 2)))
                else:
                    print("Run: " + str(run) + ", score: " +
                          str(round(reward, 2)))
                #score_logger.add_score(int(reward), run)
                break

            if method == 1:
                dqn_solver.experience_replay()
                if run % 50 == 0:
                    dqn_solver.save_weights(fileWeights)

            step += 1
    print("Total runs: " + str(run) + ", average score: " +
          str(round(average_score / run, 2)))
    input("End. Press any key")
    env.close()
Example #28
def connect4dqn():
    env = Connect4()
    score_logger = ScoreLogger('Connect4')
    player1won = 0
    player2won = 0
    observation_space = env.reset().shape
    action_space = env.validMoves().size
    # Assign GPU to DGX
    config = tf.ConfigProto(device_count={'GPU': 2})
    sess = tf.Session(config=config)
    set_session(sess)

    dqn_solver = DQNSolver(observation_space, action_space)
    run = 0
    # moved one loop up; otherwise player two won't be able to start if player one wins
    state = env.reset()
    while True:
        run += 1
        if run % 50 == 0:
            print('Saving weights and starting evaluation...')
            dqn_solver.save()
            score, ties = evaluate_dqn(env, dqn_solver, 1000)
            score_logger.add_score(score + ties, run)  #logging ties as success
        step = 0

        while True:
            step += 1
            player = env.getNextPlayer()

            if player == 1:
                action_player1 = dqn_solver.act(state, env)
                state_next, reward_player1, terminal, info = env.makeMove(
                    player, action_player1)
                state_copy = np.copy(state)
                state_next_copy = np.copy(state_next)
                if terminal:
                    # if player 1 wins, pop player 2's last move and give it a negative reward
                    dqn_solver.pop()
                    dqn_solver.remember(normalized_state, action_player2,
                                        reward_player1 * -1,
                                        normalized_state_next, terminal)
                dqn_solver.remember(state, action_player1, reward_player1,
                                    state_next, terminal)
                state = state_next
            else:
                normalized_state = np.roll(state, 1, axis=-1)
                action_player2 = dqn_solver.act(normalized_state, env)
                #                userInput = int(input("Which row silly Human? "))
                #                action_player2 = userInput
                state_next, reward_player2, terminal, info = env.makeMove(
                    player, action_player2)
                normalized_state_next = np.roll(state_next, 1, axis=-1)
                if terminal:
                    # if player 2 wins, pop player 1's last move and give it a negative reward
                    dqn_solver.pop()
                    dqn_solver.remember(state_copy, action_player1,
                                        reward_player2 * -1, state_next_copy,
                                        terminal)
                dqn_solver.remember(normalized_state, action_player2,
                                    reward_player2, normalized_state_next,
                                    terminal)
                state = state_next

            if terminal:
                if player == 1:
                    player1won += 1
                else:
                    player2won += 1
                try:
                    winRatio = player1won / player2won
                except ZeroDivisionError:
                    winRatio = 0
                print('Win ratio: {}'.format(winRatio))
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", moves: " +
                      str(step))
                break
            # learn from a batch of stored transitions after each move
            for i in range(20):
                dqn_solver.experience_replay()
Example #29
def cartpole():
    env = UnityEnv(environment_filename=ENV_NAME,
                   worker_id=2,
                   use_visual=False,
                   multiagent=True)
    score_logger = ScoreLogger(ENV_NAME)
    agents_brain = []
    agents_action = []
    index_list = []
    agents_alive = []
    count = 0
    count1 = 0
    num_agents = env.number_agents
    print("___________Number of agents in cartpole __")
    print(num_agents)
    observation_space = env.observation_space.shape[0]
    action_space = env.action_space.n
    dqn_solver = DQNSolver(observation_space, action_space)
    print("__dqn solver______")
    print(dqn_solver)
    #model = tf.keras.models.load_model("")
    for x in range((env.number_agents)):
        agents_brain.append(dqn_solver)
        print("______agentbrain____")
        print(agents_brain)
        print("_Agent action___")
        print(agents_action)

    learning_brain = copy.deepcopy(agents_brain)
    run = 0
    state = env.reset()
    initialstate = copy.deepcopy(state)
    while True:
        run += 1
        env.reset()
        print("____________STATE____________-")
        print(state[0])
        state = copy.deepcopy(initialstate)
        agents_brain = []
        agents_action = []
        index_list = []
        agents_alive = []
        count = 0
        count1 = 0
        num_agents = int(state[0][-5])
        agents_brain = copy.deepcopy(learning_brain)
        print(learning_brain)
        print(agents_brain)
        print(state)
        #for x in range ( (env.number_agents - 1) ):

        step = 0
        while True:
            step += 1
            env.render()
            print("___________State Length_______")
            print(len(state))
            print("______selffish___")
            print(state[0])
            agents_action = [1] * len(state)
            copied_agents_alive = copy.deepcopy(agents_alive)
            print("__________numagents_____")
            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                agents_action[x] = agents_brain[x].act(state[x])
            print(agents_action)
            state_next, reward, terminal, info = env.step(
                agents_action, num_agents)
            print("_______Reward________")
            print(reward)
            print("_____________NEXT STATE LENGTH____________")
            print(len(state_next))
            if (len(state_next) == 0):
                break
            agents_alive = state_next[0][-13:-5]
            num_agents = int(state_next[0][-5])
            print("_______num agents in cartpole________")
            print(num_agents)
            print("_____index list")
            print(index_list)
            print(agents_alive)
            agents_alive1 = np.delete(agents_alive, index_list)
            print("_______Alive agent list_______")
            print(agents_alive1)
            flag = False
            # del agents_alive[index_list[x]]
            for x in range(len(agents_alive)):
                if (agents_alive[x] == float(1)):
                    for y in range(len(index_list)):
                        if (index_list[y] == x):
                            flag = True
                    if (flag == False):
                        index_list.append(x)

                flag = False

            index_to_remove = []
            for x in range(len(agents_alive1)):
                if (agents_alive1[x] == float(1)):
                    learning_brain[index_list[count]] = agents_brain[x]
                    index_to_remove.append(x)
                    count = count + 1

            agents_brain = [
                i for j, i in enumerate(agents_brain)
                if j not in index_to_remove
            ]
            print("____________AGENTS_BRAIN_________")
            print(len(agents_brain))
            print("_______________Terminal_____________")
            print(terminal)
            if (terminal[0] == True):
                print("Run: " + str(run) + ", exploration: " +
                      str(dqn_solver.exploration_rate) + ", score: " +
                      str(step))
                score_logger.add_score(step, run)
                for x in range(len(copied_agents_alive)):
                    learning_brain[x] = agents_brain[count1]
                    count1 = count1 + 1
                for x in range(len(learning_brain)):
                    learning_brain[x].save(str(run) + "brain" + str(x) + ".h5")

                break

            for x in range(num_agents - 1):
                state[x] = np.reshape(state[x], [1, observation_space])
                state_next[x] = np.reshape(state_next[x],
                                           [1, observation_space])
                agents_brain[x].remember(state[x], agents_action[x], reward[x],
                                         state_next[x], terminal[x])
                agents_brain[x].experience_replay()
            state = state_next
Example #30
MEMORY_SIZE = 1000000
BATCH_SIZE = 32

EXPLORATION_MAX = 1.0
EXPLORATION_MIN = 0.01  # 1% of the time the agent will explore
EXPLORATION_DECAY = 0.995

N_EPISODES = 1001

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0, 1, 2"

# Set Parameters
env = gym.make(ENV_NAME)
score_logger = ScoreLogger(ENV_NAME)

state_size = env.observation_space.shape[0]
action_size = env.action_space.n

output_dir = 'model_output/cartpole'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

class DQNNN(nn.Module):

    def __init__(self, state_size, action_size):
        super(DQNNN, self).__init__()
        self.dense1 = nn.Linear(state_size, 24)
        self.dense2 = nn.Linear(24, 24)
        self.output = nn.Linear(24, action_size)
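
    def forward(self, x):
        # Assumed forward pass (not part of the original snippet): two hidden
        # ReLU layers followed by a linear layer of Q-values, one per action.
        x = nn.functional.relu(self.dense1(x))
        x = nn.functional.relu(self.dense2(x))
        return self.output(x)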