Example #1
                        # Use the pre-generated random action for this step.
                        a = randaction
                        #print(" - selecting generated optimal policy ", a)

#        Clip each action component to stay strictly inside (-1, 1).
#        np.alen is deprecated; a vectorized np.clip version is sketched
#        after this example.
#        for i in range(np.alen(a)):
#            if a[i] < -1: a[i] = -0.99999999999
#            if a[i] > 1: a[i] = 0.99999999999
        # Log the chosen action every 50 steps.
        if step % 50 == 0:
            print("a =>", a)

        # Draw and refresh the current frame.
        env.render()
        env.refresh(render=True)

        # Concatenate state and action into a single input vector.
        qs_a = np.concatenate((qs, a), axis=0)

        # Take the action and observe the next state and reward.
        s, r, done, info = env.step(a)
        # Record only the first x states.

        #if done and step < max_steps - 3:
        #    r = -50

        if step == 0:
            # First step: initialize the episode buffers.
            gameSA[0] = qs_a
            gameS[0] = qs
            gameR[0] = np.array([r])
            gameA[0] = np.array([a])  # record the action taken
            gameW[0] = np.array([0.000000005])
        else:
            # Later steps: append this transition to the buffers.
            gameSA = np.vstack((gameSA, qs_a))
            gameS = np.vstack((gameS, qs))
            gameR = np.vstack((gameR, np.array([r])))
            # The excerpt cuts off here; these two rows mirror the step-0 branch.
            gameA = np.vstack((gameA, np.array([a])))
            gameW = np.vstack((gameW, np.array([0.000000005])))
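
The clipping that the commented-out loop performs can be done in one vectorized call. A minimal sketch, assuming actions are float arrays that must stay strictly inside the open interval (-1, 1); clip_action is a hypothetical helper, not part of the excerpt:

    import numpy as np

    def clip_action(a):
        # Largest float strictly below 1.0, so clipped values stay inside
        # the open interval (-1, 1) rather than touching the bounds.
        bound = np.nextafter(1.0, 0.0)
        return np.clip(np.asarray(a, dtype=np.float64), -bound, bound)
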
Example #2
        initial_epsilon=0.8
    )

    observation = env.reset()

    # Random drift direction for the barge (0 or 1) and a small epsilon.
    left_or_right_barge_movement = np.random.randint(0, 2)
    epsilon = 0.05


    for episode in range(EPISODES):
        while True:
            # 1. Choose an action based on observation
            action = PG.choose_action(observation)

            # 2. Take action in the environment
            observation_, reward, done, info = env.step(action)

            # 3. Store transition for training
            # if reward > -0.20:
            PG.store_transition(observation, action, reward)

            if RENDER_ENV:
                # Optional render: draw the frame and mark the landing target.
                env.render()
                env.draw_marker(env.landing_coordinates[0], env.landing_coordinates[1])
                # Refresh the display.
                env.refresh(render=False)

            # When should the barge move? Water movement, dynamics, etc. can be simulated here.
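
The excerpt ends before the episode terminates. For orientation, here is a minimal sketch of how a REINFORCE-style loop like this one typically closes. PG.learn() is an assumed method name (only choose_action and store_transition appear in the excerpt), env, PG, and EPISODES are taken from the context above, and note that resetting the environment at the start of each episode, rather than once, is the usual pattern:

    # Hedged sketch: a complete episode loop under the assumptions above.
    for episode in range(EPISODES):
        observation = env.reset()
        while True:
            action = PG.choose_action(observation)
            observation_, reward, done, info = env.step(action)
            PG.store_transition(observation, action, reward)
            observation = observation_  # advance to the next state
            if done:
                PG.learn()  # assumed: Monte-Carlo policy update at episode end
                break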