Example No. 1

import pandas as pd
# NOTE: stock, DeepQNetwork, BackTest, and run are used below but their imports
# are not part of this excerpt; they come from this project's own modules.

if __name__ == "__main__":
    max_round = 30
    file_path = '000065.SZ_NormalData.csv'
    df = pd.read_csv(file_path)
    df = df.sort_values('trade_date', ascending=True)
    df = df.iloc[22:].reset_index(drop=True)  # drop the first rows that have no moving-average data yet

    env = stock(df.iloc[0:1500])
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.02,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=4000,
        batch_size=512,
        # output_graph=True
    )
    run(max_round)
    # env = stock(df)
    # env = BackTest(env, show_log=True)
    # env.draw('trade.png', 'profit.png')

    env = stock(df.iloc[1500:].reset_index(drop=True))
    env = BackTest(env, show_log=True)
    env.draw('trade1.png', 'profit1.png')

    env = BackTest(env, show_log=True, my_trick=True)
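run(max_round) is called above but its body is not part of this excerpt. A minimal sketch of the training loop it presumably wraps, assuming the stock env exposes reset/step returning (observation, reward, done) and that this DeepQNetwork class provides the usual choose_action/store_transition/learn methods; the warm-up threshold and learning interval are illustrative:

def run(max_round):
    step = 0
    for episode in range(max_round):
        observation = env.reset()
        while True:
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            # learn every few steps once the replay memory has warmed up
            if step > 200 and step % 5 == 0:
                RL.learn()
            observation = observation_
            step += 1
            if done:
                break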
Example No. 2

import gym
from RL_brain2 import DeepQNetwork

env = gym.make('SpaceInvaders-v0')
env = env.unwrapped

print("action_space:", env.action_space)
print("observation_space:", env.observation_space)
print("observation_space.high:", env.observation_space.high)
print("observation_space.low:", env.observation_space.low)

RL = DeepQNetwork(n_actions=env.action_space.n,
                  n_features=env.observation_space.shape[0] * env.observation_space.shape[1] * env.observation_space.shape[2],
                  learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)

total_steps = 0

ep_rhistory = []


for i_episode in range(500):

    observation = env.reset()
    ep_r = 0
    while True:
        # env.render()
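        # The excerpt is cut off at this point. A hedged sketch of how such an
        # episode loop typically continues with this kind of DeepQNetwork
        # interface: the image observation is flattened to match n_features,
        # and the learning threshold below is illustrative, not taken from the
        # original file.
        action = RL.choose_action(observation.flatten())
        observation_, reward, done, info = env.step(action)
        RL.store_transition(observation.flatten(), action, reward,
                            observation_.flatten())
        ep_r += reward
        if total_steps > 1000:
            RL.learn()
        if done:
            ep_rhistory.append(ep_r)
            print('episode:', i_episode, 'ep_r:', round(ep_r, 2))
            break
        observation = observation_
        total_steps += 1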
Example No. 3

import argparse
import tkinter as tk
import numpy as np
from env import crossing
from visual import Visual
from RL_brain import DeepQNetwork  # module name assumed; the original import is not in this excerpt

np.set_printoptions(threshold=np.inf)

#print(env.observation_space.shape[0])
parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
parser.add_argument('--train', dest='train', action='store_true', default=False)
parser.add_argument('--test', dest='test', action='store_true', default=True)
args = parser.parse_args()



RL = DeepQNetwork(n_actions=4,  # 2 crossings, 2 light states each -> 4 joint actions
                  #n_features=env.observation_space.shape[0],
                  n_features=10,  # 2 crossings x 5 features (4 queue lengths + 1 light state)
                  learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)





def road_map():
    # build the two crossings used by this example; the excerpt is truncated
    # here, so returning the constructed crossings is an assumption
    cross1 = crossing(light_state=0, q_states=[0, 0, 0, 1])
    cross2 = crossing(light_state=0, q_states=[0, 0, 1, 0])
    return cross1, cross2

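With n_actions=4 (two crossings, two light states each) and n_features=10 (five values per crossing), the state is presumably the four queue lengths plus the light state of each crossing, concatenated. A minimal sketch of that encoding, assuming the crossing objects expose q_states and light_state attributes; the helper name is illustrative, not from the original file:

def encode_state(crossings):
    # 5 features per crossing: 4 queue lengths + the current light state
    state = []
    for c in crossings:
        state.extend(c.q_states)
        state.append(c.light_state)
    return np.array(state, dtype=np.float32)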

step_set = []
reward_set = []
Example No. 4

import argparse
from RL_brain import DeepQNetwork  # module name assumed; the original import is not in this excerpt

#print(env.observation_space.shape[0])
parser = argparse.ArgumentParser(
    description='Train or test neural net motor controller.')
parser.add_argument('--train', dest='train', action='store_true', default=True)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
args = parser.parse_args()

# size of crossroads grid
grid_x = 4
grid_y = 4

RL = DeepQNetwork(
    n_actions=2**(grid_x * grid_y),  # a 0/1 light command for each crossroad in the grid
    n_features=5 * (grid_x * grid_y),  # 5 features per crossroad: 4 car-queue counts + 1 light state
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

x = []
y = []
for i in range(grid_x):
    x.append(i + 1)
for i in range(grid_y):
    y.append(i + 1)

# visualization layout properties
times = 100  # spacing between adjacent crossroads
bias = 6  # distance from a light to the center of its crossroad
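Because n_actions = 2**(grid_x * grid_y), each action index is a bitmask assigning a 0/1 light command to every crossroad in the grid. A minimal sketch of that decoding; the helper name is illustrative, not from the original file:

def action_to_lights(action, grid_x=4, grid_y=4):
    # unpack the action index into one 0/1 command per crossroad,
    # row by row, least-significant bit first
    bits = [(action >> i) & 1 for i in range(grid_x * grid_y)]
    return [bits[row * grid_x:(row + 1) * grid_x] for row in range(grid_y)]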
Example No. 5
                print('success =', success)
                break
            # swap observation
            observation = observation_
            step += 1
    # end of game
    print('game over')
    plt.plot(np.arange(episode_number), rr_episode, '.')
    plt.ylabel('reward')
    plt.xlabel('training episode')
    plt.show()
    plt.plot(np.arange(episode_number), step_episode, '.')
    plt.ylabel('step')
    plt.xlabel('training episode')
    plt.show()


if __name__ == "__main__":
    # maze game
    env = UR5()
    RL = DeepQNetwork(4,
                      12,
                      learning_rate=0.00001,
                      reward_decay=0.5,
                      e_greedy=1,
                      replace_target_iter=3,
                      memory_size=4000,
                      output_graph=False)
    run()
    RL.plot_cost()