Example #1
                break


# def q_learning():

if __name__ == '__main__':
    EPISODES = 50
    pin_servo = 9
    batch_size = 32
    game_name = 'CartPole-v1'

    board = Arduino('9600', port='/dev/cu.usbmodem14311')
    # demo_(board, pin_servo)

    # initialise game
    env, state_size, action_size = env_init(game_name)

    # initialise agent
    agent = DQNAgent(state_size, action_size)

    # load model
    agent.load("../models/cartpole-dqn.h5")

    # train
    # agent.train(agent, env, EPISODES, state_size, batch_size)

    # save model/agent state (training above is commented out, so this just
    # re-saves the loaded weights)
    agent.save("cartpole-dqn.h5")

    # move servo!!
    demo(agent, env, EPISODES, state_size, batch_size)
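
# Note: env_init, DQNAgent, demo and demo_ are defined elsewhere in this
# project and are not part of this excerpt. As a hypothetical illustration,
# env_init is assumed to do roughly the following (create the Gym
# environment and report its state/action dimensions):
#
#     import gym
#
#     def env_init(game_name):
#         env = gym.make(game_name)
#         state_size = env.observation_space.shape[0]   # 4 for CartPole-v1
#         action_size = env.action_space.n              # 2 (push left/right)
#         return env, state_size, action_size
#
# DQNAgent appears to wrap a Keras network, with load()/save() reading and
# writing the .h5 weight files used above.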
Example #2
            s.insert(0, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    return s


if __name__ == '__main__':
    tetris = Tetris()
    genetic = None

    # map each Figure enum member's name to its value
    figure_values = {e.name: e.value for e in Figure}

    state_size = 200
    action_size = 38
    # note: the agent is constructed with a 10-dimensional input here rather
    # than the full 200-cell board described by state_size above
    agent = DQNAgent(10, action_size)
    agent.load("mreza-dqn.h5")
    agent.epsilon = 0.12  # keep a small amount of exploration during the demo
    done = False
    episodes = 0

    app = TetrisApp()
    ###############################################################
    ################ The part relevant to Tetris ##################
    key_actions = {
        'ESCAPE': app.quit,
        'LEFT': lambda: app.move(-1),
        'RIGHT': lambda: app.move(+1),
        'DOWN': lambda: app.drop(True),
        'UP': app.rotate_stone,
        'p': app.toggle_pause,
        'RETURN': app.start_game,
    }

Example #3
              expl_decay=expl_decay,
              nhl=nl,
              sl_f=sl_f)
batch_size = 16

# Simulation Runtime
testing = 1000
testing_comps = 1
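
# QN is the DQN wrapper whose constructor tail appears at the top of this
# example; its definition is not part of this excerpt. Based on how it is
# used in the loop below, act_test is assumed (hypothetically) to apply a
# purely greedy policy over the network's Q-values, roughly:
#
#     q_values = QN.model.predict(states)      # one Q-value per action
#     action = int(np.argmax(q_values[0]))     # greedy choice, no exploration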

for r in range(4, 5):
    # sim_env.T = T_choice[r]
    # load Q network
    print(r)
    parameters = 'old/DQN__S{}_rho{}_SNR{}_PS{}_W4_lr0.0001_df0.0_sl24_nhl1_ef0.9'.format(
        r, 0.9, SNR, sim_env.pi)
    QN.load(sim_env.channel_type + '/' + parameters)
    sim_env = Simulation(number_of_servers=num_of_servers,
                         number_of_users=1,
                         historic_time=hist_timeslots,
                         snr_set=avg_SNR,
                         csi=0,
                         channel=0.9)
    sim_env.reset()
    states = sim_env.state  # get first state
    for u in range(testing):
        states = np.reshape(
            states, [1, state_size])  # reshape state to vector for network
        action = QN.act_test(states)  # get action from DQN agent
        # print('SNR:', sim_env.SNR[-1])
        # print('action:', sim_env.action[action])
        next_state, rewards, overall_err = sim_env.Assign_Cores(