示例#1
0
def train():
    """Run the online DQN training loop against the WLAN environment.

    Starts a ``wlanEnv``, builds a ``BrainDQN`` from the dimensions returned
    by ``env.getDimSpace()``, polls ``env.observe()`` until its first element
    is truthy, and then loops forever: pick an action, apply it with
    ``env.step``, push the step's metrics to the ``Display`` and feed the
    transition back to the agent via ``brain.setPerception``.

    The loop is left only through an exception.  On ``KeyboardInterrupt``
    (Ctrl-C) the replay memory is saved and the function returns normally;
    any other exception propagates.  The display is stopped on every exit
    path.
    """
    env = wlanEnv(CONTROLLER_IP, BUFFER_LEN, timeInterval=ENV_REFRESH_INTERVAL)
    env.start()

    numAPs, numActions, numAdditionDim = env.getDimSpace()
    brain = BrainDQN(numActions,
                     numAPs,
                     numAdditionDim,
                     BUFFER_LEN,
                     param_file='saved_networks/network-dqn.params')

    # Poll until the first element returned by observe() becomes truthy.
    while not env.observe()[0]:
        time.sleep(0.5)

    observation0 = env.observe()[1]
    brain.setInitState(observation0)

    # Abbreviate large arrays in the console output below.
    np.set_printoptions(threshold=5)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Initial observation:\n' + str(observation0))

    fig = Display(env.id2ap)
    fig.display()

    try:
        while True:
            action, q = brain.getAction()
            print('action:\n' + str(action.argmax()))
            reward, throught, nextObservation = env.step(action)
            print('reward: ' + str(reward) + ', throught: ' + str(throught))
            print('Next observation:\n' + str(nextObservation))

            # Build a fresh dict per step so a record handed to the display
            # is never mutated by a later iteration.
            data = {
                'timestamp': time.time(),
                'rssi': nextObservation[-1],
                'q': q,
                'reward': reward,
                'action_index': np.argmax(action),
            }
            fig.append(data)

            # NOTE(review): the trailing False is presumably a "terminal
            # state" flag -- confirm against BrainDQN.setPerception.
            brain.setPerception(nextObservation, action, reward, False)
    except KeyboardInterrupt:
        print('Saving replayMemory......')
        brain.saveReplayMemory()
    finally:
        # Stop the display on every exit path, not only on Ctrl-C.
        fig.stop()
示例#2
0
def test():
    """Run the DQN agent in prediction mode against the WLAN environment.

    Starts a ``wlanEnv`` (created with ``no_guarantee=True``), builds a
    ``BrainDQN`` from the dimensions returned by ``env.getDimSpace()``, polls
    ``env.observe()`` until its first element is truthy, and then loops
    forever: ask ``brain.predict`` for an action on the current observation,
    apply it with ``env.step``, push the step's metrics to the ``Display``
    and sleep two seconds before the next step.

    The loop is left only through an exception.  ``KeyboardInterrupt``
    (Ctrl-C) ends the function normally; any other exception propagates.
    The display is stopped on every exit path.
    """
    env = wlanEnv(CONTROLLER_IP,
                  BUFFER_LEN,
                  timeInterval=ENV_REFRESH_INTERVAL,
                  no_guarantee=True)
    env.start()

    numAPs, numActions, numAdditionDim = env.getDimSpace()
    brain = BrainDQN(numActions,
                     numAPs,
                     numAdditionDim,
                     BUFFER_LEN,
                     param_file='saved_networks/network-dqn.params')

    # Poll until the first element returned by observe() becomes truthy.
    while not env.observe()[0]:
        time.sleep(0.5)

    observation = env.observe()[1]

    # Abbreviate large arrays in the console output below.
    np.set_printoptions(threshold=5)

    fig = Display(env.id2ap, PREDICT=True)
    fig.display()
    try:
        while True:
            action, q_value, action_index, feature_vector = brain.predict(
                observation)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('action:\n' + str(action_index))
            reward, throught, observation = env.step(action)
            print('q_value: ' + str(q_value))
            print('reward: ' + str(reward) + ', throught: ' + str(throught))

            # Build a fresh dict per step so a record handed to the display
            # is never mutated by a later iteration.
            data = {
                'timestamp': time.time(),
                'rssi': observation[-1],
                'q': q_value,
                'reward': reward,
                'action_index': action_index,
                'feature_vector': feature_vector,
            }
            fig.append(data)
            print('Next observation:\n' + str(observation))
            time.sleep(2)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to end the run; nothing to save here.
        pass
    finally:
        # Stop the display on every exit path, not only on Ctrl-C.
        fig.stop()