Exemplo n.º 1
0
def choose_action(step):
    """Pick the next action with an epsilon-greedy policy.

    The current observation is fetched with ``get_observation()`` and
    repeated four times to build the history passed to ``model.predict``.
    The legal action at the argmax of the prediction is the greedy
    candidate; a second candidate is drawn with ``random.choice`` from
    ``legal_actions``. Both are handed to ``select_with_probability`` with
    weights ``[eps, 1 - eps]``, where ``eps = epsilon(step)``.

    Args:
        step: Forwarded to ``epsilon(step)`` to obtain the exploration rate.

    Returns:
        The action picked by ``select_with_probability`` from the random
        and greedy candidates.
    """
    image = get_observation()
    # The same frame is repeated four times; the model input is a batch of
    # one such history.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    eps = epsilon(step)
    # NOTE(review): presumably returns random_action with probability eps and
    # best_action otherwise -- confirm against select_with_probability.
    action = select_with_probability(
        [random_action, best_action], [eps, 1 - eps])
    # Single pre-formatted argument keeps the output identical under both
    # the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (eps, action))
    # Return the sampled action (not best_action) so that the exploration
    # draw above actually influences behaviour.
    return action
Exemplo n.º 2
0
def choose_action(step):
    """Choose an action epsilon-greedily from the model's prediction.

    Reads one observation via ``get_observation()``, stacks it four times
    into a history, wraps that in a batch of one and runs
    ``model.predict`` on it. The greedy candidate is the entry of
    ``legal_actions`` at the argmax of the prediction; the exploratory
    candidate comes from ``random.choice(legal_actions)``. The final pick is
    delegated to ``select_with_probability`` with weights
    ``[eps, 1 - eps]`` where ``eps = epsilon(step)``.

    Args:
        step: Forwarded to ``epsilon(step)`` to obtain the exploration rate.

    Returns:
        The action picked by ``select_with_probability`` from the random
        and greedy candidates.
    """
    image = get_observation()
    # One frame repeated four times forms the history; predict() receives a
    # batch containing just that history.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    eps = epsilon(step)
    # NOTE(review): assumed to pick random_action with probability eps and
    # best_action with probability 1 - eps -- verify in
    # select_with_probability.
    action = select_with_probability([random_action, best_action],
                                     [eps, 1 - eps])
    # Formatting into one string gives the same output with the Python 2
    # print statement and the Python 3 print function.
    print("%s %s" % (eps, action))
    # The sampled action is returned; returning best_action here would make
    # the policy purely greedy regardless of eps.
    return action
Exemplo n.º 3
0
def choose_action(image, step, epoch):
    """Pick the next action with a mode-dependent epsilon-greedy policy.

    ``image`` is repeated four times to build the history passed (as a
    batch of one) to ``model.predict``. The legal action at the argmax of
    the prediction is the greedy candidate; a second candidate is drawn
    with ``random.choice`` from ``legal_actions``. The exploration rate
    depends on the module-level ``MODE``:

    * ``"test"``   -> 0.0
    * ``"random"`` -> 1.0
    * otherwise    -> ``epsilon(step, epoch)``

    Args:
        image: Current observation; repeated four times to form the history.
        step: Forwarded to ``epsilon`` and included in the log line.
        epoch: Forwarded to ``epsilon`` and included in the log line.

    Returns:
        The action picked by ``select_with_probability`` from the random
        and greedy candidates.
    """
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    if MODE == "test":
        eps = 0.0
    elif MODE == "random":
        eps = 1.0
    else:
        eps = epsilon(step, epoch)

    # NOTE(review): presumably returns random_action with probability eps and
    # best_action otherwise -- confirm against select_with_probability.
    action = select_with_probability(
        [random_action, best_action], [eps, 1 - eps])
    # Parenthesised single argument prints identically under Python 2 and 3.
    print("Step: %d, Epsilon: %f, Epoch: %d" % (step, eps, epoch))
    # Return the sampled action (not best_action); otherwise MODE and the
    # epsilon schedule would have no effect on the chosen action.
    return action
Exemplo n.º 4
0
def choose_action(image, step, epoch):
    """Choose an action epsilon-greedily, with epsilon set by ``MODE``.

    Builds a history by stacking ``image`` four times, wraps it in a batch
    of one and runs ``model.predict`` on it. The greedy candidate is the
    entry of ``legal_actions`` at the argmax of the prediction; the
    exploratory candidate comes from ``random.choice(legal_actions)``.
    The exploration rate is 0.0 when ``MODE == "test"``, 1.0 when
    ``MODE == "random"``, and ``epsilon(step, epoch)`` in every other case.

    Args:
        image: Current observation; stacked four times to form the history.
        step: Forwarded to ``epsilon`` and included in the log line.
        epoch: Forwarded to ``epsilon`` and included in the log line.

    Returns:
        The action picked by ``select_with_probability`` from the random
        and greedy candidates.
    """
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    if MODE == "test":
        eps = 0.0
    elif MODE == "random":
        eps = 1.0
    else:
        eps = epsilon(step, epoch)

    # NOTE(review): assumed to pick random_action with probability eps and
    # best_action with probability 1 - eps -- verify in
    # select_with_probability.
    action = select_with_probability([random_action, best_action],
                                     [eps, 1 - eps])
    # One pre-formatted string: same output for the Python 2 print statement
    # and the Python 3 print function.
    print("Step: %d, Epsilon: %f, Epoch: %d" % (step, eps, epoch))
    # The sampled action is returned; returning best_action would ignore the
    # exploration rate computed above.
    return action