def choose_action(step):
    """Choose the action to take at step ``step``.

    Fetches the current observation, repeats it four times to build the
    model input, and takes the legal action at the arg-max of the model
    output as the greedy candidate. A uniformly random legal action is the
    exploratory candidate. ``select_with_probability`` is then asked to
    pick between the two with weights ``epsilon(step)`` and
    ``1 - epsilon(step)`` (presumably a weighted random draw -- confirm
    against its definition).

    Returns:
        The action picked by ``select_with_probability``. Earlier code
        computed and printed this value but returned the greedy action
        unconditionally, so exploration never reached the caller.
    """
    image = get_observation()
    # The same frame is stacked four times, then wrapped in a batch of one.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    exploration_rate = epsilon(step)
    action = select_with_probability(
        [random_action, best_action],
        [exploration_rate, 1 - exploration_rate],
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%s %s" % (exploration_rate, action))
    return action
def choose_action(step):
    """Choose the action to take at step ``step``.

    Fetches the current observation, repeats it four times to build the
    model input, and takes the legal action at the arg-max of the model
    output as the greedy candidate. A uniformly random legal action is the
    exploratory candidate. ``select_with_probability`` is then asked to
    pick between the two with weights ``epsilon(step)`` and
    ``1 - epsilon(step)`` (presumably a weighted random draw -- confirm
    against its definition).

    Returns:
        The action picked by ``select_with_probability``. Earlier code
        computed and printed this value but returned the greedy action
        unconditionally, so exploration never reached the caller.
    """
    image = get_observation()
    # The same frame is stacked four times, then wrapped in a batch of one.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    exploration_rate = epsilon(step)
    action = select_with_probability(
        [random_action, best_action],
        [exploration_rate, 1 - exploration_rate],
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("%s %s" % (exploration_rate, action))
    return action
def choose_action(image, step, epoch):
    """Choose the action to take for observation ``image``.

    ``image`` is repeated four times to build the model input, and the
    legal action at the arg-max of the model output is the greedy
    candidate. A uniformly random legal action is the exploratory
    candidate. The exploration weight depends on the module-level ``MODE``:

    * ``"test"``   -> 0.0
    * ``"random"`` -> 1.0
    * otherwise    -> ``epsilon(step, epoch)``

    ``select_with_probability`` is then asked to pick between the two
    candidates with weights ``rate`` and ``1 - rate`` (presumably a
    weighted random draw -- confirm against its definition).

    Returns:
        The action picked by ``select_with_probability``. Earlier code
        computed this value but returned the greedy action
        unconditionally, so neither ``MODE`` nor the epsilon schedule
        affected the result.
    """
    # The same frame is stacked four times, then wrapped in a batch of one.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    if MODE == "test":
        exploration_rate = 0.0
    elif MODE == "random":
        exploration_rate = 1.0
    else:
        exploration_rate = epsilon(step, epoch)

    action = select_with_probability(
        [random_action, best_action],
        [exploration_rate, 1 - exploration_rate],
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Step: %d, Epsilon: %f, Epoch: %d" % (step, exploration_rate, epoch))
    return action
def choose_action(image, step, epoch):
    """Choose the action to take for observation ``image``.

    ``image`` is repeated four times to build the model input, and the
    legal action at the arg-max of the model output is the greedy
    candidate. A uniformly random legal action is the exploratory
    candidate. The exploration weight depends on the module-level ``MODE``:

    * ``"test"``   -> 0.0
    * ``"random"`` -> 1.0
    * otherwise    -> ``epsilon(step, epoch)``

    ``select_with_probability`` is then asked to pick between the two
    candidates with weights ``rate`` and ``1 - rate`` (presumably a
    weighted random draw -- confirm against its definition).

    Returns:
        The action picked by ``select_with_probability``. Earlier code
        computed this value but returned the greedy action
        unconditionally, so neither ``MODE`` nor the epsilon schedule
        affected the result.
    """
    # The same frame is stacked four times, then wrapped in a batch of one.
    history = np.array([image] * 4)
    history_batch = np.array([history])
    prediction = model.predict(history_batch)[0]

    best_action = legal_actions[np.argmax(prediction)]
    random_action = random.choice(legal_actions)

    if MODE == "test":
        exploration_rate = 0.0
    elif MODE == "random":
        exploration_rate = 1.0
    else:
        exploration_rate = epsilon(step, epoch)

    action = select_with_probability(
        [random_action, best_action],
        [exploration_rate, 1 - exploration_rate],
    )
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("Step: %d, Epsilon: %f, Epoch: %d" % (step, exploration_rate, epoch))
    return action