def get_features_values_by_competition(graph):
    """Sample feature vectors from games in which both players act randomly.

    Plays ``run.feature_iterations`` games, each on a fresh ``graph.copy()``.
    A game continues while ``g.graph['free']`` holds more than one entry.
    At the start of every round the value of ``tools.get_feature(g)`` is
    recorded; then player 1 and player 2 each activate one seed picked by a
    uniformly random strategy, and the outcome of ``tools.diffuse(g)`` is
    applied for both players.

    Args:
        graph: Game graph exposing ``copy()`` and a ``graph`` attribute dict
            with a ``'free'`` entry (presumably the not-yet-activated nodes;
            confirm against ``tools``).

    Returns:
        A 2-D float array with 7 columns and one row per recorded round,
        in the order the rounds were played.
    """

    def _random_seed(g, player):
        # Pick one of the three strategies uniformly at random.
        choice = np.random.randint(3)
        if choice == 0:
            seed = actions.action_degree(g)
        elif choice == 1:
            seed = actions.action_weight(g)
        else:
            seed = actions.action_blocking(g, player)
        # -1 signals an illegal action: fall back to the degree strategy.
        if seed == -1:
            print("Illegal action")
            seed = actions.action_degree(g)
        return seed

    iterations = run.feature_iterations
    # Initial capacity: 1600 rounds per game. This is only a starting size;
    # the buffer is enlarged below if the games turn out to be longer.
    features = np.zeros((iterations * 1600, 7))
    counter = 0

    for _ in range(iterations):
        g = graph.copy()
        while len(g.graph['free']) > 1:
            if counter >= features.shape[0]:
                # Buffer exhausted: double it instead of failing with an
                # IndexError on the next row assignment.
                grow_by = max(features.shape[0], 1)
                features = np.vstack(
                    (features, np.zeros((grow_by, features.shape[1])))
                )
            features[counter][:] = tools.get_feature(g)
            counter += 1

            # Player 1 moves first, then player 2 reacts on the updated graph.
            tools.activate_node(g, _random_seed(g, 1), 1)
            tools.activate_node(g, _random_seed(g, 2), 2)

            # Spread influence from the current activations.
            a1, a2 = tools.diffuse(g)
            for n in a1:
                tools.activate_node(g, n, 1)
            for n in a2:
                tools.activate_node(g, n, 2)

    # Drop the unused tail of the preallocated buffer.
    return features[0:counter, :]
def train(graph, feature_lmh_ranges):
    """Train a tabular Q-function for player 1 against a fixed opponent.

    Runs ``run.train_n_rounds`` episodes. Every episode starts from a fresh
    ``graph.copy()`` and lasts 21 rounds. In each round player 1 chooses a
    strategy epsilon-greedily from the Q-table, player 2 plays the fixed
    strategy ``run.opponent``, ``tools.diffuse`` is applied, and the Q-table
    entry for the (state, action) pair is updated.

    Args:
        graph: Game graph exposing ``copy()`` and a ``graph`` attribute dict
            with ``'1'`` and ``'2'`` entries (presumably the nodes held by
            each player; confirm against ``tools``).
        feature_lmh_ranges: Passed through unchanged to ``tools.get_state``.

    Returns:
        The learned Q-table, a ``(2187, 3)`` float array indexed by
        ``[state, action]``.
    """
    learning_rate = 0.5
    epsilon = 0.8
    discount = 0.98
    decay = 0.998
    opponent_action = run.opponent
    # 2187 == 3 ** 7 states (presumably 7 features x 3 levels; confirm
    # against tools.get_state) and 3 strategies per state.
    q_table = np.zeros((2187, 3))

    def _play(g, choice, player):
        """Activate a seed for ``player`` and return the strategy finally used."""
        # ``choice`` is expected to be 0, 1 or 2.
        if choice == 0:
            seed = actions.action_degree(g)
        elif choice == 1:
            seed = actions.action_weight(g)
        elif choice == 2:
            seed = actions.action_blocking(g, player)
        # -1 signals an illegal action: fall back to strategy 0 (degree).
        if seed == -1:
            print("Illegal action")
            choice = 0
            seed = actions.action_degree(g)
        tools.activate_node(g, seed, player)
        return choice

    for iteration in tqdm(range(run.train_n_rounds)):
        g = graph.copy()
        state = tools.get_state(g, feature_lmh_ranges)

        for t in range(21):
            # Epsilon-greedy choice for player 1.
            if np.random.random() < epsilon:
                own_action = np.random.randint(3)
            else:
                own_action = np.argmax(q_table[state, :])
            # If the chosen strategy was illegal, the fallback (0) is what
            # gets credited in the Q-update below.
            own_action = _play(g, own_action, 1)

            # Player 2 always plays the configured opponent strategy.
            _play(g, opponent_action, 2)

            gained_1, gained_2 = tools.diffuse(g)
            for node in gained_1:
                tools.activate_node(g, node, 1)
            for node in gained_2:
                tools.activate_node(g, node, 2)

            next_state = tools.get_state(g, feature_lmh_ranges)

            # Delayed reward: the lead of player 1 over player 2, granted
            # only on every sixth round (t = 0, 6, 12, 18).
            if t % 6 == 0:
                reward = len(g.graph['1']) - len(g.graph['2'])
            else:
                reward = 0

            best_next = max(q_table[next_state, :])
            q_table[state, own_action] = (
                (1 - learning_rate) * q_table[state, own_action]
                + learning_rate * (reward + discount * best_next)
            )
            state = next_state

        # Per-episode schedules: geometric decay of the learning rate and a
        # linear epsilon ramp based on the episode index.
        learning_rate = decay * learning_rate
        epsilon = 0.9 - (0.6 * iteration / 500)

    return q_table
def compete(graph: nx.Graph, q_table, feature_lmh_ranges=None):
    """Play one game with a trained Q-table against a fixed opponent.

    Player 1 acts greedily with respect to ``q_table``; for a state whose
    Q-row is entirely zero (never updated) it picks one of five strategies
    uniformly at random instead. Player 2 always plays strategy 0 (degree).
    The game is played directly on ``graph`` (no copy is made) and runs
    while ``graph.graph['free']`` holds more than one entry, capped at
    20,000,000 rounds.

    Args:
        graph: Game graph; its ``graph`` attribute dict must provide the
            ``'free'``, ``'1'`` and ``'2'`` entries.
        q_table: 2-D array indexed as ``q_table[state, action]``.
        feature_lmh_ranges: Optional. When given, it is forwarded as the
            second argument of ``tools.get_state``; when ``None`` the state
            is computed with ``tools.get_state(graph)`` only.

    Returns:
        A dict with ``"counter"`` (rounds played), ``"un_seen"`` (rounds in
        which the state had an all-zero Q-row) and ``"score"``
        (``len(graph.graph['1']) - len(graph.graph['2'])``).
    """
    opponent_action = 0
    counter = 0
    max_iteration = 20000000
    un_seen = 0

    while len(graph.graph['free']) > 1 and counter < max_iteration:
        if feature_lmh_ranges is None:
            state = tools.get_state(graph)
        else:
            state = tools.get_state(graph, feature_lmh_ranges)

        # first player action
        q_row = q_table[state, :]
        if np.any(q_row):
            action_choice1 = np.argmax(q_row)
        else:
            # An untouched row is all zeros. Testing the sum instead would
            # also match learned rows whose values cancel out (e.g. 2, -2, 0).
            un_seen += 1
            action_choice1 = np.random.randint(5)

        if action_choice1 == 0:
            seed1 = actions.action_degree(graph)
        elif action_choice1 == 1:
            seed1 = actions.action_weight(graph)
        elif action_choice1 == 2:
            seed1 = actions.action_blocking(graph, 1)
        elif action_choice1 == 3:
            seed1 = actions.action_last(graph)
        elif action_choice1 == 4:
            seed1 = actions.action_min_degree(graph)

        # illegal action: report it and fall back to the degree strategy
        if seed1 == -1:
            print("illegal 1")
            print(action_choice1)
            action_choice1 = 0
            seed1 = actions.action_degree(graph)
        tools.activate_node(graph, seed1, 1)

        # second player action
        action_choice2 = opponent_action
        if action_choice2 == 0:
            seed2 = actions.action_degree(graph)
        elif action_choice2 == 1:
            seed2 = actions.action_weight(graph)
        elif action_choice2 == 2:
            seed2 = actions.action_blocking(graph, 2)

        # illegal action: report it and fall back to the degree strategy
        if seed2 == -1:
            print("illegal 2")
            print(action_choice2)
            action_choice2 = 0
            seed2 = actions.action_degree(graph)
        tools.activate_node(graph, seed2, 2)

        # Spread influence from the current activations.
        a1, a2 = tools.diffuse(graph)
        for n in a1:
            tools.activate_node(graph, n, 1)
        for n in a2:
            tools.activate_node(graph, n, 2)

        counter += 1

    score = len(graph.graph['1']) - len(graph.graph['2'])
    print("counter: " + str(counter))
    print("unseen: " + str(un_seen))
    print("score: " + str(score))
    result = {"counter": counter, "un_seen": un_seen, "score": score}
    return result