示例#1
0
def _play_random_seed(g, player):
    """Seed one node for ``player`` using a uniformly random strategy.

    One of the three strategies (degree, weight, blocking) is drawn with
    ``np.random.randint(3)``.  A returned seed of ``-1`` marks an illegal
    action; in that case the degree strategy is used instead.  The chosen
    node is then activated for ``player`` on ``g``.
    """
    strategy = np.random.randint(3)
    if strategy == 0:
        seed = actions.action_degree(g)
    elif strategy == 1:
        seed = actions.action_weight(g)
    else:
        seed = actions.action_blocking(g, player)

    if seed == -1:
        # The drawn strategy produced no legal node: fall back to degree.
        print("Illegal action")
        seed = actions.action_degree(g)
    tools.activate_node(g, seed, player)


def get_features_values_by_competition(graph):
    """Collect feature vectors observed during random two-player games.

    Plays ``run.feature_iterations`` games, each on a fresh copy of
    ``graph``.  In every round, while more than one node remains in
    ``g.graph['free']``, the current output of ``tools.get_feature`` is
    recorded, player 1 and then player 2 seed a node with a random
    strategy, and the activations returned by ``tools.diffuse`` are applied.

    Args:
        graph: Game graph providing ``copy()`` and a ``graph`` attribute
            dict with a ``'free'`` entry.  All games run on copies.

    Returns:
        A float array with 7 columns and one row per recorded round
        (shape ``(0, 7)`` when no round was played).
    """
    n_features = 7
    # Rows are accumulated in a list rather than a pre-sized buffer so the
    # number of rounds per game is not capped by a hard-coded row budget.
    rows = []

    for _ in range(run.feature_iterations):
        g = graph.copy()

        while len(g.graph['free']) > 1:
            # Copy through slice assignment: keeps the same broadcasting /
            # float conversion as writing into a row of a float matrix.
            row = np.empty(n_features)
            row[:] = tools.get_feature(g)
            rows.append(row)

            _play_random_seed(g, 1)
            _play_random_seed(g, 2)

            # Spread both players' influence after the two seeds are placed.
            a1, a2 = tools.diffuse(g)
            for n in a1:
                tools.activate_node(g, n, 1)
            for n in a2:
                tools.activate_node(g, n, 2)

    if not rows:
        return np.zeros((0, n_features))
    return np.vstack(rows)
示例#2
0
文件: train.py 项目: ah-ansari/stormq
def train(graph, feature_lmh_ranges):
    """Train a tabular Q-learning policy for player 1 against a fixed opponent.

    Plays ``run.train_n_rounds`` games of 21 rounds each, every game on a
    fresh copy of ``graph``.  Player 1 picks a strategy epsilon-greedily from
    the Q-table; player 2 always uses the strategy selected by
    ``run.opponent``.  After each round the Q-table entry for the state the
    round started in is updated with the standard Q-learning rule.

    Args:
        graph: Game graph providing ``copy()`` and a ``graph`` attribute
            dict with ``'1'`` and ``'2'`` entries.
        feature_lmh_ranges: Passed unchanged to ``tools.get_state`` to map
            the graph to a state index.

    Returns:
        The learned Q-table, an array of shape ``(2187, 3)`` indexed by
        ``[state, action]``.

    Raises:
        ValueError: If ``run.opponent`` is not 0, 1 or 2.
    """
    alpha = 0.5   # learning rate, multiplied by `d` after every game
    eps = 0.8     # exploration probability used during the first game
    gamma = 0.98  # discount factor
    d = 0.998     # per-game decay of the learning rate

    opponent_action = run.opponent
    # Fail early with a clear message; otherwise an unknown value would only
    # surface inside the game loop as an unbound `seed2`.
    if opponent_action not in (0, 1, 2):
        raise ValueError(
            f"run.opponent must be 0, 1 or 2, got {opponent_action!r}")

    # The opponent's strategy never changes, so resolve it once.
    if opponent_action == 0:
        opponent_move = actions.action_degree
    elif opponent_action == 1:
        opponent_move = actions.action_weight
    else:
        def opponent_move(board):
            return actions.action_blocking(board, 2)

    # 2187 == 3 ** 7 states; presumably 7 features with 3 levels each —
    # confirm against tools.get_state.
    q_table = np.zeros((2187, 3))

    for iteration in tqdm(range(run.train_n_rounds)):
        g = graph.copy()
        state = tools.get_state(g, feature_lmh_ranges)

        for t in range(21):
            # first player action: explore with probability eps, else greedy
            if np.random.random() < eps:
                action_choice1 = np.random.randint(3)
            else:
                action_choice1 = np.argmax(q_table[state, :])

            if action_choice1 == 0:
                seed1 = actions.action_degree(g)
            elif action_choice1 == 1:
                seed1 = actions.action_weight(g)
            else:
                seed1 = actions.action_blocking(g, 1)

            # illegal action: fall back to the degree strategy; the Q-update
            # below is then credited to action 0, the action actually played
            if seed1 == -1:
                print("Illegal action")
                action_choice1 = 0
                seed1 = actions.action_degree(g)

            tools.activate_node(g, seed1, 1)

            # second player action
            seed2 = opponent_move(g)

            # illegal action
            if seed2 == -1:
                print("Illegal action")
                seed2 = actions.action_degree(g)
            tools.activate_node(g, seed2, 2)

            a1, a2 = tools.diffuse(g)
            for n in a1:
                tools.activate_node(g, n, 1)
            for n in a2:
                tools.activate_node(g, n, 2)

            next_state = tools.get_state(g, feature_lmh_ranges)

            # delayed reward: the score difference is only paid out on
            # rounds whose index is a multiple of 6 (0, 6, 12, 18)
            r = 0
            if t % 6 == 0:
                r = len(g.graph['1']) - len(g.graph['2'])

            q_table[state, action_choice1] = (
                1 - alpha) * q_table[state, action_choice1] + alpha * (
                    r + gamma * np.max(q_table[next_state, :]))
            state = next_state

        alpha = d * alpha
        # Linear schedule in the game index; replaces the initial 0.8 from
        # the second game onwards.
        eps = 0.9 - (0.6 * iteration / 500)

    return q_table
示例#3
0
def compete(graph: nx.Graph, q_table, opponent_action=0):
    """Play one full game with a learned Q-table as player 1.

    The game is played directly on ``graph`` (no copy is made) until at most
    one node is left in ``graph.graph['free']`` or the round limit is hit.
    Player 1 plays the greedy action of the Q-table for the current state;
    for a state whose Q-row is entirely zero (never updated) it picks one of
    the five strategies at random.  Player 2 always plays
    ``opponent_action``.

    Args:
        graph: Game graph with a ``graph`` attribute dict holding the
            ``'free'``, ``'1'`` and ``'2'`` entries.
        q_table: Array indexed as ``q_table[state, action]``.
        opponent_action: Strategy of player 2: 0 degree, 1 weight,
            2 blocking.  Defaults to 0.

    Returns:
        A dict with the number of rounds played (``"counter"``), the number
        of rounds played from an unseen state (``"un_seen"``) and the final
        score difference player 1 minus player 2 (``"score"``).

    Raises:
        ValueError: If ``opponent_action`` is not 0, 1 or 2, or if the
            greedy action index has no matching strategy.
    """
    if opponent_action not in (0, 1, 2):
        raise ValueError(
            f"opponent_action must be 0, 1 or 2, got {opponent_action!r}")

    counter = 0
    max_iteration = 20000000
    un_seen = 0

    while len(graph.graph['free']) > 1 and counter < max_iteration:
        state = tools.get_state(graph)

        # first player action
        q_row = q_table[state, :]
        if np.any(q_row):
            action_choice1 = np.argmax(q_row)
        else:
            # An all-zero row means the state was never updated.  Testing
            # the row's sum instead would also flag visited states whose
            # values happen to cancel out.
            un_seen += 1
            action_choice1 = np.random.randint(5)

        if action_choice1 == 0:
            seed1 = actions.action_degree(graph)
        elif action_choice1 == 1:
            seed1 = actions.action_weight(graph)
        elif action_choice1 == 2:
            seed1 = actions.action_blocking(graph, 1)
        elif action_choice1 == 3:
            seed1 = actions.action_last(graph)
        elif action_choice1 == 4:
            seed1 = actions.action_min_degree(graph)
        else:
            raise ValueError(
                f"q_table selected unsupported action {action_choice1!r}")

        # illegal action: fall back to the degree strategy
        if seed1 == -1:
            print("illegal 1")
            print(action_choice1)
            action_choice1 = 0
            seed1 = actions.action_degree(graph)

        tools.activate_node(graph, seed1, 1)

        # second player action
        action_choice2 = opponent_action
        if action_choice2 == 0:
            seed2 = actions.action_degree(graph)
        elif action_choice2 == 1:
            seed2 = actions.action_weight(graph)
        else:
            seed2 = actions.action_blocking(graph, 2)

        # illegal action: fall back to the degree strategy
        if seed2 == -1:
            print("illegal 2")
            print(action_choice2)
            action_choice2 = 0
            seed2 = actions.action_degree(graph)
        tools.activate_node(graph, seed2, 2)

        a1, a2 = tools.diffuse(graph)
        for n in a1:
            tools.activate_node(graph, n, 1)
        for n in a2:
            tools.activate_node(graph, n, 2)

        counter += 1

    score = len(graph.graph['1']) - len(graph.graph['2'])
    print(f"counter: {counter}")
    print(f"unseen: {un_seen}")
    print(f"score: {score}")

    return {"counter": counter, "un_seen": un_seen, "score": score}