Example #1
def confirm_max_status():
    "最大値となる状態を求める."
    max = 0
    x = 1000
    for i in np.arange(0.1, 1.001, 0.001):
        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()
        env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                        output_vectors, frozen_nodes, edges_indices,
                        edges_thickness, frozen_nodes)
        env.reset()

        action = {}
        action['which_node'] = np.array([0, 3])
        action['end'] = 0
        action['edge_thickness'] = np.array([i])
        action['new_node'] = np.array([[0, 2]])

        next_nodes_pos, _, done, _ = env.step(action)
        reward = env.calculate_simulation()
        if max_reward < reward:
            max_reward = reward
            x = i
    print("max:", x, max_reward)
Example #2
def check_maximum_point():
    # Function for investigating which condition gives the best value in step5_prior
    resolution = 100
    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors,
                    output_nodes, output_vectors, frozen_nodes,
                    edges_indices, edges_thickness, frozen_nodes)
    env.reset()
    reward = env.calculate_simulation()
    rewards = np.zeros((resolution, resolution))
    max_reward = 0
    for ix, x in enumerate(tqdm(np.linspace(0, 1, resolution))):
        for iy, y in enumerate(np.linspace(1, 0, resolution)):
            env.reset()
            action = {}
            action['which_node'] = np.array([3, 4])
            action['end'] = 0
            action['edge_thickness'] = np.array([1])
            action['new_node'] = np.array([[x, y]])
            next_nodes_pos, _, done, _ = env.step(action)
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
            reward = env.calculate_simulation()
            if max_reward < reward:
                max_reward = reward
                max_x = x
                max_y = y
            rewards[iy, ix] = reward
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(111)

    im = plt.imshow(rewards, extent=(0, 1, 0, 1))
    plt.colorbar(im)
    ax.set_xlabel(r"x", fontsize=20)
    ax.set_ylabel(r"y", fontsize=20)
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)

    plt.savefig("distribution.png")
    print(max_x, max_y)
    print(max_reward)
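The nested x/y loop above is a plain 2-D grid search; a minimal sketch of the same evaluation written against an assumed evaluate(x, y) helper, using origin='lower' so the reward array needs no axis flipping:

import numpy as np
import matplotlib.pyplot as plt

def plot_reward_grid(evaluate, resolution=100, path="distribution.png"):
    # evaluate the reward on a [0, 1] x [0, 1] grid and save a heat map
    xs = np.linspace(0, 1, resolution)
    ys = np.linspace(0, 1, resolution)
    rewards = np.array([[evaluate(x, y) for x in xs] for y in ys])  # rewards[iy, ix]
    iy, ix = np.unravel_index(np.argmax(rewards), rewards.shape)
    plt.imshow(rewards, extent=(0, 1, 0, 1), origin="lower")
    plt.colorbar()
    plt.savefig(path)
    return xs[ix], ys[iy], rewards[iy, ix]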
Example #3
    def calculate_efficiency(gene_nodes_pos, gene_edges_thickness, gene_adj_element, np_save_path=False):
        condition_nodes_pos, input_nodes, input_vectors, output_nodes, \
            output_vectors, frozen_nodes, condition_edges_indices, condition_edges_thickness\
            = make_main_node_edge_info(*condition(), condition_edge_thickness=0.2)

        # make edge_indices
        edges_indices = make_adj_triu_matrix(gene_adj_element, node_num, condition_edges_indices)

        # make nodes_pos
        nodes_pos = np.concatenate([condition_nodes_pos, gene_nodes_pos])

        # extract the subgraph that contains the condition nodes
        G = nx.Graph()
        G.add_nodes_from(np.arange(len(nodes_pos)))
        G.add_edges_from(edges_indices)
        condition_node_list = input_nodes + output_nodes + frozen_nodes

        trigger = 0  # set to 1 when a single connected component contains all condition nodes
        for c in nx.connected_components(G):
            sg = G.subgraph(c)  # subgraph for this component
            if set(condition_node_list) <= set(sg.nodes):  # does it contain all condition nodes?
                edges_indices = np.array(sg.edges)
                trigger = 1
                break
        if trigger == 0:  # apply the penalty
            return -10.0

        # make edges_thickness
        edges_thickness = make_edge_thick_triu_matrix(gene_edges_thickness, node_num, condition_edges_indices, condition_edges_thickness, edges_indices)

        env = BarFemGym(nodes_pos, input_nodes, input_vectors,
                        output_nodes, output_vectors, frozen_nodes,
                        edges_indices, edges_thickness, frozen_nodes)
        env.reset()
        efficiency = env.calculate_simulation()
        if np_save_path:
            env.render(save_path=os.path.join(np_save_path, "image.png"))
            np.save(os.path.join(np_save_path, "nodes_pos.npy"), nodes_pos)
            np.save(os.path.join(np_save_path, "edges_indices.npy"), edges_indices)
            np.save(os.path.join(np_save_path, "edges_thickness.npy"), edges_thickness)

        return float(efficiency)
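The penalty branch above hinges on a standard networkx pattern: find the connected component that contains every condition node. A standalone sketch of that check (the function name is illustrative):

import numpy as np
import networkx as nx

def component_edges_containing(edges_indices, n_nodes, required_nodes):
    # return the edge array of the component containing all required nodes, or None
    G = nx.Graph()
    G.add_nodes_from(range(n_nodes))
    G.add_edges_from(edges_indices)
    for component in nx.connected_components(G):
        if set(required_nodes) <= component:
            return np.array(G.subgraph(component).edges)
    return None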
Example #4
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,ノード1とノード2であり,一つのエッジのみを選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['mean_efficiency'] = []  # stores the efficiency η obtained when a_mean is used as the action
    history['a'] = []
    history['a_mean'] = []
    history['a_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step3/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    actorNet = Select_node1_model(2, 1, 400, 400).to(device).double()
    actorNet2 = Select_node2_model(400 + 2, 400).to(device).double()
    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(400).to(device).double()
    optimizer_actor = optim.Adam(actorNet.parameters(), lr=lr_actor)
    optimizer_actor2 = optim.Adam(actorNet2.parameters(), lr=lr_actor)
    optimizer_edgethick = optim.Adam(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  actorNet,
                                                  actorNet2,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation()
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              actorNet,
                              actorNet2,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_actor,
                              optimizer_actor2,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet,
                           edgethickNet,
                           os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
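A minimal usage sketch, assuming the function is importable from the module that defines it (the import path below is an assumption):

# hypothetical import path; point it at wherever actor_gcn_critic_gcn is defined
from a_gcn_c_gcn import actor_gcn_critic_gcn

history = actor_gcn_critic_gcn(max_episodes=2000,
                               test_name="edge_thickness_run",
                               log_file=True,
                               save_pth=True)
print(max(history['result_efficiency']))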
Example #5
def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0

    best_efficiency = -1000
    best_epoch = 0

    # generate an initial graph (retry until it is connected)
    while (1):
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes,
            origin_input_vectors, origin_output_nodes, origin_output_vectors,
            origin_frozen_nodes, EDGE_THICKNESS)
        env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                        new_output_nodes, new_output_vectors, new_frozen_nodes,
                        new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run the training episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset environment and episode reward
        while (1):
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices,
                origin_input_nodes, origin_input_vectors, origin_output_nodes,
                origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                            new_output_nodes, new_output_vectors,
                            new_frozen_nodes, new_edges_indices,
                            new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, only run 9999 steps so that we don't
        # infinite loop while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info(
            )

            # take the action
            state, _, done, info = env.step(action)
            if t == (max_action - 1) and not done:  # the episode did not finish within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency - prior_efficiency
                else:
                    reward = efficiency + continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency

            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)

            ep_reward += reward
            if done:
                break
        steps = t  # last executed step index (valid even if the loop ends without done)

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            # env.render(os.path.join(
            #    log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch + 1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps + 1)

        # save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines(
                'epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n'
                % (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d,  best_efficiency: %.4f\n' %
                         (best_epoch + 1, best_efficiency))
        save_model(save_name="Last")

        plot_loss_history(history,
                          os.path.join(log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history,
                            os.path.join(log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history,
                           os.path.join(log_dir, 'learning_steps_curve.png'))
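The branchy reward computation in the step loop above can be isolated into a helper; a sketch under the assumption that final_penalty, continuous_reward and penalty are the module-level constants used in main():

def shaped_reward(env, is_last_step, done, continuous_trigger, prior_efficiency,
                  final_penalty, continuous_reward, penalty):
    # returns (reward, continuous_trigger, prior_efficiency) following the logic in main()
    if is_last_step and not done:  # episode did not finish within max_action steps
        return -final_penalty, continuous_trigger, prior_efficiency
    if env.confirm_graph_is_connected():
        efficiency = env.calculate_simulation()
        if continuous_trigger == 1:
            reward = efficiency - prior_efficiency
        else:
            reward = efficiency + continuous_reward
            continuous_trigger = 1
        return reward, continuous_trigger, efficiency
    if continuous_trigger == 1:
        return -penalty, continuous_trigger, prior_efficiency
    return 0, continuous_trigger, prior_efficiency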
Example #6
def select_action_gcn_critic_gcn(env,
                                 criticNet,
                                 edgethickNet,
                                 device,
                                 log_dir=None,
                                 history=None):
    nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
    )
    node, edge, node_adj, edge_adj, D_v, D_e, T = make_torch_type_for_GCN(
        nodes_pos, edges_indices, edges_thickness, node_adj)
    state_value = criticNet(node, edge, node_adj, edge_adj, D_v, D_e, T)
    node1 = 0
    node2 = 3
    edge_thickness = edgethickNet(node, edge, node_adj, edge_adj, D_v, D_e, T,
                                  node1, node2)
    edge_thickness_tdist = tdist.Normal(edge_thickness[0][0].item(),
                                        edge_thickness[0][1].item())
    edge_thickness_action = edge_thickness_tdist.sample()
    edge_thickness_action = torch.clamp(edge_thickness_action, min=0, max=1)

    action = {}
    action['which_node'] = np.array([node1, node2])
    action['end'] = 0
    action['edge_thickness'] = np.array([edge_thickness_action.item()])
    action['new_node'] = np.array([[0, 2]])

    # save to action buffer
    criticNet.saved_actions.append(Saved_Action(action, state_value))
    edgethickNet.saved_actions.append(
        Saved_mean_std_Action(edge_thickness[0][0], edge_thickness[0][1]))

    if log_dir is not None:
        # quantities logged for checking the loss
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            print('edge_thick_mean:', edge_thickness[0][0].item(), file=f)
            print('edge_thick_std:', edge_thickness[0][1].item(), file=f)
            print('edge_thickness:', edge_thickness_action.item(), file=f)
    if history is not None:
        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()
        calc_effi_env = BarFemGym(node_pos, input_nodes, input_vectors,
                                  output_nodes, output_vectors, frozen_nodes,
                                  edges_indices, edges_thickness, frozen_nodes)
        calc_effi_env.reset()
        mean_action = {}
        mean_action['which_node'] = np.array([node1, node2])
        mean_action['end'] = 0
        mean_action['edge_thickness'] = np.array([edge_thickness[0][0].item()])
        mean_action['new_node'] = np.array([[0, 2]])
        next_nodes_pos, _, done, _ = calc_effi_env.step(mean_action)
        mean_efficiency = calc_effi_env.calculate_simulation(mode='force')

        # log to history
        history['mean_efficiency'].append(mean_efficiency)
        history['a'].append(edge_thickness_action.item())
        history['a_mean'].append(edge_thickness[0][0].item())
        history['a_sigma'].append(edge_thickness[0][1].item())
        history['critic_value'].append(state_value.item())

    return action
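The edge-thickness action above is drawn from a Normal distribution parameterised by the network and clamped to [0, 1]; a standalone sketch of that sampling step (names are illustrative):

import torch
import torch.distributions as tdist

def sample_bounded_action(mean, std, low=0.0, high=1.0):
    # sample from N(mean, std) and clamp the result into [low, high]
    dist = tdist.Normal(mean, std)
    action = torch.clamp(dist.sample(), min=low, max=high)
    return action.item(), dist.log_prob(action)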
Example #7
def select_action_gcn_critic_gcn(env,
                                 node1Net,
                                 node2Net,
                                 criticNet,
                                 edgethickNet,
                                 device,
                                 log_dir=None,
                                 history=None):
    nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
    )
    node_num = nodes_pos.shape[0]
    node, edge, node_adj, edge_adj, D_v, D_e, T = make_torch_type_for_GCN(
        nodes_pos, edges_indices, edges_thickness, node_adj)
    state_value = criticNet(node, edge, node_adj, edge_adj, D_v, D_e, T)
    # choose node 1
    emb_node, node1_prob = node1Net(node, edge, node_adj, edge_adj, D_v, D_e,
                                    T)
    node1_categ = Categorical(node1_prob)
    node1 = node1_categ.sample()

    # build the node features with node 1 removed
    non_node1_node = torch.cat([node[:, 0:node1, :], node[:, node1 + 1:, :]],
                               1)

    # extract the features of node 1
    H1 = emb_node[0][node1]
    H1_cat = H1.repeat(node_num - 1, 1)
    H1_cat = H1_cat.unsqueeze(0)
    # concatenate the node features H with H1
    emb_graph_cat = torch.cat([non_node1_node, H1_cat], 2)

    # choose node 2
    emb_edge, node2_prob = node2Net(emb_graph_cat)
    node2_categ = Categorical(node2_prob)
    node2_temp = node2_categ.sample()
    if node2_temp >= node1:
        node2 = node2_temp + 1  # shift to compensate for the removed node 1
    else:
        node2 = node2_temp

    edge_thickness = edgethickNet(emb_edge)
    edge_thickness_tdist = tdist.Normal(edge_thickness[0][0].item(),
                                        edge_thickness[0][1].item())
    edge_thickness_action = edge_thickness_tdist.sample()
    edge_thickness_action = torch.clamp(edge_thickness_action, min=0, max=1)

    action = {}
    action['which_node'] = np.array([node1.item(), node2.item()])
    action['end'] = 0
    action['edge_thickness'] = np.array([edge_thickness_action.item()])
    action['new_node'] = np.array([[0, 2]])

    # save to action buffer
    criticNet.saved_actions.append(Saved_Action(action, state_value))
    node1Net.saved_actions.append(
        Saved_prob_Action(node1_categ.log_prob(node1)))
    node2Net.saved_actions.append(
        Saved_prob_Action(node2_categ.log_prob(node2_temp)))
    edgethickNet.saved_actions.append(
        Saved_mean_std_Action(edge_thickness[0][0], edge_thickness[0][1]))

    if log_dir is not None:
        # quantities logged for checking the loss
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            print('node1_prob:', node1_prob, file=f)
            print('node1:', node1.item(), file=f)
            print('node2_prob:', node2_prob, file=f)
            print('node2:', node2.item(), file=f)
            print('edge_thick_mean:', edge_thickness[0][0].item(), file=f)
            print('edge_thick_std:', edge_thickness[0][1].item(), file=f)
            print('edge_thickness:', edge_thickness_action.item(), file=f)
    if history is not None:
        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()
        calc_effi_env = BarFemGym(node_pos, input_nodes, input_vectors,
                                  output_nodes, output_vectors, frozen_nodes,
                                  edges_indices, edges_thickness, frozen_nodes)
        calc_effi_env.reset()
        mean_action = {}
        mean_action['which_node'] = np.array([node1.item(), node2.item()])
        mean_action['end'] = 0
        mean_action['edge_thickness'] = np.array([edge_thickness[0][0].item()])
        mean_action['new_node'] = np.array([[0, 2]])
        next_nodes_pos, _, done, _ = calc_effi_env.step(mean_action)
        mean_efficiency = calc_effi_env.calculate_simulation()

        # log to history
        history['mean_efficiency'].append(mean_efficiency)
        history['a'].append(edge_thickness_action.item())
        history['a_mean'].append(edge_thickness[0][0].item())
        history['a_sigma'].append(edge_thickness[0][1].item())
        history['critic_value'].append(state_value.item())

    return action
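The node2_temp adjustment above maps an index sampled from the node list with node 1 removed back to an index in the full node list; a minimal sketch with a usage check:

def restore_index(sampled_idx, removed_idx):
    # map an index from a list with `removed_idx` deleted back to the original list
    return sampled_idx + 1 if sampled_idx >= removed_idx else sampled_idx

# e.g. with node1 = 2, a sampled index of 2 refers to original node 3
assert restore_index(2, 2) == 3
assert restore_index(1, 2) == 1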
Example #8
def load_actor_gcn_critic_gcn(load_dir,
                              load_epoch,
                              max_episodes=5000,
                              test_name="test",
                              history=None,
                              log_file=False):
    """ActorCriticにおいて保存されpthをロードし,そこから学習を開始する.

    Args:
        load_dir ([type]): ロードする対象のpthが複数存在するディレクトリのパスを指定する.
        load_epoch ([type]): いつのepochから学習を開始するかを決める.
        max_episodes (int, optional): 学習回数. Defaults to 5000.
        test_name (str, optional): 保存ファイルの名前. Defaults to "test".
        history ([type], optional): 保存したhistory.これを指定した時,グラフにもロード結果が適用される. Defaults to None.
        log_file (bool, optional): Trueにすると,progress.txtに損失関数などの情報のログをとる. Defaults to False.
    """

    if history is None:
        history = {}
        history['epoch'] = []
        history['result_efficiency'] = []
        history['mean_efficiency'] = []  # stores the efficiency η obtained when a_mean is used as the action
        history['a'] = []
        history['a_mean'] = []
        history['a_sigma'] = []
        history['advantage'] = []
        history['critic_value'] = []
    else:
        for key in history.keys():
            history[key] = history[key][:load_epoch]

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    max_steps = 1
    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    edgethickNet = Edgethick_Actor(2, 1, 400, 400).to(device).double()

    criticNet.load_state_dict(
        torch.load(
            os.path.join(load_dir, "pth/{}_criticNet.pth".format(load_epoch))))
    edgethickNet.load_state_dict(
        torch.load(
            os.path.join(load_dir,
                         "pth/{}_edgethickNet.pth".format(load_epoch))))

    optimizer_edgethick = optim.SGD(edgethickNet.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(load_epoch, max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        for step in range(max_steps):
            action = select_action_gcn_critic_gcn(env,
                                                  criticNet,
                                                  edgethickNet,
                                                  device,
                                                  log_dir=log_file,
                                                  history=history)

            next_nodes_pos, _, done, _ = env.step(action)
            reward = env.calculate_simulation(mode='force')
            criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              edgethickNet,
                              optimizer_critic,
                              optimizer_edgethick,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)

    env.close()
    plot_efficiency_history(history,
                            os.path.join(log_dir, 'learning_effi_curve.png'))

    return history
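The checkpoint layout assumed above is pth/<epoch>_<netname>.pth inside the load directory; a small helper sketch that loads several networks at once under that assumption:

import os
import torch

def load_networks(load_dir, load_epoch, nets):
    # nets: dict mapping names such as "criticNet" to module instances
    for name, net in nets.items():
        path = os.path.join(load_dir, "pth", "{}_{}.pth".format(load_epoch, name))
        net.load_state_dict(torch.load(path))
    return nets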
Example #9
def actor_gcn_critic_gcn(max_episodes=5000,
                         test_name="test",
                         log_file=False,
                         save_pth=False):
    """Actor-Criticを行う.Actor,CriticはGCN
    Actorの指定できるものは,一つのエッジのみの幅を選択できる.
    max_episodes:学習回数
    test_name:保存ファイルの名前
    log_file: Trueにすると,progress.txtに損失関数などの情報のログをとる."""

    history = {}
    history['epoch'] = []
    history['result_efficiency'] = []
    history['x'] = []
    history['x_mean'] = []
    history['x_sigma'] = []
    history['y'] = []
    history['y_mean'] = []
    history['y_sigma'] = []
    history['advantage'] = []
    history['critic_value'] = []

    log_dir = "confirm/step5_entropy/a_gcn_c_gcn_results/{}".format(test_name)

    assert not os.path.exists(log_dir), "folder already exists"
    if log_file:
        log_file = log_dir
    else:
        log_file = None
    os.makedirs(log_dir, exist_ok=True)

    node_pos, input_nodes, input_vectors,\
        output_nodes, output_vectors, frozen_nodes,\
        edges_indices, edges_thickness, frozen_nodes = easy_dev()
    env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                    output_vectors, frozen_nodes, edges_indices,
                    edges_thickness, frozen_nodes)
    env.reset()

    lr_actor = 1e-4
    lr_critic = 1e-3
    weight_decay = 1e-2
    gamma = 0.99

    device = torch.device('cpu')

    criticNet = CriticNetwork_GCN(2, 1, 400, 400).to(device).double()
    x_y_Net = X_Y_Actor(2, 1, 400, 400).to(device).double()
    node1Net = Select_node1_model(2, 1, 400, 400).to(device).double()
    node2Net = Select_node2_model(400 + 2, 400).to(
        device).double()  # the 400 in 400 + 2 corresponds to the input3 part of Select_node1_model
    optimizer_node1 = optim.Adam(node1Net.parameters(), lr=lr_actor)
    optimizer_node2 = optim.Adam(node2Net.parameters(), lr=lr_actor)
    optimizer_xy = optim.Adam(x_y_Net.parameters(), lr=lr_actor)
    optimizer_critic = optim.Adam(criticNet.parameters(),
                                  lr=lr_critic,
                                  weight_decay=weight_decay)

    for episode in tqdm(range(max_episodes)):
        if log_file:
            with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
                print('\nepoch:', episode, file=f)
        env = BarFemGym(node_pos, input_nodes, input_vectors, output_nodes,
                        output_vectors, frozen_nodes, edges_indices,
                        edges_thickness, frozen_nodes)
        env.reset()
        nodes_pos, edges_indices, edges_thickness, node_adj = env.extract_node_edge_info(
        )
        action = select_action_gcn_critic_gcn(env,
                                              criticNet,
                                              node1Net,
                                              node2Net,
                                              x_y_Net,
                                              device,
                                              log_dir=log_file,
                                              history=history)
        next_nodes_pos, _, done, _ = env.step(action)
        if 4 in action['which_node']:
            env.input_nodes = [2, 4]
            env.input_vectors = np.array([[1, 0], [0, 1]])
        if 2 in action['which_node'] and 4 in action['which_node']:  # TODO: train the policy not to select [2, 4]
            reward = np.array([0])
        else:
            reward = env.calculate_simulation()
        criticNet.rewards.append(reward)

        loss = finish_episode(criticNet,
                              x_y_Net,
                              node1Net,
                              node2Net,
                              optimizer_critic,
                              optimizer_xy,
                              optimizer_node1,
                              optimizer_node2,
                              gamma,
                              log_dir=log_file,
                              history=history)

        history['epoch'].append(episode + 1)
        history['result_efficiency'].append(reward)
        plot_efficiency_history(
            history, os.path.join(log_dir, 'learning_effi_curve.png'))
        if episode % 100 == 0:
            if save_pth:
                save_model(criticNet,
                           x_y_Net,
                           os.path.join(log_dir, "pth"),
                           save_name=str(episode))

    env.close()
    with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
        pickle.dump(history, f)

    return history
Example #10
barfem_output_nodes = [68]
condition_nodes = origin_input_nodes + origin_output_nodes + origin_frozen_nodes

env = BarFemGym(origin_nodes_positions, barfem_input_nodes,
                origin_input_vectors, barfem_output_nodes,
                origin_output_vectors, origin_frozen_nodes,
                origin_edges_indices, origin_edges_thickness,
                origin_frozen_nodes)
env.reset()
# env.render(os.path.join(log_dir, 'render_image/first.png'))

# create the initial state
best_efficiency = -1000

if env.confirm_graph_is_connected():
    current_efficiency = env.calculate_simulation()
else:
    current_efficiency = -1

current_edges_indices = origin_edges_indices.copy()

for epoch, temperature in enumerate(tqdm(temperatures)):
    # pick an edge that is not between condition nodes
    while (1):
        chosen_edge_indice = np.random.randint(0, edge_num)
        target_edge_indice = origin_edges_indices[chosen_edge_indice]
        if not np.any(np.isin(target_edge_indice, condition_nodes)):
            break

    # compute proposed_efficiency
    # NOTE: the original snippet is truncated here; the second isin() term below is an assumed completion
    mask = np.isin(current_edges_indices[:, 0], target_edge_indice) & np.isin(
        current_edges_indices[:, 1], target_edge_indice)
Example #11
# NOTE: the opening of this snippet is missing in the source; the graph-construction call
# below is restored from the identical call in the main() example above
new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
    origin_nodes_positions, origin_edges_indices, origin_input_nodes,
    origin_input_vectors, origin_output_nodes, origin_output_vectors,
    origin_frozen_nodes, EDGE_THICKNESS)

env = BarFemGym(new_node_pos, new_input_nodes, new_input_vectors,
                new_output_nodes, new_output_vectors, new_frozen_nodes,
                new_edges_indices, new_edges_thickness)

# run one episode
state = env.reset()
total_time = 0
total_calc_time = 0
for i in range(500):
    # get a random action
    action = env.random_action()
    # execute one step
    state, reward, done, info = env.step(action)
    if env.confirm_graph_is_connected():
        reward = 0
        start = time.time()
        efficiency = env.calculate_simulation()
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
        reward = efficiency

        total_time += elapsed_time
        total_calc_time += 1
    else:
        reward = -1

print("一回辺りの計算時間:", total_time / total_calc_time)
Example #12
class Worker(mp.Process):
    def __init__(self,
                 global_criticNet,
                 global_x_y_Net,
                 global_node1Net,
                 global_node2Net,
                 Critic_opt,
                 x_y_opt,
                 Node1_opt,
                 Node2_opt,
                 global_ep,
                 global_ep_r,
                 res_queue,
                 name,
                 gamma=0.99,
                 total_episodes=5000):
        super(Worker, self).__init__()
        self.name = 'w%i' % name
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.Critic_opt, self.x_y_opt, self.Node1_opt, self.Node2_opt = Critic_opt, x_y_opt, Node1_opt, Node2_opt
        self.global_criticNet, self.global_x_y_Net,\
            self.global_node1Net, self.global_node2Net = global_criticNet, global_x_y_Net, global_node1Net, global_node2Net
        device = torch.device('cpu')
        self.local_criticNet = CriticNetwork_GCN(2, 1, 400,
                                                 400).double().to(device)
        self.local_x_y_Net = X_Y_Actor(2, 1, 400, 400).double().to(device)
        self.local_node1Net = Select_node1_model(2, 1, 400,
                                                 400).double().to(device)
        self.local_node2Net = Select_node2_model(400 + 2,
                                                 400).double().to(device)

        node_pos, input_nodes, input_vectors,\
            output_nodes, output_vectors, frozen_nodes,\
            edges_indices, edges_thickness, frozen_nodes = easy_dev()
        self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                             output_nodes, output_vectors, frozen_nodes,
                             edges_indices, edges_thickness, frozen_nodes)
        self.env.reset()

        self.gamma = gamma  # reward discount factor
        self.total_episodes = total_episodes  # total number of episodes across all processes

    def finish_episode(self, log_dir=None, history=None):
        R = 0
        GCN_saved_actions = self.local_criticNet.saved_actions
        x_y_saved_actions = self.local_x_y_Net.saved_actions
        node1Net_saved_actions = self.local_node1Net.saved_actions
        node2Net_saved_actions = self.local_node2Net.saved_actions

        policy_losses = []  # list to save actor (policy) loss
        value_losses = []  # list to save critic (value) loss
        returns = []  # list to save the true values

        # calculate the true value using rewards returned from the environment
        for r in self.local_criticNet.rewards[::-1]:
            # calculate the discounted value
            R = r + self.gamma * R
            returns.insert(0, R)
        returns = torch.tensor(returns)

        x_y_opt_trigger = False  # trigger that enables the x_y_opt update only when a step with advantage > 0 occurred
        for (action,
             value), (x_y_mean, x_y_std, x_dist,
                      y_dist), (node1_prob,
                                node1_dist), (node2_prob,
                                              node2_dist), R in zip(
                                                  GCN_saved_actions,
                                                  x_y_saved_actions,
                                                  node1Net_saved_actions,
                                                  node2Net_saved_actions,
                                                  returns):

            advantage = R - value.item()

            # calculate actor (policy) loss
            if action["end"]:
                print("okasii")
            else:
                log_probs = torch.cat([node1_prob, node2_prob])
                policy_loss = -torch.mean(log_probs) * advantage

                policy_losses.append(policy_loss)
                if advantage > 0:
                    x_y_mean_loss = F.l1_loss(
                        torch.from_numpy(action["new_node"][0]).double(),
                        x_y_mean.double())
                    x_y_var_loss = F.l1_loss(
                        torch.from_numpy(
                            np.abs(
                                action["new_node"][0] -
                                x_y_mean.to('cpu').detach().numpy().copy())),
                        x_y_std.double())
                    policy_losses.append(
                        (x_y_mean_loss + x_y_var_loss) * advantage)

                    x_y_opt_trigger = True  # enable the x_y_opt update
                else:
                    x_y_mean_loss = torch.zeros(1)
                    x_y_var_loss = torch.zeros(1)

            # calculate critic (value) loss using L1 loss
            value_losses.append(
                F.l1_loss(value.double(),
                          torch.tensor([[R]]).double()))

        # reset gradients
        self.Critic_opt.zero_grad()
        self.Node1_opt.zero_grad()
        self.Node2_opt.zero_grad()
        if x_y_opt_trigger:
            self.x_y_opt.zero_grad()

        # sum up all the values of policy_losses and value_losses
        if len(policy_losses) == 0:
            loss = torch.stack(value_losses).sum()
        else:
            loss = torch.stack(policy_losses).sum() + \
                torch.stack(value_losses).sum()

        # perform backprop
        loss.backward()
        for lp, gp in zip(self.local_criticNet.parameters(),
                          self.global_criticNet.parameters()):
            gp._grad = lp.grad
        for lp, gp in zip(self.local_node1Net.parameters(),
                          self.global_node1Net.parameters()):
            gp._grad = lp.grad
        for lp, gp in zip(self.local_node2Net.parameters(),
                          self.global_node2Net.parameters()):
            gp._grad = lp.grad
        if x_y_opt_trigger:
            for lp, gp in zip(self.local_x_y_Net.parameters(),
                              self.global_x_y_Net.parameters()):
                gp._grad = lp.grad

        self.Critic_opt.step()
        self.Node1_opt.step()
        self.Node2_opt.step()
        if x_y_opt_trigger:
            self.x_y_opt.step()

        # pull global parameters
        self.local_criticNet.load_state_dict(
            self.global_criticNet.state_dict())
        self.local_x_y_Net.load_state_dict(self.global_x_y_Net.state_dict())
        self.local_node1Net.load_state_dict(self.global_node1Net.state_dict())
        self.local_node2Net.load_state_dict(self.global_node2Net.state_dict())

        # reset rewards and action buffer
        del self.local_criticNet.rewards[:]
        del self.local_criticNet.saved_actions[:]
        del self.local_x_y_Net.saved_actions[:]
        del self.local_node1Net.saved_actions[:]
        del self.local_node2Net.saved_actions[:]

        if history is not None:
            history['advantage'].append(advantage.item())
        return loss.item()

    def run(self,
            max_episodes=5000,
            test_name="test",
            log_file=False,
            save_pth=False,
            history=None,
            device=torch.device('cpu')):
        while self.g_ep.value < self.total_episodes:
            # recreated here because the input nodes get reassigned after each step
            node_pos, input_nodes, input_vectors,\
                output_nodes, output_vectors, frozen_nodes,\
                edges_indices, edges_thickness, frozen_nodes = easy_dev()
            self.env = BarFemGym(node_pos, input_nodes, input_vectors,
                                 output_nodes, output_vectors, frozen_nodes,
                                 edges_indices, edges_thickness, frozen_nodes)
            self.env.reset()
            for episode in range(max_episodes):
                action = select_action_gcn_critic_gcn(self.env,
                                                      self.local_criticNet,
                                                      self.local_node1Net,
                                                      self.local_node2Net,
                                                      self.local_x_y_Net,
                                                      device,
                                                      log_dir=log_file,
                                                      history=history)
                next_nodes_pos, _, done, _ = self.env.step(action)
                if 4 in action['which_node']:
                    self.env.input_nodes = [2, 4]
                    self.env.input_vectors = np.array([[1, 0], [0, 1]])
                if 2 in action['which_node'] and 4 in action['which_node']:  # TODO: train the policy not to select [2, 4]
                    reward = np.array([0])
                else:
                    reward = self.env.calculate_simulation()
                self.local_criticNet.rewards.append(reward)

                done = True  # only one step per episode in this setting

                if done:  # update global and assign to local net
                    record(self.g_ep, self.g_ep_r, reward, self.res_queue,
                           self.name)
                    # sync
                    # push each worker's gradients to the global nets and pull the updated weights back into the local nets
                    self.finish_episode()

                    break

        self.res_queue.put(None)
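The reward-to-return conversion in finish_episode above is the standard discounted-return recursion; a standalone sketch with a small worked example:

def discounted_returns(rewards, gamma=0.99):
    # iterate over the rewards in reverse, accumulating R = r + gamma * R
    R = 0.0
    returns = []
    for r in reversed(rewards):
        R = r + gamma * R
        returns.insert(0, R)
    return returns

# e.g. discounted_returns([1.0, 0.0, 2.0], gamma=0.9) -> [2.62, 1.8, 2.0]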
Example #13
from FEM.bar_fem import barfem
import numpy as np
from env.gym_barfem import BarFemGym

nodes_pos = np.array([[0, 0],
                      [0.5, 0],
                      [0.5, 0.5],
                      [0, 0.5]])
nodes_pos[:, 0] += 0.25
nodes_pos[:, 1] += 0.25

edges_indices = np.array([[0, 1],
                          [1, 2],
                          [0, 3],
                          [2, 3], [1, 3]])

edges_thickness = np.array([1.0, 1.0, 1.0, 1.0, 1.0])

input_nodes = [2]
input_vectors = np.array([[1, 0]])
frozen_nodes = [1]
output_nodes = [0]
output_vectors = np.array([[0.5, 0.5]])

env = BarFemGym(nodes_pos, input_nodes, input_vectors,
                output_nodes, output_vectors, frozen_nodes,
                edges_indices, edges_thickness, frozen_nodes)
env.reset()

print(env.calculate_simulation())
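If you also want a picture of the structure, the env exposes render(save_path=...) as used in the calculate_efficiency example above; a minimal follow-up sketch (the output path is an assumption):

import os

os.makedirs("renders", exist_ok=True)
env.render(save_path=os.path.join("renders", "square_truss.png"))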