def main():
    # running_reward = 0
    prior_efficiency = 0
    continuous_trigger = 0

    best_efficiency = -1000
    best_epoch = 0

    # Regenerate the initial graph until a connected one is obtained
    while True:
        new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
            origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
            origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
        env = FEMGym(new_node_pos,
                     new_edges_indices, new_edges_thickness)
        env.reset()
        if env.confirm_graph_is_connected():
            break
    nodes_pos, _, _, _ = env.extract_node_edge_info()
    first_node_num = nodes_pos.shape[0]

    # run for train_num episodes
    for epoch in tqdm(range(train_num)):
        # for epoch in count(1):

        # reset environment and episode reward
        while True:
            new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
                origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
                origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
            env = FEMGym(new_node_pos,
                         new_edges_indices, new_edges_thickness)
            env.reset()
            if env.confirm_graph_is_connected():
                break
        state = env.reset()
        ep_reward = 0
        continuous_trigger = 0

        # for each episode, run at most max_action steps so that
        # we don't infinite-loop while learning
        for t in range(max_action):
            # select action from policy
            action = select_action(first_node_num)
            nodes_pos, edges_indices, edges_thickness, adj = env.extract_node_edge_info()

            # take the action
            state, _, done, info = env.step(action)
            if (t == (max_action - 1)) and not done:  # the episode did not finish within max_action steps
                reward = -final_penalty
            elif env.confirm_graph_is_connected():
                efficiency = env.calculate_simulation()
                if continuous_trigger == 1:
                    reward = efficiency-prior_efficiency
                else:
                    reward = efficiency+continuous_reward
                    continuous_trigger = 1
                prior_efficiency = efficiency

            elif continuous_trigger == 1:
                reward = -penalty
            else:
                reward = 0

            GCN.rewards.append(reward)

            ep_reward += reward
            steps = t  # record the step index every iteration so it is defined even if the episode never terminates
            if done:
                break

        # update cumulative reward
        # running_reward = 0.05 * ep_reward + (1 - 0.05) * running_reward

        # perform backprop
        loss = finish_episode()

        # compute the final efficiency for this episode
        if env.confirm_graph_is_connected():
            result_efficiency = env.calculate_simulation()
        else:
            result_efficiency = -1

        if best_efficiency < result_efficiency:
            best_epoch = epoch
            best_efficiency = result_efficiency
            save_model(save_name="Good")
            env.render(os.path.join(
                log_dir, 'render_image/{}.png'.format(epoch+1)))

        history['epoch'].append(epoch+1)
        history['loss'].append(loss)
        history['ep_reward'].append(ep_reward)
        history['result_efficiency'].append(result_efficiency)
        history['steps'].append(steps+1)

        # save the training history
        with open(os.path.join(log_dir, 'history.pkl'), 'wb') as f:
            pickle.dump(history, f)
        with open(os.path.join(log_dir, "progress.txt"), mode='a') as f:
            f.writelines('epoch %d, loss: %.4f ep_reward: %.4f result_efficiency: %.4f\n' %
                         (epoch + 1, loss, ep_reward, result_efficiency))
        with open(os.path.join(log_dir, "represent_value.txt"), mode='w') as f:
            f.writelines('epoch %d,  best_efficiency: %.4f\n' %
                         (best_epoch+1, best_efficiency))
        save_model(save_name="Last")

        plot_loss_history(history, os.path.join(
            log_dir, 'learning_loss_curve.png'))
        plot_reward_history(history, os.path.join(
            log_dir, 'learning_reward_curve.png'))
        plot_efficiency_history(history, os.path.join(
            log_dir, 'learning_effi_curve.png'))
        plot_steps_history(history, os.path.join(
            log_dir, 'learning_steps_curve.png'))
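
# The policy-gradient update called as "loss = finish_episode()" above is not
# shown in this excerpt. The following is a minimal, hedged sketch of such an
# episode-end update, assuming a standard REINFORCE/actor-critic recipe: the
# names finish_episode_sketch, GCN.saved_actions, and gamma are hypothetical
# stand-ins for whatever the real script stores per step next to GCN.rewards,
# and torch plus GCN_optimizer are assumed to be defined elsewhere in the file.
def finish_episode_sketch(gamma=0.99):
    # discounted returns, computed backwards over the episode's rewards
    R = 0
    returns = []
    for r in GCN.rewards[::-1]:
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)

    # policy-gradient loss: -log_prob * advantage for every saved action
    policy_losses = []
    for (log_prob, value), R in zip(GCN.saved_actions, returns):
        advantage = R - value.item()
        policy_losses.append(-log_prob * advantage)

    GCN_optimizer.zero_grad()
    loss = torch.stack(policy_losses).sum()
    loss.backward()
    GCN_optimizer.step()

    # clear the per-episode buffers before the next epoch
    del GCN.rewards[:]
    del GCN.saved_actions[:]
    return loss.item()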
Example #2
    [-1, 0],
    [-1, 0],
])

origin_frozen_nodes = [1, 3, 5, 7, 9, 11, 13, 15]

# Extract the elements to feed into the gym environment
new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_main_node_edge_info(
    origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
    origin_output_nodes, origin_output_vectors, origin_frozen_nodes)
env = FEMGym(new_node_pos,
             new_edges_indices, new_edges_thickness)

# Loop for one episode
state = env.reset()
env.confirm_graph_is_connected()
env.render("fem_images/image_first.png")
total_time = 0
total_calc_time = 0
for i in range(500):
    # Get a random action
    action = env.random_action()
    # Execute one step
    state, reward, done, info = env.step(action)
    if env.confirm_graph_is_connected():
        reward = 0
        start = time.time()
        efficiency = env.calculate_simulation()
        elapsed_time = time.time() - start
        print("elapsed_time:{0}".format(elapsed_time) + "[sec]")
        reward = efficiency
# Sub-models used by the policy: node selection and edge thickness
Select_node1 = model.Select_node1_model(node_out_features, 2).double()
Select_node2 = model.Select_node2_model(
    node_features+node_out_features, 2).double()
Edge_thickness = model.Edge_thickness_model(
    node_features+node_out_features, 2).double()


# Create a continuous-state initial graph (retry until it is connected)
while True:
    new_node_pos, new_input_nodes, new_input_vectors, new_output_nodes, new_output_vectors, new_frozen_nodes, new_edges_indices, new_edges_thickness = make_continuous_init_graph(
        origin_nodes_positions, origin_edges_indices, origin_input_nodes, origin_input_vectors,
        origin_output_nodes, origin_output_vectors, origin_frozen_nodes, EDGE_THICKNESS)
    env = FEMGym(new_node_pos,
                 new_edges_indices, new_edges_thickness)
    env.reset()
    if env.confirm_graph_is_connected():
        env.render('render_image/yatta.png')
        break

# Containers for the per-step values saved during an episode
Saved_Action = namedtuple('SavedAction', ['action', 'value'])
Saved_prob_Action = namedtuple('SavedAction', ['log_prob'])
Saved_mean_std_Action = namedtuple(
    'SavedAction', ['mean', 'variance'])
GCN_optimizer = optim.Adam(GCN.parameters(), lr=lr)
X_Y_optimizer = optim.Adam(X_Y.parameters(), lr=lr)
Stop_optimizer = optim.Adam(Stop.parameters(), lr=lr)
Select_node1_optimizer = optim.Adam(Select_node1.parameters(), lr=lr)
Select_node2_optimizer = optim.Adam(Select_node2.parameters(), lr=lr)
Edge_thickness_optimizer = optim.Adam(Edge_thickness.parameters(), lr=lr)
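
# The six Adam optimizers above each own one sub-model's parameters. Below is
# a hedged sketch of one way to drive them from a single combined loss (an
# assumption for illustration, not necessarily this script's update routine):
# zero every optimizer, run one backward pass, then step each optimizer.
ALL_OPTIMIZERS = (GCN_optimizer, X_Y_optimizer, Stop_optimizer,
                  Select_node1_optimizer, Select_node2_optimizer,
                  Edge_thickness_optimizer)


def apply_combined_update(total_loss):
    # clear accumulated gradients on every sub-model
    for opt in ALL_OPTIMIZERS:
        opt.zero_grad()
    # a single backward pass populates gradients for all sub-models it touches
    total_loss.backward()
    # apply one Adam step per sub-model
    for opt in ALL_OPTIMIZERS:
        opt.step()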


def select_action(first_node_num):