Exemplo n.º 1
0
            # Advance the environment by one step: the action is passed as a
            # string, then the reward, the next state and the termination flag
            # are read back through separate calls on minerEnv.
            minerEnv.step(
                str(action)
            )  # Apply the chosen action to the environment
            reward = minerEnv.get_reward()  # Reward reported after this step
            new_state_map, new_state_users = minerEnv.get_state(
            )  # Next state, unpacked into its map part and its users part
            terminate = minerEnv.check_terminate(
            )  # End-of-episode status after this step

            # Disabled timing counters; within this section they are referenced
            # only by the disabled timing summary at the bottom.
            #t1=0
            #t2=0
            #t3=0

            # Record the full transition in `memory`: current state (map, users),
            # action, reward, next state (map, users) and the termination flag.
            # NOTE(review): `memory` is presumably the replay buffer sampled
            # for training elsewhere -- confirm against its definition.
            #tmp_t1 = time.time()
            memory.append(state_map, state_users, action, reward,
                          new_state_map, new_state_users, terminate)
            #t1 = time.time() - tmp_t1

            episode_reward += reward  # Accumulate the running reward total for this episode
            # The next state becomes the current state for the following step.
            # This must happen after memory.append, which needs both the old
            # and the new state.
            state_map = new_state_map
            state_users = new_state_users

            # Read the current score from the environment state; it is not used
            # within the lines visible here.
            score = minerEnv.state.score
            # The bare string literal below is disabled code (a no-op expression
            # statement), not a docstring. It would log the t1/t2/t3 timings as a
            # tf.Summary through summary_writer_time at total_step; those names
            # are not defined anywhere in this section.
            """
            summary_2 = tf.Summary()
            summary_2.value.add(tag='time_append', simple_value=t1)
            summary_2.value.add(tag='time_take_samples', simple_value=t2)
            summary_2.value.add(tag='time_train', simple_value=t3)
            summary_writer_time.add_summary(summary_2, total_step)
            summary_writer_time.flush()
            """