# Module-level imports assumed by this excerpt. The project-local modules
# (logger, tensorboard_logging, phero_turtlebot_exp2, ActorCritic, Model, Runner)
# ship with the Turtlebot3_Pheromone repository, and the constants HOME and path
# are assumed to be defined at module level elsewhere in this file.
import os
import os.path as osp
import sys
import csv
import time
import random
from collections import deque

import numpy as np
import scipy.io as sio


def main(args):
    time_str = time.strftime("%Y%m%d-%H%M%S")
    logger_ins = logger.Logger(
        HOME + '/catkin_ws/src/Turtlebot3_Pheromone/src/log',
        output_formats=[logger.HumanOutputFormat(sys.stdout)])
    board_logger = tensorboard_logging.Logger(
        os.path.join(logger_ins.get_dir(), "tf_board", time_str))

    ########################################################
    game_state = phero_turtlebot_exp2.Env()  # game_state has frame_step(action) function
    actor_critic = ActorCritic(game_state)
    random.seed(args.random_seed)
    ########################################################

    num_trials = 600
    trial_len = 256
    log_interval = 5
    train_indicator = 1
    tfirststart = time.time()

    # Reward logging: create the CSV file and write its header
    with open(HOME + '/catkin_ws/src/Turtlebot3_Pheromone/src/log/csv/{}.csv'.format(
            actor_critic.file_name), mode='w') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(['Episode', 'Average Reward'])

    # Double-ended queue with max size 100 to store episode info
    epinfobuf = deque(maxlen=100)

    # Experiment related
    num_robots = game_state.num_robots
    current_state = game_state.reset()
    # actor_critic.read_human_data()

    step_reward = np.array([0, 0]).reshape(1, 2)
    # step_Q = [0, 0]
    step = 0

    if train_indicator == 2:
        # Debug mode: roll the policy out for short trials without training
        for i in range(num_trials):
            print("trial:" + str(i))
            # game_state.step(0.3, 0.2, 0.0)
            # game_state.reset()
            current_state = game_state.reset()
            ##############################################################################################
            total_reward = 0
            for j in range(100):
                step = step + 1
                # print("step is %s", step)
                ###########################################################################################
                # print('wanted value is %s:', game_state.observation_space.shape[0])
                current_state = current_state.reshape(
                    (1, game_state.observation_space.shape[0]))
                action, eps = actor_critic.act(current_state)
                action = action.reshape((1, game_state.action_space.shape[0]))
                print("action is speed: %s, angular: %s" % (action[0][1], action[0][0]))
                # We get reward and state here, then we need to work out
                # whether the robot crashed for the 'dones' value.
                _, new_state, reward, done, _ = game_state.step(
                    0.1, action[0][1] * 5, action[0][0] * 5)
                total_reward = total_reward + reward

    if train_indicator == 1:
        # Training mode
        # actor_critic.actor_model.load_weights("actormodel-90-1000.h5")
        # actor_critic.critic_model.load_weights("criticmodel-90-1000.h5")
        step_reward = np.array([0, 0]).reshape(1, 2)
        for i in range(num_trials):
            print("trial:" + str(i))
            # game_state.step(0.3, 0.2, 0.0)
            # game_state.reset()

            # Get states of multiple robots (num_robots x num_states)
            _, current_states = game_state.reset()
            ##############################################################################################
            # total_reward = 0
            epinfos = []
            for j in range(trial_len):
                ###########################################################################################
                # print('wanted value is %s:', game_state.observation_space.shape[0])
                current_states = current_states.reshape(
                    (num_robots, game_state.observation_space.shape[0]))

                # One action per robot
                actions = []
                for k in range(num_robots):
                    action, eps = actor_critic.act(current_states[k])
                    action = action.reshape((1, game_state.action_space.shape[0]))
                    actions.append(action)
                actions = np.squeeze(np.asarray(actions))
                # print("Actions: {}".format(actions))
                # print("action is speed: %s, angular: %s" % (action[0][1], action[0][0]))

                # We get reward and state here, then we need to work out
                # whether the robot crashed for the 'dones' value.
                _, new_states, rewards, dones, infos = game_state.step(actions, 0.1)
                # print("Rewards: {}".format(rewards))
                # total_reward = total_reward + reward
                ###########################################################################################

                # Force termination on the last step of a trial
                if j == (trial_len - 1):
                    dones = np.array([True, True]).reshape(game_state.num_robots, 1)
                    # print("this is reward:", total_reward)
                    # print('eps is', eps)

                step = step + 1
                # plot_reward(step, reward, ax, fig)
                # step_reward = np.append(step_reward, [step, reward])
                # step_start = time.time()
                # sio.savemat('step_reward.mat', {'data': step_reward}, True, '5', False, False, 'row')
                # print("step is %s", step)
                # print("info: {}".format(info[0]['episode']['r']))
                # Q_values = actor_critic.read_Q_values(current_state, action)
                # step_Q = np.append(step_Q, [step, Q_values[0][0]])
                # print("step_Q is %s", Q_values[0][0])
                # sio.savemat('step_Q.mat', {'data': step_Q}, True, '5', False, False, 'row')
                # print("Train_step time: {}".format(time.time() - step_start))

                epinfos.append(infos[0]['episode'])

                # Train the networks every 5 steps and update the target networks
                start_time = time.time()
                if j % 5 == 0:
                    actor_critic.train(j)
                    actor_critic.update_target()
                end_time = time.time()
                print("Train time: {}".format(end_time - start_time))

                # print("new_state: {}".format(new_state))
                new_states = new_states.reshape(
                    (num_robots, game_state.observation_space.shape[0]))
                # print("current_state is %s", current_state)
                ##########################################################################################

                # Store the transition in memory and in the replay buffer
                actor_critic.remember(current_states, actions, rewards, new_states, dones)
                actor_critic.replay_buffer.add(current_states, actions, rewards, new_states, dones)
                current_states = new_states
                ##########################################################################################

            if i % 10 == 0:
                actor_critic.save_weight(i, trial_len)
            epinfobuf.extend(epinfos)
            tnow = time.time()
            # fps = int(nbatch / (tnow - tstart))

            ##################################################
            ## Logging and saving model & weights           ##
            ##################################################
            if i % log_interval == 0 or i == 0:
                # ev = explained_variance(values, returns)
                reward_mean = safemean([epinfo['r'] for epinfo in epinfobuf])
                logger_ins.logkv("serial_timesteps", i * trial_len)
                logger_ins.logkv("nupdates", i)
                logger_ins.logkv("total_timesteps", i * trial_len)
                logger_ins.logkv('eprewmean', reward_mean)
                logger_ins.logkv('eplenmean',
                                 safemean([epinfo['l'] for epinfo in epinfobuf]))
                logger_ins.logkv('time_elapsed', tnow - tfirststart)
                # for (lossval, lossname) in zip(lossvals, model.loss_names):
                #     logger_ins.logkv(lossname, lossval)
                # logger_ins.dumpkvs()
                # for (lossval, lossname) in zip(lossvals, model.loss_names):
                #     board_logger.log_scalar(lossname, lossval, update)
                board_logger.log_scalar("eprewmean", reward_mean, i)
                board_logger.flush()

                with open(HOME + '/catkin_ws/src/Turtlebot3_Pheromone/src/log/csv/{}.csv'.format(
                        actor_critic.file_name), mode='a') as csv_file:
                    csv_writer = csv.writer(csv_file, delimiter=',', quotechar='"',
                                            quoting=csv.QUOTE_MINIMAL)
                    csv_writer.writerow(['%i' % i, '%0.2f' % reward_mean])

                step_reward = np.append(step_reward, [[num_trials, reward_mean]], axis=0)
                sio.savemat(
                    HOME + '/catkin_ws/src/Turtlebot3_Pheromone/src/log/MATLAB/step_reward_{}.mat'.format(
                        actor_critic.time_str),
                    {'data': step_reward}, True, '5', False, False, 'row')

    if train_indicator == 0:
        # Test mode: load trained weights and play without training
        for i in range(num_trials):
            print("trial:" + str(i))
            current_state = game_state.reset()

            actor_critic.actor_model.load_weights(path + "actormodel-2950-256.h5")
            actor_critic.critic_model.load_weights(path + "criticrmodel-2950-256.h5")
            ##############################################################################################
            total_reward = 0
            for j in range(trial_len):
                ###########################################################################################
                current_state = current_state.reshape(
                    (1, game_state.observation_space.shape[0]))

                start_time = time.time()
                # Need to change the network input/output; do I need to change
                # the output to be [0, 2*pi]?
                action = actor_critic.play(current_state)
                action = action.reshape((1, game_state.action_space.shape[0]))
                end_time = time.time()
                print(1 / (end_time - start_time), "fps for calculating next step")

                # We get reward and state here, then we need to work out
                # whether the robot crashed for the 'dones' value.
                _, new_state, reward, done = game_state.step(0.1, action[0][1], action[0][0])
                total_reward = total_reward + reward
                ###########################################################################################

                if j == (trial_len - 1):
                    done = 1
                    print("this is reward:", total_reward)

                # if j % 5 == 0:
                #     actor_critic.train()
                #     actor_critic.update_target()

                new_state = new_state.reshape((1, game_state.observation_space.shape[0]))
                # actor_critic.remember(cur_state, action, reward, new_state, done)
                # remember all the data using memory; memory data will be sampled automatically
                # cur_state = new_state
                ##########################################################################################
                # actor_critic.remember(current_state, action, reward, new_state, done)
                current_state = new_state
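
# `safemean` is called by the logging block in main() above but is not defined
# in this excerpt. A minimal sketch, assuming the OpenAI-baselines-style helper
# that returns NaN instead of raising a warning while the episode buffer is
# still empty:
def safemean(xs):
    # Average of xs, or NaN when no episode has finished yet
    return np.nan if len(xs) == 0 else np.mean(xs)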
def learn(self):
    # For logging
    time_str = time.strftime("%Y%m%d-%H%M%S")
    logger_ins = logger.Logger(
        '/home/swn/catkin_ws/src/turtlebot3_waypoint_navigation/src/log',
        output_formats=[logger.HumanOutputFormat(sys.stdout)])
    board_logger = tensorboard_logging.Logger(
        os.path.join(logger_ins.get_dir(), "tf_board", time_str))

    # Reassign the members of the class to local names for simplicity
    total_timesteps = int(self.total_timesteps)
    nenvs = 1
    # nenvs = env.num_envs  # for multiple-instance training
    ob_space = self.env.observation_space
    ac_space = self.env.action_space
    nbatch = nenvs * self.nsteps
    nminibatches = self.nminibatches
    nbatch_train = nbatch // nminibatches
    noptepochs = self.noptepochs
    nsteps = self.nsteps
    save_interval = self.save_interval
    log_interval = self.log_interval
    restore_path = self.restore_path
    gamma = self.gamma
    lam = self.lam
    lr = self.lr
    cliprange = self.cliprange
    deterministic = self.deterministic
    step_reward = [[0.0, 0.0]]

    # Define a function that builds the Actor-Critic model
    make_model = lambda: Model(policy=self.policy, ob_space=ob_space, ac_space=ac_space,
                               nbatch_act=nenvs, nbatch_train=nbatch_train,
                               nsteps=self.nsteps, ent_coef=self.ent_coef,
                               vf_coef=self.vf_coef, max_grad_norm=self.max_grad_norm,
                               deterministic=self.deterministic)

    # Save function
    # if save_interval and logger_ins.get_dir():
    #     import cloudpickle
    #     with open(osp.join(logger_ins.get_dir(), 'make_model.pkl'), 'wb') as fh:
    #         fh.write(cloudpickle.dumps(make_model))

    # Make a model
    model = make_model()

    # Restore when the path is provided
    if restore_path is not None:
        model.restore(restore_path)

    # Create a runner instance (generating samples with nsteps)
    runner = Runner(env=self.env, model=model, nsteps=nsteps, gamma=gamma, lam=lam)

    # Double-ended queue with max size 100 to store episode info
    epinfobuf = deque(maxlen=100)

    # Get the start time
    tfirststart = time.time()

    # Calculate the number of updates (iterations)
    nupdates = total_timesteps // nbatch
    assert nupdates > 0

    '''
    PPO (iterating)
    1. Run the policy in the environment for T timesteps
    2. Compute advantage estimates (in Model class)
    3. Optimise the loss w.r.t. the policy weights, with K epochs and
       minibatch size M < N (# of actors) * T (timesteps)
    4. Update weights (in Model class)
    '''
    # In every update, one loop of the PPO algorithm is executed
    for update in range(1, nupdates + 1):
        # INITIALISE PARAMETERS
        assert nbatch % nminibatches == 0
        nbatch_train = nbatch // nminibatches
        tstart = time.time()
        frac = 1.0 - (update - 1.0) / nupdates
        lrnow = lr(frac)
        cliprangenow = cliprange(frac)

        # 1. Run the policy and get samples for nsteps
        ids, obs, returns, masks, actions, values, neglogpacs, states, epinfos = runner.run()
        epinfobuf.extend(epinfos)
        mblossvals = []

        # Do not train or log if in deterministic mode
        if deterministic:
            continue

        # 3. Optimise the loss w.r.t. the policy weights, with K epochs and
        #    minibatch size M < N (# of actors) * T (timesteps)
        if states is None:  # nonrecurrent version
            inds = np.arange(nbatch)  # minibatch indices (shuffling left disabled)
            # Update weights using the optimiser for noptepochs epochs
            for _ in range(noptepochs):
                # np.random.shuffle(inds)
                # In each epoch, update the weights on one minibatch of the
                # total batch at a time (epoch = m(32) * minibatch(4))
                for start in range(0, nbatch, nbatch_train):
                    end = start + nbatch_train
                    mbinds = inds[start:end]
                    returns_np = np.asarray(returns[mbinds])
                    # 4. Update weights
                    mblossvals.append(model.train(lrnow, cliprangenow, ids[mbinds],
                                                  [obs[i] for i in mbinds],
                                                  returns[mbinds], masks[mbinds],
                                                  actions[mbinds], values[mbinds],
                                                  neglogpacs[mbinds]))
        else:  # recurrent version
            assert nenvs % nminibatches == 0
            envsperbatch = nenvs // nminibatches
            envinds = np.arange(nenvs)
            flatinds = np.arange(nenvs * nsteps).reshape(nenvs, nsteps)
            envsperbatch = nbatch_train // nsteps
            for _ in range(noptepochs):
                # np.random.shuffle(envinds)
                for start in range(0, nenvs, envsperbatch):
                    end = start + envsperbatch
                    mbenvinds = envinds[start:end]
                    mbflatinds = flatinds[mbenvinds].ravel()
                    print(mbflatinds)
                    slices = (arr[mbflatinds] for arr in
                              (obs, returns, masks, actions, values, neglogpacs))
                    mbstates = states[mbenvinds]
                    # 4. Update weights
                    mblossvals.append(model.train(lrnow, cliprangenow,
                                                  [obs[i] for i in mbflatinds],
                                                  returns[mbflatinds], masks[mbflatinds],
                                                  actions[mbflatinds], values[mbflatinds],
                                                  neglogpacs[mbflatinds], mbstates))

        # Calculate the mean loss
        lossvals = np.mean(mblossvals, axis=0)
        tnow = time.time()
        fps = int(nbatch / (tnow - tstart))

        '''
        Logging and saving model & weights
        '''
        if update % log_interval == 0 or update == 1:
            # ev = explained_variance(values, returns)
            logger_ins.logkv("serial_timesteps", update * nsteps)
            logger_ins.logkv("nupdates", update)
            logger_ins.logkv("total_timesteps", update * nbatch)
            logger_ins.logkv("fps", fps)
            # logger.logkv("explained_variance", float(ev))
            logger_ins.logkv('eprewmean',
                             self.safemean([epinfo['r'] for epinfo in epinfobuf]))
            logger_ins.logkv('eplenmean',
                             self.safemean([epinfo['l'] for epinfo in epinfobuf]))
            logger_ins.logkv('time_elapsed', tnow - tfirststart)
            for (lossval, lossname) in zip(lossvals, model.loss_names):
                logger_ins.logkv(lossname, lossval)
            logger_ins.dumpkvs()
            for (lossval, lossname) in zip(lossvals, model.loss_names):
                board_logger.log_scalar(lossname, lossval, update)
            board_logger.log_scalar("eprewmean",
                                    self.safemean([epinfo['r'] for epinfo in epinfobuf]),
                                    update)
            board_logger.flush()

            reward_arr = np.asarray([epinfo['r'] for epinfo in epinfobuf])
            # reward_new = np.delete(reward_arr, np.where(reward_arr == 0.0))
            step_reward = np.append(step_reward,
                                    [[update, self.safemean([reward for reward in reward_arr])]],
                                    axis=0)
            sio.savemat(
                '/home/swn/catkin_ws/src/Turtlebot3_Pheromone/src/log/MATLAB/step_reward_{}.mat'.format(
                    self.time_str),
                {'data': step_reward}, True, '5', False, False, 'row')

        if save_interval and (update % save_interval == 0 or update == 1) and logger_ins.get_dir():
            checkdir = osp.join(logger_ins.get_dir(), 'checkpoints', '{}'.format(self.time_str))
            if not os.path.isdir(checkdir):
                os.makedirs(checkdir)
            savepath = osp.join(checkdir, '%.5i' % update + "r" + "{:.2f}".format(
                self.safemean([epinfo['r'] for epinfo in epinfobuf])))
            print('Saving to', savepath)
            model.save(savepath)

    print("Done with training. Exiting.")
    self.env.close()
    return model
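
# In learn() above, lr and cliprange are called as functions of the remaining
# progress fraction (lr(frac), cliprange(frac)). A minimal sketch of wrapping
# constant hyperparameters into that form; `constfn` is a hypothetical helper
# (named after the OpenAI baselines convention) and is not part of this file:
def constfn(val):
    def f(_):
        return val
    return f

# Assumed usage: agent.lr = constfn(3e-4); agent.cliprange = constfn(0.2)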