Example #1
import pickle
import numpy as np
import matplotlib.pyplot as plt
from Normal import exploration, measure_length, moving_average
# evaluation_episode_number and average_over are assumed to come from the training configuration


# load training and evaluation records saved by the training scripts
double_without = pickle.load(open("double_without_record.p", "rb"))
double_record = pickle.load(open("double_record.p", "rb"))
double_evaluation_record_without = pickle.load(
    open("double_evaluation_without_record.p", "rb"))
double_evaluation_record = pickle.load(open("double_evaluation_record.p", "rb"))

# average evaluation reward over the evaluation episodes
evalu_without_ave = sum(double_evaluation_record_without) / evaluation_episode_number
evalu_ave = sum(double_evaluation_record) / evaluation_episode_number
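
# (Sketch, not part of the original script.) The .p files above are assumed to be
# written by the corresponding training runs with pickle.dump, e.g.:
#   pickle.dump(double_record, open("double_record.p", "wb"))
#   pickle.dump(double_evaluation_record, open("double_evaluation_record.p", "wb"))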


# plot training results

def moving_average(x, w):
    # sliding-window mean; 'valid' mode drops the first w - 1 points
    # (this local definition shadows the import from Normal above)
    return np.convolve(x, np.ones(w), 'valid') / w
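
# For example (hypothetical values), a window of 2 averages neighbouring points
# and shortens the series by w - 1 entries:
#   moving_average([1, 2, 3, 4, 5], 2)  ->  array([1.5, 2.5, 3.5, 4.5])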
double_without_average = moving_average(double_without, average_over)
double_record_average = moving_average(double_record, average_over)

fig = plt.figure(figsize=(13, 7))
env_standard = 800  # reference episode-reward threshold drawn as a dotted line
x = range(len(double_record_average))
plt.plot(x, double_without_average,
         label='Normal Training\nEvaluation %s' % evalu_without_ave,
         color='black', linestyle='-.')
plt.plot(x, double_record_average,
         label='Coached by PID Controller\nEvaluation %s' % evalu_ave,
         color='magenta')
plt.xlabel('Episode Number', fontsize='large')
plt.ylabel('Episode Reward', fontsize='large')
plt.legend(loc='upper left', ncol=1, borderaxespad=0, prop={'size': 18})
plt.axhline(y=env_standard, color='black', linestyle='dotted')
plt.savefig('double.png')

# --- separate excerpt: inner training loop of the coached run (assumes the surrounding
# loops over prohibition_position / prohibition_parameter, agent, environment, tqdm) ---
                    # this branch executes the composed joint actions and replaces the
                    # environment reward with a fixed -1 penalty
                    actions = [thigh_actions, leg_actions, foot_actions, left_thigh_actions, left_leg_actions, left_foot_actions]
                    states, terminal, reward = environment.execute(actions=actions)
                    reward = -1
                    episode_reward += reward
                    agent.observe(terminal=terminal, reward=reward)

                else:
                    # otherwise the previously selected action is executed unchanged
                    # and the true environment reward is kept
                    states, terminal, reward = environment.execute(actions=actions)
                    agent.observe(terminal=terminal, reward=reward)
                    episode_reward += reward

            record.append(episode_reward)

        # store the reward curve and its moving average for this
        # (prohibition_position[k], prohibition_parameter[i]) setting
        reward_record[k][i] = record
        temp = np.array(record)
        reward_record_average[k][i] = moving_average(temp, average_over)

        # evaluate the trained agent
        episode_reward = 0.0
        eva_reward_record = []
        print('evaluating agent with boundary position at %s and prohibitive parameter %s' % (prohibition_position[k], prohibition_parameter[i]))
        for j in tqdm(range(evaluation_episode_number)):
            episode_reward = 0
            states = environment.reset()
            internals = agent.initial_internals()
            terminal = False
            while not terminal:
                # independent, deterministic act(): no exploration and no training update
                actions, internals = agent.act(states=states, internals=internals, independent=True, deterministic=True)
                states, terminal, reward = environment.execute(actions=actions)
                episode_reward += reward
            eva_reward_record.append(episode_reward)
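
        # (Sketch, not part of the original excerpt.) After the evaluation loop, the
        # evaluation average loaded by the plotting scripts above is presumably
        # computed in the same way as evalu_ave:
        #   eva_ave = sum(eva_reward_record) / evaluation_episode_number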
Example #3
import pickle
import numpy as np
import matplotlib.pyplot as plt
# evaluation_episode_number and average_over are assumed to come from the
# training configuration, as in Example #1

ip_without = pickle.load(open("ip_without_record.p", "rb"))
ip_record = pickle.load(open("ip_record.p", "rb"))
ip_evaluation_record_without = pickle.load(
    open("ip_evaluation_without_record.p", "rb"))
ip_evaluation_record = pickle.load(open("ip_evaluation_record.p", "rb"))
evalu_without_ave = sum(
    ip_evaluation_record_without) / evaluation_episode_number
evalu_ave = sum(ip_evaluation_record) / evaluation_episode_number


def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w


ip_without_average = moving_average(ip_without, average_over)
ip_record_average = moving_average(ip_record, average_over)

fig = plt.figure(figsize=(13, 7))
env_standard = 800
x = range(len(ip_record_average))
plt.plot(x,
         ip_without_average,
         label='Normal Training\nEvaluation %s' % evalu_without_ave,
         color='black',
         linestyle='-.')
plt.plot(x,
         ip_record_average,
         label='Coached by PID Controller\nEvaluation %s' % evalu_ave,
         color='magenta')
plt.xlabel('Episode Number', fontsize='large')