Example #1
import argparse


def main():
    # parse the path to the training log and plot its learning curve
    parser = argparse.ArgumentParser()
    parser.add_argument('log_file')
    args = parser.parse_args()

    log_file = args.log_file

    learning_curve(log_file)
Example #2
    def train(self):
        self.iter = 0
        # run the remaining epochs, then plot the learning curve from the log
        for epoch in tqdm.trange(self.epoch,
                                 self.epochs + 1,
                                 desc='Train',
                                 ncols=80):
            self.epoch = epoch
            self.train_epoch()

        learning_curve(osp.join(self.out, 'log.csv'))
Example #3
 def test_learning_curve(self):
     error_train, error_val = learning_curve(self.X, self.y, self.Xval, self.yval, lamda=0.0)
     plt.xlabel('Number of Training Examples')
     plt.ylabel("Error")
     plt.plot(range(0, self.m), error_train)
     plt.plot(range(0, self.m), error_val)
     plt.show()
Example #4
 def test_learning_curve(self):
     error_train, error_val = learning_curve(self.X,
                                             self.y,
                                             self.Xval,
                                             self.yval,
                                             lamda=0.0)
     plt.xlabel('Number of Training Examples')
     plt.ylabel("Error")
     plt.plot(range(0, self.m), error_train)
     plt.plot(range(0, self.m), error_val)
     plt.show()
Example #5
 def test_learning_curve_poly(self):
     highest_degree = 8
     X_poly = map_poly_features(self.X, highest_degree)
     X_poly, mu, sigma = feature_normalization(X_poly)
     Xval_poly = map_poly_features(self.Xval, highest_degree)
     Xval_poly = (Xval_poly - mu) / sigma
     lamda = 0.0
     
     error_train, error_val = learning_curve(X_poly, self.y, Xval_poly, self.yval, lamda)
     plt.xlabel('Number of Training Examples')
     plt.ylabel("Error")
     plt.plot(range(1, self.m + 1), error_train)
     plt.plot(range(1, self.m + 1), error_val)
     plt.show()
Example #6
    def test_learning_curve_poly(self):
        highest_degree = 8
        X_poly = map_poly_features(self.X, highest_degree)
        X_poly, mu, sigma = feature_normalization(X_poly)
        Xval_poly = map_poly_features(self.Xval, highest_degree)
        Xval_poly = (Xval_poly - mu) / sigma
        lamda = 0.0

        error_train, error_val = learning_curve(X_poly, self.y, Xval_poly,
                                                self.yval, lamda)
        plt.xlabel('Number of Training Examples')
        plt.ylabel("Error")
        plt.plot(range(1, self.m + 1), error_train)
        plt.plot(range(1, self.m + 1), error_val)
        plt.show()
Example #7
#######################################################################
# =========== Part 3: Learning Curve for Linear Regression ===========#
#######################################################################

reg = 0.0
XXval = np.vstack([np.ones((Xval.shape[0],)), Xval]).T

# implement the learning_curve function in utils.py
# this script will run your function and save the curves in fig8.pdf

error_train, error_val = utils.learning_curve(XX, y, XXval, yval, reg)
plot_utils.plot_learning_curve(error_train, error_val, reg)
plt.savefig('fig8.pdf')
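
# A minimal sketch (an assumption, not the actual utils.py) of what the
# learning_curve call above is expected to compute: fit a regularized linear
# regression on the first i training examples for i = 1..m, and record the
# unregularized squared error on that training subset and on the full
# validation set.
def learning_curve_sketch(X, y, Xval, yval, reg):
    m, n = X.shape
    error_train = np.zeros(m)
    error_val = np.zeros(m)
    for i in range(1, m + 1):
        Xi, yi = X[:i], y[:i]
        penalty = reg * np.eye(n)
        penalty[0, 0] = 0.0  # do not penalize the bias column
        theta = np.linalg.pinv(Xi.T @ Xi + penalty) @ (Xi.T @ yi)
        error_train[i - 1] = np.mean((Xi @ theta - yi) ** 2) / 2.0
        error_val[i - 1] = np.mean((Xval @ theta - yval) ** 2) / 2.0
    return error_train, error_val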

#######################################################################
## =========== Part 4: Feature Mapping for Polynomial Regression =====#
#######################################################################

from utils import feature_normalize
import sklearn
from sklearn.preprocessing import PolynomialFeatures

# Map X onto polynomial features and normalize
# We will consider a 6th order polynomial fit for the data

p = 6
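
# One possible continuation (a sketch only; the snippet is cut off here, and
# the exact return values of feature_normalize as well as the raw feature
# vector X are assumptions): expand the single input feature into powers
# 1..p and standardize each column before adding the bias term.
poly = PolynomialFeatures(degree=p, include_bias=False)
X_poly = poly.fit_transform(X.reshape(-1, 1))   # columns x, x^2, ..., x^p
X_poly, mu, sigma = feature_normalize(X_poly)   # assumed to return (X_norm, mean, std)
X_poly = np.hstack([np.ones((X_poly.shape[0], 1)), X_poly])  # prepend the bias column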
Example #8
import gym
from gym import Env
from gridworld import WindyGridWorld  # import the custom windy grid-world environment
from core import Agent  # import the custom Agent base class
from utils import learning_curve  # plotting helper

env = WindyGridWorld()
env.reset()
env.render()

agent = Agent(env, capacity=10000)  # an agent with a memory capacity of 10000
data = agent.learning(max_episode_num=180, display=True)
learning_curve(data,
               2,
               0,
               title="learning curve",
               x_name="episode",
               y_name="time steps")
env.close()
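
The learning_curve imported from utils in Example #8 and the reinforcement-learning examples that follow is not shown in any snippet. Judging only from how it is called (a per-episode statistics sequence followed by column indices, plus the x_index / y1_index / y2_index, title, x_name and y_name parameters visible in Example #12), it appears to plot one column of the per-episode records against another. The following is a rough sketch under that reading, not the actual implementation:

import matplotlib.pyplot as plt

def learning_curve(data, x_index=0, y1_index=1, y2_index=None,
                   title="", x_name="", y_name=""):
    # data: a sequence of per-episode records (tuples or lists of numbers)
    x = [record[x_index] for record in data]
    plt.plot(x, [record[y1_index] for record in data])
    if y2_index is not None:
        plt.plot(x, [record[y2_index] for record in data])
    plt.title(title)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.show()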
Example #9
            s1, r1, is_done, info, total_reward = self.act(a0)
            if display:
                self.env.render()
            a1 = self.perform_policy(s1, self.Q, epsilon)
            old_q = get_dict(self.Q, s0, a0)
            q_prime = get_dict(self.Q, s1, a1)  # value of the action actually chosen for s1 (on-policy)
            td_target = r1 + gamma * q_prime
            #alpha = alpha / num_episode
            new_q = old_q + alpha * (td_target - old_q)  # SARSA TD update
            set_dict(self.Q, new_q, s0, a0)
            s0, a0 = s1, a1
            time_in_episode += 1
        if display:
            print(self.experience.last_episode)
        return time_in_episode, total_reward


env = WindyGridWorld()
agent = SarsaAgent(env, capacity=10000)
statistics = agent.learning(gamma=1.0,
                            epsilon=1,
                            decaying_epsilon=True,
                            alpha=0.5,
                            max_episode_num=800,
                            display=True)

agent.learning_method(epsilon=0.01, display=True)
learning_curve(statistics, 0, 2)
env.reset()
env.render()
env.close()
Example #10
import gym
from puckworld_env import PuckWorldEnv
from ddpg_agent import DDPGAgent
from utils import learning_curve
import numpy as np

env = PuckWorldEnv()
agent = DDPGAgent(env)
data = agent.learning(max_episode_num=200, display=True)
learning_curve(data, 2, 1, #title="DDPGAgent performance on PuckWorld with continuous action space",
               x_name="episodes", y_name="rewards of episode")
Example #11
import gym
from puckworld import PuckWorldEnv
from agents import DQNAgent
from utils import learning_curve

env = PuckWorldEnv()
agent = DQNAgent(env)
data = agent.learning(gamma=0.99,
                      epsilon=1,
                      decaying_epsilon=True,
                      alpha=1e-3,
                      max_episode_num=100,
                      display=False)
learning_curve(data,
               2,
               1,
               title="DQNAgent performance on PuckWorld",
               x_name="episodes",
               y_name="rewards of episode")
Example #12
            set_dict(self.Q, new_q, s0, a0)
            # s0, a0 = s1, a1
            s0 = s1  # unlike SARSA (Example #9), only the state is carried over to the next step
            time_in_episode += 1
        if display:
            print(self.experience.last_episode)
        return time_in_episode, total_reward


env = WindyGridWorld()
agent = QAgent(env)

data = agent.learning(gamma=1.0,
                      epsilon=0.1,
                      decaying_epsilon=True,
                      alpha=0.5,
                      max_episode_num=800,
                      display=True)

agent.learning_method(epsilon=0.01, display=False)
data = agent.learning(gamma=1.0, display=False, max_episode_num=100)
learning_curve(data,
               x_index=0,
               y1_index=2,
               y2_index=None,
               title="Q learning curve",
               x_name="time step num",
               y_name="episode num")
env.reset()
env.render()
env.close()
Example #13
'''
#################################################################################################
# author: wudong
# date: 20190813
# purpose:
#   test the DQN algorithm on a continuous state space with a discrete action space
#################################################################################################
'''

import gym
from puckworld import PuckWorldEnv
from agent import DQNAgent
from utils import learning_curve
from gridworld import *

env = PuckWorldEnv()
agent = DQNAgent(env)

data = agent.learning(gamma=0.99,
                      epsilon=1,
                      decaying_epsilon=True,
                      alpha=1e-3,
                      max_episode_num=100,
                      display=True)

learning_curve(data, 2, 1, title="DQN", x_name="episodes", y_name="rewards")