import argparse


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('log_file')
    args = parser.parse_args()
    log_file = args.log_file
    learning_curve(log_file)
def train(self):
    self.iter = 0
    for epoch in tqdm.trange(self.epoch, self.epochs + 1,
                             desc='Train', ncols=80):
        self.epoch = epoch
        self.train_epoch()
        # refresh the learning-curve plot from the accumulated CSV log
        learning_curve(osp.join(self.out, 'log.csv'))
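# Both snippets above call a file-based learning_curve(log_file) helper. A
# minimal sketch of what such a helper might do, assuming the CSV log has an
# 'iteration' column plus one or more loss columns; the actual helper in these
# projects may plot other columns or save the figure elsewhere.
import matplotlib.pyplot as plt
import pandas as pd


def learning_curve_from_log(log_file):
    """Hypothetical sketch: plot every loss-like column against iteration."""
    df = pd.read_csv(log_file)
    plt.figure()
    for col in df.columns:
        if 'loss' in col:  # assumption: loss columns contain 'loss' in their name
            plt.plot(df['iteration'], df[col], label=col)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(log_file.replace('.csv', '.png'))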
def test_learning_curve(self):
    error_train, error_val = learning_curve(self.X, self.y,
                                            self.Xval, self.yval, lamda=0.0)
    plt.xlabel('Number of Training Examples')
    plt.ylabel('Error')
    plt.plot(range(0, self.m), error_train)
    plt.plot(range(0, self.m), error_val)
    plt.show()
def test_learning_curve_poly(self):
    highest_degree = 8
    X_poly = map_poly_features(self.X, highest_degree)
    X_poly, mu, sigma = feature_normalization(X_poly)
    Xval_poly = map_poly_features(self.Xval, highest_degree)
    Xval_poly = (Xval_poly - mu) / sigma
    lamda = 0.0
    error_train, error_val = learning_curve(X_poly, self.y,
                                            Xval_poly, self.yval, lamda)
    plt.xlabel('Number of Training Examples')
    plt.ylabel('Error')
    plt.plot(range(1, self.m + 1), error_train)
    plt.plot(range(1, self.m + 1), error_val)
    plt.show()
#######################################################################
# =========== Part 3: Learning Curve for Linear Regression ===========#
#######################################################################
reg = 0.0  # regularization strength (one branch of the original used 1.0)

XXval = np.vstack([np.ones((Xval.shape[0],)), Xval]).T

# Implement the learning_curve function in utils.py;
# this script will run your function and save the curves in fig8.pdf.
error_train, error_val = utils.learning_curve(XX, y, XXval, yval, reg)
plot_utils.plot_learning_curve(error_train, error_val, reg)
plt.savefig('fig8.pdf')

#######################################################################
## =========== Part 4: Feature Mapping for Polynomial Regression =====#
#######################################################################
from utils import feature_normalize
import sklearn
from sklearn.preprocessing import PolynomialFeatures

# Map X onto polynomial features and normalize.
# We will consider a 6th order polynomial fit for the data.
p = 6
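# The test functions earlier in this section and the Part 3 script above both
# expect a learning_curve(X, y, Xval, yval, reg) utility that returns the
# training and validation error for every training-set size. A minimal sketch
# of that idea, assuming X and Xval already include a bias column and using a
# pseudo-inverse ridge fit for brevity; the real utils.learning_curve may
# train differently (e.g. iteratively, or without regularizing the bias term).
import numpy as np


def learning_curve_sketch(X, y, Xval, yval, reg=0.0):
    """Train on the first i examples for i = 1..m and record the
    unregularized mean squared error on that training subset and on the
    full validation set."""
    m, n = X.shape
    error_train = np.zeros(m)
    error_val = np.zeros(m)
    for i in range(1, m + 1):
        # closed-form regularized least squares on the first i examples
        A = X[:i].T @ X[:i] + reg * np.eye(n)
        theta = np.linalg.pinv(A) @ (X[:i].T @ y[:i])
        error_train[i - 1] = np.mean((X[:i] @ theta - y[:i]) ** 2) / 2
        error_val[i - 1] = np.mean((Xval @ theta - yval) ** 2) / 2
    return error_train, error_val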
import gym
from gym import Env
from gridworld import WindyGridWorld  # the custom windy grid-world environment
from core import Agent                # the custom Agent base class
from utils import learning_curve      # plotting helper

env = WindyGridWorld()
env.reset()
env.render()

agent = Agent(env, capacity=10000)  # an agent with a memory capacity of 10000
data = agent.learning(max_episode_num=180, display=True)
learning_curve(data, 2, 0, title="learning curve",
               x_name="episode", y_name="time steps")
env.close()
# Fragment of SarsaAgent.learning_method: the per-step loop body and the
# end-of-episode bookkeeping, followed by a small driver script.
        s1, r1, is_done, info, total_reward = self.act(a0)
        if display:
            self.env.render()
        a1 = self.perform_policy(s1, self.Q, epsilon)
        old_q = get_dict(self.Q, s0, a0)
        q_prime = get_dict(self.Q, s1, a1)
        td_target = r1 + gamma * q_prime  # SARSA bootstraps from the action actually chosen next
        # alpha = alpha / num_episode
        new_q = old_q + alpha * (td_target - old_q)
        set_dict(self.Q, new_q, s0, a0)
        s0, a0 = s1, a1
        time_in_episode += 1
    if display:
        print(self.experience.last_episode)
    return time_in_episode, total_reward


env = WindyGridWorld()
agent = SarsaAgent(env, capacity=10000)
statistics = agent.learning(gamma=1.0, epsilon=1, decaying_epsilon=True,
                            alpha=0.5, max_episode_num=800, display=True)
agent.learning_method(epsilon=0.01, display=True)
learning_curve(statistics, 0, 2)
env.reset()
env.render()
env.close()
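# The SARSA fragment above and the Q-learning fragment later in this section
# both store Q as a plain dict accessed through get_dict / set_dict helpers.
# A minimal sketch of what those helpers might look like, assuming the table
# is keyed by a composite state-action string; the helpers in the original
# project may build the key or choose the default value differently.
def get_dict(target_dict, *args):
    """Read a Q value, defaulting to 0 for unseen state-action pairs."""
    return target_dict.get("_".join(str(arg) for arg in args), 0.0)


def set_dict(target_dict, value, *args):
    """Write a Q value under the same composite key used by get_dict."""
    target_dict["_".join(str(arg) for arg in args)] = value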
import gym
from puckworld_env import PuckWorldEnv
from ddpg_agent import DDPGAgent
from utils import learning_curve
import numpy as np

env = PuckWorldEnv()
agent = DDPGAgent(env)
data = agent.learning(max_episode_num=200, display=True)
learning_curve(data, 2, 1,
               # title="DDPGAgent performance on PuckWorld with continuous action space",
               x_name="episodes", y_name="rewards of episode")
import gym
from puckworld import PuckWorldEnv
from agents import DQNAgent
from utils import learning_curve

env = PuckWorldEnv()
agent = DQNAgent(env)
data = agent.learning(gamma=0.99, epsilon=1, decaying_epsilon=True,
                      alpha=1e-3, max_episode_num=100, display=False)
learning_curve(data, 2, 1, title="DQNAgent performance on PuckWorld",
               x_name="episodes", y_name="rewards of episode")
# Tail of QAgent.learning_method: Q-learning carries only the state forward
# (the behavior action is re-selected each step), followed by a driver script.
        set_dict(self.Q, new_q, s0, a0)
        # s0, a0 = s1, a1
        s0 = s1
        time_in_episode += 1
    if display:
        print(self.experience.last_episode)
    return time_in_episode, total_reward


env = WindyGridWorld()
agent = QAgent(env)
data = agent.learning(gamma=1.0, epsilon=0.1, decaying_epsilon=True,
                      alpha=0.5, max_episode_num=800, display=True)
agent.learning_method(epsilon=0.01, display=False)
data = agent.learning(gamma=1.0, display=False, max_episode_num=100)
learning_curve(data, x_index=0, y1_index=2, y2_index=None,
               title="Q learning curve", x_name="time step num",
               y_name="episode num")
env.reset()
env.render()
env.close()
'''
#################################################################################################
# Author:  wudong
# Date:    20190813
# Purpose: test the DQN algorithm on a task with a continuous state space
#          and a discrete action space.
#################################################################################################
'''
import gym
from puckworld import PuckWorldEnv
from agent import DQNAgent
from utils import learning_curve
from gridworld import *

env = PuckWorldEnv()
agent = DQNAgent(env)
data = agent.learning(gamma=0.99, epsilon=1, decaying_epsilon=True,
                      alpha=1e-3, max_episode_num=100, display=True)
learning_curve(data, 2, 1, title="DQN",
               x_name="episodes", y_name="rewards")
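# All of the reinforcement-learning snippets above import learning_curve from
# utils and call it as learning_curve(data, x_index, y1_index, ...). A minimal
# sketch of such a plotting helper, assuming data is a sequence of per-episode
# records (tuples or lists) whose columns are selected by the indices; the
# real utils.learning_curve may smooth the curves or take a DataFrame instead.
import matplotlib.pyplot as plt


def learning_curve_plot_sketch(data, x_index=0, y1_index=1, y2_index=None,
                               title="", x_name="", y_name=""):
    """Plot one or two columns of the training statistics against another."""
    x = [record[x_index] for record in data]
    plt.figure()
    plt.plot(x, [record[y1_index] for record in data])
    if y2_index is not None:
        plt.plot(x, [record[y2_index] for record in data])
    plt.title(title)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.show()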