def init():
    """Build the perturbed DQN agent and store it in ``Globals.model``.

    Steps:
      1. Unpickle the pretrained weights from
         ``models/BreakoutNoFrameskip-v4.pkl`` (second element of the pickled
         pair; the first element is ignored).
      2. Load those weights into a stock ``DQN``/``CnnPolicy`` model, then copy
         that model's parameters into a ``MyDQN``/``MyCnnPolicy`` model with
         ``exact_match=False``.
      3. Overwrite the ``action_value/fully_connected_1`` layer of the
         ``MyDQN`` model with zero biases and an identity matrix plus noise
         drawn uniformly from ``[-0.5, 0.5) * magnitude``.
      4. Publish the resulting model as ``Globals.model``.

    Hyper-parameters (``double_q``, ``learning_rate``, ``magnitude``, ...) are
    read from the enclosing module scope. Returns nothing.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # with a weights file from a trusted source.
    # The context manager guarantees the file handle is closed even if
    # unpickling raises (the handle was previously never closed).
    with open('models/BreakoutNoFrameskip-v4.pkl', 'rb') as file:
        _, zoo_weights = pickle.load(file)

    model = MyDQN(
        MyCnnPolicy,
        Globals.env,
        double_q=double_q,
        learning_starts=learning_starts,
        learning_rate=learning_rate,
        tensorboard_log=tensorboard_log,
        verbose=verbose,
        exploration_fraction=exploration_fraction,
        prioritized_replay=prioritized_replay,
        exploration_final_eps=exploration_final_eps,
    )

    # Route the pretrained weights through a stock DQN first, then copy them
    # over non-strictly so parameters that exist only in MyDQN are tolerated.
    zoo_model = DQN(
        CnnPolicy,
        Globals.env,
        double_q=double_q,
        learning_starts=learning_starts,
    )
    zoo_model.load_parameters(zoo_weights)
    model.load_parameters(zoo_model.get_parameters(), exact_match=False)

    params = model.get_parameters()
    # 4x4 noise centred on zero and scaled by `magnitude`.
    # NOTE(review): the hard-coded 4 presumably matches the size of the
    # environment's action space -- confirm against Globals.env.
    r = (np.random.rand(4, 4) - 0.5) * magnitude
    params['deepq/model/action_value/fully_connected_1/biases:0'] = np.zeros(4)
    params['deepq/model/action_value/fully_connected_1/weights:0'] = (
        np.identity(4) + r
    )
    model.load_parameters(params)

    Globals.model = model
# Number of environment steps each agent is trained for.
timeSteps = 100000

# NOTE(review): the block nesting below was reconstructed from a
# whitespace-collapsed source; confirm that saving is meant to happen only
# after training and that the parameter dump runs unconditionally.
if doTraining:
    ppoModel.learn(total_timesteps=timeSteps)
    dqnModel.learn(total_timesteps=timeSteps)
    print("Training Finished")
    if overWriteModels:
        print("Overwriting Models")
        ppoModel.save(ppoModelLocation)
        dqnModel.save(dqnModelLocation)

# Dump each model's parameters to CSV: the first row holds the parameter
# names (the dict keys), the second row holds the (name, value) pairs.
# The files are opened with newline='' as the csv module requires, so the
# writer controls line endings itself.
for paramsPath, paramsModel in (
    ('/home/john/ai-safety-gridworlds/logs/dqnparamsBefore.csv', dqnModel),
    ('/home/john/ai-safety-gridworlds/logs/ppoparamsBefore.csv', ppoModel),
):
    with open(paramsPath, 'w', newline='') as csvFile:
        csvWriter = csv.writer(csvFile)
        params = paramsModel.get_parameters()
        csvWriter.writerow(params)
        csvWriter.writerow(params.items())

#results_plotter.plot_results([log_dir], timeSteps, results_plotter.X_TIMESTEPS, "PPO Vase World")
#plt.show()
#meanReward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10, deterministic=False)
#print(meanReward, std_reward)
#print(evaluatePolicy(env, model, difficulties=[1,2,3,4,5]))

wallSize = [13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]