import math
import time

import krpc
import numpy as np

from config import ip
from ksp_env import GameEnv

conn = krpc.connect(name='Tracker', ip=ip)
env = GameEnv(conn)

vessel = env.vessel
frame = vessel.orbit.body.reference_frame
vert_speed = conn.add_stream(getattr, vessel.flight(frame), 'vertical_speed')


def states():
    # Attitude features: heading/roll projected through sin/cos and scaled by tilt (90 - pitch).
    message = ('sin', round(math.sin(math.radians(env.heading())) * (90 - env.pitch()) / 90, 2),
               'cos', round(math.cos(math.radians(env.heading())) * (90 - env.pitch()) / 90, 2),
               'sinr', round(math.sin(math.radians(env.heading() + env.roll())) * (90 - env.pitch()) / 90, 2),
               'cosr', round(math.cos(math.radians(env.heading() + env.roll())) * (90 - env.pitch()) / 90, 2),
               'p', round(env.pitch(), 2),
               'h', round(env.heading(), 2),
               'r', round(env.roll(), 2))
    return message
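# A minimal usage sketch, assuming the tracker is meant to poll telemetry in a
# loop (the loop itself is not part of the excerpt above; the 1 s interval and
# the print format are assumptions made here for illustration).
if __name__ == '__main__':
    while True:
        print(states(), 'vs', round(vert_speed(), 2))
        time.sleep(1)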
class Worker(object):
    def __init__(self, name, globalAC, sess, conn):
        self.conn = conn
        self.env = GameEnv(conn=self.conn)
        self.name = name
        self.AC = ACNet(name, sess, globalAC)
        self.sess = sess

    def work(self):
        global global_rewards, global_episodes
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not coord.should_stop():
            s = self.env.reset(self.conn)
            ep_r = 0
            self.env.activate_engine()
            for ep_t in range(MAX_EP_STEP):
                a = self.AC.choose_action(s)          # estimate stochastic action based on policy
                s_, r, done, info = self.env.step(a)  # make step in environment

                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done:  # update global and assign to local net
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        v_s_ = self.sess.run(self.AC.v, {self.AC.states: [s_]})[0, 0]

                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s, buffer_a, buffer_v_target = np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(
                        buffer_v_target)
                    feed_dict = {
                        self.AC.states: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    self.AC.update_global(feed_dict)  # actual training step, update global ACNet
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()  # get global parameters to local ACNet

                s = s_
                total_step += 1
                if done:
                    global_rewards.append(ep_r)
                    self.save_results(ep_r, global_episodes, global_rewards)
                    global_episodes += 1
                    break

    def save_results(self, ep_r, global_episodes, global_rewards):
        altitude = self.env.get_altitude()

        with open(result_file, 'a', newline='') as csvf:
            wri = csv.DictWriter(csvf, fieldnames=fieldnames)
            wri.writerow({'counter': global_episodes,
                          'altitude': altitude,
                          'reward': round(ep_r, 2)})

        print(
            self.name,
            "Episode: {:4}".format(global_episodes),
            "| Reward: {:7.1f}".format(global_rewards[-1]),
            "| Altitude: {:7.1f}".format(altitude)
        )
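# Illustrative sketch (not part of the original module): the update block in
# Worker.work() turns the reward buffer into bootstrapped n-step value targets,
# v_target(s_t) = r_t + GAMMA * v_target(s_{t+1}), walking backwards from the
# critic's estimate of the state after the last transition. The helper name and
# the sample numbers below are made up for illustration only.
def discounted_targets(rewards, bootstrap_value, gamma):
    targets = []
    v = bootstrap_value
    for r in reversed(rewards):   # newest reward first
        v = r + gamma * v         # accumulate the discounted return
        targets.append(v)
    targets.reverse()             # back to chronological order
    return targets

# Example: discounted_targets([1, 0, 2], bootstrap_value=0.5, gamma=0.9)
# returns [2.9845, 2.205, 2.45].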
import csv
import os
import shutil

import krpc
import numpy as np
import tensorflow as tf

from config import OUTPUT_GRAPH, LOG_DIR, result_file, fieldnames, N_WORKERS, MAX_EP_STEP, GLOBAL_NET_SCOPE, \
    UPDATE_GLOBAL_ITER, GAMMA, ENTROPY_BETA, LR_A, LR_C, conns
from ksp_env import GameEnv

print(conns)
connections = [krpc.connect(**conns[i]) for i in range(N_WORKERS)]
# connections = [krpc.connect()]

env = GameEnv(conn=connections[0])
env.reset(connections[0])

NUM_STATES = env.observation_space.shape[0]
NUM_ACTIONS = env.action_space.shape[0]
ACTION_BOUND = [env.action_space.low, env.action_space.high]


# Network for the Actor Critic
class ACNet(object):
    def __init__(self, scope, sess, globalAC=None):
        self.sess = sess
        self.actor_optimizer = tf.train.RMSPropOptimizer(LR_A, name='RMSPropA')
        self.critic_optimizer = tf.train.RMSPropOptimizer(LR_C, name='RMSPropC')

        if scope == GLOBAL_NET_SCOPE:
            with tf.variable_scope(scope):
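# Sketch of how the pieces above are typically wired together in an A3C setup.
# The launch code is not part of this excerpt, so everything below is an
# assumption following the usual pattern: one global ACNet, one Worker per kRPC
# connection, each worker running in its own thread under a
# tf.train.Coordinator. The module-level names referenced by Worker.work()
# (coord, global_rewards, global_episodes) would be created here.
import threading  # assumed import, needed only for this sketch

if __name__ == '__main__':
    sess = tf.Session()
    global_rewards = []
    global_episodes = 0

    with tf.device('/cpu:0'):
        global_ac = ACNet(GLOBAL_NET_SCOPE, sess)  # parameters shared by all workers
        workers = [Worker('W_{}'.format(i), global_ac, sess, connections[i])
                   for i in range(N_WORKERS)]

    coord = tf.train.Coordinator()
    sess.run(tf.global_variables_initializer())

    if OUTPUT_GRAPH:
        if os.path.exists(LOG_DIR):
            shutil.rmtree(LOG_DIR)
        tf.summary.FileWriter(LOG_DIR, sess.graph)

    worker_threads = []
    for worker in workers:
        t = threading.Thread(target=worker.work)
        t.start()
        worker_threads.append(t)
    coord.join(worker_threads)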