def __init__(self, name, env, policy_net, value_net, global_counter,
             returns_list, discount_factor=0.99, max_global_steps=None):
    self.name = name
    self.env = env

    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    pathname = dt_string
    tensorboard_name = "results" + '/runs/' + str(name) + "_" + pathname
    self.writer = SummaryWriter(tensorboard_name)

    self.global_policy_net = policy_net
    self.global_value_net = value_net
    self.global_counter = global_counter
    self.discount_factor = discount_factor
    self.max_global_steps = max_global_steps
    self.global_step = tf.train.get_global_step()
    self.img_transformer = ImageTransformer()
    self.steps = 0
    self.vid_path = "vid/"
    self.saver = tf.train.Saver()

    # Create local policy and value networks that belong only to this worker
    with tf.variable_scope(name):
        # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
        # self.value_net = ValueNetwork()
        self.policy_net, self.value_net = create_networks(policy_net.num_outputs)

    # We will use this op to copy the global network weights
    # back to the local policy and value networks
    self.copy_params_op = get_copy_params_op(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="global"),
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/'))

    # These will take the gradients from the local networks
    # and use those gradients to update the global network
    self.vnet_train_op = make_train_op(self.value_net, self.global_value_net)
    self.pnet_train_op = make_train_op(self.policy_net, self.global_policy_net)

    self.state = None                  # Keep track of the current state
    self.total_reward = 0.             # After each episode print the total (sum of) reward
    self.returns_list = returns_list   # Global returns list to plot later
def __init__(self, name, env, policy_net, value_net, global_counter,
             returns_list, discount_factor=0.99, max_global_steps=None):
    self.name = name
    self.env = env
    self.global_policy_net = policy_net
    self.global_value_net = value_net
    self.global_counter = global_counter
    self.discount_factor = discount_factor
    self.max_global_steps = max_global_steps
    self.global_step = tf.train.get_global_step()
    self.img_transformer = ImageTransformer()

    # Create local policy and value networks that belong only to this worker
    with tf.variable_scope(name):
        # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
        # self.value_net = ValueNetwork()
        self.policy_net, self.value_net = create_networks(policy_net.num_outputs)

    # We will use this op to copy the global network weights
    # back to the local policy and value networks
    self.copy_params_op = get_copy_params_op(
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="global"),
        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/'))

    # These will take the gradients from the local networks
    # and use those gradients to update the global network
    self.vnet_train_op = make_train_op(self.value_net, self.global_value_net)
    self.pnet_train_op = make_train_op(self.policy_net, self.global_policy_net)

    self.state = None                  # Keep track of the current state
    self.total_reward = 0.             # After each episode print the total (sum of) reward
    self.returns_list = returns_list   # Global returns list to plot later
def __init__(self, name, env, policy_net, value_net, global_counter,
             returns_list, discount_factor=0.99, max_global_steps=None):
    self.name = name
    self.env = env
    self.global_policy_net = policy_net
    self.global_value_net = value_net
    self.global_counter = global_counter
    self.discount_factor = discount_factor
    self.max_global_steps = max_global_steps
    self.global_step = tf.compat.v1.train.get_global_step()
    self.img_transformer = ImageTransformer()

    # Create local policy and value networks that belong only to this worker
    with tf.compat.v1.variable_scope(name):
        # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
        # self.value_net = ValueNetwork()
        self.policy_net, self.value_net = create_networks(policy_net.num_outputs)

    # We will use this op to copy the global network weights
    # back to the local policy and value networks
    self.copy_params_op = get_copy_params_op(
        tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope="global"),
        tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=self.name + '/'))

    # These will take the gradients from the local networks
    # and use those gradients to update the global network
    self.vnet_train_op = make_train_op(self.value_net, self.global_value_net)
    self.pnet_train_op = make_train_op(self.policy_net, self.global_policy_net)

    self.state = None                  # Keep track of the current state
    self.total_reward = 0.             # After each episode print the total (sum of) reward
    self.returns_list = returns_list   # Global returns list to plot later
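# NOTE: The __init__ methods above rely on two helpers, get_copy_params_op and
# make_train_op, that are not shown in this section. The sketch below is a
# minimal illustration of what they typically look like in TF1-style A3C code;
# it assumes the policy/value network objects expose `grads_and_vars` and an
# `optimizer` attribute. Those attribute names are assumptions for
# illustration, not the exact implementation used by this code.

def get_copy_params_op(global_vars, local_vars):
    # Pair global and local variables by name and build assign ops that
    # overwrite each local variable with the current global value.
    global_vars = sorted(global_vars, key=lambda v: v.name)
    local_vars = sorted(local_vars, key=lambda v: v.name)
    return [local_var.assign(global_var)
            for global_var, local_var in zip(global_vars, local_vars)]


def make_train_op(local_net, global_net):
    # Apply the gradients computed from the *local* network's loss to the
    # *global* network's variables -- the core A3C update.
    local_grads, _ = zip(*local_net.grads_and_vars)
    local_grads, _ = tf.clip_by_global_norm(local_grads, 5.0)
    _, global_vars = zip(*global_net.grads_and_vars)
    return global_net.optimizer.apply_gradients(
        zip(local_grads, global_vars),
        global_step=tf.compat.v1.train.get_global_step())

# In the worker's update loop, self.copy_params_op is typically run first
# (sess.run(self.copy_params_op)) so the local networks start each rollout
# from the latest global weights before gradients are pushed back.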
        y[i] = float(x[start:(i + 1)].sum()) / (i - start + 1)
    return y


# Set the number of workers
NUM_WORKERS = multiprocessing.cpu_count()

with tf.device("/cpu:0"):
    # Keeps track of the number of updates we've performed
    # https://www.tensorflow.org/api_docs/python/tf/train/global_step
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Global policy and value nets
    with tf.variable_scope("global") as vs:
        policy_net, value_net = create_networks(NUM_ACTIONS)

    # Global step iterator
    global_counter = itertools.count()

    # Save returns
    returns_list = []

    # Create workers
    workers = []
    for worker_id in range(NUM_WORKERS):
        worker = Worker(
            name="worker_{}".format(worker_id),
            env=Env(),
            policy_net=policy_net,
            value_net=value_net,
        y[i] = float(x[start:(i + 1)].sum()) / (i - start + 1)
    return y


# Set the number of workers
NUM_WORKERS = multiprocessing.cpu_count()

with tf.device("/cpu:0"):
    # Keeps track of the number of updates we've performed
    # https://www.tensorflow.org/api_docs/python/tf/train/global_step
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Global policy and value nets
    with tf.compat.v1.variable_scope("global") as vs:
        policy_net, value_net = create_networks(NUM_ACTIONS)

    # Global step iterator
    global_counter = itertools.count()

    # Save returns
    returns_list = []

    # Create workers
    workers = []
    for worker_id in range(NUM_WORKERS):
        worker = Worker(name="worker_{}".format(worker_id),
                        env=Env(),
                        policy_net=policy_net,
                        value_net=value_net,
                        global_counter=global_counter,
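# NOTE: The snippet above is cut off before the workers are actually started.
# In typical TF1-style A3C scripts the workers run on separate threads under a
# tf.train.Coordinator. The sketch below is a minimal illustration under the
# assumption that each Worker exposes a run(sess, coord, t_max) method; the
# method name and the t_max value are assumptions, not taken from this code.

import threading

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    coord = tf.train.Coordinator()

    worker_threads = []
    for worker in workers:
        # Each worker repeatedly copies the global weights, collects up to
        # t_max steps of experience, and pushes gradients back to the global
        # networks via its train ops.
        t = threading.Thread(target=lambda w=worker: w.run(sess, coord, t_max=5))
        t.start()
        worker_threads.append(t)

    # Block until all workers signal completion (or max_global_steps is hit).
    coord.join(worker_threads)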