Example #1
    def __init__(self,
                 name,
                 env,
                 policy_net,
                 value_net,
                 global_counter,
                 returns_list,
                 discount_factor=0.99,
                 max_global_steps=None):

        self.name = name
        self.env = env
        # Give each worker its own timestamped TensorBoard run directory
        now = datetime.now()
        dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
        pathname = dt_string
        tensorboard_name = "results" + '/runs/' + str(name) + "_" + pathname
        self.writer = SummaryWriter(tensorboard_name)
        self.global_policy_net = policy_net
        self.global_value_net = value_net
        self.global_counter = global_counter
        self.discount_factor = discount_factor
        self.max_global_steps = max_global_steps
        self.global_step = tf.train.get_global_step()
        self.img_transformer = ImageTransformer()
        self.steps = 0
        self.vid_path = "vid/"
        self.saver = tf.train.Saver()
        # Create local policy and value networks that belong only to this worker
        with tf.variable_scope(name):
            # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
            # self.value_net = ValueNetwork()
            self.policy_net, self.value_net = create_networks(
                policy_net.num_outputs)

        # We will use this op to copy the global network weights
        # back to the local policy and value networks
        self.copy_params_op = get_copy_params_op(
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                              scope="global"),
            tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                              scope=self.name + '/'))

        # These will take the gradients from the local networks
        # and use those gradients to update the global network
        self.vnet_train_op = make_train_op(self.value_net,
                                           self.global_value_net)
        self.pnet_train_op = make_train_op(self.policy_net,
                                           self.global_policy_net)

        self.state = None  # Keep track of the current state
        self.total_reward = 0.  # After each episode print the total (sum of) reward
        self.returns_list = returns_list  # Global returns list to plot later
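
The helpers get_copy_params_op and make_train_op are called above but not shown on this page. Below is a minimal, hedged sketch of what they presumably do, inferred from the surrounding comments: the copy op pairs up global and worker variables and assigns the global values to the local copies, and the train op applies locally computed gradients to the global variables. The grads_and_vars and optimizer attributes of the network objects, and the gradient-clipping norm of 5.0, are assumptions.

# Sketch only -- assumes TF1-style graph mode (import tensorflow as tf)
def get_copy_params_op(global_vars, local_vars):
    # Sort both collections by name so corresponding variables pair up,
    # then return ops that overwrite each local variable with its global twin.
    global_vars = sorted(global_vars, key=lambda v: v.name)
    local_vars = sorted(local_vars, key=lambda v: v.name)
    return [local_v.assign(global_v)
            for global_v, local_v in zip(global_vars, local_vars)]


def make_train_op(local_net, global_net):
    # Take the gradients computed on the worker's local network, clip them,
    # and apply them to the corresponding variables of the shared global network.
    local_grads, _ = zip(*local_net.grads_and_vars)
    local_grads, _ = tf.clip_by_global_norm(local_grads, 5.0)
    _, global_vars = zip(*global_net.grads_and_vars)
    return global_net.optimizer.apply_gradients(
        list(zip(local_grads, global_vars)),
        global_step=tf.train.get_global_step())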
Example #2

  def __init__(
      self,
      name,
      env,
      policy_net,
      value_net,
      global_counter,
      returns_list,
      discount_factor=0.99,
      max_global_steps=None):

    self.name = name
    self.env = env
    self.global_policy_net = policy_net
    self.global_value_net = value_net
    self.global_counter = global_counter
    self.discount_factor = discount_factor
    self.max_global_steps = max_global_steps
    self.global_step = tf.train.get_global_step()
    self.img_transformer = ImageTransformer()

    # Create local policy and value networks that belong only to this worker
    with tf.variable_scope(name):
      # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
      # self.value_net = ValueNetwork()
      self.policy_net, self.value_net = create_networks(policy_net.num_outputs)

    # We will use this op to copy the global network weights
    # back to the local policy and value networks
    self.copy_params_op = get_copy_params_op(
      tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="global"),
      tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.name+'/'))

    # These will take the gradients from the local networks
    # and use those gradients to update the global network
    self.vnet_train_op = make_train_op(self.value_net, self.global_value_net)
    self.pnet_train_op = make_train_op(self.policy_net, self.global_policy_net)

    self.state = None # Keep track of the current state
    self.total_reward = 0. # After each episode print the total (sum of) reward
    self.returns_list = returns_list # Global returns list to plot later
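
create_networks is likewise not shown here, but the commented-out lines inside the variable scope above suggest it does little more than instantiate the two network classes. A minimal sketch under that assumption:

def create_networks(num_outputs):
  # Assumed to build the policy and value networks in the caller's variable scope
  policy_network = PolicyNetwork(num_outputs=num_outputs)
  value_network = ValueNetwork()
  return policy_network, value_network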
Example #3

  def __init__(
      self,
      name,
      env,
      policy_net,
      value_net,
      global_counter,
      returns_list,
      discount_factor=0.99,
      max_global_steps=None):

    self.name = name
    self.env = env
    self.global_policy_net = policy_net
    self.global_value_net = value_net
    self.global_counter = global_counter
    self.discount_factor = discount_factor
    self.max_global_steps = max_global_steps
    self.global_step = tf.compat.v1.train.get_global_step()
    self.img_transformer = ImageTransformer()

    # Create local policy and value networks that belong only to this worker
    with tf.compat.v1.variable_scope(name):
      # self.policy_net = PolicyNetwork(num_outputs=policy_net.num_outputs)
      # self.value_net = ValueNetwork()
      self.policy_net, self.value_net = create_networks(policy_net.num_outputs)

    # We will use this op to copy the global network weights
    # back to the local policy and value networks
    self.copy_params_op = get_copy_params_op(
      tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope="global"),
      tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope=self.name+'/'))

    # These will take the gradients from the local networks
    # and use those gradients to update the global network
    self.vnet_train_op = make_train_op(self.value_net, self.global_value_net)
    self.pnet_train_op = make_train_op(self.policy_net, self.global_policy_net)

    self.state = None # Keep track of the current state
    self.total_reward = 0. # After each episode print the total (sum of) reward
    self.returns_list = returns_list # Global returns list to plot later
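
Neither the worker's run loop nor its update step appears on this page. The sketch below only illustrates how the three ops built in __init__ typically fit together in an A3C worker; update_sketch and collect_rollout are hypothetical names, and collect_rollout stands in for the code that steps self.env with self.policy_net and packs the resulting states, actions and advantage/return targets into a feed_dict.

  def update_sketch(self, sess, collect_rollout):
    # 1) Pull the latest global weights into this worker's local networks.
    sess.run(self.copy_params_op)
    # 2) Gather a short rollout (placeholder callable, see note above).
    feed_dict = collect_rollout()
    # 3) Apply the gradients computed on the local networks to the global ones.
    sess.run([self.pnet_train_op, self.vnet_train_op], feed_dict)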
Example #4

    # Tail of a moving-average smoothing helper (reconstruction sketched below)
    y[i] = float(x[start:(i+1)].sum()) / (i - start + 1)
  return y
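
The two lines above are the tail of a return-smoothing helper used for plotting; a hedged reconstruction is given below. The running-average window of 100 entries is an assumption.

import numpy as np

def smooth(x):
  # Running average over (up to) the last 100 entries, e.g. of returns_list
  x = np.array(x)
  n = len(x)
  y = np.zeros(n)
  for i in range(n):
    start = max(0, i - 99)
    y[i] = float(x[start:(i+1)].sum()) / (i - start + 1)
  return y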


# Set the number of workers
NUM_WORKERS = multiprocessing.cpu_count()

with tf.device("/cpu:0"):

  # Keeps track of the number of updates we've performed
  # https://www.tensorflow.org/api_docs/python/tf/train/global_step
  global_step = tf.Variable(0, name="global_step", trainable=False)

  # Global policy and value nets
  with tf.variable_scope("global") as vs:
    policy_net, value_net = create_networks(NUM_ACTIONS)

  # Global step iterator
  global_counter = itertools.count()

  # Save returns
  returns_list = []

  # Create workers
  workers = []
  for worker_id in range(NUM_WORKERS):
    worker = Worker(
      name="worker_{}".format(worker_id),
      env=Env(),
      policy_net=policy_net,
      value_net=value_net,
Example #5

        # Tail of the same smoothing helper as in Example #4 (sketched above)
        y[i] = float(x[start:(i + 1)].sum()) / (i - start + 1)
    return y


# Set the number of workers
NUM_WORKERS = multiprocessing.cpu_count()

with tf.device("/cpu:0"):

    # Keeps track of the number of updates we've performed
    # https://www.tensorflow.org/api_docs/python/tf/train/global_step
    global_step = tf.Variable(0, name="global_step", trainable=False)

    # Global policy and value nets
    with tf.compat.v1.variable_scope("global") as vs:
        policy_net, value_net = create_networks(NUM_ACTIONS)

    # Global step iterator
    global_counter = itertools.count()

    # Save returns
    returns_list = []

    # Create workers
    workers = []
    for worker_id in range(NUM_WORKERS):
        worker = Worker(name="worker_{}".format(worker_id),
                        env=Env(),
                        policy_net=policy_net,
                        value_net=value_net,
                        global_counter=global_counter,