Example #1
    def __init__(self, env, policy_net, summary_writer, saver=None):

        self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
        self.video_dir = os.path.abspath(self.video_dir)

        self.env = Monitor(env, directory=self.video_dir, video_callable=lambda x: True, resume=True)
        self.global_policy_net = policy_net
        self.summary_writer = summary_writer
        self.saver = saver
        self.sp = StateProcessor()

        self.checkpoint_path = os.path.abspath(os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        try:
            os.makedirs(self.video_dir)
        except FileExistsError:
            pass

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(policy_net.num_outputs)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
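The examples here all rely on make_copy_params_op without showing its definition. A minimal sketch, assuming the helper pairs global and local variables by sorted name and returns the corresponding assign ops (the usual convention in A3C codebases; treat the details as an assumption):

def make_copy_params_op(v1_list, v2_list):
    # Assumed implementation: copy each variable in v1_list into the
    # variable at the matching (sorted-by-name) position in v2_list.
    v1_list = sorted(v1_list, key=lambda v: v.name)
    v2_list = sorted(v2_list, key=lambda v: v.name)
    return [v2.assign(v1) for v1, v2 in zip(v1_list, v2_list)]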
Example #2
  def __init__(self, env, policy_net, summary_writer, saver=None):


    self.global_policy_net = policy_net
    self.summary_writer = summary_writer
    self.saver = saver
    #self.sp = StateProcessor()

    self.env = CDLL('./PythonAccessToSim.so')
    self.env.step.restype = step_result
    self.env.send_command.restype = c_int
    self.env.initialize.restype = c_int
    self.env.recieve_state_gui.restype = step_result  # "recieve" (sic) matches the symbol name exported by the .so

    self.actions = list(range(0,3*Num_Targets))

    self.checkpoint_path = os.path.abspath(os.path.join(summary_writer.get_logdir(), "./checkpoints/model"))
    print(self.checkpoint_path)
    # Local policy net
    with tf.variable_scope("policy_eval"):
      self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
      tf.contrib.slim.get_variables(scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
      tf.contrib.slim.get_variables(scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
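Example #2 drives the simulator through ctypes, so step_result must be a ctypes.Structure whose fields mirror the C struct returned by step and recieve_state_gui. A hypothetical sketch (the field names and types below are assumptions, not taken from the source):

from ctypes import Structure, c_float, c_int

class step_result(Structure):
    # Hypothetical layout; the real _fields_ must match the C struct
    # in PythonAccessToSim.so exactly, in order and in type.
    _fields_ = [("reward", c_float),
                ("done", c_int)]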
Example #3
  def __init__(self, env, policy_net, summary_writer, saver=None):

    self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
    self.video_dir = os.path.abspath(self.video_dir)

    self.env = Monitor(env, directory=self.video_dir, video_callable=lambda x: True, resume=True)
    self.global_policy_net = policy_net
    self.summary_writer = summary_writer
    self.saver = saver
    self.sp = StateProcessor()

    self.checkpoint_path = os.path.abspath(os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

    try:
      os.makedirs(self.video_dir)
    except FileExistsError:
      pass

    # Local policy net
    with tf.variable_scope("policy_eval"):
      self.policy_net = PolicyEstimator(policy_net.num_outputs)

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
      tf.contrib.slim.get_variables(scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
      tf.contrib.slim.get_variables(scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
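For reference, gym's Monitor wrapper used above records a video for every episode when video_callable always returns True, and resume=True appends to an existing monitor directory instead of raising. An illustrative standalone use (the env id here is just an example):

import gym
from gym.wrappers import Monitor

env = Monitor(gym.make("CartPole-v0"), directory="/tmp/videos",
              video_callable=lambda episode_id: True,  # record every episode
              resume=True)  # keep and extend existing monitor files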
Example #4
    def __init__(self, env, policy_net, summary_writer, saver=None):

        self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
        self.video_dir = os.path.abspath(self.video_dir)

        self.env = Monitor(env,
                           directory=self.video_dir,
                           video_callable=lambda x: True,
                           resume=True)
        self.global_policy_net = policy_net
        self.summary_writer = summary_writer
        self.saver = saver

        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        try:
            os.makedirs(self.video_dir)
        except OSError as e:
            # FileExistsError was only added in Python 3.3, so for Python 2
            # compatibility catch OSError and check errno.EEXIST instead
            # (requires `import errno`). See
            # https://stackoverflow.com/questions/20790580/python-specifically-handle-file-exists-exception
            if e.errno != errno.EEXIST:
                raise

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(policy_net.num_outputs)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
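On Python 3.2 and later, the whole try/except above collapses to a single call; the errno pattern is only needed for Python 2 compatibility:

os.makedirs(self.video_dir, exist_ok=True)  # no-op if the directory already exists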
Example #5
    def __init__(self, env, global_net, summary_writer, saver=None):
        self.env = env
        self.global_net = global_net
        self.summary_writer = summary_writer
        self.saver = saver
        self.sp = StateProcessor()

        self.video_dir = os.path.join(summary_writer.get_logdir(), "../videos")
        self.video_dir = os.path.abspath(self.video_dir)
        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        try:
            os.makedirs(self.video_dir)
        except OSError:
            # The video directory may already exist.
            pass

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.local_net = PolicyValueEstimator()

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
Example #6
    def __init__(self, env, policy_net, summary_writer, saver=None):

        self.global_policy_net = policy_net
        self.summary_writer = summary_writer
        self.saver = saver
        self.env = env

        # Correct the path
        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))
        print('[PM] checkpoint_path: {}'.format(self.checkpoint_path))

        # Local policy net
        with tf.variable_scope("policy_eval"):
            if LSTM_POLICY:
                self.policy_net = LSTMPolicyEstimator(policy_net.num_outputs)
            else:
                self.policy_net = PolicyEstimator(policy_net.num_outputs)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
Example #7
    def __init__(self, env, policy_net):
        self.env = env
        self.global_policy_net = policy_net

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(policy_net.num_outputs)

        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(scope="policy_eval", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
Example #8
  def __init__(self, env, policy_net, task):

    self.env = env
    self.global_policy_net = policy_net
    self.task = task

    # Local policy net
    with tf.variable_scope("policy_visualization"):
      self.policy_net = PolicyEstimator(policy_net.num_outputs, state_dims=self.env.get_state_size())

    # Op to copy params from global policy/value net parameters
    self.copy_params_op = make_copy_params_op(
      tf.contrib.slim.get_variables(scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
      tf.contrib.slim.get_variables(scope="policy_visualization", collection=tf.GraphKeys.TRAINABLE_VARIABLES))
Example #9
    def __init__(self,
                 env,
                 env_id,
                 curriculum,
                 policy_net,
                 saver=None,
                 n_eval=10,
                 logfile=None,
                 checkpoint_path=None):

        self.env = env
        self.env_id = env_id
        self.curriculum = curriculum
        self.global_policy_net = policy_net
        self.saver = saver
        self.n_eval = n_eval
        self.checkpoint_path = checkpoint_path
        self.logger = logging.getLogger('eval runs {}'.format(env_id))
        hdlr = logging.FileHandler(logfile)
        formatter = logging.Formatter(
            '[%(asctime)s] [%(levelname)s] %(message)s')
        hdlr.setFormatter(formatter)
        self.logger.addHandler(hdlr)
        self.logger.setLevel(logging.INFO)

        # Local policy net
        with tf.variable_scope("policy_eval_{}".format(env_id)):
            self.policy_net = PolicyEstimator(
                policy_net.num_outputs, state_dims=self.env.get_state_size())

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global_{}".format(env_id),
                collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval_{}".format(env_id),
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
        self.epochs = 0
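A caveat on the logging setup in this example: logging.getLogger returns the same logger object for a given name, so constructing the class twice with the same env_id attaches a second FileHandler and duplicates every line. A common guard (an addition, not in the source):

self.logger = logging.getLogger('eval runs {}'.format(env_id))
if not self.logger.handlers:  # only attach a handler the first time
    hdlr = logging.FileHandler(logfile)
    hdlr.setFormatter(logging.Formatter('[%(asctime)s] [%(levelname)s] %(message)s'))
    self.logger.addHandler(hdlr)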
Example #10
    def __init__(self, env, policy_net, summary_writer, saver=None):

        self.env = env
        self.global_policy_net = policy_net
        self.summary_writer = summary_writer
        self.saver = saver
        self.counter = 0

        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(policy_net.num_outputs,
                                              policy_net.observation_space)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
Example #11
  # different policy and value nets for all tasks
  policy_nets = []
  value_nets = []
  for e in range(len(envs)):
    with tf.variable_scope("global_{}".format(e)) as vs:
      policy_nets.append(PolicyEstimator(
        num_outputs=len(VALID_ACTIONS), state_dims=envs[e].get_state_size()))
      value_nets.append(ValueEstimator(
        reuse=True, state_dims=envs[e].get_state_size()))
  if FLAGS.shared_final_layer:
    # make all final layer weights the same
    initial_copy_ops = []
    for e in range(1, len(envs)):
      initial_copy_ops += make_copy_params_op(
        tf.contrib.slim.get_variables(scope="global_0/policy_net", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(scope="global_{}/policy_net".format(e), collection=tf.GraphKeys.TRAINABLE_VARIABLES))
      initial_copy_ops += make_copy_params_op(
        tf.contrib.slim.get_variables(scope="global_0/value_net", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
        tf.contrib.slim.get_variables(scope="global_{}/value_net".format(e), collection=tf.GraphKeys.TRAINABLE_VARIABLES))

  # Global step iterator
  global_counter = itertools.count()

  # Create worker graphs
  workers = []
  for worker_id in range(NUM_WORKERS):
    # add a curriculum for avoid task
    env_id = worker_id % len(envs)
    curriculum = None
    if 'avoid' in envs[env_id].task:
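The initial_copy_ops built above only take effect when run in a session. A sketch of the one-time sync before training starts (the session boilerplate here is assumed, not shown in the source):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    if FLAGS.shared_final_layer:
        # Overwrite every task's final-layer weights with global_0's
        # so all tasks start from identical shared parameters.
        sess.run(initial_copy_ops)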
Example #12
    def __init__(self,
                 envs,
                 policy_net,
                 domain,
                 instances,
                 summary_writer,
                 saver=None):

        self.stats_dir = os.path.join(summary_writer.get_logdir(), "../stats")
        self.stats_dir = os.path.abspath(self.stats_dir)

        self.n = envs[0].num_state_vars
        self.domain = domain
        self.instances = instances
        self.N = len(instances)

        self.envs = envs
        self.global_policy_net = policy_net

        # Construct adjacency list
        self.adjacency_lists = [None] * self.N
        self.single_adj_preprocessed_list = [None] * self.N

        for i in range(self.N):
            self.instance_parser = InstanceParser(self.domain,
                                                  self.instances[i])
            self.fluent_feature_dims, self.nonfluent_feature_dims = self.instance_parser.get_feature_dims(
            )
            self.nf_features = self.instance_parser.get_nf_features()
            adjacency_list = self.instance_parser.get_adjacency_list()
            self.adjacency_lists[i] = nx.adjacency_matrix(
                nx.from_dict_of_lists(adjacency_list))
            self.single_adj_preprocessed_list[i] = preprocess_adj(
                self.adjacency_lists[i])

        self.summary_writer = summary_writer
        self.saver = saver

        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        try:
            os.makedirs(self.stats_dir)
        except OSError:
            # The stats directory may already exist.
            pass

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(
                policy_net.num_inputs, policy_net.N, policy_net.num_hidden1,
                policy_net.num_hidden2, policy_net.num_hidden_transition,
                policy_net.num_outputs, policy_net.fluent_feature_dims,
                policy_net.nonfluent_feature_dims, policy_net.activation,
                policy_net.learning_rate)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))

        self.num_inputs = policy_net.num_inputs
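preprocess_adj is not shown; in GCN-style codebases it typically applies the Kipf & Welling (2017) renormalization D^{-1/2}(A + I)D^{-1/2}. A sketch under that assumption (the real helper may also convert the result into whatever feed format the network expects):

import numpy as np
import scipy.sparse as sp

def preprocess_adj(adj):
    # Assumed behaviour: add self-loops, then symmetrically normalize.
    adj = sp.coo_matrix(adj) + sp.eye(adj.shape[0])
    rowsum = np.array(adj.sum(1)).flatten()
    d_inv_sqrt = np.power(rowsum, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
    d_mat = sp.diags(d_inv_sqrt)
    return d_mat.dot(adj).dot(d_mat).tocoo()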
Example #13
    def __init__(self,
                 envs,
                 policy_net,
                 domain,
                 instances,
                 neighbourhood,
                 summary_writer,
                 saver=None):

        self.stats_dir = os.path.join(summary_writer.get_logdir(), "../stats")
        self.stats_dir = os.path.abspath(self.stats_dir)

        self.domain = domain
        self.instances = instances
        self.N = len(instances)
        self.num_nodes_list = policy_net.num_nodes_list
        self.num_adjacency_list = policy_net.num_adjacency_list

        self.envs = envs
        self.global_policy_net = policy_net

        # Construct adjacency list
        self.adjacency_lists = [None] * self.N
        self.nf_features = [None] * self.N
        self.adjacency_lists_with_biases = [None] * self.N

        for i in range(self.N):
            self.fluent_feature_dims, self.nonfluent_feature_dims = self.envs[
                i].get_feature_dims()
            self.nf_features[i] = self.envs[i].get_nf_features()

            adjacency_list = self.envs[i].get_adjacency_list()
            self.adjacency_lists[i] = [
                get_adj_mat_from_list(aj) for aj in adjacency_list
            ]
            self.adjacency_lists_with_biases[i] = [
                process.adj_to_bias(np.array([aj]), [self.num_nodes_list[i]],
                                    nhood=neighbourhood)[0]
                for aj in self.adjacency_lists[i]
            ]

        self.summary_writer = summary_writer
        self.saver = saver

        self.checkpoint_path = os.path.abspath(
            os.path.join(summary_writer.get_logdir(), "../checkpoints/model"))

        try:
            os.makedirs(self.stats_dir)
        except OSError:
            # The stats directory may already exist.
            pass

        # Local policy net
        with tf.variable_scope("policy_eval"):
            self.policy_net = PolicyEstimator(
                policy_net.num_nodes_list, policy_net.fluent_feature_dims,
                policy_net.nonfluent_feature_dims, policy_net.N,
                policy_net.num_valid_actions_list,
                policy_net.action_details_list,
                policy_net.num_graph_fluent_list, policy_net.num_gcn_hidden,
                policy_net.num_action_dim, policy_net.num_decoder_dim,
                policy_net.num_adjacency_list, policy_net.num_gat_layers,
                policy_net.activation, policy_net.learning_rate)

        # Op to copy params from global policy/value net parameters
        self.copy_params_op = make_copy_params_op(
            tf.contrib.slim.get_variables(
                scope="global", collection=tf.GraphKeys.TRAINABLE_VARIABLES),
            tf.contrib.slim.get_variables(
                scope="policy_eval",
                collection=tf.GraphKeys.TRAINABLE_VARIABLES))
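process.adj_to_bias turns an adjacency matrix into an additive attention mask for GAT layers. The public GAT reference implementation does roughly the following; treat it as an approximation of what this project imports:

import numpy as np

def adj_to_bias(adj, sizes, nhood=1):
    # Mark node pairs reachable within `nhood` hops (plus self-loops) as
    # allowed (bias 0); everything else gets -1e9 so that softmax
    # attention effectively ignores those edges.
    nb_graphs = adj.shape[0]
    mt = np.empty(adj.shape)
    for g in range(nb_graphs):
        mt[g] = np.eye(adj.shape[1])
        for _ in range(nhood):
            mt[g] = np.matmul(mt[g], adj[g] + np.eye(adj.shape[1]))
        for i in range(sizes[g]):
            for j in range(sizes[g]):
                if mt[g][i][j] > 0.0:
                    mt[g][i][j] = 1.0
    return -1e9 * (1.0 - mt)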