def critic_network(self, gtemp, name, action_input_ph, reuse=False):
    """Build the critic head on top of a graph network's edge output.

    An ``EncodeProcessDecode`` model is applied to ``self.inputPh`` for one
    processing step. Its edge output is transposed, cut to a
    ``[1, len(gtemp.edges)]`` window, concatenated with ``action_input_ph``
    along axis 1 and passed through dense(64) -> ReLU -> dense(1).

    Args:
        gtemp: Graph template; only ``len(gtemp.edges)`` is read here.
        name: Variable scope under which every op/variable is created.
        action_input_ph: Tensor concatenated to the edge row on axis 1
            (assumed to have a leading dimension of 1 -- TODO confirm).
        reuse: When true, variables already living in scope ``name`` are
            reused instead of being created again.

    Returns:
        The output tensor of the final single-unit dense layer.
    """
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        edge_count = len(gtemp.edges)

        graph_net = models.EncodeProcessDecode(edge_output_size=1,
                                               node_output_size=0)
        # One processing step -> a list holding a single output graph.
        step_outputs = graph_net(self.inputPh, 1)

        # Turn the per-edge column into a row and keep the first
        # ``edge_count`` entries of that row.
        edge_row = tf.transpose(step_outputs[0].edges)
        edge_row = tf.slice(edge_row, [0, 0], [1, edge_count])

        critic_input = tf.concat([edge_row, action_input_ph], axis=1)
        hidden = tf.nn.relu(tf.layers.dense(critic_input, 64))
        return tf.layers.dense(hidden, 1)
def actor_network(self, gtemp, name):
    """Build the actor: a graph network whose edge output is the action row.

    An ``EncodeProcessDecode`` model is applied to ``self.inputPh`` for one
    processing step inside variable scope ``name``. The edge output of that
    step is transposed and cut to a ``[1, len(gtemp.edges)]`` window.

    Args:
        gtemp: Graph template; only ``len(gtemp.edges)`` is read here.
        name: Variable scope under which every op/variable is created.

    Returns:
        A pair ``(output_graphs, edge_row)``: the list returned by the
        model call and the sliced edge tensor derived from its first entry.
    """
    with tf.variable_scope(name) as scope:
        edge_count = len(gtemp.edges)

        graph_net = models.EncodeProcessDecode(edge_output_size=1,
                                               node_output_size=0)
        output_graphs = graph_net(self.inputPh, 1)

        # Per-edge column -> row, then keep the first ``edge_count`` entries.
        edge_row = tf.slice(
            tf.transpose(output_graphs[0].edges),
            [0, 0],
            [1, edge_count],
        )
        return output_graphs, edge_row
def __init__(self, gtemp):
    """Seed the RNGs, build the training graph and open a session.

    Everything is created inside variable scope ``"defscope"`` after the
    default TensorFlow graph has been reset.

    Args:
        gtemp: Graph template handed to ``self._gtmp2intmp`` and
            ``self._gtmp2ttmp`` to derive the input/target placeholders.
    """
    tf.reset_default_graph()
    with tf.variable_scope("defscope"):
        # Hyper-parameters.
        self.SEED = 3
        self.GAMMA = 0.99
        self.BATCH = 5

        # Seed every random source from the same value.
        random.seed(a=self.SEED)
        self.random = np.random.RandomState(seed=self.SEED)
        np.random.seed(self.SEED)
        tf.set_random_seed(self.SEED)

        # Replay memory.
        self.D = deque()

        # Initialize and set up the network: placeholders shaped after the
        # template graph.
        input_template = self._gtmp2intmp(gtemp)
        self.inputPh = utils_tf.placeholders_from_networkxs([input_template])
        target_template = self._gtmp2ttmp(gtemp)
        self.targetPh = utils_tf.placeholders_from_networkxs(
            [target_template])

        # Model unrolled for ``numProcessingSteps`` steps; the training loss
        # is the sum of the per-step losses divided by the step count.
        self.numProcessingSteps = 10
        self.model = models.EncodeProcessDecode(edge_output_size=1,
                                                node_output_size=0)
        self.output_ops_tr = self.model(self.inputPh,
                                        self.numProcessingSteps)
        self.loss_ops_tr = self._create_loss_ops(self.targetPh,
                                                 self.output_ops_tr)
        self.loss_op_tr = sum(self.loss_ops_tr) / self.numProcessingSteps

        # Optimizer.
        self.learning_rate = 1e-3
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate)
        self.step_op = self.optimizer.minimize(self.loss_op_tr)

        runnable = self._make_all_runnable_in_session(self.inputPh,
                                                      self.targetPh)
        self.inputPh, self.targetPh = runnable

        # Session with freshly initialized variables.
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

        self.epsilon = 0.01
        self.outg = None
# --- Model parameters -------------------------------------------------------
num_processing_steps_tr = 1
num_processing_steps_ge = 1

# --- Data / training parameters ---------------------------------------------
num_training_iterations = 100000
batch_size_tr = 256
batch_size_ge = 100
num_time_steps = 50
step_size = 0.1
num_masses_min_max_tr = (5, 9)
dist_between_masses_min_max_tr = (0.2, 1.0)

# --- Model ------------------------------------------------------------------
model = models.EncodeProcessDecode(node_output_size=2)

# --- Data -------------------------------------------------------------------
# Base graphs for training: one (mass count, spacing) pair is drawn per
# batch element and turned into a static graph.
num_masses_tr = rand.randint(
    num_masses_min_max_tr[0],
    num_masses_min_max_tr[1],
    size=batch_size_tr,
)
dist_between_masses_tr = rand.uniform(
    dist_between_masses_min_max_tr[0],
    dist_between_masses_min_max_tr[1],
    size=batch_size_tr,
)
static_graph_tr = list(
    map(base_graph, num_masses_tr, dist_between_masses_tr))
base_graph_tr = utils_tf.data_dicts_to_graphs_tuple(static_graph_tr)

# Base graphs for testing.
# 4 masses in a chain like structure.
# NOTE(review): the previous comment said "1m apart", but the spacing
# argument passed below is 0.5 -- confirm which one is intended.
base_graph_4_ge = utils_tf.data_dicts_to_graphs_tuple(
    [base_graph(4, 0.5)] * batch_size_ge)
# 9 masses 0.5m apart in a chain like structure.
tf.reset_default_graph()

# --- Model parameters -------------------------------------------------------
num_processing_steps_tr = num_processing_steps_te = 2

# --- Data / training parameters ---------------------------------------------
num_training_iterations = 5000
batch_size_tr = 256
batch_size_ge = 100
num_time_steps = 50
step_size = 0.001

# --- Model ------------------------------------------------------------------
model = models.EncodeProcessDecode(edge_output_size=1)

# Base graph for training (static graph), built from the robot URDF.
static_graph = magneto_base_graph(
    'magneto/model/MagnetoSim_Dart.urdf')  # data_dicts

# Debug helpers, left disabled:
# print(NodeIdDict)
# print(EdgeIDDict)
# for NodeName in NodeIdDict:
#     print(NodeName)
#     print(MagnetoLink[NodeName])

# Read trajectory data and construct graphs for training.
# TODO 1
# Fields: q, q_des, dotq, dotq_des, trq, foot_ct, base_ori
traj_dicts = read_trajectory()
print("traj_dicts size = ")
# Create the model.
tf.reset_default_graph()

# Older former model, kept for reference only:
# graph_network = modules.GraphNetwork(
#     # edge_model_fn=lambda: snt.Linear(output_size=OUTPUT_EDGE_SIZE),
#     edge_model_fn=lambda: snt.nets.MLP([256, 256, 1]),
#     # edge_model_fn=lambda: MyMLP("edge_gnn"),
#     # node_model_fn=lambda: snt.Linear(output_size=OUTPUT_NODE_SIZE),
#     node_model_fn=lambda: snt.nets.MLP([256, 256, 1]),
#     # global_model_fn=lambda: snt.Linear(output_size=OUTPUT_GLOBAL_SIZE))
#     global_model_fn=lambda: snt.nets.MLP([256, 256, 1]))

# Encode-process-decode network.
graph_network = models.EncodeProcessDecode(edge_output_size=1)


def concat_in_list(numpy_list):
    """Concatenate the arrays in ``numpy_list`` along their last axis."""
    arrays = list(numpy_list)
    return np.concatenate(arrays, axis=-1)


# Placeholders shaped after the first BATCH_SIZE current / desired states.
current_batch = all_current_states[0:BATCH_SIZE]
graphs_tuple_ph = utils_tf.placeholders_from_data_dicts(current_batch)
desired_batch = all_desired_states[0:BATCH_SIZE]
training_desired_ph = utils_tf.placeholders_from_data_dicts(desired_batch)

# Other TensorFlow inputs/constants.
epoch_ph = tf.placeholder(tf.float32)
start_learning_rate = tf.constant(START_LEARNING_RATE)
# Disabled settings from an earlier version of this script:
# batch_size_ge = 5
# Number of nodes per graph sampled uniformly from this range.
# num_nodes_min_max_tr = (8, 17)
# num_nodes_min_max_ge = (9, 18)

# Data.
# Input and target placeholders.
# input_ph, target_ph = create_placeholders(rand, batch_size_tr,
#                                           num_nodes_min_max_tr, theta)

# Connect the data to the model.
# Instantiate the encode-process-decode model.
import yr_models as models

model = models.EncodeProcessDecode(edge_output_size=None,
                                   node_output_size=None)

# A single processing step when debugging, ten otherwise. The explicit
# ``== True`` comparison is kept as-is so the selection is unchanged.
if debug_flag == True:
    num_processing_steps = 1
else:
    num_processing_steps = 10

# A list of outputs, one per processing step.
output_ops = model(input_ph, num_processing_steps)

# Globals of the last processing step, cast to float32
# (original note: "this output is global vec [n_graph hidsz]").
output_global = tf.cast(output_ops[-1].globals, dtype=tf.float32)
# output_ops_global = [tf.cast(output.globals, dtype=tf.float32)
#                      for output in output_ops]
# print ''

# Encode graph -> vec [n_graph, hid]; decoder built on a 256-wide latent.
latent_dim = 256
instan = lstmDecoder(
    vocab_size=FLAGS.node_vocabsz,
    latent_dim=latent_dim,
    train_flag=True,
)
t0 = _gtmp2ttmp(gtemp)

# Placeholders shaped after the input / target versions of the template.
inputPh = utils_tf.placeholders_from_networkxs(
    [_gtmp2intmp(gtemp)])
targetPh = utils_tf.placeholders_from_networkxs(
    [_gtmp2ttmp(gtemp)])

# Concrete graphs fed into those placeholders: input state and target.
input_graphs = utils_np.networkxs_to_graphs_tuple([g0])
target_graphs = utils_np.networkxs_to_graphs_tuple([t0])
feed_dict = {
    inputPh: input_graphs,
    targetPh: target_graphs,
}

# Critic network.
criticModel = models.EncodeProcessDecode(edge_output_size=1,
                                         node_output_size=0)

# Number of edges in the template graph.
N_edges = len(gtemp.edges)

# The model call returns a list of GraphsTuple of placeholders; take the
# edge output of the first entry and lay it out as a row.
critic_output_ops = criticModel(inputPh, 1)
critic_output_ops_edge = tf.transpose(critic_output_ops[0].edges)

# The slice below has shape [1, N_edges].
# NOTE(review): the result is stored under an ``actor_`` name although it is
# derived from the critic output -- name kept because later code may use it.
actor_output_ops_edge = tf.slice(
    critic_output_ops_edge, [0, 0], [1, N_edges])
# now add in the action from other agents (attacker and uav)
def init(self, seed: int, atomizer):
    """Create the graph-network model used by this object.

    Args:
        seed: Not read by the statements visible here.
        atomizer: Not read by the statements visible here.
    """
    # Resetting the default graph is intentionally left disabled:
    # tf.reset_default_graph()
    encode_process_decode = gn_models.EncodeProcessDecode(
        global_output_size=2)
    self.model = encode_process_decode
def train():
    """Train an EncodeProcessDecode model and return the last evaluation.

    The function builds the model, the training and test/generalization
    losses and an Adam optimizer, then runs ``num_training_iterations``
    training steps. Roughly every ``log_every_seconds`` seconds it evaluates
    on a freshly generated test batch and prints a progress line.

    Returns:
        A pair ``(raw_graphs, test_values)`` from the most recent evaluation:
        the raw graphs returned by ``create_feed_dict`` and the dict fetched
        from the session (keys ``"target"``, ``"loss"``, ``"outputs"``).
        Both are ``None`` if training finished before any evaluation ran.
    """
    # Start from a clean default graph BEFORE any op is created. Resetting
    # after the model is built would leave the session attached to an empty
    # graph, so none of the ops below could be run in it.
    tf.reset_default_graph()

    seed = 2
    rand = np.random.RandomState(seed=seed)

    # Model parameters.
    # Number of processing (message-passing) steps.
    num_processing_steps_tr = 10
    num_processing_steps_ge = 10

    # Data / training parameters.
    num_training_iterations = 10000
    theta = 20  # Large values (1000+) make trees. Try 20-60 for good non-trees.
    batch_size_tr = 32
    batch_size_ge = 100
    # Number of nodes per graph sampled uniformly from this range.
    num_nodes_min_max_tr = (8, 17)
    num_nodes_min_max_ge = (16, 33)

    # Data.
    # Input and target placeholders.
    input_ph, target_ph = create_placeholders(rand, batch_size_tr,
                                              num_nodes_min_max_tr, theta)

    # Connect the data to the model.
    # Instantiate the model.
    model = models.EncodeProcessDecode(edge_output_size=2, node_output_size=2)
    # A list of outputs, one per processing step.
    output_ops_tr = model(input_ph, num_processing_steps_tr)
    output_ops_ge = model(input_ph, num_processing_steps_ge)

    # Training loss.
    loss_ops_tr = create_loss_ops(target_ph, output_ops_tr)
    # Loss across processing steps.
    loss_op_tr = sum(loss_ops_tr) / num_processing_steps_tr
    # Test/generalization loss.
    loss_ops_ge = create_loss_ops(target_ph, output_ops_ge)
    loss_op_ge = loss_ops_ge[-1]  # Loss from final processing step.

    # Optimizer.
    learning_rate = 1e-3
    optimizer = tf.train.AdamOptimizer(learning_rate)
    step_op = optimizer.minimize(loss_op_tr)

    # Lets an iterable of TF graphs be output from a session as NP graphs.
    input_ph, target_ph = make_all_runnable_in_session(input_ph, target_ph)

    # History of logged metrics (collected, not returned).
    logged_iterations = []
    losses_tr = []
    corrects_tr = []
    solveds_tr = []
    losses_ge = []
    corrects_ge = []
    solveds_ge = []

    # Defined up front so the final ``return`` is valid even when no logging
    # step fires (e.g. training completes within ``log_every_seconds``).
    raw_graphs = None
    test_values = None

    # How much time between logging and printing the current results.
    log_every_seconds = 20

    print("# (iteration number), T (elapsed seconds), "
          "Ltr (training loss), Lge (test/generalization loss), "
          "Ctr (training fraction nodes/edges labeled correctly), "
          "Str (training fraction examples solved correctly), "
          "Cge (test/generalization fraction nodes/edges labeled correctly), "
          "Sge (test/generalization fraction examples solved correctly)")

    # The context manager closes the session when training ends or fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        start_time = time.time()
        last_log_time = start_time
        for iteration in range(num_training_iterations):
            feed_dict, _ = create_feed_dict(rand, batch_size_tr,
                                            num_nodes_min_max_tr, theta,
                                            input_ph, target_ph)
            train_values = sess.run(
                {
                    "step": step_op,
                    "target": target_ph,
                    "loss": loss_op_tr,
                    "outputs": output_ops_tr
                },
                feed_dict=feed_dict)

            the_time = time.time()
            elapsed_since_last_log = the_time - last_log_time
            if elapsed_since_last_log <= log_every_seconds:
                continue

            # Time to evaluate on a fresh test/generalization batch and log.
            last_log_time = the_time
            feed_dict, raw_graphs = create_feed_dict(
                rand, batch_size_ge, num_nodes_min_max_ge, theta,
                input_ph, target_ph)
            test_values = sess.run(
                {
                    "target": target_ph,
                    "loss": loss_op_ge,
                    "outputs": output_ops_ge
                },
                feed_dict=feed_dict)
            correct_tr, solved_tr = compute_accuracy(
                train_values["target"], train_values["outputs"][-1],
                use_edges=True)
            correct_ge, solved_ge = compute_accuracy(
                test_values["target"], test_values["outputs"][-1],
                use_edges=True)
            elapsed = time.time() - start_time

            losses_tr.append(train_values["loss"])
            corrects_tr.append(correct_tr)
            solveds_tr.append(solved_tr)
            losses_ge.append(test_values["loss"])
            corrects_ge.append(correct_ge)
            solveds_ge.append(solved_ge)
            logged_iterations.append(iteration)
            print("# {:05d}, T {:.1f}, Ltr {:.4f}, Lge {:.4f}, Ctr {:.4f}, Str"
                  " {:.4f}, Cge {:.4f}, Sge {:.4f}".format(
                      iteration, elapsed, train_values["loss"],
                      test_values["loss"], correct_tr, solved_tr,
                      correct_ge, solved_ge))

    return raw_graphs, test_values