示例#1
0
    def __init__(self, param, example_seq_input, example_pattern_input, example_target):
        """Build the ProteinGNN graph: placeholders, logits, losses and train op.

        Args:
            param: Mapping of hyper-parameters. The keys read here are
                "train_mp_iterations", "test_mp_iterations", "learning_rate"
                and "optimizer_momentum"; the whole mapping is also passed to
                ``ProteinGNN``.
            example_seq_input: Example data dict used to shape the sequence
                input placeholder.
            example_pattern_input: Example data dict used to shape the pattern
                input placeholder.
            example_target: Example data dict used to shape the target
                placeholder.
        """

        self.model = ProteinGNN(param)

        # Each placeholder is shaped after a single example data dict.
        self.seq_input_ph = utils_tf.placeholders_from_data_dicts([example_seq_input])
        self.pattern_input_ph = utils_tf.placeholders_from_data_dicts([example_pattern_input])
        self.target_ph = utils_tf.placeholders_from_data_dicts([example_target])

        # The same model is connected twice, once per message-passing depth.
        self.train_logits = self.model(self.seq_input_ph, self.pattern_input_ph, param["train_mp_iterations"])
        self.test_logits = self.model(self.seq_input_ph, self.pattern_input_ph, param["test_mp_iterations"])

        def create_loss(logits):
            """Return the summed softmax cross-entropy of ``logits`` vs. the targets."""
            # NOTE(review): the edge features are compared because the model
            # outputs below are read from `.edges`; confirm that the targets
            # are stored on the edges as well.
            loss_op = tf.math.reduce_sum(
                tf.losses.softmax_cross_entropy(self.target_ph.edges, logits.edges))

            return loss_op

        self.loss_train = create_loss(self.train_logits)
        self.loss_test = create_loss(self.test_logits)

        # Only the training loss is optimised.
        optimizer = tf.train.MomentumOptimizer(param["learning_rate"], param["optimizer_momentum"])
        self.step_op = optimizer.minimize(self.loss_train)

        # Session-runnable versions of the placeholders, so they can be fetched.
        self.seq_input_ph_run = utils_tf.make_runnable_in_session(self.seq_input_ph)
        self.pattern_input_ph_run = utils_tf.make_runnable_in_session(self.pattern_input_ph)
        self.target_ph_run = utils_tf.make_runnable_in_session(self.target_ph)

        self.train_out = tf.sigmoid(self.train_logits.edges)
        self.test_out = tf.sigmoid(self.test_logits.edges)
示例#2
0
def build_gnn(args,input_shapes):
    """Build the protein/ligand GNN pair, the classifier head and the loss.

    Args:
        args: Object exposing ``num_features``, ``gnn_layers``, ``mlp_latent``
            and ``mlp_layers``; each is a "protein,ligand" comma-separated
            string of two integers.
        input_shapes: Triple ``(protein, ligand, target)`` of example data
            dicts used to shape the placeholders.

    Returns:
        Tuple ``(inputs_p_ph, inputs_l_ph, targets_ph, inputs_p_op,
        inputs_l_op, targets_op, output_ops, loss_op)``.

    Raises:
        ValueError: If the module-level ``MODE`` is neither 'classification'
            nor 'regression'.
    """

    def int_pair(raw):
        # Parse an "a,b" option string into (protein_value, ligand_value).
        parts = raw.strip().split(",")
        return int(parts[0]), int(parts[1])

    # Setup the GNN model.
    banner_print("Building model.")
    tf.reset_default_graph()
    num_features_p, num_features_l = int_pair(args.num_features)
    gnn_layers_p, gnn_layers_l = int_pair(args.gnn_layers)
    mlp_latent_p, mlp_latent_l = int_pair(args.mlp_latent)
    mlp_layers_p, mlp_layers_l = int_pair(args.mlp_layers)
    # Only the global output of each network is requested.
    gnn_model_p = models.EncodeProcessDecode(edge_output_size=None, node_output_size=None, global_output_size=num_features_p,
                                             mlp_latent=mlp_latent_p, mlp_layers=mlp_layers_p,
                                             num_processing_steps=gnn_layers_p,
                                             name="gnn_model_protein")
    gnn_model_l = models.EncodeProcessDecode(edge_output_size=None, node_output_size=None, global_output_size=num_features_l,
                                             mlp_latent=mlp_latent_l, mlp_layers=mlp_layers_l,
                                             num_processing_steps=gnn_layers_l,
                                             name="gnn_model_ligand")
    ip, il, it = input_shapes
    inputs_p_ph = utils_tf.placeholders_from_data_dicts([ip], force_dynamic_num_graphs=True)
    inputs_l_ph = utils_tf.placeholders_from_data_dicts([il], force_dynamic_num_graphs=True)
    targets_ph  = utils_tf.placeholders_from_data_dicts([it], force_dynamic_num_graphs=True)
    inputs_p_op = utils_tf.make_runnable_in_session(inputs_p_ph)
    inputs_l_op = utils_tf.make_runnable_in_session(inputs_l_ph)
    targets_op  = utils_tf.make_runnable_in_session(targets_ph)
    if DEBUG:
        print("Input protein placeholder = ", inputs_p_ph)
        print("Input ligand placeholder = ", inputs_l_ph)
        print("Target placeholder = ", targets_ph)
    output_p_ops = gnn_model_p(inputs_p_ph)
    output_l_ops = gnn_model_l(inputs_l_ph)
    # Join the globals of the first output graph of each network.
    merged_output = tf.concat([output_p_ops[0].globals,output_l_ops[0].globals],axis=-1)
    initers = { "w": tf.contrib.layers.variance_scaling_initializer(factor=2.0, mode='FAN_IN', uniform=False),
                "b": tf.truncated_normal_initializer(stddev=1.0)}
    regs = { "w": tf.contrib.layers.l1_regularizer(scale=0.1),
             "b": tf.contrib.layers.l2_regularizer(scale=0.1)}
    classifier0 = snt.Linear(num_features_p*num_features_l, initializers=initers, regularizers=regs)
    classifier1 = snt.Linear(num_features_p+num_features_l,initializers=initers, regularizers=regs)
    final_output = snt.Linear(2, initializers=initers, regularizers=regs)
    # NOTE(review): dropout (keep_prob=0.5) is baked into the graph, so it is
    # also active whenever these ops are evaluated for inference.
    hidden0 = tf.nn.dropout(classifier0(tf.nn.relu(merged_output)), keep_prob=0.5)
    hidden1 = classifier1(tf.nn.relu(hidden0))
    output_ops = [final_output(tf.nn.relu(hidden1))]
    if MODE == 'classification':
        loss_ops = [ tf.losses.softmax_cross_entropy(targets_ph.globals, output_ops[0]) ]
    elif MODE == 'regression':
        loss_ops = [ tf.losses.mean_squared_error(targets_ph.globals, output_ops[0]) ]
    else:
        # Without this branch an unknown MODE left `loss_ops` unbound.
        raise ValueError(
            "Unsupported MODE %r; expected 'classification' or 'regression'." % (MODE,))
    loss_op = sum(loss_ops)

    # Return ops and placeholders
    return inputs_p_ph, inputs_l_ph, targets_ph, inputs_p_op, inputs_l_op, targets_op, output_ops, loss_op
示例#3
0
def generate_placeholder(file, batch_size, keep_features):
    """
    Build graph placeholders shaped after the first batch read from a file.
    :param file: The path to the graph json file
    :param batch_size: Size of the batch
    :param keep_features: Whether to keep all features of the graph. Keeping them is advised for input graphs.
    :return: Placeholders created from the first batch of graph dicts
    """
    first_batch_dicts = get_first_batch_graph_dict(file, batch_size,
                                                   keep_features)
    return utils_tf.placeholders_from_data_dicts(first_batch_dicts)
示例#4
0
def create_placeholders(rand, batch_size, traj_idx_min_max_tr, static_graph,
                        trajectory):
    """Create input and target placeholders from one generated batch.

    All arguments are forwarded unchanged to ``generate_graphs_dicts``; the
    two lists of graph dicts it returns are each turned into placeholders.

    Returns:
        Tuple ``(input_ph, target_ph)``.
    """
    input_dicts, target_dicts = generate_graphs_dicts(
        rand, batch_size, traj_idx_min_max_tr, static_graph, trajectory)

    make_placeholders = utils_tf.placeholders_from_data_dicts
    return make_placeholders(input_dicts), make_placeholders(target_dicts)
示例#5
0
 def test_placeholders_from_data_dicts(self, force_dynamic_num_graphs):
   """Checks the static shapes of placeholders built from the input dicts."""
   placeholders = utils_tf.placeholders_from_data_dicts(
       self.graphs_dicts_in, force_dynamic_num_graphs=force_dynamic_num_graphs)

   # Feature fields: leading dimension unknown, trailing dimensions fixed.
   expected_feature_shapes = (
       ("nodes", [None, 7, 11]),
       ("edges", [None, 13, 14]),
       ("globals", [None, 5, 3]),
   )
   for field, expected_shape in expected_feature_shapes:
     self.assertAllEqual(expected_shape,
                         getattr(placeholders, field).shape.as_list())

   for field in ("receivers", "senders"):
     self.assertAllEqual([None], getattr(placeholders, field).shape.as_list())

   # The per-graph counts are static unless dynamic graph counts are forced.
   if force_dynamic_num_graphs:
     expected_count_shape = [None]
   else:
     expected_count_shape = [len(self.graphs_dicts_in)]
   for field in ("n_node", "n_edge"):
     self.assertAllEqual(expected_count_shape,
                         getattr(placeholders, field).shape.as_list())
示例#6
0
def snap2graph(h5file,
               day,
               tg,
               use_tf=False,
               placeholder=False,
               name=None,
               normalize=True):
    """Build a single-graph GraphsTuple (or placeholder) for one snapshot.

    Args:
        h5file: Open HDF5-like mapping holding the feature groups plus the
            'senders' and 'receivers' datasets.
        day: Day identifier; combined with ``tg`` into the key
            ``'day<day>tg<tg>'``.
        tg: Second component of the snapshot key.
        use_tf: When False a NumPy graphs tuple is returned via ``utils_np``;
            when True the TensorFlow utilities are used.
        placeholder: With ``use_tf``, return placeholders shaped after the
            snapshot instead of a tuple holding its data.
        name: Optional name passed to the TensorFlow utility; defaults to
            "placeholders_from_data_dicts" or "tuple_from_data_dicts".
        normalize: When True node and global features are read from the
            'nn_*' groups, otherwise from 'node_features' / 'glbl_features'.

    Returns:
        The graphs tuple (or placeholders) built from the snapshot.
    """
    snapstr = 'day' + str(day) + 'tg' + str(tg)
    # NOTE(review): edges are read from 'nn_edge_features' in both modes,
    # exactly as before; confirm that normalize=False is not meant to use a
    # separate edge group.
    edges = h5file['nn_edge_features/' + snapstr]
    if normalize:
        nodes = h5file['nn_node_features/' + snapstr]
        glbls = h5file['nn_glbl_features/' + snapstr]
    else:
        nodes = h5file['node_features/' + snapstr]
        glbls = h5file['glbl_features/' + snapstr]
    senders = h5file['senders']
    receivers = h5file['receivers']

    node_arr = nodes[:]
    edge_arr = edges[:]
    # Only the first row of the globals dataset is used.
    glbl_arr = glbls[0]

    # `np.float` was an alias of the builtin float (float64) and has been
    # removed from NumPy, so the concrete dtype is spelled out.
    graphdat_dict = {
        "globals": glbl_arr.astype(np.float64),
        "nodes": node_arr.astype(np.float64),
        "edges": edge_arr.astype(np.float64),
        "senders": senders[:],
        "receivers": receivers[:],
        "n_node": node_arr.shape[0],
        "n_edge": edge_arr.shape[0]
    }

    if not use_tf:
        return utils_np.data_dicts_to_graphs_tuple([graphdat_dict])

    if placeholder:
        return utils_tf.placeholders_from_data_dicts(
            [graphdat_dict], name=name or "placeholders_from_data_dicts")

    return utils_tf.data_dicts_to_graphs_tuple(
        [graphdat_dict], name=name or "tuple_from_data_dicts")
示例#7
0
def create_trained_model(config_name, input_ckpt=None):
    """Build the model graph and return a checkpoint evaluator.

    Args:
        config_name: Path of the YAML configuration (the one used for
            train_nx_graph). The keys read are ``train.batch_size``,
            ``train.n_iters``, ``prod_name``, ``output_dir``,
            ``data.output_nxgraph_dir`` and ``model.name``.
        input_ckpt: Directory holding the checkpoints. Defaults to
            ``<output_dir>/<prod_name>`` from the configuration.

    Returns:
        A callable ``evaluator(iteration, n_test_graphs=10)`` that restores
        the checkpoint of ``iteration``, runs ``n_test_graphs`` batches and
        returns the two concatenated arrays produced by
        ``utils_train.eval_output``.
    """
    # load configuration file
    config = load_yaml(config_name)
    config_tr = config['train']

    batch_size = n_graphs = config_tr['batch_size']  # need optimization
    num_processing_steps_tr = config_tr['n_iters']  ## level of message-passing
    prod_name = config['prod_name']
    if input_ckpt is None:
        input_ckpt = os.path.join(config['output_dir'], prod_name)

    # generate inputs
    generate_input_target = prepare.inputs_generator(
        config['data']['output_nxgraph_dir'], n_train_fraction=0.8)

    # build TF graph
    tf.reset_default_graph()
    model = get_model(config['model']['name'])

    input_graphs, target_graphs = generate_input_target(n_graphs)
    input_ph = utils_tf.placeholders_from_data_dicts(
        input_graphs, force_dynamic_num_graphs=True)
    target_ph = utils_tf.placeholders_from_data_dicts(
        target_graphs, force_dynamic_num_graphs=True)

    output_ops_tr = model(input_ph, num_processing_steps_tr)

    # One saver for all evaluations: building a new one on every call would
    # keep adding save/restore ops to the graph.
    saver = tf.train.Saver()

    def evaluator(iteration, n_test_graphs=10):
        """Restore checkpoint ``iteration`` and evaluate ``n_test_graphs`` batches."""
        odds = []
        tdds = []
        # The context manager closes the session once the evaluation is done;
        # the former `try: sess.close()` referenced an unbound local and
        # therefore never released anything.
        with tf.Session() as sess:
            saver.restore(
                sess, os.path.join(input_ckpt, ckpt_name.format(iteration)))
            for _ in range(n_test_graphs):
                feed_dict = utils_train.create_feed_dict(generate_input_target,
                                                         batch_size,
                                                         input_ph,
                                                         target_ph,
                                                         is_trained=False)
                predictions = sess.run(
                    {
                        "outputs": output_ops_tr,
                        'target': target_ph
                    },
                    feed_dict=feed_dict)
                # Only the output of the last message-passing step is scored.
                output = predictions['outputs'][-1]
                target = predictions['target']
                odd, tdd = utils_train.eval_output(target, output)
                odds.append(odd)
                tdds.append(tdd)
        return np.concatenate(odds), np.concatenate(tdds)

    return evaluator
示例#8
0
    def __init__(self,
                 sess,
                 env,
                 handle,
                 name,
                 len_nei=6,
                 update_every=5,
                 learning_rate=1e-4,
                 tau=0.99,
                 gamma=0.95,
                 num_bits_msg=3,
                 isHie=False,
                 is_comm=False):
        """Build the eval/target networks, the target-update ops and the train op.

        Args:
            sess: TensorFlow session, stored on the instance.
            env: Environment queried for the view, feature and action spaces
                of ``handle``.
            handle: Group handle passed to the environment queries.
            name: Variable-scope name; "GHCrpNet" is used when it is falsy.
            len_nei: Stored as ``self.len_nei``.
            update_every: Stored as ``self.update_every``.
            learning_rate: Learning rate given to the Adam optimizer.
            tau: Weight of the eval-net variables in the target update.
            gamma: Stored as ``self.gamma``.
            num_bits_msg: Stored as ``self.num_bits_msg``.
            isHie: Stored as ``self.isHie``.
            is_comm: Stored as ``self.is_comm``.
        """
        self.env = env
        self.name = name
        self._saver = None
        self.sess = sess

        self.isHie = isHie
        self.handle = handle
        self.view_space = env.get_view_space(handle)
        # The view space must have exactly three entries.
        assert len(self.view_space) == 3
        self.feature_space = env.get_feature_space(handle)
        self.num_actions = env.get_action_space(handle)[0]
        self.num_bits_msg = num_bits_msg

        self.update_every = update_every

        self.len_nei = len_nei
        # Fixed value; not configurable through the constructor.
        self.temperature = 0.1
        # Integer action ids and their one-hot encoding over all actions.
        self.act_input = tf.placeholder(tf.int32, (None, ), name="Act")
        self.act_one_hot = tf.one_hot(self.act_input,
                                      depth=self.num_actions,
                                      on_value=1.0,
                                      off_value=0.0)
        self.lr = learning_rate
        self.tau = tau
        self.gamma = gamma
        self.is_comm = is_comm
        with tf.variable_scope(name or "GHCrpNet"):
            self.name_scope = tf.get_variable_scope().name
            # Graph placeholder shaped after the data dicts from cons_datas.
            self.input_ph = utils_tf.placeholders_from_data_dicts(
                cons_datas(self.num_actions))
            # Both networks come from the same builder; their variables are
            # collected by scope name.
            with tf.variable_scope("Eval-Net"):
                self.eval_name = tf.get_variable_scope().name

                self.e_graph = self._construct_net()
                self.e_variables = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=self.eval_name)
            with tf.variable_scope("Target-Net"):
                self.target_name = tf.get_variable_scope().name
                self.t_graph = self._construct_net()
                self.t_variables = tf.get_collection(
                    tf.GraphKeys.GLOBAL_VARIABLES, scope=self.target_name)

            with tf.variable_scope("Update"):
                # target <- tau * eval + (1 - tau) * target, pairing the
                # variables by position in the two collections.
                # NOTE(review): assumes both collections list their variables
                # in the same order and have the same length - confirm.
                self.update_op = [
                    tf.assign(
                        self.t_variables[i], self.tau * self.e_variables[i] +
                        (1. - self.tau) * self.t_variables[i])
                    for i in range(len(self.t_variables))
                ]

            with tf.variable_scope("Optimization"):
                self.target_q_input = tf.placeholder(tf.float32, (None, ),
                                                     name="Q-Input")
                # Q-value of the action that was taken: the one-hot mask
                # selects a single column of e_graph.q per row.
                self.e_q_max = tf.reduce_sum(tf.multiply(
                    self.act_one_hot, self.e_graph.q),
                                             axis=1)
                # Sum of squared errors divided by the total node count of
                # the eval graph.
                self.loss = tf.reduce_sum(tf.square(self.target_q_input - self.e_q_max)) / \
                    tf.cast(tf.reduce_sum(self.e_graph.n_node), tf.float32)
                self.train_op = tf.train.AdamOptimizer(self.lr).minimize(
                    self.loss)
    def __init__(self, scope: str, model: GraphModel, reg_param):
        """Build the graph network, its per-node outputs and a masked policy.

        Args:
            scope: Prefix for every variable scope created here and for the
                metric collection names.
            model: Graph model description; it supplies the placeholder
                graph, the hidden dimensions, the action dimension and the
                global output size.
            reg_param: Scale of the L2 regularizer shared by all layers.
        """
        # Process parameters
        self.scope = scope
        self.model = model
        self.n_out = self.model.get_global_output_size()

        # Configure regularization
        self.regularizer = tf.contrib.layers.l2_regularizer(scale=reg_param)
        self.reg_linear = {"w": self.regularizer, "b": self.regularizer}
        #        self.reg_embed = {}
        self.reg_embed = {"embeddings": self.regularizer}

        # Set up input tensors
        with tf.variable_scope(self.scope + "/state_input"):
            self.input_graphs = utils_tf.placeholders_from_data_dicts(
                [self.model.placeholder_graph()],
                force_dynamic_num_graphs=True,
                name="local_state")

        with tf.variable_scope(self.scope + "/ground_truth"):
            # Reinforcement learning inputs
            self.true_action = tf.placeholder(tf.int32,
                                              shape=(None, ),
                                              name="action")
            self.n_objects = tf.placeholder(tf.int32,
                                            shape=(None, ),
                                            name="n_objects")
            self.target_q = tf.placeholder(tf.float32,
                                           shape=(None, ),
                                           name="target_q")
            self.target_value = tf.placeholder(tf.float32,
                                               shape=(None, ),
                                               name="target_value")

        with tf.variable_scope(self.scope):
            # Separately embed different categorical variables as dense vectors
            self.encoder_module = GraphEncoder(model,
                                               name="encoder",
                                               regularizers=self.reg_embed)
            self.embedded_graphs = self.encoder_module(self.input_graphs)

            # Apply an intermediate  transformation to pass information between neighboring nodes
            self.intermediate_graphs = DenseGraphTransform(
                model.hidden_edge_dimension,
                model.hidden_node_dimension,
                model.hidden_global_dimension,
                name="intermediate",
                regularizer=self.regularizer)(self.embedded_graphs)

            # Then apply a final transformation to produce a global output and node-level evaluations
            self.output_graphs = DenseGraphTransform(
                model.action_dimension,
                1,
                1,
                node_activation=None,
                global_activation=None,
                name="output",
                regularizer=self.regularizer)(self.intermediate_graphs)

        with tf.variable_scope(self.scope + "/outputs"):
            # If given a true action, get the corresponding output
            # The exclusive cumulative sum of n_node gives, for each graph,
            # the index of its first node in the batched node tensor.
            self.graph_indices = tf.math.cumsum(self.output_graphs.n_node,
                                                exclusive=True,
                                                name="starting_node_index")
            # Batch-wide node index of the chosen action of every graph.
            self.true_indices = self.graph_indices + self.true_action
            self.chosen_node_outputs = tf.reshape(
                tf.gather(self.output_graphs.nodes,
                          self.true_indices,
                          name="chosen_action_outputs"), [-1])

            # In case we need a policy output, build the following tensors:
            # 1) a learned stochastic policy for all possible actions,
            # 2) the individual probability of the chosen action
            # 3) the log of that individual probability.
            # First, get each node's index
            node_indices = tf.range(tf.shape(self.output_graphs.nodes)[0])
            # Then, get the index of each graphs' first action
            first_action_indices = self.graph_indices + self.n_objects
            # broadcast action indices to nodes and compare to node indices
            first_action_broadcast = blocks.broadcast_globals_to_nodes(
                self.output_graphs.replace(
                    globals=tf.reshape(first_action_indices, [-1, 1])))
            action_mask = tf.greater_equal(
                node_indices, tf.reshape(first_action_broadcast, [-1]))
            # Zero out the objects and apply softmax to the actions (treat action-nodes as logits)
            exp_or_zero = self.output_graphs.replace(nodes=tf.where(
                action_mask, tf.math.exp(self.output_graphs.nodes),
                tf.zeros_like(self.output_graphs.nodes)))
            # Sum the node values so that the global for each graph is the softmax denominator
            sum_nodes = blocks.GlobalBlock(lambda: tf.identity,
                                           use_edges=False,
                                           use_globals=False)
            softmax_graph = sum_nodes(exp_or_zero)

            # Then divide each node's value by that denominator, or set to 1 where denominator is 0
            # Presumably column 0 is the node's own value and column 1 the
            # graph's global (the denominator) - verify against NodeBlock's
            # input concatenation order.
            # NOTE(review): the `p > 0` test also turns the zeroed (masked-out)
            # nodes into 1, not only the nodes with a zero denominator.
            def node_value_to_prob(node_inputs):
                p = tf.div_no_nan(node_inputs[:, 0], node_inputs[:, 1])
                return tf.where(p > 0, p, tf.ones_like(p))

            policy_graph = blocks.NodeBlock(
                lambda: node_value_to_prob,
                use_received_edges=False,
                use_sent_edges=False)(softmax_graph)
            self.policy = policy_graph.nodes
            self.p_chosen = tf.gather(self.policy,
                                      self.true_indices,
                                      name="p_true_action")
            self.log_p_chosen = tf.log(self.p_chosen, name="logp_true_action")

        # Configure metrics for training and display
        self.TRAIN_METRIC_OPS = self.scope + "/TRAIN_METRIC_OPS"
        self.VAL_METRIC_OPS = self.scope + "/VAL_METRIC_OPS"
        # The collection is read without a scope filter, so this sums every
        # regularization loss registered in the graph so far.
        self.reg_term = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        tf.summary.scalar('reg_loss', self.reg_term)
示例#10
0
def get_graph_ph(graph_dicts):
    """Return graph placeholders built from ``graph_dicts``."""
    placeholders = utils_tf.placeholders_from_data_dicts(graph_dicts)
    return placeholders
示例#11
0
def _f1_score(true_positives, selected, relevant):
    """Return the F1 score of one class, or None when it is undefined."""
    if selected == 0 or relevant == 0:
        # Precision or recall cannot be computed without any sample.
        return None
    precision = true_positives / selected
    recall = true_positives / relevant
    if precision + recall == 0:
        # No true positive at all: F1 is 0 rather than a division by zero.
        return 0.0
    return 2 * precision * recall / (precision + recall)


def main(_):
    """Train the classifier selected by ``FLAGS.model`` and save a checkpoint.

    Loads ``data/train.pkl``, merges label 3 into label 2, trains for 10000
    iterations on class-balanced batches of 32 graphs, evaluates on the
    held-out graphs every 100 iterations and finally saves the model under
    ``model/<FLAGS.model>/model.ckpt``.

    Raises:
        ValueError: If ``FLAGS.model`` is neither "rnn" nor "graph".
    """
    if FLAGS.model not in ("rnn", "graph"):
        raise ValueError(
            "Unsupported FLAGS.model %r; expected 'rnn' or 'graph'." %
            (FLAGS.model, ))

    # NOTE: pickle can execute arbitrary code while loading; only use a
    # trusted data file here.
    with open('data/train.pkl', 'rb') as f:
        graphs = pickle.load(f)

    ## merge TI and TCI

    for g in graphs:
        if g['y'] == 3:
            g['y'] = 2

    train_graphs = graphs[:5120]
    test_graphs = graphs[5120:]

    ## for sampling ##
    # Indices of the training graphs, grouped by class label.
    category_ids = [[] for _ in range(3)]
    for idx, graph in enumerate(train_graphs):
        category_ids[graph['y']].append(idx)

    #  create placeholder
    if FLAGS.model == "rnn":
        pos = tf.placeholder(tf.float32, [None, None, 3])
        ids = tf.placeholder(tf.int32, [None, None])
        lattice = tf.placeholder(tf.float32, [None, 3, 3])
        y = tf.placeholder(tf.int64, [None])
        seq_len = tf.placeholder(tf.int32, [None])
        h_hat = model.naive(pos, ids, seq_len, FLAGS.RNN_num_layers)

    elif FLAGS.model == "graph":
        modified_graphs, _, _ = build_dict(train_graphs, 'graph')
        # A single example graph is enough to shape the placeholder.
        input_graph = utils_tf.placeholders_from_data_dicts(
            modified_graphs[0:1])
        y = tf.placeholder(tf.int64, [None])
        lattice = tf.placeholder(tf.float32, [None, 3, 3])
        h_hat = model.tinet(input_graph)

    # merge lattice information w/ atoms
    h_lattice = tf.reshape(lattice, [-1, 9])
    h = tf.concat([h_hat, h_lattice], axis=1)
    y_hat = tf.layers.dense(h, 3, activation=None)

    # count total params
    N = np.sum(
        [np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()])
    print('Total number of params is ', N)

    # define loss and metric
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                          logits=y_hat)
    loss = tf.reduce_mean(loss)
    predicts = tf.argmax(y_hat, 1)
    correct_pred = tf.equal(predicts, y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # optimizer
    optimizer = tf.train.AdamOptimizer().minimize(loss)
    init = tf.global_variables_initializer()

    # saver
    saver = tf.train.Saver()

    # run session
    with tf.Session() as sess:
        sess.run(init)
        for it in range(10000):

            # Pick a class uniformly at random, then a graph of that class.
            batch_graphs = []
            for _ in range(32):
                cat = int(np.random.random() * 3)
                sample_idx = random.choice(category_ids[cat])
                batch_graphs.append(train_graphs[sample_idx])

            if FLAGS.model == 'rnn':
                batch_pos, batch_ids, batch_lattice, batch_y, batch_seq_len = build_dict(
                    batch_graphs, FLAGS.model)

                # TODO
                # @Pawan: you are still inserting arrays to tensor.
                # And it is still very slow
                # Shuffle the valid entries of each sequence (positions and
                # ids together) and pad back with zeros to the full length.
                batch_length, seq_length, _ = np.shape(batch_pos)
                batch_pos_new = np.array([]).reshape(0, seq_length, 3)
                batch_ids_new = np.array([]).reshape(0, seq_length)
                for i in range(batch_length):
                    actual_len = batch_seq_len[i]
                    batch_pos_i = batch_pos[i][:actual_len]
                    batch_ids_i = batch_ids[i][:actual_len].reshape(1, -1)
                    concatenated_matrix = np.concatenate(
                        (batch_pos_i, batch_ids_i.T), axis=1)
                    np.random.shuffle(concatenated_matrix)
                    pos_empty = np.zeros((seq_length - actual_len, 4))
                    this_pos = np.concatenate((concatenated_matrix, pos_empty),
                                              axis=0)
                    batch_pos_i = this_pos[:, :-1]
                    batch_pos_i = batch_pos_i.reshape(1, seq_length, -1)
                    batch_ids_i = this_pos[:, -1:].T
                    batch_pos_new = np.concatenate(
                        (batch_pos_new, batch_pos_i), axis=0)
                    batch_ids_new = np.concatenate(
                        (batch_ids_new, batch_ids_i), axis=0)

                batch_pos = batch_pos_new
                batch_ids = batch_ids_new
                feed_dict = {
                    pos: batch_pos,
                    ids: batch_ids,
                    lattice: batch_lattice,
                    y: batch_y,
                    seq_len: batch_seq_len
                }
            elif FLAGS.model == "graph":
                batch_graphnets, batch_labels, batch_lattice = build_dict(
                    batch_graphs, FLAGS.model)
                train_batch_graph_data = utils_np.data_dicts_to_graphs_tuple(
                    batch_graphnets)
                feed_dict = {
                    input_graph: train_batch_graph_data,
                    y: batch_labels,
                    lattice: batch_lattice
                }
            loss_value, _ = sess.run([loss, optimizer], feed_dict=feed_dict)

            if it % 100 == 99:
                total_loss = 0
                total_acc = 0
                # Graphs actually scored; skipped graphs must not dilute the
                # averages.
                n_evaluated = 0

                relevant_elements = [0, 0, 0]
                selected_elements = [0, 0, 0]
                true_positives = [0, 0, 0]

                # Confusion matrix: rows are true labels, columns predictions.
                confusion = np.zeros((3, 3)).astype(np.int32)
                for test_graph in test_graphs:
                    if FLAGS.model == 'rnn':
                        batch_pos, batch_ids, batch_lattice, batch_y, batch_seq_len = build_dict(
                            [test_graph], FLAGS.model)
                        feed_dict = {
                            pos: batch_pos,
                            ids: batch_ids,
                            lattice: batch_lattice,
                            y: batch_y,
                            seq_len: batch_seq_len
                        }
                    elif FLAGS.model == "graph":
                        batch_graphnets, batch_labels, batch_lattice = build_dict(
                            [test_graph], FLAGS.model)
                        if not batch_labels:
                            continue
                        test_batch_graph_data = utils_np.data_dicts_to_graphs_tuple(
                            batch_graphnets)
                        feed_dict = {
                            input_graph: test_batch_graph_data,
                            y: batch_labels,
                            lattice: batch_lattice
                        }

                    loss_value, acc, predicts_value = sess.run(
                        [loss, accuracy, predicts], feed_dict=feed_dict)

                    true_label = test_graph['y']
                    predicted = predicts_value[0]

                    n_evaluated += 1
                    total_acc += acc
                    total_loss += loss_value
                    confusion[true_label][predicted] += 1

                    selected_elements[predicted] += 1
                    relevant_elements[true_label] += 1
                    if predicted == true_label:
                        true_positives[predicted] += 1

                # F1 of class 2; None when precision or recall is undefined.
                f1_score = _f1_score(true_positives[2], selected_elements[2],
                                     relevant_elements[2])

                print('F1 score is', f1_score)

                denominator = max(n_evaluated, 1)
                print('Test Loss is ', total_loss / denominator,
                      '; accuracy is ', total_acc / denominator)
                print(confusion)

        save_path = saver.save(sess, "model/" + FLAGS.model + "/model.ckpt")
        print("Model saved in path: %s" % save_path)
示例#12
0
def create_evaluator(config_name, iteration, input_ckpt=None):
    """Restore a trained model and return a function that runs it on graphs.

    Args:
        config_name: Path of the YAML configuration (the one used for
            train_nx_graph). The keys read are
            ``segment_training.parameters.batch_size``,
            ``segment_training.parameters.n_iters``,
            ``segment_training.prod_name``, ``segment_training.output_dir``,
            ``segment_training.model_name`` and ``make_graph.out_graph``.
        iteration: Checkpoint iteration to restore; a negative value restores
            the latest checkpoint found in ``input_ckpt``.
        input_ckpt: Directory holding the checkpoints. Defaults to
            ``<output_dir>/<prod_name>`` from the configuration.

    Returns:
        A callable ``evaluator(input_graphs, target_graphs, use_digraph=False,
        bidirection=False)``. The session it uses stays open for as long as
        the callable is alive.
    """
    # load configuration file
    all_config = load_yaml(config_name)
    config = all_config['segment_training']
    config_tr = config['parameters']

    n_graphs = config_tr['batch_size']  # need optimization
    num_processing_steps_tr = config_tr['n_iters']  ## level of message-passing
    prod_name = config['prod_name']
    if input_ckpt is None:
        input_ckpt = os.path.join(config['output_dir'], prod_name)

    # generate inputs
    generate_input_target = inputs_generator(
        all_config['make_graph']['out_graph'], n_train_fraction=0.8)

    # build TF graph
    tf.compat.v1.reset_default_graph()
    model = get_model(config['model_name'])

    # One generated batch is only used to shape the placeholders.
    input_graphs, target_graphs = generate_input_target(n_graphs)
    input_ph = utils_tf.placeholders_from_data_dicts(
        input_graphs, force_dynamic_num_graphs=True)
    target_ph = utils_tf.placeholders_from_data_dicts(
        target_graphs, force_dynamic_num_graphs=True)

    output_ops_tr = model(input_ph, num_processing_steps_tr)

    # The session is intentionally left open: the returned closure needs it.
    # Session and Saver go through tf.compat.v1, like the graph reset above.
    sess = tf.compat.v1.Session()
    saver = tf.compat.v1.train.Saver()
    if iteration < 0:
        checkpoint_path = tf.train.latest_checkpoint(input_ckpt)
    else:
        checkpoint_path = os.path.join(input_ckpt,
                                       ckpt_name.format(iteration))
    saver.restore(sess, checkpoint_path)

    def evaluator(input_graphs,
                  target_graphs,
                  use_digraph=False,
                  bidirection=False):
        """
        input is graph tuples, sizes should match batch_size
        """
        feed_dict = {input_ph: input_graphs, target_ph: target_graphs}
        predictions = sess.run({
            "outputs": output_ops_tr,
            "target": target_ph
        },
                               feed_dict=feed_dict)
        # Only the output of the last message-passing step is converted.
        output = predictions['outputs'][-1]

        return utils_data.predicted_graphs_to_nxs(output,
                                                  input_graphs,
                                                  target_graphs,
                                                  use_digraph=use_digraph,
                                                  bidirection=bidirection)

    return evaluator
    #edge_model_fn=lambda: MyMLP("edge_gnn"),
    #node_model_fn=lambda: snt.Linear(output_size=OUTPUT_NODE_SIZE),
    node_model_fn=lambda: snt.nets.MLP([256, 256, 1]),
    #global_model_fn=lambda: snt.Linear(output_size=OUTPUT_GLOBAL_SIZE))
    global_model_fn=lambda: snt.nets.MLP([256, 256, 1]))"""

# ENCODE PROCESS DECODE NETWORK
# Only the edge output size is set (1); every other constructor argument is
# left at its default.
graph_network = models.EncodeProcessDecode(edge_output_size=1)


def concat_in_list(numpy_list):
    """Concatenate the arrays of ``numpy_list`` along their last axis.

    Args:
        numpy_list: Iterable of arrays whose shapes agree on every axis but
            the last one.

    Returns:
        A single array holding the inputs joined along ``axis=-1``.
    """
    # list() materialises any iterable (including generators) without the
    # element-by-element identity comprehension used before.
    return np.concatenate(list(numpy_list), axis=-1)


# Create placeholders from current states and desired states
# The first BATCH_SIZE entries of each list only serve to shape the placeholders.
graphs_tuple_ph = utils_tf.placeholders_from_data_dicts(
    all_current_states[0:BATCH_SIZE])
training_desired_ph = utils_tf.placeholders_from_data_dicts(
    all_desired_states[0:BATCH_SIZE])

# Learning-rate schedule: the epoch is fed at run time, the other two values are constants.
# dynamic_learning_rate = start_learning_rate / (1 + epoch / learning_rate_decay)
epoch_ph = tf.placeholder(tf.float32)
start_learning_rate = tf.constant(START_LEARNING_RATE)
learning_rate_decay = tf.constant(LEARNING_RATE_DECAY)
dynamic_learning_rate = start_learning_rate / (1.0 +
                                               epoch_ph / learning_rate_decay)

# Pass placeholder of current state to graph to make a prediction
graph_predictions = graph_network(graphs_tuple_ph, PROCESSING_STEPS_TR)


# Loss is MSE between edges of current graph and predicted graph
def main(_):
    """Restore a trained classifier and print the flagged predictions.

    Loads the graphs pickled in ``data/predict.pkl``, rebuilds the network
    selected by ``FLAGS.mode`` (``'graph'`` or ``'rnn'``), restores its weights
    from ``model/<mode>/model.ckpt`` and runs the graphs through it one at a
    time. Every graph labelled 0 whose largest logit is class 2 is passed to
    ``softmaxAndName``; the collected results are sorted in descending order
    and printed.

    Args:
        _: Unused positional argument (presumably argv supplied by the app
            runner — confirm against the caller).

    Raises:
        ValueError: If ``FLAGS.mode`` is neither ``'graph'`` nor ``'rnn'``.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only run this against a trusted data/predict.pkl.
    with open('data/predict.pkl', 'rb') as f:
        predict_graphs = pickle.load(f)

    # Merge TI and TCI: relabel class 3 as class 2.
    for g in predict_graphs:
        if g['y'] == 3:
            g['y'] = 2

    mode = FLAGS.mode
    if mode == 'graph':
        # The placeholder structure is derived from the first graph only.
        modified_graphs, _, _ = build_dict(predict_graphs[:1], 'graph')
        input_graph = utils_tf.placeholders_from_data_dicts(
            modified_graphs[0:1])
        y = tf.placeholder(tf.int64, [None])
        lattice = tf.placeholder(tf.float32, [None, 3, 3])
        h_hat = model.tinet(input_graph)
    elif mode == 'rnn':
        pos = tf.placeholder(tf.float32, [None, None, 3])
        ids = tf.placeholder(tf.int32, [None, None])
        lattice = tf.placeholder(tf.float32, [None, 3, 3])
        y = tf.placeholder(tf.int64, [None])
        seq_len = tf.placeholder(tf.int32, [None])
        h_hat = model.naive(pos, ids, seq_len, 2)
    else:
        # Fail here with a clear message instead of a NameError on `lattice`.
        raise ValueError(
            "Unsupported FLAGS.mode: {!r} (expected 'graph' or 'rnn')".format(
                mode))

    # Merge lattice information with the atom features: flatten each 3x3
    # lattice to 9 values and append it to the model output.
    h_lattice = tf.reshape(lattice, [-1, 9])
    h = tf.concat([h_hat, h_lattice], axis=1)
    y_hat = tf.layers.dense(h, 3, activation=None)

    # Load model
    saver = tf.train.Saver()

    ti = []

    with tf.Session() as sess:
        saver.restore(sess, "model/" + FLAGS.mode + "/model.ckpt")

        for graph in predict_graphs:
            if mode == 'graph':
                batch_graphnets, batch_labels, batch_lattice = build_dict(
                    [graph], 'graph')
                # build_dict returned no labels for this graph; skip it.
                if not batch_labels:
                    continue
                test_batch_graph_data = utils_np.data_dicts_to_graphs_tuple(
                    batch_graphnets)
                feed_dict = {
                    input_graph: test_batch_graph_data,
                    y: batch_labels,
                    lattice: batch_lattice
                }
            else:  # 'rnn' -- the mode was validated above
                batch_pos, batch_ids, batch_lattice, batch_y, batch_seq_len = build_dict(
                    [graph], 'rnn')
                feed_dict = {
                    pos: batch_pos,
                    ids: batch_ids,
                    lattice: batch_lattice,
                    y: batch_y,
                    seq_len: batch_seq_len
                }

            # One graph per run, so take the single row of logits.
            y_hat_value = sess.run(y_hat, feed_dict=feed_dict)[0]
            y_value = np.argmax(y_hat_value)

            # Keep graphs labelled 0 that the model assigns to class 2.
            if y_value == 2 and graph['y'] == 0:
                ti.append(softmaxAndName(y_hat_value, graph['name']))

    ti.sort(reverse=True)

    print(ti)
示例#15
0
def _purity_efficiency_curve(edge_scores, truth, num_cutoffs=100):
    """Scan score cutoffs and compute purity and efficiency at each one.

    Args:
        edge_scores: Array of edge scores; the first row of its transpose is
            compared with ``truth`` (assumed shape (num_edges, 1) — confirm).
        truth: Per-edge truth labels compared element-wise with the 0/1
            selection.
        num_cutoffs: Number of evenly spaced cutoffs taken from [0, 1].

    Returns:
        Tuple ``(cutoffs, purities, efficiencies)`` of equal-length lists.
    """
    # NOTE(review): if `truth` contains no positive entries the efficiency
    # division below is 0/0 or x/0; confirm whether that case can occur.
    total_true = np.sum(truth)
    cutoffs, purities, efficiencies = [], [], []
    for cutoff in np.linspace(0, 1, num_cutoffs):
        selected = np.transpose(np.where(edge_scores > cutoff, 1, 0))[0]
        # Correct edges: selected (== 1) and equal to the truth label.
        correct = np.sum(np.logical_and(selected == truth, selected == 1))
        total_selected = np.sum(selected)
        # An empty selection is given purity 1.0 instead of dividing by zero.
        purity = correct / total_selected if total_selected != 0 else 1.0
        cutoffs.append(cutoff)
        purities.append(purity)
        efficiencies.append(correct / total_true)
    return cutoffs, purities, efficiencies


def _save_purity_efficiency(results_dir, output_index, cutoffs, purities,
                            efficiencies):
    """Write the purity/efficiency plot and CSV under ``results_dir/figures``."""
    os.makedirs(os.path.join(results_dir, 'figures'), exist_ok=True)

    plt.figure()
    plt.plot(purities, efficiencies)
    plt.axis([0, 1, 0, 1])
    plt.xlabel('Purity')
    plt.ylabel('Efficiency')
    plt.savefig(
        os.path.join(
            results_dir,
            'figures/purity_vs_efficiency{:02d}.png'.format(output_index)))
    plt.close()

    csv_path = os.path.join(
        results_dir,
        'figures/purity_vs_efficiency{:02d}.csv'.format(output_index))
    # newline='' is what the csv module requires to avoid blank rows on
    # platforms that translate line endings.
    with open(csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['cutoff', 'purity', 'efficiency'])
        csv_writer.writerows(zip(cutoffs, purities, efficiencies))


def main():
    """Train the segment classifier with periodic evaluation and checkpoints.

    Takes an optional run name from the command line (default ``'unnamed'``),
    loads ``configs/nxgraph_default.yaml`` and trains ``SegmentClassifier``
    with Adam on a log-loss over the edge outputs of the last processing step.
    Whenever more than ``config['train']['time_lapse']`` seconds have passed
    since the previous report, a batch from
    ``batch_iterator(base_dir, 10, test=True)`` is evaluated and a
    purity/efficiency plot and CSV, a checkpoint and a hit-graph visualisation
    are written below ``results/<name>/``.
    """
    parser = argparse.ArgumentParser(
        description='Train nx-graph with configurations')
    parser.add_argument('name', nargs='?', default='unnamed')
    args = parser.parse_args()

    results_dir = 'results/{}'.format(args.name)
    os.makedirs(results_dir, exist_ok=True)
    config = load_config('configs/nxgraph_default.yaml')
    base_dir = config['data']['input_dir']
    config_tr = config['train']
    log_every_seconds = config_tr['time_lapse']
    batch_size = config_tr['batch_size']  # need optimization
    num_training_iterations = config_tr['iterations']
    num_processing_steps_tr = config_tr['n_iters']  # level of message-passing
    learning_rate = config_tr['learning_rate']

    # Start from an empty default TensorFlow graph.
    tf.reset_default_graph()

    # The input placeholder is generated from the first graph of the first
    # batch; force_dynamic_num_graphs=True is passed so that the placeholder
    # is not tied to that batch's number of graphs.
    _, _, input_graphs, _ = next(batch_iterator(base_dir, batch_size))
    input_ph = utils_tf.placeholders_from_data_dicts(
        input_graphs[0:1], force_dynamic_num_graphs=True)
    truth_ph = tf.placeholder(tf.float64, shape=[None])

    # Model output, log-loss on the edge features of the last processing step
    # and the Adam update op.
    model_outputs = SegmentClassifier()(input_ph, num_processing_steps_tr)
    edge_losses = tf.losses.log_loss(truth_ph,
                                     tf.transpose(model_outputs[-1].edges)[0])
    training_loss = edge_losses
    training_optimizer = tf.train.AdamOptimizer(learning_rate).minimize(
        training_loss)

    # Allows a graph containing `None` fields to be run in a session.
    input_ph = utils_tf.make_runnable_in_session(input_ph)

    def build_feed_dict(data_dicts, truth):
        """Map a batch of data dicts and its truth labels onto the placeholders."""
        graphs = utils_np.data_dicts_to_graphs_tuple(data_dicts)
        graphs = utils_tf.make_runnable_in_session(graphs)
        feed = utils_tf.get_feed_dict(input_ph, graphs)
        feed.update({truth_ph: truth})
        return feed

    # The context manager closes the session even if training raises.
    with tf.Session() as sess:
        saver = tf.train.Saver()

        # Variables must be initialised before the first training step.
        sess.run(tf.global_variables_initializer())

        output_index = 0
        last_output = time.time()

        # Iterate through the dataset many times to train.
        for iteration in range(num_training_iterations):
            for batch_index, batch_count, input_batch, truth_batch in batch_iterator(
                    base_dir, batch_size):

                # One optimisation step on the current batch.
                sess.run(training_optimizer,
                         feed_dict=build_feed_dict(input_batch, truth_batch))

                # Only report when the configured interval has elapsed since
                # the last save-evaluate-visualize action.
                current_time = time.time()
                if current_time - last_output <= log_every_seconds:
                    continue
                last_output = current_time

                # Evaluate on a batch of 10 drawn with test=True.
                _, _, test_inputs, test_truth = next(
                    batch_iterator(base_dir, 10, test=True))
                eval_values = sess.run(
                    {
                        "loss": training_loss,
                        "outputs": model_outputs
                    },
                    feed_dict=build_feed_dict(test_inputs, test_truth))

                cutoffs, purities, efficiencies = _purity_efficiency_curve(
                    eval_values['outputs'][-1].edges, test_truth)
                _save_purity_efficiency(results_dir, output_index, cutoffs,
                                        purities, efficiencies)

                os.makedirs(os.path.join(results_dir, 'models'), exist_ok=True)
                saver.save(
                    sess,
                    os.path.join(results_dir,
                                 'models/model{}.ckpt'.format(output_index)))

                visualize_hitgraph(
                    os.path.join(results_dir, 'images'), output_index, {
                        'nodes': test_inputs[0]['nodes'],
                        'edges': test_truth,
                        'senders': test_inputs[0]['senders'],
                        'receivers': test_inputs[0]['receivers']
                    })

                print('\repoch: {} progress: {:.4f} loss: {:.4f}'.format(
                    iteration, batch_index / batch_count,
                    eval_values['loss']))

                output_index += 1