Пример #1
0
def MP_graph(D, x):
    """Build a bipartite graph from D and x, run 30 update sweeps, return z.

    The name and the "select the largest inner product" step suggest a
    matching-pursuit style solver -- NOTE(review): presumed intent, confirm.

    Written for Python 2 (print statement, xrange).

    :param D: 2-D array; D.shape is unpacked as (N, M) and entries are read
        as D[i][j]
    :param x: indexable with at least N entries; x[i] becomes the "value"
        of x-vertex i
    :return: numpy array of shape (M, 1) filled with the "value" field read
        from each entry of the local z_vertices list
    """
    N, M = D.shape
    z = np.zeros((M, 1))
    # Scratch buffer for the per-z-vertex "dummy" values of one iteration.
    z_temp = np.zeros(M)
    # NOTE(review): r is never read again in this function.
    r = np.copy(x)
    # The number of sweeps is fixed; there is no convergence test.
    num_iter = 30
    # Create the bipartite graph: x-vertices get ids 0..N-1, z-vertices get
    # ids N..N+M-1, and every x id is connected to every z id.
    G = SGraph()
    x_vertices = [Vertex(i) for i in xrange(N)]
    z_vertices = [Vertex(j + N) for j in xrange(M)]
    D_edges = [Edge(i, j) for i in xrange(N) for j in xrange(N, N + M)]
    # NOTE(review): the results of add_vertices/add_edges are discarded and
    # two lists are passed positionally to add_vertices -- confirm both
    # against the SGraph API.
    G.add_vertices(x_vertices, z_vertices)
    G.add_edges(D_edges)

    # Seed vertex fields: x-vertices carry the signal, z-vertices start at 0.
    for i in xrange(N):
        x_vertices[i]["value"] = x[i]
    for j in xrange(M):
        z_vertices[j]["value"] = 0.0
        z_vertices[j]["dummy"] = 0.0
        z_vertices[j]["max"] = 0.0
    # NOTE(review): this constructs a brand-new Edge from Vertex objects
    # (D_edges above used integer ids) and assigns to it without storing it
    # anywhere -- confirm the D[i][j] weights actually reach G.
    for i in xrange(N):
        for j in xrange(M):
            Edge(x_vertices[i], z_vertices[j])["value"] = D[i][j]

    # triple_apply callbacks; each receives (source, edge, target).

    # Accumulates edge value * source value into the target's "dummy" field.
    def inner_prod(s, e, t):
        t["dummy"] += e["value"] * s["value"]

    # Adds edge value * source value to the target's "value" field, but only
    # for targets whose "max" field is non-zero.
    def update_z(s, e, t):
        if not t["max"] == 0.0:
            t["value"] += e["value"] * s["value"]

    # Subtracts target value * edge value from the source's "value" field
    # for targets whose "max" field is non-zero.
    # NOTE(review): defined but never passed to triple_apply below.
    def compute_residual(s, e, t):
        if not t["max"] == 0.0:
            s["value"] -= t["value"] * e["value"]

    for itr in xrange(num_iter):
        # Accumulate inner products into "dummy". The callback reads the
        # x-vertex "value" fields; the local r is not involved.
        # NOTE(review): "dummy" is never reset inside this loop.
        print "NUM ITR = ", itr
        # mutated_fields lists "value" although inner_prod only writes
        # "dummy".
        G = G.triple_apply(inner_prod, mutated_fields=["value", "dummy"])
        # Clear every "max" flag and collect the "dummy" values.
        # NOTE(review): these are read from the local Vertex objects, not
        # from the graph returned by triple_apply -- confirm they reflect
        # its results.
        for i in xrange(M):
            z_vertices[i]["max"] = 0.0
            z_temp[i] = z_vertices[i]["dummy"]
        # argmax selects the largest signed value, not the largest magnitude.
        max_pos = np.argmax(z_temp)
        # Flag the winner by storing its score in "max"; a score of exactly
        # 0.0 leaves it unflagged, so update_z then changes nothing.
        z_vertices[max_pos]["max"] = z_temp[max_pos]
        # mutated_fields lists "max" although update_z only writes "value".
        G = G.triple_apply(update_z, mutated_fields=["max", "value"])

    # Copy the per-vertex "value" fields into the (M, 1) result.
    for i in xrange(M):
        z[i] = z_vertices[i]["value"]

    return z
Пример #2
0
def MP_graph(D, x):
    """Build a bipartite graph from D and x, run 30 update sweeps, return z.

    The name and the "select the largest inner product" step suggest a
    matching-pursuit style solver -- NOTE(review): presumed intent, confirm.

    Written for Python 2 (print statement, xrange).

    :param D: 2-D array; D.shape is unpacked as (N, M) and entries are read
        as D[i][j]
    :param x: indexable with at least N entries; x[i] becomes the "value"
        of x-vertex i
    :return: numpy array of shape (M, 1) filled with the "value" field read
        from each entry of the local z_vertices list
    """
    N, M = D.shape
    z = np.zeros((M, 1))
    # Scratch buffer for the per-z-vertex "dummy" values of one iteration.
    z_temp = np.zeros(M)
    # NOTE(review): r is never read again in this function.
    r = np.copy(x)
    # The number of sweeps is fixed; there is no convergence test.
    num_iter = 30
    # Create the bipartite graph: x-vertices get ids 0..N-1, z-vertices get
    # ids N..N+M-1, and every x id is connected to every z id.
    G = SGraph()
    x_vertices = [Vertex(i) for i in xrange(N)]
    z_vertices = [Vertex(j + N) for j in xrange(M)]
    D_edges = [Edge(i, j) for i in xrange(N) for j in xrange(N, N + M)]
    # NOTE(review): the results of add_vertices/add_edges are discarded and
    # two lists are passed positionally to add_vertices -- confirm both
    # against the SGraph API.
    G.add_vertices(x_vertices, z_vertices)
    G.add_edges(D_edges)

    # Seed vertex fields: x-vertices carry the signal, z-vertices start at 0.
    for i in xrange(N):
        x_vertices[i]["value"] = x[i]
    for j in xrange(M):
        z_vertices[j]["value"] = 0.0
        z_vertices[j]["dummy"] = 0.0
        z_vertices[j]["max"] = 0.0
    # NOTE(review): this constructs a brand-new Edge from Vertex objects
    # (D_edges above used integer ids) and assigns to it without storing it
    # anywhere -- confirm the D[i][j] weights actually reach G.
    for i in xrange(N):
        for j in xrange(M):
            Edge(x_vertices[i], z_vertices[j])["value"] = D[i][j]

    # triple_apply callbacks; each receives (source, edge, target).

    # Accumulates edge value * source value into the target's "dummy" field.
    def inner_prod(s, e, t):
        t["dummy"] += e["value"] * s["value"]

    # Adds edge value * source value to the target's "value" field, but only
    # for targets whose "max" field is non-zero.
    def update_z(s, e, t):
        if not t["max"] == 0.0:
            t["value"] += e["value"] * s["value"]

    # Subtracts target value * edge value from the source's "value" field
    # for targets whose "max" field is non-zero.
    # NOTE(review): defined but never passed to triple_apply below.
    def compute_residual(s, e, t):
        if not t["max"] == 0.0:
            s["value"] -= t["value"] * e["value"]

    for itr in xrange(num_iter):
        # Accumulate inner products into "dummy". The callback reads the
        # x-vertex "value" fields; the local r is not involved.
        # NOTE(review): "dummy" is never reset inside this loop.
        print "NUM ITR = ", itr
        # mutated_fields lists "value" although inner_prod only writes
        # "dummy".
        G = G.triple_apply(inner_prod, mutated_fields=["value", "dummy"])
        # Clear every "max" flag and collect the "dummy" values.
        # NOTE(review): these are read from the local Vertex objects, not
        # from the graph returned by triple_apply -- confirm they reflect
        # its results.
        for i in xrange(M):
            z_vertices[i]["max"] = 0.0
            z_temp[i] = z_vertices[i]["dummy"]
        # argmax selects the largest signed value, not the largest magnitude.
        max_pos = np.argmax(z_temp)
        # Flag the winner by storing its score in "max"; a score of exactly
        # 0.0 leaves it unflagged, so update_z then changes nothing.
        z_vertices[max_pos]["max"] = z_temp[max_pos]
        # mutated_fields lists "max" although update_z only writes "value".
        G = G.triple_apply(update_z, mutated_fields=["max", "value"])

    # Copy the per-vertex "value" fields into the (M, 1) result.
    for i in xrange(M):
        z[i] = z_vertices[i]["value"]

    return z
Пример #3
0
def extract_backbone(flavor_network, vertices, edges, alpha):
    """
    Prune a flavor-ingredient network down to the edges that pass a degree-based weight test.

    Vertex degrees are counted with one triple_apply pass over a working graph. An edge is kept
    when its weight reaches ``abs(mean + alpha*sigma)`` for the degree of either endpoint.
    :param flavor_network: network whose ``vertices`` table seeds the working and pruned graphs
    :param vertices: separate list of vertices; not read by this function
    :param edges: iterable of edge objects exposing ``attr['weight']``, ``src_vid`` and ``dst_vid``
    :param alpha: multiplier applied to sigma when building the threshold
    :return: tuple of (list of kept edges, pruned SGraph)
    """
    def bump_degrees(src, connecting_edge, dst):
        """
        triple_apply callback: adds one to 'deg' on both endpoints of the edge
        :param src: source node
        :param connecting_edge: edge between the two nodes (returned unchanged)
        :param dst: destination node
        :return: the (src, connecting_edge, dst) triple
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, connecting_edge, dst

    def degree_moments(k):
        """
        mean and standard deviation used by the significance test for a node of degree k
        :param k: node degree (callers pass a float)
        :return: mean and sigma
        """
        k_plus_1 = k + 1
        mean = 2*k/k_plus_1
        first_term = (20 + 4*k)/(k_plus_1*(k + 2)*(k + 3))
        second_term = 4/k_plus_1**2
        sigma = sqrt(k**2*(first_term - second_term))
        return mean, sigma

    def is_significant(edge, degree_by_id, scale):
        """
        checks the edge weight against the thresholds of its two endpoints
        :param edge: edge to test
        :param degree_by_id: dict mapping vertex id to degree
        :param scale: multiplier applied to sigma
        :return: truthy when the weight reaches either endpoint's threshold
        """
        weight = edge.attr['weight']
        # Both lookups happen before any arithmetic, destination first.
        dst_degree = degree_by_id[edge.dst_vid]
        src_degree = degree_by_id[edge.src_vid]
        dst_mean, dst_sigma = degree_moments(float(dst_degree))
        src_mean, src_sigma = degree_moments(float(src_degree))
        return (weight >= abs(dst_mean + scale*dst_sigma)
                or weight >= abs(src_mean + scale*src_sigma))

    # Vertices without a 'deg' value start from 0 before counting.
    seeded_vertices = flavor_network.vertices.fillna('deg', 0)
    degree_graph = SGraph().add_vertices(seeded_vertices).add_edges(edges)
    degree_graph = degree_graph.triple_apply(bump_degrees, mutated_fields=['deg'])

    # Plain dict of vertex id -> degree for fast per-edge lookups.
    vertex_table = degree_graph.vertices.to_dataframe().set_index('__id')
    degree_by_id = vertex_table.to_dict()['deg']

    significant_edges = [edge for edge in edges
                         if is_significant(edge, degree_by_id, alpha)]
    pruned_network = SGraph().add_vertices(seeded_vertices).add_edges(significant_edges)
    return significant_edges, pruned_network
Пример #4
0
def extract_backbone(flavor_network, alpha):
    """
    Build a new graph containing only the edges whose weight passes a degree-based threshold.

    Vertex degrees are counted with one triple_apply pass. An edge is kept when its weight
    reaches ``abs(mean + alpha * sigma)`` for the degree of either endpoint.
    :param flavor_network: full flavor ingredient network
    :param alpha: multiplier applied to sigma when building the threshold
    :return: the pruned SGraph
    """
    def bump_degrees(src, edge, dst):
        """
        triple_apply callback: adds one to 'deg' on both endpoints
        :param src: source node
        :param edge: connecting edge (returned unchanged)
        :param dst: destination node
        :return: the (src, edge, dst) triple
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def degree_moments(k):
        # Mean and standard deviation for a node of degree k.
        k_plus_1 = k + 1
        mean = 2 * k / k_plus_1
        first_term = (20 + 4 * k) / (k_plus_1 * (k + 2) * (k + 3))
        second_term = 4 / k_plus_1**2
        sigma = sqrt(k**2 * (first_term - second_term))
        return mean, sigma

    def is_significant(row, degree_by_id, scale):
        # Truthy when the weight reaches either endpoint's threshold.
        weight = row['weight']
        # Both lookups happen before any arithmetic, destination first.
        dst_degree = degree_by_id[row['__dst_id']]
        src_degree = degree_by_id[row['__src_id']]
        dst_mean, dst_sigma = degree_moments(float(dst_degree))
        src_mean, src_sigma = degree_moments(float(src_degree))
        dst_hit = weight >= abs(dst_mean + scale * dst_sigma)
        return dst_hit or weight >= abs(src_mean + scale * src_sigma)

    def first_edge_object(graph, row):
        # Re-query the graph for this endpoint pair and take the first match.
        return graph.get_edges(src_ids=row['__src_id'],
                               dst_ids=row['__dst_id'],
                               format='list')[0]

    all_edges = flavor_network.get_edges()
    # Vertices without a 'deg' value start from 0 before counting.
    seeded_vertices = flavor_network.vertices.fillna('deg', 0)
    degree_graph = SGraph().add_vertices(seeded_vertices).add_edges(all_edges)
    degree_graph = degree_graph.triple_apply(bump_degrees,
                                             mutated_fields=['deg'])

    # Plain dict of vertex id -> degree for fast per-edge lookups.
    vertex_table = degree_graph.vertices.to_dataframe().set_index('__id')
    degree_by_id = vertex_table.to_dict()['deg']

    kept_edges = [first_edge_object(degree_graph, row)
                  for row in degree_graph.get_edges()
                  if is_significant(row, degree_by_id, alpha)]
    return SGraph().add_vertices(seeded_vertices).add_edges(kept_edges)
Пример #5
0
def extract_backbone(flavor_network, alpha):
    """
    Return a copy of the network that keeps only edges whose weight passes a degree-based threshold.

    Degrees are counted by a triple_apply pass over a working graph. An edge survives when its
    weight is at least ``abs(mean + alpha*sigma)`` for the degree of its source or destination.
    :param flavor_network: full flavor ingredient network
    :param alpha: multiplier applied to sigma when building the threshold
    :return: the pruned SGraph
    """
    def count_degree(src, edge, dst):
        """
        triple_apply callback that increments 'deg' on both ends of the edge
        :param src: source node
        :param edge: connecting edge (passed through untouched)
        :param dst: destination node
        :return: the (src, edge, dst) triple
        """
        src['deg'] += 1
        dst['deg'] += 1
        return src, edge, dst

    def moments_for(degree):
        # Mean and standard deviation for a node with the given degree.
        mean = 2*degree/(degree + 1)
        denominator = (degree + 1)*(degree + 2)*(degree + 3)
        variance_factor = (20 + 4*degree)/denominator - 4/(degree + 1)**2
        sigma = sqrt(degree**2*variance_factor)
        return mean, sigma

    def passes_threshold(row, degrees, scale):
        # Truthy when the weight reaches the threshold of either endpoint.
        observed = row['weight']
        # Both lookups happen before any arithmetic, destination first.
        degree_dst = degrees[row['__dst_id']]
        degree_src = degrees[row['__src_id']]
        mean_dst, sigma_dst = moments_for(float(degree_dst))
        mean_src, sigma_src = moments_for(float(degree_src))

        return (observed >= abs(mean_dst + scale*sigma_dst)
                or observed >= abs(mean_src + scale*sigma_src))

    source_edges = flavor_network.get_edges()
    # Vertices without a 'deg' value start from 0 before counting.
    vertices_with_deg = flavor_network.vertices.fillna('deg', 0)
    working_graph = SGraph().add_vertices(vertices_with_deg)
    working_graph = working_graph.add_edges(source_edges)
    working_graph = working_graph.triple_apply(count_degree, mutated_fields=['deg'])

    # Plain dict of vertex id -> degree for fast per-edge lookups.
    indexed_vertices = working_graph.vertices.to_dataframe().set_index('__id')
    degrees = indexed_vertices.to_dict()['deg']

    kept = []
    for row in working_graph.get_edges():
        if not passes_threshold(row, degrees, alpha):
            continue
        # Re-query the graph for this endpoint pair and keep the first match.
        matches = working_graph.get_edges(src_ids=row['__src_id'],
                                          dst_ids=row['__dst_id'], format='list')
        kept.append(matches[0])

    pruned_network = SGraph().add_vertices(vertices_with_deg)
    return pruned_network.add_edges(kept)