Exemplo n.º 1
0
class EmbeddingNetworkBuilder:
    """Thin wrapper around scikit-learn's LSH forest.

    The builder fits the forest on an embedding's matrix (``embedding.m``)
    and derives a nearest-neighbour graph from it.
    """

    def __init__(self, lsh_init=None):
        """Create the builder.

        Args:
            lsh_init: An already constructed forest object to wrap.  When
                omitted, an ``LSHForest(n_estimators=25, n_candidates=1000)``
                is created.
        """
        if lsh_init is None:
            self._lsh_forest = LSHForest(n_estimators=25, n_candidates=1000)
        else:
            self._lsh_forest = lsh_init
        # Populated by fit_lsh_forest(); defined here so the attribute
        # always exists.
        self._embedding = None
        # Never assigned anywhere else in this class; presumably filled in
        # by callers (index-to-word list and matrix) -- TODO confirm.
        self.iw = None
        self.m = None

    def fit_lsh_forest(self, embedding):
        """Fit the wrapped forest on ``embedding.m`` and remember the embedding."""
        self._lsh_forest.fit(embedding.m)
        self._embedding = embedding

    def extract_nn_network(self, nn=20):
        """Return the forest's k-neighbours graph for the fitted embedding.

        ``nn + 1`` neighbours are requested; presumably the extra one
        accounts for every point being returned as its own neighbour.
        Requires a prior call to ``fit_lsh_forest``.
        """
        dir_graph_mat = self._lsh_forest.kneighbors_graph(
            X=self._embedding.m, n_neighbors=nn + 1)
        return dir_graph_mat

    def make_undirected(self, dir_graph_mat):
        """Collect the nodes and edges of a neighbour-graph matrix.

        Args:
            dir_graph_mat: Square sparse matrix whose row ``i`` has non-zero
                entries at the columns that are neighbours of node ``i``.

        Returns:
            ``(nodes, edges)``: the set of row indices and the set of
            ``(row, col)`` integer pairs for every non-zero entry.

        NOTE(review): pairs keep their ``(row, col)`` orientation, so
        ``(i, j)`` and ``(j, i)`` can both be present despite the method
        name -- confirm whether callers expect them merged.
        """
        n_nodes = dir_graph_mat.shape[0]
        nodes = set(range(n_nodes))
        edges = set()
        # shape[0] is an int, so it must be wrapped in range() to walk rows.
        for node_i in range(n_nodes):
            # Indexing one sparse row yields a 1 x n matrix; nonzero()[1]
            # holds the column indices of its non-zero entries.
            for node_j in dir_graph_mat[node_i].nonzero()[1]:
                edges.add((node_i, int(node_j)))
        return nodes, edges

    def get_forest(self):
        """Return the wrapped forest object."""
        return self._lsh_forest

    def get_node_to_word(self):
        """Return ``self.iw`` (``None`` unless assigned externally)."""
        return self.iw
Exemplo n.º 2
0
class EmbeddingNetworkBuilder:
    """Thin wrapper around scikit-learn's LSH forest.

    Fits the forest on the matrix of an embedding (``embedding.m``) and
    extracts a nearest-neighbour graph from the fitted forest.
    """

    def __init__(self, lsh_init=None):
        """Wrap ``lsh_init``, or build a default forest when it is ``None``.

        The default is ``LSHForest(n_estimators=25, n_candidates=1000)``.
        """
        if lsh_init is None:
            self._lsh_forest = LSHForest(n_estimators=25, n_candidates=1000)
        else:
            self._lsh_forest = lsh_init
        # Set by fit_lsh_forest(); initialised so the attribute always exists.
        self._embedding = None
        # Not assigned anywhere else in this class; presumably set by
        # callers -- TODO confirm.
        self.iw = None
        self.m = None

    def fit_lsh_forest(self, embedding):
        """Fit the forest on ``embedding.m`` and keep a reference to it."""
        self._lsh_forest.fit(embedding.m)
        self._embedding = embedding

    def extract_nn_network(self, nn=20):
        """Return the k-neighbours graph of the fitted embedding.

        Asks the forest for ``nn + 1`` neighbours per point (presumably to
        compensate for each point matching itself).  ``fit_lsh_forest`` must
        have been called first.
        """
        dir_graph_mat = self._lsh_forest.kneighbors_graph(
            X=self._embedding.m, n_neighbors=nn + 1)
        return dir_graph_mat

    def make_undirected(self, dir_graph_mat):
        """Return ``(nodes, edges)`` for a sparse neighbour-graph matrix.

        ``nodes`` is the set of row indices; ``edges`` is the set of
        ``(row, col)`` integer pairs, one per non-zero entry.

        NOTE(review): the pairs are not normalised, so both ``(i, j)`` and
        ``(j, i)`` may appear -- confirm this is what callers want.
        """
        n_nodes = dir_graph_mat.shape[0]
        nodes = set(range(n_nodes))
        edges = set()
        # Iterate row indices; shape[0] itself is an int and not iterable.
        for node_i in range(n_nodes):
            # nonzero()[1] gives the column indices of the 1 x n row slice.
            for node_j in dir_graph_mat[node_i].nonzero()[1]:
                edges.add((node_i, int(node_j)))
        return nodes, edges

    def get_forest(self):
        """Return the wrapped forest object."""
        return self._lsh_forest

    def get_node_to_word(self):
        """Return ``self.iw`` (``None`` unless assigned externally)."""
        return self.iw
Exemplo n.º 3
0
def test_graphs():
    """Smoke-test that both graph builders return square (n, n) matrices."""
    feature_count = 3
    rng = np.random.RandomState(42)

    for n_samples in (5, 10, 20):
        samples = rng.rand(n_samples, feature_count)
        forest = LSHForest(min_hash_match=0)
        forest.fit(samples)

        # Build both graphs up front, then check each dimension in turn.
        graphs = (
            forest.kneighbors_graph(samples),
            forest.radius_neighbors_graph(samples),
        )
        for graph in graphs:
            for axis in (0, 1):
                assert_equal(graph.shape[axis], n_samples)
def test_graphs():
    # Smoke test: both graph builders must return square (n, n) matrices.
    feature_count = 3
    rng = np.random.RandomState(42)

    for n_samples in (5, 10, 20):
        samples = rng.rand(n_samples, feature_count)
        forest = LSHForest(min_hash_match=0)
        # Fit with warnings suppressed.
        quiet_fit = ignore_warnings(forest.fit)
        quiet_fit(samples)

        # Build both graphs up front, then check each dimension in turn.
        graphs = (
            forest.kneighbors_graph(samples),
            forest.radius_neighbors_graph(samples),
        )
        for graph in graphs:
            for axis in (0, 1):
                assert_equal(graph.shape[axis], n_samples)
Exemplo n.º 5
0
import random
from timeit import timeit

import numpy as np
from igraph import Graph, EdgeSeq
from scipy.io import mmread
from sklearn.neighbors import NearestNeighbors, LSHForest

random.seed(100)

# Script: read a sparse matrix from "sub_matrix" (Matrix Market format),
# build an LSH-forest k-neighbours graph over its rows and write the graph
# out as an edge list.
#
# NOTE(review): LSHForest was deprecated in scikit-learn 0.19 and removed in
# 0.21, so this needs an older scikit-learn -- confirm the pinned version.
print("Reading sparce matrix...")
matrix = mmread("sub_matrix")
print("Converting matrix to dense format...")
a = matrix.toarray()
print(a.shape)
print("Initialize LSH...")
lshf = LSHForest(n_neighbors=10, random_state=1, n_estimators=10)
print("fit LSH...")
lshf.fit(a)

# Sparse connectivity matrix: row i marks the neighbours found for sample i.
K = lshf.kneighbors_graph(a)

print("convert into adjacency matrix...")
K = K.toarray()

# igraph builds the graph from a dense list-of-lists adjacency matrix.
g = Graph.Adjacency(K.tolist())
# NOTE(review): `es` is not used below; kept in case later code relies on it.
es = EdgeSeq(g)

print("writing graph edgelist...")
g.write_edgelist("src_dst_lsh.csv")