示例#1
0
 def inner(query_features):
     """Lazily yield every fingerprint derived from ``query_features``.

     ``query_features`` is iterated as a collection of feature groups.
     Each feature in each group is passed to
     ``shingle_extraction.extract_shingles`` and the result is passed to
     ``fingerprint.get_fingerprints``; the resulting fingerprints are
     yielded one by one in the order they are produced. Duplicates are
     not removed.
     """
     for feature_group in query_features:
         for item in feature_group:
             item_shingles = shingle_extraction.extract_shingles(item)
             for item_fp in fingerprint.get_fingerprints(item_shingles):
                 yield item_fp
示例#2
0
 def inner(query_features):
     """Generate the fingerprints of all features in ``query_features``.

     The input is treated as an iterable of iterables of features. For
     each feature, shingles are obtained from
     ``shingle_extraction.extract_shingles`` and turned into fingerprints
     by ``fingerprint.get_fingerprints``. Fingerprints are yielded in
     production order, without de-duplication.
     """
     # Flatten the two nesting levels lazily so nothing is materialised.
     flat_features = (
         single for group in query_features for single in group
     )
     for single in flat_features:
         produced = fingerprint.get_fingerprints(
             shingle_extraction.extract_shingles(single)
         )
         for value in produced:
             yield value
示例#3
0
 def process_shingles(shingles, record_data_vector, wl_it):
     """Add ``(shingle_id, 1)`` entries for ``shingles`` to ``record_data_vector``.

     Relies on four names defined outside this function:
     ``accumulate_wl_shingles``, ``fingerprints``, ``shingle_id_map`` and
     ``wl_state``.

     * When ``fingerprints`` is falsy, every shingle gets a sequential id.
       Unseen shingles take the current value of the counter stored in
       ``wl_state`` (then the counter is incremented) and are remembered
       in ``shingle_id_map``.
     * Otherwise the ids are the values returned by
       ``fingerprint.get_fingerprints(shingles, size=24)``.

     ``record_data_vector`` is modified in place; nothing is returned.
     """
     # Counter key: one shared counter, or one counter per ``wl_it`` value.
     if accumulate_wl_shingles:
         counter_key = "next_shingle_id"
     else:
         counter_key = "wl_{0}_next_shingle_id".format(wl_it)

     if fingerprints:
         hashed_ids = fingerprint.get_fingerprints(shingles, size=24)
         record_data_vector |= set((hashed_id, 1) for hashed_id in hashed_ids)
         return

     for token in shingles:
         if token not in shingle_id_map:
             fresh_id = wl_state[counter_key]
             shingle_id_map[token] = fresh_id
             wl_state[counter_key] = fresh_id + 1
         record_data_vector.add((shingle_id_map[token], 1))
 def build_with_w_shingles(self, w_shingle_lists, initial_sparse_matrix=None):
     """Populate ``self.sparse_matrix`` from per-record w-shingles.

     Each element of ``w_shingle_lists`` is unpacked as a 3-tuple; only
     the middle item (the record's shingles) is used. The record's
     position in the iteration, starting at 0, is its column index. Every
     fingerprint returned by ``fingerprint.get_fingerprints`` for the
     record is mapped to the set of column indices it occurs in.

     Args:
         w_shingle_lists: iterable of 3-tuples as described above.
         initial_sparse_matrix: optional mapping to extend. It is stored
             as ``self.sparse_matrix`` without copying, so the caller's
             object is updated in place. When omitted, a new empty dict
             is created on every call.

     When ``self.print_progress`` is truthy, one progress line is printed
     per record.
     """
     # A literal ``{}`` default is created once and shared by every call
     # that omits the argument, so entries from one build would leak into
     # the next. Create the dict per call instead.
     if initial_sparse_matrix is None:
         initial_sparse_matrix = {}
     self.sparse_matrix = initial_sparse_matrix

     for i, (_, record_w_shingles, _) in enumerate(w_shingle_lists):
         if self.print_progress:
             # A single pre-formatted string prints the same text under
             # both the print statement and the print function.
             print("Ch.Mat.: Processing column %s of %s" % (i, self.cols_count))
         for fp in fingerprint.get_fingerprints(record_w_shingles):
             if fp not in self.sparse_matrix:
                 self.sparse_matrix[fp] = set()
             self.sparse_matrix[fp].add(i)
 def build(self, feature_lists):
     """Rebuild ``self.sparse_matrix`` from scratch out of record features.

     Each element of ``feature_lists`` is unpacked as a 3-tuple; only the
     middle item (the record's features) is used. The record's position
     in the iteration, starting at 0, is its column index. For every
     feature, shingles come from ``shingle_extraction.extract_shingles``
     and fingerprints from ``fingerprint.get_fingerprints``; each
     fingerprint is mapped to the set of column indices it occurs in.

     Any previous content of ``self.sparse_matrix`` is discarded. When
     ``self.print_progress`` is truthy, one progress line is printed per
     record.
     """
     self.sparse_matrix = {}
     for i, (_, record_features, _) in enumerate(feature_lists):
         if self.print_progress:
             # A single pre-formatted string prints the same text under
             # both the print statement and the print function.
             print("Ch.Mat.: Processing column %s of %s" % (i, self.cols_count))
         for feature in record_features:
             shingles = shingle_extraction.extract_shingles(feature)
             for fp in fingerprint.get_fingerprints(shingles):
                 # ``in`` replaces the deprecated ``dict.has_key``.
                 if fp not in self.sparse_matrix:
                     self.sparse_matrix[fp] = set()
                 self.sparse_matrix[fp].add(i)
 def compute_column_fingerprints(self, record_graphs):
     """Return the sorted, de-duplicated fingerprints of ``record_graphs``.

     Every graph is passed to ``feature_extraction.extract_features``
     together with ``self.wl_iterations`` and the current
     ``self.wl_state``. That call returns the graph's features and a new
     state, which replaces ``self.wl_state``. All collected features are
     then shingled and fingerprinted, and the distinct fingerprints are
     returned as a sorted list.

     Asserts that ``self.wl_state`` is truthy before doing any work.
     """
     assert self.wl_state

     collected = []
     for graph in record_graphs:
         extracted, self.wl_state = feature_extraction.extract_features(
             graph, self.wl_iterations, self.wl_state
         )
         collected.extend(extracted)

     distinct = set()
     for item in collected:
         item_shingles = shingle_extraction.extract_shingles(item)
         distinct.update(fingerprint.get_fingerprints(item_shingles))

     return sorted(distinct)