def _search_via_relational_roles(self, distance, provisional_roles, relations):
    """Pre-prune over-linked entities, then fetch relations in batches.

    Phase 1 walks ``self.entity_keys_to_visit`` in sorted order.  Keys
    without a (truthy) entry in ``self.nodes`` are left alone.  For every
    other key, when ``distance`` is greater than 0 and the entity's
    ``roles_to_relation_count(provisional_roles)`` exceeds
    ``self.max_links``, the key is removed from
    ``self.entity_keys_to_visit`` and a "Pre-pruned" line is printed.

    Phase 2 runs only when ``provisional_roles`` is truthy and ``distance``
    is below ``self.degree``.  The surviving keys are queried through
    ``PostgresRelation.search_multi`` in slices of 500 and every result is
    merged into ``relations``.

    Args:
        distance: Current search distance; 0 disables pre-pruning.
        provisional_roles: Roles passed to the relation count and to
            ``PostgresRelation.search_multi``.
        relations: Mutable mapping updated in place with found relations.

    Returns:
        None.  Results are delivered by mutating ``relations`` and
        ``self.entity_keys_to_visit``.
    """
    # Iterate over a sorted copy so removing keys from the live
    # collection inside the loop is safe.
    for entity_key in sorted(self.entity_keys_to_visit):
        node = self.nodes.get(entity_key)
        if not node:
            continue
        entity = node.entity
        relational_count = entity.roles_to_relation_count(provisional_roles)
        if 0 < distance and self.max_links < relational_count:
            self.entity_keys_to_visit.remove(entity_key)
            message = ' Pre-pruned {} [{}]'
            message = message.format(entity.name, relational_count)
            print(message)
    if provisional_roles and distance < self.degree:
        print(' Retrieving relational relations')
        # Re-read the keys: the pruning pass above may have removed some.
        keys = sorted(self.entity_keys_to_visit)
        step = 500
        stop = len(keys)
        for start in range(0, stop, step):
            key_slice = keys[start:start + step]
            print(' {}-{} of {}'.format(
                start + 1,
                min(start + step, stop),
                stop,
            ))
            # NOTE(review): presumably search_multi returns a mapping of
            # relations; confirm against PostgresRelation.
            relations.update(
                PostgresRelation.search_multi(
                    key_slice,
                    roles=provisional_roles,
                )
            )
def _search_via_relational_roles(self, distance, provisional_roles, relations):
    """Pre-prune over-linked entities, then fetch relations in batches.

    Phase 1 walks ``self.entity_keys_to_visit`` in sorted order.  Keys
    without a (truthy) entry in ``self.nodes`` are left alone.  For every
    other key, when ``distance`` is greater than 0 and the entity's
    ``roles_to_relation_count(provisional_roles)`` exceeds
    ``self.max_links``, the key is removed from
    ``self.entity_keys_to_visit`` and a "Pre-pruned" line is printed.

    Phase 2 runs only when ``provisional_roles`` is truthy and ``distance``
    is below ``self.degree``.  The surviving keys are queried through
    ``PostgresRelation.search_multi`` in slices of 500 and every result is
    merged into ``relations``.

    Args:
        distance: Current search distance; 0 disables pre-pruning.
        provisional_roles: Roles passed to the relation count and to
            ``PostgresRelation.search_multi``.
        relations: Mutable mapping updated in place with found relations.

    Returns:
        None.  Results are delivered by mutating ``relations`` and
        ``self.entity_keys_to_visit``.
    """
    # Iterate over a sorted copy so removing keys from the live
    # collection inside the loop is safe.
    for entity_key in sorted(self.entity_keys_to_visit):
        node = self.nodes.get(entity_key)
        if not node:
            continue
        entity = node.entity
        relational_count = entity.roles_to_relation_count(provisional_roles)
        if 0 < distance and self.max_links < relational_count:
            self.entity_keys_to_visit.remove(entity_key)
            message = ' Pre-pruned {} [{}]'
            message = message.format(entity.name, relational_count)
            print(message)
    if provisional_roles and distance < self.degree:
        print(' Retrieving relational relations')
        # Re-read the keys: the pruning pass above may have removed some.
        keys = sorted(self.entity_keys_to_visit)
        step = 500
        stop = len(keys)
        for start in range(0, stop, step):
            key_slice = keys[start:start + step]
            print(' {}-{} of {}'.format(
                start + 1,
                min(start + step, stop),
                stop,
            ))
            # NOTE(review): presumably search_multi returns a mapping of
            # relations; confirm against PostgresRelation.
            relations.update(
                PostgresRelation.search_multi(
                    key_slice,
                    roles=provisional_roles,
                )
            )
def _cross_reference(self, distance):
    """Search for relations between every pair of known non-center nodes.

    Does nothing except print a "Skipping" line when
    ``self.relational_roles`` is falsy or when ``distance`` is below 2.
    Otherwise the sorted keys of ``self.nodes`` (minus the center entity's
    key) are cut into slices of 250, every ordered pair of slices is
    queried through ``PostgresRelation.search_bimulti``, and the merged
    results are handed to ``self._process_relations``.  A summary with
    the node and link counts is printed at the end.

    Args:
        distance: Maximum search distance; cross-referencing is skipped
            when it is less than 2.

    Returns:
        None.

    Raises:
        ValueError: If the center entity's key is not among
            ``self.nodes`` (raised by ``list.remove``).
    """
    # TODO: We don't need to test all nodes, only those missing credit role
    #       relations. That may significantly reduce the computational
    #       load.
    if not self.relational_roles:
        print(' Skipping cross-referencing: no relational roles')
        return
    if distance < 2:
        print(' Skipping cross-referencing: maximum distance less than 2')
        return
    print(' Cross-referencing...')
    relations = {}
    entity_keys = sorted(self.nodes)
    entity_keys.remove(self.center_entity.entity_key)
    step = 250
    entity_key_slices = [
        entity_keys[start:start + step]
        for start in range(0, len(entity_keys), step)
    ]
    # The product covers every ordered pair of slices, including a slice
    # paired with itself.
    iterator = itertools.product(entity_key_slices, entity_key_slices)
    for lh_entities, rh_entities in iterator:
        print(' {} & {}'.format(len(lh_entities), len(rh_entities)))
        found = PostgresRelation.search_bimulti(
            lh_entities,
            rh_entities,
            roles=self.relational_roles,
        )
        relations.update(found)
    self._process_relations(relations)
    message = ' Cross-referenced: {} nodes / {} links'
    message = message.format(len(self.nodes), len(self.links))
    print(message)
def _cross_reference(self, distance):
    """Search for relations between every pair of known non-center nodes.

    Does nothing except print a "Skipping" line when
    ``self.relational_roles`` is falsy or when ``distance`` is below 2.
    Otherwise the sorted keys of ``self.nodes`` (minus the center entity's
    key) are cut into slices of 250, every ordered pair of slices is
    queried through ``PostgresRelation.search_bimulti``, and the merged
    results are handed to ``self._process_relations``.  A summary with
    the node and link counts is printed at the end.

    Args:
        distance: Maximum search distance; cross-referencing is skipped
            when it is less than 2.

    Returns:
        None.

    Raises:
        ValueError: If the center entity's key is not among
            ``self.nodes`` (raised by ``list.remove``).
    """
    # TODO: We don't need to test all nodes, only those missing credit role
    #       relations. That may significantly reduce the computational
    #       load.
    if not self.relational_roles:
        print(' Skipping cross-referencing: no relational roles')
        return
    if distance < 2:
        print(' Skipping cross-referencing: maximum distance less than 2')
        return
    print(' Cross-referencing...')
    relations = {}
    entity_keys = sorted(self.nodes)
    entity_keys.remove(self.center_entity.entity_key)
    step = 250
    entity_key_slices = [
        entity_keys[start:start + step]
        for start in range(0, len(entity_keys), step)
    ]
    # The product covers every ordered pair of slices, including a slice
    # paired with itself.
    iterator = itertools.product(entity_key_slices, entity_key_slices)
    for lh_entities, rh_entities in iterator:
        print(' {} & {}'.format(len(lh_entities), len(rh_entities)))
        found = PostgresRelation.search_bimulti(
            lh_entities,
            rh_entities,
            roles=self.relational_roles,
        )
        relations.update(found)
    self._process_relations(relations)
    message = ' Cross-referenced: {} nodes / {} links'
    message = message.format(len(self.nodes), len(self.links))
    print(message)