示例#1
0
 def _search_via_relational_roles(self, distance, provisional_roles,
                                  relations):
     for entity_key in sorted(self.entity_keys_to_visit):
         node = self.nodes.get(entity_key)
         if not node:
             continue
         entity = node.entity
         relational_count = entity.roles_to_relation_count(
             provisional_roles)
         if 0 < distance and self.max_links < relational_count:
             self.entity_keys_to_visit.remove(entity_key)
             message = '            Pre-pruned {} [{}]'
             message = message.format(entity.name, relational_count)
             print(message)
     if provisional_roles and distance < self.degree:
         print('        Retrieving relational relations')
         keys = sorted(self.entity_keys_to_visit)
         step = 500
         stop = len(keys)
         for start in range(0, stop, step):
             key_slice = keys[start:start + step]
             print('            {}-{} of {}'.format(
                 start + 1,
                 min(start + step, stop),
                 stop,
             ))
             relations.update(
                 PostgresRelation.search_multi(
                     key_slice,
                     roles=provisional_roles,
                 ))
示例#2
0
 def _search_via_relational_roles(self, distance, provisional_roles, relations):
     for entity_key in sorted(self.entity_keys_to_visit):
         node = self.nodes.get(entity_key)
         if not node:
             continue
         entity = node.entity
         relational_count = entity.roles_to_relation_count(provisional_roles)
         if 0 < distance and self.max_links < relational_count:
             self.entity_keys_to_visit.remove(entity_key)
             message = '            Pre-pruned {} [{}]'
             message = message.format(entity.name, relational_count)
             print(message)
     if provisional_roles and distance < self.degree:
         print('        Retrieving relational relations')
         keys = sorted(self.entity_keys_to_visit)
         step = 500
         stop = len(keys)
         for start in range(0, stop, step):
             key_slice = keys[start:start + step]
             print('            {}-{} of {}'.format(
                 start + 1, 
                 min(start + step, stop),
                 stop,
                 ))
             relations.update(
                 PostgresRelation.search_multi(
                     key_slice,
                     roles=provisional_roles,
                     )
                 )
示例#3
0
 def _cross_reference(self, distance):
     # TODO: We don't need to test all nodes, only those missing credit role
     #       relations. That may significantly reduce the computational
     #       load.
     if not self.relational_roles:
         print('    Skipping cross-referencing: no relational roles')
         return
     elif distance < 2:
         print(
             '    Skipping cross-referencing: maximum distance less than 2')
         return
     else:
         print('    Cross-referencing...')
     relations = {}
     entity_keys = sorted(self.nodes)
     entity_keys.remove(self.center_entity.entity_key)
     entity_key_slices = []
     step = 250
     for start in range(0, len(entity_keys), step):
         entity_key_slices.append(entity_keys[start:start + step])
     iterator = itertools.product(entity_key_slices, entity_key_slices)
     for lh_entities, rh_entities in iterator:
         print('        {} & {}'.format(len(lh_entities), len(rh_entities)))
         found = PostgresRelation.search_bimulti(
             lh_entities,
             rh_entities,
             roles=self.relational_roles,
         )
         relations.update(found)
     self._process_relations(relations)
     message = '        Cross-referenced: {} nodes / {} links'
     message = message.format(len(self.nodes), len(self.links))
     print(message)
示例#4
0
 def _cross_reference(self, distance):
     # TODO: We don't need to test all nodes, only those missing credit role 
     #       relations. That may significantly reduce the computational 
     #       load.
     if not self.relational_roles:
         print('    Skipping cross-referencing: no relational roles')
         return
     elif distance < 2:
         print('    Skipping cross-referencing: maximum distance less than 2')
         return
     else:
         print('    Cross-referencing...')
     relations = {}
     entity_keys = sorted(self.nodes)
     entity_keys.remove(self.center_entity.entity_key)
     entity_key_slices = []
     step = 250
     for start in range(0, len(entity_keys), step):
         entity_key_slices.append(entity_keys[start:start + step])
     iterator = itertools.product(entity_key_slices, entity_key_slices)
     for lh_entities, rh_entities in iterator:
         print('        {} & {}'.format(len(lh_entities), len(rh_entities)))
         found = PostgresRelation.search_bimulti(
             lh_entities,
             rh_entities,
             roles=self.relational_roles,
             )
         relations.update(found)
     self._process_relations(relations)
     message = '        Cross-referenced: {} nodes / {} links'
     message = message.format(len(self.nodes), len(self.links))
     print(message)