def query_relations(self, entity_keys, role_names=None, year=None, verbose=True):
    """Fetch relations touching ``entity_keys``, querying in batches.

    The keys are sliced into batches and each batch is passed to
    ``SqliteRelation.search_multi``; all results are concatenated.

    Args:
        entity_keys: Sliceable sequence of entity keys to look up.
        role_names: Optional collection of role names forwarded to
            ``search_multi``. ``None`` (the default) is forwarded
            unchanged and counts as zero role names when sizing batches.
        year: Optional year filter forwarded to ``search_multi``. An
            ``int`` reserves two slots of the per-query budget; any other
            truthy value reserves four.
        verbose: Forwarded to ``search_multi``.

    Returns:
        A list with the relations found for every batch, in batch order.
    """
    print(" Roles:", role_names)
    # ``role_names`` defaults to None, so it must not be passed to len()
    # unguarded.
    role_count = 0 if role_names is None else len(role_names)
    # Per-query budget of 999 -- presumably SQLite's default bound-variable
    # limit (TODO confirm) -- minus the slots reserved for role names and
    # the year filter.
    entity_query_cap = 999 - (1 + role_count) * 2
    if isinstance(year, int):
        entity_query_cap -= 2
    elif year:
        entity_query_cap -= 4
    # Halved: presumably each entity key binds two variables (type and id).
    # Clamp to 1 so a very long role list can neither make range() raise
    # (step 0) nor silently skip every batch (negative step).
    entity_query_cap = max(1, entity_query_cap // 2)
    relations = []
    for start in range(0, len(entity_keys), entity_query_cap):
        entity_key_slice = entity_keys[start:start + entity_query_cap]
        found = SqliteRelation.search_multi(
            entity_key_slice,
            role_names=role_names,
            verbose=verbose,
            year=year,
        )
        relations.extend(found)
    return relations
def collect_entities_2(self):
    """Collect the nodes and links of the graph around ``self.center_entity``.

    Starting from the center entity, relations are expanded breadth-first
    for up to ``self.degree`` steps. Entities are keyed as
    ``(entity_type, entity_id)`` where type 1 is an artist and any other
    type is treated as a label. 'Alias' and 'Member Of' relations are
    always queried so alias/member sets can be filled in, but only
    relations whose role is in ``self.role_names`` become links.

    After the search, node names are loaded, and the graph is pruned in
    this order: nameless nodes, nodes discovered but never expanded, nodes
    beyond ``self.max_nodes``, links beyond ``self.max_links``.

    Returns:
        A ``(nodes, links)`` tuple of dicts, keyed by entity key and by
        link key respectively.
    """
    original_role_names = self.role_names or ()
    # Same membership semantics as the original sequence, but O(1) lookup
    # inside the relation loop.
    wanted_role_names = frozenset(original_role_names)
    provisional_role_names = sorted(
        wanted_role_names | {'Alias', 'Member Of'}
    )
    if type(self.center_entity).__name__.endswith('Artist'):
        initial_key = (1, self.center_entity.discogs_id)
    else:
        initial_key = (2, self.center_entity.discogs_id)
    entity_keys_to_visit = {initial_key}
    links = {}
    nodes = {}
    # Per-query budget of 999 minus the slots reserved for role names,
    # halved -- presumably because each key binds two variables (TODO
    # confirm). Clamped to 1 so a long role list cannot make range() raise
    # (step 0) or silently skip every query (negative step).
    entity_query_cap = 999 - (1 + len(provisional_role_names)) * 2
    entity_query_cap = max(1, entity_query_cap // 2)
    break_on_next_loop = False
    for distance in range(self.degree + 1):
        current_entity_keys_to_visit = list(entity_keys_to_visit)
        for key in current_entity_keys_to_visit:
            nodes.setdefault(key, self.entity_key_to_node(key, distance))
        if break_on_next_loop:
            break
        if (
            1 < distance and
            self.max_nodes and
            self.max_nodes <= len(nodes)
        ):
            # Node budget reached: finish this expansion, then stop at the
            # top of the next iteration.
            break_on_next_loop = True
        entity_keys_to_visit.clear()
        relations = []
        range_stop = len(current_entity_keys_to_visit)
        for start in range(0, range_stop, entity_query_cap):
            # Split into multiple queries to avoid variable maximum.
            stop = start + entity_query_cap
            entity_key_slice = current_entity_keys_to_visit[start:stop]
            relations.extend(SqliteRelation.search_multi(
                entity_key_slice,
                role_names=provisional_role_names,
            ))
        for relation in relations:
            e1k = (relation['entity_one_type'], relation['entity_one_id'])
            e2k = (relation['entity_two_type'], relation['entity_two_id'])
            if e1k not in nodes:
                entity_keys_to_visit.add(e1k)
                nodes[e1k] = self.entity_key_to_node(e1k, distance + 1)
            if e2k not in nodes:
                entity_keys_to_visit.add(e2k)
                nodes[e2k] = self.entity_key_to_node(e2k, distance + 1)
            role_name = relation['role_name']
            if role_name == 'Alias':
                nodes[e1k]['aliases'].add(e2k[1])
                nodes[e2k]['aliases'].add(e1k[1])
            elif role_name in ('Member Of', 'Sublabel Of'):
                nodes[e2k]['members'].add(e1k[1])
            if role_name not in wanted_role_names:
                # Only queried to fill aliases/members; not a link.
                continue
            link = self.relation_to_link(relation)
            link['distance'] = min(
                nodes[e1k]['distance'],
                nodes[e2k]['distance'],
            )
            links[link['key']] = link
            nodes[e1k]['links'].add(link['key'])
            nodes[e2k]['links'].add(link['key'])
    # Query node names.
    artist_ids = []
    label_ids = []
    for entity_type, entity_id in nodes:
        if entity_type == 1:
            artist_ids.append(entity_id)
        else:
            label_ids.append(entity_id)
    artists = []
    for i in range(0, len(artist_ids), 999):
        query = (
            SqliteArtist
            .select()
            .where(SqliteArtist.id.in_(artist_ids[i:i + 999]))
        )
        artists.extend(query)
    labels = []
    # Chunk over the label ids themselves. Iterating over len(artist_ids)
    # here would drop label names whenever labels outnumber artists.
    for i in range(0, len(label_ids), 999):
        query = (
            SqliteLabel
            .select()
            .where(SqliteLabel.id.in_(label_ids[i:i + 999]))
        )
        labels.extend(query)
    for artist in artists:
        nodes[(1, artist.id)]['name'] = artist.name
    for label in labels:
        nodes[(2, label.id)]['name'] = label.name
    # Prune nameless nodes. Iterate over a snapshot because pruning is
    # handed the ``nodes`` dict and presumably removes entries from it.
    for node in tuple(nodes.values()):
        if not node.get('name'):
            self.prune_node(node, nodes, links, update_missing_count=False)
    # Prune unvisited nodes and links: keys still queued were discovered
    # but never expanded.
    for key in entity_keys_to_visit:
        # NOTE(review): ``node`` is None if the key is no longer in
        # ``nodes`` (e.g. already pruned as nameless); confirm that
        # prune_node tolerates None.
        node = nodes.get(key)
        self.prune_node(node, nodes, links)
    # Prune nodes beyond maximum, keeping the closest (then lowest id).
    if self.max_nodes:
        nodes_to_prune = sorted(
            nodes.values(),
            key=lambda x: (x['distance'], x['id']),
        )[self.max_nodes:]
        for node in nodes_to_prune:
            self.prune_node(node, nodes, links)
    # Prune links beyond maximum, in ``self.link_sorter`` order.
    if self.max_links:
        links_to_prune = sorted(
            links.values(),
            key=self.link_sorter,
        )[self.max_links:]
        for link in links_to_prune:
            self.prune_link(link, nodes, links)
    return nodes, links