def _process_collaborations_node(collaborations_node): for artist_node in list(collaborations_node): name = None namevars = [] roles = [] discogs_id = None for elem in list(artist_node): if elem.tag == ARTIST_NAME: name = normalize_discogs_name(elem.text) elif elem.tag == ARTIST_NAMEVAR: namevars.append(normalize_discogs_name(elem.tag)) elif elem.tag == ARTIST_ROLE: for role in elem.text.split(","): candidate_r = map_discogs_role(role.strip()) if candidate_r is not None: roles.append(candidate_r) elif elem.tag == ARTIST_DISCOGS_ID: discogs_id = int(elem.text) if name is not None: for role in roles: if role == ROLE_FEATURER: yield (ROLE_FEATURER, Artist(canonical=name, # Returning a tuple namevars=namevars, discogs_id=discogs_id)) elif role in Collaboration.valid_roles(): yield (role, Collaboration(collaborator=Artist(canonical=name, # Returning a tuple namevars=namevars, discogs_id=discogs_id), role=role))
def _process_songs_node(songs_node, artists, collaborations, album, genres, country, release_date, release_id): for song_node in list(songs_node): title = None alt_titles = None duration = None discogs_id = None extra_collaborations = [] for elem in list(song_node): if elem.tag == SONG_TITLE: title = normalize_discogs_name(elem.text) elif elem.tag == SONG_DURATION: duration = DiscogsSongParser._parse_duration(elem.text) elif elem.tag == COLLABORATIONS: for a_coll in DiscogsSongParser._process_collaborations_node(elem): if a_coll[0] == ROLE_FEATURER: artists.append(a_coll[1]) else: extra_collaborations.append(a_coll[1]) elif elem.tag == TRACK_POSITION: discogs_id = DiscogsSongParser.build_discogs_id(release_id, elem.text) if title not in EMPTY_CONTENT: candidate_alt_title = remove_brackets_info(title) if title != candidate_alt_title: alt_titles = [candidate_alt_title] yield Song(canonical=title, artists=artists, alt_titles=alt_titles, collaborations=collaborations + extra_collaborations, duration=duration, genres=genres, release_date=release_date, album=album, country=country, discogs_id=discogs_id)
def _process_artists_node(artists_node): for artist_node in list(artists_node): name = None namevars = [] discogs_id = None for elem in list(artist_node): if elem.tag == ARTIST_NAME: name = normalize_discogs_name(elem.text) elif elem.tag == ARTIST_NAMEVAR: candidate_namevar = normalize_discogs_name(elem.text) if candidate_namevar is not None: namevars.append(candidate_namevar) elif elem.tag == ARTIST_DISCOGS_ID: discogs_id = int(elem.text) if name is not None and name not in ARTIST_NAMES_TO_IGNORE: yield Artist(canonical=name, namevars=namevars, discogs_id=discogs_id)
def _process_nodes(nodes_to_process):
    """Build an ``Artist`` from the ``NAME`` and ``DISCOGS_ID`` entries.

    *nodes_to_process* is indexed by ``NAME`` and ``DISCOGS_ID``; the text
    of the first is normalised into the canonical name and the text of the
    second is converted to ``int``.
    """
    name_node = nodes_to_process[NAME]
    artist_name = normalize_discogs_name(name_node.text)

    id_node = nodes_to_process[DISCOGS_ID]
    artist_id = int(id_node.text)

    return Artist(canonical=artist_name, discogs_id=artist_id)
def _process_artists_node(artists_node): for artist_node in list(artists_node): name = None discogs_id = None for elem in list(artist_node): if elem.tag == ARTIST_NAME: name = normalize_discogs_name(elem.text) elif elem.tag == ARTIST_DISCOGS_ID: discogs_id = int(elem.text) if name is not None and name not in ARTIST_NAMES_TO_IGNORE: yield Artist(canonical=name, discogs_id=discogs_id)
def _process_nodes_of_person(nodes_to_process):
    """Build an ``ArtistPerson`` from the collected nodes.

    ``NAME`` and ``DISCOGS_ID`` are always read.  ``REAL_NAME`` is optional
    and becomes the ``civil`` name (``None`` when absent).  When present,
    the ``NAME_VARIATIONS`` and ``ALIASES`` nodes are handed, in that
    order, to the matching ``DiscogsArtistParser`` helper together with the
    new artist.
    """
    stage_name = normalize_discogs_name(nodes_to_process[NAME].text)
    person_id = int(nodes_to_process[DISCOGS_ID].text)

    if REAL_NAME in nodes_to_process:
        real_name = normalize_discogs_name(nodes_to_process[REAL_NAME].text)
    else:
        real_name = None

    person = ArtistPerson(canonical=stage_name,
                          civil=real_name,
                          discogs_id=person_id)

    # Optional sections, processed in the same order as before.
    if NAME_VARIATIONS in nodes_to_process:
        variations_node = nodes_to_process[NAME_VARIATIONS]
        DiscogsArtistParser._process_namevariations_node(person, variations_node)

    if ALIASES in nodes_to_process:
        aliases_node = nodes_to_process[ALIASES]
        DiscogsArtistParser._process_aliases_node(person, aliases_node)

    return person
def _process_songs_node(songs_node, artists, collaborations, album, genres, country, release_date, release_id): for song_node in list(songs_node): title = None discogs_id = None extra_collaborations = [] for elem in list(song_node): if elem.tag == SONG_TITLE: title = normalize_discogs_name(elem.text) elif elem.tag == TRACK_POSITION: discogs_id = DiscogsSongParser.build_discogs_id(release_id, elem.text) elif elem.tag == COLLABORATIONS: for a_coll in DiscogsSongParserFilteringNoNamevars._process_collaborations_node(elem): if a_coll[0] == ROLE_FEATURER: artists.append(a_coll[1]) else: extra_collaborations.append(a_coll[1]) if title not in EMPTY_CONTENT: yield Song(canonical=title, discogs_id=discogs_id, artists=artists, collaborations=collaborations)
def _process_nodes_of_group(nodes_to_process):
    """Build an ``ArtistGroup`` from the collected nodes.

    ``NAME`` and ``DISCOGS_ID`` are always read.  When present, the
    ``NAME_VARIATIONS`` and ``ALIASES`` nodes are handed, in that order, to
    the matching ``DiscogsArtistParser`` helper.  The ``MEMBERS`` node is
    read unconditionally, so a missing entry fails on the lookup.
    """
    group_name = normalize_discogs_name(nodes_to_process[NAME].text)
    group_id = int(nodes_to_process[DISCOGS_ID].text)
    group = ArtistGroup(canonical=group_name, discogs_id=group_id)

    # Optional sections.
    if NAME_VARIATIONS in nodes_to_process:
        variations_node = nodes_to_process[NAME_VARIATIONS]
        DiscogsArtistParser._process_namevariations_node(group, variations_node)

    if ALIASES in nodes_to_process:
        aliases_node = nodes_to_process[ALIASES]
        DiscogsArtistParser._process_aliases_node(group, aliases_node)

    # No presence check for the members node: it is what distinguishes a
    # group from a single artist, so it is expected to be there.
    members_node = nodes_to_process[MEMBERS]
    DiscogsArtistParser._process_members_node(group, members_node)

    return group
def _process_country_node(country_node):
    """Return the normalised text of *country_node*.

    ``None`` is returned when the normalised text is in ``EMPTY_CONTENT``.
    """
    country = normalize_discogs_name(country_node.text)
    return None if country in EMPTY_CONTENT else country
def _process_album_node(album_node):
    """Return the text of *album_node* run through ``normalize_discogs_name``."""
    album_text = album_node.text
    return normalize_discogs_name(album_text)