Exemplo n.º 1
0
def tracks_lyrics(**kwargs):
    """Run the ``track_lyrics`` feature over (track name, artist name) pairs.

    Track names are left-joined with the track/album/artist id table and then
    with the artist names; the two resulting name columns are handed to
    ``array_feature`` as its argument values.

    Arguments:
        **kwargs -- forwarded unchanged to ``array_feature``
    """
    names_of_tracks = read_feature_dataframe('track_name')
    id_links = data.track_album_artists_id()
    names_of_artists = read_feature_dataframe('artist_name')

    # No join keys are given explicitly, so the merge falls back to its
    # default key selection (presumably the shared id columns -- TODO confirm).
    tracks_with_ids = names_of_tracks.merge(id_links, how='left')
    joined = tracks_with_ids.merge(names_of_artists, how='left')

    array_feature(
        track_lyrics,
        argument_values=[joined.track_name.values, joined.artist_name.values],
        **kwargs)
Exemplo n.º 2
0
def release_groups_id(**kwargs):
    """Run ``release_group_musicbrainz_id`` over (album name, artist name) pairs.

    Arguments:
        **kwargs -- forwarded unchanged to ``array_feature``
    """
    artists = rdf('artist_name')
    albums = rdf('album_name')

    # The track/album/artist table is track-indexed: each track row names its
    # album and its author. An album with several authors (e.g. a compilation)
    # therefore appears with more than one 'arid'. Keep only the first
    # (alid, arid) pair per album so that every album maps to one artist.
    links = data.track_album_artists_id()
    unique_pairs = links[['alid', 'arid']].drop_duplicates()
    one_artist_per_album = unique_pairs.groupby('alid').head(1)

    albums_with_artist_id = albums.merge(one_artist_per_album,
                                         how='left',
                                         on='alid')
    merged = albums_with_artist_id.merge(artists, how='left', on='arid')

    # The left joins must not have duplicated any album row.
    assert len(merged) == len(albums)

    array_feature(
        release_group_musicbrainz_id,
        argument_values=[merged.album_name.values, merged.artist_name.values],
        **kwargs)
Exemplo n.º 3
0
    Arguments:
        artist_musicbrainz_id {str} -- 

    Returns:
        list -- Record lables ids
    """
    releases = mz.browse_releases(artist=artist_musicbrainz_id['value'],
                                  includes=['labels'])
    labels = set()
    for r in releases['release-list']:
        label_list = r['label-info-list']
        for l in label_list:
            try:
                if l['label']['name'] != '[no label]':
                    labels.add(l['label']['id'])
            except KeyError:
                continue

    labels = list(labels)
    if len(labels) > 0:
        return [{'value': l} for l in labels]
    else:
        logging.getLogger('root.features').warning(
            f"I was not able to find any label for which artist {artist_musicbrainz_id['value']} has recorded"
        )
        return None


if __name__ == "__main__":
    array_feature(artist_recorded_label, mp=True)
Exemplo n.º 4
0
    is_chorus = False
    phrase_chorus_repetition = {}
    for phrase in track_lyrics['value'].split('\n'):
        if re.search('\[.*.*\]', phrase):
            is_chorus = True if re.search(r'\[(.*Chorus.*)|(.*Hook.*)\]',
                                          phrase, re.I) else False
            continue
        if is_chorus:
            phrase_chorus_repetition[phrase] = phrase_chorus_repetition[phrase] + \
                1 if phrase in phrase_chorus_repetition else 1

    # if no chorus, return None
    if len(phrase_chorus_repetition.values()) == 0:
        return None
    highest_frequency = max(phrase_chorus_repetition.values())
    candidate_phrases = [
        key for key in phrase_chorus_repetition.keys()
        if phrase_chorus_repetition[key] == highest_frequency
    ]

    assert len(
        candidate_phrases
    ) >= 1, "At this point, we should have at least one candidate phrase"
    candidate_phrases = sorted(candidate_phrases, key=len, reverse=True)
    return {'value': candidate_phrases[0]}


if __name__ == '__main__':
    array_feature(track_chorus, mp=False)
        str -- Year, formatted as a string with 4 digits. Eg. 1994
    """
    if artist_wikidata_id is not None and artist_type['value'] == 'Person':
        query = "select ?y where {" + \
            artist_wikidata_id['value'] + " wdt:P2032 ?y .}"
        results = query_sparql(query)
        if len(results) > 0:
            if len(results) == 1:
                date = results[0]['y']['value']

                # Check if satisfies the pattern
                if re.match(r"^\d{4}-\d{2}-\d{2}T00:00:00Z$", date):
                    year = date.split('-')[0]
                    return {'value': year}
                else:
                    logging.getLogger('root.features').warning(
                        f"Date {date} does not match pattern")
            else:
                logging.getLogger('root.features').warning(
                    f"Found more than one value for work period (end) for entity {artist_wikidata_id['value']}, skipping"
                )

        else:
            logging.getLogger('root.features').warning(
                f"No attribute work period (end) associated with entity {artist_wikidata_id['value']}"
            )


if __name__ == "__main__":
    array_feature(artist_solo_end_activity_year, mp=False)
import musicbrainzngs as mz


@musicbrainz_feature
@cached_feature
@timing_feature
def artist_self_releasing_records(
        artist_musicbrainz_id) -> 'artist_self_releasing_records':
    """Flag artists having at least one release listed under '[no label]'.

    Arguments:
        artist_musicbrainz_id {dict} -- indexed with 'value' to obtain the
            artist id passed to ``browse_releases``

    Returns:
        dict -- {'value': True} as soon as a '[no label]' entry is found;
            None when no release carries such an entry
    """
    response = mz.browse_releases(artist=artist_musicbrainz_id['value'],
                                  includes=['labels'])
    for release in response['release-list']:
        for label_info in release['label-info-list']:
            try:
                label_name = label_info['label']['name']
            except KeyError:
                # Entries lacking a label or a label name are ignored.
                continue
            if label_name == '[no label]':
                return {'value': True}
    return None

if __name__ == "__main__":
    array_feature(artist_self_releasing_records, mp=True)
Exemplo n.º 7
0
                elif 'American Music Awards' in ceremony_label:
                    d['award_series'] = 'American Music Award'
                elif 'World Music Awards' in ceremony_label:
                    d['award_series'] = 'World Music Award'
                elif 'Tony Award' in ceremony_label:
                    d['award_series'] = 'Tony Award'
                elif 'Golden Raspberry Awards' in ceremony_label:
                    d['award_series'] = 'Golden Raspberry Award'
                elif 'BRIT Awards' in ceremony_label or 'Brit Awards' in ceremony_label:
                    d['award_series'] = 'Brit Award'
                elif 'BET' in ceremony_label:
                    d['award_series'] = 'BET Award'
                elif "People's Choice Awards" in ceremony_label:
                    d['award_series'] = "People's Choice Award"
                elif 'Academy Award' in ceremony_label:
                    d['awards_series'] = "Oscar"
                else:
                    logging.getLogger('root.features').warning(
                        f"Not able to associate any award series to the award {ceremony_label}"
                    )
            except KeyError:
                pass

            awards.append(d)

        return [{'value': a} for a in awards]


if __name__ == "__main__":
    array_feature(artist_awards, mp=False)
Exemplo n.º 8
0
            f"Artist {artist_musicbrainz_id['value']} do not have a type, skipping")
        return None

    if artist['artist']['type'] == 'Person':
        try:
            birth_date = artist['artist']['life-span']['begin']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Artist {artist_musicbrainz_id['value']} do not have known a birth date")
            return None

        try:
            datetime.datetime.strptime(birth_date, '%Y-%m-%d')
        except ValueError:
            logging.getLogger('root.features').warning(
                f"Incorrect artist_birth_date format for {artist_musicbrainz_id['value']}, should be YYYY-MM-DD, but got {birth_date}")
            return None

        try:
            birth_date_pandas = pd.to_datetime(birth_date)
        except pd.errors.OutOfBoundsDatetime:
            logging.getLogger('root.features').warning(
                f"Invalid artist_birth_date for {artist_musicbrainz_id['value']}: {birth_date}. Skipping")
            return None

        return {'value': birth_date_pandas}


if __name__ == "__main__":
    array_feature(artist_birth_date, mp=True)
Exemplo n.º 9
0
        try:
            date = release_group['first-release-date']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Release-group {release_group_musicbrainz_id['value']} has not first-release-date attribute"
            )
            return None

        try:
            datetime.datetime.strptime(date, '%Y-%m-%d')
        except ValueError:
            logging.getLogger('root.features').warning(
                f"Incorrect first release date format for {release_group_musicbrainz_id['value']}, should be YYYY-MM-DD, but got {date}"
            )
            return None

        try:
            date_pandas = pd.to_datetime(date)
        except pd.errors.OutOfBoundsDatetime:
            logging.getLogger('root.features').warning(
                f"Invalid artist_date for {release_group_musicbrainz_id['value']}: {date}. Skipping"
            )
            return None

        return {'value': date_pandas}


if __name__ == "__main__":
    array_feature(album_release_date, mp=False)
Exemplo n.º 10
0
    """
    artist = musicbrainzngs.get_artist_by_id(artist_musicbrainz_id['value'],
                                             includes=['tags'])['artist']
    genres = []

    try:
        tags = artist['tag-list']
    except:
        logging.getLogger('root.features').warning(
            f"No tag list associated with artist {artist_musicbrainz_id['value']}"
        )
        return None

    for tag in tags:
        try:
            musicbrainz_genre_id = genres_musicbrainz(tag['name'])
            genres.append(musicbrainz_genre_id)
        except KeyError:
            continue

    if len(genres) > 0:
        return [{'value': g} for g in genres]
    else:
        logging.getLogger('root.features').warning(
            f"No genres associated with artist {artist_musicbrainz_id['value']}"
        )


if __name__ == "__main__":
    array_feature(artist_genres, mp=True)
Exemplo n.º 11
0
import musicbrainzngs
import logging


@musicbrainz_feature
@cached_feature
@timing_feature
def artist_birth_place_area(artist_musicbrainz_id) -> 'area_musicbrainz_id':
    """Look up the 'begin-area' of an artist on MusicBrainz.

    Arguments:
        artist_musicbrainz_id {dict} -- indexed with 'value' to obtain the
            artist id; may be None

    Returns:
        dict -- {'value': <begin-area id>}, or None when the input is None or
            the artist record has no 'begin-area' id
    """
    if artist_musicbrainz_id is None:
        return None

    response = musicbrainzngs.get_artist_by_id(artist_musicbrainz_id['value'])
    try:
        begin_area = response['artist']['begin-area']
        return {'value': begin_area['id']}
    except KeyError:
        logging.getLogger('root.features').warning(
            f"Artist {artist_musicbrainz_id} has not begin-area attribute")
        return None


if __name__ == "__main__":
    array_feature(artist_birth_place_area, mp=True)
Exemplo n.º 12
0
import musicbrainzngs
import logging


@musicbrainz_feature
@timing_feature
def record_label_area(record_label_musicbrainz_id) -> 'area_musicbrainz_id':
    """Look up the 'area' of a record label on MusicBrainz.

    Arguments:
        record_label_musicbrainz_id {dict} -- indexed with 'value' to obtain
            the label id; may be None

    Returns:
        dict -- {'value': <area id>}, or None when the input is None or the
            label record has no 'area' id
    """
    if record_label_musicbrainz_id is None:
        return None

    response = musicbrainzngs.get_label_by_id(
        record_label_musicbrainz_id['value'])
    try:
        area = response['label']['area']
        return {'value': area['id']}
    except KeyError:
        logging.getLogger('root.features').warning(
            f"Record label {record_label_musicbrainz_id} has not area attribute"
        )
        return None


if __name__ == "__main__":
    array_feature(record_label_area, mp=False)
Exemplo n.º 13
0
@cached_feature
@timing_feature
def artist_type(artist_musicbrainz_id) -> 'artist_type':
    """Report the MusicBrainz type of an artist.

       The docstring of the original lists Person, Group, Choir, Orchestra and
       Character (a fictional character) as possible types; the type 'Other'
       is discarded here.

    Arguments:
        artist_musicbrainz_id {dict} -- indexed with 'value' to obtain the
            artist id; may be None

    Returns:
        dict -- {'value': <type>}, or None when the input is None, the artist
            has no type, or the type is 'Other'
    """
    if artist_musicbrainz_id is None:
        return None

    response = musicbrainzngs.get_artist_by_id(artist_musicbrainz_id['value'])
    try:
        kind = response['artist']['type']
    except KeyError:
        logging.getLogger('root.features').warning(
            f"Artist {artist_musicbrainz_id} do not have a type, skipping")
        return None

    if kind == 'Other':
        return None
    return {'value': kind}


if __name__ == "__main__":
    array_feature(artist_type, mp=False)
    Returns:
        str --
    """
    if record_label_musicbrainz_id is not None:
        label = musicbrainzngs.get_label_by_id(
            record_label_musicbrainz_id['value'])

        try:
            start_activity_year = label['label']['life-span']['end']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Label {record_label_musicbrainz_id} does not have known dissolution date"
            )
            return None

        if re.match(r"^\d{4}$", start_activity_year):
            return {'value': start_activity_year}
        elif re.match(r"^\d{4}-\d{2}$", start_activity_year):
            return {'value': start_activity_year.split('-')[0]}
        elif re.match(r"^\d{4}-\d{2}-\d{2}$", start_activity_year):
            return {'value': start_activity_year.split('-')[0]}
        else:
            logging.getLogger('root.features').warning(
                f"Record label dissolution year {start_activity_year} does not match pattern"
            )


if __name__ == "__main__":
    array_feature(record_label_dissolution_year, mp=False)
Exemplo n.º 15
0
def artist_gender(artist_musicbrainz_id) -> 'artist_gender':
    """States whether the artist is a male or a female

    Arguments:
        artist_musicbrainz_id {dict} -- indexed with 'value' to obtain the
            artist id; may be None

    Returns:
        dict -- {'value': 'Male'} or {'value': 'Female'}; None when the input
            is None, the artist has no gender, or the gender is any other value
    """
    if artist_musicbrainz_id is None:
        return None

    artist = musicbrainzngs.get_artist_by_id(artist_musicbrainz_id['value'])
    try:
        gender = artist['artist']['gender']
    except KeyError:
        logging.getLogger('root.features').warning(
            f"Artist {artist_musicbrainz_id} has no gender, skipping")
        return None

    if gender not in ('Male', 'Female'):
        # The message must go through .warning() on the feature logger;
        # passing it to getLogger() only creates a logger named after the
        # message and emits nothing.
        logging.getLogger('root.features').warning(
            f"Artist {artist_musicbrainz_id} has unknown gender {gender}, skipping"
        )
        return None

    return {'value': gender}


if __name__ == "__main__":
    array_feature(artist_gender, mp=False)
Exemplo n.º 16
0
    if artist['artist']['type'] == 'Person':
        try:
            death_date = artist['artist']['life-span']['end']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Artist {artist_musicbrainz_id['value']} do not have known a death date"
            )
            return None

        try:
            datetime.datetime.strptime(death_date, '%Y-%m-%d')
        except ValueError:
            logging.getLogger('root.features').warning(
                f"Incorrect artist_death_date format for {artist_musicbrainz_id['value']}, should be YYYY-MM-DD, but got {death_date}"
            )
            return None

        try:
            death_date_pandas = pd.to_datetime(death_date)
        except pd.errors.OutOfBoundsDatetime:
            logging.getLogger('root.features').warning(
                f"Invalid artist_death_date for {artist_musicbrainz_id['value']}: {death_date}. Skipping"
            )
            return None

        return {'value': death_date_pandas}


if __name__ == "__main__":
    array_feature(artist_death_date, mp=True)
Exemplo n.º 17
0
    This allows us to retrieve the artist page in the 89% of the cases

    Arguments:
        artist_name {str} - -

    Returns:
        uri - - uri
    """
    artist = musicbrainzngs.get_artist_by_id(
        artist_musicbrainz_id['value'], includes=['url-rels'])['artist']

    try:
        urls = artist['url-relation-list']
    except KeyError:
        logging.getLogger('root.features').warning(
            f"No relations to external pages specified for {artist_musicbrainz_id['value']}")
        return None

    url_wikidata = [u for u in urls if u['type'] ==
                    'wikidata' and u['direction'] == 'forward']
    if len(url_wikidata) > 0:
        return {'value': f"wd:{url_wikidata[0]['target'].split('/')[-1]}"}
    else:
        logging.getLogger('root.features').warning(
            f"No wikidata page specified for artist {artist_musicbrainz_id['value']}")
        return None


if __name__ == '__main__':
    array_feature(artist_wikidata_id, mp=True,)
        str -- Year, formatted as a string with 4 digits. Eg. 1994
    """
    if artist_wikidata_id is not None and artist_type['value'] == 'Person':
        query = "select ?y where {" + \
            artist_wikidata_id['value'] + " wdt:P2031 ?y .}"
        results = query_sparql(query)
        if len(results) > 0:
            if len(results) == 1:
                date = results[0]['y']['value']

                # Check if satisfies the pattern
                if re.match(r"^\d{4}-\d{2}-\d{2}T00:00:00Z$", date):
                    year = date.split('-')[0]
                    return {'value': year}
                else:
                    logging.getLogger('root.features').warning(
                        f"Date {date} does not match pattern")
            else:
                logging.getLogger('root.features').warning(
                    f"Found more than one value for work period (start) for entity {artist_wikidata_id['value']}, skipping"
                )

        else:
            logging.getLogger('root.features').warning(
                f"No attribute work period (start) associated with entity {artist_wikidata_id['value']}"
            )


if __name__ == "__main__":
    array_feature(artist_solo_start_activity_year, mp=True)
Exemplo n.º 19
0
import musicbrainzngs
import logging


@musicbrainz_feature
@cached_feature
@timing_feature
def artist_based_in_area(artist_musicbrainz_id) -> 'area_musicbrainz_id':
    """Look up the 'area' of an artist on MusicBrainz.

    Arguments:
        artist_musicbrainz_id {dict} -- indexed with 'value' to obtain the
            artist id; may be None

    Returns:
        dict -- {'value': <area id>}, or None when the input is None or the
            artist record has no 'area' id
    """
    if artist_musicbrainz_id is None:
        return None

    response = musicbrainzngs.get_artist_by_id(artist_musicbrainz_id['value'])
    try:
        area = response['artist']['area']
        return {'value': area['id']}
    except KeyError:
        logging.getLogger('root.features').warning(
            f"Artist {artist_musicbrainz_id} has not area attribute")
        return None


if __name__ == "__main__":
    array_feature(artist_based_in_area, mp=True)
Exemplo n.º 20
0
    Returns:
        str -- 
    """
    if record_label_musicbrainz_id is not None:
        label = musicbrainzngs.get_label_by_id(
            record_label_musicbrainz_id['value'])

        try:
            start_activity_year = label['label']['life-span']['begin']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Label {record_label_musicbrainz_id} does not have known fundation date"
            )
            return None

        if re.match(r"^\d{4}$", start_activity_year):
            return {'value': start_activity_year}
        elif re.match(r"^\d{4}-\d{2}$", start_activity_year):
            return {'value': start_activity_year.split('-')[0]}
        elif re.match(r"^\d{4}-\d{2}-\d{2}$", start_activity_year):
            return {'value': start_activity_year.split('-')[0]}
        else:
            logging.getLogger('root.features').warning(
                f"Record label fundation year {start_activity_year} does not match pattern"
            )


if __name__ == "__main__":
    array_feature(record_label_fundation_year, mp=False)
Exemplo n.º 21
0
def album_record_label(
        release_group_musicbrainz_id) -> 'record_label_musicbrainz_id':
    """Extracts the record label associated with an album.
       The releases belonging to the release-group are scanned in order and
       the first valid record label (i.e. not '[no label]') is returned.

    Arguments:
        release_group_musicbrainz_id {dict} -- indexed with 'value' to obtain
            the release-group id; may be None

    Returns:
        dict -- {'value': <record label musicbrainz id>}, or None when the
            input is None or no release carries a valid label
    """
    if release_group_musicbrainz_id is None:
        return None

    releases = mz.browse_releases(
        release_group=release_group_musicbrainz_id['value'],
        includes=['labels'])

    for release in releases['release-list']:
        # A release without label information is treated like an empty list.
        for label_info in release.get('label-info-list', []):
            try:
                label = label_info['label']
                if label['name'] != '[no label]':
                    return {'value': label['id']}
            except KeyError:
                # Incomplete label entry: try the next one.
                continue

        # This release had no usable label; keep looking at the remaining
        # releases instead of giving up after the first one.
        logging.getLogger('root.features').warning(
            f"Release {release['id']} has no associated record label")

    return None


if __name__ == "__main__":
    array_feature(album_record_label, mp=False)
Exemplo n.º 22
0
    if release_group_musicbrainz_id is not None:
        release_group = musicbrainzngs.get_release_group_by_id(
            release_group_musicbrainz_id['value'],
            includes=['tags'])['release-group']
        try:
            tags = release_group['tag-list']
        except KeyError:
            logging.getLogger('root.features').warning(
                f"Release-group {release_group_musicbrainz_id} has not tags")
            return None

        genres = []
        for tag in tags:
            try:
                musicbrainz_genre_id = genres_musicbrainz(tag['name'])
                genres.append(musicbrainz_genre_id)
            except KeyError:
                continue

        if len(genres) > 0:
            return [{'value': g} for g in genres]
        else:
            logging.getLogger('root.features').warning(
                f"No genres associated with release-group {release_group_musicbrainz_id}"
            )
            return None


if __name__ == "__main__":
    array_feature(album_genres, mp=True)