Exemplo n.º 1
0
    def read(self, inputs):
        """
            POST the configured area id and year range to SERVER_URL and turn each
            returned row into a Recording (with its Artist attached).

            inputs is not used by this element.
            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        query = [{
            'area_id': self.area_id,
            'start_year': self.start_year,
            'end_year': self.end_year
        }]
        response = requests.post(self.SERVER_URL, json=query)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch area random recordings from ListenBrainz. HTTP code %s"
                % response.status_code)

        try:
            rows = ujson.loads(response.text)
        except ValueError as err:
            raise PipelineError(
                "Cannot fetch area random recordings from ListenBrainz. Invalid JSON returned: "
                + str(err))

        # One Recording per row, in the order the server returned them.
        return [
            Recording(mbid=row['recording_mbid'],
                      name=row['recording_name'],
                      year=row['year'],
                      artist=Artist(name=row['artist_credit_name'],
                                    artist_credit_id=row['artist_credit_id']))
            for row in rows
        ]
    def read(self, inputs):
        """
            Query SERVER_URL/<metric>/<mbid> for similar recordings and return one
            Recording per result row, annotated with the metric, the source mbid,
            and the row's distance and offset.

            inputs is not used by this element.
            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        self.debug("read for %s/%s" % (self.metric, self.mbid))

        url = self.SERVER_URL + self.metric + "/" + self.mbid
        self.debug(f"url: {url}")
        response = requests.get(url, params={'remove_dups': 'true'})
        if response.status_code != 200:
            raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: HTTP code %d" % response.status_code)

        try:
            results = ujson.loads(response.text)
        except ValueError as err:
            raise PipelineError("Cannot fetch annoy similarities from AcousticBrainz: Invalid JSON returned: " + str(err))

        entities = []
        for row in results:
            recording_mbid = row['recording_mbid']
            similarity_info = {
                'metric': self.metric,
                'similarity_from': self.mbid,
                'similarity': row['distance'],
                'offset': row['offset']
            }
            recording = Recording(mbid=recording_mbid, acousticbrainz=similarity_info)
            recording.add_note("Related to %s with metric %s" % (self.mbid, self.metric))
            entities.append(recording)

        self.debug("read %d recordings" % len(entities))

        return entities
    def create(self, inputs):
        """
            Build the pipeline for an area/year-range playlist.

            inputs keys read here: 'area' (name handed to area_lookup), 'start_year'
            and 'end_year' (each must lie between 1800 and the current year, with
            end_year >= start_year).

            Returns the ArtistCreditLimiterElement at the end of the chain.
            Raises PipelineError when a year is missing or out of range.
        """
        area_name = inputs['area']
        start_year = inputs['start_year']
        end_year = inputs['end_year']

        # The lookup runs before the year validation, as it always has.
        area_id = troi.tools.area_lookup.area_lookup(area_name)

        if (not start_year or start_year < 1800
                or start_year > datetime.datetime.today().year):
            raise PipelineError(
                "start_year must be given and be an integer between 1800 and the current year."
            )

        if (not end_year or end_year < 1800
                or end_year > datetime.datetime.today().year):
            raise PipelineError(
                "end_year must be given and be an integer between 1800 and the current year."
            )

        if end_year < start_year:
            raise PipelineError(
                "end_year must be equal to or greater than start_year.")

        # Chain: area random recordings -> recording lookup -> artist credit limiter.
        source = troi.listenbrainz.area_random_recordings.AreaRandomRecordingsElement(
            area_id, start_year, end_year)
        lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
        lookup.set_sources(source)

        limiter = troi.filters.ArtistCreditLimiterElement()
        limiter.set_sources(lookup)

        return limiter
Exemplo n.º 4
0
    def create(self, inputs):
        """
            Build the daily jams pipeline for a user.

            inputs keys read here:
              user_name - passed to the recommendations element and to DailyJamsElement.
              type - must be 'top' or 'similar'.
              day - integer between 0 and 7; None or 0 selects today's weekday() + 1.

            Returns the DailyJamsElement at the end of the chain.
            Raises PipelineError when day is outside 0-7 or type is not recognised.
        """
        user_name = inputs['user_name']
        artist_type = inputs['type']
        day = inputs['day']
        if day is None:
            day = 0

        # Reject negative values too; previously only the upper bound was checked.
        if day < 0 or day > 7:
            raise PipelineError("day must be an integer between 0-7.")
        if day == 0:
            day = datetime.today().weekday() + 1

        if artist_type not in ("top", "similar"):
            raise PipelineError("type must be either 'top' or 'similar'")

        recs = troi.listenbrainz.recs.UserRecordingRecommendationsElement(
            user_name=user_name, artist_type=artist_type, count=-1)
        r_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
        r_lookup.set_sources(recs)

        # If an artist should never appear in a playlist, add the artist_credit_id here
        artist_filter = troi.filters.ArtistCreditFilterElement([])
        artist_filter.set_sources(r_lookup)

        artist_limiter = troi.filters.ArtistCreditLimiterElement()
        artist_limiter.set_sources(artist_filter)

        jams = DailyJamsElement(recs, user=user_name, day=day)
        jams.set_sources(artist_limiter)

        return jams
    def read(self, inputs):
        """
            Look up the artist credits in inputs[0] by artist_credit_id and fill in
            name and mbids on the matching input objects.

            Returns the input objects that the server returned a row for, in
            response order. Raises PipelineError on a non-200 response or when
            the body cannot be parsed.
        """
        credits_by_id = {}
        id_strings = []
        for credit in inputs[0]:
            credits_by_id[credit.artist_credit_id] = credit
            id_strings.append(str(credit.artist_credit_id))

        query = {"[artist_credit_id]": ",".join(id_strings)}
        response = requests.get(self.SERVER_URL, params=query)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch artist credits from ListenBrainz: HTTP code %d" %
                response.status_code)

        try:
            rows = ujson.loads(response.text)
        except Exception as err:
            raise PipelineError(
                "Cannot fetch artist credits from ListenBrainz: Invalid JSON returned: "
                + str(err))

        entities = []
        for row in rows:
            # Update the caller's object in place rather than creating a new one.
            credit = credits_by_id[row['artist_credit_id']]
            credit.name = row['artist_credit_name']
            credit.mbids = row['artist_credit_mbids']
            entities.append(credit)

        return entities
    def read(self, inputs):
        """
            Fetch related artist credits for every artist in inputs[0].

            Returns deep copies of the input artists; each copy gets
            mb['related_artist_credit_ids'] set to the list of response rows whose
            'artist_credit_id' matches that artist (empty list when none match).

            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        artists = inputs[0]
        ac_ids = ",".join([str(a.artist_credit_id) for a in artists])
        params = {"[artist_credit_id]": ac_ids, "threshold": self.threshold}
        r = requests.get(self.SERVER_URL, params=params)
        if r.status_code != 200:
            raise PipelineError(
                "Cannot fetch related artist credits from ListenBrainz: HTTP code %d"
                % r.status_code)

        try:
            # The body must be decoded; iterating the raw text would yield
            # single characters instead of row dicts.
            relations = r.json()
        except ValueError as err:
            raise PipelineError(
                "Cannot fetch related artist credits from ListenBrainz: Invalid JSON returned: "
                + str(err))

        # Group the response rows by the artist credit they belong to.
        index = defaultdict(list)
        for row in relations:
            index[row['artist_credit_id']].append(row)

        entities = []
        for artist in artists:
            # Work on a copy so the caller's artist objects are left untouched.
            a = copy.deepcopy(artist)
            a.mb['related_artist_credit_ids'] = index[artist.artist_credit_id]
            entities.append(a)

        return entities
    def read(self, inputs):
        """
            Look up the recordings in inputs[0] by MBID and fill in artist, name,
            length and mbid from the response.

            Recordings are updated in place. A recording whose MBID is missing from
            the response is left unmodified (a debug message is emitted) but is
            still part of the returned list.

            Returns the input recordings list, or [] when it is empty.
            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        recordings = inputs[0]
        if not recordings:
            return []

        data = [{'[recording_mbid]': rec.mbid} for rec in recordings]

        self.debug("- debug %d recordings" % len(recordings))

        response = requests.post(self.SERVER_URL % len(recordings), json=data)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch recordings from ListenBrainz: HTTP code %d" %
                response.status_code)

        try:
            rows = ujson.loads(response.text)
            self.debug("- debug %d rows in response" % len(rows))
        except ValueError as err:
            raise PipelineError("Cannot fetch recordings from ListenBrainz: " +
                                str(err))

        # Rows are keyed by the MBID that was sent; 'recording_mbid' in the row
        # is written back onto the recording below and may differ from it.
        mbid_index = {}
        for row in rows:
            mbid_index[row['original_recording_mbid']] = row

        for rec in recordings:
            try:
                row = mbid_index[rec.mbid]
            except KeyError:
                self.debug("- debug recording MBID %s not found, skipping." %
                           rec.mbid)
                continue

            if not rec.artist:
                rec.artist = Artist(name=row['artist_credit_name'],
                                    mbids=row.get('[artist_credit_mbids]', []),
                                    artist_credit_id=row['artist_credit_id'])
            else:
                rec.artist.name = row['artist_credit_name']
                # No trailing comma here: it used to wrap the list in a 1-tuple.
                rec.artist.mbids = row.get('[artist_credit_mbids]', [])
                rec.artist.artist_credit_id = row['artist_credit_id']

            rec.name = row['recording_name']
            rec.length = row['length']
            rec.mbid = row['recording_mbid']

        return recordings
    def read(self, inputs=[]):
        """
            Page through the user's recommended recordings via self.client and
            return them as a list of Recording objects carrying mbid and ranking.

            Fetches self.count recordings, or up to MAX_RECORDINGS_TO_FETCH when
            count is negative. Stores the 'last_updated' value of the last
            non-empty response object in self._last_updated.

            inputs is not used. Raises PipelineError when the client call fails.
        """
        recording_list = []

        if self.count < 0:
            remaining = self.MAX_RECORDINGS_TO_FETCH
        else:
            remaining = self.count

        while True:
            batch_size = min(MAX_NUM_RECORDINGS_PER_REQUEST, remaining)
            try:
                recordings = self.client.get_user_recommendation_recordings(
                    self.user_name,
                    self.artist_type,
                    count=batch_size,
                    offset=self.offset + len(recording_list))
            except (requests.exceptions.HTTPError,
                    pylistenbrainz.errors.ListenBrainzAPIException) as err:
                reason = str(err)
                if not reason:
                    # An empty error message gets a hint about the user name instead.
                    reason = "Does the user '%s' exist?" % self.user_name
                raise PipelineError(
                    "Cannot fetch recommeded tracks from ListenBrainz: " +
                    reason)

            if not recordings:
                break
            mbids = recordings['payload']['mbids']
            if not len(mbids):
                break

            recording_list.extend(
                Recording(mbid=item['recording_mbid'], ranking=item['score'])
                for item in mbids)

            remaining -= len(mbids)
            if remaining <= 0:
                break

        if recordings:
            self._last_updated = recordings['payload']['last_updated']

        return recording_list
    def _filter(self, recordings):
        """
            Carry out the actual artist limiting.

            Recordings are grouped by artist.artist_credit_id and at most
            self.count recordings are kept per group. When every recording has a
            ranking, each group is sorted by ranking (descending when
            self.exclude_lower_ranked is true) before truncation; otherwise each
            group is shuffled.

            Returns the surviving recordings in their original input order.
            Raises PipelineError when a recording has no artist.artist_credit_id.
        """

        ac_index = defaultdict(list)
        all_have_rankings = True
        for rec in recordings:
            try:
                ac_id = rec.artist.artist_credit_id
            except AttributeError:
                # A missing artist surfaces as AttributeError, not KeyError, and
                # instances have no __name__, so take the name from the class.
                raise PipelineError(type(self).__name__ + " needs to have all input recordings to have artist.artist_credit_id defined!")

            ac_index[ac_id].append((rec.mbid, rec.ranking))
            if rec.ranking is None:
                all_have_rankings = False

        # Collect surviving MBIDs in a set so the final pass is O(1) per lookup.
        pass_recs = set()
        for group in ac_index.values():
            if all_have_rankings:
                group.sort(key=itemgetter(1), reverse=self.exclude_lower_ranked)
            else:
                shuffle(group)
            pass_recs.update(mbid for mbid, _ in group[:self.count])

        return [rec for rec in recordings if rec.mbid in pass_recs]
Exemplo n.º 10
0
    def create(self, inputs):
        """
            Build the pipeline that turns a user's recommendations into
            per-decade playlists.

            inputs keys read here: 'user_name' and 'type' ('top' or 'similar').

            Returns the ArtistCreditLimiterElement (constructed with 3) at the end
            of the chain. Raises PipelineError for an unrecognised type.
        """
        user_name = inputs['user_name']
        artist_type = inputs['type']

        if artist_type not in ("top", "similar"):
            raise PipelineError("type must be either 'top' or 'similar'")

        # Chain: recommendations -> recording lookup -> year lookup -> year sort
        #        -> artist credit filter -> decade splitter -> artist credit limiter
        recommendations = troi.listenbrainz.recs.UserRecordingRecommendationsElement(
            user_name=user_name, artist_type=artist_type, count=-1)

        recording_lookup = troi.musicbrainz.recording_lookup.RecordingLookupElement()
        recording_lookup.set_sources(recommendations)

        year_lookup = troi.musicbrainz.year_lookup.YearLookupElement()
        year_lookup.set_sources(recording_lookup)

        sorter = troi.sorts.YearSortElement()
        sorter.set_sources(year_lookup)

        # If an artist should never appear in a playlist, add the artist_credit_id here
        credit_filter = troi.filters.ArtistCreditFilterElement([])
        credit_filter.set_sources(sorter)

        splitter = DecadePlaylistSplitterElement()
        splitter.set_sources(credit_filter)

        limiter = troi.filters.ArtistCreditLimiterElement(3)
        limiter.set_sources(splitter)

        return limiter
Exemplo n.º 11
0
    def read(self, inputs):
        """
            Fetch tags and genres for the recordings in inputs[0] and store them as
            lists under musicbrainz['tags'] and musicbrainz['genres'].

            Recordings whose MBID is absent from the response are dropped from
            the output (a debug message is emitted for each).

            Returns the list of annotated recordings, or [] for empty input.
            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        recordings = inputs[0]
        if not recordings:
            return []

        payload = [{'[recording_mbid]': rec.mbid} for rec in recordings]

        response = requests.post(self.SERVER_URL, json=payload)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch recording tags from MusicBrainz: HTTP code %d" %
                response.status_code)

        try:
            rows = ujson.loads(response.text)
        except ValueError as err:
            raise PipelineError(
                "Cannot fetch recording tags from MusicBrainz: " + str(err))

        rows_by_mbid = {row['recording_mbid']: row for row in rows}

        output = []
        for rec in recordings:
            if rec.mbid not in rows_by_mbid:
                self.debug("recording (%s) not found, skipping." % rec.mbid)
                continue

            row = rows_by_mbid[rec.mbid]
            # Both fields are comma separated strings; missing/empty becomes [].
            tags = row.get('tags', '')
            rec.musicbrainz['tags'] = tags.split(',') if tags else []
            genres = row.get('genres', '')
            rec.musicbrainz['genres'] = genres.split(',') if genres else []

            output.append(rec)

        return output
    def read(self, inputs):
        """
            Look up the year for each recording in inputs[0], matching response
            rows by artist credit name + recording name, and set recording.year.

            A recording without a match is dropped when self.skip_not_found is
            true (with a debug message); otherwise it is passed through unchanged.

            Returns the resulting list of recordings, or [] for empty input.
            Raises PipelineError on a non-200 response or when the body is not valid JSON.
        """
        recordings = inputs[0]
        if not recordings:
            return []

        payload = [{
            '[recording_name]': rec.name,
            '[artist_credit_name]': rec.artist.name
        } for rec in recordings]

        response = requests.post(self.SERVER_URL, json=payload)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch recording years from MusicBrainz: HTTP code %d" %
                response.status_code)

        try:
            rows = ujson.loads(response.text)
        except ValueError as err:
            raise PipelineError(
                "Cannot fetch recording years from MusicBrainz: " + str(err))

        # Key is the plain concatenation of artist credit name and recording name.
        year_by_key = {
            row['artist_credit_name'] + row['recording_name']: row['year']
            for row in rows
        }

        output = []
        for rec in recordings:
            key = rec.artist.name + rec.name
            if key in year_by_key:
                rec.year = year_by_key[key]
            elif self.skip_not_found:
                self.debug("recording (%s %s) not found, skipping." %
                           (rec.artist.name, rec.name))
                continue

            output.append(rec)

        return output
    def submit(self, token, created_for):
        """
            Submit the playlist to ListenBrainz.

            token - the ListenBrainz user token to use to submit this playlist.
            created_for - the ListenBrainz user name for whom this playlist was created.
                          the token above must be an Approved Playlist Bot in the ListenBrainz
                          server, otherwise the submission will fail.

            Playlists without recordings are skipped.

            Returns a list of (playlist URL, playlist MBID) tuples, one per submitted playlist.
            Raises PipelineError when no playlists exist, on a non-200 response, or
            when the success response is not valid JSON.
        """

        if not self.playlists:
            raise PipelineError("Playlists have not been generated yet.")

        playlist_mbids = []
        for playlist in self.playlists:
            print("submit %d tracks" % len(playlist.recordings))
            if len(playlist.recordings) == 0:
                print("skip playlist of length 0")
                continue

            response = requests.post(LISTENBRAINZ_PLAYLIST_CREATE_URL,
                                     json=_serialize_to_jspf(playlist, created_for),
                                     headers={"Authorization": "Token " + str(token)})
            if response.status_code != 200:
                try:
                    err = response.json()["error"]
                except (ValueError, KeyError, TypeError):
                    # Body is not JSON, or is JSON without an "error" entry:
                    # fall back to the raw text so the HTTP failure is still reported.
                    err = response.text

                raise PipelineError(
                    "Cannot post playlist to ListenBrainz: HTTP code %d: %s" %
                    (response.status_code, err))

            try:
                result = json.loads(response.text)
            except ValueError as err:
                raise PipelineError("Cannot post playlist to ListenBrainz: " +
                                    str(err))

            playlist_mbids.append(
                (LISTENBRAINZ_SERVER_URL + "/playlist/" +
                 result["playlist_mbid"], result["playlist_mbid"]))

        return playlist_mbids
    def save(self):
        """
            Write every generated playlist to disk as serialized JSPF.

            A playlist without a filename is written to "playlist_NNN.jspf", where
            NNN is its zero-padded position in self.playlists.

            Raises PipelineError when no playlists have been generated.
        """

        if not self.playlists:
            raise PipelineError("Playlists have not been generated yet.")

        for position, playlist in enumerate(self.playlists):
            if playlist.filename:
                target = playlist.filename
            else:
                target = "playlist_%03d.jspf" % position

            with open(target, "w") as out:
                serialized = json.dumps(_serialize_to_jspf(playlist))
                out.write(serialized)
Exemplo n.º 15
0
    def __init__(self, metric, mbid):
        """
            The given recording mbid is the source track that will be looked up
            in the annoy index using the passed metric.

            metric is matched case-insensitively against VALID_METRICS and stored
            in its lower-cased form, so the stored value is the one that was
            validated.

            Raises PipelineError when the metric is not one of VALID_METRICS.
        """
        super().__init__()
        self.mbid = mbid

        normalized = metric.lower()
        if normalized not in VALID_METRICS:
            raise PipelineError("metric %s is not valid. Must be one of %s" % (metric, ', '.join(VALID_METRICS)))
        self.metric = normalized
def area_lookup(area_name):
    """
        Given an area name, look up its area_id and return it.

        The area_id of the first returned row is used. Raises PipelineError when
        the request does not return HTTP 200, the response is not valid JSON, or
        no area matches the name.
    """

    query = [{'[area]': area_name}]
    response = requests.post(AREA_LOOKUP_SERVER_URL, json=query)
    if response.status_code != 200:
        raise PipelineError("Cannot lookup area name. " + str(response.text))

    try:
        matches = ujson.loads(response.text)
    except ValueError as err:
        raise PipelineError(
            "Cannot lookup area name, invalid JSON returned: " + str(err))

    if len(matches) == 0:
        raise PipelineError(
            "Cannot find area name. Must be spelled exactly as in MusicBrainz."
        )

    first_match = matches[0]
    return first_match['area_id']
Exemplo n.º 17
0
    def read(self, inputs):
        """
            Resolve the recordings in inputs[0] to MBIDs by artist credit name and
            recording name.

            Each response row carries an 'index' pointing back into inputs[0]. A
            matched recording gets its mbid, name, artist and release overwritten
            from the row; previously set identifiers are recorded via add_note
            before being replaced. Rows without an artist_credit_name are treated
            as unmatched: the recording is kept unchanged unless
            self.remove_unmatched is set.

            Returns the list of recordings in response order.
            Raises PipelineError on a non-200 response.
        """
        params = [{
            "[artist_credit_name]": rec.artist.name,
            "[recording_name]": rec.name
        } for rec in inputs[0]]

        response = requests.post(self.SERVER_URL, json=params)
        if response.status_code != 200:
            raise PipelineError(
                "Cannot fetch MBID mapping rows from ListenBrainz: HTTP code %d (%s)"
                % (response.status_code, response.text))

        entities = []
        for row in json.loads(response.text):
            rec = inputs[0][int(row['index'])]

            if not row['artist_credit_name']:
                if not self.remove_unmatched:
                    entities.append(rec)
                continue

            if rec.mbid:
                rec.add_note("recording mbid %s overwritten by mbid_lookup" %
                             (rec.mbid))
            rec.mbid = row['recording_mbid']
            rec.name = row['recording_name']

            if rec.artist.artist_credit_id:
                rec.artist.add_note(
                    "artist_credit_id %d overwritten by mbid_lookup" %
                    (rec.artist.artist_credit_id))
            rec.artist.artist_credit_id = row['artist_credit_id']
            rec.artist.name = row['artist_credit_name']

            if rec.release:
                if rec.release.mbid:
                    # %s, not %d: %d raises TypeError when the mbid is a string.
                    rec.release.add_note("mbid %s overwritten by mbid_lookup" %
                                         (rec.release.mbid))
                rec.release.mbid = row['release_mbid']
                rec.release.name = row['release_name']
            else:
                rec.release = Release(row['release_name'],
                                      mbid=row['release_mbid'])

            entities.append(rec)

        return entities
    def read(self, inputs):
        """
            Determine if recordings or playlists are passed in and call the internal _filter
            function accordingly.

            A list of Recordings is filtered and returned immediately. For a list
            of Playlists, each playlist's recordings are filtered in place and the
            playlists are collected into the returned list. Empty input lists are
            skipped, so empty input yields [].

            Raises PipelineError when an input list holds neither Recordings nor Playlists.
        """

        outputs = []
        for entities in inputs:
            # Nothing to inspect or filter; indexing [0] here would raise IndexError.
            if not entities:
                continue

            if isinstance(entities[0], Recording):
                return self._filter(entities)
            elif isinstance(entities[0], Playlist):
                for playlist in entities:
                    playlist.recordings = self._filter(playlist.recordings)
                    outputs.append(playlist)
            else:
                raise PipelineError("ArtistCreditLimiter passed incorrect input types.")

        return outputs
    def read(self, inputs):
        """
            Collect the incoming entities into self.playlists.

            A list of Recordings is wrapped in one new Playlist; a list of
            Playlists is added as is. Empty input lists are reported on stdout and
            skipped.

            Always returns an empty list. Raises TypeError when an input list mixes
            entity types and PipelineError when it holds neither Recordings nor
            Playlists.
        """

        outputs = []
        for entities in inputs:
            if len(entities) == 0:
                print("No recordings or playlists generated to save.")
                continue

            first = entities[0]
            if not isinstance(first, (Recording, Playlist)):
                raise PipelineError("Playlist passed incorrect input types.")

            if not is_homogeneous(entities):
                raise TypeError("entity list not homogeneous")

            if isinstance(first, Recording):
                self.playlists.append(Playlist(recordings=entities))
            else:
                self.playlists.extend(entities)

        return outputs
    def read(self, inputs):
        """
            Filter the recordings in inputs[0] by artist credit id.

            When self.include is true only recordings whose
            artist.artist_credit_id is in self.artist_credit_ids are kept;
            otherwise exactly those recordings are removed. Recordings without an
            artist or artist credit id are always dropped (with a debug message).

            Returns the list of recordings that passed the filter.
        """

        recordings = inputs[0]
        # A set gives the same membership test the old dict-of-ones provided.
        ac_ids = set(self.artist_credit_ids)

        results = []
        for r in recordings:
            if not r.artist or not r.artist.artist_credit_id:
                self.debug("recording %s has not artist credit id" % (r.mbid))
                continue

            # Keep on match in include mode, keep on non-match in exclude mode.
            if (r.artist.artist_credit_id in ac_ids) == bool(self.include):
                results.append(r)

        return results