Exemplo n.º 1
0
    def __init__(self, quality='all', **kwargs):
        """Set up a provider that normalizes genre strings via the genre tree.

        The input genre is matched against a tree of 705 single genres.
        The outcome is a list of paths, where each path is a tuple of indices
        describing one possible way through the tree. When debugging,
        GenreTreeProvider.resolve_path() turns such a path back into the
        full genre.

        Supported quality levels:

            - ``all``: Collect every possible path through the tree, sorted
              by the first index (handy for comparing).
            - ``single``: Take the first possible path found. Fastest.
            - ``best_two``: Additionally uses the reversed word list in a
              second try. Might give better results than ``single`` at the
              cost of some speed.

        Any other value is treated like ``all``.

        This provider is reversible.

        :param quality: One of ``all``, ``best_two``, ``single`` [*default:* ``all``]
        :type quality: String
        :param kwargs: Forwarded unchanged to ``Provider.__init__``.
        """
        Provider.__init__(self, **kwargs)

        tree_dump = get_cache_path('genre_tree.dump')
        self._root = load_genre_tree(tree_dump)

        # Map the quality name to its path builder; unknown names use 'all'.
        builders = {
            'all': build_genre_path_all,
            'best_two': build_genre_path_best_of_two,
            'single': build_genre_path_single
        }
        self._build_func = builders.get(quality, build_genre_path_all)
Exemplo n.º 2
0
    def __init__(self, provider_list, **kwargs):
        """Create a provider that applies subproviders to its input in a certain order.

        :param provider_list: An ordered list of provider objects.
        :param kwargs: Forwarded unchanged to ``Provider.__init__``.
        """
        # The subprovider list is stored before the base initializer runs.
        self._provider_list = provider_list
        Provider.__init__(self, **kwargs)
Exemplo n.º 3
0
 def __init__(self, **kwargs):
     """Initialize the base provider and precompile the regexes it stores.

     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     # Raw string: a plain "\W" is an invalid string escape that newer
     # Python versions warn about. Matches one non-word character or "_".
     self._punctuation = re.compile(r"\W|_")

     # Presumably the separator words the input is split on - verify
     # against the code that reads this attribute.
     self._split_reasons = frozenset(['feat', 'featuring', 'and'])

     # NOTE(review): every pattern uses ``\s*``, so it also matches when no
     # whitespace is present (e.g. the leading "a" of "abba", or an "of"
     # inside a longer word) - confirm this is intended.
     self._strip_patterns = [
         re.compile(pattern)
         for pattern in (r'^the\s*', r'^a\s*', r'\s*of\s*')
     ]
Exemplo n.º 4
0
    def __init__(self, quality='all', **kwargs):
        """Build a GenreTreeProvider operating at the requested quality.

        Genres are normalized by matching the input against a tree of
        705 single genres. The result is a list of paths; a path is a tuple
        of indices that describes one possible route through the tree.
        For debugging, GenreTreeProvider.resolve_path() converts a path back
        into the full genre.

        Quality levels:

            - ``all``: Find all possible paths through the tree, sorted by
              the first index (useful for comparing).
            - ``single``: Take only the first possible path found. Fastest.
            - ``best_two``: Also uses the reversed word list in a second
              try. Might give better results than ``single`` at the cost
              of some speed.

        The default is ``all``; unrecognized values behave like ``all`` too.

        This provider is reversible.

        :param quality: One of ``all``, ``best_two``, ``single`` [*default:* ``all``]
        :type quality: String
        :param kwargs: Forwarded unchanged to ``Provider.__init__``.
        """
        Provider.__init__(self, **kwargs)

        dump_path = get_cache_path('genre_tree.dump')
        self._root = load_genre_tree(dump_path)

        # Lookup table from quality name to the function that builds paths.
        path_builders = {
            'all': build_genre_path_all,
            'best_two': build_genre_path_best_of_two,
            'single': build_genre_path_single
        }
        fallback = build_genre_path_all
        self._build_func = path_builders.get(quality, fallback)
Exemplo n.º 5
0
 def __init__(self, **kwargs):
     """Initialize the base provider and precompile the regexes it stores.

     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     # Raw string: a plain "\W" is an invalid string escape that newer
     # Python versions warn about. Matches one non-word character or "_".
     self._punctuation = re.compile(r"\W|_")

     # NOTE(review): compiled without re.IGNORECASE, so only lowercase
     # "cd"/"disc" match - presumably the input is lowercased first; verify.
     self._strip_patterns = [re.compile(pattern) for pattern in (
         r'\s*[\(\[{].*?[}\)\]]',  # Strip everything in brackets ([{
         r'\s*(cd|disc)\s*\d+',    # Remove "cd <X>" / "disc <X>" markers.
     )]
Exemplo n.º 6
0
    def __init__(self, provider_list, **kwargs):
        """Create a provider that applies subproviders to its input in a certain order.

        :param provider_list: An ordered list of provider objects.
        :param kwargs: Forwarded unchanged to ``Provider.__init__``.
        """
        # The subprovider list is stored before the base initializer runs.
        self._provider_list = provider_list
        Provider.__init__(self, **kwargs)
Exemplo n.º 7
0
    def __init__(self, **kwargs):
        """Initialize the provider; requires the ``plyr`` module.

        :param cache_failures: Optional keyword (default ``True``). It is
            removed from ``kwargs`` and stored on the instance before the
            remaining keywords are forwarded to ``Provider.__init__``.
        :raises LookupError: If ``HAS_PLYR`` is false.
        """
        if not HAS_PLYR:
            # The message previously read "could be imported", which stated
            # the opposite of the condition being reported.
            raise LookupError('Plyr could not be imported, which is needed for lyrics')

        # Pop our own option so the base initializer never sees it.
        self._cache_failures = kwargs.pop('cache_failures', True)

        # NOTE(review): get_cache_path(None) presumably yields the cache
        # directory itself - verify against its definition.
        self.database = plyr.Database(get_cache_path(None))

        Provider.__init__(self, **kwargs)
Exemplo n.º 8
0
 def __init__(self, **kwargs):
     """Set up the base provider and compile the regexes kept on the instance.

     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     # Written as a raw string because "\W" in a normal string literal is an
     # invalid escape sequence. Matches a single non-word character or "_".
     self._punctuation = re.compile(r"\W|_")

     # Presumably the words that trigger a split of the input - verify
     # against the code that consumes this set.
     self._split_reasons = frozenset(['feat', 'featuring', 'and'])

     # NOTE(review): ``\s*`` allows zero whitespace, so '^a\s*' matches the
     # first letter of any input starting with "a" and '\s*of\s*' matches
     # "of" inside longer words - confirm this is intended.
     raw_patterns = (r'^the\s*', r'^a\s*', r'\s*of\s*')
     self._strip_patterns = [re.compile(pattern) for pattern in raw_patterns]
Exemplo n.º 9
0
 def __init__(self, use_cache=True, cache_fails=True, **kwargs):
     """Store the caching options and open the shelve used as cache.

     :param use_cache: Cache found results?
     :param cache_fails: Also cache missed results?
     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     self._use_cache = use_cache
     self._cache_fails = cache_fails

     # The shelf is opened here and kept on the instance; it is not closed
     # anywhere in this method.
     shelve_path = get_cache_path('discogs_genre.dump')
     self._shelve = shelve.open(shelve_path, writeback=True)
Exemplo n.º 10
0
 def __init__(self, **kwargs):
     """Set up the base provider and compile the regexes kept on the instance.

     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     # Written as a raw string because "\W" in a normal string literal is an
     # invalid escape sequence. Matches a single non-word character or "_".
     self._punctuation = re.compile(r"\W|_")

     # NOTE(review): no re.IGNORECASE flag is passed, so "CD 1" would not
     # match - presumably the input is lowercased beforehand; verify.
     raw_patterns = (
         r'\s*[\(\[{].*?[}\)\]]',  # Strip everything in brackets ([{
         r'\s*(cd|disc)\s*\d+',  # Remove "cd <X>" / "disc <X>" markers.
     )
     self._strip_patterns = [re.compile(pattern) for pattern in raw_patterns]
Exemplo n.º 11
0
 def __init__(self, use_cache=True, cache_fails=True, **kwargs):
     """Remember the cache settings and open the backing shelve file.

     :param use_cache: Cache found results?
     :param cache_fails: Also cache missed results?
     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)

     self._use_cache = use_cache
     self._cache_fails = cache_fails

     # Opened with writeback=True and stored on the instance; this method
     # does not close it.
     dump_path = get_cache_path('discogs_genre.dump')
     self._shelve = shelve.open(dump_path, writeback=True)
Exemplo n.º 12
0
    def __init__(self, language='english', **kwargs):
        """Create a provider that holds a stemmer for the given language.

        See here for a full list of languages:

            http://nltk.org/_modules/nltk/stem/snowball.html

        .. note::

            This does not depend on nltk, it depends on the ``pystemmer`` package.

        :param language: Language to use during stemming, defaults to english.
        :param kwargs: Forwarded unchanged to ``Provider.__init__``.
        """
        Provider.__init__(self, **kwargs)
        # The stemmer is built once here and kept on the instance.
        self._stemmer = Stemmer(language)
Exemplo n.º 13
0
        def test_apply(self):
            """Check processed values of a compressing provider and their distances."""
            prov = Provider(compress=True)
            distance = DistanceFunction(provider=prov)

            # Processing order is kept: 'Akrea', 'Berta', then 'Akrea' again.
            akrea = prov.process('Akrea')
            berta = prov.process('Berta')
            akrea_again = prov.process('akrea'.capitalize())

            self.assertEqual(akrea, (1, ))
            self.assertEqual(berta, (2, ))
            self.assertEqual(akrea_again, (1, ))

            # (left operand, right operand, expected distance)
            distance_cases = (
                (akrea, berta, 1.0),
                (akrea, akrea_again, 0.0),
                ([], [], 1.0),
                (akrea + berta, akrea_again, 0.5),
            )
            for lhs, rhs, expected in distance_cases:
                self.assertAlmostEqual(distance.compute(lhs, rhs), expected)
Exemplo n.º 14
0
    def __init__(self, name, mask, config=DEFAULT_CONFIG):
        """Create a new session.

        Each value of ``mask`` is indexed at positions 0, 1 and 2, which are
        used as provider, distance function and weighting of the attribute.
        A ``None`` at any of these positions is replaced by a default:
        a plain ``Provider()``, a ``DistanceFunction`` built on the
        attribute's provider, and a weighting of ``1.0`` respectively.

        :param name: The name of the session. Used to load it again from disk.
        :param mask: The mask. See :term:`Mask`
        :param config: A dictionary with config values. See :class:`DefaultConfig` for available keys.
        """
        self._config = config
        self._name = name

        # Publicly readable attribute. It is rebound to a bidict() at the
        # end of this method; the plain dict is kept here because
        # Database(self) is constructed in between.
        self.mapping = {}

        # Make access to the mask more efficient
        self._mask = copy(mask)
        self._attribute_list = sorted(mask)
        # NOTE: despite the attribute's name, this maps each attribute key
        # to its position in the sorted attribute list.
        self._listidx_to_key = {k: i for i, k in enumerate(self._attribute_list)}

        # Lookup tables for those attributes (fast access is crucial here)
        def make_index(idx, default_func):
            """Map every mask key to descr[idx], or to default_func(key) if that is None."""
            return {
                key: descr[idx] if descr[idx] is not None else default_func(key)
                for key, descr in self._mask.items()
            }

        # Import this locally, since we might get circular import otherway:
        from munin.distance import DistanceFunction
        from munin.provider import Provider

        # Build indices and set default values:
        self._key_to_providers = make_index(0, lambda key: Provider())
        self._key_to_distfuncs = make_index(
            1, lambda key: DistanceFunction(self._key_to_providers[key])
        )
        self._key_to_weighting = make_index(2, lambda key: 1.0)

        # Sum of the individual weights, pre-calculated once. The resolved
        # weights are summed (not the raw mask entries), so an attribute
        # whose weight is None contributes its default of 1.0 instead of
        # raising a TypeError.
        self._weight_sum = sum(self._key_to_weighting.values())

        # Create the associated database.
        self._database = Database(self)

        # Filtering related:
        self._filtering_enabled = config['recom_history_sieving']
        self._recom_history = RecommendationHistory(
            penalty_map=config['recom_history_penalty']
        )

        # Publicly readable attribute.
        self.mapping = bidict()
Exemplo n.º 15
0
        def test_process(self):
            """A GenreTreeProvider combined with a plain Provider via ``|``
            must yield the same result as the GenreTreeProvider alone."""
            from munin.provider import Provider
            from munin.provider.genre import GenreTreeProvider

            genre_provider = GenreTreeProvider()
            plain_provider = Provider()
            combined = genre_provider | plain_provider

            genre = 'metalcore'
            self.assertEqual(
                combined.process(genre),
                genre_provider.process(genre)
            )
Exemplo n.º 16
0
        def test_apply(self):
            """Verify the values produced by a compressing provider and the
            distances computed between them."""
            compressing = Provider(compress=True)
            dist_func = DistanceFunction(provider=compressing)

            # Same processing order as always: 'Akrea', 'Berta', 'Akrea'.
            first = compressing.process('Akrea')
            second = compressing.process('Berta')
            repeat = compressing.process('akrea'.capitalize())

            self.assertEqual(first, (1, ))
            self.assertEqual(second, (2, ))
            self.assertEqual(repeat, (1, ))

            different = dist_func.compute(first, second)
            self.assertAlmostEqual(different, 1.0)

            identical = dist_func.compute(first, repeat)
            self.assertAlmostEqual(identical, 0.0)

            both_empty = dist_func.compute([], [])
            self.assertAlmostEqual(both_empty, 1.0)

            half_overlap = dist_func.compute(first + second, repeat)
            self.assertAlmostEqual(half_overlap, 0.5)
Exemplo n.º 17
0
 def __init__(self, cache_invalid=False, **kwargs):
     """Initialize the base provider and store the caching flag.

     :param cache_invalid: Also cache invalid results of failed calculations?
     :param kwargs: Forwarded unchanged to ``Provider.__init__``.
     """
     Provider.__init__(self, **kwargs)
     # Only stored here; the flag is evaluated outside this method.
     self._cache_invalid = cache_invalid