Exemplo n.º 1
0
    def modify(self,
               song,
               sub_value_dict,
               star_threshold=0.75,
               iterstep_threshold=50):
        """Replace *song* by a new song with some of its values overridden.

        The values of *song* (``song.to_dict()``) are merged with the
        processed entries of *sub_value_dict*. A new :class:`Song` is built
        from the result, the old song is removed, and the new one is stored
        in the song list and inserted into the graph.

        :param song: The song to replace; needs ``to_dict()`` and ``uid``.
        :param sub_value_dict: Mapping of key to new raw value. ``None``
            values are taken over as-is, all others are run through the
            provider the session returns for that key. The mapping itself
            is not modified.
        :param star_threshold: Forwarded to ``_insert_song_to_graph``.
        :param iterstep_threshold: Forwarded to ``_insert_song_to_graph``.
        :raises KeyError: If the provider lookup for a key raises KeyError
            (reported as ``key "<key>" is not in mask``).
        :returns: The return value of ``_insert_song_to_graph``.
        """
        value_dict = song.to_dict()

        # Collect the processed values in a fresh dict instead of writing
        # them back into the caller's mapping.
        processed = {}
        for key, value in sub_value_dict.items():
            # Only the lookup is guarded, so that a KeyError raised inside
            # provider.process() is not misreported as a missing mask key.
            try:
                provider = self._session.provider_for_key(key)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

            if value is None:
                processed[key] = None
            else:
                processed[key] = provider.process(value)

        value_dict.update(processed)
        new_song = Song(self._session,
                        value_dict,
                        max_neighbors=self._session.config['max_neighbors'],
                        max_distance=self._session.config['max_distance'])

        # The new song takes the uid returned by remove() and is stored in
        # the slot of the old song.
        new_song.uid = self.remove(song.uid)
        self._song_list[song.uid] = new_song

        # Clear all known distances:
        new_song.distance_reset()
        return self._insert_song_to_graph(new_song, star_threshold,
                                          iterstep_threshold)
Exemplo n.º 2
0
    def modify(self, song, sub_value_dict, star_threshold=0.75, iterstep_threshold=50):
        """Swap *song* for a new song built from its values plus overrides.

        ``song.to_dict()`` is updated with the processed entries of
        *sub_value_dict*, a new :class:`Song` is created from that, the old
        song is removed and the new one is stored and inserted into the graph.

        :param song: The song to replace; needs ``to_dict()`` and ``uid``.
        :param sub_value_dict: Mapping of key to new raw value. ``None`` is
            kept as ``None``; any other value is passed through the provider
            returned by the session for its key. The mapping is left untouched.
        :param star_threshold: Forwarded to ``_insert_song_to_graph``.
        :param iterstep_threshold: Forwarded to ``_insert_song_to_graph``.
        :raises KeyError: If the provider lookup for a key raises KeyError
            (reported as ``key "<key>" is not in mask``).
        :returns: The return value of ``_insert_song_to_graph``.
        """
        value_dict = song.to_dict()

        # Processed values go into their own dict; the caller's mapping is
        # only read, never written.
        overrides = {}
        for key, value in sub_value_dict.items():
            # Guard just the lookup: a KeyError coming out of
            # provider.process() must not be relabelled as "not in mask".
            try:
                provider = self._session.provider_for_key(key)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

            overrides[key] = None if value is None else provider.process(value)

        value_dict.update(overrides)
        new_song = Song(
            self._session, value_dict,
            max_neighbors=self._session.config['max_neighbors'],
            max_distance=self._session.config['max_distance']
        )

        # Reuse the uid handed back by remove() and occupy the old slot.
        new_song.uid = self.remove(song.uid)
        self._song_list[song.uid] = new_song

        # Clear all known distances:
        new_song.distance_reset()
        return self._insert_song_to_graph(
            new_song, star_threshold, iterstep_threshold
        )
Exemplo n.º 3
0
    def rebuild_stupid(self):
        """(Re)build the graph by calculating the combination of all songs.

        This is a *very* expensive operation which takes quadratic time and
        should only ever be used for a small amount of songs where accuracy
        matters even more than time.

        Entries of the song list that are ``None`` are skipped, the same way
        ``_insert_song_to_graph`` skips them; only actual songs are paired.
        """
        # The song list may hold None placeholders; pairing those would hand
        # None to Song.distance_compute().
        songs = [song for song in self._song_list if song is not None]

        for song_a, song_b in combinations(songs, 2):
            distance = Song.distance_compute(song_a, song_b)
            Song.distance_add(song_a, song_b, distance)
Exemplo n.º 4
0
    def rebuild_stupid(self):
        """(Re)build the graph by comparing every song with every other song.

        This is a *very* expensive operation which takes quadratic time and
        should only ever be used for a small amount of songs where accuracy
        matters even more than time.

        ``None`` entries in the song list are left out before the pairs are
        formed (``_insert_song_to_graph`` applies the same ``None`` check).
        """
        # Drop None placeholders first so only real songs reach
        # Song.distance_compute().
        present = (entry for entry in self._song_list if entry is not None)

        for song_a, song_b in combinations(present, 2):
            distance = Song.distance_compute(song_a, song_b)
            Song.distance_add(song_a, song_b, distance)
Exemplo n.º 5
0
    def _insert_song_to_graph(self,
                              new_song,
                              star_threshold=0.75,
                              iterstep_threshold=50):
        next_len = len(self._song_list)
        if len(self) < iterstep_threshold:
            iterstep = 1
        else:
            iterstep = round(max(1, math.log(max(next_len, 1))))

        # Step 1: Find samples with similar songs (similar to the base step)
        distances = deque()
        for song in self._song_list[::iterstep]:
            if song is not None:
                distance = Song.distance_compute(song, new_song)
                distances.append((song, distance))
                new_song.distance_add(song, distance)

        # Step 2: Short refinement step
        for song, distance in distances:
            if distance.distance > star_threshold:
                for neighbor in song.neighbors():
                    distance = new_song.distance_compute(neighbor)
                    new_song.distance_add(neighbor, distance)

        return new_song.uid
Exemplo n.º 6
0
        def test_recommendation_history(self):
            """Check when a fed song becomes allowed again.

            The first song is allowed before it was fed and disallowed right
            after. It stays disallowed while four other songs are fed and is
            allowed again once the fifth one was fed.
            """
            history = RecommendationHistory()

            mask = {'artist': (None, None, 1), 'album': (None, None, 1)}
            session = Session('test', mask)

            fst_song = Song(session, {'artist': 'A', 'album': 'B'})

            # Not fed yet, so nothing blocks the song.
            self.assertEqual(history.allowed(fst_song), True)
            history.feed(fst_song)
            self.assertEqual(history.allowed(fst_song), False)

            # Each round feeds a freshly created, different song.
            expected_results = (False, False, False, False, True)
            for expectation in expected_results:
                other_song = Song(session, {'artist': 'X', 'album': 'Y'})
                history.feed(other_song)
                self.assertEqual(history.allowed(fst_song), expectation)
Exemplo n.º 7
0
        def test_count_keys(self):
            """Feed many random single-key songs and check the key statistics.

            After 2000 feeds the key counter is expected to sum up to 100 and
            the history to consist of 20 groups with 5 songs each.
            """
            history = History(maxlen=19)
            for _ in range(2000):
                random_key = choice('abcdef')
                history.feed(Song(self._session, {random_key: 1.0}))

            counter = history.count_keys()

            # NOTE(review): 'c' is fed above but not checked here - confirm
            # whether leaving it out is intentional.
            for char in 'abdef':
                self.assertTrue(char in counter)

            total_count = sum(counter.values())
            self.assertEqual(total_count, 100)

            group_count = len(list(history.groups()))
            self.assertEqual(group_count, 20)

            # groups() is queried again on purpose instead of reusing the
            # list from above, exactly like the count check did.
            for group in history.groups():
                group_size = len(list(group))
                self.assertEqual(group_size, 5)
Exemplo n.º 8
0
    def add(self, value_dict):
        """Create a song from *value_dict*, store it and return its uid.

        Every value that is not ``None`` is run through the provider the
        session returns for its key; ``None`` values are taken over as-is.
        The song gets the uid from ``_current_uid()`` and is appended to the
        song list if that uid is not smaller than the list length; otherwise
        it is written to the slot at that index.

        :param value_dict: Mapping of key to raw value. It is only read; the
            processed values are collected in a new dict.
        :raises KeyError: If the provider lookup for a key raises KeyError
            (reported as ``key "<key>" is not in mask``).
        :returns: The uid of the new song.
        """
        # Build a separate dict so the caller's mapping is not overwritten
        # with processed values.
        processed = {}
        for key, value in value_dict.items():
            # Only the lookup is guarded, so that a KeyError raised inside
            # provider.process() is not misreported as a missing mask key.
            try:
                provider = self._session.provider_for_key(key)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

            if value is None:
                processed[key] = None
            else:
                processed[key] = provider.process(value)

        new_song = Song(self._session,
                        processed,
                        max_neighbors=self._session.config['max_neighbors'],
                        max_distance=self._session.config['max_distance'])

        new_song.uid = self._current_uid()
        if new_song.uid >= len(self._song_list):
            self._song_list.append(new_song)
        else:
            self._song_list[new_song.uid] = new_song
        return new_song.uid
Exemplo n.º 9
0
    def add(self, value_dict):
        """Build a new song out of *value_dict* and put it into the song list.

        ``None`` values are kept; every other value is passed through the
        provider the session returns for its key. The uid comes from
        ``_current_uid()``: if it is not smaller than the length of the song
        list the song is appended, otherwise it is stored at that index.

        :param value_dict: Mapping of key to raw value. The mapping is left
            untouched; processed values live in a dict of their own.
        :raises KeyError: If the provider lookup for a key raises KeyError
            (reported as ``key "<key>" is not in mask``).
        :returns: The uid of the new song.
        """
        # The caller's mapping is read-only here; results go to a new dict.
        song_values = {}
        for key, value in value_dict.items():
            # Guard just the lookup: a KeyError coming out of
            # provider.process() must not be relabelled as "not in mask".
            try:
                provider = self._session.provider_for_key(key)
            except KeyError:
                raise KeyError('key "{k}" is not in mask'.format(k=key))

            song_values[key] = None if value is None else provider.process(value)

        new_song = Song(
            self._session, song_values,
            max_neighbors=self._session.config['max_neighbors'],
            max_distance=self._session.config['max_distance']
        )

        new_song.uid = self._current_uid()
        if new_song.uid >= len(self._song_list):
            self._song_list.append(new_song)
        else:
            self._song_list[new_song.uid] = new_song
        return new_song.uid
Exemplo n.º 10
0
        def test_relim(self):
            """Feed a fixed listening pattern and print itemsets and rules.

            Six songs with the uids 0-5 are fed repeatedly: each song is
            followed by itself and all songs after it. The frequent itemsets
            and the association rules derived from them are printed; nothing
            is asserted.
            """
            def print_banner(title):
                # Blank line, framed title, blank line.
                print()
                print('==================')
                print(title)
                print('==================')
                print()

            history = ListenHistory()

            songs = [Song(self._session, {key: 1.0}) for key in 'abcdef']
            for idx, song in enumerate(songs):
                song.uid = idx

            N = 10000
            for _ in range(N):
                for start, leader in enumerate(songs):
                    history.feed(leader)
                    for follower in songs[start:]:
                        history.feed(follower)

            itemsets = history.frequent_itemsets()

            print_banner('FREQUENT ITEMSETS:')
            by_support = sorted(itemsets.items(), key=lambda pair: pair[1])
            for itemset, support in by_support:
                # NOTE(review): the value shown with '%' is support / N * 10 -
                # confirm the intended scaling.
                uids = str([song.uid for song in itemset])
                print('{: 8d} ({:3.3f}%): {:>20s}'.format(
                    support, support / N * 10, uids
                ))

            print_banner('ASSOCIATION RULES:')

            rules = history.find_rules(itemsets)
            for left, right, support, rating in rules:
                left_uids = str([song.uid for song in left])
                right_uids = str([song.uid for song in right])
                print('{:>15s} <-> {:<15s} [supp={:> 5d}, rating={:.5f}]'.format(
                    left_uids, right_uids, support, rating
                ))
Exemplo n.º 11
0
    def _insert_song_to_graph(self, new_song, star_threshold=0.75, iterstep_threshold=50):
        next_len = len(self._song_list)
        if len(self) < iterstep_threshold:
            iterstep = 1
        else:
            iterstep = round(max(1, math.log(max(next_len, 1))))

        # Step 1: Find samples with similar songs (similar to the base step)
        distances = deque()
        for song in self._song_list[::iterstep]:
            if song is not None:
                distance = Song.distance_compute(song, new_song)
                distances.append((song, distance))
                new_song.distance_add(song, distance)

        # Step 2: Short refinement step
        for song, distance in distances:
            if distance.distance > star_threshold:
                for neighbor in song.neighbors():
                    distance = new_song.distance_compute(neighbor)
                    new_song.distance_add(neighbor, distance)

        return new_song.uid