def modify(self, song, sub_value_dict, star_threshold=0.75, iterstep_threshold=50):
    """Replace ``song`` by an updated copy and insert that copy into the graph.

    The values of ``song`` (as returned by ``song.to_dict()``) are overlaid
    with the processed values of ``sub_value_dict``. A new ``Song`` is built
    from the result, stored in the slot of the old song and handed to
    ``_insert_song_to_graph``.

    :param song: The song to modify; its ``to_dict()`` and ``uid`` are used.
    :param sub_value_dict: Mapping of attribute key to new raw value.
        ``None`` is stored unchanged, every other value is run through the
        provider registered for its key. The mapping itself is left untouched.
    :param star_threshold: Passed on to ``_insert_song_to_graph``.
    :param iterstep_threshold: Passed on to ``_insert_song_to_graph``.
    :raises KeyError: If no provider is registered for one of the keys.
    :returns: The return value of ``_insert_song_to_graph`` for the new song.
    """
    value_dict = song.to_dict()

    # Collect the processed values in a fresh dict so that the caller's
    # mapping is never altered - not even when a later key is rejected.
    processed = {}
    for key, value in sub_value_dict.items():
        # Only the provider lookup decides whether the key is part of the
        # mask; errors raised while processing a value propagate unchanged.
        try:
            provider = self._session.provider_for_key(key)
        except KeyError as err:
            raise KeyError('key "{k}" is not in mask'.format(k=key)) from err

        processed[key] = None if value is None else provider.process(value)

    value_dict.update(processed)

    new_song = Song(
        self._session, value_dict,
        max_neighbors=self._session.config['max_neighbors'],
        max_distance=self._session.config['max_distance']
    )

    # Take the old song out and put the new one into the same slot.
    new_song.uid = self.remove(song.uid)
    self._song_list[song.uid] = new_song

    # Clear all known distances:
    new_song.distance_reset()

    return self._insert_song_to_graph(new_song, star_threshold, iterstep_threshold)
def modify(self, song, sub_value_dict, star_threshold=0.75, iterstep_threshold=50):
    """Swap ``song`` for a copy carrying the values of ``sub_value_dict``.

    Starting from ``song.to_dict()``, every entry of ``sub_value_dict`` is
    normalized by the provider registered for its key (``None`` values are
    kept as they are) and merged in. The resulting ``Song`` takes the slot
    of the old one and is inserted into the graph.

    :param song: The song that shall be replaced.
    :param sub_value_dict: Attribute keys mapped to their new raw values.
        This mapping is only read, never written to.
    :param star_threshold: Forwarded to ``_insert_song_to_graph``.
    :param iterstep_threshold: Forwarded to ``_insert_song_to_graph``.
    :raises KeyError: If a key has no provider registered in the session.
    :returns: Whatever ``_insert_song_to_graph`` returns for the new song.
    """
    value_dict = song.to_dict()

    # Work on a separate dict: writing the processed values back into
    # ``sub_value_dict`` would leave the caller with a half-converted
    # mapping whenever a later key turns out to be invalid.
    updates = {}
    for key, value in sub_value_dict.items():
        try:
            # Keep the try block minimal so that a KeyError raised inside
            # ``provider.process`` is not mistaken for an unknown key.
            provider = self._session.provider_for_key(key)
        except KeyError as err:
            raise KeyError('key "{k}" is not in mask'.format(k=key)) from err

        if value is None:
            updates[key] = None
        else:
            updates[key] = provider.process(value)

    value_dict.update(updates)

    config = self._session.config
    new_song = Song(
        self._session, value_dict,
        max_neighbors=config['max_neighbors'],
        max_distance=config['max_distance']
    )

    # The new song inherits the slot of the removed one.
    new_song.uid = self.remove(song.uid)
    self._song_list[song.uid] = new_song

    # Clear all known distances:
    new_song.distance_reset()

    return self._insert_song_to_graph(
        new_song, star_threshold, iterstep_threshold
    )
def rebuild_stupid(self):
    """(Re)build the graph by calculating the combination of all songs.

    This is a *very* expensive operation which takes quadratic time and
    should only ever be used for a small amount of songs where accuracy
    matters even more than time.
    """
    # The song list may contain ``None`` entries (the insertion code skips
    # them as well); only real songs can take part in a distance calculation.
    songs = [song for song in self._song_list if song is not None]

    for song_a, song_b in combinations(songs, 2):
        distance = Song.distance_compute(song_a, song_b)
        Song.distance_add(song_a, song_b, distance)
def _insert_song_to_graph(self, new_song, star_threshold=0.75, iterstep_threshold=50):
    """Connect ``new_song`` to a sample of the stored songs.

    :param new_song: The song to compute and add distances for.
    :param star_threshold: Sampled songs whose ``distance.distance`` exceeds
        this value have their neighbors compared to ``new_song`` as well.
    :param iterstep_threshold: While ``len(self)`` is below this value every
        entry of the song list is visited; otherwise only every n-th entry,
        with n being the rounded natural logarithm of the list length
        (at least 1).
    :returns: ``new_song.uid``
    """
    list_length = len(self._song_list)
    if len(self) >= iterstep_threshold:
        stride = round(max(1, math.log(max(list_length, 1))))
    else:
        stride = 1

    # Phase 1: compare the new song against a (possibly thinned out)
    # sample of the song list, skipping empty slots.
    sampled = deque()
    for candidate in self._song_list[::stride]:
        if candidate is None:
            continue

        candidate_distance = Song.distance_compute(candidate, new_song)
        sampled.append((candidate, candidate_distance))
        new_song.distance_add(candidate, candidate_distance)

    # Phase 2: for every sampled song above the threshold, also look
    # at the neighbors of that song.
    for candidate, candidate_distance in sampled:
        if candidate_distance.distance > star_threshold:
            for neighbor in candidate.neighbors():
                neighbor_distance = new_song.distance_compute(neighbor)
                new_song.distance_add(neighbor, neighbor_distance)

    return new_song.uid
def test_recommendation_history(self):
    """A fed song is disallowed until five other songs have been fed."""
    history = RecommendationHistory()

    mask = {
        'artist': (None, None, 1),
        'album': (None, None, 1)
    }
    session = Session('test', mask)
    first = Song(session, {'artist': 'A', 'album': 'B'})

    # Unknown to the history at first, blocked right after feeding it.
    self.assertEqual(history.allowed(first), True)
    history.feed(first)
    self.assertEqual(history.allowed(first), False)

    # Still blocked after each of the next four songs, free after the fifth.
    expected_results = [False] * 4 + [True]
    for expected in expected_results:
        other = Song(session, {'artist': 'X', 'album': 'Y'})
        history.feed(other)
        self.assertEqual(history.allowed(first), expected)
def test_count_keys(self):
    """Check ``count_keys()`` and ``groups()`` after feeding 2000 songs.

    Each song carries exactly one key drawn at random from ``'abcdef'``.
    The test expects all six keys to be counted, the counts to sum up to
    100, and the history to hold 20 groups of 5 songs each.
    """
    keys = 'abcdef'

    history = History(maxlen=19)
    for _ in range(2000):
        history.feed(Song(self._session, {choice(keys): 1.0}))

    counter = history.count_keys()

    # Check every key that can be drawn above (the former hard-coded
    # 'abdef' silently left out 'c').
    for char in keys:
        self.assertIn(char, counter)

    self.assertEqual(sum(counter.values()), 100)
    self.assertEqual(len(list(history.groups())), 20)

    for group in history.groups():
        self.assertEqual(len(list(group)), 5)
def add(self, value_dict):
    """Create a song from ``value_dict`` and store it in the song list.

    :param value_dict: Mapping of attribute key to raw value. ``None`` is
        stored unchanged, every other value is run through the provider
        registered for its key. The mapping itself is left untouched.
    :raises KeyError: If no provider is registered for one of the keys.
    :returns: The uid assigned to the new song.
    """
    # Collect the processed values in a fresh dict so that the caller's
    # mapping is never altered - not even when a later key is rejected.
    processed = {}
    for key, value in value_dict.items():
        # Only the provider lookup decides whether the key is part of the
        # mask; errors raised while processing a value propagate unchanged.
        try:
            provider = self._session.provider_for_key(key)
        except KeyError as err:
            raise KeyError('key "{k}" is not in mask'.format(k=key)) from err

        processed[key] = None if value is None else provider.process(value)

    new_song = Song(
        self._session, processed,
        max_neighbors=self._session.config['max_neighbors'],
        max_distance=self._session.config['max_distance']
    )
    new_song.uid = self._current_uid()

    # A uid past the end of the list grows the list; any other uid
    # addresses an existing slot which is overwritten.
    if new_song.uid >= len(self._song_list):
        self._song_list.append(new_song)
    else:
        self._song_list[new_song.uid] = new_song

    return new_song.uid
def add(self, value_dict):
    """Build a new song out of ``value_dict`` and register it.

    Every value is normalized by the provider registered for its key
    (``None`` values are kept as they are) before the song is created.

    :param value_dict: Attribute keys mapped to their raw values.
        This mapping is only read, never written to.
    :raises KeyError: If a key has no provider registered in the session.
    :returns: The uid under which the new song was stored.
    """
    # Work on a separate dict: writing the processed values back into
    # ``value_dict`` would leave the caller with a half-converted mapping
    # whenever a later key turns out to be invalid.
    normalized = {}
    for key, value in value_dict.items():
        try:
            # Keep the try block minimal so that a KeyError raised inside
            # ``provider.process`` is not mistaken for an unknown key.
            provider = self._session.provider_for_key(key)
        except KeyError as err:
            raise KeyError('key "{k}" is not in mask'.format(k=key)) from err

        if value is None:
            normalized[key] = None
        else:
            normalized[key] = provider.process(value)

    config = self._session.config
    new_song = Song(
        self._session, normalized,
        max_neighbors=config['max_neighbors'],
        max_distance=config['max_distance']
    )

    uid = new_song.uid = self._current_uid()

    # Either extend the list or overwrite the already existing slot.
    if uid >= len(self._song_list):
        self._song_list.append(new_song)
    else:
        self._song_list[uid] = new_song

    return new_song.uid
def test_relim(self):
    """Feed overlapping song sequences and print the mined results.

    No assertions are made; the test only runs ``frequent_itemsets()``
    and ``find_rules()`` on the filled history and prints what they return.
    """
    def print_banner(title):
        # Headline framed by two rules and surrounded by blank lines.
        print()
        print('==================')
        print(title)
        print('==================')
        print()

    def uid_list(song_group):
        return str([song.uid for song in song_group])

    history = ListenHistory()

    songs = [Song(self._session, {key: 1.0}) for key in 'abcdef']
    for uid, song in enumerate(songs):
        song.uid = uid

    N = 10000
    for _ in range(N):
        # Feed each song followed by the tail of the list starting at it.
        for start, head in enumerate(songs):
            history.feed(head)
            for follower in songs[start:]:
                history.feed(follower)

    itemsets = history.frequent_itemsets()

    print_banner('FREQUENT ITEMSETS:')
    by_support = sorted(itemsets.items(), key=lambda pair: pair[1])
    for itemset, support in by_support:
        line = '{: 8d} ({:3.3f}%): {:>20s}'.format(
            support,
            support / N * 10,
            uid_list(itemset)
        )
        print(line)

    print_banner('ASSOCIATION RULES:')
    rules = history.find_rules(itemsets)
    for left, right, support, rating in rules:
        line = '{:>15s} <-> {:<15s} [supp={:> 5d}, rating={:.5f}]'.format(
            uid_list(left),
            uid_list(right),
            support,
            rating
        )
        print(line)