def build_ftree(self):
    """Create ``self.ftree`` from the playlist and run its processing steps.

    Only ``self.playlist`` is fed to the tree here (the original carries a
    commented-out ``+ self.recommendations``). After the items and the
    relevant fields are set, the tree is built, its frequencies are mined
    and its stats are computed, in that order.
    """
    # Bind the attribute first so it is set even if a later step raises.
    tree = self.ftree = FTree()
    tree.set_items(self.playlist)  # + self.recommendations)
    tree.set_fields(self.relevant_fields)
    tree.build()
    tree.mine_frequencies()
    tree.compute_stats()
def build_ftree(self):
    """Create ``self.ftree`` from the playlist plus the recommendations.

    The two lists are concatenated and handed to a fresh ``FTree`` together
    with the relevant fields; the tree is then built, its frequencies are
    mined and its stats are computed, in that order.
    """
    # Bind the attribute first so it is set even if a later step raises.
    tree = self.ftree = FTree()
    tree.set_items(self.playlist + self.recommendations)
    tree.set_fields(self.relevant_fields)
    tree.build()
    tree.mine_frequencies()
    tree.compute_stats()
class ComboSorter:
    """Sorter that relies on frequency of occurrences in playlist and
    recommendations to sort.

    Every non-empty combination of the relevant fields is looked up in an
    ``FTree`` built over playlist + recommendations, turned into a sub-score
    by the selected scoring function, and the sub-scores are folded together
    by addition or multiplication.

    Call order implied by what each method reads: set playlist,
    recommendations, relevant fields and scoring function, then
    ``build_ftree()``, ``compute_scores()`` and finally ``sort()``.

    NOTE(review): ``compute_scores()`` stores the score dict in the instance
    attribute ``score``, which shadows the ``score()`` method below, so that
    method cannot be called successfully. Left as is because callers may
    already read ``sorter.score[item]``.
    """

    # Generic functions
    def __init__(self):
        self.playlist = None  # list()
        self.recommendations = None  # list()
        self.sorted = None  # list()
        self.relevant_fields = None
        self.ftree = None
        # Private
        self._stat_dict = None
        self._field_combos = None
        self.scoring_function = None
        # Default fold operation, so _compute_score also works when
        # scoring_function is assigned directly.
        self.subscore_op = 'add'

    def set_scoring_function(self, function_name, subscore_op='add'):
        """Select the scoring method by name and how sub-scores are combined.

        ``function_name`` is resolved with ``getattr`` on this instance.
        ``subscore_op`` of ``'mul'`` multiplies sub-scores; any other value
        adds them.
        """
        self.scoring_function = getattr(self, function_name)
        self.subscore_op = subscore_op

    def set_relevant_fields(self, fields):
        """Store the field names to score on and drop the cached combos."""
        self.relevant_fields = fields
        # The combo cache is derived from the fields; without this reset a
        # second call would keep serving combinations of the old fields.
        self._field_combos = None

    def set_user_playlist(self, playlist):
        """ Takes a list of items ( of type Song ) in playlist. Stores them in self.playlist"""
        self.playlist = playlist

    def set_recommendations(self, recommendations):
        """ Takes a list of items ( of type Song ) in recommendations. Stores them in self.recommendations """
        self.recommendations = recommendations

    def remove_recommendations_in_playlist(self):
        """Drop every recommendation whose ``songid`` is already in the playlist."""
        known_ids = set(song.songid for song in self.playlist)
        self.recommendations = [
            song for song in self.recommendations
            if song.songid not in known_ids
        ]

    def build_ftree(self):
        """Build ``self.ftree`` over playlist + recommendations, then mine
        its frequencies and compute its stats."""
        self.ftree = FTree()
        self.ftree.set_items(self.playlist + self.recommendations)
        self.ftree.set_fields(self.relevant_fields)
        self.ftree.build()
        self.ftree.mine_frequencies()
        self.ftree.compute_stats()

    # Your access to working functions
    def compute_scores(self, fields=None):
        """Score every recommendation and store the result in ``self.score``.

        ``fields`` defaults to the relevant fields. It is passed down, but
        ``_compute_score`` currently ignores it and always uses the
        combinations of ``self.relevant_fields``.
        """
        if fields is None:
            fields = self.relevant_fields
        self.score = self._compute_scores(self.recommendations, fields)

    def score_comparator(self, x, y):
        """Three-way comparator on the scores of two songs (-1, 0 or 1)."""
        left, right = self.score[x], self.score[y]
        if left < right:
            return -1
        if left == right:
            return 0
        return 1

    def sort(self):
        """Sort recommendations by descending score; store and return them.

        Uses ``key=`` rather than the Python-2-only ``cmp=`` argument. The
        sort stays stable, so equally scored songs keep their input order.
        """
        self.sorted = sorted(self.recommendations,
                             key=lambda item: self.score[item],
                             reverse=True)
        return self.sorted

    # Get functions, for if you're interested
    def playlist_frequency(self, GMT):
        """Look up ``GMT`` (a tuple of form (G, M, T)) in ``self.frequencies``.

        NOTE(review): nothing in this class assigns ``self.frequencies``, so
        this raises AttributeError unless a caller sets it - confirm whether
        it should query ``self.ftree`` instead.
        """
        return self.frequencies[GMT]

    def score(self, item):
        """Return the score of the item as computed.

        NOTE(review): shadowed by the ``score`` dict that
        ``compute_scores()`` assigns on the instance.
        """
        return self.score[item]

    def _compute_scores(self, items, fields):
        """Return a dict mapping each item to its computed score."""
        return dict((item, self._compute_score(item, fields))
                    for item in items)

    def _list_field_combos(self):
        """Return every non-empty combination of the relevant fields.

        Each combination is a list of field names in their original relative
        order. The list is computed once and cached in
        ``self._field_combos``. An empty field list yields ``[]`` (the
        previous recursive version never terminated on it).
        """
        if self._field_combos is None:
            combos = []
            # Walk the fields back to front: each field contributes itself
            # plus itself prepended to every combo of the fields after it.
            # This reproduces the ordering of the original recursion.
            for field in reversed(self.relevant_fields):
                extended = [[field] + tail for tail in combos]
                combos.append([field])
                combos.extend(extended)
            self._field_combos = combos
        return self._field_combos

    def gen_query_dict(self, fc, item):
        """Map each field name in ``fc`` to the item's value for that field."""
        return dict((field, getattr(item, field)) for field in fc)

    def _compute_score(self, item, fields=None):
        """Fold the sub-scores of all field combinations into one score.

        Starts from 1 for both operations (neutral for multiplication, a
        constant offset for addition). ``fields`` is accepted but unused.
        """
        score = 1
        multiply = self.subscore_op == 'mul'
        for fc in self._list_field_combos():
            subscore = self.scoring_function(self.gen_query_dict(fc, item))
            if multiply:
                score *= subscore
            else:
                score += subscore
        return score

    # ------------------------ SCORE FUNCTIONS -----------------------
    def _raw_frequency_score(self, query_dict):
        """Return the FTree frequency of the field/value combination."""
        return self.ftree.get_frequency_from_dict(query_dict)

    def _log_score(self, query_dict):
        """Return log(1 + frequency) of the field/value combination.

        Author's note: fields with low cardinality (such as mood) dominate (?)
        """
        return math.log(1 + self.ftree.get_frequency_from_dict(query_dict))

    def _normalized_score(self, query_dict):
        """Return the frequency normalised by the maximum for these fields.

        Author's notes - addition: favours a combination of genre and mood,
        otherwise a decent balance. Multiplication: favours artists you've
        heard before over artists you haven't.
        """
        return self.normalized_frequency(query_dict)

    # Author's notes on cardinality weighting:
    # The artist field absolutely dominates the cardinality part. Taking
    # log(1 + cardinality) helps massively and it's almost invisible in the
    # cardinality score. Without cardinality, the more difficult matches are
    # given the same weight as easier matches. When multiplying, the
    # cardinality factors distribute over the product, so the result is just
    # the normalized score * k (k = product over all field combos of the
    # cardinality) - not really worth the effort.
    def _normalized_with_cardinality(self, query_dict):
        """Return field cardinality * normalised frequency.

        Author's note: fun, but still seems to be dominated by artist.
        """
        # Alternatives tried: math.log(1 + max_cardinality) and
        # math.log(1 + field_combo_cardinality) as the weight. Not a big
        # difference; this form is faster.
        return (self.get_field_cardinality(query_dict)
                * self.normalized_frequency(query_dict))

    def _cardinality_score(self, query_dict):
        """Return field cardinality * raw frequency.

        Author's note: surprising; can't tell much from a skewed playlist.
        """
        # Alternatives tried: the same math.log(1 + ...) weights as above.
        # The plain form works badly because artist has such a high
        # cardinality.
        return (self.get_field_cardinality(query_dict)
                * self.ftree.get_frequency_from_dict(query_dict))

    # ----------- FUNCTIONS USED WITHIN SCORING FUNCTIONS -----------
    def get_field_cardinality(self, query_dict):
        """Return the cardinality used as weight by the scoring functions."""
        # An alternative based on self.max_cardinality(query_dict) was
        # commented out in the original.
        return self.field_combo_cardinality(query_dict)

    def normalized_frequency(self, query_dict):
        """Return frequency / maximum frequency for the queried fields.

        Returns 0.0 when the tree reports a maximum of 0 instead of raising
        ZeroDivisionError.
        """
        max_val = self.ftree.get_max_freq(list(query_dict))
        freq = self.ftree.get_frequency_from_dict(query_dict)
        if not max_val:
            return 0.0
        return float(freq) / max_val

    def field_combo_cardinality(self, query_dict):
        """Return the FTree cardinality of the queried field combination."""
        # Pass a real list: dict.keys() is a view on Python 3.
        return self.ftree.get_field_combo_cardinality(list(query_dict))
class ComboSorter:
    """Sorter that ranks recommendations by how often their field values
    occur in the playlist.

    Every non-empty combination of the relevant fields is looked up in an
    ``FTree`` built over the playlist only, turned into a sub-score by the
    selected scoring function, and the sub-scores are folded together by
    addition or multiplication.

    Call order implied by what each method reads: set playlist,
    recommendations, relevant fields and scoring function, then
    ``build_ftree()``, ``compute_scores()`` and finally ``sort()``.

    NOTE(review): ``compute_scores()`` stores the score dict in the instance
    attribute ``score``, which shadows the ``score()`` method below, so that
    method cannot be called successfully. Left as is because callers may
    already read ``sorter.score[item]``.
    """

    # Generic functions
    def __init__(self):
        self.playlist = None  # list()
        self.recommendations = None  # list()
        self.sorted = None  # list()
        self.relevant_fields = None
        self.ftree = None
        # Private
        self._stat_dict = None
        self._field_combos = None
        self.scoring_function = None
        # Default fold operation, so _compute_score also works when
        # scoring_function is assigned directly.
        self.subscore_op = 'add'

    def set_scoring_function(self, function_name, subscore_op='add'):
        """Select the scoring method by name and how sub-scores are combined.

        ``function_name`` is resolved with ``getattr`` on this instance.
        ``subscore_op`` of ``'mul'`` multiplies sub-scores; any other value
        adds them.
        """
        self.scoring_function = getattr(self, function_name)
        self.subscore_op = subscore_op

    def set_relevant_fields(self, fields):
        """Store the field names to score on and drop the cached combos."""
        self.relevant_fields = fields
        # The combo cache is derived from the fields; without this reset a
        # second call would keep serving combinations of the old fields.
        self._field_combos = None

    def set_user_playlist(self, playlist):
        """ Takes a list of items ( of type Song ) in playlist. Stores them in self.playlist"""
        self.playlist = playlist

    def set_recommendations(self, recommendations):
        """ Takes a list of items ( of type Song ) in recommendations. Stores them in self.recommendations """
        self.recommendations = recommendations

    def remove_recommendations_in_playlist(self):
        """Drop every recommendation whose ``songid`` is already in the playlist."""
        known_ids = set(song.songid for song in self.playlist)
        self.recommendations = [
            song for song in self.recommendations
            if song.songid not in known_ids
        ]

    def build_ftree(self):
        """Build ``self.ftree`` over the playlist only, then mine its
        frequencies and compute its stats."""
        self.ftree = FTree()
        self.ftree.set_items(self.playlist)  # + self.recommendations)
        self.ftree.set_fields(self.relevant_fields)
        self.ftree.build()
        self.ftree.mine_frequencies()
        self.ftree.compute_stats()

    # Your access to working functions
    def compute_scores(self, fields=None):
        """Score every recommendation and store the result in ``self.score``.

        ``fields`` defaults to the relevant fields. It is passed down, but
        ``_compute_score`` currently ignores it and always uses the
        combinations of ``self.relevant_fields``.
        """
        if fields is None:
            fields = self.relevant_fields
        self.score = self._compute_scores(self.recommendations, fields)

    def score_comparator(self, x, y):
        """Three-way comparator on the scores of two songs (-1, 0 or 1)."""
        left, right = self.score[x], self.score[y]
        if left < right:
            return -1
        if left == right:
            return 0
        return 1

    def sort(self):
        """Sort recommendations by descending score; store and return them.

        Uses ``key=`` rather than the Python-2-only ``cmp=`` argument. The
        sort stays stable, so equally scored songs keep their input order.
        """
        self.sorted = sorted(self.recommendations,
                             key=lambda item: self.score[item],
                             reverse=True)
        return self.sorted

    # Get functions, for if you're interested
    def playlist_frequency(self, GMT):
        """Look up ``GMT`` (a tuple of form (G, M, T)) in ``self.frequencies``.

        NOTE(review): nothing in this class assigns ``self.frequencies``, so
        this raises AttributeError unless a caller sets it - confirm whether
        it should query ``self.ftree`` instead.
        """
        return self.frequencies[GMT]

    def score(self, item):
        """Return the score of the item as computed.

        NOTE(review): shadowed by the ``score`` dict that
        ``compute_scores()`` assigns on the instance.
        """
        return self.score[item]

    def _compute_scores(self, items, fields):
        """Return a dict mapping each item to its computed score."""
        return dict((item, self._compute_score(item, fields))
                    for item in items)

    def _list_field_combos(self):
        """Return every non-empty combination of the relevant fields.

        Each combination is a list of field names in their original relative
        order. The list is computed once and cached in
        ``self._field_combos``. An empty field list yields ``[]`` (the
        previous recursive version never terminated on it).
        """
        if self._field_combos is None:
            combos = []
            # Walk the fields back to front: each field contributes itself
            # plus itself prepended to every combo of the fields after it.
            # This reproduces the ordering of the original recursion.
            for field in reversed(self.relevant_fields):
                extended = [[field] + tail for tail in combos]
                combos.append([field])
                combos.extend(extended)
            self._field_combos = combos
        return self._field_combos

    def gen_query_dict(self, fc, item):
        """Map each field name in ``fc`` to the item's value for that field."""
        return dict((field, getattr(item, field)) for field in fc)

    def _compute_score(self, item, fields=None):
        """Fold the sub-scores of all field combinations into one score.

        Starts from 1 for both operations (neutral for multiplication, a
        constant offset for addition). ``fields`` is accepted but unused.
        """
        score = 1
        multiply = self.subscore_op == 'mul'
        for fc in self._list_field_combos():
            subscore = self.scoring_function(self.gen_query_dict(fc, item))
            if multiply:
                score *= subscore
            else:
                score += subscore
        return score

    # ------------------------ SCORE FUNCTIONS -----------------------
    def _raw_frequency_score(self, query_dict):
        """Return the FTree frequency of the field/value combination."""
        return self.ftree.get_frequency_from_dict(query_dict)

    def _log_score(self, query_dict):
        """Return log(1 + frequency) of the field/value combination.

        Author's note: fields with low cardinality (such as mood) dominate (?)
        """
        return math.log(1 + self.ftree.get_frequency_from_dict(query_dict))

    def _normalized_score(self, query_dict):
        """Return the smoothed, normalised frequency for these fields.

        Author's notes - addition: favours a combination of genre and mood,
        otherwise a decent balance. Multiplication: favours artists you've
        heard before over artists you haven't.
        """
        return self.normalized_frequency(query_dict)

    # Author's notes on cardinality weighting:
    # The artist field absolutely dominates the cardinality part. Taking
    # log(1 + cardinality) helps massively and it's almost invisible in the
    # cardinality score. Without cardinality, the more difficult matches are
    # given the same weight as easier matches. When multiplying, the
    # cardinality factors distribute over the product, so the result is just
    # the normalized score * k (k = product over all field combos of the
    # cardinality) - not really worth the effort.
    def _normalized_with_cardinality(self, query_dict):
        """Return field-combo cardinality * normalised frequency.

        Author's note: fun, but still seems to be dominated by artist.
        """
        # Alternatives tried: math.log(1 + max_cardinality) and
        # math.log(1 + field_combo_cardinality) as the weight. Not a big
        # difference; this form is faster.
        return (self.field_combo_cardinality(query_dict)
                * self.normalized_frequency(query_dict))

    def _cardinality_score(self, query_dict):
        """Return field-combo cardinality * raw frequency.

        Author's note: surprising; can't tell much from a skewed playlist.
        """
        # Alternatives tried: the same math.log(1 + ...) weights as above.
        # The plain form works badly because artist has such a high
        # cardinality.
        return (self.field_combo_cardinality(query_dict)
                * self.ftree.get_frequency_from_dict(query_dict))

    # ----------- FUNCTIONS USED WITHIN SCORING FUNCTIONS -----------
    def normalized_frequency(self, query_dict):
        """Return (frequency + 1) / (maximum frequency + 1) for the fields.

        The +1 on both sides keeps unseen combinations above zero and avoids
        a division by zero when the maximum is 0.
        """
        max_val = self.ftree.get_max_freq(list(query_dict))
        freq = self.ftree.get_frequency_from_dict(query_dict)
        return float(freq + 1) / (max_val + 1)

    def field_combo_cardinality(self, query_dict):
        """Return the FTree cardinality of the queried field combination."""
        # Pass a real list: dict.keys() is a view on Python 3.
        return self.ftree.get_field_combo_cardinality(list(query_dict))
# Smoke test for FTree: build a tree over (person, item, avec) triples, print
# it, mine the frequencies and print a few manual frequency look-ups.
tup = [
    ("Krishnan", "Maggi", "Cheese"),
    ("Krishnan", "Maggi", "Egg"),
    ("Srinath", "Maggi", "Cheese"),
    ("Krishnan", "Coffee", "Sugar"),
    ("Srinath", "Coffee", "Milk"),
    ("Srinath", "Coffee", "Sugar"),
    ("Krishnan", "Maggi", "Sugar"),
]
items = [Likes(t) for t in tup]

ftree = FTree()
ftree.set_items(items)
ftree.set_fields(["person", "item", "avec"])
ftree.build()
ftree.print_tree()
ftree.mine_frequencies()
# Single-argument print(...) calls behave the same on Python 2 and 3.
print(ftree.frequency)
#exit(0)

# Look-ups for a triple that is in the data and one that is not, each with
# the default fields and with an explicit ["person", "item"] field list.
print(ftree.manual_get_frequency(Likes(("Krishnan", "Maggi", "Egg"))))
print(ftree.manual_get_frequency(Likes(("Krishnan", "Maggi", "Egg")),
                                 ["person", "item"]))
print(ftree.manual_get_frequency(Likes(("Srinath", "Maggi", "Egg"))))
print(ftree.manual_get_frequency(Likes(("Srinath", "Maggi", "Egg")),
                                 ["person", "item"]))
def build_user_ftree(self):
    """Create ``self.user_ftree`` from the playlist plus the recommendations.

    The concatenated item list and the relevant fields are handed to a fresh
    ``FTree``, which is then built.
    """
    # Bind the attribute first so it is set even if a later step raises.
    tree = self.user_ftree = FTree()
    tree.set_items(self.playlist + self.recommendations)
    tree.set_fields(self.relevant_fields)
    tree.build()
class TwoPassFTSorter:
    """Sorter that relies on frequency of occurrences in playlist and
    recommendations to sort.

    NOTE(review): ``compute_scores()`` stores the score dict in the instance
    attribute ``score``, which shadows the ``score()`` method below, so that
    method cannot be called successfully. Left as is because callers may
    already read ``sorter.score[item]``.
    """

    # Generic functions
    def __init__(self):
        self.playlist = None  # list()
        self.recommendations = None  # list()
        self.sorted = None  # list()
        self.relevant_fields = None
        self.user_ftree = None
        self.stat_dict = None

    # Editable functions based on your model
    def _compute_score(self, item, fields):
        """Compute the score of one item from its subset frequencies.

        Starts from the frequency returned for an empty second argument plus
        one, then adds the frequency returned for each single field.

        NOTE(review): the per-field frequency is added twice. That is how the
        original reads (one addition labelled "normalized approach", one
        left over before the disabled "cardinality based approach"); confirm
        whether the double count is intended. The original docstring
        described the formula as "n-tuple score * (n-1)-tuple score".
        """
        tree = self.user_ftree
        score = tree.get_subset_frequency(item, [], fields) + 1
        for field in fields:
            # Normalized approach
            score += tree.get_subset_frequency(item, [field], fields)
            score += tree.get_subset_frequency(item, [field], fields)
            # Cardinality based approach (disabled in the original):
            # score *= (tree.get_subset_frequency(item, [field], fields) + 1)
        return score

    def build_user_ftree(self):
        """Build ``self.user_ftree`` over playlist + recommendations."""
        self.user_ftree = FTree()
        self.user_ftree.set_items(self.playlist + self.recommendations)
        self.user_ftree.set_fields(self.relevant_fields)
        self.user_ftree.build()

    def compute_scores(self, fields=None):
        """Score every recommendation and store the result in ``self.score``.

        ``fields`` defaults to the relevant fields.
        """
        # First pass on n-tuples
        if fields is None:
            fields = self.relevant_fields
        self.score = self._compute_scores(self.recommendations, fields)
        # Second pass on (n-1) tuples: not implemented here.

    def set_relevant_fields(self, fields):
        """Store the field names to score on."""
        self.relevant_fields = fields

    def set_user_playlist(self, playlist):
        """ Takes a list of items ( of type Song ) in playlist. Stores them in self.playlist"""
        self.playlist = playlist

    def set_recommendations(self, recommendations):
        """ Takes a list of items ( of type Song ) in recommendations. Stores them in self.recommendations """
        self.recommendations = recommendations

    def _load_stats(self):
        """Build the stats dict: ``max_combos`` and per-field ``cardinality``.

        Cardinality is the number of distinct values a field takes across
        the playlist and the recommendations.
        """
        distinct = dict((field, set()) for field in self.relevant_fields)
        for items in (self.playlist, self.recommendations):
            for item in items:
                for field in self.relevant_fields:
                    distinct[field].add(getattr(item, field))
        return {
            # Placeholder value; the original notes 8000 (20 genres, 20
            # moods, 20 tempos assumed) and plans to take it from a database
            # query once data is available.
            "max_combos": float(8),
            "cardinality": dict((field, len(values))
                                for field, values in distinct.items()),
        }

    def stats(self, key, subkey=None):
        """Return a statistic, loading all statistics on first use.

        Returns the value stored under ``key``, or under ``key`` then
        ``subkey`` when ``subkey`` is given. Unknown keys or subkeys yield
        ``None``. The previous version raised KeyError while loading (it
        tested ``d[field][val] is None`` on a plain dict) and on unknown
        subkeys.
        """
        if self.stat_dict is None:
            # Assign only once fully built, so a failure while loading does
            # not leave a half-filled cache behind.
            self.stat_dict = self._load_stats()
        entry = self.stat_dict.get(key)
        if subkey is None or entry is None:
            return entry
        if isinstance(entry, dict):
            return entry.get(subkey)
        return None

    def score_comparator(self, x, y):
        """Three-way comparator on the scores of two songs (-1, 0 or 1)."""
        left, right = self.score[x], self.score[y]
        if left < right:
            return -1
        if left == right:
            return 0
        return 1

    def sort(self):
        """Sort recommendations by descending score; store and return them.

        Uses ``key=`` rather than the Python-2-only ``cmp=`` argument. The
        sort stays stable, so equally scored songs keep their input order.
        """
        self.sorted = sorted(self.recommendations,
                             key=lambda item: self.score[item],
                             reverse=True)
        return self.sorted

    # Get functions, for if you're interested
    def playlist_frequency(self, GMT):
        """Look up ``GMT`` (a tuple of form (G, M, T)) in ``self.frequencies``.

        NOTE(review): nothing in this class assigns ``self.frequencies``, so
        this raises AttributeError unless a caller sets it - confirm whether
        it should query ``self.user_ftree`` instead.
        """
        return self.frequencies[GMT]

    def score(self, item):
        """Return the score of the item as computed.

        NOTE(review): shadowed by the ``score`` dict that
        ``compute_scores()`` assigns on the instance.
        """
        return self.score[item]

    def _compute_scores(self, items, fields):
        """Return a dict mapping each item to its computed score."""
        return dict((item, self._compute_score(item, fields))
                    for item in items)

    def normalized_frequency(self, frequency):
        """Placeholder: the original declared this method without a body.

        NOTE(review): the intended formula is not visible in this file, so
        the method raises instead of guessing one.
        """
        raise NotImplementedError(
            "TwoPassFTSorter.normalized_frequency was never implemented")

    def _cardinality_normalized_frequency(self, frequency_value, fields,
                                          avoid_fields):
        """Scale a frequency by the cardinalities of the non-avoided fields.

        Returns ``frequency_value`` multiplied by the cardinality of every
        field in ``fields`` that is not in ``avoid_fields``.
        """
        cardinality = 1
        for field in fields:
            if field in avoid_fields:
                continue
            # Was a bare stats(...) call, which raised NameError.
            cardinality *= self.stats("cardinality", field)
        return frequency_value * cardinality
class FTSorter:
    """Sorter that ranks recommendations by how often they occur in the
    user's playlist, as reported by an ``FTree`` built over that playlist.

    NOTE(review): ``compute_scores()`` stores the score dict in the instance
    attribute ``score``, which shadows the ``score()`` method below, so that
    method cannot be called successfully. Left as is because callers may
    already read ``sorter.score[item]``.
    """

    # Generic functions
    def __init__(self):
        self.playlist = None  # list()
        self.recommendations = None  # list()
        self.sorted = None  # list()
        self.relevant_fields = None
        self.user_ftree = None
        self.stat_dict = None

    def set_relevant_fields(self, fields):
        """Store the field names to score on."""
        self.relevant_fields = fields

    def set_user_playlist(self, playlist):
        """ Takes a list of items ( of type Song ) in playlist. Stores them in self.playlist"""
        self.playlist = playlist

    def set_recommendations(self, recommendations):
        """ Takes a list of items ( of type Song ) in recommendations. Stores them in self.recommendations """
        self.recommendations = recommendations

    def build_user_ftree(self):
        """Build ``self.user_ftree`` over the playlist only."""
        self.user_ftree = FTree()
        self.user_ftree.set_items(self.playlist)
        self.user_ftree.set_fields(self.relevant_fields)
        self.user_ftree.build()

    def stats(self, key):
        """Return the statistic stored under ``key``, or None if unknown.

        The statistics are created on first use.
        """
        if self.stat_dict is None:
            # Placeholder value; the original notes 8000 (20 genres, 20
            # moods, 20 tempos assumed) and plans to take it from a database
            # query once data is available.
            self.stat_dict = {"max_combos": float(8)}
        return self.stat_dict.get(key)

    # Your access to working functions
    def compute_scores(self, fields=None):
        """Score every recommendation and store the result in ``self.score``.

        ``fields`` defaults to the relevant fields.
        """
        if fields is None:
            fields = self.relevant_fields
        self.score = self._compute_scores(self.recommendations, fields)

    def score_comparator(self, x, y):
        """Three-way comparator on the scores of two songs (-1, 0 or 1)."""
        left, right = self.score[x], self.score[y]
        if left < right:
            return -1
        if left == right:
            return 0
        return 1

    def sort(self):
        """Sort recommendations by descending score; store and return them.

        Uses ``key=`` rather than the Python-2-only ``cmp=`` argument. The
        sort stays stable, so equally scored songs keep their input order.
        """
        self.sorted = sorted(self.recommendations,
                             key=lambda item: self.score[item],
                             reverse=True)
        return self.sorted

    # Get functions, for if you're interested
    def playlist_frequency(self, GMT):
        """Look up ``GMT`` (a tuple of form (G, M, T)) in ``self.frequencies``.

        NOTE(review): nothing in this class assigns ``self.frequencies``, so
        this raises AttributeError unless a caller sets it - confirm whether
        it should query ``self.user_ftree`` instead.
        """
        return self.frequencies[GMT]

    def score(self, item):
        """Return the score of the item as computed.

        NOTE(review): shadowed by the ``score`` dict that
        ``compute_scores()`` assigns on the instance.
        """
        return self.score[item]

    def _compute_scores(self, items, fields):
        """Return a dict mapping each item to its computed score."""
        return dict((item, self._compute_score(item, fields))
                    for item in items)

    # Editable functions based on your model
    def _compute_score(self, GMT, fields=None):
        """Return the score of one item: its fondness plus one.

        Dividing by ``self.stats("max_combos")`` was disabled in the
        original.
        """
        support = self.fondness(GMT, fields) + 1  # / self.stats("max_combos")
        return support

    def fondness(self, GMT, fields=None):
        """Return the frequency of ``GMT`` in ``user_ftree`` as a float.

        ``fields`` defaults to the relevant fields. Dividing by the tree's
        item count was disabled in the original, so this is the raw
        frequency rather than a ratio.
        """
        if fields is None:
            fields = self.relevant_fields
        return float(self.user_ftree.get_frequency(GMT, fields))  # / self.user_ftree.item_count()
class FTSorter:
    """Sorter that ranks recommendations by how often they occur in the
    user's playlist, as reported by an ``FTree`` built over that playlist.

    NOTE(review): ``compute_scores()`` stores the score dict in the instance
    attribute ``score``, which shadows the ``score()`` method below, so that
    method cannot be called successfully. Left as is because callers may
    already read ``sorter.score[item]``.
    """

    # Generic functions
    def __init__(self):
        self.playlist = None  # list()
        self.recommendations = None  # list()
        self.sorted = None  # list()
        self.relevant_fields = None
        self.user_ftree = None
        self.stat_dict = None

    def set_relevant_fields(self, fields):
        """Store the field names to score on."""
        self.relevant_fields = fields

    def set_user_playlist(self, playlist):
        """ Takes a list of items ( of type Song ) in playlist. Stores them in self.playlist"""
        self.playlist = playlist

    def set_recommendations(self, recommendations):
        """ Takes a list of items ( of type Song ) in recommendations. Stores them in self.recommendations """
        self.recommendations = recommendations

    def build_user_ftree(self):
        """Build ``self.user_ftree`` over the playlist only."""
        self.user_ftree = FTree()
        self.user_ftree.set_items(self.playlist)
        self.user_ftree.set_fields(self.relevant_fields)
        self.user_ftree.build()

    def stats(self, key):
        """Return the statistic stored under ``key``, or None if unknown.

        The statistics are created on first use.
        """
        if self.stat_dict is None:
            # Placeholder value; the original notes 8000 (20 genres, 20
            # moods, 20 tempos assumed) and plans to take it from a database
            # query once data is available.
            self.stat_dict = {"max_combos": float(8)}
        return self.stat_dict.get(key)

    # Your access to working functions
    def compute_scores(self, fields=None):
        """Score every recommendation and store the result in ``self.score``.

        ``fields`` defaults to the relevant fields.
        """
        if fields is None:
            fields = self.relevant_fields
        self.score = self._compute_scores(self.recommendations, fields)

    def score_comparator(self, x, y):
        """Three-way comparator on the scores of two songs (-1, 0 or 1)."""
        left, right = self.score[x], self.score[y]
        if left < right:
            return -1
        if left == right:
            return 0
        return 1

    def sort(self):
        """Sort recommendations by descending score; store and return them.

        Uses ``key=`` rather than the Python-2-only ``cmp=`` argument. The
        sort stays stable, so equally scored songs keep their input order.
        """
        self.sorted = sorted(self.recommendations,
                             key=lambda item: self.score[item],
                             reverse=True)
        return self.sorted

    # Get functions, for if you're interested
    def playlist_frequency(self, GMT):
        """Look up ``GMT`` (a tuple of form (G, M, T)) in ``self.frequencies``.

        NOTE(review): nothing in this class assigns ``self.frequencies``, so
        this raises AttributeError unless a caller sets it - confirm whether
        it should query ``self.user_ftree`` instead.
        """
        return self.frequencies[GMT]

    def score(self, item):
        """Return the score of the item as computed.

        NOTE(review): shadowed by the ``score`` dict that
        ``compute_scores()`` assigns on the instance.
        """
        return self.score[item]

    def _compute_scores(self, items, fields):
        """Return a dict mapping each item to its computed score."""
        return dict((item, self._compute_score(item, fields))
                    for item in items)

    # Editable functions based on your model
    def _compute_score(self, GMT, fields=None):
        """Return the score of one item: its fondness plus one.

        Dividing by ``self.stats("max_combos")`` was disabled in the
        original.
        """
        support = self.fondness(GMT, fields) + 1  # / self.stats("max_combos")
        return support

    def fondness(self, GMT, fields=None):
        """Return the frequency of ``GMT`` in ``user_ftree`` as a float.

        ``fields`` defaults to the relevant fields. Dividing by the tree's
        item count was disabled in the original, so this is the raw
        frequency rather than a ratio.
        """
        if fields is None:
            fields = self.relevant_fields
        return float(self.user_ftree.get_frequency(GMT, fields))  # / self.user_ftree.item_count()