def prepare_test_set(self, lower, upper):
    """Restrict ``self.test_set`` to the items selected for an interval.

    Asks ``utils.get_itemID_between_intervals`` for the item IDs of
    ``self.cf_matrix`` lying between ``lower`` and ``upper`` and removes
    every other key from ``self.test_set`` in place.

    NOTE(review): which quantity the bounds apply to is decided by the
    helper (presumably a per-item rating count) -- confirm in ``utils``.

    :param lower: lower bound forwarded to the helper.
    :param upper: upper bound forwarded to the helper.
    """
    allowed_ids = utils.get_itemID_between_intervals(
        self.cf_matrix, lower=lower, upper=upper)
    current_ids = set(self.test_set.keys())
    # The removals are computed as a separate set first, so the mapping is
    # never mutated while its own keys are being iterated.
    for stale_id in current_ids.difference(allowed_ids):
        self.test_set.pop(stale_id)
def test_TopN_intervals(self, N=100, interval=100, min_item=20):
    """Print and plot the mean ``take_hits`` value per interval.

    Sweeps consecutive intervals of width ``interval`` starting at 0 until
    the upper edge passes ``self.cf_matrix.shape[0]``. For every interval
    whose item list (from ``utils.get_itemID_between_intervals``) holds at
    least ``min_item`` entries, the sum of ``self.take_hits(...)`` divided
    by the number of items is recorded, printed, and finally handed to
    ``TopNCorrExperiment.draw_topN_corr`` together with the interval
    midpoints.

    :param N: forwarded to ``take_hits`` and to the plotting call.
    :param interval: width of each interval; must be positive.
    :param min_item: minimum number of items an interval needs in order
        to be reported.
    :raises ValueError: if ``interval`` is not positive.
    """
    if interval <= 0:
        # A non-positive step never moves ``up`` past the bound, so the
        # sweep below would never terminate.
        raise ValueError("interval must be positive, got %r" % (interval,))

    mat = self.cf_matrix
    # The original author noted a 3417x6000 matrix here.
    # NOTE(review): the sweep is bounded by the first dimension; confirm
    # that this is the intended upper limit for the interval edges.
    bound = mat.shape[0]
    low = up = 0
    values = []
    inters = []
    while bound >= up:
        up += interval
        movies_compare = utils.get_itemID_between_intervals(mat, low, up)
        num_movies = len(movies_compare)
        # An empty interval has no mean; skipping it also avoids a
        # division by zero when ``min_item`` is 0 or negative.
        if num_movies and num_movies >= min_item:
            hits = self.take_hits(N=N, movies=movies_compare)
            mean = sum(hits) / float(num_movies)
            values.append(mean)
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print("%d-%d %f" % (low + 1, up, mean))
            # Midpoint of the interval, stored alongside its mean.
            inters.append((low + up) / 2)
        low = up
    TopNCorrExperiment.draw_topN_corr(inters, values, N=N)
def test_category_accuracy_interval(self, interval=100, N=100, min_item=20):
    """Print, plot and return the category score per interval.

    Sweeps consecutive intervals of width ``interval`` starting at 0 until
    the upper edge passes ``self.cf_matrix.shape[0]``. For every interval
    whose item list (from ``utils.get_itemID_between_intervals``) holds at
    least ``min_item`` entries, the first element returned by
    ``self.take_hits(...)`` is recorded and printed. The collected series
    is passed to ``CategoryCorrExperiment.draw_catN_corr`` and returned.

    NOTE(review): an earlier comment in this method described a ``harsh``
    flag ("every genre of a movie should be fitted to the other movie").
    No such parameter exists here -- presumably it is handled elsewhere.

    :param interval: width of each interval; must be positive.
    :param N: forwarded to ``take_hits`` and to the plotting call.
    :param min_item: minimum number of items an interval needs in order
        to be reported.
    :returns: ``(inters, values)`` -- interval midpoints and the matching
        scores, in sweep order.
    :raises ValueError: if ``interval`` is not positive.
    """
    if interval <= 0:
        # A non-positive step never moves ``up`` past the bound, so the
        # sweep below would never terminate. Checked before any setup
        # work is done.
        raise ValueError("interval must be positive, got %r" % (interval,))

    cat_data, mid_dict = create_category_database()
    try:
        mat = self.cf_matrix
    except AttributeError:
        # The matrix has not been built yet; build it on demand.
        self.lightweight_setup()
        mat = self.cf_matrix

    bound = mat.shape[0]
    low = up = 0
    values = []
    inters = []
    while bound >= up:
        up += interval
        movies_compare = utils.get_itemID_between_intervals(mat, low, up)
        if len(movies_compare) >= min_item:
            # take_hits returns a pair here; only its first element is
            # used by this method.
            total, _unused = self.take_hits(cat_data, mid_dict, N=N,
                                            movies=movies_compare)
            values.append(total)
            # The parenthesised single-argument form prints the same text
            # under the Python 2 print statement and the Python 3 function.
            print("%d-%d %f" % (low + 1, up, total))
            # Midpoint of the interval, stored alongside its score.
            inters.append((low + up) / 2)
        low = up
    CategoryCorrExperiment.draw_catN_corr(inters, values, N=N)
    return (inters, values)