def run(self, strategy_context: RecommenderAlgorithmStrategyContext) -> np.ndarray:
    """Produce top-N recommendations per user with LensKit's Bias scorer.

    Reads the data set named by the context, holds out 20% of each user's
    ratings as a test split, fits a Bias model on the rest, and returns the
    recommended item ids as a 2-D array with one row per test user.
    """
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(data_set_source)
    data_set: DataFrame = data_frame_reader.parse(DataFrameReaderStrategyContext(data_set_source))
    # Single user-based split (partitions=1); SampleFrac(0.2) holds out 20% of
    # each user's rows as test data, and [0] takes that only partition.
    partition = list(partition_users(data=data_set, partitions=1, method=crossfold.SampleFrac(0.2)))[0]
    test, train = partition.test, partition.train
    number_of_recommendations = strategy_context.number_of_recommendations
    # Recommender.adapt wraps the plain Bias scorer so it exposes the
    # top-N recommend interface used by lenskit.batch below.
    algorithm = Recommender.adapt(Bias())
    trained_algorithm = algorithm.fit(train)
    recommendations = lenskit.batch.recommend(trained_algorithm, test['user'].unique(), number_of_recommendations)
    # groupby('user') with the identity apply is not a no-op: groupby sorts by
    # group key, so this re-orders the item column by user id before reshaping.
    # The reshape assumes every user received exactly number_of_recommendations
    # items — presumably guaranteed by batch.recommend here; verify for sparse data.
    return recommendations.groupby('user')['item'].apply(lambda x: x).to_numpy().reshape(
        (-1, number_of_recommendations))
def run(self, strategy_context: RecommenderAlgorithmStrategyContext
        ) -> np.ndarray:
    """Recommend the globally most popular items (by mean rating) to every user.

    Reads the data set named by the context, ranks items by their mean rating,
    and returns the same top-N item list for each user as a
    (number_of_users, number_of_recommendations) array — matching the shape
    produced by the other recommender strategies.
    """
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        data_set_source)
    data_set: DataFrame = data_frame_reader.parse(
        DataFrameReaderStrategyContext(data_set_source))
    all_users: np.ndarray = data_set['user'].unique()
    # pivot_table aggregates (mean by default) the rating per item; sorting
    # descending and slicing keeps the ids of the N best-rated items.
    top_items: np.ndarray = data_set.pivot_table(
        index='item', values='rating', fill_value=float('-inf')).sort_values(
            by='rating', ascending=False)[
                :strategy_context.number_of_recommendations].index.values
    # BUG FIX: the previous np.repeat(top_items, repeats=all_users.size)
    # produced a flat array with each item repeated consecutively, so reshaping
    # it to (users, recommendations) — as the sibling strategies' output shape
    # implies — would give every user N copies of a single item. np.tile lays
    # out one full top-N row per user instead. The flattened multiset of items
    # is unchanged, so frequency-based consumers see identical counts.
    return np.tile(top_items, (all_users.size, 1))
def run(self, strategy_context: RecommenderAlgorithmStrategyContext
        ) -> np.ndarray:
    """Recommend random items to every user, weighted by item popularity.

    Each user independently receives number_of_recommendations items sampled
    with replacement, where an item's probability is its share of all ratings
    in the data set. Returns a (number_of_users, number_of_recommendations)
    array of item ids.
    """
    data_set_source = strategy_context.data_set_source
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        data_set_source)
    data_set: DataFrame = data_frame_reader.parse(
        DataFrameReaderStrategyContext(data_set_source))
    # TODO See how to obtain this programatically, without hardcoding column
    # Normalized rating counts: index = item ids, values = sampling probabilities.
    relative_frequency_of_each_item: Series = data_set['item'].value_counts(
        normalize=True)
    all_users: np.ndarray = data_set['user'].unique()
    # ROBUSTNESS FIX: draw the candidate items and their probabilities from the
    # SAME Series so they are aligned by construction. The previous code paired
    # data_set['item'].unique() with value_counts(sort=False), which only line
    # up because both happen to preserve first-occurrence order — an implicit
    # coupling that silently mis-weights items if either ordering ever differs.
    all_recommendations: np.ndarray = np.random.choice(
        relative_frequency_of_each_item.index.values,
        size=(all_users.size, strategy_context.number_of_recommendations),
        replace=True,
        p=relative_frequency_of_each_item.values)
    return all_recommendations
def run(self, strategy_context: AlgorithmBiasStrategyContext) -> Graph:
    """Build a normalized, log-scaled graph of recommendation-frequency bias.

    Runs the context's recommender strategy over the data set, counts how often
    each item was recommended, and graphs the distribution of those counts:
    x = "recommended k times", y = "number of items recommended k times",
    normalized by the item count, log-scaled, and limited to 15 points.
    """
    data_set_source = strategy_context.data_set_source
    recommendations: np.ndarray = strategy_context.recommender_strategy.run(
        RecommenderAlgorithmStrategyContext(
            data_set_source=data_set_source,
            # TODO Have it as parameter
            number_of_recommendations=10))
    data_frame_reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        data_set_source)
    data_set: DataFrame = data_frame_reader.parse(
        DataFrameReaderStrategyContext(data_set_source))
    # TODO See how to obtain this programatically, without hardcoding column
    all_items: np.ndarray = data_set['item'].unique()
    # How many times each item id appears across all users' recommendation lists.
    recommendation_frequencies: pandas.Series = pandas.Series(
        data=recommendations.flatten()).value_counts()
    # BUG FIX: np.int is a deprecated alias removed in NumPy 1.24; the builtin
    # int is the documented replacement and behaves identically here.
    series_with_zero_frequencies_for_all_items: pandas.Series = pandas.Series(
        data=np.zeros(shape=(all_items.size, )), index=all_items, dtype=int)
    # combine_first keeps the observed counts and fills items that were never
    # recommended with the zero from the fallback series.
    frequencies_of_all_items: pandas.Series = recommendation_frequencies \
        .combine_first(series_with_zero_frequencies_for_all_items)
    # Frequency-of-frequencies: index = recommendation count k, value = number
    # of items with exactly that count, in ascending order of k.
    frequencies_of_frequencies_of_all_items = frequencies_of_all_items.value_counts(
        sort=False).sort_index()
    graph_points: List[GraphPoint] = [
        GraphPoint(x=k, y=v)
        for k, v in frequencies_of_frequencies_of_all_items.to_dict().items()
    ]
    # TODO optimize if necessary
    graph: Graph = Graph(points=graph_points)
    graph = NormalizeGraph(float(all_items.size)).process_graph(graph)
    graph = LogarithmicGraph().process_graph(graph)
    graph = LimitGraphToN(n=15).process_graph(graph)
    return graph
def run(self, strategy_context: DataBiasStrategyContext) -> Graph:
    """Build a normalized, log-scaled graph of rating-count bias in the data.

    Counts how many ratings each item has, then graphs the distribution of
    those counts: x = "has k ratings", y = "number of items with exactly k
    ratings", normalized, log-scaled, and limited to 15 points.
    """
    source = strategy_context.data_set_source
    reader: DataFrameReaderStrategy = self.data_frame_reader_factory.create(
        source)
    ratings: DataFrame = reader.parse(DataFrameReaderStrategyContext(source))
    # TODO See how to obtain this programatically, without hardcoding column
    ratings_per_item: pandas.Series = ratings['item'].value_counts(sort=False)
    # Frequency-of-frequencies, ascending by rating count k.
    distribution: pandas.Series = ratings_per_item.value_counts(
        sort=False).sort_index()
    points: List[GraphPoint] = [
        GraphPoint(x=count, y=item_total)
        for count, item_total in distribution.to_dict().items()
    ]
    # TODO optimize if necessary
    # NOTE(review): normalization divides by DataFrame.size (rows * columns),
    # not by the number of items or rows — confirm this denominator is intended.
    result: Graph = Graph(points=points)
    for processor in (NormalizeGraph(float(ratings.size)),
                      LogarithmicGraph(), LimitGraphToN(n=15)):
        result = processor.process_graph(result)
    return result