def test02():
    # Smoke test: train BPRMF (implicit) on the first 800k ML ratings and
    # print a top-50 recommendation list for user 23.
    print("Test 02")
    print("Running Recommender BPRMF on ML:")
    dataset: DatasetML = DatasetML.readDatasets()

    # training split: the first 800k ratings
    trainDataset: DatasetML = DatasetML("test", dataset.ratingsDF.iloc[0:800000],
                                        dataset.usersDF, dataset.itemsDF)
    print(dataset.ratingsDF.iloc[655924:655926])

    # build and train the recommender
    bprmfArgs = {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003,
    }
    rec: ARecommender = RecommenderBPRMFImplicit("test", bprmfArgs)
    rec.train(HistoryDF("test02"), trainDataset)

    # query recommendations for a fixed user
    print("Recommendations before update")
    r: Series = rec.recommend(23, 50, {})
    print(r)
    print("================== END OF TEST 02 ======================\n\n\n\n\n")
def test01():
    """Train RecommenderBPRMFImplicit on a ~500k ML slice, stream the next
    ~3k ratings through update() and compare recommendations before/after,
    finishing with a non-existent-user probe."""
    print("Test 01")
    print("Running Recommender BPRMF on ML:")
    dataset: DatasetML = DatasetML.readDatasets()

    # Take only first 500k ratings for training
    trainDataset: DatasetML = DatasetML("test", dataset.ratingsDF.iloc[0:499965],
                                        dataset.usersDF, dataset.itemsDF)

    # train recommender
    rec: ARecommender = RecommenderBPRMFImplicit("test", {
        RecommenderBPRMFImplicit.ARG_FACTORS: 20,
        RecommenderBPRMFImplicit.ARG_ITERATIONS: 50,
        RecommenderBPRMFImplicit.ARG_LEARNINGRATE: 0.003,
        RecommenderBPRMFImplicit.ARG_REGULARIZATION: 0.003})
    rec.train(HistoryDF("test01"), trainDataset)

    # ratings to stream through update()
    ratingsDFUpdate: DataFrame = dataset.ratingsDF.iloc[499965:503006]

    print("Recommendations before update")
    print(rec._movieFeaturesMatrixLIL[:, ratingsDFUpdate['userId'].iloc[0]].getnnz())
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    print(r)

    # feed the update ratings one row at a time
    for i in range(ratingsDFUpdate.shape[0]):
        rUp = ratingsDFUpdate.iloc[i:i + 1, :]
        rec.update(rUp, {})

    print("Recommendations after update")
    print(rec._movieFeaturesMatrixLIL[:, ratingsDFUpdate['userId'].iloc[0]].getnnz())
    r = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    print(r)

    # BUGFIX: the tail of this test was garbled in the source ("******");
    # reconstructed as a non-existent-user probe plus the closing banner.
    print("Test for non-existent user:")
    r = rec.recommend(999999999, 50, {})
    print(r)
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def test01():
    # Smoke test of RecommenderTheMostPopular on a 50k ML rating slice,
    # including one click update and a non-existent-user query.
    print("Test 01")
    print("Running RecommenderTheMostPopular ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    trainDataset: ADataset = DatasetML("test", ratingsDF.iloc[0:50000],
                                       pd.DataFrame(), pd.DataFrame())

    rec: ARecommender = RecommenderTheMostPopular("test", {})
    rec.train(HistoryDF("test"), trainDataset)

    # feed a single click back into the model
    rec.update(ARecommender.UPDT_CLICK, ratingsDF.iloc[50003:50004])

    r: Series = rec.recommend(331, 50, {})
    print(type(r))
    print(r)

    # testing of a non-existent user
    r = rec.recommend(10000, 50, {})
    print(type(r))
    print(r)
def test01():
    # Simulation run: FuzzyDHondtDirectOptimize portfolio on ML-1M.
    print("Simulation: ML FuzzyDHondtDirectOptimize")
    jobID: str = "Roulette1"
    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()
    aggrDescr = InputAggrDefinition.exportADescDHondtDirectOptimizeThompsonSampling(
        selector, "DCG")
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "FuzzyDHondtDirectOptimize" + jobID, rIDs, rDescs, aggrDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    # bandit-votes portfolio model with its matching evaluation tool
    model: DataFrame = PModelDHondtBanditsVotes(pDescr.getRecommendersIDs())
    eTool: AEvalTool = EvalToolDHondtBanditVotes({})

    # run the portfolio simulation (argsSimulationDict comes from module scope)
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [model], [eTool],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test01():
    # Simulation run: single-method portfolio (TheMostPopular) on ML-1M.
    print("Simulation: ML TheMostPopular")
    rDescr: RecommenderDescription = InputRecomMLDefinition.exportRDescTheMostPopular()
    pDescr: APortfolioDescription = Portfolio1MethDescription(
        InputRecomMLDefinition.THE_MOST_POPULAR.title(),
        InputRecomMLDefinition.THE_MOST_POPULAR, rDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    # simulation of portfolio (argsSimulationDict comes from module scope)
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test01():
    # Train RecommenderBPRMF on the full ML dataset and print top-20
    # recommendation lists for users 1 and 2.
    print("Test 01")
    print("Running Recommender BPRMF on ML:")
    batchID: str = "batchID"
    dataset: ADataset = DatasetML.readDatasets()
    history: AHistory = HistoryHierDF(["aa"])

    argumentsDict: Dict[str, object] = {
        RecommenderBPRMF.ARG_EPOCHS: 2,
        RecommenderBPRMF.ARG_FACTORS: 10,
        RecommenderBPRMF.ARG_LEARNINGRATE: 0.05,
        RecommenderBPRMF.ARG_UREGULARIZATION: 0.0025,
        RecommenderBPRMF.ARG_BREGULARIZATION: 0,
        RecommenderBPRMF.ARG_PIREGULARIZATION: 0.0025,
        RecommenderBPRMF.ARG_NIREGULARIZATION: 0.00025,
    }
    r: ARecommender = RecommenderBPRMF(batchID, argumentsDict)
    r.train(history, dataset)

    numberOfItems: int = 20
    for userId in (1, 2):
        res = r.recommend(userId, numberOfItems, argumentsDict)
        print(res)
def test01():
    # Simulation run: DHondt Thompson-sampling portfolio on ML-1M.
    print("Simulation: ML DHontThompsonSampling")
    jobID: str = "Roulette1"
    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "DHontThompsonSampling" + jobID, rIDs, rDescs,
        InputAggrDefinition.exportADescDHondtThompsonSampling(selector))

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    model: DataFrame = PModelDHondtBanditsVotes(pDescr.getRecommendersIDs())
    eTool: AEvalTool = EvalToolDHondtBanditVotes({})

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [model], [eTool],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test01():
    """Train RecommenderItemBasedKNN on a 50k ML slice, exercise update()
    and recommend(), including a non-existent-user probe."""
    print("Test 01")
    print("Running RecommenderItemBasedKNN ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k ratings for training
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]

    print("Recommendations before update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # BUGFIX: the tail of this test was garbled in the source ("******");
    # reconstructed as printing the result, probing a non-existent user,
    # and emitting the closing banner.
    print(r)
    print("Test for non-existent user:")
    r = rec.recommend(999999999, 50, {})
    print(r)
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def exportSimulatorML1M(self, batchID: str, divisionDatasetPercentualSize: int,
                        uBehaviourID: str, repetition: int):
    """Create a Simulator over the ML-1M dataset for the given batch setup."""
    # standard simulation arguments shared by ML-1M batches
    argsSimulationDict: dict = {
        SimulationML.ARG_WINDOW_SIZE: 5,
        SimulationML.ARG_RECOM_REPETITION_COUNT: repetition,
        SimulationML.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
        SimulationML.ARG_NUMBER_OF_AGGR_ITEMS: self.numberOfAggrItems,
        SimulationML.ARG_DIV_DATASET_PERC_SIZE: divisionDatasetPercentualSize,
        SimulationML.ARG_HISTORY_LENGTH: 10,
    }

    # dataset + user behaviour reading
    dataset: ADataset = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(uBehaviourID))

    # simulation of portfolio
    return Simulator(batchID, SimulationML, argsSimulationDict, dataset,
                     behavioursDF)
def test01():
    # Look up two known movies in the ML items table by their IDs and
    # print the matching rows.
    print("Test 01")
    dataset: ADataset = DatasetML.readDatasets()

    # 12::Dracula: Dead and Loving It(1995)::Comedy|Horror
    # 458::Geronimo: An American Legend(1993)::Drama|Western
    for movieID in (12, 458):
        matchingItemsDF = dataset.itemsDF[dataset.itemsDF[Items.COL_MOVIEID] == movieID]
        print(matchingItemsDF)
def divideDataset(dataset: ADataset, behaviourDF: DataFrame,
                  divisionDatasetPercentualSize: int,
                  testDatasetPercentualSize: int, recomRepetitionCount: int):
    """Chronologically split *dataset* into train/test parts.

    Returns a 4-tuple:
      trainDataset              -- DatasetML over the oldest ratings
      testRatingsDF             -- the following test slice of ratings
      testRelevantRatingsDF     -- test ratings with rating >= 4
      testRepeatedBehaviourDict -- repetition index -> behaviour DataFrame
                                   aligned with testRatingsDF's index
    """
    ratingsDF: DataFrame = dataset.ratingsDF
    usersDF: DataFrame = dataset.usersDF
    itemsDF: DataFrame = dataset.itemsDF

    # create train Dataset (oldest divisionDatasetPercentualSize % of ratings)
    ratingsSortedDF: DataFrame = ratingsDF.sort_values(by=Ratings.COL_TIMESTAMP)
    numberOfRatings: int = ratingsSortedDF.shape[0]
    trainSize: int = int(numberOfRatings * divisionDatasetPercentualSize / 100)
    trainRatingsDF: DataFrame = ratingsSortedDF[0:trainSize]
    datasetID: str = "ml1m" + "Div" + str(divisionDatasetPercentualSize)
    trainDataset: ADataset = DatasetML(datasetID, trainRatingsDF, usersDF, itemsDF)

    # create test Rating DataFrame (the next testDatasetPercentualSize %)
    testSize: int = int(numberOfRatings * testDatasetPercentualSize / 100)
    testRatingsDF: DataFrame = ratingsSortedDF[trainSize:(trainSize + testSize)]

    # create test relevant Rating DataFrame (rating >= 4 counts as relevant)
    testRelevantRatingsDF: DataFrame = testRatingsDF.loc[
        testRatingsDF[Ratings.COL_RATING] >= 4]

    # create behaviour dictionary of DataFrames indexed by recomRepetition
    recomRepetitionCountInDataset: int = behaviourDF[
        BehavioursML.COL_REPETITION].max() + 1
    testRepeatedBehaviourDict: dict = {}
    # behaviourDF stores recomRepetitionCountInDataset rows per rating row
    bIndexes: List[int] = list(
        [recomRepetitionCountInDataset * i for i in testRatingsDF.index])
    for repetitionI in range(recomRepetitionCount):
        # rows of behaviourDF belonging to this repetition
        indexes: List[int] = [vI + repetitionI for vI in bIndexes]
        behaviourDFI: DataFrame = DataFrame(
            behaviourDF.take(indexes).values.tolist(),
            index=testRatingsDF.index, columns=behaviourDF.keys())
        testRepeatedBehaviourDict[repetitionI] = behaviourDFI

    # BUGFIX: previously returned testRelevantRatingsDF twice (copy-paste);
    # the second element is now the full testRatingsDF as documented above.
    return (trainDataset, testRatingsDF, testRelevantRatingsDF,
            testRepeatedBehaviourDict)
def test03():
    # Train ItemBasedKNN on data that excludes user 23 and movie 10, then
    # inject a single rating (user 23 -> movie 10) via update() and ask for
    # recommendations for that user twice.
    print("Test 03")
    # userID: 23, currentItemID: 196, repetition: 0
    print("Running RecommenderItemBasedKNN ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    ratingsSortedDF: DataFrame = ratingsDF.sort_values(by=Ratings.COL_TIMESTAMP)
    filmsDF: DataFrame = Items.readFromFileMl1m()
    print(len(ratingsSortedDF))

    # first 900k ratings, with user 23 and movie 10 filtered out
    trainDF: DataFrame = ratingsSortedDF[0:900000]
    trainDF = trainDF[trainDF[Ratings.COL_USERID] != 23]
    trainDF = trainDF[trainDF[Ratings.COL_MOVIEID] != 10]
    print(trainDF.head(25))

    trainDataset: ADataset = DatasetML("test", trainDF, pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test1", {})
    rec.train(HistoryDF("test03"), trainDataset)

    # inject the held-out interaction
    updateDF: DataFrame = pd.DataFrame(
        [[23, 10, 4, 10000]],
        columns=[Ratings.COL_USERID, Ratings.COL_MOVIEID,
                 Ratings.COL_RATING, Ratings.COL_TIMESTAMP])
    rec.update(updateDF, {})

    r: Series = rec.recommend(23, 10, {})
    print(r)
    print("\n")
    r = rec.recommend(23, 10, {})
    print(r)
    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def test02():
    # Simulation run: single-method W2V portfolio on ML-1M.
    print("Simulation: ML W2V")
    rDescr: RecommenderDescription = \
        InputRecomMLDefinition.exportRDescW2Vtpositivei50000ws1vs32upsweightedMeanups3()
    pDescr: APortfolioDescription = Portfolio1MethDescription(
        "W2vPositiveMax", "w2vPositiveMax", rDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test06():
    # Simulation run: single-method VMContextKNN portfolio on ML.
    print("Simulation: ML VMCMF")
    rDescr: RecommenderDescription = InputRecomSTDefinition.exportRDescVMContextKNN()
    pDescr: APortfolioDescription = Portfolio1MethDescription(
        InputRecomSTDefinition.VMC_KNN.title(),
        InputRecomSTDefinition.VMC_KNN, rDescr)

    batchID: str = "mlDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def run(self, batchID: str, jobID: str):
    # Execute one context-DHondt-INF simulation for the given batch/job ids.
    divisionDatasetPercentualSize: int
    uBehaviour: str
    repetition: int
    divisionDatasetPercentualSize, uBehaviour, repetition = \
        InputABatchDefinition().getBatchParameters(self.datasetID)[batchID]

    selector: ADHondtSelector
    negativeImplFeedback: APenalization
    selector, negativeImplFeedback = self.getParameters()[jobID]

    portfolioID: str = self.getBatchName() + jobID
    history: AHistory = HistoryHierDF(portfolioID)

    dataset: ADataset = DatasetML.readDatasets()

    # Init evalTool: contextual evaluation needs users, items and the history
    evalTool: AEvalTool = EvalToolContext({
        EvalToolContext.ARG_USERS: dataset.usersDF,
        EvalToolContext.ARG_ITEMS: dataset.itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: history})

    rIDs, rDescs = InputRecomMLDefinition.exportPairOfRecomIdsAndRecomDescrs()
    aDescContextDHont: AggregationDescription = \
        InputAggrDefinition.exportADescDContextHondtINF(
            selector, negativeImplFeedback, evalTool)
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        portfolioID, rIDs, rDescs, aDescContextDHont)

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())

    simulator: Simulator = InputSimulatorDefinition().exportSimulatorML1M(
        batchID, divisionDatasetPercentualSize, uBehaviour, repetition)
    simulator.simulate([pDescr], [model], [evalTool], [history])
def test01():
    # Simulation run: ContextDHondtINF portfolio on ML-1M.
    print("Simulation: ML ContextDHondtINF")
    jobID: str = "Roulette1"
    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    # negative implicit-feedback penalisation
    penalty: APenalization = \
        PenalizationToolDefinition.exportPenaltyToolOLin0802HLin1002(
            InputSimulatorDefinition.numberOfAggrItems)

    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    historyDF: AHistory = HistoryDF("test01")

    eTool: AEvalTool = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF})

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "ContextDHondtINF" + jobID, rIDs, rDescs,
        InputAggrDefinition.exportADescDContextHondtINF(selector, penalty, eTool))

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [model], [eTool],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test11():
    # Offline hit-count evaluation of BPRMF: train on the 90% split, then
    # count how many test-window items appear in each test user's top-20.
    print("Test 11")
    print("Running Recommender BPRMF on ML:")

    trainDataset: ADataset
    testDataset: ADataset
    trainDataset, testDataset = DatasetML.readDatasets().divideDataset(90)
    testUserIDs: ndarray = testDataset.ratingsDF[Ratings.COL_USERID].unique()

    history: AHistory = HistoryHierDF(["aa"])
    numberOfItems: int = 20

    rd: RecommenderDescription = InputRecomMLDefinition.exportRDescBPRMF()
    r: ARecommender = rd.exportRecommender("aaa")
    argumentsDict: Dict = rd.getArguments()
    r.train(history, trainDataset)

    numberOfHit: int = 0
    for userIdI in testUserIDs:
        recI: Series = r.recommend(int(userIdI), numberOfItems, argumentsDict)
        recItemIDsI: List[int] = list(recI.keys())
        # items this user actually interacted with in the test window
        windowItemIds: List[int] = testDataset.ratingsDF.loc[
            testDataset.ratingsDF[Ratings.COL_USERID] == userIdI][
            Ratings.COL_MOVIEID].unique()
        numberOfHit += len(set(recItemIDsI) & set(windowItemIds))
    print("")
    print("numberOfHit: " + str(numberOfHit))
def test01():
    """Train RecommenderVMContextKNN on an 800k ML slice with timing, apply
    one click update, and query recommendations before/after, including a
    non-existent-user probe."""
    print("Test 01")
    print("Running RecommenderItemBasedKNN ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 800k ratings for training
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:800000]
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender (timed)
    rec: ARecommender = RecommenderVMContextKNN("test", {})
    start = time.time()
    rec.train(HistoryDF("test01"), trainDataset)
    end = time.time()
    print("Time to train: " + str(end - start))

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[800006:800007]

    print("Recommendations before update")
    start = time.time()
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    end = time.time()
    # BUGFIX: this log line previously said "Time to train" although it
    # measures the recommend() call.
    print("Time to recommend: " + str(end - start))

    rec.update(ARecommender.UPDT_CLICK, ratingsDFUpdate)

    print("Recommendations after update")
    r = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # BUGFIX: the tail of this test was garbled in the source ("******");
    # reconstructed as a non-existent-user probe plus the closing banner.
    print(r)
    print("Test for non-existent user:")
    r = rec.recommend(999999999, 50, {})
    print(r)
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
def test01():
    """Train RecommenderCosineCB on a 50k ML slice, exercise update() and
    recommend() for an existing and a non-existent user."""
    print("Test 01")
    print("Running RecommenderCosineCB ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    trainDataset: ADataset = DatasetML("test", ratingsDF.iloc[0:50000],
                                       pd.DataFrame(), pd.DataFrame())

    # NOTE: the recommender reads the TFIDF CB data file; an unused local
    # pointing at the OHE variant (cbDataPath) was removed as dead code.
    args: dict = {
        RecommenderCosineCB.ARG_CB_DATA_PATH:
            Configuration.cbML1MDataFileWithPathTFIDF,
        RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5,
        RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max",
        RecommenderCosineCB.ARG_USE_DIVERSITY: True,
    }
    rec: ARecommender = RecommenderCosineCB("test", args)
    rec.train(HistoryDF("test"), trainDataset)

    # one rating fed back through update()
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50003:50004]
    rec.update(ratingsDFUpdate, args)
    print(len(rec.userProfiles[331]))

    print("max")
    r: Series = rec.recommend(331, 20, args)
    print(type(r))
    print(r)

    # testing of a non-existent user
    print("mean")
    r = rec.recommend(10000, 20, args)
    print(type(r))
    print(r)
def visualizationML():
    # Plot a step histogram of per-user rating counts for the ML dataset.
    print("visualizationML")
    from datasets.ml.ratings import Ratings  # class

    # dataset reading
    dataset: ADataset = DatasetML.readDatasets()
    print(dataset.ratingsDF.head())

    userIdsWithDuplicites: List[int] = dataset.ratingsDF[
        Ratings.COL_USERID].tolist()
    uniqueUserIds: List[int] = list(set(userIdsWithDuplicites))

    # one histogram bin per distinct user
    plt.hist(userIdsWithDuplicites, len(uniqueUserIds), None,
             fc='none', lw=1.5, histtype='step')
    plt.ticklabel_format(style='plain')
    plt.show()
def test00():
    """Exercise SimulationML.divideDataset on a tiny hand-made dataset."""
    ratingsDF: DataFrame = DataFrame(
        {'$a': [2783, 2783, 2783, 3970, 3970],
         '$b': [1909, 1396, 2901, 3408, 3407],
         '$c': [2, 4, 5, 4, 1],
         '$d': [100, 101, 102, 103, 104]})
    ratingsDF.columns = [Ratings.COL_USERID, Ratings.COL_MOVIEID,
                         Ratings.COL_RATING, Ratings.COL_TIMESTAMP]
    print(ratingsDF)
    print("")

    # two behaviour rows (repetitions 0 and 1) per rating row
    behaviourDF: DataFrame = DataFrame(
        {'$a': [2783, 2783, 2783, 2783, 2783, 2783, 3970, 3970, 3970, 3970],
         '$b': [1909, 1909, 1396, 1396, 2901, 2901, 2901, 2901, 3407, 3407],
         '$c': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1],
         '$d': ["b0000000000"] * 10})
    behaviourDF.columns = [BehavioursML.COL_USERID, BehavioursML.COL_MOVIEID,
                           BehavioursML.COL_REPETITION,
                           BehavioursML.COL_BEHAVIOUR]
    print(behaviourDF)
    print("")

    # BUGFIX: DatasetML takes a dataset id as its first argument (as every
    # other construction site in this file does).
    dataset: ADataset = DatasetML("test", ratingsDF, DataFrame(), DataFrame())

    divisionDatasetPercentualSize: int = 50
    testDatasetPercentualSize: int = 50
    recomRepetitionCount: int = 2

    # BUGFIX: divideDataset returns a 4-tuple; the original unpacked only
    # three names and would raise a ValueError.
    trainDataset: ADataset
    testRatingsDF: DataFrame
    testRelevantRatingsDF: DataFrame
    testRepeatedBehaviourDict: dict
    trainDataset, testRatingsDF, testRelevantRatingsDF, testRepeatedBehaviourDict = \
        SimulationML.divideDataset(dataset, behaviourDF,
                                   divisionDatasetPercentualSize,
                                   testDatasetPercentualSize,
                                   recomRepetitionCount)
    print("---------------------")
    print("trainRatingsDF:")
    print(trainDataset.ratingsDF)
    print("")
    print("testRatingsDF:")
    print(testRatingsDF)
    print("")
    print("")
    print(testRepeatedBehaviourDict[0])
    print("")
    print("")
    print(testRepeatedBehaviourDict[1])
    print("")
def test02():
    # Train ItemBasedKNN on the first 1M ratings and print top-50 for user 1.
    print("Test 02")
    print("Running RecommenderItemBasedKNN ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    filmsDF: DataFrame = Items.readFromFileMl1m()

    trainDataset: ADataset = DatasetML("test", ratingsDF.iloc[0:1000000],
                                       pd.DataFrame(), filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test02"), trainDataset)

    r: Series = rec.recommend(1, 50, {})
    print(r)
    print("================== END OF TEST 02 ======================\n\n\n\n\n")
def test01():
    # Run MMR re-ranking over a fixed relevance vector.
    print("")
    dataset: DatasetML = DatasetML.readDatasets()

    toolMMR: ToolMMR = ToolMMR()
    toolMMR.init(dataset)

    # itemID -> relevance score
    inputRecommendation: Dict[int, float] = {
        2390: 0.585729,
        1858: 0.513299,
        2169: 0.471460,
        3125: 0.376679,
        3624: 0.369205,
    }
    lambda_: float = 0.5
    numberOfItems: int = 20

    reranked = toolMMR.mmr_sorted(lambda_, pd.Series(inputRecommendation),
                                  numberOfItems)
    print(reranked)
def test01():
    # Simulation run: ContextFuzzyDHondtDirectOptimize portfolio on ML-1M.
    print("Simulation: ML ContextFuzzyDHondtDirectOptimize")
    jobID: str = "Roulette1"
    selector: ADHondtSelector = RouletteWheelSelector(
        {RouletteWheelSelector.ARG_EXPONENT: 1})

    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    historyDF: AHistory = HistoryHierDF("test01")

    eTool: AEvalTool = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF})

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "ContextFuzzyDHondtDirectOptimize" + jobID, rIDs, rDescs,
        InputAggrDefinition.exportADescContextFuzzyDHondtDirectOptimize(
            selector, eTool))

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [model], [eTool], [historyDF])
def test04():
    # Simulation run: single-method cosine-CB portfolio on ML-1M.
    print("Simulation: ML CB")
    rDescr: RecommenderDescription = \
        InputRecomMLDefinition.exportRDescCosineCBcbdOHEupsmaxups1()
    pDescr: APortfolioDescription = Portfolio1MethDescription(
        InputRecomMLDefinition.COS_CB_WINDOW3.title(),
        InputRecomMLDefinition.COS_CB_WINDOW3, rDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test01():
    # Simulation run: FuzzyDHondtINF portfolio on ML-1M.
    print("Simulation: ML FuzzyDHondtINF")
    jobID: str = "Roulette1"
    selector = RouletteWheelSelector({RouletteWheelSelector.ARG_EXPONENT: 1})

    # negative implicit-feedback penalisation
    penalty: APenalization = \
        PenalizationToolDefinition.exportProbPenaltyToolOStat08HLin1002(
            InputSimulatorDefinition.numberOfAggrItems)

    rIDs, rDescs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        "FuzzyDHondtINF" + jobID, rIDs, rDescs,
        InputAggrDefinition.exportADescDHondtINF(selector, penalty))

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behavioursDF: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    model: DataFrame = PModelDHondt(pDescr.getRecommendersIDs())

    # click learning rate dominates; views learn 500x slower
    lrClick: float = 0.03
    lrView: float = lrClick / 500
    eTool: AEvalTool = EvalToolDHondt({
        EvalToolDHondt.ARG_LEARNING_RATE_CLICKS: lrClick,
        EvalToolDHondt.ARG_LEARNING_RATE_VIEWS: lrView})

    # argsSimulationDict is expected at module scope
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behavioursDF)
    simulator.simulate([pDescr], [model], [eTool],
                       [HistoryHierDF(pDescr.getPortfolioID())])
def test01():
    # Train RecommenderW2V on a 50k ML slice, apply one update and query
    # an existing user (331) and a non-existent one (10000).
    print("Test 01")
    print("Running RecommenderW2V ML:")
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    trainDataset: ADataset = DatasetML("ml1mDiv90", ratingsDF.iloc[0:50000],
                                       pd.DataFrame(), pd.DataFrame())

    argsDict: Dict[str, str] = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'positive',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: -1,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 128,
        RecommenderW2V.ARG_WINDOW_SIZE: 5,
    }
    rec: ARecommender = RecommenderW2V("RecommenderW2V", argsDict)
    rec.train(HistoryDF("w2v"), trainDataset)

    # single-rating update
    rec.update(ratingsDF.iloc[50003:50004], {})
    print(len(rec.userProfiles[331]))

    r: Series = rec.recommend(331, 50, argsDict)
    print("max")
    print(type(r))
    print(r)

    # non-existent user
    r = rec.recommend(10000, 50, argsDict)
    print("mean")
    print(type(r))
    print(r)
def test01():
    # Cluster-based recommender smoke test on the ML dataset.
    print("Test 01")
    dataset: ADataset = DatasetML.readDatasets()
    itemsDF: DataFrame = dataset.itemsDF

    from datasets.ml.items import Items  # class

    dataset.getTheMostPopularOfGenre(Items.GENRE_COMEDY)

    argsDict: dict = {RecommenderClusterBased.ARG_RECOMMENDER_NUMERIC_ID: 1}
    r: ARecommender = RecommenderClusterBased("test", argsDict)
    r.train(HistoryDF("test01"), dataset)

    userID: int = 1
    rItemIds: List[int] = r.recommend(userID, 20, argsDict)
    print(rItemIds)
# Builds a tiny hand-crafted ratings dataset (filler ratings for user 1 plus a
# few clicks on items 9001-9003 by users 2/3, then a "testing" tail for users
# 11/12), wraps it in a DatasetML together with a synthetic behaviour table
# (5 repetitions of an all-True window per rating), and runs a single-method
# TheMostPopular portfolio through the Simulator.
# NOTE(review): the indentation of this function was lost in this source — in
# particular it is no longer recoverable which of the `testRatingsDF.loc[...]`
# append statements belong inside the `for i in [i + 0 for i in range(5 * 8)]`
# loop; the code below is kept byte-identical rather than re-formatted.
# NOTE(review): `testRatingsDF.loc[i]` overwrites rows once len(testRatingsDF)
# exceeds i if the appends share the loop — confirm against the original repo.
def test01(): print("Test 01") rDescr: RecommenderDescription = RecommenderDescription( RecommenderTheMostPopular, {}) recommenderID: str = "TheMostPopular" pDescr: Portfolio1MethDescription = Portfolio1MethDescription( recommenderID.title(), recommenderID, rDescr) dataset: ADataset = DatasetML.readDatasets() history: AHistory = HistoryDF("test") p: APortfolio = pDescr.exportPortfolio("jobID", history) p.train(history, dataset) # r, rwr = p.recommend(1, DataFrame(), {APortfolio.ARG_NUMBER_OF_AGGR_ITEMS:20}) # rItemID1 = r[0] # rItemID2 = r[1] # rItemID3 = r[2] # # print(r) # print("rItemID1: " + str(rItemID1)) # print("rItemID2: " + str(rItemID2)) # print("rItemID3: " + str(rItemID3)) testRatingsDF: DataFrame = DataFrame(columns=[ Ratings.COL_USERID, Ratings.COL_MOVIEID, Ratings.COL_RATING, Ratings.COL_TIMESTAMP ]) timeStampI: int = 1000 userID1: int = 1 userID2: int = 2 userID3: int = 3 rItemID1: int = 9001 rItemID2: int = 9002 rItemID3: int = 9003 # training part of dataset for i in [i + 0 for i in range(5 * 8)]: timeStampI = timeStampI + 1 testRatingsDF.loc[i] = [userID1] + list([9000, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list( [rItemID1, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list( [rItemID2, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID3] + list( [rItemID3, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list( [rItemID2, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID2] + list( [rItemID2, 5, timeStampI]) # testing part of dataset userID11: int = 11 userID12: int = 12 timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list( [rItemID1, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list( [rItemID2, 5, timeStampI]) timeStampI = 
timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list( [rItemID3, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID12] + list( [rItemID2, 5, timeStampI]) timeStampI = timeStampI + 1 testRatingsDF.loc[len(testRatingsDF)] = [userID11] + list( [rItemID2, 5, timeStampI]) print("len(testRatingsDF): " + str(len(testRatingsDF))) print(testRatingsDF.head(20)) print(testRatingsDF.tail(20)) datasetMy: ADataset = DatasetML("", testRatingsDF, dataset.usersDF, dataset.itemsDF) behavioursDF: DataFrame = DataFrame( columns=[BehavioursML.COL_REPETITION, BehavioursML.COL_BEHAVIOUR]) for ratingIndexI in range(len(testRatingsDF)): for repetitionI in range(5): behavioursDF.loc[ratingIndexI * 5 + repetitionI] = list( [repetitionI, [True] * 20]) print(behavioursDF.head(20)) argsSimulationDict: Dict[str, str] = { SimulationML.ARG_WINDOW_SIZE: 5, SimulationML.ARG_RECOM_REPETITION_COUNT: 1, SimulationML.ARG_NUMBER_OF_RECOMM_ITEMS: 100, SimulationML.ARG_NUMBER_OF_AGGR_ITEMS: InputSimulatorDefinition.numberOfAggrItems, SimulationML.ARG_DIV_DATASET_PERC_SIZE: 90, SimulationML.ARG_HISTORY_LENGTH: 10 } # simulation of portfolio simulator: Simulator = Simulator("test", SimulationML, argsSimulationDict, datasetMy, behavioursDF) simulator.simulate([pDescr], [DataFrame()], [EToolDoNothing({})], HistoryHierDF)
# Compares ItemBasedKNN vs TheMostPopular inside a contextual fuzzy-D'Hondt
# aggregation: trains both on a 50k ML slice, seeds a history for user 352,
# aggregates their top-N lists with AggrFuzzyDHondt / AggrContextFuzzyDHondt,
# then loops over the user's ratings simulating displayed/click feedback into
# EvalToolContext and counting which base recommender the random click hit
# (r1c / r2c tallies).
# NOTE(review): the indentation of this function was lost in this source — the
# extents of the `for _ in range(repetitions)` / `for index, row in
# ratingsDFuserID.iterrows()` loops (and which trailing statements, e.g.
# `print(methodsParamsDF)` and `counter += 1`, sit at which nesting level)
# cannot be recovered; the code below is kept byte-identical rather than
# re-formatted.
# NOTE(review): `random` is imported mid-function, twice; `counter` is
# incremented but otherwise only used in a commented-out debug print.
def test02(repetitions=1): N = 100 # get dataset itemsDF: DataFrame = Items.readFromFileMl1m() usersDF: DataFrame = Users.readFromFileMl1m() ratingsDF: DataFrame = Ratings.readFromFileMl1m() ratingsDFTrain = ratingsDF[:50000] ratingsDFUpdate: DataFrame = ratingsDF.iloc[50001:50100] trainDataset: ADataset = DatasetML("ml", ratingsDFTrain, usersDF, itemsDF) historyDF: AHistory = HistoryDF("test01") # train KNN rec1: ARecommender = RecommenderItemBasedKNN("run", {}) rec1.train(HistoryDF("test01"), trainDataset) # train Most Popular rec2: ARecommender = RecommenderTheMostPopular("run", {}) rec2.train(historyDF, trainDataset) # methods parametes methodsParamsData: List[tuple] = [['ItembasedKNN', 0.4], ['MostPopular', 0.6]] methodsParamsDF: DataFrame = pd.DataFrame(methodsParamsData, columns=["methodID", "votes"]) methodsParamsDF.set_index("methodID", inplace=True) userID = 352 ratingsDFuserID = ratingsDF[ratingsDF['userId'] == userID] itemID = ratingsDFuserID.iloc[0]['movieId'] historyDF: AHistory = HistoryDF("test01") historyDF.insertRecommendation(userID, itemID, 1, True, 10) r1: Series = rec1.recommend(userID, N, {}) r2: Series = rec2.recommend(userID, N, {}) methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2} evaluationDict: dict = { EvalToolContext.ARG_USER_ID: userID, EvalToolContext.ARG_RELEVANCE: methodsResultDict } evalToolDHondt = EvalToolContext({ EvalToolContext.ARG_USERS: usersDF, EvalToolContext.ARG_ITEMS: itemsDF, EvalToolContext.ARG_DATASET: "ml", EvalToolContext.ARG_HISTORY: historyDF }) aggr: AggrContextFuzzyDHondt = AggrContextFuzzyDHondt( historyDF, { AggrContextFuzzyDHondt.ARG_EVAL_TOOL: evalToolDHondt, AggrContextFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({}) }) aggrInit: AggrFuzzyDHondt = AggrFuzzyDHondt( historyDF, {AggrFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})}) l1 = aggrInit.runWithResponsibility(methodsResultDict, methodsParamsDF, userID, N) import random print("l1:" + str(l1)) evalToolDHondt.displayed(l1, 
methodsParamsDF, evaluationDict) evalToolDHondt.click(l1, random.choice(l1)[0], methodsParamsDF, evaluationDict) timestamp = 10 counter = 0 r1c = 0 r2c = 0 for _ in range(repetitions): for index, row in ratingsDFuserID.iterrows(): r1: Series = rec1.recommend(userID, N, {}) r2: Series = rec2.recommend(userID, N, {}) methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2} evalDict = {"a": 1} historyDF.insertRecommendation(userID, row['movieId'], 1, True, timestamp) timestamp += 1 l1 = aggr.runWithResponsibility(methodsResultDict, methodsParamsDF, userID, argumentsDict=evalDict, numberOfItems=N) import random randomItem = random.choice(l1)[0] if randomItem in r1.index: r1c += 1 if randomItem in r2.index: r2c += 1 evaluationDict: dict = { EvalToolContext.ARG_USER_ID: userID, EvalToolContext.ARG_RELEVANCE: methodsResultDict } print("votes Items: ", r1c) print("votes mostPopular ", r2c) evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict) evalToolDHondt.click(l1, randomItem, methodsParamsDF, evaluationDict) rec1.update(ratingsDFuserID.loc[[index]], {}) # rec2.update(ratingsDFuserID.loc[index]) Not implemented #print("Counter = ", counter, "; All = ", len(ratingsDFuserID.iloc[800:]), "; Index: ", index) print(methodsParamsDF) counter += 1