示例#1
0
    def exportPairOfRecomIdsAndRecomDescrsCluster(cls):
        """Build parallel lists of cluster-recommender IDs and descriptions.

        One entry is produced per genre returned by ``Items.getAllGenres()``:
        the ID is ``"RecomCluster"`` followed by the genre name, and the
        description is whatever ``cls.exportRDescClusterBased`` returns for
        the genre's position in that list.

        Returns:
            A ``(rIds, rDescs)`` tuple of two lists aligned by index.
        """
        recom: str = "Recom" + "Cluster"

        # Fetch the genre list once instead of re-querying it on every
        # loop iteration.
        genres: List[str] = Items.getAllGenres()

        rIds: List[str] = []
        rDescs: List[RecommenderDescription] = []

        for gIdI, genreI in enumerate(genres):
            rIds.append(recom + genreI)
            rDescs.append(cls.exportRDescClusterBased(gIdI))

        return (rIds, rDescs)
    def run(self, batchID:str, jobID:str):
        """Run one simulation job of this batch on the ML-1M data.

        Looks up the batch parameters for ``batchID`` and the selector for
        ``jobID``, builds a single-aggregation portfolio description that
        uses a context evaluation tool, and passes it to the simulator.

        Args:
            batchID: key into the batch-parameter mapping obtained from
                ``InputABatchDefinition().getBatchParameters(self.datasetID)``.
            jobID: key into ``self.getParameters()``; it is also appended to
                the batch name to form the portfolio ID.
        """
        # The batch entry is unpacked into division size, behaviour ID and
        # repetition count.
        batchParams = InputABatchDefinition().getBatchParameters(self.datasetID)
        divisionDatasetPercentualSize, uBehaviour, repetition = batchParams[batchID]

        chosenSelector:ADHondtSelector = self.getParameters()[jobID]

        items:DataFrame = Items.readFromFileMl1m()
        users:DataFrame = Users.readFromFileMl1m()

        # NOTE(review): the evaluation tool receives this "test01" history,
        # while simulate() below is given a separate history keyed by the
        # portfolio ID - confirm that this split is intended.
        evalHistory:AHistory = HistoryHierDF("test01")

        eTool:AEvalTool = EvalToolContext({
            EvalToolContext.ARG_USERS: users,
            EvalToolContext.ARG_ITEMS: items,
            EvalToolContext.ARG_DATASET: "ml",
            EvalToolContext.ARG_HISTORY: evalHistory})

        recomIDs, recomDescrs = InputRecomMLDefinition.exportPairOfRecomIdsAndRecomDescrs()

        aggrDescr:AggregationDescription = InputAggrDefinition.exportADescContextFuzzyDHondtDirectOptimize(
            chosenSelector, eTool)

        portfolioID:str = self.getBatchName() + jobID
        pDescr:Portfolio1AggrDescription = Portfolio1AggrDescription(
            portfolioID, recomIDs, recomDescrs, aggrDescr)

        portfolioModel = PModelDHondt(pDescr.getRecommendersIDs())

        simulator:Simulator = InputSimulatorDefinition().exportSimulatorML1M(
            batchID, divisionDatasetPercentualSize, uBehaviour, repetition)

        simulationHistory = HistoryHierDF(pDescr.getPortfolioID())
        simulator.simulate([pDescr], [portfolioModel], [eTool], [simulationHistory])
示例#3
0
    def exportSimulatorML1M(batchID: str, divisionDatasetPercentualSize: int,
                            uBehaviourID: str, repetition: int):
        """Create a ``Simulator`` configured for the ML-1M dataset.

        Args:
            batchID: identifier passed through to the ``Simulator``.
            divisionDatasetPercentualSize: value stored under
                ``ARG_DIV_DATASET_PERC_SIZE`` in the simulation arguments.
            uBehaviourID: behaviour identifier resolved to a file through
                ``Behaviours.getFile``.
            repetition: value stored under
                ``ARG_REPETITION_OF_RECOMMENDATION``.

        Returns:
            The constructed ``Simulator``.
        """
        simArgs: dict = {
            SimulationPortfolioToUser.ARG_WINDOW_SIZE: 5,
            SimulationPortfolioToUser.ARG_REPETITION_OF_RECOMMENDATION: repetition,
            SimulationPortfolioToUser.ARG_NUMBER_OF_RECOMM_ITEMS: 100,
            SimulationPortfolioToUser.ARG_NUMBER_OF_AGGR_ITEMS:
                InputSimulatorDefinition.numberOfAggrItems,
            SimulationPortfolioToUser.ARG_DIV_DATASET_PERC_SIZE:
                divisionDatasetPercentualSize,
        }

        # Load the three dataset tables, then the behaviour table.
        ratings: DataFrame = Ratings.readFromFileMl1m()
        users: DataFrame = Users.readFromFileMl1m()
        items: DataFrame = Items.readFromFileMl1m()
        behaviours: DataFrame = Behaviours.readFromFileMl1m(
            Behaviours.getFile(uBehaviourID))

        return Simulator(batchID, SimulationPortfolioToUser, simArgs,
                         ratings, users, items, behaviours)
示例#4
0
    def readDatasets():
        """Load the ML-1M ratings, users and items tables into one dataset.

        Returns:
            A ``DatasetML`` labelled ``"ml1mDivAll"`` wrapping the three
            frames.
        """
        # Read order is ratings, users, items.
        ratings, users, items = (
            Ratings.readFromFileMl1m(),
            Users.readFromFileMl1m(),
            Items.readFromFileMl1m(),
        )
        return DatasetML("ml1mDivAll", ratings, users, items)
示例#5
0
def test01():
    """Smoke-test RecommenderItemBasedKNN on a slice of the ML-1M ratings.

    Trains on the first 50,000 ratings, requests 50 recommendations for the
    user of one held-out rating, passes that rating to ``update`` and
    requests recommendations again.
    """
    print("Test 01")

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    # The users table is not needed here, so an empty frame is passed.
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update (taken from outside the training slice)
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]
    updateUserID = ratingsDFUpdate['userId'].iloc[0]

    # get recommendations:
    print("Recommendations before update")
    r: Series = rec.recommend(updateUserID, 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r = rec.recommend(updateUserID, 50, {})

    print("Test for non-existent user:")
    # NOTE(review): in the source this banner and the end-of-test banner were
    # fused into a single unparsable line ("..."******"..."); the statements
    # that originally sat between them are lost and must be restored from
    # the upstream file.
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
示例#6
0
def test03():
    """Check RecommenderItemBasedKNN for a user and item unseen in training.

    Trains on the 900,000 oldest ratings with user 23 and movie 10 removed,
    then feeds one synthetic rating of movie 10 by user 23 to ``update`` and
    prints ten recommendations for that user twice.
    """
    print("Test 03")

    #    userID: 23
    #    currentItemID: 196
    #    repetition: 0

    print("Running RecommenderItemBasedKNN ML:")

    heldOutUserID = 23
    heldOutMovieID = 10

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    chronological: DataFrame = allRatings.sort_values(
        by=Ratings.COL_TIMESTAMP)

    films: DataFrame = Items.readFromFileMl1m()

    print(len(chronological))

    # Keep the oldest ratings, then drop the held-out user and movie.
    train: DataFrame = chronological[0:900000]
    train = train[train[Ratings.COL_USERID] != heldOutUserID]
    train = train[train[Ratings.COL_MOVIEID] != heldOutMovieID]

    print(train.head(25))

    trainDataset: ADataset = DatasetML("test", train, pd.DataFrame(), films)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test1", {})
    rec.train(HistoryDF("test03"), trainDataset)

    # One synthetic rating row: user, movie, rating, timestamp.
    ratingColumns = [
        Ratings.COL_USERID, Ratings.COL_MOVIEID,
        Ratings.COL_RATING, Ratings.COL_TIMESTAMP
    ]
    updateDF: DataFrame = pd.DataFrame(
        [[heldOutUserID, heldOutMovieID, 4, 10000]], columns=ratingColumns)

    rec.update(updateDF, {})

    firstResult: Series = rec.recommend(heldOutUserID, 10, {})
    print(firstResult)
    print("\n")

    secondResult: Series = rec.recommend(heldOutUserID, 10, {})
    print(secondResult)

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
def test01():
    """Run one ML-1M simulation of the ContextDHondtINF aggregation.

    Builds a portfolio description that uses a roulette-wheel selector, a
    penalty tool and a context evaluation tool, then simulates it with the
    linear0109 behaviour file. ``argsSimulationDict`` is read from the
    enclosing module.
    """
    print("Simulation: ML ContextDHondtINF")

    jobID: str = "Roulette1"

    rouletteSelector = RouletteWheelSelector(
        {RouletteWheelSelector.ARG_EXPONENT: 1})

    # Disabled alternative kept from the original:
    #pProbToolOLin0802HLin1002:APenalization = PenalizationToolDefinition.exportProbPenaltyToolOStat08HLin1002(
    #    InputSimulatorDefinition.numberOfAggrItems)
    penaltyTool: APenalization = PenalizationToolDefinition.exportPenaltyToolOLin0802HLin1002(
        InputSimulatorDefinition.numberOfAggrItems)

    items: DataFrame = Items.readFromFileMl1m()
    users: DataFrame = Users.readFromFileMl1m()

    evalHistory: AHistory = HistoryDF("test01")

    eTool: AEvalTool = EvalToolContext({
        EvalToolContext.ARG_USERS: users,
        EvalToolContext.ARG_ITEMS: items,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: evalHistory
    })

    recomIDs, recomDescrs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    portfolioID: str = "ContextDHondtINF" + jobID
    aggrDescr = InputAggrDefinition.exportADescDContextHondtINF(
        rouletteSelector, penaltyTool, eTool)
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        portfolioID, recomIDs, recomDescrs, aggrDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behaviours: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    portfolioModel = PModelDHondt(pDescr.getRecommendersIDs())

    # simulation of portfolio
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behaviours)
    # The simulation gets its own history, keyed by the portfolio ID.
    simulationHistory = HistoryHierDF(pDescr.getPortfolioID())
    simulator.simulate([pDescr], [portfolioModel], [eTool],
                       [simulationHistory])
示例#8
0
def test01():
    """Smoke-test and time RecommenderVMContextKNN on the ML-1M ratings.

    Trains on the first 800,000 ratings (timed), requests 50 recommendations
    for the user of one held-out rating (timed), passes that rating to
    ``update`` as a click and requests recommendations again.
    """
    print("Test 01")

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 800k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:800000]

    # The users table is not needed here, so an empty frame is passed.
    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderVMContextKNN("test", {})
    start = time.time()
    rec.train(HistoryDF("test01"), trainDataset)
    end = time.time()
    print("Time to train: " + str(end - start))

    # get one rating for update (taken from outside the training slice)
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[800006:800007]
    updateUserID = ratingsDFUpdate['userId'].iloc[0]

    # get recommendations:
    print("Recommendations before update")
    start = time.time()
    r: Series = rec.recommend(updateUserID, 50, {})
    end = time.time()
    # This interval measures recommend(), not train(); the label now says so.
    print("Time to recommend: " + str(end - start))

    rec.update(ARecommender.UPDT_CLICK, ratingsDFUpdate)

    print("Recommendations after update")
    r = rec.recommend(updateUserID, 50, {})

    print("Test for non-existent user:")
    # NOTE(review): in the source this banner and the end-of-test banner were
    # fused into a single unparsable line ("..."******"..."); the statements
    # that originally sat between them are lost and must be restored from
    # the upstream file.
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
    def train(self, history:AHistory, dataset: ADataset):
        """Store the training dataset and precompute its popularity ranking.

        The ranking attribute that gets filled depends on the exact class of
        ``dataset``:

        * ``DatasetML`` - ``_sortedAscRatings4CountDF`` from
          ``getTheMostPopularOfGenre`` for the genre at index
          ``self.recommenderNumericId`` of ``Items.getAllGenres()``;
        * ``DatasetRetailRocket`` - ``_sortedAsceventsTransCountDF`` from
          ``getTheMostSold``;
        * ``DatasetST`` - ``_sortedTheMostCommon`` from ``getTheMostSold``.

        Args:
            history: must be an ``AHistory`` instance; only validated here.
            dataset: must be an ``ADataset`` instance of one of the three
                classes above (exact type match, subclasses are rejected).

        Raises:
            ValueError: if an argument has the wrong type, or the dataset is
                of none of the supported classes. In the latter case
                ``self.trainDataset`` has already been assigned.
        """
        if not isinstance(history, AHistory):
            raise ValueError("Argument history isn't type AHistory.")
        if not isinstance(dataset, ADataset):
            raise ValueError("Argument dataset isn't type ADataset.")
        self.trainDataset = dataset

        datasetClass = type(dataset)

        if datasetClass is DatasetML:
            # Imported lazily, only when an ML dataset is trained on.
            from datasets.ml.items import Items #class
            genre:str = Items.getAllGenres()[self.recommenderNumericId]
            self._sortedAscRatings4CountDF = dataset.getTheMostPopularOfGenre(genre)
            return

        if datasetClass is DatasetRetailRocket:
            self._sortedAsceventsTransCountDF = dataset.getTheMostSold()
            return

        if datasetClass is DatasetST:
            self._sortedTheMostCommon = dataset.getTheMostSold()
            return

        raise ValueError("Argument dataset isn't of expected type.")
示例#10
0
def test02():
    """Train RecommenderItemBasedKNN on up to 1,000,000 ratings and print
    50 recommendations for user 1."""
    print("Test 02")

    print("Running RecommenderItemBasedKNN ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    films: DataFrame = Items.readFromFileMl1m()

    trainRatings: DataFrame = allRatings.iloc[0:1000000]

    # The users table is not needed here, so an empty frame is passed.
    emptyUsers = pd.DataFrame()
    trainDataset: ADataset = DatasetML("test", trainRatings, emptyUsers, films)

    # train recommender
    recommender: ARecommender = RecommenderItemBasedKNN("test", {})
    recommender.train(HistoryDF("test02"), trainDataset)

    recommended: Series = recommender.recommend(1, 50, {})
    print(recommended)
    print("================== END OF TEST 02 ======================\n\n\n\n\n")
示例#11
0
def test01():
    """Run one ML-1M simulation of ContextFuzzyDHondtDirectOptimize.

    Builds a portfolio description with a roulette-wheel selector and a
    context evaluation tool, then simulates it with the linear0109 behaviour
    file. The same history object is given to both the evaluation tool and
    the simulation. ``argsSimulationDict`` is read from the enclosing module.
    """
    print("Simulation: ML ContextFuzzyDHondtDirectOptimize")

    jobID: str = "Roulette1"

    rouletteSelector: ADHondtSelector = RouletteWheelSelector(
        {RouletteWheelSelector.ARG_EXPONENT: 1})

    items: DataFrame = Items.readFromFileMl1m()
    users: DataFrame = Users.readFromFileMl1m()

    sharedHistory: AHistory = HistoryHierDF("test01")

    eTool: AEvalTool = EvalToolContext({
        EvalToolContext.ARG_USERS: users,
        EvalToolContext.ARG_ITEMS: items,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: sharedHistory
    })

    recomIDs, recomDescrs = InputRecomSTDefinition.exportPairOfRecomIdsAndRecomDescrs()

    portfolioID: str = "ContextFuzzyDHondtDirectOptimize" + jobID
    aggrDescr = InputAggrDefinition.exportADescContextFuzzyDHondtDirectOptimize(
        rouletteSelector, eTool)
    pDescr: Portfolio1AggrDescription = Portfolio1AggrDescription(
        portfolioID, recomIDs, recomDescrs, aggrDescr)

    batchID: str = "ml1mDiv90Ulinear0109R1"
    dataset: DatasetML = DatasetML.readDatasets()
    behaviours: DataFrame = BehavioursML.readFromFileMl1m(
        BehavioursML.getFile(BehavioursML.BHVR_LINEAR0109))

    portfolioModel = PModelDHondt(pDescr.getRecommendersIDs())

    # simulation of portfolio
    simulator: Simulator = Simulator(batchID, SimulationML, argsSimulationDict,
                                     dataset, behaviours)
    simulator.simulate([pDescr], [portfolioModel], [eTool], [sharedHistory])
示例#12
0
def test02(repetitions=1):
    """Exercise AggrContextFuzzyDHondt with EvalToolContext feedback on ML-1M.

    Trains an item-based KNN and a most-popular recommender on the first
    50,000 ratings, then replays every rating of user 352. For each rating
    it asks both recommenders for ``N`` items, aggregates them, picks a
    random aggregated item as the click, reports display and click feedback
    to the evaluation tool, updates the KNN recommender and prints the votes
    table.

    Args:
        repetitions: how many times the user's ratings are replayed.
    """
    # Imported once up front; the original re-ran this import statement on
    # every pass of the inner loop.
    import random

    N = 100

    # get dataset
    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain = ratingsDF[:50000]

    trainDataset: ADataset = DatasetML("ml", ratingsDFTrain, usersDF, itemsDF)

    historyDF: AHistory = HistoryDF("test01")

    # train KNN
    rec1: ARecommender = RecommenderItemBasedKNN("run", {})
    rec1.train(HistoryDF("test01"), trainDataset)

    # train Most Popular
    rec2: ARecommender = RecommenderTheMostPopular("run", {})
    rec2.train(historyDF, trainDataset)

    # methods parameters: initial votes per method, indexed by method ID
    methodsParamsData: List[list] = [['ItembasedKNN', 0.4],
                                     ['MostPopular', 0.6]]
    methodsParamsDF: DataFrame = pd.DataFrame(methodsParamsData,
                                              columns=["methodID", "votes"])
    methodsParamsDF.set_index("methodID", inplace=True)

    userID = 352
    ratingsDFuserID = ratingsDF[ratingsDF['userId'] == userID]
    itemID = ratingsDFuserID.iloc[0]['movieId']

    # A fresh history replaces the one handed to rec2.train above; it is the
    # one shared by the evaluation tool and both aggregators below.
    historyDF = HistoryDF("test01")
    historyDF.insertRecommendation(userID, itemID, 1, True, 10)

    r1: Series = rec1.recommend(userID, N, {})
    r2: Series = rec2.recommend(userID, N, {})

    methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })

    aggr: AggrContextFuzzyDHondt = AggrContextFuzzyDHondt(
        historyDF, {
            AggrContextFuzzyDHondt.ARG_EVAL_TOOL: evalToolDHondt,
            AggrContextFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})
        })
    # The non-context aggregator is used only for the initial round below.
    aggrInit: AggrFuzzyDHondt = AggrFuzzyDHondt(
        historyDF,
        {AggrFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})})
    l1 = aggrInit.runWithResponsibility(methodsResultDict, methodsParamsDF,
                                        userID, N)

    print("l1:" + str(l1))
    evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
    evalToolDHondt.click(l1,
                         random.choice(l1)[0], methodsParamsDF, evaluationDict)
    timestamp = 10
    counter = 0
    # How often the randomly clicked item appeared in each method's output.
    r1c = 0
    r2c = 0
    for _ in range(repetitions):
        for index, row in ratingsDFuserID.iterrows():
            r1 = rec1.recommend(userID, N, {})
            r2 = rec2.recommend(userID, N, {})
            methodsResultDict = {"ItembasedKNN": r1, "MostPopular": r2}
            # Built fresh each pass because it is handed to the aggregator.
            evalDict = {"a": 1}
            historyDF.insertRecommendation(userID, row['movieId'], 1, True,
                                           timestamp)
            timestamp += 1
            l1 = aggr.runWithResponsibility(methodsResultDict,
                                            methodsParamsDF,
                                            userID,
                                            argumentsDict=evalDict,
                                            numberOfItems=N)
            randomItem = random.choice(l1)[0]
            if randomItem in r1.index:
                r1c += 1
            if randomItem in r2.index:
                r2c += 1
            evaluationDict = {
                EvalToolContext.ARG_USER_ID: userID,
                EvalToolContext.ARG_RELEVANCE: methodsResultDict
            }
            print("votes Items: ", r1c)
            print("votes mostPopular ", r2c)
            evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
            evalToolDHondt.click(l1, randomItem, methodsParamsDF,
                                 evaluationDict)
            # Double brackets keep the single row as a DataFrame.
            rec1.update(ratingsDFuserID.loc[[index]], {})
            # rec2.update(ratingsDFuserID.loc[index]) Not implemented
            #print("Counter = ", counter, "; All = ", len(ratingsDFuserID.iloc[800:]), "; Index: ", index)
            print(methodsParamsDF)
            counter += 1
示例#13
0
def test01():
    """Drive EvalToolContext.click / displayed on a hand-made portfolio model.

    Uses three fixed method result series, a fixed list of recommended items
    with per-method responsibilities and a votes table of 100/80/60, and
    prints the table (and ``sumMethods`` of it) after each feedback step.
    """
    print("Test 01")

    #print("Running Two paralel History Databases:")

    # method results, items=[1,2,4,5,6,7,8,12,32,64,77]
    methodsResultDict: dict = {
        "metoda1": pd.Series([0.2, 0.1, 0.3, 0.3, 0.1], [32, 2, 8, 1, 4],
                             name="rating"),
        "metoda2": pd.Series([0.1, 0.1, 0.2, 0.3, 0.3], [1, 5, 32, 6, 7],
                             name="rating"),
        "metoda3": pd.Series([0.3, 0.1, 0.2, 0.3, 0.1], [7, 2, 77, 64, 12],
                             name="rating"),
    }

    # (itemID, responsibility of each method for that item)
    rItemIDsWithResponsibility: List = [
        (7, {'metoda1': 0, 'metoda2': 24.0, 'metoda3': 18.0}),
        (1, {'metoda1': 30.0, 'metoda2': 8.0, 'metoda3': 0}),
        (32, {'metoda1': 20.0, 'metoda2': 16.0, 'metoda3': 0}),
        (8, {'metoda1': 30.0, 'metoda2': 0, 'metoda3': 0}),
        (6, {'metoda1': 0, 'metoda2': 24.0, 'metoda3': 0}),
        (64, {'metoda1': 0, 'metoda2': 0, 'metoda3': 18.0}),
        (2, {'metoda1': 10.0, 'metoda2': 0, 'metoda3': 6.0}),
        (77, {'metoda1': 0, 'metoda2': 0, 'metoda3': 12.0}),
        (4, {'metoda1': 10.0, 'metoda2': 0, 'metoda3': 0}),
        (5, {'metoda1': 0, 'metoda2': 8.0, 'metoda3': 0}),
        (12, {'metoda1': 0, 'metoda2': 0, 'metoda3': 6.0}),
    ]

    # methods parameters
    portfolioModelDF: DataFrame = pd.DataFrame(
        [['metoda1', 100], ['metoda2', 80], ['metoda3', 60]],
        columns=["methodID", "votes"])
    portfolioModelDF.set_index("methodID", inplace=True)

    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()

    def showModel():
        # Print the votes table, its sumMethods() value and a blank line.
        print(portfolioModelDF)
        print(sumMethods(portfolioModelDF))
        print()

    print("Definition:")
    showModel()

    userID = 1
    itemID = 2
    historyDF: AHistory = HistoryDF("test01")
    # Four history entries for the same user and item.
    for flag, timestamp in ((True, 10), (True, 20), (True, 30), (False, 40)):
        historyDF.insertRecommendation(userID, itemID, 1, flag, timestamp)

    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })

    print("Clicked:")
    #print("rItemIDsWithResponsibility: " + str(rItemIDsWithResponsibility))
    for clickedItemID in (7, 1, 7):
        evalToolDHondt.click(rItemIDsWithResponsibility, clickedItemID,
                             portfolioModelDF, evaluationDict)
    showModel()

    print("Displayed - start:")
    for _ in range(100):
        print("rItemIDsWithResponsibility: " + str(rItemIDsWithResponsibility))
        evalToolDHondt.displayed(rItemIDsWithResponsibility, portfolioModelDF,
                                 evaluationDict)
        showModel()
    print(portfolioModelDF)
    print(sumMethods(portfolioModelDF))
    print("Displayed - end:")
    print()

    print("Clicked:")
    evalToolDHondt.click(rItemIDsWithResponsibility, 4, portfolioModelDF,
                         evaluationDict)
    showModel()
示例#14
0
    modelOfUserI: DataFrame = modelDF.getModel(float('nan'))
    modelOfUserI: DataFrame = modelDF.getModel(userID)

    print(modelOfUserI.head(25))
    print(list(modelDF.index))

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    #print(ratingsDF.head())

    ratingsOfuser: DataFrame = ratingsDF[ratingsDF[Ratings.COL_USERID] ==
                                         userID]
    itemIds: List[int] = ratingsOfuser[Ratings.COL_MOVIEID].tolist()
    #print(itemIds)

    itemsDF: DataFrame = Items.readFromFileMl1m()
    r = Items.countA(itemsDF, itemIds)
    print(r)

    #from matplotlib import pyplot as plt
    #from matplotlib import font_manager as fm

    # make a square figure and axes
    #fig = plt.figure(1, figsize=(6, 6), dpi=50)
    #ax = fig.add_axes([0.16, 0.16, 0.68, 0.68])

    #plt.title("Scripting languages")
    #ax.title.set_fontsize(30)

    # create the pie chart
    #ax.pie(r.values(), labels=r.keys(), autopct='%1.1f%%', shadow=True)