Пример #1
0
def test01():
    """Smoke-test RecommenderTheMostPopular on a slice of the ML-1M ratings.

    Trains on the first 50000 rating rows, pushes one later row through
    ``update`` and prints the type and content of the recommendations for
    user 331 and for user 10000 (a user the original author labels as
    non-existent).

    Side effects: changes the process working directory to its parent and
    prints to stdout.
    """
    print("Test 01")
    # NOTE(review): presumably the data readers resolve paths relative to
    # the project root, hence the chdir -- confirm before moving this test.
    os.chdir("..")

    print("Running RecommenderTheMostPopular:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    rec: ARecommender = RecommenderTheMostPopular("test", {})
    rec.train(pd.DataFrame(), ratingsDFTrain, pd.DataFrame(), pd.DataFrame())

    # a single rating row taken from outside the training slice
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50003:50004]
    rec.update(ratingsDFUpdate)

    # the second ID exercises the non-existent-user path
    for userID in (331, 10000):
        r: Series = rec.recommend(userID, 50, {})
        print(type(r))
        print(r)
Пример #2
0
    def readDatasets():
        """Load the ML-1M ratings, users and items and bundle them.

        Returns:
            A ``DatasetML`` labelled ``"ml1mDivAll"`` built from the three
            frames, passed in the order ratings, users, items.
        """
        ml1mRatings: DataFrame = Ratings.readFromFileMl1m()
        ml1mUsers: DataFrame = Users.readFromFileMl1m()
        ml1mItems: DataFrame = Items.readFromFileMl1m()

        dataset = DatasetML("ml1mDivAll", ml1mRatings, ml1mUsers, ml1mItems)
        return dataset
Пример #3
0
    def exportSimulatorML1M(batchID: str, divisionDatasetPercentualSize: int,
                            uBehaviourID: str, repetition: int):
        """Build a ``Simulator`` running SimulationPortfolioToUser on ML-1M.

        Args:
            batchID: passed through to the ``Simulator`` constructor.
            divisionDatasetPercentualSize: stored under
                ``ARG_DIV_DATASET_PERC_SIZE`` in the simulation arguments.
            uBehaviourID: selects the behaviours file via
                ``Behaviours.getFile``.
            repetition: stored under ``ARG_REPETITION_OF_RECOMMENDATION``.

        Returns:
            The constructed ``Simulator``.
        """
        # simulation arguments, inserted in the same order as before
        simulationArgs: dict = {}
        simulationArgs[SimulationPortfolioToUser.ARG_WINDOW_SIZE] = 5
        simulationArgs[
            SimulationPortfolioToUser.ARG_REPETITION_OF_RECOMMENDATION] = repetition
        simulationArgs[SimulationPortfolioToUser.ARG_NUMBER_OF_RECOMM_ITEMS] = 100
        simulationArgs[SimulationPortfolioToUser.ARG_NUMBER_OF_AGGR_ITEMS] = (
            InputSimulatorDefinition.numberOfAggrItems)
        simulationArgs[SimulationPortfolioToUser.ARG_DIV_DATASET_PERC_SIZE] = (
            divisionDatasetPercentualSize)

        # load the three ML-1M tables
        ml1mRatings: DataFrame = Ratings.readFromFileMl1m()
        ml1mUsers: DataFrame = Users.readFromFileMl1m()
        ml1mItems: DataFrame = Items.readFromFileMl1m()

        # load the behaviours that belong to the requested behaviour ID
        behavioursPath: str = Behaviours.getFile(uBehaviourID)
        ml1mBehaviours: DataFrame = Behaviours.readFromFileMl1m(behavioursPath)

        return Simulator(batchID, SimulationPortfolioToUser, simulationArgs,
                         ml1mRatings, ml1mUsers, ml1mItems, ml1mBehaviours)
Пример #4
0
def test01():
    """Smoke-test RecommenderTheMostPopular through the dataset-based API.

    Trains on the first 50000 ML-1M rating rows wrapped in a ``DatasetML``
    (users and items are empty frames), applies one click update and prints
    the type and content of the recommendations for users 331 and 10000.
    """
    print("Test 01")

    print("Running RecommenderTheMostPopular ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    trainRatings: DataFrame = allRatings.iloc[0:50000]

    dataset: ADataset = DatasetML("test", trainRatings, pd.DataFrame(),
                                  pd.DataFrame())

    recommender: ARecommender = RecommenderTheMostPopular("test", {})
    recommender.train(HistoryDF("test"), dataset)

    # one rating row from outside the training slice, sent as a click
    singleRating: DataFrame = allRatings.iloc[50003:50004]
    recommender.update(ARecommender.UPDT_CLICK, singleRating)

    # the second ID is the original author's non-existent-user case
    for userID in (331, 10000):
        result: Series = recommender.recommend(userID, 50, {})
        print(type(result))
        print(result)
Пример #5
0
def test01():
    """Smoke-test RecommenderItemBasedKNN on a slice of the ML-1M ratings.

    Trains on the first 50000 rating rows, then asks for recommendations
    for the user of one later rating row before and after that row is
    passed to ``update``. Only progress messages are printed; the
    recommendation results themselves are not.
    """
    print("Test 01")

    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only first 50k
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test01"), trainDataset)

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50005:50006]

    # get recommendations:
    print("Recommendations before update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    rec.update(ratingsDFUpdate, {})

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the two prints below were fused into one line by a
    # `******` redaction artifact, which made the function a syntax error.
    # Whatever statements sat between them (presumably a recommend call for
    # an unknown user) cannot be recovered from this file -- TODO restore.
    print("Test for non-existent user:")
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
Пример #6
0
def test03():
    """Check RecommenderItemBasedKNN for a user and item absent from training.

    The timestamp-sorted ML-1M ratings are cut to the first 900000 rows,
    every rating by user 23 and every rating of movie 10 is removed, and the
    recommender is trained on the remainder. A single rating of movie 10 by
    user 23 is then passed to ``update`` and the recommendations for user 23
    are requested and printed twice.
    """
    print("Test 03")

    # Scenario noted by the original author:
    #    userID: 23
    #    currentItemID: 196
    #    repetition: 0

    print("Running RecommenderItemBasedKNN ML:")

    heldOutUser = 23
    heldOutMovie = 10

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    byTimestamp: DataFrame = allRatings.sort_values(by=Ratings.COL_TIMESTAMP)

    films: DataFrame = Items.readFromFileMl1m()

    print(len(byTimestamp))

    # training data: first 900000 rows without the held-out user and movie
    trainRatings: DataFrame = byTimestamp[0:900000]
    keepUser = trainRatings[Ratings.COL_USERID] != heldOutUser
    trainRatings = trainRatings[keepUser]
    keepMovie = trainRatings[Ratings.COL_MOVIEID] != heldOutMovie
    trainRatings = trainRatings[keepMovie]

    print(trainRatings.head(25))

    dataset: ADataset = DatasetML("test", trainRatings, pd.DataFrame(), films)

    # train recommender
    knn: ARecommender = RecommenderItemBasedKNN("test1", {})
    knn.train(HistoryDF("test03"), dataset)

    # one hand-made rating row: user, movie, rating, timestamp
    ratingColumns = [
        Ratings.COL_USERID, Ratings.COL_MOVIEID, Ratings.COL_RATING,
        Ratings.COL_TIMESTAMP
    ]
    newRating: DataFrame = pd.DataFrame([[heldOutUser, heldOutMovie, 4, 10000]],
                                        columns=ratingColumns)

    knn.update(newRating, {})

    # the second positional argument of recommend is 10 in both calls
    firstResult: Series = knn.recommend(heldOutUser, 10, {})
    print(firstResult)
    print("\n")

    secondResult: Series = knn.recommend(heldOutUser, 10, {})
    print(secondResult)

    print("================== END OF TEST 03 ======================\n\n\n\n\n")
Пример #7
0
    def generateFileMl1m(numberOfItems: int, countOfRepetitions: int,
                         behaviourID: str,
                         uBehavDesc: UserBehaviourDescription):
        """Generate the ML-1M behaviours table and write it to a TSV file.

        Every (user, movie) pair from the ratings file is expanded into
        ``countOfRepetitions`` rows, one per repetition index. A generator
        helper chosen by ``behaviourID`` is then called on the table (its
        return value is ignored, so it is expected to fill the behaviour
        column in place), and the table is written tab-separated, without
        the index, to the file returned by ``Behaviours.getFile``.

        Args:
            numberOfItems: forwarded unchanged to the generator helper.
            countOfRepetitions: number of repetition rows per rating; also
                forwarded to the generator helper.
            behaviourID: selects both the output file and the generator.
            uBehavDesc: forwarded unchanged to the generator helper.
        """
        # fixed seeds for both RNGs so repeated runs use the same sequence
        np.random.seed(42)
        random.seed(42)

        print("Generate Behaviour " + behaviourID)

        behaviourFile: str = Behaviours.getFile(behaviourID)

        ratingsDF: DataFrame = Ratings.readFromFileMl1m()

        ratingsCopyDF: DataFrame = ratingsDF[[
            Ratings.COL_USERID, Ratings.COL_MOVIEID
        ]].copy()
        # each row gets the whole repetition range; explode() below turns it
        # into one row per repetition index
        ratingsCopyDF[Behaviours.COL_REPETITION] = [range(countOfRepetitions)
                                                    ] * len(ratingsCopyDF)

        behavioursDF: DataFrame = ratingsCopyDF.explode(
            Behaviours.COL_REPETITION)
        behavioursDF[Behaviours.COL_BEHAVIOUR] = [None] * len(behavioursDF)
        # explode() repeats index labels; reset_index adds an 'index' column
        # that is deleted again before the table is written
        behavioursDF.reset_index(inplace=True)

        # Compare IDs by value: the previous identity test (`is`) only
        # matched when the caller passed the very same string object and
        # otherwise fell through silently to the general generator.
        if behaviourID == Behaviours.BHVR_LINEAR0109:
            Behaviours.__generateLinear0109BehaviourMl1m(
                behavioursDF, numberOfItems, countOfRepetitions, uBehavDesc)
        elif behaviourID == Behaviours.BHVR_STATIC08:
            Behaviours.__generateStatic08BehaviourMl1m(behavioursDF,
                                                       numberOfItems,
                                                       countOfRepetitions,
                                                       uBehavDesc)
        else:
            Behaviours.__generateGeneralBehaviourMl1m(behavioursDF,
                                                      numberOfItems,
                                                      countOfRepetitions,
                                                      uBehavDesc)

        print(behavioursDF.head(10))
        del behavioursDF['index']
        print(behavioursDF.head(10))

        behavioursDF.to_csv(behaviourFile, sep='\t', index=False)
Пример #8
0
def test01():
    """Smoke-test and time RecommenderVMContextKNN on the ML-1M ratings.

    Trains on the first 800000 rating rows and prints the training time,
    then asks for recommendations for the user of one later rating row
    before and after that row is sent to ``update`` as a click. The first
    recommend call is timed as well; the recommendation results themselves
    are not printed.
    """
    print("Test 01")

    # NOTE(review): the banner names RecommenderItemBasedKNN although the
    # recommender built below is RecommenderVMContextKNN -- confirm which
    # label is wanted; the string is left unchanged here.
    print("Running RecommenderItemBasedKNN ML:")

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    filmsDF: DataFrame = Items.readFromFileMl1m()

    # Take only the first 800k ratings
    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:800000]

    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       filmsDF)

    # train recommender
    rec: ARecommender = RecommenderVMContextKNN("test", {})
    start = time.time()
    rec.train(HistoryDF("test01"), trainDataset)
    end = time.time()
    print("Time to train: " + str(end - start))

    # get one rating for update
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[800006:800007]

    # get recommendations:
    print("Recommendations before update")
    start = time.time()
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})
    end = time.time()
    # this interval measures recommend(), not train()
    print("Time to recommend: " + str(end - start))

    rec.update(ARecommender.UPDT_CLICK, ratingsDFUpdate)

    print("Recommendations after update")
    r: Series = rec.recommend(ratingsDFUpdate['userId'].iloc[0], 50, {})

    # NOTE(review): the two prints below were fused into one line by a
    # `******` redaction artifact, which made the function a syntax error.
    # Whatever statements sat between them (presumably a recommend call for
    # an unknown user) cannot be recovered from this file -- TODO restore.
    print("Test for non-existent user:")
    print("================== END OF TEST 01 ======================\n\n\n\n\n")
Пример #9
0
def test01():
    """Smoke-test RecommenderCosineCB through the dataset-based API.

    Trains on the first 50000 ML-1M rating rows (users and items are empty
    frames), applies one rating row via ``update``, prints the length of
    the stored profile of user 331 and then the type and content of the
    recommendations for users 331 and 10000.
    """
    print("Test 01")

    print("Running RecommenderCosineCB ML:")

    # NOTE(review): an unused local pointing at the OHE data file was
    # removed here; the recommender has always been configured with the
    # TF-IDF file below -- confirm that this is the intended data source.
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    trainDataset: ADataset = DatasetML("test", ratingsDFTrain, pd.DataFrame(),
                                       pd.DataFrame())

    args: dict = {
        RecommenderCosineCB.ARG_CB_DATA_PATH:
        Configuration.cbML1MDataFileWithPathTFIDF,
        RecommenderCosineCB.ARG_USER_PROFILE_SIZE: 5,
        RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: "max",
        RecommenderCosineCB.ARG_USE_DIVERSITY: True
    }
    rec: ARecommender = RecommenderCosineCB("test", args)

    rec.train(HistoryDF("test"), trainDataset)

    # a single rating row taken from outside the training slice
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50003:50004]
    rec.update(ratingsDFUpdate, args)

    print(len(rec.userProfiles[331]))

    # Both calls use the same args (profile strategy "max"); the printed
    # labels are kept exactly as the original emitted them. The second ID
    # is the original author's non-existent-user case.
    for label, userID in (("max", 331), ("mean", 10000)):
        print(label)
        r: Series = rec.recommend(userID, 20, args)
        print(type(r))
        print(r)
Пример #10
0
def test02():
    """Train RecommenderItemBasedKNN on ML-1M and print results for user 1.

    The first 1000000 rating rows are used for training, together with the
    items frame and an empty users frame.
    """
    print("Test 02")

    print("Running RecommenderItemBasedKNN ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    films: DataFrame = Items.readFromFileMl1m()

    trainRatings: DataFrame = allRatings.iloc[0:1000000]
    dataset: ADataset = DatasetML("test", trainRatings, pd.DataFrame(), films)

    # train recommender
    knn: ARecommender = RecommenderItemBasedKNN("test", {})
    knn.train(HistoryDF("test02"), dataset)

    recommendation: Series = knn.recommend(1, 50, {})
    print(recommendation)
    print("================== END OF TEST 02 ======================\n\n\n\n\n")
Пример #11
0
def test01():
    """Smoke-test RecommenderW2V (train variant "posneg") on ML-1M ratings.

    Trains on the first 50000 rating rows, applies one later row via
    ``update``, prints the length of the stored profile of user 331 and
    then runs three recommend calls with different profile strategies,
    printing a label, the result type and the result for each.

    Side effects: changes the process working directory to its parent and
    prints to stdout.
    """
    print("Test 01")
    os.chdir("..")

    print("Running RecommenderW2V:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    trainRatings: DataFrame = allRatings.iloc[0:50000]

    recommenderID: str = "ml1mDiv90"

    w2v: ARecommender = RecommenderW2V(
        recommenderID, {RecommenderW2V.ARG_TRAIN_VARIANT: "posneg"})
    w2v.train(HistoryDF("w2v"), trainRatings, pd.DataFrame(), pd.DataFrame())

    # a single rating row taken from outside the training slice
    singleRating: DataFrame = allRatings.iloc[50003:50004]
    w2v.update(singleRating)

    print(len(w2v.userProfiles[331]))

    # (user, profile strategy, printed label); the last label is "mean"
    # although the strategy is "window10", exactly as in the original output
    scenarios = (
        (331, "max", "max"),
        (331, "window10", "window10"),
        (10000, "window10", "mean"),
    )
    for userID, strategy, label in scenarios:
        result: Series = w2v.recommend(
            userID, 50, {RecommenderW2V.ARG_USER_PROFILE_STRATEGY: strategy})
        print(label)
        print(type(result))
        print(result)
Пример #12
0
def test01():
    """Smoke-test RecommenderW2V through the dataset-based API.

    Trains on the first 50000 ML-1M rating rows wrapped in a ``DatasetML``
    named "ml1mDiv90" (users and items are empty frames), applies one later
    row via ``update``, prints the length of the stored profile of user 331
    and then the recommendations for users 331 and 10000.
    """
    print("Test 01")

    print("Running RecommenderW2V ML:")

    allRatings: DataFrame = Ratings.readFromFileMl1m()
    trainRatings: DataFrame = allRatings.iloc[0:50000]

    dataset: ADataset = DatasetML("ml1mDiv90", trainRatings, pd.DataFrame(),
                                  pd.DataFrame())

    # the same dictionary configures the recommender and both recommend calls
    w2vArgs: dict = {
        RecommenderW2V.ARG_ITERATIONS: 50000,
        RecommenderW2V.ARG_TRAIN_VARIANT: 'positive',
        RecommenderW2V.ARG_USER_PROFILE_SIZE: -1,
        RecommenderW2V.ARG_USER_PROFILE_STRATEGY: 'weightedMean',
        RecommenderW2V.ARG_VECTOR_SIZE: 128,
        RecommenderW2V.ARG_WINDOW_SIZE: 5,
    }
    w2v: ARecommender = RecommenderW2V("RecommenderW2V", w2vArgs)

    w2v.train(HistoryDF("w2v"), dataset)

    # a single rating row taken from outside the training slice
    singleRating: DataFrame = allRatings.iloc[50003:50004]
    w2v.update(singleRating, {})

    print(len(w2v.userProfiles[331]))

    # (user, printed label); the labels are reproduced as originally printed
    for userID, label in ((331, "max"), (10000, "mean")):
        result: Series = w2v.recommend(userID, 50, w2vArgs)
        print(label)
        print(type(result))
        print(result)
Пример #13
0
def test01():
    """Smoke-test RecommenderCosineCB on a slice of the ML-1M ratings.

    Trains on the first 50000 rating rows using the OHE content-based data
    file, applies one later row via ``update``, prints the length of the
    stored profile of user 331 and then the recommendations for user 331
    (profile strategy "max") and user 10000 (profile strategy "mean").

    Side effects: changes the process working directory to its parent and
    prints to stdout.
    """
    print("Test 01")
    os.chdir("..")

    print("Running RecommenderCosineCB:")

    # The TF-IDF path used to be assigned first and overwritten on the next
    # line; only the OHE path ever reached the recommender.
    cbDataPath: str = Configuration.cbDataFileWithPathOHE

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain: DataFrame = ratingsDF.iloc[0:50000]

    rec: ARecommender = RecommenderCosineCB(
        "test", {RecommenderCosineCB.ARG_CB_DATA_PATH: cbDataPath})
    rec.train(pd.DataFrame(), ratingsDFTrain, pd.DataFrame(), pd.DataFrame())

    # a single rating row taken from outside the training slice
    ratingsDFUpdate: DataFrame = ratingsDF.iloc[50003:50004]
    rec.update(ratingsDFUpdate)

    print(len(rec.userProfiles[331]))

    # (printed label / profile strategy, user); the second ID is the
    # original author's non-existent-user case
    for strategy, userID in (("max", 331), ("mean", 10000)):
        print(strategy)
        r: Series = rec.recommend(
            userID, 50,
            {RecommenderCosineCB.ARG_USER_PROFILE_STRATEGY: strategy})
        print(type(r))
        print(r)
Пример #14
0
def test02(repetitions=1):
    """Run the context-aware fuzzy D'Hondt aggregation for one ML-1M user.

    Two recommenders (item-based KNN and most-popular) are trained on the
    first 50000 rating rows. An initial aggregated list is produced with
    ``AggrFuzzyDHondt`` and fed to the evaluation tool as displayed, with a
    randomly chosen item as the click. Then, for every rating of user 352,
    both recommenders are queried, the rated movie is inserted into the
    history, the lists are aggregated with ``AggrContextFuzzyDHondt``, a
    random item of the result is reported as clicked and the KNN
    recommender is updated with that rating.

    Args:
        repetitions: how many times the pass over the user's ratings is
            repeated.

    Prints the initial list, running hit counters and the method votes
    table to stdout.
    """
    # local import kept (the original imported inside the function as
    # well) but done once instead of again on every loop iteration
    import random

    N = 100

    # get dataset
    itemsDF: DataFrame = Items.readFromFileMl1m()
    usersDF: DataFrame = Users.readFromFileMl1m()
    ratingsDF: DataFrame = Ratings.readFromFileMl1m()

    ratingsDFTrain = ratingsDF[:50000]

    trainDataset: ADataset = DatasetML("ml", ratingsDFTrain, usersDF, itemsDF)

    # this history object is only handed to the most-popular recommender;
    # the name is rebound to a fresh history further below
    historyDF: AHistory = HistoryDF("test01")

    # train KNN
    rec1: ARecommender = RecommenderItemBasedKNN("run", {})
    rec1.train(HistoryDF("test01"), trainDataset)

    # train Most Popular
    rec2: ARecommender = RecommenderTheMostPopular("run", {})
    rec2.train(historyDF, trainDataset)

    # methods parameters: initial votes per method, indexed by method ID
    methodsParamsData = [['ItembasedKNN', 0.4], ['MostPopular', 0.6]]
    methodsParamsDF: DataFrame = pd.DataFrame(methodsParamsData,
                                              columns=["methodID", "votes"])
    methodsParamsDF.set_index("methodID", inplace=True)

    userID = 352
    ratingsDFuserID = ratingsDF[ratingsDF['userId'] == userID]
    itemID = ratingsDFuserID.iloc[0]['movieId']

    # fresh history shared by the evaluation tool and both aggregators
    historyDF: AHistory = HistoryDF("test01")
    historyDF.insertRecommendation(userID, itemID, 1, True, 10)

    r1: Series = rec1.recommend(userID, N, {})
    r2: Series = rec2.recommend(userID, N, {})

    methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
    evaluationDict: dict = {
        EvalToolContext.ARG_USER_ID: userID,
        EvalToolContext.ARG_RELEVANCE: methodsResultDict
    }
    evalToolDHondt = EvalToolContext({
        EvalToolContext.ARG_USERS: usersDF,
        EvalToolContext.ARG_ITEMS: itemsDF,
        EvalToolContext.ARG_DATASET: "ml",
        EvalToolContext.ARG_HISTORY: historyDF
    })

    aggr: AggrContextFuzzyDHondt = AggrContextFuzzyDHondt(
        historyDF, {
            AggrContextFuzzyDHondt.ARG_EVAL_TOOL: evalToolDHondt,
            AggrContextFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})
        })
    aggrInit: AggrFuzzyDHondt = AggrFuzzyDHondt(
        historyDF,
        {AggrFuzzyDHondt.ARG_SELECTOR: TheMostVotedItemSelector({})})
    l1 = aggrInit.runWithResponsibility(methodsResultDict, methodsParamsDF,
                                        userID, N)

    print("l1:" + str(l1))
    evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
    evalToolDHondt.click(l1,
                         random.choice(l1)[0], methodsParamsDF, evaluationDict)
    timestamp = 10
    # how often the randomly clicked item appeared in each method's list
    r1c = 0
    r2c = 0
    for _ in range(repetitions):
        for index, row in ratingsDFuserID.iterrows():
            r1: Series = rec1.recommend(userID, N, {})
            r2: Series = rec2.recommend(userID, N, {})
            methodsResultDict: dict = {"ItembasedKNN": r1, "MostPopular": r2}
            evalDict = {"a": 1}
            historyDF.insertRecommendation(userID, row['movieId'], 1, True,
                                           timestamp)
            timestamp += 1
            l1 = aggr.runWithResponsibility(methodsResultDict,
                                            methodsParamsDF,
                                            userID,
                                            argumentsDict=evalDict,
                                            numberOfItems=N)
            randomItem = random.choice(l1)[0]
            if randomItem in r1.index:
                r1c += 1
            if randomItem in r2.index:
                r2c += 1
            evaluationDict: dict = {
                EvalToolContext.ARG_USER_ID: userID,
                EvalToolContext.ARG_RELEVANCE: methodsResultDict
            }
            print("votes Items: ", r1c)
            print("votes mostPopular ", r2c)
            evalToolDHondt.displayed(l1, methodsParamsDF, evaluationDict)
            evalToolDHondt.click(l1, randomItem, methodsParamsDF,
                                 evaluationDict)
            # double brackets keep the single row as a DataFrame
            rec1.update(ratingsDFuserID.loc[[index]], {})
            # rec2 is not updated: the original marks its update as
            # not implemented
            print(methodsParamsDF)
Пример #15
0
    #file:str = "results/ml1mDiv90Ustatic06R1/portfModelTimeEvolution-ClusterFixedClk003ViewDivisor500NRFalse.txt"
    file: str = "../results/ml1mDiv90Ulinear0109R1/portfModelTimeEvolution-ClusterFixedClk003ViewDivisor500NRFalse.txt"
    modelDF: PModelDHondtPersonalised = PModelDHondtPersonalised.readModel(
        file, 38000)
    #print(modelDF.head())

    userID = 11.0

    modelOfUserI: DataFrame = modelDF.getModel(float('nan'))
    modelOfUserI: DataFrame = modelDF.getModel(userID)

    print(modelOfUserI.head(25))
    print(list(modelDF.index))

    ratingsDF: DataFrame = Ratings.readFromFileMl1m()
    #print(ratingsDF.head())

    ratingsOfuser: DataFrame = ratingsDF[ratingsDF[Ratings.COL_USERID] ==
                                         userID]
    itemIds: List[int] = ratingsOfuser[Ratings.COL_MOVIEID].tolist()
    #print(itemIds)

    itemsDF: DataFrame = Items.readFromFileMl1m()
    r = Items.countA(itemsDF, itemIds)
    print(r)

    #from matplotlib import pyplot as plt
    #from matplotlib import font_manager as fm

    # make a square figure and axes