示例#1
0
    def divideDataset(dataset: ADataset, behaviourDF: DataFrame,
                      divisionDatasetPercentualSize: int,
                      testDatasetPercentualSize: int,
                      recomRepetitionCount: int):
        """Split a dataset into a train dataset and a test slice of events.

        Events are consumed in the order in which they are stored in
        ``dataset.eventsDF``; no sorting is applied here. The leading
        ``divisionDatasetPercentualSize`` percent of the events form the train
        part, and the ``testDatasetPercentualSize`` percent that immediately
        follow form the test part. Test rows whose ``Events.COL_OBJECT_ID``
        equals 0 are dropped.

        Args:
            dataset: source dataset; its ``eventsDF`` and ``serialsDF``
                attributes are read.
            behaviourDF: behaviour rows, selected positionally via ``take``.
                NOTE(review): the position arithmetic below assumes one row
                per (event, repetition) pair stored event by event, and that
                the index labels of the test events equal their positions -
                confirm against the code that builds ``behaviourDF``.
            divisionDatasetPercentualSize: percentage of events used for
                training.
            testDatasetPercentualSize: percentage of events used for testing.
            recomRepetitionCount: number of repetitions for which behaviour
                frames are produced.

        Returns:
            A tuple ``(trainDataset, testEventsDF, testRelevantEventsDF,
            testRepeatedBehaviourDict)``. ``testRelevantEventsDF`` is the very
            same object as ``testEventsDF``. The dictionary maps every
            repetition number in ``range(recomRepetitionCount)`` to a
            DataFrame of behaviour rows indexed like ``testEventsDF``.
        """
        allEventsDF: DataFrame = dataset.eventsDF
        serialsDF: DataFrame = dataset.serialsDF
        eventCount: int = allEventsDF.shape[0]

        # train part: the leading slice of the events
        trainEnd: int = int(eventCount * divisionDatasetPercentualSize / 100)
        trainDataset: ADataset = DatasetST(
            "stDiv" + str(divisionDatasetPercentualSize),
            allEventsDF[0:trainEnd], serialsDF)

        # test part: the slice right after the train part, without the rows
        # whose object ID is 0
        testLength: int = int(eventCount * testDatasetPercentualSize / 100)
        testWindowDF: DataFrame = allEventsDF[trainEnd:(trainEnd + testLength)]
        keepMask = testWindowDF[Events.COL_OBJECT_ID] != 0
        testEventsDF: DataFrame = testWindowDF.loc[keepMask]

        # the relevant test events are the test events themselves
        testRelevantEventsDF: DataFrame = testEventsDF

        # number of repetitions stored per event in behaviourDF
        repetitionsPerEvent: int = behaviourDF[
            BehavioursST.COL_REPETITION].max() + 1

        # position of the repetition-0 behaviour row of every test event
        firstPositions: List[int] = [
            repetitionsPerEvent * eventI for eventI in testEventsDF.index]

        # one behaviour DataFrame per requested repetition, re-indexed so that
        # its rows line up with the test events
        testRepeatedBehaviourDict: dict = {
            repetitionI: DataFrame(
                behaviourDF.take(
                    [startI + repetitionI for startI in firstPositions]
                ).values.tolist(),
                index=testEventsDF.index,
                columns=behaviourDF.keys())
            for repetitionI in range(recomRepetitionCount)
        }

        return (trainDataset, testEventsDF, testRelevantEventsDF,
                testRepeatedBehaviourDict)
示例#2
0
def test03():
    """Smoke-run RecommenderTheMostPopular and print one recommendation.

    Loads the events through ``Events.readFromFile()``, wraps them in a
    ``DatasetST`` with an empty serials frame, trains the recommender and
    prints what ``recommend`` returns when the allowed item IDs are limited
    to 0..999. Nothing is asserted; the output is meant for manual inspection.
    """
    print("Test 03")

    print("Running RecommenderTheMostPopular ST:")

    from datasets.slantour.events import Events  # class
    loadedEventsDF: DataFrame = Events.readFromFile()
    noSerialsDF: DataFrame = DataFrame()
    dataset: ADataset = DatasetST("test", loadedEventsDF, noSerialsDF)

    recommender: ARecommender = RecommenderTheMostPopular(
        "rTheMostPopular", {})
    history = HistoryDF("test")
    recommender.train(history, dataset)

    # the first argument is a user ID; 20 is presumably the number of
    # requested items - confirm against ARecommender.recommend
    allowedItemIDs: list = list(range(1000))
    arguments: dict = {recommender.ARG_ALLOWED_ITEMIDS: allowedItemIDs}
    print(recommender.recommend(1, 20, arguments))
示例#3
0
def test22():
    """Run RecommenderItemBasedKNN through train, two updates and a recommend.

    Builds a small hand-made table of (user ID, object ID) events - user 1
    with objects 101-104 and user 2 with objects 101-102 - and trains the
    recommender on it. Then two single-event updates are applied (user 1 with
    object 105, and user 3, who is absent from the training events, with
    object 106) and the recommendation for user 2 is printed. Nothing is
    asserted; the output is meant for manual inspection.
    """
    print("Test 22")

    print("Running RecommenderItemBasedKNN ST:")

    from datasets.slantour.events import Events  # class

    userID1: int = 1
    userID2: int = 2
    userID3: int = 3

    trainEventsDF: DataFrame = DataFrame(
        columns=[Events.COL_USER_ID, Events.COL_OBJECT_ID])

    # rows are appended one by one through .loc, under labels 0..5
    trainRows: list = [
        [userID1, 101],
        [userID1, 102],
        [userID1, 103],
        [userID1, 104],
        [userID2, 101],
        [userID2, 102],
    ]
    for rowI, row in enumerate(trainRows):
        trainEventsDF.loc[rowI] = row

    print(trainEventsDF.head(10))

    trainDataset: ADataset = DatasetST("test", trainEventsDF, DataFrame())

    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test"), trainDataset)

    print("update 1:")
    updateEvents1DF: DataFrame = DataFrame(columns=trainEventsDF.columns)
    updateEvents1DF.loc[0] = [userID1, 105]
    print(updateEvents1DF.head())
    rec.update(updateEvents1DF, {})

    print("update 2:")
    updateEvents2DF: DataFrame = DataFrame(columns=trainEventsDF.columns)
    updateEvents2DF.loc[0] = [userID3, 106]
    print(updateEvents2DF.head())
    rec.update(updateEvents2DF, {})

    print("recommend:")
    r = rec.recommend(userID2, 10, {})
    print(r)

    # the closing banner names this test (it used to say "TEST 06")
    print("================== END OF TEST 22 ======================\n\n\n\n\n")
示例#4
0
def test21():
    """Run RecommenderItemBasedKNN on the file-backed events and print a result.

    Loads the events through ``Events.readFromFile()``, trains the
    recommender on them, applies an update made of the single event stored at
    position 9000 and prints the recommendation for user 3325463. Nothing is
    asserted; the output is meant for manual inspection.
    """
    print("Test 21")

    print("Running RecommenderItemBasedKNN ST:")

    from datasets.slantour.events import Events  # class
    eventsDF: DataFrame = Events.readFromFile()

    dataset: ADataset = DatasetST("test", eventsDF, DataFrame())

    rec: ARecommender = RecommenderItemBasedKNN("test", {})
    rec.train(HistoryDF("test"), dataset)

    # one-row frame holding the event at position 9000; this raises
    # IndexError when the file has 9000 events or fewer
    uDF: DataFrame = DataFrame([eventsDF.iloc[9000]])
    print(uDF)
    rec.update(uDF, {})

    r = rec.recommend(3325463, 20, {})
    print(r)

    # the closing banner names this test (it used to say "TEST 05")
    print("================== END OF TEST 21 ======================\n\n\n\n\n")