Пример #1
0
def testJoin():
    """Run cAssociationRules.Join on five three-URL itemsets and print it.

    The itemsets are collected in a cItemsets of size 3; the joined result
    is printed through its Print() method.
    """
    source_sets = cItemsets.cItemsets(3)
    rules = cAssociationRules()

    # Parse the five sample URLs in a fixed order.
    eins, zwei, drei, vier, fuenf = [
        urlparse.urlparse(address)
        for address in ('http://eins',
                        'http://zwei',
                        'http://drei',
                        'http://vier',
                        'http://fuenf')
    ]

    # URL combinations, one per itemset, in the order they are added.
    triples = (
        [eins, zwei, drei],
        [eins, zwei, vier],
        [eins, drei, vier],
        [eins, drei, fuenf],
        [zwei, drei, vier],
    )

    # Build every itemset first, then register them with the collection.
    members = []
    for triple in triples:
        member = cItemset.cItemset()
        member.SetUrls(triple)
        members.append(member)

    for member in members:
        source_sets.AddItemset(member)

    rules.Join(source_sets).Print()
Пример #2
0
def test():
    """Built-in test method for this class.

    Adds two itemsets (each holding one URL twice, with its own count) to a
    cItemsets of size 2 opened on /tmp/foo.xml, closes it, then opens the
    same path with a second cItemsets instance and prints its content.
    """
    items = cItemsets(2)

    items.OpenFile('/tmp/foo.xml')

    urltuple = urlparse.urlparse('http://slashdot.org')
    urltuple2 = urlparse.urlparse('http://harth.org')

    itemset1 = cItemset.cItemset()
    itemset2 = cItemset.cItemset()

    itemset1.SetUrls([urltuple, urltuple])
    itemset1.SetCount(4711)

    # Configure the second itemset. Previously these two calls targeted
    # itemset1 again, overwriting its data and leaving itemset2 empty.
    itemset2.SetUrls([urltuple2, urltuple2])
    itemset2.SetCount(4712)

    items.AddItemset(itemset1)
    items.AddItemset(itemset2)

    items.CloseFile()

    # Re-open the same file with a fresh instance and show what it holds.
    items2 = cItemsets(2)

    items2.OpenFile('/tmp/foo.xml')
    items2.Print()
    # NOTE: the prune step is disabled, so Print() runs twice with no
    # intervening change made by this function.
    #items2.Prune(2)
    items2.Print()
    items2.CloseFile()
Пример #3
0
    def SetElements(self, lEls):
        """Populate the internal itemset list from elements.

        Each entry of lEls is handed to a fresh cItemset through
        SetElement, and the resulting itemset is appended to self.lData.

        lEls -- elements, one per itemset, e.g.

        <itemset count="74">
        <url>http://slashdot.org</url>
        ...
        </itemset>

        """
        def build(element):
            # Wrap a single element in a newly created itemset.
            loaded = cItemset.cItemset()
            loaded.SetElement(element)
            return loaded

        for element in lEls:
            loaded = build(element)
            self.lData.append(loaded)
Пример #4
0
    def ComputeCandidateOneItemsets(self, lSessions):
        """Build the candidate one-itemsets from the clicks of all sessions.

        For every click of every session a single-URL itemset is created
        and added to a cItemsets collection of size 1.

        lSessions -- list of sessions
        return -- candidate one itemsets

        """
        singles = cItemsets.cItemsets(1)

        # Lazily walk all clicks, session by session.
        all_clicks = (
            visit
            for visitor_session in lSessions
            for visit in visitor_session.GetClicks()
        )

        for visit in all_clicks:
            single = cItemset.cItemset()
            single.SetUrls([visit.GetUrl()])
            singles.AddItemset(single)

        return singles
Пример #5
0
    def Join(self, itemsets):
        """Join large (k-1)-itemsets into candidate k-itemsets.

        Every ordered pair of itemsets is merged into the union of their
        URLs.  A union is kept when it contains exactly k distinct URLs,
        where k = itemsets.GetSize() + 1, and each distinct union is kept
        only once (in first-seen order).

        itemsets -- Lk-1 large k-1 itemsets
        return -- Ck candidate k itemsets, or None when the resulting
                  collection holds no itemsets

        """
        k = itemsets.GetSize() + 1
        candidates = cItemsets.cItemsets(k)

        # Fetch each URL list once instead of once per pair.
        url_lists = [list(item.GetUrls()) for item in itemsets.GetList()]

        joined = []   # merged URL dicts, in first-seen order
        seen = set()  # frozenset keys of the unions already in `joined`

        for urls_i in url_lists:
            for urls_j in url_lists:
                # Dict keys eliminate double occurrences of a URL.
                merged = {}
                for url in urls_i:
                    merged[url] = 1
                for url in urls_j:
                    merged[url] = 1

                if len(merged) != k:
                    continue

                # Hashable key gives O(1) duplicate detection; all values
                # are 1, so comparing key sets equals comparing the dicts.
                key = frozenset(merged)
                if key not in seen:
                    seen.add(key)
                    joined.append(merged)

        for merged in joined:
            candidate = cItemset.cItemset()
            for url in merged:
                candidate.AddUrl(url)

            candidates.AddItemset(candidate)

        if not candidates.lData:
            return None
        return candidates