Пример #1
0
    def Group(self, visited):
        """Rebuild ``self.grouped`` from scratch out of ``self.links``.

        ``self.grouped`` is cleared first; afterwards every link is appended
        to the list stored under its key.  A key is the 6-tuple

            (parentLang, numSiblings, numVisitedSiblings,
             numMatchedSiblings, parentMatched, linkLang)

        where the two language entries come from ``GroupLang`` (called with
        ``self.params.langIds``), ``numSiblings`` is the number of links on
        the parent node, the two sibling counts are the lengths of the
        collections returned by ``GetVistedSiblings`` / ``GetMatchedSiblings``
        and ``parentMatched`` is the result of ``GetNodeMatched``.

        NOTE(review): those helpers are defined outside this block; their
        exact semantics should be confirmed at their definitions.

        ``visited`` is only forwarded to the helpers.
        """
        self.grouped.clear()

        for edge in self.links:
            parent = edge.parentNode
            childUrlId = edge.childNode.urlId

            # Elements are evaluated left to right, i.e. in the same order
            # the individual features were originally computed.
            key = (
                GroupLang(parent.lang, self.params.langIds),
                len(parent.links),
                len(GetVistedSiblings(childUrlId, parent, visited)),
                len(GetMatchedSiblings(childUrlId, parent, visited)),
                GetNodeMatched(parent, visited),
                GroupLang(edge.textLang, self.params.langIds),
            )

            # Create the bucket on first use, then file the link under it.
            self.grouped.setdefault(key, []).append(edge)
Пример #2
0
    def LinkToKey(self, link, visited):
        """Return the grouping key ``(parentLang, numMatchedSiblings)`` for ``link``.

        * ``parentLang`` is ``GroupLang`` applied to the parent node's
          ``lang`` and ``self.params.langIds``.
        * ``numMatchedSiblings`` is the length of the collection returned by
          ``GetMatchedSiblings`` for the link's child ``urlId``, its parent
          node and ``visited``.

        NOTE(review): both helpers live outside this block; confirm their
        semantics at their definitions.
        """
        parent = link.parentNode
        return (
            GroupLang(parent.lang, self.params.langIds),
            len(GetMatchedSiblings(link.childNode.urlId, parent, visited)),
        )
Пример #3
0
    def LinkToKey(self, link, visited):
        """Return a 1-tuple key holding the capped matched-sibling count.

        The count is the length of the collection returned by
        ``GetMatchedSiblings`` for the link's child ``urlId``, its parent
        node and ``visited``.  Counts that reach ``self.params.NUM_ACTIONS``
        are replaced by ``NUM_ACTIONS - 1``, so the key never carries a value
        of ``NUM_ACTIONS`` or more.

        NOTE(review): ``GetMatchedSiblings`` is defined outside this block;
        confirm what it considers a "matched" sibling.
        """
        siblingCount = len(
            GetMatchedSiblings(link.childNode.urlId, link.parentNode, visited))
        cap = self.params.NUM_ACTIONS

        # Saturate at the largest permitted value, cap - 1.
        capped = cap - 1 if siblingCount >= cap else siblingCount
        return (capped, )
Пример #4
0
    def LinkToKey(self, link, visited):
        """Return the 6-element grouping key for ``link``.

        Layout of the returned tuple:

            (parentLang, linkLang, numSiblings,
             numVisitedSiblings, numMatchedSiblings, parentMatched)

        ``parentLang`` / ``linkLang`` are ``GroupLang`` applied (with
        ``self.params.langIds``) to the parent node's ``lang`` and to the
        link's ``textLang``.  ``numSiblings`` is the number of links on the
        parent node.  The visited / matched counts are the lengths of the
        collections returned by ``GetVistedSiblings`` and
        ``GetMatchedSiblings``; ``parentMatched`` is whatever
        ``GetNodeMatched`` returns for the parent node.

        NOTE(review): the helpers are defined outside this block; confirm
        their semantics at their definitions.
        """
        parent = link.parentNode
        langIds = self.params.langIds
        childUrlId = link.childNode.urlId

        return (
            GroupLang(parent.lang, langIds),
            GroupLang(link.textLang, langIds),
            len(parent.links),
            len(GetVistedSiblings(childUrlId, parent, visited)),
            len(GetMatchedSiblings(childUrlId, parent, visited)),
            GetNodeMatched(parent, visited),
        )
Пример #5
0
    def AddLink(self, link, visited):
        """Append ``link`` to the bucket in ``self.dict`` selected by its key.

        The key is the 4-tuple

            (langId, numSiblings, numVisitedSiblings, numMatchedSiblings)

        where ``langId`` is the parent node's ``lang`` used as-is,
        ``numSiblings`` is the number of links on the parent node and the
        remaining two entries are the lengths of the collections returned by
        ``GetVistedSiblings`` and ``GetMatchedSiblings``.  A new list is
        created the first time a key is seen.

        NOTE(review): the two sibling helpers are defined outside this
        block; confirm their semantics at their definitions.
        """
        parent = link.parentNode
        childUrlId = link.childNode.urlId

        key = (
            parent.lang,
            len(parent.links),
            len(GetVistedSiblings(childUrlId, parent, visited)),
            len(GetMatchedSiblings(childUrlId, parent, visited)),
        )

        # Create the bucket on first use, then file the link under it.
        self.dict.setdefault(key, []).append(link)