def Group(self, visited):
    """Rebuild ``self.grouped`` by bucketing every link in ``self.links``.

    ``self.grouped`` is cleared first. Each link is then appended to the
    list stored under a six-element key:

        (parent language group, number of links on the parent node,
         visited-sibling count, matched-sibling count,
         parent-matched value, link-text language group)

    The counts are the lengths of whatever ``GetVistedSiblings`` and
    ``GetMatchedSiblings`` return; the parent-matched value is the raw
    result of ``GetNodeMatched``.

    Args:
        visited: Passed through unchanged to ``GetVistedSiblings``,
            ``GetMatchedSiblings`` and ``GetNodeMatched``.
    """
    self.grouped.clear()

    for edge in self.links:
        parent = edge.parentNode
        childUrlId = edge.childNode.urlId

        # Elements are evaluated left to right, i.e. in the same order the
        # individual features appear in the key.
        bucketKey = (
            GroupLang(parent.lang, self.params.langIds),
            len(parent.links),
            len(GetVistedSiblings(childUrlId, parent, visited)),
            len(GetMatchedSiblings(childUrlId, parent, visited)),
            GetNodeMatched(parent, visited),
            GroupLang(edge.textLang, self.params.langIds),
        )

        # Create the bucket on first use, then add the link to it.
        self.grouped.setdefault(bucketKey, []).append(edge)
def LinkToKey(self, link, visited):
    """Return the two-element grouping key for ``link``.

    The key is ``(parent language group, matched-sibling count)``, where
    the language group comes from ``GroupLang`` applied to the parent
    node's ``lang`` and the count is the length of the result of
    ``GetMatchedSiblings``.

    NOTE(review): further definitions named ``LinkToKey`` with the same
    signature follow in this file; if they live in the same scope, a later
    one overrides this one -- confirm which is intended.

    Args:
        link: Object exposing ``parentNode`` and ``childNode.urlId``.
        visited: Passed through unchanged to ``GetMatchedSiblings``.

    Returns:
        A tuple ``(parentLang, numMatchedSiblings)``.
    """
    parent = link.parentNode
    langGroup = GroupLang(parent.lang, self.params.langIds)
    matchedCount = len(GetMatchedSiblings(link.childNode.urlId, parent, visited))
    return (langGroup, matchedCount)
def LinkToKey(self, link, visited):
    """Return a one-element key holding the clamped matched-sibling count.

    The count is the length of the result of ``GetMatchedSiblings``. Any
    count greater than or equal to ``self.params.NUM_ACTIONS`` is replaced
    by ``self.params.NUM_ACTIONS - 1``.

    NOTE(review): another definition named ``LinkToKey`` with the same
    signature follows in this file; if both live in the same scope, the
    later one overrides this one -- confirm which is intended.

    Args:
        link: Object exposing ``parentNode`` and ``childNode.urlId``.
        visited: Passed through unchanged to ``GetMatchedSiblings``.

    Returns:
        A 1-tuple ``(numMatchedSiblings,)``.
    """
    matchedCount = len(
        GetMatchedSiblings(link.childNode.urlId, link.parentNode, visited)
    )

    # Fold every count at or above the limit into the last slot.
    limit = self.params.NUM_ACTIONS
    clamped = limit - 1 if matchedCount >= limit else matchedCount
    return (clamped,)
def LinkToKey(self, link, visited):
    """Return the six-element grouping key for ``link``.

    Key layout:

        (parent language group, link-text language group,
         number of links on the parent node, visited-sibling count,
         matched-sibling count, parent-matched value)

    The counts are the lengths of whatever ``GetVistedSiblings`` and
    ``GetMatchedSiblings`` return; the parent-matched value is the raw
    result of ``GetNodeMatched``.

    Args:
        link: Object exposing ``parentNode``, ``childNode.urlId`` and
            ``textLang``.
        visited: Passed through unchanged to ``GetVistedSiblings``,
            ``GetMatchedSiblings`` and ``GetNodeMatched``.

    Returns:
        The six-element tuple described above.
    """
    parent = link.parentNode
    childUrlId = link.childNode.urlId

    # A coarser alternative key, (parentLang, linkLang), was left disabled
    # in the original code; the full feature tuple is what is returned.
    return (
        GroupLang(parent.lang, self.params.langIds),
        GroupLang(link.textLang, self.params.langIds),
        len(parent.links),
        len(GetVistedSiblings(childUrlId, parent, visited)),
        len(GetMatchedSiblings(childUrlId, parent, visited)),
        GetNodeMatched(parent, visited),
    )
def AddLink(self, link, visited):
    """Append ``link`` to the bucket in ``self.dict`` matching its features.

    The bucket key is
    ``(parent lang, number of links on the parent node,
    visited-sibling count, matched-sibling count)``.
    The parent node's ``lang`` is used as-is, and the two counts are the
    lengths of the results of ``GetVistedSiblings`` and
    ``GetMatchedSiblings``. A new list is created the first time a key is
    seen.

    Args:
        link: Object exposing ``parentNode`` and ``childNode.urlId``.
        visited: Passed through unchanged to ``GetVistedSiblings`` and
            ``GetMatchedSiblings``.
    """
    parent = link.parentNode
    childUrlId = link.childNode.urlId

    bucketKey = (
        parent.lang,
        len(parent.links),
        len(GetVistedSiblings(childUrlId, parent, visited)),
        len(GetMatchedSiblings(childUrlId, parent, visited)),
    )

    # Create the bucket on first use, then add the link to it.
    self.dict.setdefault(bucketKey, []).append(link)