Пример #1
0
    def runLocalSearch(self, alpha, iterations):
        """Refine the keyword -> link assignment by greedy local search.

        Starting from ``self.getInitialState()``, every keyword is visited
        ``iterations`` times. For each keyword, every candidate link (the
        first element of each entry in ``self.cachedPages[keyword]``) is
        scored against the links currently assigned to all *other* keywords,
        and replaces the keyword's current assignment when its score is
        strictly higher.

        The score ("psi") of a page is the combination

            (1 - alpha) * linkRelevance + alpha * sum(documentRelevances)

        where ``linkRelevance`` comes from
        ``RelevanceModel.linkRelevance(self.keywords, page)`` and the document
        relevances come from ``RelevanceModel.documentRelevance(page, other)``
        for each page assigned to another keyword.

        Args:
            alpha: Weight of the summed document relevances; ``1 - alpha`` is
                the weight of the link relevance.
            iterations: Number of full passes over ``self.keywords``.

        Returns:
            The state mapping, in which each visited keyword maps to a
            ``(link, linkRelevance)`` tuple whenever it was reassigned.
        """
        # NOTE: single-argument print(...) calls behave identically as a
        # Python 2 print statement and as the Python 3 print function.
        state = self.getInitialState()
        print("Initial State Obtained!")
        print(state)
        print("Running Local Search for " + str(iterations) + " iterations...")

        def evaluate(page, contextPages):
            # Return (linkRelevance, psi) for `page` given the context pages.
            documentRelevances = [
                RelevanceModel.documentRelevance(page, contextPage)
                for contextPage in contextPages
            ]
            linkRelevance = RelevanceModel.linkRelevance(self.keywords, page)
            psi = (1 - alpha) * linkRelevance + alpha * sum(documentRelevances)
            return linkRelevance, psi

        for i in range(iterations):
            for keyword in self.keywords:
                print("")
                print("Analyzing the keyword " + keyword)
                candidateLinks = [k[0] for k in self.cachedPages[keyword]]
                assignedLink = state[keyword][0]

                # Pages assigned to every other keyword. Only state[keyword]
                # changes while candidates are tried, so this context is
                # invariant for the whole candidate loop and is built once.
                contextPages = [
                    self.retrieveCachedPage(v[0])
                    for k, v in state.items() if k != keyword
                ]

                # Score of the link currently assigned to this keyword.
                assignedPage = self.retrieveCachedPage(assignedLink)
                _, currentPsi = evaluate(assignedPage, contextPages)

                for candidateLink in candidateLinks:
                    candidatePage = self.retrieveCachedPage(candidateLink)
                    candidateLinkRelevance, candidatePsi = evaluate(
                        candidatePage, contextPages)

                    print("")
                    print("Current Psi Value " + str(currentPsi) + " For " + assignedLink)
                    print("Candidate Link Psi Value " + str(candidatePsi) + " For " + candidateLink)

                    # If the candidate link's combination is greater than the
                    # current link's combination, we replace that assignment.
                    if candidatePsi > currentPsi:
                        state[keyword] = (candidateLink, candidateLinkRelevance)
                        # Later candidates must beat the new assignment, not
                        # the one that was in place when the loop started.
                        assignedLink = candidateLink
                        currentPsi = candidatePsi
                        print("Replaced Link")
            print(str(i + 1) + "/" + str(iterations) + " iterations complete")

        return state
Пример #2
0
    def getMaxRelevance(self, keyword):
        """Return the highest-scoring candidate link for ``keyword``.

        The candidate links are the first elements of the entries in
        ``self.cachedPages[keyword]``. Each one is scored with
        ``RelevanceModel.linkRelevance(self.keywords, link)``.

        Returns:
            A ``(link, score)`` tuple for the candidate with the largest
            score; on ties the earliest candidate wins.

        Raises:
            ValueError: If the keyword has no candidate links (``max`` of an
                empty sequence).
        """
        # NOTE(review): the link itself is passed to linkRelevance here;
        # confirm that is the argument type linkRelevance expects.
        links = [entry[0] for entry in self.cachedPages[keyword]]
        scoredLinks = [
            (link, RelevanceModel.linkRelevance(self.keywords, link))
            for link in links
        ]

        # Pick the pair whose score (second element) is the largest.
        return max(scoredLinks, key=lambda pair: pair[1])