示例#1
0
    def receiveQuery(self, query):
        """Process a query handed to this node.

        A query whose qid is already present in the received or sent
        registries is ignored. Otherwise the query is recorded as received,
        hopped, passed to ``sendQuery`` while its ttl is still positive, and
        answered through ``relayAnswer`` when ``findDocuments`` returns
        at least one document.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        qid = query.qid
        alreadySeen = (qid in self._receivedQueries
                       or qid in self._sentQueries)
        if alreadySeen:
            return

        # Past the guard the qid cannot be in self._sentQueries, so the
        # query is unconditionally logged and recorded.
        print("P2pQuerier receiveQuery : %s from %s:%s "
              % (query.getWords(), query.client_host, query.client_port))
        self._receivedQueries[qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        matches = self.querier.findDocuments(query.query)
        if len(matches) == 0:
            print(" ... no document matching the query, won't answer.")
            return

        # Replace the text of every match by an abstract built from the
        # query words, with spaces and tags cleaned up.
        for match in matches:
            summary = makeAbstract(match.text, query.getWords())
            match.text = untagText(removeSpace(summary))

        # provider is a 4-uple (login, node_id, IP, xmlrpc-port)
        provider = (
            NODE_LOGIN,
            NODE_CONFIG.get_node_id(),
            NODE_HOST,
            NODE_CONFIG.rpcserver_port,
        )
        self.relayAnswer(P2pAnswer(query.qid, provider, matches))
示例#2
0
 def relayAnswer(self, answer, local=False): # local still unused
     """Record the new documents of an answer and pass them on.

     The query is looked up by ``answer.queryId``, first among the
     received queries, then among the sent ones; when neither registry
     holds it, the answer is dropped. Every document the query does not
     know yet gets its text replaced by an abstract and is added to the
     query's matches. Those documents are then either sent to the query's
     host/port through a ``distributedQueryAnswer`` remote call (when the
     query sender is another node) or handed to the local answer
     callbacks.

     ``local`` is not used yet: it is meant to flag answers coming from a
     local query, which must not be recorded in the database.
     """
     print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))
     queryId = answer.queryId
     query = self._receivedQueries.get(queryId)
     if query:
         print(" ... relaying Answer to originator ...")
     else:
         query = self._sentQueries.get(queryId)
         if not query:
             print(" ... bailing out (bug?) : we had no query for this answer")
             return
         print(" ... originator : we got mail :) ... ")

     toSend = []
     for item in answer.documents:
         # work on a plain dict: it is xmlrpc-serializable (auc)
         document = item if isinstance(item, dict) else item.__dict__
         # TODO: record answer in database if local is False
         # auc : to cache them ?
         if query.isKnown(document):
             continue
         summary = makeAbstract(document['text'], query.getWords())
         document['text'] = untagText(removeSpace(summary))
         query.addMatch(document)
         toSend.append(document)

     if query.sender != self.nodeId:
         try:
             # getNodeUrl seems not to exist yet, so the URL is built
             # from the host and port stored on the query
             target = (query.host, query.port)
             print(" ... will send answer to %s:%s" % target)
             senderUrl = 'http://%s:%s' % target
             deferred = Proxy(senderUrl).callRemote(
                 'distributedQueryAnswer', query.qid, self.nodeId, toSend)
             deferred.addCallback(self.querier.registerNodeActivity)
             deferred.addErrback(P2pErrbacks.answerQueryProblem)
             P2pErrbacks.setAnswerTarget(senderUrl)
         except ValueError:
             print("unknown node %s" % query.sender)
     else: # local would be true ? don't waste the answers ...
         self._notifyAnswerCallbacks(queryId, toSend)
示例#3
0
    def relayAnswer(self, answer, local=False):  # local still unused
        """Record the new documents of an answer and pass them on.

        The query is looked up by ``answer.queryId``, first among the
        received queries, then among the sent ones; when neither registry
        holds it, the answer is dropped. Every document the query does not
        know yet gets its text replaced by an abstract and is added to the
        query's matches. Those documents are then either sent to the
        query's host/port through a ``distributedQueryAnswer`` remote call
        (when the query sender is another node) or handed to the local
        answer callbacks.

        ``local`` is not used yet: it is meant to flag answers coming from
        a local query, which must not be recorded in the database.
        """
        print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))
        queryId = answer.queryId
        query = self._receivedQueries.get(queryId)
        if query:
            print(" ... relaying Answer to originator ...")
        else:
            query = self._sentQueries.get(queryId)
            if not query:
                print(" ... bailing out (bug?) : we had no query for this answer")
                return
            print(" ... originator : we got mail :) ... ")

        toSend = []
        for item in answer.documents:
            # work on a plain dict: it is xmlrpc-serializable (auc)
            document = item if isinstance(item, dict) else item.__dict__
            # TODO: record answer in database if local is False
            # auc : to cache them ?
            if query.isKnown(document):
                continue
            summary = makeAbstract(document['text'], query.getWords())
            document['text'] = untagText(removeSpace(summary))
            query.addMatch(document)
            toSend.append(document)

        if query.sender != self.nodeId:
            try:
                # getNodeUrl seems not to exist yet, so the URL is built
                # from the host and port stored on the query
                target = (query.host, query.port)
                print(" ... will send answer to %s:%s" % target)
                senderUrl = 'http://%s:%s' % target
                deferred = Proxy(senderUrl).callRemote(
                    'distributedQueryAnswer', query.qid, self.nodeId, toSend)
                deferred.addCallback(self.querier.registerNodeActivity)
                deferred.addErrback(P2pErrbacks.answerQueryProblem)
                P2pErrbacks.setAnswerTarget(senderUrl)
            except ValueError:
                print("unknown node %s" % query.sender)
        else:  # local would be true ? don't waste the answers ...
            self._notifyAnswerCallbacks(queryId, toSend)
示例#4
0
    def relayAnswer(self, answer):
        """Record the documents of an answer and pass them on.

        The query is looked up by ``answer.qid`` among the received
        queries, then among the sent ones; without a match the answer is
        dropped. Each document has its text replaced by an abstract and is
        added to the query's matches. The documents are then sent to the
        query's client host/port through a ``distributedQueryAnswer``
        remote call when the query sender is another node, or handed to
        the local answer callbacks otherwise.
        """
        provider = answer.provider
        print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
              % (len(answer.documents), provider[2], provider[3]))

        query = (self._receivedQueries.get(answer.qid)
                 or self._sentQueries.get(answer.qid))
        if not query:
            print(" ... bug or dos : we had no query for this answer")
            return

        toSend = []
        for entry in answer.documents:
            if isinstance(entry, dict):
                document = entry
            else:
                document = entry.__dict__
                # only documents that were not dicts get their url
                # reduced to its basename
                if 'url' in document:
                    document['url'] = os.path.basename(document['url'])
            # TODO: record answer in database if local is False
            # auc : to have them in Document with state == KNOWN
            # NOTE: the former query.isKnown(document) filter is disabled,
            #       so every document is added, duplicates included.
            # FIXME: shouldn't we add all documents regardless of
            #        duplicates, so as to add a new provider entry ?
            summary = makeAbstract(document['text'], query.getWords())
            document['text'] = untagText(removeSpace(summary))
            query.addMatch(document)
            toSend.append(document)

        if query.sender != NODE_CONFIG.get_node_id():
            self.querier.registerNodeActivity(answer.provider[1])
            target = (query.client_host, query.client_port)
            print(" ... relaying Answer to %s:%s ..." % target)
            senderUrl = 'http://%s:%s' % target
            deferred = Proxy(senderUrl).callRemote(
                'distributedQueryAnswer',
                query.qid,
                NODE_CONFIG.get_node_id(),
                answer.provider,
                toSend)
            deferred.addErrback(answerQueryErrback(query))
        else:
            print(" ... originator : we got an answer !")
            self._notifyAnswerCallbacks(answer.qid, answer.provider, toSend)
示例#5
0
    def relayAnswer(self, answer):
        """Record the documents of an answer and pass them on.

        The query is looked up by ``answer.qid`` among the received
        queries, then among the sent ones; without a match the answer is
        dropped. For each document, a 'url' entry (if any) is
        base64-decoded, reduced to its basename and re-encoded; the text is
        replaced by an abstract and the document is added to the query's
        matches. The documents are then sent to the query's client
        host/port through a ``distributedQueryAnswer`` remote call when the
        query sender is another node, or handed to the local answer
        callbacks otherwise.
        """
        provider = answer.provider
        print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
              % (len(answer.documents), provider[2], provider[3]))

        query = (self._receivedQueries.get(answer.qid)
                 or self._sentQueries.get(answer.qid))
        if not query:
            print(" ... bug or dos : we had no query for this answer")
            return

        toSend = []
        for entry in answer.documents:
            document = entry if isinstance(entry, dict) else entry.__dict__
            # only node-local docs will exhibit their full pathname
            if 'url' in document:
                fullUrl = base64.decodestring(document['url'])
                shortUrl = os.path.basename(fullUrl)
                document['url'] = base64.encodestring(shortUrl)
            # TODO: record answer in database if local is False
            # auc : to have them in Document with state == KNOWN
            summary = makeAbstract(document['text'], query.getWords())
            document['text'] = untagText(removeSpace(summary))
            query.addMatch(document)
            toSend.append(document)

        if query.sender != NODE_CONFIG.get_node_id():
            self.querier.registerNodeActivity(answer.provider[1])
            target = (query.client_host, query.client_port)
            print(" ... relaying Answer to %s:%s ..." % target)
            senderUrl = 'http://%s:%s' % target
            deferred = Proxy(senderUrl).callRemote(
                'distributedQueryAnswer',
                query.qid,
                NODE_CONFIG.get_node_id(),
                answer.provider,
                toSend)
            deferred.addErrback(answerQueryErrback(query))
        else:
            print(" ... originator : we got an answer !")
            self._notifyAnswerCallbacks(answer.qid, answer.provider, toSend)
示例#6
0
    def receiveQuery(self, query):
        """Process a query handed to this node.

        A query whose qid is already present in the received or sent
        registries ends its trip here. Otherwise the query is recorded as
        received, hopped, passed to ``sendQuery`` while its ttl is still
        positive, and answered through ``relayAnswer`` with the documents
        returned by ``findDocuments``.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        print("P2pQuerier receiveQuery : %s" % query)
        qid = query.qid
        alreadySeen = (qid in self._receivedQueries
                       or qid in self._sentQueries)
        if alreadySeen:
            print(" ... we already know query %s, this ends the trip" % query.qid)
            return

        # Past the guard the qid cannot be in self._sentQueries, so the
        # query is unconditionally logged and recorded.
        print(" ... %s is a new query, let's work ..." % query.qid)
        self._receivedQueries[qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        # Replace the text of every match by an abstract built from the
        # query words, with spaces and tags cleaned up.
        matches = self.querier.findDocuments(query.query)
        for match in matches:
            summary = makeAbstract(match.text, query.getWords())
            match.text = untagText(removeSpace(summary))

        self.relayAnswer(P2pAnswer(query.qid, matches))
示例#7
0
    def receiveQuery(self, query):
        """Process a query handed to this node.

        A query whose qid is already present in the received or sent
        registries ends its trip here. Otherwise the query is recorded as
        received, hopped, passed to ``sendQuery`` while its ttl is still
        positive, and answered through ``relayAnswer`` with the documents
        returned by ``findDocuments``.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        print("P2pQuerier receiveQuery : %s" % query)
        qid = query.qid
        alreadySeen = (qid in self._receivedQueries
                       or qid in self._sentQueries)
        if alreadySeen:
            print(" ... we already know query %s, this ends the trip" % query.qid)
            return

        # Past the guard the qid cannot be in self._sentQueries, so the
        # query is unconditionally logged and recorded.
        print(" ... %s is a new query, let's work ..." % query.qid)
        self._receivedQueries[qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        # Replace the text of every match by an abstract built from the
        # query words, with spaces and tags cleaned up.
        matches = self.querier.findDocuments(query.query)
        for match in matches:
            summary = makeAbstract(match.text, query.getWords())
            match.text = untagText(removeSpace(summary))

        self.relayAnswer(P2pAnswer(query.qid, matches))
 def testUntag(self):
     """Check that untagText() turns a tagged snippet into 'Hello world !'."""
     text = 'Hello <a href="foo.bar.com">world <b>!</b></a><img alt="" />'
     # assertEqual instead of assertEquals: the latter is a deprecated
     # alias that newer unittest versions no longer provide.
     self.assertEqual(untagText(text), 'Hello world !')
 def testUntag(self):
     """Check that untagText() turns a tagged snippet into 'Hello world !'."""
     text = 'Hello <a href="foo.bar.com">world <b>!</b></a><img alt="" />'
     # assertEqual instead of assertEquals: the latter is a deprecated
     # alias that newer unittest versions no longer provide.
     self.assertEqual(untagText(text), 'Hello world !')