Exemplo n.º 1
0
    def receiveQuery(self, query):
        """Process a query received from another node.

        A query whose qid is already present in ``_receivedQueries`` or
        ``_sentQueries`` is ignored.  Otherwise the query is recorded,
        hopped, forwarded through ``sendQuery`` while its ttl is still
        positive, and run against the local querier.  When documents match,
        their text is replaced by an abstract and they are handed to
        ``relayAnswer`` together with this node's provider tuple; when
        nothing matches, no answer is produced.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        if query.qid in self._receivedQueries or \
           query.qid in self._sentQueries:
            return

        # The guard above already proved the qid is unknown, so the query is
        # recorded unconditionally (re-testing _sentQueries here was dead).
        print("P2pQuerier receiveQuery : %s from %s:%s "
              % (query.getWords(), query.client_host, query.client_port))
        self._receivedQueries[query.qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        documents = self.querier.findDocuments(query.query)

        if not documents:
            print(" ... no document matching the query, won't answer.")
            return

        # only abstracts travel over the network, never the full text
        for doc in documents:
            abstract = makeAbstract(doc.text, query.getWords())
            doc.text = untagText(removeSpace(abstract))

        # provider is a 4-uple (login, node_id, IP, xmlrpc-port)
        provider = (NODE_LOGIN,
                    NODE_CONFIG.get_node_id(),
                    NODE_HOST,
                    NODE_CONFIG.rpcserver_port)

        self.relayAnswer(P2pAnswer(query.qid, provider, documents))
Exemplo n.º 2
0
 def __init__(self, context, querier, p2pquerier):
     """Set up the page state for the query carried by ``context``.

     The query is rebuilt with ``Query.fromContext``.  When the context has
     fewer than two remaining segments, the local matches are reduced to
     abstracts, a ``P2pQuery`` is created for this node, old results are
     purged, the local documents are pushed under the new qid with a
     'localhost' provider, and the stored results for the query are loaded
     into ``self.results``.
     """
     athena.LivePage.__init__(self)
     # NOTE: At the time this comment is written, athena/LivePages are handled
     #       differently in nevow SVN. It's now possible to instantiate
     #       LivePage instances directly (which is great !), so we'll have to
     #       change the implementation for next nevow release.
     self.querier = querier
     self.p2pquerier = p2pquerier
     self.query = Query.fromContext(context)
     self.offset = self.query.offset
     self.onlyLocal = self.onlyDistant = False
     # local results are pushed only once
     if len(inevow.IRemainingSegments(context)) < 2:
         # the results table only ever holds abstracts
         abstracts = []
         for hit in querier.findDocuments(self.query):
             hit.text = makeAbstract(hit.text, self.query.words)
             abstracts.append(hit)
         nodeConfig = INodeConfiguration(context)
         distributed = P2pQuery(sender=nodeConfig.get_node_id(),
                                query=self.query)
         self.qid = distributed.qid
         self.p2pQuery = distributed
         # drop stale results before storing the fresh ones
         self.querier.purgeOldResults()
         self.querier.pushDocuments(
             self.qid,
             abstracts,
             (NODE_LOGIN, NODE_CONFIG.get_node_id(), 'localhost', 0))
         self.results = self.querier.getQueryResults(self.query)
Exemplo n.º 3
0
 def relayAnswer(self, answer, local=False): # local still unused
     """Register the new matches of an answer and pass them along.

     The query is looked up by ``answer.queryId``, first among the received
     queries and then among the sent ones; an answer matching neither is
     dropped.  Documents the query does not know yet get their text
     replaced by an abstract and are added to the query.  They are then
     posted over XML-RPC to ``query.host:query.port`` when the query was
     sent by another node, or handed to the local answer callbacks
     otherwise.

     ``local`` is not read here; it is meant to flag answers coming from a
     local query, which must not be recorded in the database.
     """
     print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))
     query = self._receivedQueries.get(answer.queryId)
     if query:
         print(" ... relaying Answer to originator ...")
     else:
         query = self._sentQueries.get(answer.queryId)
         if not query:
             print(" ... bailing out (bug?) : we had no query for this answer")
             return
         print(" ... originator : we got mail :) ... ")

     toSend = []
     for item in answer.documents:
         if isinstance(item, dict):
             fields = item
         else:
             fields = item.__dict__
         # TODO: record answer in database if local is False
         # auc : to cache them ?
         if query.isKnown(fields):
             continue
         summary = makeAbstract(fields['text'], query.getWords())
         fields['text'] = untagText(removeSpace(summary))
         query.addMatch(fields)
         # the plain dict is sent as is: it is xmlrpc-serializable (auc)
         toSend.append(fields)

     if query.sender != self.nodeId:
         try:
             # getNodeUrl seems not to exist yet, hence the host/port pair
             (host, port) = (query.host, query.port)
             print(" ... will send answer to %s:%s" % (host, port))
             senderUrl = 'http://%s:%s' % (host, port)
             d = Proxy(senderUrl).callRemote('distributedQueryAnswer',
                                             query.qid,
                                             self.nodeId,
                                             toSend)
             d.addCallback(self.querier.registerNodeActivity)
             d.addErrback(P2pErrbacks.answerQueryProblem)
             P2pErrbacks.setAnswerTarget(senderUrl)
         except ValueError:
             print("unknown node %s" % query.sender)
     else: # local would be true ? don't waste the answers ...
         self._notifyAnswerCallbacks(answer.queryId, toSend)
Exemplo n.º 4
0
    def relayAnswer(self, answer, local=False):  # local still unused
        """Register the new matches of an answer and pass them along.

        The query is looked up by ``answer.queryId``, first among the
        received queries and then among the sent ones; an answer matching
        neither is dropped.  Documents the query does not know yet get their
        text replaced by an abstract and are added to the query.  They are
        then posted over XML-RPC to ``query.host:query.port`` when the query
        was sent by another node, or handed to the local answer callbacks
        otherwise.

        ``local`` is not read here; it is meant to flag answers coming from
        a local query, which must not be recorded in the database.
        """
        print("P2pQuerier relayAnswer : %s documents" % len(answer.documents))
        query = self._receivedQueries.get(answer.queryId)
        if query:
            print(" ... relaying Answer to originator ...")
        else:
            query = self._sentQueries.get(answer.queryId)
            if not query:
                print(" ... bailing out (bug?) : we had no query for this answer")
                return
            print(" ... originator : we got mail :) ... ")

        toSend = []
        for item in answer.documents:
            if isinstance(item, dict):
                fields = item
            else:
                fields = item.__dict__
            # TODO: record answer in database if local is False
            # auc : to cache them ?
            if query.isKnown(fields):
                continue
            summary = makeAbstract(fields['text'], query.getWords())
            fields['text'] = untagText(removeSpace(summary))
            query.addMatch(fields)
            # the plain dict is sent as is: it is xmlrpc-serializable (auc)
            toSend.append(fields)

        if query.sender != self.nodeId:
            try:
                # getNodeUrl seems not to exist yet, hence the host/port pair
                (host, port) = (query.host, query.port)
                print(" ... will send answer to %s:%s" % (host, port))
                senderUrl = 'http://%s:%s' % (host, port)
                d = Proxy(senderUrl).callRemote('distributedQueryAnswer',
                                                query.qid,
                                                self.nodeId,
                                                toSend)
                d.addCallback(self.querier.registerNodeActivity)
                d.addErrback(P2pErrbacks.answerQueryProblem)
                P2pErrbacks.setAnswerTarget(senderUrl)
            except ValueError:
                print("unknown node %s" % query.sender)
        else:  # local would be true ? don't waste the answers ...
            self._notifyAnswerCallbacks(answer.queryId, toSend)
Exemplo n.º 5
0
 def render_row(self, context, data):
     """Fill the template slots describing one result document.

     ``data`` is the document to render.  The 'mime_type' slot receives the
     document's mime type with every "/" replaced by "_", and the
     'doctitle' slot its title passed through ``boldifyText`` with the
     words of ``self.query.split()``.  An abstract is then built from the
     document text; if that raises, the traceback and the exception are
     printed and a placeholder abstract is used instead.

     NOTE(review): nothing in the visible lines consumes ``abstract``; the
     method presumably continues beyond this excerpt -- confirm.
     """
     document = data
     words = self.query.split()
     context.fillSlots('mime_type', re.sub("/", "_", document.mime_type))
     context.fillSlots('doctitle',
                       tags.xml(boldifyText(document.title, words)))
     # XXX abstract attribute should be a unicode string
     try:
         abstract = makeAbstract(document.text, words)
         abstract = normalize_text(unicode(abstract))
     except Exception, exc:
         # best effort: rendering a row must not fail because of one abstract
         import traceback
         traceback.print_exc()
         print exc
         abstract = u'No abstract available for this document [%s]' % exc
Exemplo n.º 6
0
 def render_row(self, context, data):
     """Fill the template slots describing one result document.

     ``data`` is the document to render.  The 'mime_type' slot receives the
     document's mime type with every "/" replaced by "_", and the
     'doctitle' slot its title passed through ``boldifyText`` with the
     words of ``self.query.split()``.  An abstract is then built from the
     document text; if that raises, the traceback and the exception are
     printed and a placeholder abstract is used instead.

     NOTE(review): nothing in the visible lines consumes ``abstract``; the
     method presumably continues beyond this excerpt -- confirm.
     """
     document = data
     words = self.query.split()
     context.fillSlots('mime_type', re.sub("/", "_", document.mime_type))
     context.fillSlots('doctitle',
                       tags.xml(boldifyText(document.title, words)))
     # XXX abstract attribute should be a unicode string
     try:
         abstract = makeAbstract(document.text, words)
         abstract = normalize_text(unicode(abstract))
     except Exception, exc:
         # best effort: rendering a row must not fail because of one abstract
         import traceback
         traceback.print_exc()
         print exc
         abstract = u'No abstract available for this document [%s]' % exc
Exemplo n.º 7
0
    def relayAnswer(self, answer):
        """Register the documents of an answer and pass them along.

        The query is looked up by ``answer.qid``, first among the received
        queries and then among the sent ones; an answer matching neither is
        dropped.  Every document gets its text replaced by an abstract and
        is added to the query; documents that are not dicts are converted
        through ``__dict__`` and have their 'url' reduced to its basename.
        The documents are then posted over XML-RPC to
        ``query.client_host:query.client_port`` when the query was sent by
        another node, or handed to the local answer callbacks otherwise.

        ``answer.provider`` is indexed at 1, 2 and 3 here -- presumably the
        (login, node_id, IP, xmlrpc-port) tuple; confirm against the caller.
        """
        print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
              % (len(answer.documents), answer.provider[2],
                 answer.provider[3]))
        query = self._receivedQueries.get(answer.qid)
        if not query:
            query = self._sentQueries.get(answer.qid)
        if not query:
            print(" ... bug or dos : we had no query for this answer")
            return

        toSend = []
        for item in answer.documents:
            if isinstance(item, dict):
                fields = item
            else:
                fields = item.__dict__
                if 'url' in fields:
                    fields['url'] = os.path.basename(fields['url'])
            # TODO: record answer in database if local is False
            # auc : to have them in Document with state == KNOWN
            # FIXME: duplicates are not filtered out (no query.isKnown test);
            #        shouldn't all documents be added regardless, so as to
            #        add a new provider entry ?
            summary = makeAbstract(fields['text'], query.getWords())
            fields['text'] = untagText(removeSpace(summary))
            query.addMatch(fields)
            toSend.append(fields)

        if query.sender != NODE_CONFIG.get_node_id():
            self.querier.registerNodeActivity(answer.provider[1])
            host = query.client_host
            port = query.client_port
            print(" ... relaying Answer to %s:%s ..." % (host, port))
            senderUrl = 'http://%s:%s' % (host, port)
            d = Proxy(senderUrl).callRemote('distributedQueryAnswer',
                                            query.qid,
                                            NODE_CONFIG.get_node_id(),
                                            answer.provider,
                                            toSend)
            d.addErrback(answerQueryErrback(query))
        else:
            print(" ... originator : we got an answer !")
            self._notifyAnswerCallbacks(answer.qid, answer.provider, toSend)
Exemplo n.º 8
0
    def relayAnswer(self, answer):
        """Register the documents of an answer and pass them along.

        The query is looked up by ``answer.qid``, first among the received
        queries and then among the sent ones; an answer matching neither is
        dropped.  Every document (converted through ``__dict__`` when it is
        not a dict) has its base64-encoded 'url' reduced to the basename,
        gets its text replaced by an abstract and is added to the query.
        The documents are then posted over XML-RPC to
        ``query.client_host:query.client_port`` when the query was sent by
        another node, or handed to the local answer callbacks otherwise.

        ``answer.provider`` is indexed at 1, 2 and 3 here -- presumably the
        (login, node_id, IP, xmlrpc-port) tuple; confirm against the caller.
        """
        print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
              % (len(answer.documents), answer.provider[2],
                 answer.provider[3]))
        query = (self._receivedQueries.get(answer.qid)
                 or self._sentQueries.get(answer.qid))
        if not query:
            print(" ... bug or dos : we had no query for this answer")
            return

        toSend = []
        for item in answer.documents:
            if isinstance(item, dict):
                fields = item
            else:
                fields = item.__dict__
            # only node-local docs will exhibit their full pathname
            if 'url' in fields:
                full_path = base64.decodestring(fields['url'])
                fields['url'] = base64.encodestring(
                    os.path.basename(full_path))
            # TODO: record answer in database if local is False
            # auc : to have them in Document with state == KNOWN
            summary = makeAbstract(fields['text'], query.getWords())
            fields['text'] = untagText(removeSpace(summary))
            query.addMatch(fields)
            toSend.append(fields)

        if query.sender != NODE_CONFIG.get_node_id():
            self.querier.registerNodeActivity(answer.provider[1])
            host = query.client_host
            port = query.client_port
            print(" ... relaying Answer to %s:%s ..." % (host, port))
            senderUrl = 'http://%s:%s' % (host, port)
            d = Proxy(senderUrl).callRemote('distributedQueryAnswer',
                                            query.qid,
                                            NODE_CONFIG.get_node_id(),
                                            answer.provider,
                                            toSend)
            d.addErrback(answerQueryErrback(query))
        else:
            print(" ... originator : we got an answer !")
            self._notifyAnswerCallbacks(answer.qid, answer.provider, toSend)
Exemplo n.º 9
0
    def receiveQuery(self, query):
        """Process a query received from another node.

        A query whose qid is already present in ``_receivedQueries`` or
        ``_sentQueries`` ends its trip here.  Otherwise the query is
        recorded, hopped, forwarded through ``sendQuery`` while its ttl is
        still positive, and run against the local querier.  The matching
        documents (possibly none) have their text replaced by an abstract
        and are handed to ``relayAnswer``.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        print("P2pQuerier receiveQuery : %s" % query)
        if query.qid in self._receivedQueries or \
           query.qid in self._sentQueries:
            print(" ... we already know query %s, this ends the trip"
                  % query.qid)
            return

        # The guard above already proved the qid is unknown, so the query is
        # recorded unconditionally (re-testing _sentQueries here was dead).
        print(" ... %s is a new query, let's work ..." % query.qid)
        self._receivedQueries[query.qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        documents = self.querier.findDocuments(query.query)
        # only abstracts travel over the network, never the full text
        for doc in documents:
            abstract = makeAbstract(doc.text, query.getWords())
            doc.text = untagText(removeSpace(abstract))

        self.relayAnswer(P2pAnswer(query.qid, documents))
Exemplo n.º 10
0
    def receiveQuery(self, query):
        """Process a query received from another node.

        A query whose qid is already present in ``_receivedQueries`` or
        ``_sentQueries`` ends its trip here.  Otherwise the query is
        recorded, hopped, forwarded through ``sendQuery`` while its ttl is
        still positive, and run against the local querier.  The matching
        documents (possibly none) have their text replaced by an abstract
        and are handed to ``relayAnswer``.

        :type query: `maay.p2pquerier.P2pQuery`
        """
        print("P2pQuerier receiveQuery : %s" % query)
        if query.qid in self._receivedQueries or \
           query.qid in self._sentQueries:
            print(" ... we already know query %s, this ends the trip"
                  % query.qid)
            return

        # The guard above already proved the qid is unknown, so the query is
        # recorded unconditionally (re-testing _sentQueries here was dead).
        print(" ... %s is a new query, let's work ..." % query.qid)
        self._receivedQueries[query.qid] = query

        query.hop()
        if query.ttl > 0:
            self.sendQuery(query)

        documents = self.querier.findDocuments(query.query)
        # only abstracts travel over the network, never the full text
        for doc in documents:
            abstract = makeAbstract(doc.text, query.getWords())
            doc.text = untagText(removeSpace(abstract))

        self.relayAnswer(P2pAnswer(query.qid, documents))
Exemplo n.º 11
0
    def testSimple(self):
        # Check excerpt at the beginning of the text: the expected abstract
        # shows both 'free' and 'Free' in bold, with skipped passages marked
        # by a bold ellipsis.

        abstract = makeAbstract(self.text, [u"free"])
        expected = u'This program is <b>free</b> software; you can redistribute it and/or modify it under the terms of <b>...</b>  Public License as published by the <b>Free</b> Software Foundation; either version 2 of the License, or (at your <b>...</b>  this program; if not, write to the <b>Free</b> Software Foundation, Inc., 51 <b>...</b>'
        # assertEqual is the canonical name; the assertEquals alias is
        # deprecated and no longer exists in Python 3.12+.
        self.assertEqual(expected, abstract)
Exemplo n.º 12
0
 def testWordAtEnd(self):
     # The expected abstract opens with a bold ellipsis and runs up to the
     # last characters of the text, with the searched word in bold.
     abstract = makeAbstract(self.text, [u"Boston"])
     expected = ' <b>...</b>  Inc., 51 Franklin St, Fifth Floor, <b>Boston</b>, MA  02110-1301 USA'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 13
0
 def testUnknownWord(self):
     # With a word that is not highlighted anywhere, the expected abstract
     # is the beginning of the text followed by a bold ellipsis.
     abstract = makeAbstract(self.text, [u"FOOBAR"])
     expected = 'This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, <b>...</b>'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 14
0
 def testMixedCase(self):
     # The searched word is given in mixed case ("pUrPoSe"); the expected
     # abstract highlights the text's own spelling, "PURPOSE".
     abstract = makeAbstract(self.text, [u"pUrPoSe"])
     expected = ' <b>...</b>  or FITNESS FOR A PARTICULAR <b>PURPOSE</b>.  See the GNU General Public <b>...</b>'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 15
0
    def testSimple(self):
        # Check excerpt at the beginning of the text: the expected abstract
        # shows both 'free' and 'Free' in bold, with skipped passages marked
        # by a bold ellipsis.

        abstract = makeAbstract(self.text, [u"free"])
        expected = u'This program is <b>free</b> software; you can redistribute it and/or modify it under the terms of <b>...</b>  Public License as published by the <b>Free</b> Software Foundation; either version 2 of the License, or (at your <b>...</b>  this program; if not, write to the <b>Free</b> Software Foundation, Inc., 51 <b>...</b>'
        # assertEqual is the canonical name; the assertEquals alias is
        # deprecated and no longer exists in Python 3.12+.
        self.assertEqual(expected, abstract)
Exemplo n.º 16
0
 def testWordAtEnd(self):
     # The expected abstract opens with a bold ellipsis and runs up to the
     # last characters of the text, with the searched word in bold.
     abstract = makeAbstract(self.text, [u"Boston"])
     expected = ' <b>...</b>  Inc., 51 Franklin St, Fifth Floor, <b>Boston</b>, MA  02110-1301 USA'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 17
0
 def testUnknownWord(self):
     # With a word that is not highlighted anywhere, the expected abstract
     # is the beginning of the text followed by a bold ellipsis.
     abstract = makeAbstract(self.text, [u"FOOBAR"])
     expected = 'This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, <b>...</b>'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 18
0
 def testMixedCase(self):
     # The searched word is given in mixed case ("pUrPoSe"); the expected
     # abstract highlights the text's own spelling, "PURPOSE".
     abstract = makeAbstract(self.text, [u"pUrPoSe"])
     expected = ' <b>...</b>  or FITNESS FOR A PARTICULAR <b>PURPOSE</b>.  See the GNU General Public <b>...</b>'
     # assertEqual is the canonical name; the assertEquals alias is
     # deprecated and no longer exists in Python 3.12+.
     self.assertEqual(expected, abstract)
Exemplo n.º 19
0
 def testLength200AndUnknownWord(self):
     # Regression check for bug #5648: building an abstract from the first
     # 200 characters of the fixture text with the word 'tralala' must not
     # raise IndexError.  Only the absence of that exception is checked in
     # the visible lines; the returned abstract is not inspected.
     try:
         abstract = makeAbstract(self.text[:200], [u'tralala'])
     except IndexError:
         # turn the crash into an explicit test failure naming the bug
         self.fail('bug #5648 is present')