def receiveQuery(self, query):
    """Process a query handed to this node.

    A query whose qid is already present in ``_receivedQueries`` or
    ``_sentQueries`` is dropped silently, so the same query is never
    processed twice.  Otherwise the query is recorded, ``query.hop()`` is
    called, the query is forwarded with ``sendQuery`` while its ttl is
    still positive, and the matching local documents (if any) are
    answered through ``relayAnswer``.

    The ``text`` of every matching document is replaced, in place, by a
    cleaned-up abstract built from the query words.

    :type query: `maay.p2pquerier.P2pQuery`
    """
    qid = query.qid
    if qid in self._receivedQueries or qid in self._sentQueries:
        # duplicate (or one of our own queries coming back): ignore it
        return

    # Past the guard above the query is necessarily unknown, so the
    # former "not in self._sentQueries" re-check was always true.
    # Single-argument parenthesised print behaves the same with the
    # Python 2 print statement and with the print function.
    print("P2pQuerier receiveQuery : %s from %s:%s "
          % (query.getWords(), query.client_host, query.client_port))
    self._receivedQueries[qid] = query

    query.hop()
    if query.ttl > 0:
        self.sendQuery(query)

    documents = self.querier.findDocuments(query.query)
    if not documents:
        print(" ... no document matching the query, won't answer.")
        return

    for doc in documents:
        abstract = makeAbstract(doc.text, query.getWords())
        doc.text = untagText(removeSpace(abstract))

    # provider is a 4-uple (login, node_id, IP, xmlrpc-port)
    provider = (NODE_LOGIN,
                NODE_CONFIG.get_node_id(),
                NODE_HOST,
                NODE_CONFIG.rpcserver_port)
    self.relayAnswer(P2pAnswer(qid, provider, documents))
def __init__(self, context, querier, p2pquerier):
    """Set up the results page for the query described by `context`.

    The query is rebuilt from the request context.  When fewer than two
    URL segments remain, the local matches are reduced to abstracts,
    a `P2pQuery` is created for the query, old results are purged and
    the local matches are pushed into the results table under the new
    query id with a ('localhost', 0) provider.
    """
    athena.LivePage.__init__(self)
    # NOTE: At the time this comment was written, athena/LivePages were
    #       handled differently in nevow SVN. It is now possible to
    #       instantiate LivePage instances directly (which is great!),
    #       so the implementation will have to change for the next
    #       nevow release.
    self.querier = querier
    self.p2pquerier = p2pquerier
    self.query = Query.fromContext(context)
    self.offset = self.query.offset
    self.onlyLocal = False
    self.onlyDistant = False

    # push local results once for all
    if len(inevow.IRemainingSegments(context)) < 2:
        # only abstracts are stored in the results table
        localDocs = []
        for hit in querier.findDocuments(self.query):
            hit.text = makeAbstract(hit.text, self.query.words)
            localDocs.append(hit)

        nodeConfig = INodeConfiguration(context)
        self.p2pQuery = P2pQuery(sender=nodeConfig.get_node_id(),
                                 query=self.query)
        self.qid = self.p2pQuery.qid

        # purge old results
        self.querier.purgeOldResults()
        localProvider = (NODE_LOGIN, NODE_CONFIG.get_node_id(),
                         'localhost', 0)
        self.querier.pushDocuments(self.qid, localDocs, localProvider)
        # NOTE(review): the original indentation was lost; this statement
        # is assumed to belong to the `if` block like `self.qid` above --
        # confirm against version control.
        self.results = self.querier.getQueryResults(self.query)
def relayAnswer(self, answer):
    """Record the documents of an answer, then pass the answer on.

    The query is looked up by ``answer.qid``, first among the received
    queries and then among the sent ones; an answer matching neither is
    reported and dropped.  Each document has its 'url' reduced to a base
    name and its 'text' replaced by a cleaned-up abstract, and is added
    to the query's matches.  The answer is then either relayed to the
    host the query came from, or, when this node sent the query, handed
    to the local answer callbacks.
    """
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), answer.provider[2], answer.provider[3]))

    # queries relayed for other nodes are searched before our own
    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for entry in answer.documents:
        # objects are handled through their attribute dict, so the
        # edits below also show on the object itself
        record = entry if isinstance(entry, dict) else entry.__dict__
        if 'url' in record:
            record['url'] = os.path.basename(record['url'])
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        # TODO: duplicate detection (query.isKnown) is disabled; should
        #       every document be added regardless of duplicates, so as
        #       to add a new provider entry?
        summary = makeAbstract(record['text'], query.getWords())
        record['text'] = untagText(removeSpace(summary))
        query.addMatch(record)
        outgoing.append(record)

    if query.sender != NODE_CONFIG.get_node_id():
        # not our query: pass the answer to the host we got the query from
        self.querier.registerNodeActivity(answer.provider[1])
        host = query.client_host
        port = query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        proxy = Proxy('http://%s:%s' % (host, port))
        pending = proxy.callRemote('distributedQueryAnswer',
                                   query.qid,
                                   NODE_CONFIG.get_node_id(),
                                   answer.provider,
                                   outgoing)
        pending.addErrback(answerQueryErrback(query))
        return

    print(" ... originator : we got an answer !")
    self._notifyAnswerCallbacks(answer.qid, answer.provider, outgoing)
def relayAnswer(self, answer):
    """Record the documents of an answer, then pass the answer on.

    The query is looked up by ``answer.qid``, first among the received
    queries and then among the sent ones; an answer matching neither is
    reported and dropped.  Each document has its base64-encoded 'url'
    reduced to a base name (and re-encoded) and its 'text' replaced by a
    cleaned-up abstract, and is added to the query's matches.  The
    answer is then either relayed to the host the query came from, or,
    when this node sent the query, handed to the local answer callbacks.
    """
    print("P2pQuerier relayAnswer : %s document(s) from %s:%s"
          % (len(answer.documents), answer.provider[2], answer.provider[3]))

    # queries relayed for other nodes are searched before our own
    query = (self._receivedQueries.get(answer.qid)
             or self._sentQueries.get(answer.qid))
    if not query:
        print(" ... bug or dos : we had no query for this answer")
        return

    outgoing = []
    for entry in answer.documents:
        # objects are handled through their attribute dict, so the
        # edits below also show on the object itself
        record = entry if isinstance(entry, dict) else entry.__dict__
        # only node-local docs will exhibit their full pathname
        if 'url' in record:
            fullPath = base64.decodestring(record['url'])
            shortName = os.path.basename(fullPath)
            record['url'] = base64.encodestring(shortName)
        # TODO: record answer in database if local is False
        # auc : to have them in Document with state == KNOWN
        summary = makeAbstract(record['text'], query.getWords())
        record['text'] = untagText(removeSpace(summary))
        query.addMatch(record)
        outgoing.append(record)

    if query.sender != NODE_CONFIG.get_node_id():
        # not our query: pass the answer to the host we got the query from
        self.querier.registerNodeActivity(answer.provider[1])
        host = query.client_host
        port = query.client_port
        print(" ... relaying Answer to %s:%s ..." % (host, port))
        proxy = Proxy('http://%s:%s' % (host, port))
        pending = proxy.callRemote('distributedQueryAnswer',
                                   query.qid,
                                   NODE_CONFIG.get_node_id(),
                                   answer.provider,
                                   outgoing)
        pending.addErrback(answerQueryErrback(query))
        return

    print(" ... originator : we got an answer !")
    self._notifyAnswerCallbacks(answer.qid, answer.provider, outgoing)
def __init__(self, words, offset=0, filetype=None, filename=None,
             order=None, direction=None, qid=None):
    """Store the search criteria and result-ordering preferences.

    When no (truthy) `qid` is supplied, one is obtained from `hashIt`,
    called with the local node id and this instance before any other
    attribute has been assigned.  `order` falls back to
    'publication_time' and `direction` to 'DESC'.
    """
    # the id is settled first, exactly as before: hashIt() receives the
    # instance while it is still unpopulated
    if not qid:
        qid = hashIt(NODE_CONFIG.get_node_id(), self)
    self.qid = qid

    self.words = words  # unicode string
    self.offset = offset
    self.filetype = filetype
    self.filename = filename
    self.limit = None

    # FIXME: ugly stuff below, related to result presentation
    #        in the browser
    if order:
        self.order = order
    else:
        self.order = 'publication_time'
    if direction:
        self.direction = direction
    else:
        self.direction = 'DESC'
def _selectTargetNeighbors(self, query):
    """Pick the neighbors that `query` will be sent to.

    Asks the querier for up to ``2 ** max(5, query.ttl)`` active
    neighbors of the local node, i.e. never fewer than 32 are requested.
    """
    # TODO: use the neighbors' profiles to route requests
    # NOTE(review): max() makes 5 a floor for the exponent, so a large
    # ttl yields a very large count -- confirm a cap (min) was not meant.
    exponent = max(5, query.ttl)
    wanted = 2 ** exponent
    return self.querier.getActiveNeighbors(NODE_CONFIG.get_node_id(),
                                           wanted)