def index(self, obj, attributes=None):
    """Queue an asynchronous indexing job for a Dexterity content object.

    Objects that do not provide ``IDexterityContent`` are ignored.  For
    all others the current time is stored in the object's annotations under
    ``ELASTIC_LAST_INDEXING_QUEUED_TIMESTAMP`` and the same value is handed
    to the queued job, together with the object's physical path and the
    index name.

    ``attributes`` is accepted but not used by this method.
    """
    if IDexterityContent.providedBy(obj):
        queued_at = time.time()
        # Remember on the object itself when this job was queued.
        IAnnotations(obj)["ELASTIC_LAST_INDEXING_QUEUED_TIMESTAMP"] = queued_at
        physical_path = "/".join(obj.getPhysicalPath())
        index.delay(physical_path, queued_at, index_name())
def numObjects(self):
    """Return the number of indexed objects.

    Runs a ``match_all`` count query against the configured index.  Any
    exception raised while counting is logged, and a human-readable error
    string is returned in place of the number.
    """
    count_query = {
        "index": index_name(),
        "body": {"query": {"match_all": {}}},
    }
    client = get_query_client()
    try:
        response = client.count(**count_query)
        # Kept inside the ``try`` so a malformed response is reported the
        # same way as a failed request.
        return response["count"]
    except Exception:
        logger.exception('ElasticSearch "count" query failed')
        return "Problem getting all documents count from ElasticSearch!"
def __call__(self):
    """Queue an indexing job for every path known to ``portal_catalog``.

    Returns a short status string with the number of queued jobs.
    """
    catalog = api.portal.get_tool("portal_catalog")
    queued = 0
    for uid_path in catalog._catalog.uids:
        # The catalog tool's own path shows up among the uids for unknown
        # reasons; it is not content, so it is skipped.
        if not uid_path.endswith("/portal_catalog"):
            index.delay(uid_path, 0, index_name())
            queued += 1
    return "queued {0}".format(queued)
def unindex(self, obj):
    """Queue an asynchronous job that unindexes ``obj``.

    The job receives the object's UUID and the index name.
    """
    content_uuid = api.content.get_uuid(obj)
    unindex.delay(content_uuid, index_name())
def _apply_index(self, request):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.  None is also returned when the initial
    ElasticSearch query fails.

    If the request contains a parameter with the name of the column
    and this parameter is either a Record or a class instance then
    it is assumed that the parameters of this index are passed as
    attribute (Note: this is the recommended way to pass parameters
    since Zope 2.4)

    Otherwise two objects are returned.  The first object is a
    ResultSet containing the record numbers of the matching
    records, mapped to an integer score (``_score`` scaled by 10000).
    The second object is a tuple containing the names of all data
    fields used.
    """
    record = parseIndexRequest(request, self.id)
    if record.keys is None:
        return None

    keys = []
    for key in record.keys:
        # Drop backslashes and double quotes before the key is handed to
        # the query template.
        key = key.replace("\\", "").replace('"', "")
        if not isinstance(key, bytes):
            key = key.encode("utf8")
        keys.append(key)
    template_params = {"keys": keys}
    # Local picked up by Zope's traceback formatting; the name must stay.
    __traceback_info__ = "template parameters: {0}".format(template_params)
    query_body = self._apply_template(template_params)
    logger.info(query_body)

    es_kwargs = dict(
        index=index_name(),
        body=query_body,
        size=BATCH_SIZE,
        scroll="1m",
        _source_includes=["rid"],
    )
    es = get_query_client()
    try:
        result = es.search(**es_kwargs)
    except RequestError:
        logger.info("Query failed:\n{0}".format(query_body))
        return None
    except TransportError:
        logger.exception("ElasticSearch failed")
        return None

    def score(hit):
        """Convert the float ``_score`` of a hit into an integer score."""
        return int(10000 * float(hit["_score"]))

    # Initial return value from the first batch; further batches are
    # merged in below.
    retval = IIBTree()
    for hit in result["hits"]["hits"]:
        retval[hit["_source"]["rid"]] = score(hit)

    total = result["hits"]["total"]["value"]
    if total > BATCH_SIZE:
        sid = result["_scroll_id"]
        counter = BATCH_SIZE
        while counter < total:
            result = es.scroll(scroll_id=sid, scroll="1m")
            # The scroll id may change between requests; always continue
            # with the most recently returned one.
            sid = result.get("_scroll_id", sid)
            hits = result["hits"]["hits"]
            if not hits:
                # The scroll is exhausted; further requests would only
                # return empty batches.
                break
            for hit in hits:
                retval[hit["_source"]["rid"]] = score(hit)
            counter += BATCH_SIZE
    return retval, (self.id,)