def create_index(self):
    """
    Create this object's index on the Elasticsearch server.

    The request body carries settings.ES_SETTINGS as the index settings and
    a single mapping, settings.ES_MAPPING, registered under the document
    type named by settings.ES_ARTICLE_DOCTYPE.
    """
    body = {"settings": settings.ES_SETTINGS}
    body["mappings"] = {settings.ES_ARTICLE_DOCTYPE: settings.ES_MAPPING}
    client = indices.IndicesClient(self.es)
    client.create(self.index, body)
def get_stemmed_form(idx, word):
    """
    Return the stemmed form of a word as analyzed by the given index.

    The word is passed to the analyzer named by _STEMMING_ANALYZER on the
    index, and the first token of the analysis result is returned.

    Parameters:
        idx : str
            The name of the elasticsearch index
        word : str
            The input word

    Returns:
        The 'token' value of the first entry in the result's 'tokens'.

    Raises:
        IndexError if the analysis result holds an empty token list.
    """
    client = indices.IndicesClient(_es())
    analysis = client.analyze(index=idx, text=word,
                              analyzer=_STEMMING_ANALYZER)
    first_token = analysis['tokens'][0]
    return first_token['token']
def check_index(self):
    """
    Check whether the server is up and the index exists.

    If the server is down, raise an exception.
    If the index does not exist, try to create it via self.create_index().
    Finally, query the cluster health for the index, asking the server to
    wait for a status of 'yellow'.

    Raises:
        Exception: if the server does not answer the ping.
    """
    if not self.es.ping():
        raise Exception("Elastic server cannot be reached")

    if not indices.IndicesClient(self.es).exists(self.index):
        # Pass the index name as a logger argument instead of formatting
        # with **locals(); the rendered message is unchanged.
        log.info("Index %s does not exist, creating", self.index)
        self.create_index()

    # The health response itself is not needed; the call is made only for
    # its wait_for_status behaviour, so the result is discarded.
    cluster.ClusterClient(self.es).health(self.index, wait_for_status='yellow')
def create_index(self, shards=5, replicas=1):
    """
    Create this object's index with the given shard and replica counts.

    Parameters:
        shards : value stored under "number_of_shards" (default 5)
        replicas : value stored under "number_of_replicas" (default 1)

    The index settings start from a copy of settings.ES_SETTINGS; the
    mapping settings.ES_MAPPING is registered under the document type
    named by settings.ES_ARTICLE_DOCTYPE.
    """
    # Work on a (shallow) copy so the two overrides below are not written
    # into the shared settings.ES_SETTINGS object.
    es_settings = settings.ES_SETTINGS.copy()
    es_settings["number_of_shards"] = shards
    es_settings["number_of_replicas"] = replicas

    body = {"settings": es_settings}
    body["mappings"] = {settings.ES_ARTICLE_DOCTYPE: settings.ES_MAPPING}

    client = indices.IndicesClient(self.es)
    client.create(self.index, body)
def delete_index(self):
    """
    Delete this object's index, tolerating an index that is already gone.

    An exception whose text contains 'IndexMissingException' is swallowed;
    any other exception is re-raised unchanged.
    """
    try:
        indices.IndicesClient(self.es).delete(self.index)
    except Exception as e:
        # Only a missing index counts as success; everything else
        # propagates to the caller.
        if 'IndexMissingException' not in unicode(e):
            raise
def flush(self):
    """
    Issue a flush request through the indices client.

    No index name is passed to the call, so it is not restricted to
    self.index. The response is discarded.
    """
    client = indices.IndicesClient(self.es)
    client.flush()
def do_search(idx, typ, query, start, num, date_ranges, exclude_distributions,
              exclude_article_types, selected_pillars, return_source=False,
              sort_order='_score'):
    """Validate a query and, if it is valid, run it against ElasticSearch.

    The query is built with create_query() from the query string and the
    filters, validated on the index, and only executed when the validation
    reports it as valid. The query string uses the Elasticsearch query
    string syntax (see Elasticsearch reference).

    Parameters:
        idx : str
            The name of the elasticsearch index
        typ : str
            The type of document requested
        query : str
            A query string in the Elasticsearch query string language
        start : int
            The index of the first result to be retrieved
        num : int
            The total number of results to be retrieved
        date_ranges : list(dict)
            A list of dictionaries containing the upper and lower dates of
            the requested date ranges
        exclude_distributions : list
            A list of strings representing distributions that should be
            excluded from the search
        exclude_article_types : list
            A list of strings representing article types that should be
            excluded from the search
        selected_pillars : list
            A list of strings representing pillars that should be included
            in the search. Each pillar is linked to a list of newspapers.
        return_source : boolean, optional
            If True, no fields parameter is sent, so each document comes
            back with its _source. If False (the default), only the fields
            listed in _ES_RETURN_FIELDS are requested.
        sort_order : string, optional
            The sort order for this query. Syntax is fieldname:order,
            multiple sort orders can be separated by commas. Note that if
            the sort_order doesn't contain _score, no scores will be
            returned.

    Returns:
        validity : boolean
            Whether the input query string is valid.
        results :
            The response of the Elasticsearch search call when the query is
            valid; otherwise the 'error' entry of the first validation
            explanation.
    """
    q = create_query(query, date_ranges, exclude_distributions,
                     exclude_article_types, selected_pillars)

    validation = indices.IndicesClient(_es()).validate_query(
        index=idx, doc_type=typ, body=q, explain=True)

    # Invalid query: report the first explanation's error and stop here.
    if not validation.get('valid'):
        return False, validation.get('explanations')[0].get('error')

    search_args = {
        'index': idx,
        'doc_type': typ,
        'body': q,
        'from_': start,
        'size': num,
        'sort': sort_order,
    }
    if not return_source:
        # Restrict each hit to the fields listed in _ES_RETURN_FIELDS;
        # without this parameter the _source of each document is returned.
        search_args['fields'] = _ES_RETURN_FIELDS

    return True, _es().search(**search_args)
def clear_cache(self):
    """
    Issue a clear-cache request through the indices client.

    No index name is passed to the call, so it is not restricted to
    self.index. The response is discarded.
    """
    client = indices.IndicesClient(self.es)
    client.clear_cache()