def index_bulk(self, docs, step=None, action='index'):
    self._populate_es_version()
    index_name = self._index
    doc_type = self._doc_type
    step = step or self.step

    def _get_bulk(doc):
        # keep original doc
        ndoc = copy.copy(doc)
        ndoc.update({
            "_index": index_name,
            "_type": doc_type,
            "_op_type": action,
        })
        if self._host_major_ver > 6:
            ndoc.pop("_type")
        return ndoc

    actions = (_get_bulk(doc) for doc in docs)
    num_ok, errors = helpers.bulk(self._es, actions, chunk_size=step,
                                  max_chunk_bytes=self.step_size)
    if errors:
        raise ElasticsearchException(
            "%d errors while bulk-indexing: %s"
            % (len(errors), [str(e) for e in errors]))
    return num_ok, errors
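# A minimal usage sketch (not from the original source): `Indexer` is a
# hypothetical stand-in for whatever class defines index_bulk above and is
# assumed to configure self._es, self._index, self._doc_type, self.step and
# self.step_size; the documents are illustrative only.
from elasticsearch import ElasticsearchException

indexer = Indexer()
docs = [{"_id": str(i), "title": "document %d" % i} for i in range(1000)]

try:
    num_ok, errors = indexer.index_bulk(docs, step=500)
    print("indexed %d documents" % num_ok)
except ElasticsearchException as exc:
    # index_bulk collapses bulk item failures into a single exception
    print("bulk indexing failed: %s" % exc)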
def get_inverted_index(es, index, doc_type, field, verbose):
    if verbose:
        doc_count = es.count(index=index)['count']
        vprint('Index: {}'.format(index))
        vprint('Document type: {}'.format(doc_type))
        vprint('Document field: {}'.format(field))
        vprint('Document count: {}'.format(doc_count))

    errors = 0
    inv_index = InvertedIndex()

    if verbose:
        vprint('Reading term vectors...')
        pbar = tqdm(total=doc_count, file=sys.stderr)

    for n_docs, n_errs in inv_index.read_index(es, index, doc_type, field):
        if verbose:
            pbar.update(n_docs)
        errors += n_errs

    if verbose:
        pbar.close()
        vprint('Done ({} term vector errors).'.format(errors))

    return inv_index
def record(self, event):
    # type: (Event) -> None
    logger.debug('recording event %s', event.message_id)

    # Try unmarshalling the event before sticking it into storage; we might
    # derive additional value from being able to inspect the event in storage
    payload = None
    try:
        payload = json.loads(event.payload)
    except ValueError:
        pass

    try:
        doc = {
            'destination_topic': event.destination_topic,
            'payload': base64.b64encode(event.payload),
            'deserialized_payload': payload,
            # ES takes long ms-level epoch timestamps
            'received': format_timestamp(time.time())
        }
        self.__esc.index(self.__idx, self.__DOCUMENT_TYPE, doc,
                         id=event.message_id)
        logger.debug('recorded event %s', event.message_id)
        self.__monitor.record_success()
    except TransportError as e:
        self.__monitor.record_error(e.error)
        raise ElasticsearchException(e.error)
    except ElasticsearchException as e:
        self.__monitor.record_error(e.message)
        raise
def submit_pool(self, mapping=None):
    """
    Submit the current document grouping (grouped by mapping) to the
    appropriate mapping in the Elasticsearch index.

    :param str mapping: The mapping to submit to the index.
    """
    # Set default mapping
    if not mapping:
        mapping = self.default_mapping

    # Elasticsearch's Bulk API expects data in a strange format (see link)
    # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html
    docs = []
    for doc in self.doc_pool[mapping]:
        # The action metadata must be an object, so use an empty dict to
        # request a plain index action for the document that follows.
        docs.append({"index": {}})
        docs.append(doc)

    # We want to see any errors that are thrown up by Elasticsearch
    response = self.es.bulk(docs, index=self.index, doc_type=mapping,
                            timeout=75)
    if response["errors"] is True:
        raise ElasticsearchException(
            "Error response from Elasticsearch server: %s" % response)

    # Empty the list now that we've indexed all the docs from it
    self.doc_pool[mapping] = []
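# Hedged illustration of the Bulk API body that submit_pool builds: a flat
# list alternating an action line and a document source, which
# elasticsearch-py serializes to newline-delimited JSON. The documents below
# are hypothetical; the target index and doc_type are passed separately via
# the index= and doc_type= arguments to es.bulk().
pooled_docs = [
    {"name": "widget", "price": 9.99},
    {"name": "gadget", "price": 19.99},
]

body = []
for doc in pooled_docs:
    body.append({"index": {}})  # action metadata: a plain index action
    body.append(doc)            # the document source itself

# body == [{"index": {}}, {"name": "widget", "price": 9.99},
#          {"index": {}}, {"name": "gadget", "price": 19.99}]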
def send_docs_to_elasticsearch(elasticsearch_docs):
    es = create_connection(host=ELASTICSEARCH_LOGS_HOST,
                           port=ELASTICSEARCH_LOGS_PORT,
                           http_auth=None)
    _, failed = bulk(es, elasticsearch_docs, stats_only=True)
    if failed:
        raise ElasticsearchException(
            f"{failed} out of {len(elasticsearch_docs)} failed")
def test_exception_flow(self, mock_bulk):
    mock_record = {'record_type': 'firewall_rule'}
    mock_bulk.side_effect = ElasticsearchException()
    es_store = esstore.EsStore()
    es_store.write(mock_record)
    es_store.done()
    mock_bulk.assert_called_once_with(mock.ANY)
def setUp(self):
    self.delete_timeout = 10
    self.cluster_mock = Mock()
    self.child_logger_mock = Mock()
    self.logger_mock = Mock()
    self.logger_mock.getChild = Mock(return_value=self.child_logger_mock)
    self.delete_action_mock = Mock()
    self.delete_action_mock.do_action = Mock()
    self.build_delete_action_for_expired_indices_mock = Mock(
        return_value=self.delete_action_mock)
    self.elasticsearch_exception = ElasticsearchException(
        self.faker.sentence())
invalid_amount["item_list"][0].update(amount=0) with login_disabled_app.test_client() as client: response = client.post("api/product/list", json=invalid_amount) data = json.loads(response.data) ErrorSchema().load(data) assert response.status_code == 400 @pytest.mark.parametrize( "method,http_method,test_url,error,status_code", [("select_by_item_list", "POST", "/api/product/list", ValidationError("test"), 400), ("select_by_item_list", "POST", "/api/product/list", ElasticsearchException(), 504), ("select_by_item_list", "POST", "/api/product/list", ElasticsearchDslException(), 504), ("select_by_item_list", "POST", "/api/product/list", Exception(), 500)]) def test_kind_products_controller_error(mocker, get_request_function, request_json, method, http_method, test_url, error, status_code): with mocker.patch.object(ProductService, method, side_effect=error): make_request = get_request_function(http_method) response = make_request(test_url, json=request_json) data = json.loads(response.data) ErrorSchema().load(data) assert response.status_code == status_code
    with login_disabled_app.test_client() as client:
        response = client.post(
            "api/product/total",
            json=invalid_amount
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
        assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_item_list", "POST", "/api/product/total",
         ValidationError("test"), 400),
        ("select_by_item_list", "POST", "/api/product/total",
         ElasticsearchException(), 504),
        ("select_by_item_list", "POST", "/api/product/total",
         ElasticsearchDslException(), 504),
        ("select_by_item_list", "POST", "/api/product/total", Exception(), 500)
    ]
)
def test_kind_products_controller_error(mocker, get_request_function,
                                        request_json, method, http_method,
                                        test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url,
            json=request_json
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
    with mocker.patch.object(ProductService, "products_count",
                             return_value=5):
        with login_disabled_app.test_client() as client:
            response = client.get(
                "api/start"
            )

            data = json.loads(response.data)
            ProductsCountSchema().load(data)
            assert response.status_code == 200


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("products_count", "GET", "/api/start", ElasticsearchException(), 504),
        ("products_count", "GET", "/api/start",
         ElasticsearchDslException(), 504),
        ("products_count", "GET", "/api/start", Exception(), 500)
    ]
)
def test_start_controller_error(mocker, get_request_function, method,
                                http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
def test_str(self):
    class TestCase:
        def __init__(self, msg, err, verbose, expected_str):
            self.msg = msg
            self.err = err
            self.verbose = verbose
            self.expected_str = expected_str

    tests = {
        'msg and err are None, verbose=False':
            TestCase(msg=None, err=None, verbose=False, expected_str=''),
        'msg and err are None, verbose=True':
            TestCase(msg=None, err=None, verbose=True, expected_str=''),
        'msg only, verbose=False': TestCase(
            msg='error message',
            err=None,
            verbose=False,
            expected_str='error message',
        ),
        'msg only, verbose=True': TestCase(
            msg='error message',
            err=None,
            verbose=True,
            expected_str='error message',
        ),
        'err is string, verbose=False': TestCase(
            msg='error message',
            err='we have a big problem',
            verbose=False,
            expected_str='error message',
        ),
        'err is string, verbose=True': TestCase(
            msg='error message',
            err='we have a big problem',
            verbose=True,
            expected_str='error message: we have a big problem',
        ),
        'err is list, verbose=False': TestCase(
            msg='error message',
            err=['error1', 'error2', 'error3'],
            verbose=False,
            expected_str='error message',
        ),
        'err is list, verbose=True': TestCase(
            msg='error message',
            err=['error1', 'error2', 'error3'],
            verbose=True,
            expected_str='error message: [\'error1\', \'error2\', \'error3\']',
        ),
        'err is ValueError, verbose=False': TestCase(
            msg='error message',
            err=ValueError('we have a big problem'),
            verbose=False,
            expected_str='error message',
        ),
        'err is ValueError, verbose=True': TestCase(
            msg='error message',
            err=ValueError('we have a big problem'),
            verbose=True,
            expected_str='error message: ValueError: we have a big problem',
        ),
        'err is ElasticsearchException, verbose=False': TestCase(
            msg='error message',
            err=ElasticsearchException('we have a big problem'),
            verbose=False,
            expected_str='error message',
        ),
        'err is ElasticsearchException, verbose=True': TestCase(
            msg='error message',
            err=ElasticsearchException('we have a big problem'),
            verbose=True,
            expected_str=(
                'error message: '
                'elasticsearch.exceptions.ElasticsearchException: '
                'we have a big problem'),
        ),
    }

    for test_name, test in tests.items():
        err = ElasticBufferFlushError(msg=test.msg, err=test.err,
                                      verbose=test.verbose)
        self.assertEqual(str(err), test.expected_str, test_name)
from django.conf import settings
from elasticsearch import Elasticsearch, ElasticsearchException

es = Elasticsearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])

try:
    es.info()
except ElasticsearchException:
    raise ElasticsearchException(
        "There is no elasticsearch node running on {}".format(
            settings.HAYSTACK_CONNECTIONS['default']['URL']))

version = es.info()['version']['number']
major_version = version.split('.')[0]
if int(major_version) != 5:
    raise ElasticsearchException(
        "ES version is not 5, but {} instead.".format(version))
        response = client.post(
            "api/session/test",
            json=invalid_range
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
        assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_id", "POST", "/api/session/test", NoContentError(), 204),
        ("select_by_id", "POST", "/api/session/test", NotFoundError(), 404),
        ("select_by_id", "POST", "/api/session/test",
         ElasticsearchException(), 504),
        ("select_by_id", "POST", "/api/session/test",
         ElasticsearchDslException(), 504),
        ("select_by_id", "POST", "/api/session/test", Exception(), 500)
    ]
)
def test_start_controller_error(mocker, get_request_function, method,
                                http_method, test_url, error, status_code):
    with mocker.patch.object(SessionService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
def itersearch(self, scroll, **kwargs):
    '''
    Iterated search for making the Scroll API really simple to use.

    Executes a search query of scroll type and returns an iterator for easy
    iteration over the result set and for making further calls to
    Elasticsearch for scrolling over the remaining results.

    :arg index: A comma-separated list of index names to search; use `_all`
        or empty string to perform the operation on all indices
    :arg doc_type: A comma-separated list of document types to search; leave
        empty to perform the operation on all types
    :arg body: The search definition using the Query DSL
    :arg chunked: False to get one document per iteration. True to get all
        the documents returned in response to every scroll request.
        Defaults to True.
    :arg with_meta: True to return meta data of Scroll API requests with
        every iteration. Defaults to False.
    :arg _source: True or false to return the _source field or not, or a
        list of fields to return
    :arg _source_exclude: A list of fields to exclude from the returned
        _source field
    :arg _source_include: A list of fields to extract and return from the
        _source field
    :arg analyze_wildcard: Specify whether wildcard and prefix queries
        should be analyzed (default: false)
    :arg analyzer: The analyzer to use for the query string
    :arg default_operator: The default operator for query string query
        (AND or OR) (default: OR)
    :arg df: The field to use as default where no field prefix is given in
        the query string
    :arg explain: Specify whether to return detailed information about
        score computation as part of a hit
    :arg fields: A comma-separated list of fields to return as part of a hit
    :arg ignore_indices: When performed on multiple indices, allows to
        ignore `missing` ones (default: none)
    :arg indices_boost: Comma-separated list of index boosts
    :arg lenient: Specify whether format-based query failures (such as
        providing text to a numeric field) should be ignored
    :arg lowercase_expanded_terms: Specify whether query terms should be
        lowercased
    :arg from_: Starting offset (default: 0)
    :arg preference: Specify the node or shard the operation should be
        performed on (default: random)
    :arg q: Query in the Lucene query string syntax
    :arg routing: A comma-separated list of specific routing values
    :arg scroll: Specify how long a consistent view of the index should be
        maintained for scrolled search
    :arg size: Number of hits to return (default: 10)
    :arg sort: A comma-separated list of <field>:<direction> pairs
    :arg source: The URL-encoded request definition using the Query DSL
        (instead of using request body)
    :arg stats: Specific 'tag' of the request for logging and statistical
        purposes
    :arg suggest_field: Specify which field to use for suggestions
    :arg suggest_mode: Specify suggest mode (default: missing)
    :arg suggest_size: How many suggestions to return in response
    :arg suggest_text: The source text for which the suggestions should be
        returned
    :arg timeout: Explicit operation timeout
    :arg version: Specify whether to return document version as part of a
        hit
    Usage::

        from superelasticsearch import SuperElasticsearch
        es = SuperElasticsearch(hosts=['localhost:9200'])
        for doc in es.itersearch(index='tweets', doc_type='tweet',
                                 chunked=False):
            print doc['_id']
    '''
    # add scroll
    kwargs['scroll'] = scroll

    # prepare kwargs for search
    if 'chunked' in kwargs:
        chunked = kwargs.pop('chunked')
    else:
        chunked = True

    if 'with_meta' in kwargs:
        with_meta = kwargs.pop('with_meta')
    else:
        with_meta = False

    resp = self.search(**kwargs)
    total = resp['hits']['total']
    scroll_id = resp['_scroll_id']
    counter = 0

    while len(resp['hits']['hits']) > 0:
        # prepare meta
        meta = resp.copy()
        meta['hits'] = resp['hits'].copy()
        meta['hits'].pop('hits')

        # if chunked is expected, then return chunks, else return
        # every doc per iteration
        if chunked:
            if with_meta:
                yield resp['hits']['hits'], meta
            else:
                yield resp['hits']['hits']
        else:
            for doc in resp['hits']['hits']:
                if with_meta:
                    yield doc, meta
                else:
                    yield doc

        # increment the counter
        counter += len(resp['hits']['hits'])

        # get the next set of results
        scroll_id = resp['_scroll_id']
        resp = self.scroll(scroll_id=scroll_id, scroll=kwargs['scroll'])

    # check if all the documents were scrolled or not
    if counter != total:
        raise ElasticsearchException(
            'Failed to get all the documents while scrolling. Total '
            'documents that matched the query: %s\n'
            'Total documents that were retrieved while scrolling: %s\n'
            'Last scroll_id with documents: %s.\n'
            'Last scroll_id: %s ' % (
                total, counter, scroll_id, resp['_scroll_id']))

    # clear scroll
    self.clear_scroll(scroll_id=scroll_id)
    with mocker.patch.object(ProductService, "select_by_id",
                             return_value=mock_product):
        with login_disabled_app.test_client() as client:
            response = client.get(
                "api/product/id"
            )

            data = json.loads(response.data)
            ProductResultsSchema().load(data)
            assert response.status_code == 200


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_id", "GET", "api/product/id", NotFoundError(), 404),
        ("select_by_id", "GET", "api/product/id",
         ElasticsearchException(), 504),
        ("select_by_id", "GET", "api/product/id",
         ElasticsearchDslException(), 504),
        ("select_by_id", "GET", "api/product/id", Exception(), 500)
    ]
)
def test_product_controller_error(mocker, get_request_function, method,
                                  http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        data = json.loads(response.data)

        if status_code == 404:
    assert response.status_code == 400

    invalid_range = deepcopy(request_json)
    invalid_range["pricerange"].update(min=100.0, max=50.0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/brand/test", json=invalid_range)

        data = json.loads(response.data)
        ErrorSchema().load(data)
        assert response.status_code == 400


@pytest.mark.parametrize("method,http_method,test_url,error,status_code", [
    ("get_total", "POST", "/api/brand/test", NoContentError(), 204),
    ("get_total", "POST", "/api/brand/test", ElasticsearchException(), 504),
    ("get_total", "POST", "/api/brand/test", ElasticsearchDslException(), 504),
    ("get_total", "POST", "/api/brand/test", Exception(), 500)
])
def test_brand_controller_error(mocker, get_request_function, method,
                                http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else:
            data = json.loads(response.data)
    invalid_pagesize = deepcopy(request_json)
    invalid_pagesize.update(pagesize=0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/brand/test/1", json=invalid_pagesize)

        data = json.loads(response.data)
        ErrorSchema().load(data)
        assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [("select", "POST", "/api/brand/test/1", NoContentError(), 204),
     ("select", "POST", "/api/brand/test/1", ElasticsearchException(), 504),
     ("select", "POST", "/api/brand/test/1", ElasticsearchDslException(), 504),
     ("select", "POST", "/api/brand/test/1", Exception(), 500)])
def test_kind_products_controller_error(mocker, get_request_function, method,
                                        http_method, test_url, error,
                                        status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else:
            data = json.loads(response.data)
    invalid_amount = deepcopy(request_json)
    invalid_amount.update(amount=0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/gender/test", json=invalid_amount)

        data = json.loads(response.data)
        ErrorSchema().load(data)
        assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [("super_discounts", "POST", "/api/gender/test", NoContentError(), 204),
     ("super_discounts", "POST", "/api/gender/test",
      ElasticsearchException(), 504),
     ("super_discounts", "POST", "/api/gender/test",
      ElasticsearchDslException(), 504),
     ("super_discounts", "POST", "/api/gender/test", Exception(), 500)])
def test_gender_controller_error(mocker, get_request_function, method,
                                 http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else: