Example #1
    def index_bulk(self, docs, step=None, action='index'):

        self._populate_es_version()
        index_name = self._index
        doc_type = self._doc_type
        step = step or self.step

        def _get_bulk(doc):
            # keep original doc
            ndoc = copy.copy(doc)
            ndoc.update({
                "_index": index_name,
                "_type": doc_type,
                "_op_type": action,
            })
            if self._host_major_ver > 6:
                ndoc.pop("_type")
            return ndoc

        actions = (_get_bulk(doc) for doc in docs)
        num_ok, errors = helpers.bulk(self._es,
                                      actions,
                                      chunk_size=step,
                                      max_chunk_bytes=self.step_size)
        if errors:
            raise ElasticsearchException(
                "%d errors while bulk-indexing: %s" %
                (len(errors), [str(e) for e in errors]))
        return num_ok, errors
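For orientation, each action produced by _get_bulk above is just the source document plus the bulk metadata keys understood by helpers.bulk. A minimal self-contained sketch of the same pattern (the client URL, index name and sample documents below are assumptions, not taken from the class above):

from elasticsearch import Elasticsearch, helpers

es = Elasticsearch("http://localhost:9200")  # assumed local node

def to_action(doc, index_name="demo-index", op_type="index"):
    # Copy so the caller's dict is left untouched, then add bulk metadata.
    action = dict(doc)
    action.update({"_index": index_name, "_op_type": op_type})
    return action

docs = [{"_id": 1, "title": "first"}, {"_id": 2, "title": "second"}]
num_ok, errors = helpers.bulk(es, (to_action(d) for d in docs), chunk_size=500)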
Example #2
def get_inverted_index(es, index, doc_type, field, verbose):
    raise ElasticsearchException('hoaaaa')
    if verbose:
        doc_count = es.count(index=index)['count']
        vprint('Index: {}'.format(index))
        vprint('Document type: {}'.format(doc_type))
        vprint('Document field: {}'.format(field))
        vprint('Document count: {}'.format(doc_count))

    errors = 0
    inv_index = InvertedIndex()
    if verbose:
        vprint('Reading term vectors...')
        pbar = tqdm(total=doc_count, file=sys.stderr)

    for n_docs, n_errs in inv_index.read_index(es, index, doc_type, field):
        if verbose:
            pbar.update(n_docs)
        errors += n_errs

    if verbose:
        pbar.close()

    vprint('Done ({} term vector errors).'.format(errors))
    return inv_index
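InvertedIndex.read_index is not shown here; judging by the surrounding messages it walks the index's term vectors. As a rough illustration of what fetching term vectors for a single document looks like with the low-level client (the index, document id and field name are made up for illustration):

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local node

tv = es.termvectors(index="articles", id="1", fields=["body"],
                    term_statistics=True)
for term, stats in tv["term_vectors"]["body"]["terms"].items():
    print(term, stats["term_freq"])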
Example #3
    def record(self, event):
        # type: (Event) -> None
        logger.debug('recording event %s', event.message_id)

        # Try to unmarshal the event payload before writing it to storage;
        # keeping the parsed form alongside the raw payload makes the stored
        # event easier to inspect later.
        payload = None
        try:
            payload = json.loads(event.payload)
        except ValueError:
            pass

        try:
            doc = {
                'destination_topic': event.destination_topic,
                'payload': base64.b64encode(event.payload),
                'deserialized_payload': payload,
                'received': format_timestamp(
                    time.time())  # ES expects epoch timestamps as long millisecond values
            }

            self.__esc.index(self.__idx,
                             self.__DOCUMENT_TYPE,
                             doc,
                             id=event.message_id)
            logger.debug('recorded event %s', event.message_id)
            self.__monitor.record_success()
        except TransportError as e:
            self.__monitor.record_error(e.error)
            raise ElasticsearchException(e.error)
        except ElasticsearchException as e:
            self.__monitor.record_error(e.message)
            raise
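format_timestamp is not defined in this snippet; going by the inline comment (Elasticsearch wants millisecond-precision epoch values), a plausible helper could be as simple as the following, which is purely an assumption about the author's implementation:

import time

def format_timestamp(seconds):
    # Convert seconds-since-epoch (e.g. from time.time()) into the integer
    # millisecond epoch value that an Elasticsearch `date` field accepts.
    return int(round(seconds * 1000))

format_timestamp(time.time())  # -> integer millisecond timestamp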
Example #4
    def submit_pool(self, mapping=None):
        """
        Submit the current document pool (grouped by mapping) to the
        appropriate mapping in the Elasticsearch index.

        :param str mapping: The mapping whose pooled documents should be
            submitted to the index.
        """
        # Set default mapping
        if not mapping:
            mapping = self.default_mapping

        # Elasticsearch's Bulk API expects alternating action and source lines (see link)
        # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html
        docs = []
        for doc in self.doc_pool[mapping]:
            docs.append({"index": True})
            docs.append(doc)

        # We want to see any errors that are thrown up by Elasticsearch
        response = self.es.bulk(docs,
                                index=self.index,
                                doc_type=mapping,
                                timeout=75)
        if response["errors"] is True:
            raise ElasticsearchException(
                "Error response from Elasticsearch server: %s" % response)

        # Empty the list now that we've indexed all the docs from it
        self.doc_pool[mapping] = []
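The format the comment refers to is the Bulk API's alternating action-line / source-line layout; note that the reference documentation shows each action line as a (possibly empty) JSON object such as {"index": {}}. A small sketch of a body in that shape (index and field names are made up):

bulk_body = [
    {"index": {}},         # action/metadata line for the first document
    {"title": "doc one"},  # source line for the first document
    {"index": {}},         # action/metadata line for the second document
    {"title": "doc two"},  # source line for the second document
]
# A list like this is what gets passed as the body of es.bulk(...), as in
# submit_pool above.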
Example #5
def send_docs_to_elasticsearch(elasticsearch_docs):
    es = create_connection(host=ELASTICSEARCH_LOGS_HOST,
                           port=ELASTICSEARCH_LOGS_PORT,
                           http_auth=None)
    _, failed = bulk(es, elasticsearch_docs, stats_only=True)
    if failed:
        raise ElasticsearchException(
            f"{failed} out of {len(elasticsearch_docs)} failed")
Example #6
    def test_exception_flow(self, mock_bulk):
        mock_record = {'record_type': 'firewall_rule'}
        mock_bulk.side_effect = ElasticsearchException()

        es_store = esstore.EsStore()
        es_store.write(mock_record)
        es_store.done()

        mock_bulk.assert_called_once_with(mock.ANY)
Example #7
    def setUp(self):
        self.delete_timeout = 10
        self.cluster_mock = Mock()
        self.child_logger_mock = Mock()
        self.logger_mock = Mock()
        self.logger_mock.getChild = Mock(return_value=self.child_logger_mock)
        self.delete_action_mock = Mock()
        self.delete_action_mock.do_action = Mock()
        self.build_delete_action_for_expired_indices_mock = Mock(
            return_value=self.delete_action_mock)
        self.elasticsearch_exception = ElasticsearchException(
            self.faker.sentence())
    invalid_amount["item_list"][0].update(amount=0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/product/list", json=invalid_amount)

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [("select_by_item_list", "POST", "/api/product/list",
      ValidationError("test"), 400),
     ("select_by_item_list", "POST", "/api/product/list",
      ElasticsearchException(), 504),
     ("select_by_item_list", "POST", "/api/product/list",
      ElasticsearchDslException(), 504),
     ("select_by_item_list", "POST", "/api/product/list", Exception(), 500)])
def test_kind_products_controller_error(mocker, get_request_function,
                                        request_json, method, http_method,
                                        test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url, json=request_json)

        data = json.loads(response.data)
        ErrorSchema().load(data)

        assert response.status_code == status_code
Example #9
    with login_disabled_app.test_client() as client:
        response = client.post(
            "api/product/total",
            json=invalid_amount
        )

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_item_list", "POST", "/api/product/total", ValidationError("test"), 400),
        ("select_by_item_list", "POST", "/api/product/total", ElasticsearchException(), 504),
        ("select_by_item_list", "POST", "/api/product/total", ElasticsearchDslException(), 504),
        ("select_by_item_list", "POST", "/api/product/total", Exception(), 500)
    ]
)
def test_kind_products_controller_error(mocker, get_request_function, request_json, method, http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url,
            json=request_json
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
    with mocker.patch.object(ProductService, "products_count", return_value=5):
        with login_disabled_app.test_client() as client:
            response = client.get(
                "api/start"
            )

        data = json.loads(response.data)
        ProductsCountSchema().load(data)

        assert response.status_code == 200


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("products_count", "GET", "/api/start", ElasticsearchException(), 504),
        ("products_count", "GET", "/api/start", ElasticsearchDslException(), 504),
        ("products_count", "GET", "/api/start", Exception(), 500)
    ]
)
def test_start_controller_error(mocker, get_request_function, method, http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        data = json.loads(response.data)
        ErrorSchema().load(data)
Example #11
    def test_str(self):
        class TestCase:
            def __init__(self, msg, err, verbose, expected_str):
                self.msg = msg
                self.err = err
                self.verbose = verbose
                self.expected_str = expected_str

        tests = {
            'msg and err are None, verbose=False':
            TestCase(msg=None, err=None, verbose=False, expected_str=''),
            'msg and err are None, verbose=True':
            TestCase(msg=None, err=None, verbose=True, expected_str=''),
            'msg only, verbose=False':
            TestCase(
                msg='error message',
                err=None,
                verbose=False,
                expected_str='error message',
            ),
            'msg only, verbose=True':
            TestCase(
                msg='error message',
                err=None,
                verbose=True,
                expected_str='error message',
            ),
            'err is string, verbose=False':
            TestCase(
                msg='error message',
                err='we have a big problem',
                verbose=False,
                expected_str='error message',
            ),
            'err is string, verbose=True':
            TestCase(
                msg='error message',
                err='we have a big problem',
                verbose=True,
                expected_str='error message: we have a big problem',
            ),
            'err is list, verbose=False':
            TestCase(
                msg='error message',
                err=['error1', 'error2', 'error3'],
                verbose=False,
                expected_str='error message',
            ),
            'err is list, verbose=True':
            TestCase(
                msg='error message',
                err=['error1', 'error2', 'error3'],
                verbose=True,
                expected_str=
                'error message: [\'error1\', \'error2\', \'error3\']',
            ),
            'err is ValueError, verbose=False':
            TestCase(
                msg='error message',
                err=ValueError('we have a big problem'),
                verbose=False,
                expected_str='error message',
            ),
            'err is ValueError, verbose=True':
            TestCase(
                msg='error message',
                err=ValueError('we have a big problem'),
                verbose=True,
                expected_str='error message: ValueError: we have a big problem',
            ),
            'err is ElasticsearchException, verbose=False':
            TestCase(
                msg='error message',
                err=ElasticsearchException('we have a big problem'),
                verbose=False,
                expected_str='error message',
            ),
            'err is ElasticsearchException, verbose=True':
            TestCase(
                msg='error message',
                err=ElasticsearchException('we have a big problem'),
                verbose=True,
                expected_str=
                'error message: elasticsearch.exceptions.ElasticsearchException: '
                'we have a big problem',
            ),
        }

        for test_name, test in tests.items():
            err = ElasticBufferFlushError(msg=test.msg,
                                          err=test.err,
                                          verbose=test.verbose)
            self.assertEqual(str(err), test.expected_str, test_name)
Example #12
from django.conf import settings
from elasticsearch import Elasticsearch, ElasticsearchException

es = Elasticsearch(settings.HAYSTACK_CONNECTIONS['default']['URL'])

try:
    es.info()
except ElasticsearchException:
    raise ElasticsearchException(
        "There is no elasticsearch node running on {}".format(
            settings.HAYSTACK_CONNECTIONS['default']['URL']))

version = es.info()['version']['number']
major_version = version.split('.')[0]

if int(major_version) != 5:
    raise ElasticsearchException(
        "ES version is not 5, but {} instead.".format(version))
Example #13
        response = client.post(
            "api/session/test",
            json=invalid_range
        )

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_id", "POST", "/api/session/test", NoContentError(), 204),
        ("select_by_id", "POST", "/api/session/test", NotFoundError(), 404),
        ("select_by_id", "POST", "/api/session/test", ElasticsearchException(), 504),
        ("select_by_id", "POST", "/api/session/test", ElasticsearchDslException(), 504),
        ("select_by_id", "POST", "/api/session/test", Exception(), 500)
    ]
)
def test_start_controller_error(mocker, get_request_function, method, http_method, test_url, error, status_code):
    with mocker.patch.object(SessionService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
Example #14
    def itersearch(self, scroll, **kwargs):
        '''
        Iterated search for making Scroll API really simple to use.

        Executes a search query of scroll type and returns an iterator for easy
        iteration over the result set and for making further calls to
        Elasticsearch for scrolling over the remaining results.

        :arg index: A comma-separated list of index names to search; use `_all`
            or empty string to perform the operation on all indices
        :arg doc_type: A comma-separated list of document types to search;
            leave empty to perform the operation on all types
        :arg body: The search definition using the Query DSL
        :arg chunked: False to get one document per iteration; True to get all
            the documents returned by each scroll request. Defaults to True.
        :arg with_meta: True to return meta data of Scroll API requests with
                        every iteration. Defaults to False.
        :arg _source: True or false to return the _source field or not, or a
            list of fields to return
        :arg _source_exclude: A list of fields to exclude from the returned
            _source field
        :arg _source_include: A list of fields to extract and return from the
            _source field
        :arg analyze_wildcard: Specify whether wildcard and prefix queries
            should be analyzed (default: false)
        :arg analyzer: The analyzer to use for the query string
        :arg default_operator: The default operator for query string query (AND
            or OR) (default: OR)
        :arg df: The field to use as default where no field prefix is given in
            the query string
        :arg explain: Specify whether to return detailed information about
            score computation as part of a hit
        :arg fields: A comma-separated list of fields to return as part of a hit
        :arg ignore_indices: When performed on multiple indices, allows to
            ignore `missing` ones (default: none)
        :arg indices_boost: Comma-separated list of index boosts
        :arg lenient: Specify whether format-based query failures (such as
            providing text to a numeric field) should be ignored
        :arg lowercase_expanded_terms: Specify whether query terms should be
            lowercased
        :arg from_: Starting offset (default: 0)
        :arg preference: Specify the node or shard the operation should be
            performed on (default: random)
        :arg q: Query in the Lucene query string syntax
        :arg routing: A comma-separated list of specific routing values
        :arg scroll: Specify how long a consistent view of the index should be
            maintained for scrolled search
        :arg size: Number of hits to return (default: 10)
        :arg sort: A comma-separated list of <field>:<direction> pairs
        :arg source: The URL-encoded request definition using the Query DSL
            (instead of using request body)
        :arg stats: Specific 'tag' of the request for logging and statistical
            purposes
        :arg suggest_field: Specify which field to use for suggestions
        :arg suggest_mode: Specify suggest mode (default: missing)
        :arg suggest_size: How many suggestions to return in response
        :arg suggest_text: The source text for which the suggestions should be
            returned
        :arg timeout: Explicit operation timeout
        :arg version: Specify whether to return document version as part of a
            hit

        .. Usage::
        from superelasticsearch import SuperElasticsearch
        es = SuperElasticsearch(hosts=['localhost:9200'])
        for doc in es.itersearch(index='tweets', doc_type='tweet',
                                 chunked=False):
            print doc['_id']
        '''

        # add scroll
        kwargs['scroll'] = scroll

        # prepare kwargs for search
        if 'chunked' in kwargs:
            chunked = kwargs.pop('chunked')
        else:
            chunked = True

        if 'with_meta' in kwargs:
            with_meta = kwargs.pop('with_meta')
        else:
            with_meta = False

        resp = self.search(**kwargs)
        total = resp['hits']['total']
        scroll_id = resp['_scroll_id']
        counter = 0

        while len(resp['hits']['hits']) > 0:
            # prepare meta
            meta = resp.copy()
            meta['hits'] = resp['hits'].copy()
            meta['hits'].pop('hits')

            # if chunked output was requested, yield whole pages of hits;
            # otherwise yield one document per iteration
            if chunked:
                if with_meta:
                    yield resp['hits']['hits'], meta
                else:
                    yield resp['hits']['hits']
            else:
                for doc in resp['hits']['hits']:
                    if with_meta:
                        yield doc, meta
                    else:
                        yield doc

            # increment the counter
            counter += len(resp['hits']['hits'])

            # get the next set of results
            scroll_id = resp['_scroll_id']
            resp = self.scroll(scroll_id=scroll_id, scroll=kwargs['scroll'])

        # check if all the documents were scrolled or not
        if counter != total:
            raise ElasticsearchException(
                'Failed to get all the documents while scrolling. Total '
                'documents that matched the query: %s\n'
                'Total documents that were retrieved while scrolling: %s\n'
                'Last scroll_id with documents: %s.\n'
                'Last scroll_id: %s ' % (
                    total,
                    counter,
                    scroll_id,
                    resp['_scroll_id']))

        # clear scroll
        self.clear_scroll(scroll_id=scroll_id)
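Building on the docstring's usage note, a sketch that exercises the chunked and with_meta flags handled above (the host and index come from the docstring; the query body and scroll window are assumptions):

from superelasticsearch import SuperElasticsearch

es = SuperElasticsearch(hosts=['localhost:9200'])
query = {"query": {"match_all": {}}}  # assumed query body

# chunked=True (the default) yields one page of hits per iteration;
# with_meta=True additionally yields the scroll response without the hits.
for hits, meta in es.itersearch(index='tweets', body=query, scroll='1m',
                                chunked=True, with_meta=True):
    print(len(hits), meta['_scroll_id'])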
    with mocker.patch.object(ProductService, "select_by_id", return_value=mock_product):
        with login_disabled_app.test_client() as client:
            response = client.get(
                "api/product/id"
            )

        data = json.loads(response.data)
        ProductResultsSchema().load(data)
        assert response.status_code == 200


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [
        ("select_by_id", "GET", "api/product/id", NotFoundError(), 404),
        ("select_by_id", "GET", "api/product/id", ElasticsearchException(), 504),
        ("select_by_id", "GET", "api/product/id", ElasticsearchDslException(), 504),
        ("select_by_id", "GET", "api/product/id", Exception(), 500)
    ]
)
def test_product_controller_error(mocker, get_request_function, method, http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(
            test_url
        )

        data = json.loads(response.data)

        if status_code == 404:
Example #16
    assert response.status_code == 400

    invalid_range = deepcopy(request_json)
    invalid_range["pricerange"].update(min=100.0, max=50.0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/brand/test", json=invalid_range)

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize("method,http_method,test_url,error,status_code", [
    ("get_total", "POST", "/api/brand/test", NoContentError(), 204),
    ("get_total", "POST", "/api/brand/test", ElasticsearchException(), 504),
    ("get_total", "POST", "/api/brand/test", ElasticsearchDslException(), 504),
    ("get_total", "POST", "/api/brand/test", Exception(), 500)
])
def test_brand_controller_error(mocker, get_request_function, method,
                                http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else:
            data = json.loads(response.data)
Example #17
    invalid_pagesize = deepcopy(request_json)
    invalid_pagesize.update(pagesize=0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/brand/test/1", json=invalid_pagesize)

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [("select", "POST", "/api/brand/test/1", NoContentError(), 204),
     ("select", "POST", "/api/brand/test/1", ElasticsearchException(), 504),
     ("select", "POST", "/api/brand/test/1", ElasticsearchDslException(), 504),
     ("select", "POST", "/api/brand/test/1", Exception(), 500)])
def test_kind_products_controller_error(mocker, get_request_function, method,
                                        http_method, test_url, error,
                                        status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else:
            data = json.loads(response.data)
Example #18
    invalid_amount = deepcopy(request_json)
    invalid_amount.update(amount=0)

    with login_disabled_app.test_client() as client:
        response = client.post("api/gender/test", json=invalid_amount)

    data = json.loads(response.data)
    ErrorSchema().load(data)
    assert response.status_code == 400


@pytest.mark.parametrize(
    "method,http_method,test_url,error,status_code",
    [("super_discounts", "POST", "/api/gender/test", NoContentError(), 204),
     ("super_discounts", "POST", "/api/gender/test", ElasticsearchException(),
      504),
     ("super_discounts", "POST", "/api/gender/test",
      ElasticsearchDslException(), 504),
     ("super_discounts", "POST", "/api/gender/test", Exception(), 500)])
def test_gender_controller_error(mocker, get_request_function, method,
                                 http_method, test_url, error, status_code):
    with mocker.patch.object(ProductService, method, side_effect=error):
        make_request = get_request_function(http_method)

        response = make_request(test_url)

        if status_code == 204:
            with pytest.raises(JSONDecodeError):
                json.loads(response.data)
        else: