Example #1
def test_merge_no_doc_error():
    defaults = {}
    indexer_doc = {}
    # merge_default_fields should raise when the result lacks a 'doc' key
    try:
        _ = indexer_utils.merge_default_fields(indexer_doc, defaults)
    except ValueError as err:
        assert str(err) == "indexer return data should have 'doc' dictionary {}"
    else:
        raise AssertionError("expected a ValueError for a missing 'doc' key")
Example #2
def test_merge_same_scalar():
    defaults = {
        "key": "1"
    }
    indexer_doc = {
        "doc": {
            "key": "1"
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert indexer_ret['doc']['key'] == "1"
Example #3
def test_merge_scalars():
    defaults = {
        "key": 1
    }
    indexer_doc = {
        "doc": {
            "key": 2
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert isinstance(indexer_ret['doc']['key'], list)
    assert lists_are_same(indexer_ret['doc']['key'], [1, 2])
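Examples #3 and #5 compare merged values with a lists_are_same helper that is not shown in this listing. A minimal sketch, assuming it checks that two lists hold the same items regardless of order, could look like this:

def lists_are_same(list_a, list_b):
    # Order-insensitive multiset comparison: every item in list_a must be
    # matched (and consumed) by an item in list_b, with nothing left over.
    remaining = list(list_b)
    for item in list_a:
        if item not in remaining:
            return False
        remaining.remove(item)
    return not remaining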
Example #4
def test_merge_add_new_field():
    defaults = {
        "key": "1"
    }
    indexer_doc = {
        "doc": {
            "key2": "2"
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert indexer_ret['doc']['key'] == "1"
    assert indexer_ret['doc']['key2'] == "2"
Example #5
def test_merge_lists():
    # merge two lists
    defaults = {
        "key": [1, 2]
    }
    indexer_doc = {
        "doc": {
            "key": [1, 3]
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert isinstance(indexer_ret['doc']['key'], list)
    assert lists_are_same(indexer_ret['doc']['key'], [1, 2, 3])
    # merge list into scalar
    defaults = {
        "key": 1
    }
    indexer_doc = {
        "doc": {
            "key": [1, 3]
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert isinstance(indexer_ret['doc']['key'], list)
    assert lists_are_same(indexer_ret['doc']['key'], [1, 3])
    # merge scalar into list
    defaults = {
        "key": [1, 2]
    }
    indexer_doc = {
        "doc": {
            "key": 3
        }
    }
    indexer_ret = indexer_utils.merge_default_fields(indexer_doc, defaults)
    assert isinstance(indexer_ret['doc']['key'], list)
    assert lists_are_same(indexer_ret['doc']['key'], [1, 2, 3])
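Taken together, Examples #1–#5 pin down the contract of merge_default_fields: a missing 'doc' key raises a ValueError, equal values pass through unchanged, a default for a missing key is copied in, and conflicting values are combined into a deduplicated list. A minimal sketch consistent with these tests (an illustration, not the project's actual implementation) might be:

def merge_default_fields(indexer_doc, defaults):
    # The indexer result must wrap its fields in a 'doc' dictionary
    if 'doc' not in indexer_doc:
        raise ValueError(
            f"indexer return data should have 'doc' dictionary {indexer_doc}")
    doc = indexer_doc['doc']
    for key, default_val in defaults.items():
        if key not in doc:
            # New field: copy the default over as-is
            doc[key] = default_val
            continue
        if doc[key] == default_val:
            # Identical values: keep the existing scalar or list unchanged
            continue
        # Conflicting values: normalize both sides to lists and take the
        # deduplicated union, keeping the document's own items first
        doc_items = doc[key] if isinstance(doc[key], list) else [doc[key]]
        default_items = (default_val if isinstance(default_val, list)
                         else [default_val])
        merged = list(doc_items)
        for item in default_items:
            if item not in merged:
                merged.append(item)
        doc[key] = merged
    return indexer_doc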
Example #6
def index_obj(obj_data, ws_info, msg_data):
    """
    For a newly created object, generate the index document for it and push to
    the elasticsearch topic on Kafka.
    Args:
        obj_data - in-memory parsed data from the workspace object
        msg_data - json event data received from the kafka workspace events
            stream. Must have keys for `wsid` and `objid`
    """
    obj_type = obj_data['info'][2]
    (type_module, type_name, type_version) = ws_utils.get_type_pieces(obj_type)
    if f'{type_module}.{type_name}' in config()['global']['ws_type_blacklist']:
        # Blacklisted type, so we don't index it
        return
    # Check whether this object's workspace is tagged "noindex"
    metadata = ws_info[-1]
    # If the workspace metadata contains a "noindex" search tag, skip it
    if metadata.get('searchtags'):
        if 'noindex' in metadata['searchtags']:
            return
    # Fetch the first version of the object to get its creation date.
    upa = get_upa_from_msg_data(msg_data)
    try:
        obj_data_v1 = config()['ws_client'].admin_req(
            'getObjects', {
                'objects': [{
                    'ref': upa + '/1'
                }],
                'no_data': 1
            })
    except WorkspaceResponseError as err:
        ws_utils.log_error(err)
        raise err
    obj_data_v1 = obj_data_v1['data'][0]
    # Dispatch to a specific type handler to produce the search document
    (indexer, conf) = _find_indexer(type_module, type_name, type_version)
    # All indexers are generators that yield document data for ES.
    defaults = indexer_utils.default_fields(obj_data, ws_info, obj_data_v1)
    for indexer_ret in indexer(obj_data, ws_info, obj_data_v1, conf):
        if indexer_ret['_action'] == 'index':
            allow_indices = config()['allow_indices']
            skip_indices = config()['skip_indices']
            if allow_indices is not None and indexer_ret.get(
                    'index') not in allow_indices:
                # This index name is not in the indexing whitelist from the config, so we skip
                logger.debug(
                    f"Index '{indexer_ret['index']}' is not in ALLOW_INDICES, skipping"
                )
                continue
            if skip_indices is not None and indexer_ret.get(
                    'index') in skip_indices:
                # This index name is in the indexing blacklist in the config, so we skip
                logger.debug(
                    f"Index '{indexer_ret['index']}' is in SKIP_INDICES, skipping"
                )
                continue
            if '_no_defaults' not in indexer_ret:
                # Inject all default fields into the index document.
                indexer_ret = indexer_utils.merge_default_fields(
                    indexer_ret, defaults)
        yield indexer_ret
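index_obj is a generator, so a caller iterates it and forwards each yielded document downstream. A hedged usage sketch, where produce_to_elasticsearch_topic is a hypothetical Kafka producer helper, not part of the source above:

def handle_new_object_event(obj_data, ws_info, msg_data):
    # Consume the generator and push each search document to Kafka
    for doc in index_obj(obj_data, ws_info, msg_data):
        produce_to_elasticsearch_topic(doc)  # hypothetical producer call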