Пример #1
0
    def sanity_check_new_index(self, attempt, document, new_index_name, previous_record_count):
        """
        Decide whether a freshly built index looks complete enough to be used.

        Two checks are combined: the change in record count relative to
        ``previous_record_count`` must stay below ``settings.INDEX_SIZE_CHANGE_THRESHOLD``,
        and the ``aggregation_key`` field of the new index must be mapped as ``keyword``
        (spot check for VAN-391). The outcome is logged either way; on failure the
        method additionally sleeps for 5 seconds before returning.

        Arguments:
            attempt: Attempt number, only used in the log output.
            document: Passed to ``self.get_record_count`` to obtain the current count.
            new_index_name: Index whose mapping (and, on failure, search hit total) is inspected.
            previous_record_count: Record count the current count is compared against.

        Returns:
            tuple: ``(is_sane, index_info_string)`` - the combined check result and a
            one-line summary of the previous/current counts and the percentage change.
        """
        new_count = self.get_record_count(document)
        change = self.percentage_change(new_count, previous_record_count)

        # First check: the record count must not have shifted too much.
        count_ok = change < settings.INDEX_SIZE_CHANGE_THRESHOLD

        # Second check: spot check a known-flaky field type to detect VAN-391.
        aggregation_type = Mapping.from_es(new_index_name)['aggregation_key'].name
        is_sane = count_ok and aggregation_type == 'keyword'

        if is_sane:
            success_report = '''
        Sanity check passed for attempt #{0}.
        Index name: {1}
        Percentage change: {2}
        Previous record count: {3}
        Current record count: {4}
                '''.format(
                attempt,
                new_index_name,
                '{}%'.format(int(round(change * 100, 0))),
                previous_record_count,
                new_count
            )
            logger.info(success_report)
        else:
            # Second opinion on the document count, taken from a match_all search
            # against the new index; it is only reported in the log message.
            search_response = get_connection().search({"query": {"match_all": {}}}, index=new_index_name)
            hits = search_response.get('hits', {})
            search_count = hits.get('total', {}).get('value', 0)
            failure_report = '''
        Sanity check failed for attempt #{0}.
        Index name: {1}
        Percentage change: {2}
        Previous record count: {3}
        Base record count: {4}
        Search record count: {5}
        Aggregation key type: {6}
                '''.format(
                attempt,
                new_index_name,
                '{}%'.format(int(round(change * 100, 0))),
                previous_record_count,
                new_count,
                search_count,
                aggregation_type,
            )
            logger.info(failure_report)
            logger.info('...sleeping for 5 seconds...')
            time.sleep(5)

        summary_template = (
            'The previous index contained [{}] records. '
            'The new index contains [{}] records, a [{:.2f}%] change.'
        )
        index_info_string = summary_template.format(previous_record_count, new_count, change * 100)

        return is_sane, index_info_string
Пример #2
0
def view_mappings(index='buddyupevents', doc_type='event'):
    """Fetch the mapping of ``doc_type`` in ``index`` from Elasticsearch.

    The ``Mapping`` object itself is returned; call ``.to_dict()`` on it for a
    plain dictionary.

    Usage: explore.view_mappings().to_dict()
    """
    return Mapping.from_es(index, doc_type)
Пример #3
0
    def mapping(self, index, document):
        """ Return the field properties mapped for a document type in an index.

        The mapping is fetched from Elasticsearch through ``self.es``.

        :param index: Elasticsearch index
        :param document: type of document

        :returns: the ``properties`` dictionary of the document type's mapping
        """

        es_mapping = Mapping.from_es(index, document, using=self.es)
        mapping_by_type = es_mapping.to_dict()
        return mapping_by_type[document]['properties']
Пример #4
0
    def mapping(self, index, document):
        """ Return the field properties mapped for a document type in an index.

        The mapping is fetched from Elasticsearch through ``self.es``.

        :param index: Elasticsearch index
        :param document: type of document

        :returns: the ``properties`` dictionary of the document type's mapping
        """

        es_mapping = Mapping.from_es(index, document, using=self.es)
        mapping_by_type = es_mapping.to_dict()
        return mapping_by_type[document]['properties']
Пример #5
0
def _get_es_facets():
    """Returns a dict from UI facet name to Elasticsearch facet object.

    The index mapping is read from Elasticsearch and combined with the
    ``facets`` list of ``ui.json`` (located in ``DATASET_CONFIG_DIR``). Fields
    mapped as ``text`` get a ``TermsFacet`` on their ``.keyword`` sub-field;
    every other field type gets a ``HistogramFacet`` with an interval of 10.

    Returns:
        OrderedDict: UI facet name -> facet object, in config-file order.

    Raises:
        TransportError: If the mapping cannot be fetched from Elasticsearch.
            A missing index is additionally logged before re-raising.
        ValueError: If a configured ``elasticsearch_field_name`` is not part
            of the index mapping.
    """
    es_url = app.app.config['ELASTICSEARCH_URL']
    using = Elasticsearch(es_url)
    index_name = app.app.config['INDEX_NAME']
    try:
        mapping = Mapping.from_es(index_name, 'type', using=using).to_dict()
    except TransportError as e:
        if 'index_not_found_exception' in e.error:
            app.app.logger.error('Index %s not found at %s', index_name,
                                 es_url)
        # Re-raise every transport error, not only "index not found":
        # swallowing the others would leave `mapping` unbound and fail
        # further down with an unrelated UnboundLocalError.
        raise

    config_path = os.path.join(app.app.config['DATASET_CONFIG_DIR'], 'ui.json')
    facets_config = _parse_json_file(config_path)['facets']

    # Preserve order, so facets are returned in same order as the config file.
    facets = OrderedDict()

    for facet_config in facets_config:
        field_name = facet_config['elasticsearch_field_name']
        properties = mapping['type']['properties']
        if field_name not in properties:
            raise ValueError(
                'elasticsearch_field_name %s not found in Elasticsearch index %s'
                % (field_name, index_name))
        field_type = properties[field_name]['type']
        ui_facet_name = facet_config['ui_facet_name']
        if field_type == 'text':
            # Use ".keyword" because we want aggregation on keyword field, not
            # term field. See
            # https://www.elastic.co/guide/en/elasticsearch/reference/6.2/fielddata.html#before-enabling-fielddata
            facets[ui_facet_name] = TermsFacet(field=field_name + '.keyword',
                                               size=20)
        else:
            # Assume numeric type.
            # TODO: Handle other types.
            # TODO: Automatically figure out bucket intervals. Unfortunately
            # Elasticsearch won't do this for us
            # (https://github.com/elastic/elasticsearch/issues/9572). Make the
            # ranges easy to read (10-19,20-29 instead of 10-17,18-25).
            facets[ui_facet_name] = HistogramFacet(field=field_name,
                                                   interval=10)
    app.app.logger.info('Elasticsearch facets: %s', facets)
    return facets
Пример #6
0
def ensure_vocabulary_mappings(term_info):
    """
    Add any vocabulary fields missing from the Elasticsearch mapping.

    Every vocabulary found in ``term_info`` whose key is not yet a field of
    the mapping is added as a ``"string"`` field with
    ``index="not_analyzed"``, so that taxonomy terms are always matched
    exactly. Accepts the output of get_resource_terms.

    Calling this function during indexing means that vocabularies do not
    need to be added to the mapping in advance, which covers vocabularies
    that are created on-the-fly without relying on extra signals.

    The mapping is saved and the index refreshed only when at least one
    field was added. Nothing happens for empty input.

    Args:
        term_info (dict): Details of terms for a group of LearningResources.
    """
    if not len(term_info):
        return

    # Called for its effect only (a connection has to exist); the return
    # value is not needed.
    get_conn()

    # Current mapping as stored in Elasticsearch, and the fields it defines.
    es_mapping = Mapping.from_es(index=INDEX_NAME, doc_type=DOC_TYPE)
    known_fields = set(es_mapping.to_dict()["learningresource"]["properties"])

    # Every vocabulary id mentioned anywhere in the incoming data.
    vocab_ids = {
        vocab_id
        for vocab_terms in term_info.values()
        for vocab_id in vocab_terms.keys()
    }

    # Register a field for each vocabulary the mapping does not know yet.
    mapping_changed = False
    for vocab_id in vocab_ids:
        vocab_key = make_vocab_key(vocab_id)
        if vocab_key not in known_fields:
            es_mapping.field(vocab_key, "string", index="not_analyzed")
            mapping_changed = True

    if mapping_changed:
        es_mapping.save(INDEX_NAME)
        refresh_index()
Пример #7
0
def ensure_vocabulary_mappings(term_info):
    """
    Add any vocabulary fields missing from the Elasticsearch mapping.

    Every vocabulary found in ``term_info`` whose key is not yet a field of
    the mapping is added as a ``"string"`` field with
    ``index="not_analyzed"``, so that taxonomy terms are always matched
    exactly. Accepts the output of get_resource_terms.

    Calling this function during indexing means that vocabularies do not
    need to be added to the mapping in advance, which covers vocabularies
    that are created on-the-fly without relying on extra signals.

    The mapping is saved and the index refreshed only when at least one
    field was added. Nothing happens for empty input.

    Args:
        term_info (dict): Details of terms for a group of LearningResources.
    """
    if not len(term_info):
        return

    # Called for its effect only (a connection has to exist); the return
    # value is not needed.
    get_conn()

    # Current mapping as stored in Elasticsearch, and the fields it defines.
    es_mapping = Mapping.from_es(index=INDEX_NAME, doc_type=DOC_TYPE)
    known_fields = set(es_mapping.to_dict()["learningresource"]["properties"])

    # Every vocabulary id mentioned anywhere in the incoming data.
    vocab_ids = {
        vocab_id
        for vocab_terms in term_info.values()
        for vocab_id in vocab_terms.keys()
    }

    # Register a field for each vocabulary the mapping does not know yet.
    mapping_changed = False
    for vocab_id in vocab_ids:
        vocab_key = make_vocab_key(vocab_id)
        if vocab_key not in known_fields:
            es_mapping.field(vocab_key, "string", index="not_analyzed")
            mapping_changed = True

    if mapping_changed:
        es_mapping.save(INDEX_NAME)
        refresh_index()
Пример #8
0
    def add_mapping_to_index(self, lang_code, lang_analyzer, delete_old_index=False, kuromoji_synonyms=None):
        """
        Add or update mail/irc-mapping to EL-index, create/update required analyzers and add fields.

        An existing index is either deleted (``delete_old_index=True``) or closed, updated in
        place and opened again. Once the index has been closed it is reopened even if the
        update raises, so a failed update does not leave the index closed.

        :param lang_code: ``str`` Language of index e.g. 'ja'
        :param lang_analyzer: ``str`` Name of analyzer for language e.g. 'kuromoji', 'standard' etc.
        :param delete_old_index: ``bool`` Delete index if existing? Default: False = Update existing index (Close, Update, Open)
        :param kuromoji_synonyms: Synonyms for kuromoji Japanese analyzer; an empty list is used when omitted.
            Keep old synonyms if synonyms list empty and index not deleted.
            NOTE(review): earlier documentation described this as a ``dict`` while the default is a
            list - confirm the expected type against ``helpers.get_analyzer``.
        :return: None
        """
        if kuromoji_synonyms is None:
            kuromoji_synonyms = []
        analyzer_lang = helpers.get_analyzer(lang_analyzer, delete_old_index=delete_old_index,
                                             user_dictionary_file=self._user_dictionary_file,
                                             synonyms=kuromoji_synonyms)
        analyzer_case_insensitive_sort = analysis.analyzer('case_insensitive_sort',
                                                           tokenizer=analysis.tokenizer('keyword'),
                                                           filter=['lowercase'])
        # Start from an empty mapping; replaced by the server's mapping when an
        # existing index is updated in place.
        mapping = Mapping(self._type_name)
        reopen_index = False
        index_name = self._index_prefix.format(lang_code)
        try:
            if self._es.indices.exists(index=index_name):
                if delete_old_index:
                    self._es.indices.delete(index=index_name, ignore=[400, 404])
                else:
                    self._es.indices.close(index=index_name)
                    # Set only after close() succeeded, so we never try to open
                    # an index this call did not close.
                    reopen_index = True
                    mapping = Mapping.from_es(index_name, self._type_name, using=self._es)  # Get existing index from server

            self.add_mapping_fields(mapping, analyzer_lang, analyzer_case_insensitive_sort)

            mapping.save(index_name, using=self._es)  # Insert or update
        finally:
            # Reopen on success and on failure alike.
            if reopen_index:
                self._es.indices.open(index=index_name)
def test_mapping(setup_es):
    """Check that the contact mapping stored in ES equals the expected one."""
    mapping = Mapping.from_es(
        ContactSearchApp.es_model.get_write_index(),
        ContactSearchApp.name,
    )

    # Field definitions that occur many times in the expected mapping.
    boolean = {'type': 'boolean'}
    date = {'type': 'date'}
    keyword = {'type': 'keyword'}
    text = {'type': 'text'}
    normalized_keyword = {
        'normalizer': 'lowercase_asciifolding_normalizer',
        'type': 'keyword',
    }
    trigram_text = {'analyzer': 'trigram_analyzer', 'type': 'text'}
    text_with_keyword = {
        'type': 'text',
        'fields': {'keyword': normalized_keyword},
    }
    text_with_trigram = {
        'type': 'text',
        'fields': {'trigram': trigram_text},
    }
    text_with_keyword_and_trigram = {
        'type': 'text',
        'fields': {'keyword': normalized_keyword, 'trigram': trigram_text},
    }

    # Nested objects that share a shape.
    id_name_object = {
        'properties': {'id': keyword, 'name': normalized_keyword},
        'type': 'object',
    }
    person_properties = {
        'first_name': normalized_keyword,
        'id': keyword,
        'last_name': normalized_keyword,
        'name': text_with_keyword_and_trigram,
    }
    person_object = {'properties': person_properties, 'type': 'object'}

    expected_properties = {
        'accepts_dit_email_marketing': boolean,
        'address_1': text,
        'address_2': text,
        'address_country': id_name_object,
        'address_county': normalized_keyword,
        'address_postcode': text,
        'address_same_as_company': boolean,
        'address_town': normalized_keyword,
        'adviser': person_object,
        'archived': boolean,
        'archived_by': person_object,
        'archived_on': date,
        'archived_reason': text,
        'company': {
            'properties': {
                'id': keyword,
                'name': text_with_keyword_and_trigram,
                'trading_names': text_with_trigram,
            },
            'type': 'object',
        },
        'company_sector': {
            'properties': {
                'ancestors': {
                    'properties': {'id': keyword},
                    'type': 'object',
                },
                'id': keyword,
                'name': normalized_keyword,
            },
            'type': 'object',
        },
        'company_uk_region': id_name_object,
        'created_by': {
            # A person object that additionally carries the DIT team.
            'properties': dict(person_properties, dit_team=id_name_object),
            'type': 'object',
        },
        'created_on': date,
        'email': normalized_keyword,
        'email_alternative': text,
        'first_name': text_with_keyword,
        'id': keyword,
        'job_title': normalized_keyword,
        'last_name': text_with_keyword,
        'modified_on': date,
        'name': text_with_keyword_and_trigram,
        'notes': {
            'analyzer': 'english_analyzer',
            'type': 'text',
        },
        'primary': boolean,
        'telephone_alternative': text,
        'telephone_countrycode': keyword,
        'telephone_number': keyword,
        'title': id_name_object,
    }
    expected = {
        'contact': {
            'properties': expected_properties,
            'dynamic': 'false',
        },
    }

    assert mapping.to_dict() == expected
Пример #10
0
def test_mapping(setup_es):
    """Check that the companies house company mapping stored in ES equals the expected one."""
    mapping = Mapping.from_es(
        CompaniesHouseCompanySearchApp.es_model.get_target_index_name(),
        CompaniesHouseCompanySearchApp.name,
    )

    # Field definitions that occur several times in the expected mapping.
    date = {'type': 'date'}
    keyword = {'type': 'keyword'}
    text = {'type': 'text'}
    unindexed_text = {'index': False, 'type': 'text'}
    normalized_keyword = {
        'normalizer': 'lowercase_asciifolding_normalizer',
        'type': 'keyword',
    }
    trigram_text = {'analyzer': 'trigram_analyzer', 'type': 'text'}

    expected_registered_address = {
        'type': 'object',
        'properties': {
            'line_1': unindexed_text,
            'line_2': unindexed_text,
            'town': unindexed_text,
            'county': unindexed_text,
            'postcode': {
                'type': 'text',
                'fields': {'trigram': trigram_text},
            },
            'country': {
                'type': 'object',
                'properties': {
                    'id': {'index': False, 'type': 'keyword'},
                    'name': unindexed_text,
                },
            },
        },
    }

    expected_properties = {
        'company_category': normalized_keyword,
        'company_number': normalized_keyword,
        'company_status': normalized_keyword,
        'id': keyword,
        'incorporation_date': date,
        'name': {
            'type': 'text',
            'fields': {'keyword': normalized_keyword, 'trigram': trigram_text},
        },
        'registered_address_1': text,
        'registered_address_2': text,
        'registered_address_country': {
            'properties': {'id': keyword, 'name': normalized_keyword},
            'type': 'object',
        },
        'registered_address_county': text,
        'registered_address_postcode': {
            'copy_to': ['registered_address_postcode_trigram'],
            'type': 'text',
        },
        'registered_address_postcode_trigram': trigram_text,
        'registered_address_town': normalized_keyword,
        'registered_address': expected_registered_address,
        'sic_code_1': text,
        'sic_code_2': text,
        'sic_code_3': text,
        'sic_code_4': text,
        'uri': text,
    }
    expected = {
        'companieshousecompany': {
            'dynamic': 'false',
            'properties': expected_properties,
        },
    }

    assert mapping.to_dict() == expected
Пример #11
0
def test_mapping(es):
    """Verify that the company write index exposes exactly the expected ES mapping."""
    index_name = CompanySearchApp.es_model.get_write_index()
    mapping = Mapping.from_es(index_name, CompanySearchApp.name)

    # Leaf field definitions that recur throughout the expected mapping.
    keyword_field = {'type': 'keyword'}
    date_field = {'type': 'date'}
    boolean_field = {'type': 'boolean'}
    text_field = {'type': 'text'}
    unindexed_text_field = {'index': False, 'type': 'text'}
    normalized_keyword_field = {
        'normalizer': 'lowercase_asciifolding_normalizer',
        'type': 'keyword',
    }
    trigram_subfield = {'analyzer': 'trigram_analyzer', 'type': 'text'}
    trigram_text_field = {
        'type': 'text',
        'fields': {'trigram': trigram_subfield},
    }
    name_field = {
        'type': 'text',
        'fields': {
            'keyword': normalized_keyword_field,
            'trigram': trigram_subfield,
        },
    }
    uk_postcode_field = {
        'analyzer': 'postcode_analyzer_v2',
        'search_analyzer': 'postcode_search_analyzer_v2',
        'type': 'text',
    }

    # Composite (object) definitions assembled from the leaves above.
    id_name_object = {
        'properties': {
            'id': keyword_field,
            'name': normalized_keyword_field,
        },
        'type': 'object',
    }
    address_object = {
        'type': 'object',
        'properties': {
            'line_1': unindexed_text_field,
            'line_2': unindexed_text_field,
            'town': unindexed_text_field,
            'county': unindexed_text_field,
            'postcode': trigram_text_field,
            'country': {
                'type': 'object',
                'properties': {
                    'id': keyword_field,
                    'name': trigram_text_field,
                },
            },
        },
    }

    expected_properties = {
        'archived': boolean_field,
        'archived_by': {
            'properties': {
                'first_name': normalized_keyword_field,
                'id': keyword_field,
                'last_name': normalized_keyword_field,
                'name': name_field,
            },
            'type': 'object',
        },
        'archived_on': date_field,
        'archived_reason': text_field,
        'business_type': id_name_object,
        'company_number': normalized_keyword_field,
        'created_on': date_field,
        'description': {
            'analyzer': 'english_analyzer',
            'type': 'text',
        },
        'duns_number': keyword_field,
        'employee_range': id_name_object,
        'export_experience_category': id_name_object,
        'export_to_countries': id_name_object,
        'future_interest_countries': id_name_object,
        'global_headquarters': id_name_object,
        'headquarter_type': id_name_object,
        'id': keyword_field,
        'modified_on': date_field,
        'name': name_field,
        'reference_code': normalized_keyword_field,
        'address': address_object,
        'registered_address': address_object,
        'uk_address_postcode': uk_postcode_field,
        'uk_registered_address_postcode': uk_postcode_field,
        'one_list_group_global_account_manager': {
            'properties': {
                'first_name': unindexed_text_field,
                'id': keyword_field,
                'last_name': unindexed_text_field,
                'name': unindexed_text_field,
            },
            'type': 'object',
        },
        'sector': {
            'properties': {
                'ancestors': {
                    'properties': {'id': keyword_field},
                    'type': 'object',
                },
                'id': keyword_field,
                'name': normalized_keyword_field,
            },
            'type': 'object',
        },
        'suggest': {
            'analyzer': 'simple',
            'max_input_length': 50,
            'preserve_position_increments': True,
            'preserve_separators': True,
            'type': 'completion',
            'contexts': [
                {'name': 'country', 'type': 'CATEGORY'},
            ],
        },
        'trading_names': trigram_text_field,
        'turnover_range': id_name_object,
        'uk_based': boolean_field,
        'uk_region': id_name_object,
        'vat_number': {
            'index': False,
            'type': 'keyword',
        },
        'website': text_field,
        'latest_interaction_date': date_field,
    }
    expected = {
        'company': {
            'dynamic': 'false',
            'properties': expected_properties,
        },
    }

    assert mapping.to_dict() == expected
Пример #12
0
def _collect_bool_clauses(raw_params, mapping, geo_filters=None):
    """Turn a comma-separated ``key=value`` list into a list of query clauses.

    Each pair is passed to ``_create_query``. When ``geo_filters`` is a list,
    clauses reported as ``'geo-ok'`` are appended to it instead of being
    returned; when it is ``None`` a ``'geo-ok'`` status is reported as the
    error, exactly like any other non-``'ok'`` status.

    Returns a ``(queries, error)`` tuple; ``error`` is ``None`` on success and
    the error message otherwise (``queries`` is then ``None``).
    """
    queries = []
    for param in raw_params.split(','):
        # partition() keeps any further '=' characters inside the value and
        # lets us report a missing '=' instead of raising on unpacking.
        name, separator, value = param.partition('=')
        if not separator:
            return None, 'Invalid parameter {}: expected key=value'.format(
                param)
        qry, msg = _create_query(name, value, mapping)
        if msg == 'geo-ok' and geo_filters is not None:
            geo_filters.extend(qry)
            continue
        if msg != 'ok':
            return None, msg
        if not isinstance(qry, list):
            qry = [qry]
        queries.extend(qry)
    return queries, None


def search(index, **kwargs):
    """Run a metadata search against ``index`` and return a result dict.

    Recognised keyword arguments:

    * ``fields`` - comma-separated string (or iterable) of source fields to
      return; every field must resolve in the index mapping.
    * ``start`` / ``size`` - 1-based first hit and number of hits
      (defaults 1 and 100).
    * ``sort`` / ``sortorder`` - sort field (a leading ``-`` forces
      descending) and ``asc``/``desc``.
    * ``from`` / ``to`` - ``YYYY-MM-DD`` bounds applied as range filters on
      ``metadata_json.dates.date``; ``dates.dateType`` restricts the date type.
    * ``encloses`` / ``includes`` / ``overlaps`` / ``excludes`` - coordinates
      for a geo filter built by ``_create_geo_query`` (if several are given,
      the last one iterated wins).
    * ``match`` - bool occurrence type for the collected queries: ``must``,
      ``filter``, ``should`` or ``must_not`` (case-insensitive).
    * ``and`` / ``or`` / ``not`` - comma-separated ``key=value`` lists grouped
      into a ``must`` / ``should`` / ``must_not`` bool query.
    * anything else is treated as a field query via ``_create_query``.

    Returns a dict that always has ``success``. On failure it carries
    ``error``; on success it carries ``result`` (the executed response) and
    ``count``.
    """
    output = {'success': False}
    mapping = Mapping.from_es(index, 'doc')
    srch = Metadata.search(index=index)
    logger.info('search in index {}'.format(index))
    source_fields = ''
    sort_field = None
    sort_order = 'asc'
    from_date = None
    to_date = None
    date_type = None
    filters = []
    relation = None
    coords = None
    match = None

    start = 1
    size = 100
    q_list = []

    geo_relations = ('encloses', 'includes', 'overlaps', 'excludes')
    match_types = ('must', 'filter', 'should', 'must_not')
    bool_groups = {'and': 'must', 'or': 'should', 'not': 'must_not'}

    for key, value in kwargs.items():
        if key == 'fields':
            source_fields = value
        elif key == 'start':
            start = value
        elif key == 'size':
            size = value
        elif key == 'sort':
            sort_field = value
        elif key == 'sortorder':
            sort_order = 'desc' if value.lower() == 'desc' else 'asc'
        elif key == 'from':
            from_date = value
        elif key == 'to':
            to_date = value
        elif key == 'dates.dateType':
            date_type = value.lower()
        elif key in geo_relations:
            relation = key
            coords = value
        elif key == 'match':
            # Store the normalised value: it becomes the bool clause key, and
            # Elasticsearch only accepts the lowercase occurrence types.
            match = value.lower()
            if match not in match_types:
                output['error'] = 'Unknown match value: {}'.format(value)
                return output
        elif key in bool_groups:
            # Only 'and' groups divert geo clauses into the filter list.
            geo_target = filters if key == 'and' else None
            queries, error = _collect_bool_clauses(value, mapping, geo_target)
            if error is not None:
                output['error'] = error
                return output
            q_list.append(Q({'bool': {bool_groups[key]: queries}}))
        else:
            # Otherwise add to query
            qry, msg = _create_query(key, value, mapping)
            if msg not in ['geo-ok', 'ok']:
                output['error'] = msg
                return output

            if not isinstance(qry, list):
                qry = [qry]
            q_list.extend(qry)

    if sort_field:
        logger.debug('Sort on {}'.format(sort_field))
        sort_field_name = sort_field
        if sort_field.startswith('-'):
            sort_order = 'desc'
            sort_field = sort_field_name = sort_field[1:]
        sort_field = {sort_field: {'order': sort_order}}

        field_type = mapping.resolve_field(sort_field_name)
        if field_type is None:
            msg = 'Unknown sort field: {}'.format(sort_field_name)
            output['error'] = msg
            return output
        if type(field_type).name in ('object',):
            msg = 'Cannot sort on field: {}'.format(sort_field_name)
            output['error'] = msg
            return output
        srch = srch.sort(sort_field)

    if source_fields:
        new_fields = []
        if isinstance(source_fields, str):
            source_fields = source_fields.split(',')
        for field in source_fields:
            field = field.strip()
            logger.debug('limit output field: {}'.format(field))
            if mapping.resolve_field(field) is None:
                msg = 'Unknown source field: {}'.format(field)
                output['error'] = msg
                return output
            new_fields.append(field)
        srch = srch.source(new_fields)

    if from_date:
        try:
            from_date = datetime.strptime(from_date, '%Y-%m-%d').date()
        except (TypeError, ValueError):
            msg = 'from date {} format should be YYYY-MM-DD'.format(from_date)
            output['error'] = msg
            return output

    if to_date:
        try:
            to_date = datetime.strptime(to_date, '%Y-%m-%d').date()
        except (TypeError, ValueError):
            msg = 'to date {} format should be YYYY-MM-DD'.format(to_date)
            output['error'] = msg
            return output

    if from_date or to_date:
        dates = []
        if from_date:
            dates.append({
                'range': {
                    'metadata_json.dates.date.lte': {
                        'gte': from_date
                    }
                }
            })
        if to_date:
            dates.append(
                {'range': {
                    'metadata_json.dates.date.gte': {
                        'lte': to_date
                    }
                }})
        if date_type:
            dates.append({'term': {'metadata_json.dates.dateType': date_type}})
        filters.append({'bool': {'must': dates}})
    elif date_type:
        q_list.append(Q({"match": {
            'metadata_json.dates.dateType': date_type
        }}))

    if relation:
        afilter, msg = _create_geo_query(relation, coords)
        if msg != 'geo-ok':
            output['error'] = msg
            return output

        filters.extend(afilter)

    # Add Query
    if not q_list:
        q_list = {'match_all': {}}

    if match:
        qry = {match: q_list}
    else:
        qry = {'must': q_list}

    # Add Filter
    if filters:
        if len(filters) == 1:
            qry['filter'] = filters[0]
        else:
            qry['filter'] = {'bool': {'should': filters}}

    qry = {'bool': qry}
    logger.info('Search Query {}'.format(qry))
    srch.query = qry

    try:
        start = int(start)
    except (TypeError, ValueError, OverflowError):
        msg = 'start {} must be an integer'.format(start)
        output['error'] = msg
        return output

    if start <= 0:
        msg = 'start must be greater that zero'
        output['error'] = msg
        return output

    # Callers pass a 1-based start; slicing below is 0-based.
    start -= 1

    try:
        size = int(size)
    except (TypeError, ValueError, OverflowError):
        msg = 'size {} must be an integer'.format(size)
        output['error'] = msg
        return output
    if size < 0:
        msg = 'size must be greater that zero'
        output['error'] = msg
        return output
    # From here on ``size`` is the exclusive end offset of the slice.
    size = size + start

    logger.debug('page {} - {}'.format(start, size))
    srch = srch[start:size]

    try:
        output['result'] = srch.execute()
        # Ask for the count once and reuse it for both the output and the log.
        total = srch.count()
        output['count'] = total
        logger.debug('count {}'.format(total))
        output['success'] = True
    except TransportError as e:
        if e.error == 'search_phase_execution_exception':
            msg = 'Search phrase error {}'.format(e)
        else:
            msg = 'Search Engine Transport error: {}'.format(e)
        output['error'] = msg
    except Exception as e:
        # Boundary handler: any other failure is reported to the caller
        # through the result dict rather than raised.
        msg = 'Search Engine unknown error: {}'.format(e)
        output['error'] = msg
    return output
Пример #13
0
 class Meta:
     # Name the index once and reuse it for the mapping lookup.
     index = "blog"
     # The mapping is fetched via Mapping.from_es while this class body runs.
     mapping = Mapping.from_es(index)
Пример #14
0
def test_mapping(es):
    """Test the ES mapping for an investment project."""
    mapping = Mapping.from_es(
        ESInvestmentProject.get_write_index(),
        DEFAULT_MAPPING_TYPE,
    )

    assert mapping.to_dict() == {
        DEFAULT_MAPPING_TYPE: {
            'dynamic': 'false',
            'properties': {
                '_document_type': {
                    'type': 'keyword',
                },
                'actual_land_date': {
                    'type': 'date'
                },
                'actual_uk_regions': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'address_1': {
                    'type': 'text'
                },
                'address_2': {
                    'type': 'text'
                },
                'address_postcode': {
                    'type': 'text'
                },
                'address_town': {
                    'normalizer': 'lowercase_asciifolding_normalizer',
                    'type': 'keyword',
                },
                'allow_blank_estimated_land_date': {
                    'index': False,
                    'type': 'boolean',
                },
                'allow_blank_possible_uk_regions': {
                    'index': False,
                    'type': 'boolean',
                },
                'anonymous_description': {
                    'analyzer': 'english_analyzer',
                    'type': 'text',
                },
                'approved_commitment_to_invest': {
                    'type': 'boolean'
                },
                'approved_fdi': {
                    'type': 'boolean'
                },
                'approved_good_value': {
                    'type': 'boolean'
                },
                'approved_high_value': {
                    'type': 'boolean'
                },
                'approved_landed': {
                    'type': 'boolean'
                },
                'approved_non_fdi': {
                    'type': 'boolean'
                },
                'archived': {
                    'type': 'boolean'
                },
                'archived_by': {
                    'properties': {
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'archived_on': {
                    'type': 'date'
                },
                'archived_reason': {
                    'type': 'text'
                },
                'associated_non_fdi_r_and_d_project': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'project_code': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'average_salary': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'business_activities': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'client_cannot_provide_foreign_investment': {
                    'type': 'boolean'
                },
                'client_cannot_provide_total_investment': {
                    'type': 'boolean'
                },
                'client_contacts': {
                    'properties': {
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'client_relationship_manager': {
                    'properties': {
                        'dit_team': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                },
                                'name': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                            },
                            'type': 'object',
                        },
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'client_requirements': {
                    'type': 'text',
                    'index': False,
                },
                'comments': {
                    'analyzer': 'english_analyzer',
                    'type': 'text',
                },
                'country_investment_originates_from': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'country_lost_to': {
                    'properties': {
                        'id': {
                            'type': 'keyword',
                            'index': False,
                        },
                        'name': {
                            'type': 'text',
                            'index': False,
                        },
                    },
                    'type': 'object',
                },
                'created_by': {
                    'properties': {
                        'dit_team': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                },
                                'name': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                            },
                            'type': 'object',
                        },
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'created_on': {
                    'type': 'date'
                },
                'date_abandoned': {
                    'type': 'date'
                },
                'date_lost': {
                    'type': 'date'
                },
                'delivery_partners': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'description': {
                    'analyzer': 'english_analyzer',
                    'type': 'text',
                },
                'estimated_land_date': {
                    'type': 'date'
                },
                'export_revenue': {
                    'type': 'boolean'
                },
                'fdi_type': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'fdi_value': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'foreign_equity_investment': {
                    'type': 'double'
                },
                'government_assistance': {
                    'type': 'boolean'
                },
                'gross_value_added': {
                    'type': 'double'
                },
                'id': {
                    'type': 'keyword'
                },
                'intermediate_company': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'investment_type': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'investor_company': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'investor_company_country': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'investor_type': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'level_of_involvement': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'level_of_involvement_simplified': {
                    'type': 'keyword'
                },
                'likelihood_to_land': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'modified_on': {
                    'type': 'date'
                },
                'name': {
                    'type': 'text',
                    'fields': {
                        'keyword': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'trigram': {
                            'analyzer': 'trigram_analyzer',
                            'type': 'text',
                        },
                    },
                },
                'new_tech_to_uk': {
                    'type': 'boolean'
                },
                'non_fdi_r_and_d_budget': {
                    'type': 'boolean'
                },
                'number_new_jobs': {
                    'type': 'integer'
                },
                'number_safeguarded_jobs': {
                    'type': 'long'
                },
                'other_business_activity': {
                    'type': 'text',
                    'index': False,
                },
                'project_arrived_in_triage_on': {
                    'type': 'date'
                },
                'project_assurance_adviser': {
                    'properties': {
                        'dit_team': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                },
                                'name': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                            },
                            'type': 'object',
                        },
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'project_code': {
                    'normalizer': 'lowercase_asciifolding_normalizer',
                    'type': 'keyword',
                    'fields': {
                        'trigram': {
                            'analyzer': 'trigram_analyzer',
                            'type': 'text',
                        },
                    },
                },
                'project_manager': {
                    'properties': {
                        'dit_team': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                },
                                'name': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                            },
                            'type': 'object',
                        },
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'proposal_deadline': {
                    'type': 'date'
                },
                'quotable_as_public_case_study': {
                    'type': 'boolean'
                },
                'r_and_d_budget': {
                    'type': 'boolean'
                },
                'reason_abandoned': {
                    'type': 'text',
                    'index': False,
                },
                'reason_delayed': {
                    'type': 'text',
                    'index': False,
                },
                'reason_lost': {
                    'type': 'text',
                    'index': False,
                },
                'referral_source_activity': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'referral_source_activity_event': {
                    'normalizer': 'lowercase_asciifolding_normalizer',
                    'type': 'keyword',
                },
                'referral_source_activity_marketing': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'referral_source_activity_website': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'referral_source_adviser': {
                    'properties': {
                        'first_name': {
                            'type': 'text',
                            'index': False,
                        },
                        'id': {
                            'type': 'keyword',
                            'index': False,
                        },
                        'last_name': {
                            'type': 'text',
                            'index': False,
                        },
                        'name': {
                            'type': 'text',
                            'index': False,
                        },
                    },
                    'type': 'object',
                },
                'sector': {
                    'properties': {
                        'ancestors': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                }
                            },
                            'type': 'object',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'site_decided': {
                    'type': 'boolean'
                },
                'some_new_jobs': {
                    'type': 'boolean'
                },
                'specific_programme': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'stage': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'status': {
                    'normalizer': 'lowercase_asciifolding_normalizer',
                    'type': 'keyword',
                },
                'team_members': {
                    'properties': {
                        'dit_team': {
                            'properties': {
                                'id': {
                                    'type': 'keyword'
                                },
                                'name': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                            },
                            'type': 'object',
                        },
                        'first_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'id': {
                            'type': 'keyword'
                        },
                        'last_name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'total_investment': {
                    'type': 'double'
                },
                'uk_company': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'type': 'text',
                            'fields': {
                                'keyword': {
                                    'normalizer':
                                    'lowercase_asciifolding_normalizer',
                                    'type': 'keyword',
                                },
                                'trigram': {
                                    'analyzer': 'trigram_analyzer',
                                    'type': 'text',
                                },
                            },
                        },
                    },
                    'type': 'object',
                },
                'uk_company_decided': {
                    'type': 'boolean'
                },
                'uk_region_locations': {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        'name': {
                            'normalizer': 'lowercase_asciifolding_normalizer',
                            'type': 'keyword',
                        },
                    },
                    'type': 'object',
                },
                'will_new_jobs_last_two_years': {
                    'type': 'boolean'
                },
            },
        },
    }
Пример #15
0
def test_mapping(es):
    """Verify that the event write index exposes the expected ES mapping."""
    write_index = EventSearchApp.es_model.get_write_index()
    actual_mapping = Mapping.from_es(write_index, DEFAULT_MAPPING_TYPE)

    # Small builders for the field definitions that recur in the expected
    # mapping. Each call returns a fresh dict.
    def of_type(field_type):
        return {'type': field_type}

    def normalised_keyword():
        return {
            'normalizer': 'lowercase_asciifolding_normalizer',
            'type': 'keyword',
        }

    def trigram_subfield():
        return {'analyzer': 'trigram_analyzer', 'type': 'text'}

    def text_with_keyword_and_trigram():
        return {
            'type': 'text',
            'fields': {
                'keyword': normalised_keyword(),
                'trigram': trigram_subfield(),
            },
        }

    def object_field(**sub_fields):
        return {'properties': sub_fields, 'type': 'object'}

    def id_and_name(name_definition):
        return object_field(id=of_type('keyword'), name=name_definition)

    expected_properties = {
        '_document_type': of_type('keyword'),
        'address_1': of_type('text'),
        'address_2': of_type('text'),
        'address_country': id_and_name(text_with_keyword_and_trigram()),
        'address_county': normalised_keyword(),
        'address_postcode': {
            'type': 'text',
            'fields': {'trigram': trigram_subfield()},
        },
        'address_town': normalised_keyword(),
        'created_on': of_type('date'),
        'disabled_on': of_type('date'),
        'end_date': of_type('date'),
        'event_type': id_and_name(normalised_keyword()),
        'id': of_type('keyword'),
        'lead_team': id_and_name(normalised_keyword()),
        'location_type': id_and_name(normalised_keyword()),
        'modified_on': of_type('date'),
        'name': text_with_keyword_and_trigram(),
        'notes': {'analyzer': 'english_analyzer', 'type': 'text'},
        'organiser': object_field(
            first_name=normalised_keyword(),
            id=of_type('keyword'),
            last_name=normalised_keyword(),
            name=text_with_keyword_and_trigram(),
        ),
        'related_programmes': id_and_name(text_with_keyword_and_trigram()),
        'service': id_and_name(normalised_keyword()),
        'start_date': of_type('date'),
        'teams': id_and_name(text_with_keyword_and_trigram()),
        'uk_region': id_and_name(text_with_keyword_and_trigram()),
    }

    assert actual_mapping.to_dict() == {
        DEFAULT_MAPPING_TYPE: {
            'dynamic': 'false',
            'properties': expected_properties,
        },
    }
# `Date` is needed for the `created_at` field below; without it the snippet
# fails with NameError.
from elasticsearch_dsl import Date, Keyword, Mapping, Nested, Text

# Build a mapping for the 'my-type' document type.
m = Mapping('my-type')
m.field('title', 'text')
# Multi-field: 'category' is a text field with a 'raw' keyword sub-field.
m.field('category', 'text', fields={'raw': Keyword()})

# A nested field is assembled on its own and then attached to the mapping.
comment = Nested()
comment.field('author', Text())
comment.field('created_at', Date())
m.field('comments', comment)
# Meta fields are configured through `meta`; here `_all` gets enabled=False.
m.meta('_all', enabled=False)
# Save the mapping into the index 'my-index'.
m.save('my-index')

# We can also get the mapping from our production cluster
m = Mapping.from_es('my-index', 'my-type', using='prod')

# Update it from 'my-index' as seen through the 'qa' connection ...
m.update_from_es('my-index', using='qa')

# ... and save the result through the 'prod' connection.
m.save('my-index', using='prod')


#################################################
# DOCTYPE

from datetime import datetime
from elasticsearch_dsl import DocType, Date, Nested, Boolean, \
        analyzer, InnerObjectWrapper, Completion, Keyword, Text

html_strip = analyzer('html_strip',
        tokenizer='standard',
Пример #17
0
 class Meta:
     # Mapping for the "user" index, obtained through Mapping.from_es while this
     # class body is being evaluated (i.e. at class-definition time).
     mapping = Mapping.from_es("user")
     # Index name; presumably the index the enclosing document class is bound
     # to -- the enclosing class is not visible here, confirm against it.
     index = "user"
def test_mapping(es):
    """Test the ES mapping for a large capital investor profile.

    The mapping is read back from the search app's write index and compared
    with the expected definition. The expected definition is assembled from
    the few field shapes that recur throughout the document.
    """
    def keyword():
        # Plain keyword field.
        return {'type': 'keyword'}

    def normalized_keyword():
        # Keyword field using the lowercase/ASCII-folding normalizer.
        return {
            'normalizer': 'lowercase_asciifolding_normalizer',
            'type': 'keyword',
        }

    def trigram_text():
        # Text field analysed with the trigram analyzer.
        return {'analyzer': 'trigram_analyzer', 'type': 'text'}

    def english_text():
        # Text field analysed with the English analyzer.
        return {'analyzer': 'english_analyzer', 'type': 'text'}

    def text_with_sub_fields(sub_fields):
        # Text field exposing the given multi-fields.
        return {'type': 'text', 'fields': sub_fields}

    def object_of(properties):
        # Object field wrapping the given properties.
        return {'properties': properties, 'type': 'object'}

    def unindexed_name_object():
        # Object with a keyword id and a name that is stored but not indexed.
        return object_of({
            'id': keyword(),
            'name': {'index': False, 'type': 'keyword'},
        })

    def trigram_name_object():
        # Object with a keyword id and a text name that has a trigram sub-field.
        return object_of({
            'id': keyword(),
            'name': text_with_sub_fields({'trigram': trigram_text()}),
        })

    search_app = LargeInvestorProfileSearchApp
    mapping = Mapping.from_es(
        search_app.es_model.get_write_index(),
        search_app.name,
    )

    created_by = object_of({
        'dit_team': object_of({
            'id': keyword(),
            'name': normalized_keyword(),
        }),
        'first_name': normalized_keyword(),
        'id': keyword(),
        'last_name': normalized_keyword(),
        'name': text_with_sub_fields({
            'keyword': normalized_keyword(),
            'trigram': trigram_text(),
        }),
    })

    investor_company = object_of({
        'id': keyword(),
        'name': text_with_sub_fields({
            'trigram': trigram_text(),
            'keyword': normalized_keyword(),
        }),
        'trading_names': text_with_sub_fields({'trigram': trigram_text()}),
    })

    expected_properties = {
        'asset_classes_of_interest': unindexed_name_object(),
        'construction_risks': unindexed_name_object(),
        'country_of_origin': trigram_name_object(),
        'created_by': created_by,
        'created_on': {'type': 'date'},
        'deal_ticket_sizes': unindexed_name_object(),
        'desired_deal_roles': unindexed_name_object(),
        'global_assets_under_management': {'type': 'long'},
        'id': keyword(),
        'investable_capital': {'type': 'long'},
        'investment_types': unindexed_name_object(),
        'investor_company': investor_company,
        'investor_description': english_text(),
        'investor_type': unindexed_name_object(),
        'minimum_equity_percentage': unindexed_name_object(),
        'minimum_return_rate': unindexed_name_object(),
        'modified_on': {'type': 'date'},
        'notes_on_locations': english_text(),
        'other_countries_being_considered': trigram_name_object(),
        'required_checks_conducted': unindexed_name_object(),
        'restrictions': unindexed_name_object(),
        'time_horizons': unindexed_name_object(),
        'uk_region_locations': unindexed_name_object(),
    }

    expected_mapping = {
        'large-investor-profile': {
            'properties': expected_properties,
            'dynamic': 'false',
        },
    }
    assert mapping.to_dict() == expected_mapping
Пример #19
0
def test_mapping(setup_es):
    """Test the ES mapping for an order.

    The mapping is read back from the order search app's write index and
    compared with the expected definition. The expected definition is
    assembled from the field shapes that recur throughout the document.
    """
    def keyword():
        # Plain keyword field.
        return {'type': 'keyword'}

    def plain_text():
        # Text field with no explicit analyzer.
        return {'type': 'text'}

    def date():
        # Date field.
        return {'type': 'date'}

    def trigram_text():
        # Text field analysed with the trigram analyzer.
        return {'analyzer': 'trigram_analyzer', 'type': 'text'}

    def lowercase_text(copy_to=None):
        # Lowercase-keyword-analysed text with fielddata; optionally copied
        # into a single target field.
        field = {
            'analyzer': 'lowercase_keyword_analyzer',
            'fielddata': True,
            'type': 'text',
        }
        if copy_to is not None:
            field['copy_to'] = [copy_to]
        return field

    def unindexed(field_type):
        # Field of the given type with indexing switched off.
        return {'index': False, 'type': field_type}

    def copied_integer(target):
        # Integer field whose value is copied into ``target``.
        return {'copy_to': [target], 'type': 'integer'}

    def nested(properties):
        # Nested field (included in parent) wrapping the given properties.
        return {
            'include_in_parent': True,
            'properties': properties,
            'type': 'nested',
        }

    def id_and_name():
        # Nested field with a keyword id and a lowercase text name.
        return nested({'id': keyword(), 'name': lowercase_text()})

    def person(name_copy_to, with_team=False):
        # Nested person field; ``name`` is copied into ``name_copy_to`` and a
        # ``dit_team`` sub-object is added when ``with_team`` is true.
        properties = {
            'first_name': lowercase_text(),
            'id': keyword(),
            'last_name': lowercase_text(),
            'name': lowercase_text(copy_to=name_copy_to),
            'name_trigram': trigram_text(),
        }
        if with_team:
            properties['dit_team'] = id_and_name()
        return nested(properties)

    search_app = OrderSearchApp
    mapping = Mapping.from_es(
        search_app.es_model.get_write_index(),
        search_app.name,
    )

    company = nested({
        'id': keyword(),
        'name': lowercase_text(copy_to='company.name_trigram'),
        'name_trigram': trigram_text(),
        'trading_name': lowercase_text(copy_to='company.trading_name_trigram'),
        'trading_name_trigram': trigram_text(),
    })

    sector = nested({
        'id': keyword(),
        'name': lowercase_text(),
        'ancestors': nested({'id': keyword()}),
    })

    expected_properties = {
        'assignees': person('assignees.name_trigram', with_team=True),
        'billing_address_1': plain_text(),
        'billing_address_2': plain_text(),
        'billing_address_country': id_and_name(),
        'billing_address_county': lowercase_text(),
        'billing_address_postcode': plain_text(),
        'billing_address_town': lowercase_text(),
        'billing_contact_name': plain_text(),
        'billing_company_name': plain_text(),
        'billing_email': lowercase_text(),
        'billing_phone': lowercase_text(),
        'cancellation_reason': id_and_name(),
        'cancelled_by': person('cancelled_by.name_trigram'),
        'cancelled_on': date(),
        'company': company,
        'completed_by': person('completed_by.name_trigram'),
        'completed_on': date(),
        'contact': person('contact.name_trigram'),
        'contact_email': lowercase_text(),
        'contact_phone': keyword(),
        'contacts_not_to_approach': plain_text(),
        'created_by': person('created_by.name_trigram', with_team=True),
        'created_on': date(),
        'delivery_date': date(),
        'description': {'analyzer': 'english_analyzer', 'type': 'text'},
        'discount_value': unindexed('integer'),
        'existing_agents': unindexed('text'),
        'further_info': plain_text(),
        'id': keyword(),
        'modified_on': date(),
        'net_cost': unindexed('integer'),
        'paid_on': date(),
        'payment_due_date': date(),
        'po_number': unindexed('keyword'),
        'primary_market': id_and_name(),
        'reference': lowercase_text(copy_to='reference_trigram'),
        'reference_trigram': trigram_text(),
        'sector': sector,
        'uk_region': id_and_name(),
        'service_types': id_and_name(),
        'status': lowercase_text(),
        'subscribers': person('subscribers.name_trigram', with_team=True),
        'subtotal_cost': copied_integer('subtotal_cost_string'),
        'subtotal_cost_string': keyword(),
        'total_cost': copied_integer('total_cost_string'),
        'total_cost_string': keyword(),
        'vat_cost': unindexed('integer'),
        'vat_number': unindexed('keyword'),
        'vat_status': unindexed('keyword'),
        'vat_verified': unindexed('boolean'),
    }

    expected_mapping = {
        'order': {
            'dynamic': 'false',
            'properties': expected_properties,
        },
    }
    assert mapping.to_dict() == expected_mapping
Пример #20
0
    def test_create_mapping(self):
        """Partially reindex one of two doc types on an alias, then check data and mappings.

        Three documents of each doc type are saved. A partial indexer then
        reindexes keys 1 and 2 of ``MyDocType`` with ``value = key + 10``.
        Afterwards the stored values and the mappings of both doc types are
        verified.
        """
        @my_index_alias.doc_type
        class MyDocType2(DocType):
            value2 = Float()

            class Meta:
                doc_type = 'my_doc_type_2'

        @register_indexer('my_alias')
        class MyIndexer(BaseIndexer):
            doc_type_klass = MyDocType
            index_alias = my_index_alias

        @register_indexer('my_alias')
        class MyIndexer2(BaseIndexer):
            doc_type_klass = MyDocType2
            index_alias = my_index_alias

        class MyPartialIndexer(PartialIndexer, MyIndexer):
            batch_size = 2

            def get_batch_queryset(self, keys):
                # Stand-in queryset: one fake row per key, value shifted by 10.
                fake_rows = [Mock(id=key, value=key + 10) for key in keys]
                return Mock(
                    count=Mock(return_value=len(keys)),
                    __iter__=Mock(return_value=iter(fake_rows)),
                )

            def get_batch_update_docs_queries(self, keys):
                search = self.doc_type_klass.search()
                return search.query('terms', id=keys)

            def extract_datum(self, datum):
                return {'id': datum.id, 'value': datum.value}

        def stored_field(doc_type_klass, key, field_name):
            # Fetch the first hit for ``id == key`` and return one of its fields.
            hits = doc_type_klass.search().query('term', id=key).execute()
            return hits[0].to_dict()[field_name]

        all_keys = (1, 2, 3)
        for key in all_keys:
            MyDocType(id=key, value=key).save()
            MyDocType2(id=key, value2=key).save()
        my_index_alias.read_index.refresh()

        partial_indexer = MyPartialIndexer(updating_keys=[1, 2])
        with partial_indexer.index_alias.indexing():
            partial_indexer.reindex()

        # Keys 1 and 2 were reindexed (value + 10); key 3 keeps its value.
        expect(MyDocType.search().count()).to.equal(3)
        for key, expected_value in ((1, 11), (2, 12), (3, 3)):
            expect(stored_field(MyDocType, key, 'value')).to.equal(
                expected_value)

        # The second doc type keeps the values it was saved with.
        expect(MyDocType2.search().count()).to.equal(3)
        for key in all_keys:
            expect(stored_field(MyDocType2, key, 'value2')).to.equal(key)

        # Both doc types expose a keyword id and their own float field.
        for doc_type_name, float_field in (('my_doc_type', 'value'),
                                           ('my_doc_type_2', 'value2')):
            found_mapping = Mapping.from_es('test_my_alias', doc_type_name)
            expect(found_mapping.to_dict()).to.eq({
                doc_type_name: {
                    'properties': {
                        'id': {
                            'type': 'keyword'
                        },
                        float_field: {
                            'type': 'float'
                        }
                    }
                }
            })