def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Each property of ``schema`` gets a typed field under ``properties``
    (``KEYWORD`` when its type has no entry in ``TYPE_MAPPINGS``) that is
    also copied into the ``text`` field. Properties whose type is listed in
    ``NUMERIC_TYPES`` additionally get a ``NUMERIC`` field under ``numeric``.

    Returns the result of ``configure_index`` for the index named by
    ``schema_index(model.get(schema), version)``.
    """
    # Per-property type mappings, so that searches on each property behave
    # correctly for its type.
    property_fields = {}
    numeric_fields = {registry.date.group: NUMERIC}
    for prop in schema.properties.values():
        property_fields[prop.name] = dict(
            TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"]
        )
        if prop.type in NUMERIC_TYPES:
            numeric_fields[prop.name] = NUMERIC

    # Registry types whose group field is a plain keyword, in mapping order.
    keyword_group_types = (
        registry.entity,
        registry.language,
        registry.country,
        registry.checksum,
        registry.ip,
        registry.url,
        registry.iban,
        registry.email,
        registry.phone,
        registry.mimetype,
        registry.identifier,
    )

    fields = {"caption": KEYWORD, "schema": KEYWORD, "schemata": KEYWORD}
    for group_type in keyword_group_types:
        fields[group_type.group] = KEYWORD
    fields[registry.date.group] = PARTIAL_DATE
    fields[registry.address.group] = KEYWORD
    fields[registry.name.group] = KEYWORD
    fields.update(
        {
            "fingerprints": {
                "type": "keyword",
                "normalizer": "latin_index",
                "copy_to": "text",
                "fields": {"text": LATIN_TEXT},
            },
            "text": {
                "type": "text",
                "analyzer": "latin_index",
                "search_analyzer": "latin_query",
                "search_quote_analyzer": "latin_index",
                "term_vector": "with_positions_offsets",
            },
            "properties": {"type": "object", "properties": property_fields},
            "numeric": {"type": "object", "properties": numeric_fields},
            "role_id": KEYWORD,
            "collection_id": KEYWORD,
            "origin": KEYWORD,
            "created_at": {"type": "date"},
            "updated_at": {"type": "date"},
        }
    )

    mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are excluded from the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }
    return configure_index(
        schema_index(model.get(schema), version),
        mapping,
        index_settings(shards=get_shard_weight(schema)),
    )
def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Each property of ``schema`` gets a typed field under ``properties``
    (``KEYWORD`` when its type has no entry in ``TYPE_MAPPINGS``) that is
    also copied into the ``text`` field.

    Returns the result of ``configure_index`` for the index named by
    ``schema_index(model.get(schema), version)``.
    """
    # Per-property type mappings, so that searches on each property behave
    # correctly for its type.
    property_fields = {
        prop.name: dict(TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"])
        for prop in schema.properties.values()
    }

    # Fields mapped as plain keywords, listed in mapping order.
    keyword_fields = (
        "schema",
        "schemata",
        "foreign_id",
        "document_id",
        "collection_id",
        "uploader_id",
        "entities",
        "languages",
        "countries",
        "checksums",
        "keywords",
        "ips",
        "urls",
        "ibans",
        "emails",
        "phones",
        "mimetypes",
        "identifiers",
    )

    fields = {
        "name": {
            "type": "text",
            "analyzer": "icu_latin",
            "fields": {"kw": KEYWORD},
            "boost": 3.0,
            "copy_to": "text",
        },
    }
    for field_name in keyword_fields:
        fields[field_name] = KEYWORD
    fields["dates"] = PARTIAL_DATE
    fields["addresses"] = {
        "type": "keyword",
        "fields": {"text": LATIN_TEXT},
    }
    fields["names"] = {
        "type": "keyword",
        "fields": {"text": LATIN_TEXT},
        "copy_to": "text",
    }
    fields["fingerprints"] = {
        "type": "keyword",
        "normalizer": "icu_latin",
        "copy_to": "text",
        "fields": {"text": LATIN_TEXT},
    }
    fields["text"] = {
        "type": "text",
        "analyzer": "icu_latin",
        "term_vector": "with_positions_offsets",
        "store": True,
    }
    fields["properties"] = {"type": "object", "properties": property_fields}
    fields["updated_at"] = {"type": "date"}

    mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are excluded from the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }
    return configure_index(
        schema_index(model.get(schema), version),
        mapping,
        index_settings(shards=get_shard_weight(schema)),
    )
def configure_schema(schema, version):
    """Build the index mapping for ``schema`` and pass it to ``configure_index``.

    Each property of ``schema`` gets a typed field under ``properties``
    (``KEYWORD`` when its type has no entry in ``TYPE_MAPPINGS``) that is
    also copied into the ``text`` field.

    Returns the result of ``configure_index`` for the index named by
    ``schema_index(model.get(schema), version)``.
    """
    # Per-property type mappings, so that searches on each property behave
    # correctly for its type.
    property_fields = {
        prop.name: dict(TYPE_MAPPINGS.get(prop.type, KEYWORD), copy_to=["text"])
        for prop in schema.properties.values()
    }

    # Structured field definitions, named so the field table below reads as
    # a flat list.
    name_field = {
        "type": "text",
        "analyzer": "icu_latin",
        "fields": {"kw": KEYWORD},
        "boost": 3.0,
        "copy_to": "text",
    }
    error_message_field = {
        "type": "text",
        "copy_to": "text",
        "index": False,
    }
    fingerprints_field = {
        "type": "keyword",
        "normalizer": "icu_latin",
        "copy_to": "text",
        "fields": {"text": LATIN_TEXT},
    }
    addresses_field = {
        "type": "keyword",
        "fields": {"text": LATIN_TEXT},
    }
    names_field = {
        "type": "keyword",
        "fields": {"text": LATIN_TEXT},
        "copy_to": "text",
    }
    text_field = {
        "type": "text",
        "analyzer": "icu_latin",
        "term_vector": "with_positions_offsets",
        "store": True,
    }

    fields = {
        "name": name_field,
        "schema": KEYWORD,
        "schemata": KEYWORD,
        "bulk": {"type": "boolean"},
        "status": KEYWORD,
        "error_message": error_message_field,
        "foreign_id": KEYWORD,
        "document_id": KEYWORD,
        "collection_id": KEYWORD,
        "uploader_id": KEYWORD,
        "fingerprints": fingerprints_field,
        "entities": KEYWORD,
        "languages": KEYWORD,
        "countries": KEYWORD,
        "checksums": KEYWORD,
        "keywords": KEYWORD,
        "ips": KEYWORD,
        "urls": KEYWORD,
        "ibans": KEYWORD,
        "emails": KEYWORD,
        "phones": KEYWORD,
        "mimetypes": KEYWORD,
        "identifiers": KEYWORD,
        "addresses": addresses_field,
        "dates": PARTIAL_DATE,
        "names": names_field,
        "created_at": {"type": "date"},
        "updated_at": {"type": "date"},
        "text": text_field,
        "properties": {"type": "object", "properties": property_fields},
    }

    mapping = {
        "date_detection": False,
        "dynamic": False,
        # "text" and "fingerprints" are excluded from the stored _source.
        "_source": {"excludes": ["text", "fingerprints"]},
        "properties": fields,
    }

    index = schema_index(model.get(schema), version)
    settings = index_settings(shards=get_shard_weight(schema))
    return configure_index(index, mapping, settings)