class CustomDictionaryWord(es.Document):
    """ES document for a single custom-dictionary word and its normal form."""

    word = es.Keyword()
    word_normal = es.Keyword()

    class Index:
        # Index name and client connection are project-level constants.
        name = ES_INDEX_CUSTOM_DICTIONARY_WORD
        using = ES_CLIENT
class User(ArchivingDocType):
    """Model a user."""

    email = dsl.Keyword()
    customers = dsl.Keyword()

    class Meta:
        index = auth_index._name

    @classmethod
    def get_by_email(cls, address):
        """Get the first user with the given email."""
        result = cls.search(index=auth_index._name).filter(
            'term', email=address
        ).execute()
        total = result.hits.total
        if total == 0:
            raise NotFoundError(
                'There is no user with email address \'{}\''.format(address)
            )
        if total == 1:
            return result[0]
        # More than one hit for a unique address means the index is corrupt.
        raise ConflictError(
            'Inconsistent data detected: there are {} users with email'
            ' address \'{}\': {}'.format(
                total,
                address,
                [user.meta.id for user in result.hits],
            )
        )
class ExtendedDataDocument(object):
    """Data ES document extensions."""

    source = dsl.Keyword()
    species = dsl.Text()
    build = dsl.Keyword()
    feature_type = dsl.Keyword()
class LawSuitModel(es.Document):
    """ES document for a lawsuit scraped from law.go.kr."""

    case_id = es.Keyword()
    index_data = es.Keyword()
    # Raw HTML is kept verbatim; a separate keyword copy is used for search.
    detail_data_html = es.Text()
    detail_data_searchable = es.Keyword()

    class Index:
        name = 'law_go_kr'
class Pand(es.DocType):
    """Searchable fields for a 'pand' (building)."""

    id = es.Keyword()
    # Autocomplete-analyzed national id, with raw and zero-stripped subfields.
    landelijk_id = es.Text(
        analyzer=analyzers.autocomplete,
        fields={
            'raw': es.Keyword(),
            'nozero': es.Text(analyzer=analyzers.nozero),
        },
    )
    pandnaam = es.Text(analyzer=analyzers.adres, fields=naam_fields)
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_PAND']
class Dictionary(es.Document):
    """ES document describing a corpus dictionary and its build status."""

    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()

    class Index:
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT
class TopicDocument(es.Document):
    """Link between a topic and a document, weighted by topic relevance."""

    topic_id = es.Keyword()
    topic_weight = es.Float()
    document_es_id = es.Keyword()
    datetime = es.Date()
    document_source = es.Keyword()
    document_corpus = es.Keyword()
    document_num_views = es.Integer()
    document_num_comments = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_DOCUMENT  # f"{ES_INDEX_TOPIC_DOCUMENT}_{tm}"
        using = ES_CLIENT
        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Alternative settings used for dynamically-created indices.
        settings_dynamic = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Explicit mapping; note counters are mapped as "long" here even
        # though the class attributes declare es.Integer().
        mappings = {
            "properties": {
                "datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "document_corpus": {"type": "keyword"},
                "document_num_views": {"type": "long"},
                "document_num_comments": {"type": "long"},
                "topic_id": {"type": "keyword"},
                "topic_weight": {"type": "float"},
            }
        }
class DataDocType(es.DocType):
    """Elasticsearch test model"""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'
class Bouwblok(es.DocType):
    """
    Bouwblok searchable fields.
    """

    # Block code analyzed for search, with an exact-match keyword subfield.
    code = es.Text(
        analyzer=analyzers.bouwblokid,
        fields={'keyword': es.Keyword()},
    )
    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_BOUWBLOK']
class DataDocType(es.Document):
    """Elasticsearch test model"""

    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()

    class Index:
        name = 'test'
class Location(es.InnerDoc):  # pylint: disable = too-few-public-methods
    """
    InnerDoc mapping of location information embedded within a tweet.

    This data is created by us during the processing pipeline.
    """

    # Aggregatable place names (doc_values enabled for faceting/sorting).
    city = es.Keyword(doc_values=True)
    country = es.Keyword(doc_values=True)
    county = es.Keyword(doc_values=True)
    state = es.Keyword(doc_values=True)
    # Coordinates and metadata stored as plain text.
    id = es.Text()
    latitude = es.Text()
    longitude = es.Text()
    resolution_method = es.Text()
class BaseDocument(indices.BaseDocument):
    """Base search document."""

    id = dsl.Integer()  # pylint: disable=invalid-name
    slug = Slug()
    version = dsl.Keyword()
    name = Name()
    created = dsl.Date()
    modified = dsl.Date()
    contributor_id = dsl.Integer()
    contributor_name = User()
    # We use a separate field for contributor sorting because we use an
    # entirely different value for it (the display name).
    contributor_sort = dsl.Keyword()
    owner_ids = dsl.Integer(multi=True)
    owner_names = User(multi=True)
class TopicCombo(es.Document):
    """Combination of topics together with the documents they share."""

    topics = es.Object()
    common_docs_ids = es.Keyword()
    common_docs_num = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_COMBOS  # f"{ES_INDEX_TOPIC_COMBOS}_{tm}"
        using = ES_CLIENT
        settings = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        mappings = {
            "properties": {
                "topics": {"type": "object"},
                "common_docs_ids": {"type": "keyword"},
                "common_docs_num": {"type": "integer"},
            }
        }
class DynamicTopicModellingIndex(TopicModellingIndex):
    """Topic-modelling index variant for dynamic topic models (DTM)."""

    # Name of the parent meta-DTM this model belongs to.
    meta_dtm_name = es.Keyword()

    class Index:
        name = ES_INDEX_DYNAMIC_TOPIC_MODELLING
        using = ES_CLIENT
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }
        mappings = {
            "properties": {
                "meta_dtm_name": {"type": "keyword"},
                "datetime_from": {"type": "date"},
                "datetime_to": {"type": "date"},
                "name": {"type": "keyword"},
            },
        }
class BaseDocument(dsl.DocType):
    """Base document class to build ElasticSearch documents.

    This is standard ``elasticsearch-dsl`` ``DocType`` class with already
    added fields for handling permissions.
    """

    #: list of user ids with view permission on the object
    users_with_permissions = dsl.Keyword(multi=True)

    #: list of group ids with view permission on the object
    groups_with_permissions = dsl.Keyword(multi=True)

    #: identifies if object has public view permission assigned
    public_permission = dsl.Boolean()
class ClusterSource(es.Document):
    """Clustering result for a single source, plus the parameters used."""

    name = es.Keyword()
    clusters = es.Object()
    clustering_params = es.Object()

    class Index:
        name = ES_INDEX_SOURCE_CLUSTERS
        using = ES_CLIENT
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }
        mappings = {
            "properties": {
                "name": {"type": "keyword"},
                "clusters": {"type": "object"},
                "clustering_params": {"type": "object"},
            },
        }
class User(es.DocType):
    """Elastic document describing user."""

    objectID = es.Keyword()
    # Autocomplete-analyzed fields keep an *_exact keyword twin for
    # exact-match queries.
    username = es.Text(fielddata=True, analyzer=autocomplete)
    username_exact = es.Keyword()
    full_name = es.Text(fielddata=True, analyzer=autocomplete)
    roles = es.Keyword(multi=True)
    groups = es.Keyword(multi=True)
    email = es.Text(fielddata=True, analyzer=autocomplete)
    email_exact = es.Keyword()

    class Meta:
        index = 'users'
class EmbeddingIndex(es.Document):
    """ES document describing a trained embedding over a corpus."""

    corpus = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()
    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    meta_parameters = es.Object()

    class Index:
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT
class Monument(es.Document):
    """
    Elastic data for Monument
    """

    id = es.Keyword()
    type = es.Keyword()
    # Name analyzed for monument search, with raw and subtype-analyzed twins.
    naam = es.Text(
        fielddata=True,
        analyzer=analyzers.monument_naam,
        fields={
            'raw': es.Text(fielddata=True),
            'keyword': es.Text(fielddata=True, analyzer=analyzers.subtype),
        },
    )

    class Index:
        name = settings.ELASTIC_INDICES['MONUMENTEN']
class KadastraalSubject(es.DocType):
    """Searchable fields for a cadastral subject (person or organisation)."""

    # Name with a raw keyword twin and an ngram twin for partial matching.
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword),
        },
    )
    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()
    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']
class KadastraalObject(es.DocType):
    """Searchable fields for a cadastral object (parcel)."""

    aanduiding = es.Text(
        fielddata=True,
        analyzer=analyzers.postcode,
        fields=kad_text_fields)

    # The search aanduiding is the aanduiding without the "acd00 " prefix
    # remove this in future
    short_aanduiding = es.Text(
        analyzer=analyzers.kad_obj_aanduiding,
        search_analyzer='standard',
        fields=kad_text_fields)

    sectie = es.Text(
        fields=kad_text_fields,
    )
    objectnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields,
    )
    indexletter = es.Keyword(
        fields=kad_text_fields,
    )
    indexnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields
    )

    order = es.Integer()
    centroid = es.GeoPoint()

    gemeente = es.Text(analyzer=analyzers.naam)
    gemeente_code = es.Keyword(normalizer=analyzers.lowercase)

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_OBJECT']
class DocumentLocation(es.Document):
    """Weighted link between a document and a named geographic location."""

    document_es_id = es.Keyword()
    document_datetime = es.Date()
    document_source = es.Keyword()
    location_name = es.Keyword()
    location_level = es.Keyword()
    location_weight = es.Float()
    location_id = es.Keyword()

    class Index:
        # !!! f"{ES_INDEX_DOCUMENT_EVAL}_{tm}_{criterion.id}"
        name = ES_INDEX_DOCUMENT_LOCATION
        using = ES_CLIENT
        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        mappings = {
            "properties": {
                "document_datetime": {"type": "date"},
                "document_es_id": {"type": "keyword"},
                "document_source": {"type": "keyword"},
                "location_level": {"type": "keyword"},
                "location_name": {"type": "keyword"},
                "location_weight": {"type": "float"},
                "location_id": {"type": "keyword"},
            }
        }
class EntityDocument(CollectionDocument):
    """Document for entity search."""

    collection = dsl.Integer()
    type = dsl.Keyword()

    class Index:
        """Meta class for entity search document."""

        name = "entity"
class Node(es.DocType):
    """
    Elastic document describing user
    """

    node_type = es.Keyword()
    objectID = es.Keyword()
    name = es.Text(
        fielddata=True,
        analyzer=autocomplete
    )
    # Embedded owning user (id + autocomplete-analyzed name).
    user = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Text(
                fielddata=True,
                analyzer=autocomplete)
        }
    )
    description = es.Text()
    is_free = es.Boolean()
    # Embedded project reference.
    project = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
            'url': es.Keyword(),
        }
    )
    media = es.Keyword()
    picture = es.Keyword()
    tags = es.Keyword(multi=True)
    license_notes = es.Text()
    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        index = 'nodes'
class CollectionDocument(BaseDocument):
    """Document for collection search."""

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)

    class Meta:
        """Meta class for collection search document."""

        index = 'collection'
class EntityDocument(CollectionDocument):
    """Document for entity search."""

    descriptor_completed = dsl.Boolean()
    collections = dsl.Integer(multi=True)
    type = dsl.Keyword()

    class Meta:
        """Meta class for entity search document."""

        index = 'entity'
class DataDocument(BaseDocument):
    """Document for data search."""

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility.
    type = ProcessType()
    process_name = Name()
    tags = dsl.Keyword(multi=True)
    collection = dsl.Integer()
    entity = dsl.Integer()

    class Index:
        """Meta class for data search document."""

        name = 'data'
class AuthorDoc(DocType):
    """ES document for an author, searchable by name and identifiers."""

    id = edsl.Integer(required=True)
    orcid = edsl.Keyword()
    researcherid = edsl.Keyword()
    email = edsl.Keyword()
    # Name feeds the catch-all field and is analyzed for autocomplete.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )

    @classmethod
    def from_instance(cls, author):
        """Build an indexable dict (with meta) from an author model."""
        doc = cls(
            meta={'id': author.id},
            id=author.id,
            orcid=author.orcid,
            researcherid=author.researcherid,
            email=author.email,
            name=author.name,
        )
        return doc.to_dict(include_meta=True)

    class Index:
        name = 'author'
class CollectionDocument(BaseDocument):
    """Document for collection search."""

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)
    description = dsl.Text(fielddata=True)

    class Index:
        """Meta class for collection search document."""

        name = "collection"
class TrainingJob(elasticsearch_dsl.Document):
    """ES document recording one model-training run: metadata, metrics,
    and the hyper-parameters it was launched with."""

    # Run metadata.
    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()
    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()
    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()
    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()
    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs
    params_to_parse = [
        'batch_size',
        'val_split',
        'seed',
        'rotation_range',
        'width_shift_range',
        'height_shift_range',
        'shear_range',
        'zoom_range',
        'horizontal_flip',
        'vertical_flip',
        'dropout_rate1',
        'dropout_rate2',
        'data_dir',
        'gcs_url',
        'mip_thickness',
        'height_offset',
        'pixel_value_range',
    ]

    class Index:
        name = TRAINING_JOBS