Пример #1
0
class CustomDictionaryWord(es.Document):
    """Document with two keyword fields: ``word`` and ``word_normal``."""

    word = es.Keyword()
    # NOTE(review): presumably the normalized form of ``word`` -- confirm
    # against the code that writes these documents.
    word_normal = es.Keyword()

    class Index:
        # Index name and client both come from constants defined outside
        # this class.
        name = ES_INDEX_CUSTOM_DICTIONARY_WORD
        using = ES_CLIENT
Пример #2
0
class User(ArchivingDocType):
    """User document with an ``email`` and a ``customers`` keyword field."""

    email = dsl.Keyword()
    customers = dsl.Keyword()

    class Meta:
        index = auth_index._name

    @classmethod
    def get_by_email(cls, address):
        """Return the only user whose ``email`` term equals ``address``.

        Raises:
            NotFoundError: the search reported zero hits.
            ConflictError: the search reported more than one hit.
        """
        search = cls.search(index=auth_index._name)
        response = search.filter('term', email=address).execute()
        total = response.hits.total

        if total == 0:
            raise NotFoundError(
                'There is no user with email address \'{}\''.format(address)
            )

        if total == 1:
            return response[0]

        # More than one match: report the ids of the returned hits.
        duplicate_ids = [user.meta.id for user in response.hits]
        raise ConflictError(
            'Inconsistent data detected: there are {} users with email'
            ' address \'{}\': {}'.format(total, address, duplicate_ids)
        )
Пример #3
0
class ExtendedDataDocument(object):
    """Data ES document extensions.

    Plain ``object`` subclass that only declares extra fields.
    NOTE(review): presumably mixed into a document class elsewhere -- confirm.
    """

    source = dsl.Keyword()
    # The only Text field here; the other three are Keyword fields.
    species = dsl.Text()
    build = dsl.Keyword()
    feature_type = dsl.Keyword()
Пример #4
0
class LawSuitModel(es.Document):
    """Document stored in the ``law_go_kr`` index."""

    class Index:
        name = 'law_go_kr'

    case_id = es.Keyword()
    index_data = es.Keyword()
    detail_data_html = es.Text()
    # NOTE(review): declared as Keyword although the name says "searchable";
    # confirm that full-text search on this field is not expected.
    detail_data_searchable = es.Keyword()
Пример #5
0
class Pand(es.DocType):
    """Document mapping for ``Pand`` entries.

    The index name is read from ``settings.ELASTIC_INDICES['BAG_PAND']``.
    """

    id = es.Keyword()
    # Text field using the autocomplete analyzer, with two sub-fields:
    # ``raw`` (Keyword) and ``nozero`` (Text with the ``nozero`` analyzer).
    landelijk_id = es.Text(analyzer=analyzers.autocomplete,
                           fields={
                               'raw': es.Keyword(),
                               'nozero': es.Text(analyzer=analyzers.nozero)
                           })
    # ``naam_fields`` is defined outside this class.
    pandnaam = es.Text(analyzer=analyzers.adres, fields=naam_fields)
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_PAND']
Пример #6
0
class Dictionary(es.Document):
    """Document with descriptive fields and an ``is_ready`` boolean."""

    corpus = es.Keyword()
    name = es.Keyword()
    description = es.Text()
    datetime = es.Date()
    number_of_documents = es.Integer()

    is_ready = es.Boolean()

    class Index:
        # Index name and client both come from constants defined outside
        # this class.
        name = ES_INDEX_DICTIONARY_INDEX
        using = ES_CLIENT
Пример #7
0
class TopicDocument(es.Document):
    """Document holding a topic id and weight next to ``document_*`` fields."""

    topic_id = es.Keyword()
    topic_weight = es.Float()
    document_es_id = es.Keyword()
    datetime = es.Date()
    document_source = es.Keyword()
    document_corpus = es.Keyword()
    document_num_views = es.Integer()
    document_num_comments = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_DOCUMENT  # f"{ES_INDEX_TOPIC_DOCUMENT}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Same keys as ``settings`` but with 2 shards instead of 3.
        # NOTE(review): ``settings_dynamic`` and ``mappings`` are plain class
        # attributes here; confirm which code reads them.
        settings_dynamic = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # NOTE(review): ``document_num_views`` and ``document_num_comments``
        # are declared with es.Integer() above but typed "long" below --
        # confirm which one is intended.
        mappings = {
            "properties": {
                "datetime": {
                    "type": "date"
                },
                "document_es_id": {
                    "type": "keyword",
                },
                "document_source": {
                    "type": "keyword",
                },
                "document_corpus": {
                    "type": "keyword",
                },
                "document_num_views": {
                    "type": "long",
                },
                "document_num_comments": {
                    "type": "long",
                },
                "topic_id": {
                    "type": "keyword",
                },
                "topic_weight": {
                    "type": "float"
                }
            }
        }
class DataDocType(es.DocType):
    """Elasticsearch test model.

    Declares keyword, text, date, boolean and integer fields and is bound
    to the ``test`` index through ``Meta.index``.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    description = es.Text()

    class Meta:
        index = 'test'
Пример #9
0
class Bouwblok(es.DocType):
    """
    Bouwblok searchable fields.

    The index name is read from ``settings.ELASTIC_INDICES['BAG_BOUWBLOK']``.
    """
    # Text field using the ``bouwblokid`` analyzer, with a ``keyword``
    # sub-field of type Keyword.
    code = es.Text(
        analyzer=analyzers.bouwblokid,
        fields={'keyword': es.Keyword()},
    )

    subtype = es.Keyword()

    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BAG_BOUWBLOK']
Пример #10
0
class DataDocType(es.Document):
    """Elasticsearch test model.

    Declares keyword, text, date, boolean, integer and geo-point fields and
    is bound to the ``test`` index through the inner ``Index`` class.
    """
    first_name = es.Keyword()
    last_name = es.Keyword()
    city = es.Text()
    skills = es.Keyword()
    birthday = es.Date()
    is_active = es.Boolean()
    score = es.Integer()
    location = es.GeoPoint()
    description = es.Text()


    class Index:
        name = 'test'
Пример #11
0
class Location(es.InnerDoc):  # pylint: disable=too-few-public-methods
    """
    InnerDoc mapping of location information embedded within a tweet.

    This data is created by us during the processing pipeline.
    """

    # The Keyword fields pass ``doc_values=True`` explicitly; the remaining
    # fields are Text.
    city = es.Keyword(doc_values=True)
    country = es.Keyword(doc_values=True)
    county = es.Keyword(doc_values=True)
    id = es.Text()
    # NOTE(review): latitude/longitude are mapped as Text, not as a numeric
    # or geo type -- confirm this is intended.
    latitude = es.Text()
    longitude = es.Text()
    resolution_method = es.Text()
    state = es.Keyword(doc_values=True)
Пример #12
0
class BaseDocument(indices.BaseDocument):
    """Base search document.

    Extends ``indices.BaseDocument`` with identity, timestamp, contributor
    and owner fields. ``Slug``, ``Name`` and ``User`` are field types
    defined outside this class.
    """

    id = dsl.Integer()  # pylint: disable=invalid-name
    slug = Slug()
    version = dsl.Keyword()
    name = Name()
    created = dsl.Date()
    modified = dsl.Date()
    contributor_id = dsl.Integer()
    contributor_name = User()
    # We use a separate field for contributor sorting because we use an entirely
    # different value for it (the display name).
    contributor_sort = dsl.Keyword()
    # Both owner fields are declared with ``multi=True``.
    owner_ids = dsl.Integer(multi=True)
    owner_names = User(multi=True)
Пример #13
0
class TopicCombo(es.Document):
    """Document with a ``topics`` object plus ``common_docs_*`` fields."""

    topics = es.Object()
    common_docs_ids = es.Keyword()
    common_docs_num = es.Integer()

    class Index:
        name = ES_INDEX_TOPIC_COMBOS  # f"{ES_INDEX_TOPIC_COMBOS}_{tm}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 2,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Raw mapping that repeats the three fields declared on the class.
        # NOTE(review): confirm which code reads this attribute.
        mappings = {
            "properties": {
                "topics": {
                    "type": "object"
                },
                "common_docs_ids": {
                    "type": "keyword",
                },
                "common_docs_num": {
                    "type": "integer",
                },
            }
        }
Пример #14
0
class DynamicTopicModellingIndex(TopicModellingIndex):
    """``TopicModellingIndex`` subclass adding a ``meta_dtm_name`` keyword."""

    meta_dtm_name = es.Keyword()

    class Index:
        name = ES_INDEX_DYNAMIC_TOPIC_MODELLING
        using = ES_CLIENT

        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }
        # NOTE(review): ``datetime_from``, ``datetime_to`` and ``name`` are
        # not declared in this class; presumably they are inherited from
        # TopicModellingIndex -- confirm.
        mappings = {
            "properties": {
                "meta_dtm_name": {
                    "type": "keyword",
                },
                "datetime_from": {
                    "type": "date",
                },
                "datetime_to": {
                    "type": "date",
                },
                "name": {
                    "type": "keyword",
                },
            },
        }
Пример #15
0
class BaseDocument(dsl.DocType):
    """Base document class to build ElasticSearch documents.

    This is the standard ``elasticsearch-dsl`` ``DocType`` class with
    three fields for handling view permissions already added.

    """

    #: list of user ids with view permission on the object
    users_with_permissions = dsl.Keyword(multi=True)

    #: list of group ids with view permission on the object
    groups_with_permissions = dsl.Keyword(multi=True)

    #: identifies if object has public view permission assigned
    public_permission = dsl.Boolean()
Пример #16
0
class ClusterSource(es.Document):
    """Document with a ``name`` keyword and two object fields."""

    name = es.Keyword()
    clusters = es.Object()
    clustering_params = es.Object()

    class Index:
        name = ES_INDEX_SOURCE_CLUSTERS
        using = ES_CLIENT

        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 1,
        }

        # Raw mapping that repeats the three fields declared on the class.
        # NOTE(review): confirm which code reads this attribute.
        mappings = {
            "properties": {
                "name": {
                    "type": "keyword",
                },
                "clusters": {
                    "type": "object",
                },
                "clustering_params": {
                    "type": "object",
                },
            },
        }
Пример #17
0
class User(es.DocType):
    """Elastic document describing user.

    ``username`` and ``email`` each have an analyzed Text field and a
    separate ``*_exact`` Keyword field. Bound to the ``users`` index.
    """

    objectID = es.Keyword()

    # ``autocomplete`` is an analyzer defined outside this class.
    username = es.Text(fielddata=True, analyzer=autocomplete)
    username_exact = es.Keyword()
    full_name = es.Text(fielddata=True, analyzer=autocomplete)

    roles = es.Keyword(multi=True)
    groups = es.Keyword(multi=True)

    email = es.Text(fielddata=True, analyzer=autocomplete)
    email_exact = es.Keyword()

    class Meta:
        index = 'users'
Пример #18
0
class EmbeddingIndex(es.Document):
    """Document with descriptive fields, timestamps and embedding options."""

    corpus = es.Keyword()
    number_of_documents = es.Integer()
    is_ready = es.Boolean()
    name = es.Keyword()
    description = es.Text()
    datetime_created = es.Date()
    datetime_finished = es.Date()

    by_unit = es.Keyword()  # Token/Word/Sentence/Text
    algorithm = es.Keyword()
    pooling = es.Keyword()
    meta_parameters = es.Object()

    class Index:
        # Index name and client both come from constants defined outside
        # this class.
        name = ES_INDEX_EMBEDDING
        using = ES_CLIENT
Пример #19
0
class Monument(es.Document):
    """
    Elastic data for Monument

    The index name is read from ``settings.ELASTIC_INDICES['MONUMENTEN']``.
    """
    id = es.Keyword()
    type = es.Keyword()
    # Text field with fielddata enabled and two Text sub-fields.
    # NOTE(review): the sub-field named ``keyword`` is a Text field using
    # the ``subtype`` analyzer, not a Keyword field -- confirm intended.
    naam = es.Text(fielddata=True,
                   analyzer=analyzers.monument_naam,
                   fields={
                       'raw':
                       es.Text(fielddata=True),
                       'keyword':
                       es.Text(fielddata=True, analyzer=analyzers.subtype)
                   })

    class Index:
        name = settings.ELASTIC_INDICES['MONUMENTEN']
Пример #20
0
class KadastraalSubject(es.DocType):
    """Document mapping for ``KadastraalSubject`` entries.

    The index name is read from ``settings.ELASTIC_INDICES['BRK_SUBJECT']``.
    """

    # Text field with a ``raw`` Keyword sub-field and an ``ngram`` Text
    # sub-field that uses different index-time and search-time analyzers.
    naam = es.Text(
        analyzer=analyzers.naam,
        fields={
            'raw': es.Keyword(),
            'ngram': es.Text(
                analyzer=analyzers.kad_sbj_naam,
                search_analyzer=analyzers.kad_obj_aanduiding_keyword)})

    natuurlijk_persoon = es.Boolean()
    geslachtsnaam = es.Text(analyzer=analyzers.naam)
    order = es.Integer()

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_SUBJECT']
Пример #21
0
class KadastraalObject(es.DocType):
    """Document mapping for ``KadastraalObject`` entries.

    ``kad_text_fields`` and ``kad_int_fields`` are sub-field definitions
    defined outside this class. The index name is read from
    ``settings.ELASTIC_INDICES['BRK_OBJECT']``.
    """

    aanduiding = es.Text(
        fielddata=True,
        analyzer=analyzers.postcode,
        fields=kad_text_fields)

    # The search aanduiding is the aanduiding without the "acd00 " prefix.
    # To be removed in the future.
    short_aanduiding = es.Text(
        analyzer=analyzers.kad_obj_aanduiding,
        search_analyzer='standard',
        fields=kad_text_fields)

    sectie = es.Text(
        fields=kad_text_fields,
    )

    # Indexed with the autocomplete analyzer, searched with 'standard'.
    objectnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields,
    )

    indexletter = es.Keyword(
        fields=kad_text_fields,
    )

    # Indexed with the autocomplete analyzer, searched with 'standard'.
    indexnummer = es.Text(
        analyzer=analyzers.autocomplete,
        search_analyzer='standard',
        fields=kad_int_fields
    )

    order = es.Integer()
    centroid = es.GeoPoint()

    gemeente = es.Text(analyzer=analyzers.naam)
    gemeente_code = es.Keyword(normalizer=analyzers.lowercase)

    subtype = es.Keyword()
    _display = es.Keyword()

    class Index:
        name = settings.ELASTIC_INDICES['BRK_OBJECT']
Пример #22
0
class DocumentLocation(es.Document):
    """Document holding ``document_*`` fields next to ``location_*`` fields."""

    document_es_id = es.Keyword()
    document_datetime = es.Date()
    document_source = es.Keyword()
    location_name = es.Keyword()
    location_level = es.Keyword()
    location_weight = es.Float()
    location_id = es.Keyword()

    class Index:
        # NOTE(review): the trailing comment on the next line names
        # ES_INDEX_DOCUMENT_EVAL, not the constant actually used -- confirm
        # whether it is stale.
        name = ES_INDEX_DOCUMENT_LOCATION  # !!! f"{ES_INDEX_DOCUMENT_EVAL}_{tm}_{criterion.id}"
        using = ES_CLIENT

        settings = {
            "number_of_shards": 3,
            "number_of_replicas": 1,
            "max_result_window": 5000000,
        }
        # Raw mapping that repeats the seven fields declared on the class.
        # NOTE(review): confirm which code reads this attribute.
        mappings = {
            "properties": {
                "document_datetime": {
                    "type": "date"
                },
                "document_es_id": {
                    "type": "keyword"
                },
                "document_source": {
                    "type": "keyword"
                },
                "location_level": {
                    "type": "keyword"
                },
                "location_name": {
                    "type": "keyword"
                },
                "location_weight": {
                    "type": "float"
                },
                "location_id": {
                    "type": "keyword"
                },
            }
        }
Пример #23
0
class EntityDocument(CollectionDocument):
    """Document for entity search.

    Adds ``collection`` and ``type`` fields to ``CollectionDocument``.
    """

    collection = dsl.Integer()
    type = dsl.Keyword()

    class Index:
        """Index configuration for the entity search document."""

        name = "entity"
Пример #24
0
class Node(es.DocType):
    """
    Elastic document describing a node.

    Bound to the ``nodes`` index through ``Meta.index``.
    """

    node_type = es.Keyword()

    objectID = es.Keyword()

    # ``autocomplete`` is an analyzer defined outside this class.
    name = es.Text(
        fielddata=True,
        analyzer=autocomplete
    )

    # NOTE(review): sub-fields are passed through ``fields=`` here and for
    # ``project`` below; confirm this yields the intended object mapping
    # (elasticsearch-dsl's Object field also accepts ``properties=``).
    user = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Text(
                fielddata=True,
                analyzer=autocomplete)
        }
    )

    description = es.Text()

    is_free = es.Boolean()

    project = es.Object(
        fields={
            'id': es.Keyword(),
            'name': es.Keyword(),
            'url': es.Keyword(),
        }
    )

    media = es.Keyword()

    picture = es.Keyword()

    tags = es.Keyword(multi=True)
    license_notes = es.Text()

    created_at = es.Date()
    updated_at = es.Date()

    class Meta:
        index = 'nodes'
Пример #25
0
class CollectionDocument(BaseDocument):
    """Document for collection search.

    Adds ``descriptor_data`` and ``tags`` to ``BaseDocument``; both are
    declared with ``multi=True``.
    """

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)

    class Meta:
        """Meta class for collection search document."""

        index = 'collection'
Пример #26
0
class EntityDocument(CollectionDocument):
    """Document for entity search.

    Adds ``descriptor_completed``, ``collections`` and ``type`` to
    ``CollectionDocument``.
    """

    descriptor_completed = dsl.Boolean()
    # Declared with ``multi=True``.
    collections = dsl.Integer(multi=True)
    type = dsl.Keyword()

    class Meta:
        """Meta class for entity search document."""

        index = 'entity'
Пример #27
0
class DataDocument(BaseDocument):
    """Document for data search.

    ``ProcessType`` and ``Name`` are field types defined outside this class.
    """

    started = dsl.Date()
    finished = dsl.Date()
    status = dsl.Keyword()
    process = dsl.Integer()
    process_type = ProcessType()
    # Keep backward compatibility: ``type`` uses the same field type as
    # ``process_type``.
    type = ProcessType()
    process_name = Name()
    tags = dsl.Keyword(multi=True)

    collection = dsl.Integer()
    entity = dsl.Integer()

    class Index:
        """Index configuration for the data search document."""

        name = 'data'
Пример #28
0
class AuthorDoc(DocType):
    """Author document bound to the ``author`` index."""

    id = edsl.Integer(required=True)
    orcid = edsl.Keyword()
    researcherid = edsl.Keyword()
    email = edsl.Keyword()
    # Indexed with the autocomplete analyzer, searched with 'standard';
    # the value is also copied to ALL_DATA_FIELD.
    name = edsl.Text(
        copy_to=ALL_DATA_FIELD,
        analyzer=autocomplete_analyzer,
        search_analyzer='standard',
    )

    @classmethod
    def from_instance(cls, author):
        """Build a document from ``author`` and return it as a dict.

        The author's ``id`` is used both as the document meta id and as the
        ``id`` field. The result comes from ``to_dict(include_meta=True)``.
        """
        meta = {'id': author.id}
        field_values = {
            'id': author.id,
            'orcid': author.orcid,
            'researcherid': author.researcherid,
            'email': author.email,
            'name': author.name,
        }
        document = cls(meta=meta, **field_values)
        return document.to_dict(include_meta=True)

    class Index:
        name = 'author'
Пример #29
0
class CollectionDocument(BaseDocument):
    """Document for collection search.

    Adds ``descriptor_data``, ``tags`` and ``description`` to
    ``BaseDocument``.
    """

    # Data values extracted from the descriptor.
    descriptor_data = dsl.Text(multi=True)
    tags = dsl.Keyword(multi=True)
    # Text field with fielddata enabled.
    description = dsl.Text(fielddata=True)

    class Index:
        """Index configuration for the collection search document."""

        name = "collection"
Пример #30
0
class TrainingJob(elasticsearch_dsl.Document):
    """Document describing a training job: identity, metrics and params.

    The index name comes from the ``TRAINING_JOBS`` constant defined outside
    this class.
    """

    id = elasticsearch_dsl.Integer()
    schema_version = elasticsearch_dsl.Integer()
    job_name = elasticsearch_dsl.Keyword()
    author = elasticsearch_dsl.Keyword()
    created_at = elasticsearch_dsl.Date()
    ended_at = elasticsearch_dsl.Date()
    params = elasticsearch_dsl.Text()
    raw_log = elasticsearch_dsl.Text()
    model_url = elasticsearch_dsl.Text()

    # Metrics
    epochs = elasticsearch_dsl.Integer()
    train_acc = elasticsearch_dsl.Float()
    final_val_acc = elasticsearch_dsl.Float()
    best_val_acc = elasticsearch_dsl.Float()
    final_val_loss = elasticsearch_dsl.Float()
    best_val_loss = elasticsearch_dsl.Float()
    final_val_sensitivity = elasticsearch_dsl.Float()
    best_val_sensitivity = elasticsearch_dsl.Float()
    final_val_specificity = elasticsearch_dsl.Float()
    best_val_specificity = elasticsearch_dsl.Float()
    final_val_auc = elasticsearch_dsl.Float()
    best_val_auc = elasticsearch_dsl.Float()

    # Params
    batch_size = elasticsearch_dsl.Integer()
    val_split = elasticsearch_dsl.Float()
    seed = elasticsearch_dsl.Integer()

    rotation_range = elasticsearch_dsl.Float()
    width_shift_range = elasticsearch_dsl.Float()
    height_shift_range = elasticsearch_dsl.Float()
    shear_range = elasticsearch_dsl.Float()
    # Keyword rather than Float, unlike the other *_range fields.
    zoom_range = elasticsearch_dsl.Keyword()
    horizontal_flip = elasticsearch_dsl.Boolean()
    vertical_flip = elasticsearch_dsl.Boolean()

    dropout_rate1 = elasticsearch_dsl.Float()
    dropout_rate2 = elasticsearch_dsl.Float()

    data_dir = elasticsearch_dsl.Keyword()
    gcs_url = elasticsearch_dsl.Keyword()

    mip_thickness = elasticsearch_dsl.Integer()
    height_offset = elasticsearch_dsl.Integer()
    pixel_value_range = elasticsearch_dsl.Keyword()

    # We need to keep a list of params for the parser because
    # we can't use traditional approaches to get the class attrs.
    # It currently names every field declared under "# Params" above, so
    # keep the two in sync when adding or removing a param field.
    params_to_parse = [
        'batch_size', 'val_split', 'seed', 'rotation_range',
        'width_shift_range', 'height_shift_range', 'shear_range', 'zoom_range',
        'horizontal_flip', 'vertical_flip', 'dropout_rate1', 'dropout_rate2',
        'data_dir', 'gcs_url', 'mip_thickness', 'height_offset',
        'pixel_value_range'
    ]

    class Index:
        name = TRAINING_JOBS