class PublicBodyDocument(DocType):
    """Search document for ``PublicBody`` records."""

    name = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    name_auto = fields.TextField(attr='all_names', analyzer=ngram_analyzer)
    content = fields.TextField(analyzer=analyzer)
    jurisdiction = fields.IntegerField(attr='jurisdiction_id')

    # Id lists; the values are produced by the prepare_* methods below.
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    regions = fields.ListField(fields.IntegerField())
    regions_exact = fields.ListField(fields.IntegerField())
    regions_kind = fields.ListField(fields.KeywordField())

    class Meta:
        model = PublicBody
        queryset_chunk_size = 100

    def get_queryset(self):
        """Return the base queryset with related objects loaded up front."""
        queryset = super().get_queryset()
        queryset = queryset.select_related('jurisdiction')
        return queryset.prefetch_related(
            'classification', 'categories', 'regions'
        )

    def prepare_content(self, obj):
        """Join all non-empty descriptive values into one text blob."""
        parts = [
            obj.name,
            obj.other_names,
            obj.jurisdiction.name if obj.jurisdiction else '',
            obj.email or '',
            obj.description,
            obj.contact,
            obj.address,
            obj.url,
            obj.classification.name if obj.classification else '',
        ]
        parts.extend(category.name for category in obj.categories.all())
        # filter(None, ...) drops empty strings and None values.
        return ' '.join(filter(None, parts))

    def prepare_classification(self, obj):
        """Return the classification id followed by its ancestors' ids."""
        if obj.classification is None:
            return []
        ids = [obj.classification.id]
        ids.extend(
            ancestor.id for ancestor in obj.classification.get_ancestors()
        )
        return ids

    def prepare_categories(self, obj):
        """Return the ids of all categories, then those of their ancestors."""
        categories = obj.categories.all()
        ids = [category.id for category in categories]
        for category in categories:
            ids.extend(ancestor.id for ancestor in category.get_ancestors())
        return ids

    def prepare_regions(self, obj):
        """Return the ids of all regions, then those of their ancestors."""
        regions = obj.regions.all()
        ids = [region.id for region in regions]
        for region in regions:
            ids.extend(ancestor.id for ancestor in region.get_ancestors())
        return ids

    def prepare_regions_exact(self, obj):
        """Return only the ids of the directly assigned regions."""
        return [region.id for region in obj.regions.all()]

    def prepare_regions_kind(self, obj):
        """Return the ``kind`` of every directly assigned region."""
        return [region.kind for region in obj.regions.all()]
class ArticleDocument(Document):
    """Search document for published ``Article`` objects."""

    title = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    url = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    start_publication = fields.DateField()
    author = fields.ListField(fields.IntegerField())
    category = fields.ListField(fields.IntegerField())
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )

    special_signals = True

    class Django:
        model = Article
        queryset_chunk_size = 100

    def get_queryset(self):
        """Index only what the ``published`` manager returns."""
        return Article.published.all()

    def prepare_content(self, obj):
        """Combine title, description, tag-stripped body, categories, tags."""
        html = obj.get_html_content()
        pieces = [obj.title, obj.description, strip_tags(html)]
        pieces += [category.title for category in obj.categories.all()]
        pieces += [tag.name for tag in obj.tags.all()]
        return ' '.join(pieces)

    def prepare_description(self, obj):
        """Return the description with HTML tags stripped."""
        return strip_tags(obj.description)

    def prepare_url(self, obj):
        """Return the article's absolute URL."""
        return obj.get_absolute_url()

    def prepare_title(self, obj):
        """Return the article title unchanged."""
        return obj.title

    def prepare_start_publication(self, obj):
        """Return the publication start value unchanged."""
        return obj.start_publication

    def prepare_category(self, obj):
        """Return the ids of the article's categories."""
        return [category.id for category in obj.categories.all()]

    def prepare_author(self, obj):
        """Return the ids of the article's authors."""
        return [author.id for author in obj.authors.all()]
class ConceptDocument(Document):
    """Search document for ``Concept`` objects (index ``concepts``)."""

    class Index:
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    name = fields.KeywordField(attr='display_name', normalizer="lowercase")
    last_update = fields.DateField(attr='updated_at')
    # Values come from prepare_locale below.
    locale = fields.ListField(fields.KeywordField(attr='display_name'))
    source = fields.KeywordField(attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(
        attr='concept_class', normalizer="lowercase"
    )
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField()

    class Django:
        model = Concept
        fields = [
            'version',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct non-null locales of the concept's names."""
        names_with_locale = instance.names.filter(locale__isnull=False)
        locales = names_with_locale.distinct('locale').values_list(
            'locale', flat=True
        )
        return list(locales)

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every source linked to the concept."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every collection holding the concept."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the de-duplicated mnemonics of the concept's collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        extras = instance.extras
        return extras if extras else {}
class ArticleDocument(DocType):
    """Search document for published ``Article`` objects."""

    title = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    url = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=analyzer,
        fields={'raw': fields.KeywordField()},
    )
    start_publication = fields.DateField()
    author = fields.ListField(fields.IntegerField())
    category = fields.ListField(fields.IntegerField())
    content = fields.TextField(analyzer=analyzer)

    special_signals = True

    class Meta:
        model = Article
        queryset_chunk_size = 100

    def get_queryset(self):
        """Index only what the ``published`` manager returns."""
        return Article.published.all()

    def prepare_content(self, obj):
        """Return the article's HTML content with tags stripped."""
        return strip_tags(obj.get_html_content())

    def prepare_description(self, obj):
        """Return the description with HTML tags stripped."""
        return strip_tags(obj.description)

    def prepare_url(self, obj):
        """Return the article's absolute URL."""
        return obj.get_absolute_url()

    def prepare_title(self, obj):
        """Return the article title unchanged."""
        return obj.title

    def prepare_start_publication(self, obj):
        """Return the publication start value unchanged."""
        return obj.start_publication

    def prepare_category(self, obj):
        """Return the ids of the article's categories."""
        return [category.id for category in obj.categories.all()]

    def prepare_author(self, obj):
        """Return the ids of the article's authors."""
        return [author.id for author in obj.authors.all()]
class JobDocument(Document):
    """Search document for ``Job`` objects (index ``jobs``)."""

    # List fields
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    connectors_to_execute = fields.ListField(fields.KeywordField())

    # Free-text field
    errors = fields.TextField()

    # Keyword fields, so they can be used in aggregations/visualizations
    source = fields.KeywordField()
    status = fields.KeywordField()
    md5 = fields.KeywordField()
    tlp = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # Nested fields
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        }
    )
    analyzer_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        }
    )
    # Same sub-field layout as analyzer_reports.
    connector_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        }
    )

    class Index:
        # Name of the Elasticsearch index
        name = "jobs"

    class Django:
        # The model associated with this Document
        model = Job

        # Model fields indexed in addition to the explicit ones above
        fields = [
            "is_sample",
            "received_request_time",
            "finished_analysis_time",
        ]
class TweetsDocument(Document):
    """Search document for ``models.Tweet`` objects."""

    # ListField wraps a field *instance*; the TextField class itself must not
    # be passed, so it is instantiated here.
    excepted_words = fields.ListField(fields.TextField())
    author = fields.ObjectField()
    entities = fields.ObjectField()
    text = fields.TextField(
        fields={"raw": fields.KeywordField()},
        analyzer="text_analyser",
    )
    raw_text = fields.TextField()
    retweet_count = fields.IntegerField()

    class Django:
        model = models.Tweet

        # "author" and "entities" are declared explicitly above as object
        # fields, so they are not repeated in this list.
        fields = [
            "search_item",
            "id",
            "created_date",
            "source",
            "lang",
        ]

        auto_refresh = False
        ignore_signals = False

        # Paginate the Django queryset used to populate the index with the
        # specified size (by default there is no pagination).
        queryset_pagination = 1000
class ArticleDocument(Document):
    """Search document for ``Articles`` objects (index ``articles``)."""

    category = fields.ObjectField(
        properties={
            'id': fields.TextField(),
            'name': fields.TextField(),
            'image': fields.FileField(),
        }
    )
    cover = fields.FileField()
    tags = fields.ListField(fields.TextField())

    class Index:
        name = 'articles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 1}

    class Django:
        model = Articles
        fields = [
            'id',
            'updated_at',
            'title',
            'subtitle',
            'author_name',
            'user',
            'realease',
            'is_active',
            'slug',
        ]
        # related_models = [Category]

    def get_queryset(self):
        """Return the base queryset with ``category`` joined in one query."""
        queryset = super().get_queryset()
        return queryset.select_related('category')
class CollectionDocument(Document):
    """Search document for ``Collection`` objects (index ``collections``)."""

    class Index:
        name = 'collections'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    # Values come from prepare_locale below.
    locale = fields.ListField(fields.KeywordField())
    owner = fields.KeywordField(attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type', normalizer='lowercase'
    )
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url', normalizer='lowercase'
    )
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField()

    class Django:
        model = Collection
        fields = [
            'full_name',
            'custom_validation_schema',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the supported locales, or ``[]`` when none are set.

        The fallback is applied directly to the attribute value so that a
        ``None``/empty ``supported_locales`` always yields an empty list.
        """
        return instance.supported_locales or []

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        return instance.extras or {}
class OrganizationDocument(Document):
    """Search document for ``Organization`` objects."""

    class Index:
        name = 'organizations'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.BooleanField(attr='public_can_view')
    name = fields.KeywordField(attr='name', normalizer="lowercase")
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()
    # Values come from prepare_user below.
    user = fields.ListField(fields.KeywordField())

    class Django:
        model = Organization
        fields = [
            'is_active',
            'company',
            'location',
        ]

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        extras = instance.extras
        return extras if extras else {}

    @staticmethod
    def prepare_user(instance):
        """Return the usernames of the organization's members."""
        usernames = instance.members.values_list('username', flat=True)
        return list(usernames)
class ExperimentDocument(Document):
    """Elasticsearch document corresponding to the ``Experiment`` model."""

    # Analysed text fields, each with a "raw" sub-field
    title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_authors = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    technology = fields.TextField(
        analyzer=html_strip_no_stop,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    platform_names = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )
    platform_accession_codes = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )

    # Basic fields
    accession_code = fields.KeywordField()
    alternate_accession_code = fields.KeywordField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()
    num_downloadable_samples = fields.IntegerField()
    source_first_published = fields.DateField()

    # Index all downloadable samples as keywords so that unique counts can be
    # calculated on the facets.
    downloadable_samples = fields.ListField(fields.KeywordField())

    class Django:
        model = Experiment
        parallel_indexing = True
        queryset_pagination = 3000

        fields = [
            "id",
        ]

    def get_queryset(self):
        """Return the default queryset ordered by ``id``."""
        queryset = super().get_queryset()
        return queryset.order_by("id")
class SourceDocument(Document):
    """Search document for ``Source`` objects (index ``sources``)."""

    class Index:
        name = 'sources'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Values come from prepare_locale below.
    locale = fields.ListField(fields.KeywordField())
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='parent_resource')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    public_can_view = fields.TextField(attr='public_can_view')
    source_type = fields.KeywordField(attr='source_type')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name')

    class Django:
        model = Source
        fields = [
            'full_name',
            'custom_validation_schema',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the supported locales, or ``[]`` when none are set.

        The fallback is applied directly to the attribute value so that a
        ``None``/empty ``supported_locales`` always yields an empty list.
        """
        return instance.supported_locales or []
class JobDocument(Document):
    """Search document for ``Job`` objects (index ``jobs``)."""

    # Object/List fields
    analyzers_requested = fields.ListField(fields.KeywordField())
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    analysis_reports = fields.ObjectField()

    # Normal fields
    errors = fields.TextField()
    runtime_configuration = fields.ObjectField()

    # Keyword fields, so they can be used in aggregations/visualizations
    source = fields.KeywordField()
    md5 = fields.KeywordField()
    status = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # Nested field
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        }
    )

    def prepare_runtime_configuration(self, instance):
        """Return the job's ``runtime_configuration`` value as stored."""
        configuration = instance.runtime_configuration
        return configuration

    def prepare_analysis_reports(self, instance):
        """Return the job's ``analysis_reports`` value as stored.

        See https://github.com/django-es/django-elasticsearch-dsl/issues/36
        """
        reports = instance.analysis_reports
        return reports

    class Index:
        # Name of the Elasticsearch index
        name = "jobs"

    class Django:
        # The model associated with this Document
        model = Job

        # Model fields indexed in addition to the explicit ones above
        fields = [
            "is_sample",
            "run_all_available_analyzers",
            "received_request_time",
            "finished_analysis_time",
            "force_privacy",
            "disable_external_analyzers",
        ]
class CollectionDocument(Document):
    """Search document for ``Collection`` objects (index ``collections``)."""

    class Index:
        name = 'collections'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    # Values come from prepare_locale below.
    locale = fields.ListField(fields.KeywordField())
    owner = fields.KeywordField(attr='parent_resource', normalizer='lowercase')
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type', normalizer='lowercase'
    )
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url', normalizer='lowercase'
    )
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField()
    identifier = fields.ObjectField()
    publisher = fields.KeywordField(attr='publisher', normalizer='lowercase')
    immutable = fields.KeywordField(attr='immutable')
    # Value comes from prepare_created_by below.
    created_by = fields.KeywordField()

    class Django:
        model = Collection
        fields = [
            'full_name',
            'custom_validation_schema',
            'revision_date',
            'retired',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the supported locales, or ``[]`` when none are set.

        The fallback is applied directly to the attribute value so that a
        ``None``/empty ``supported_locales`` always yields an empty list.
        """
        return instance.supported_locales or []

    @staticmethod
    def prepare_extras(instance):
        """Return ``extras`` passed through ``jsonify_safe``, or ``{}``."""
        value = {}
        if instance.extras:
            value = jsonify_safe(instance.extras)
        return value or {}

    @staticmethod
    def prepare_identifier(instance):
        """Return ``identifier`` passed through ``jsonify_safe``, or ``{}``."""
        value = {}
        if instance.identifier:
            value = jsonify_safe(instance.identifier)
        return value or {}

    @staticmethod
    def prepare_created_by(instance):
        """Return the username of the user who created the collection."""
        return instance.created_by.username
class AdDocument(DocType):
    """Search document for ``Ad`` objects."""

    category = fields.KeywordField(attr='category.path_name')
    category_path = fields.KeywordField(attr='category.path_ids_str')
    category_slug = fields.KeywordField(attr='category.slug')
    images = fields.ListField(fields.KeywordField(attr='images_url'))
    product = fields.ObjectField(
        properties={
            'title': fields.KeywordField(attr='product_type.title'),
            'specs': fields.NestedField(
                attr='specs.all',
                properties={
                    'label': fields.KeywordField(attr='field.label'),
                    'value': fields.KeywordField(attr='value.value_json'),
                },
            ),
        }
    )

    class Meta:
        model = Ad
        fields = [
            'title',
            'desc',
            'price',
            'created_at',
            'updated_at',
        ]
        related_models = [Taxonomy, File, Product]

    def get_instances_from_related(self, related_instance):
        """Return the ad(s) tied to a changed related object.

        Taxonomy and File instances yield their ``ad_set``; a Product yields
        its single ``ad``. ``None`` is returned when a Product has no ad or
        when the instance is of any other type.
        """
        if isinstance(related_instance, (Taxonomy, File)):
            return related_instance.ad_set.all()
        if isinstance(related_instance, Product):
            try:
                return related_instance.ad
            except Ad.DoesNotExist:
                # A product without an ad is deliberately ignored.
                return None
        return None
class UserProfileDocument(Document):
    """Search document for ``UserProfile`` objects."""

    class Index:
        name = 'user_profiles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    last_update = fields.DateField(attr='updated_at')
    date_joined = fields.DateField(attr='created_at')
    username = fields.KeywordField(attr='username', normalizer='lowercase')
    location = fields.KeywordField(attr='location', normalizer='lowercase')
    company = fields.KeywordField(attr='company', normalizer='lowercase')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    extras = fields.ObjectField(dynamic=True)
    # Values come from prepare_org below.
    org = fields.ListField(fields.KeywordField())

    class Django:
        model = UserProfile
        fields = [
            'is_active',
            'is_superuser',
            'is_staff',
        ]

    @staticmethod
    def prepare_extras(instance):
        """Return extras made JSON-safe and flattened, or ``{}``.

        ``flatten_dict`` is applied only when the value is a dict.
        """
        value = jsonify_safe(instance.extras) if instance.extras else {}
        if isinstance(value, dict):
            value = flatten_dict(value)
        return value if value else {}

    @staticmethod
    def prepare_org(instance):
        """Return the mnemonics of the user's organizations."""
        mnemonics = instance.organizations.values_list('mnemonic', flat=True)
        return list(mnemonics)
class UserProfileDocument(Document):
    """Search document for ``UserProfile`` objects."""

    class Index:
        name = 'user_profiles'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    date_joined = fields.DateField(attr='created_at')
    username = fields.KeywordField(attr='username', normalizer='lowercase')
    location = fields.KeywordField(attr='location', normalizer='lowercase')
    company = fields.KeywordField(attr='company', normalizer='lowercase')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    extras = fields.ObjectField()
    # Values come from prepare_org below.
    org = fields.ListField(fields.KeywordField())

    class Django:
        model = UserProfile
        fields = ['is_active']

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        extras = instance.extras
        return extras if extras else {}

    @staticmethod
    def prepare_org(instance):
        """Return the mnemonics of the user's organizations."""
        mnemonics = instance.organizations.values_list('mnemonic', flat=True)
        return list(mnemonics)
class CityDocument(Document):
    """City Elasticsearch document.

    This document has been created purely for testing out complex fields.
    """

    # Identifier
    id = fields.IntegerField(attr='id')

    # ------------------------------------------------------------------
    # Main data fields for search
    # ------------------------------------------------------------------
    name = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    info = StringField(analyzer=html_strip)

    # ------------------------------------------------------------------
    # Nested fields for search and filtering
    # ------------------------------------------------------------------
    # Country the city belongs to
    country = fields.NestedField(
        properties={
            'name': StringField(
                analyzer=html_strip,
                fields={
                    'raw': KeywordField(),
                    'suggest': fields.CompletionField(),
                },
            ),
            'info': StringField(analyzer=html_strip),
            'location': fields.GeoPointField(attr='location_field_indexing'),
        }
    )

    location = fields.GeoPointField(attr='location_field_indexing')

    # ------------------------------------------------------------------
    # Other complex fields for search and filtering
    # ------------------------------------------------------------------
    # The matching *_dict_indexing variants are not indexed.
    boolean_list = fields.ListField(
        StringField(attr='boolean_list_indexing')
    )
    datetime_list = fields.ListField(
        StringField(attr='datetime_list_indexing')
    )
    float_list = fields.ListField(
        StringField(attr='float_list_indexing')
    )
    integer_list = fields.ListField(
        StringField(attr='integer_list_indexing')
    )

    class Django(object):
        # The model associated with this Document
        model = City

    class Meta(object):
        parallel_indexing = True
class BookDocument(DocType):
    """Book Elasticsearch document."""

    # The fields serve two use cases: (1) more-like-this, which uses `title`,
    # `description` and `summary`; (2) search and filtering, which use all
    # of the fields.

    # Identifier
    id = fields.IntegerField(attr='id')

    # ------------------------------------------------------------------
    # Main data fields for search
    # ------------------------------------------------------------------
    __title_fields = {
        'raw': KeywordField(),
        'suggest': fields.CompletionField(),
        'edge_ngram_completion': StringField(
            analyzer=edge_ngram_completion
        ),
        'mlt': StringField(analyzer='english'),
    }

    # The context suggester sub-field is only added when the flag is set.
    if ELASTICSEARCH_GTE_5_0:
        __title_fields['suggest_context'] = fields.CompletionField(
            contexts=[
                {
                    "name": "tag",
                    "type": "category",
                    "path": "tags.raw",
                },
                {
                    "name": "state",
                    "type": "category",
                    "path": "state.raw",
                },
                {
                    "name": "publisher",
                    "type": "category",
                    "path": "publisher.raw",
                },
            ]
        )

    title = StringField(analyzer=html_strip, fields=__title_fields)

    description = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    summary = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'mlt': StringField(analyzer='english'),
        },
    )

    # ------------------------------------------------------------------
    # Additional fields for search and filtering
    # ------------------------------------------------------------------
    authors = fields.ListField(
        StringField(
            analyzer=html_strip,
            fields={
                'raw': KeywordField(),
            },
        )
    )

    # Publisher
    publisher = StringField(
        attr='publisher_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    # Publication date
    publication_date = fields.DateField()

    # State
    state = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        },
    )

    # ISBN
    isbn = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
        },
    )

    # Price
    price = fields.FloatField()

    # Pages
    pages = fields.IntegerField()

    # Stock count
    stock_count = fields.IntegerField()

    # Tags
    tags = StringField(
        attr='tags_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(multi=True),
            'suggest': fields.CompletionField(multi=True),
        },
        multi=True,
    )

    # Date created
    created = fields.DateField()

    null_field = StringField(attr='null_field_indexing')

    class Meta(object):
        """Meta options."""

        # The model associated with this DocType
        model = Book
        parallel_indexing = True

    def prepare_summary(self, instance):
        """Return the summary truncated to its first 32766 characters."""
        # NOTE(review): 32766 presumably mirrors an indexing length limit
        # for the `raw` sub-field; confirm.
        summary = instance.summary
        return summary[:32766]

    def prepare_authors(self, instance):
        """Return the names of all authors of the book."""
        names = []
        for author in instance.authors.all():
            names.append(author.name)
        return names
class MappingDocument(Document):
    """Search document for ``Mapping`` objects (index ``mappings``)."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = [
            'external_id',
        ]

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name')
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source')
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type')

    # List fields below are filled by the matching prepare_* methods.
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner')
    )
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type'
    )
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')
    source_version = fields.ListField(fields.IntegerField())
    collection_version = fields.ListField(fields.IntegerField())
    collection = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic')

    @staticmethod
    def prepare_from_concept(instance):
        """Return the URL, code and name of the mapping's from-concept."""
        return [
            instance.from_concept_url,
            instance.from_concept_code,
            instance.from_concept_name,
        ]

    @staticmethod
    def prepare_to_concept(instance):
        """Return the code and name of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        values = list(self.prepare_from_concept(instance))
        values.extend(self.prepare_to_concept(instance))
        return values

    @staticmethod
    def prepare_concept_source(instance):
        """Return the from- and to-source names."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the from- and to-source owners."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the from- and to-source owner types."""
        return [
            instance.from_source_owner_type,
            instance.to_source_owner_type,
        ]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ids of the sources linked to the mapping."""
        ids = instance.sources.values_list('id', flat=True)
        return list(ids)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ids of the collections holding the mapping."""
        ids = instance.collection_set.values_list('id', flat=True)
        return list(ids)

    @staticmethod
    def prepare_collection(instance):
        """Return ids of HEAD collections sharing a mnemonic with this
        mapping's collections."""
        # Imported here rather than at module level, as in the original.
        from core.collections.models import Collection

        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        head_collections = Collection.objects.filter(
            version=HEAD,
            mnemonic__in=mnemonics,
        )
        ids = head_collections.distinct('id').values_list('id', flat=True)
        return list(ids)
class FoiRequestDocument(Document):
    """Search document for ``FoiRequest`` objects."""

    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )
    title = fields.TextField()
    description = fields.TextField()

    resolution = fields.KeywordField()
    status = fields.KeywordField()
    costs = fields.FloatField()

    tags = fields.ListField(fields.KeywordField())
    classification = fields.ListField(fields.IntegerField())
    categories = fields.ListField(fields.IntegerField())
    campaign = fields.IntegerField()

    due_date = fields.DateField()
    first_message = fields.DateField()
    last_message = fields.DateField()

    publicbody = fields.IntegerField(attr='public_body_id')
    jurisdiction = fields.IntegerField(attr='public_body.jurisdiction_id')

    user = fields.IntegerField(attr='user_id')
    team = fields.IntegerField(attr='team_id')

    public = fields.BooleanField()

    class Django:
        model = FoiRequest
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return all requests with jurisdiction and public body joined in."""
        return FoiRequest.objects.select_related(
            'jurisdiction',
            'public_body',
        )

    def prepare_content(self, obj):
        """Render the request through the search text template."""
        context = {'object': obj}
        return render_to_string(
            'foirequest/search/foirequest_text.txt', context
        )

    def prepare_tags(self, obj):
        """Return the ids of the request's tags."""
        return [tag.id for tag in obj.tags.all()]

    def prepare_public(self, obj):
        """Return the result of ``obj.in_public_search_index()``."""
        return obj.in_public_search_index()

    def prepare_campaign(self, obj):
        """Return the request's campaign id."""
        return obj.campaign_id

    def prepare_classification(self, obj):
        """Return the public body's classification id plus ancestor ids.

        Returns an empty list when there is no public body or it has no
        classification.
        """
        if obj.public_body_id is None:
            return []
        classification = obj.public_body.classification
        if classification is None:
            return []
        ids = [classification.id]
        ids.extend(ancestor.id for ancestor in classification.get_ancestors())
        return ids

    def prepare_categories(self, obj):
        """Return the public body's category ids plus their ancestors' ids.

        Returns an empty list when the request has no public body.
        """
        if not obj.public_body:
            return []
        categories = obj.public_body.categories.all()
        ids = [category.id for category in categories]
        for category in categories:
            ids.extend(ancestor.id for ancestor in category.get_ancestors())
        return ids

    def prepare_team(self, obj):
        """Return the team id of the request's project, or ``None``."""
        project = obj.project
        if not project:
            return None
        return project.team_id or None
class LegislationDocument(DocType):
    """Search document for ``Legislation`` objects.

    Classification and tag names are stored as single strings joined with
    the module-level ``CONN`` separator, so no name may contain it.
    """

    classifications = fields.TextField(term_vector='with_positions_offsets')
    article_classifications = fields.TextField(
        term_vector='with_positions_offsets')
    tags = fields.TextField(term_vector='with_positions_offsets')
    article_tags = fields.TextField(term_vector='with_positions_offsets')
    country = fields.KeywordField()
    country_name = fields.KeywordField(attr='country.name')
    law_type = fields.KeywordField()
    pdf_text = fields.TextField()
    year_mentions = fields.ListField(fields.IntegerField())

    articles = fields.NestedField(
        properties={
            'pk': fields.IntegerField(),
            'code': fields.KeywordField(),
            'text': fields.TextField(),
            'classifications_text': fields.TextField(
                term_vector='with_positions_offsets'),
            'parent_classifications': fields.TextField(
                term_vector='with_positions_offsets'),
            'tags_text': fields.TextField(
                term_vector='with_positions_offsets'),
            'parent_tags': fields.TextField(
                term_vector='with_positions_offsets'),
        })

    @staticmethod
    def _join_names(names, label):
        """Join ``names`` with ``CONN`` after validating each of them.

        Args:
            names: iterable of name strings.
            label: noun used in the error message ("Classification"/"Tag").

        Raises:
            ValidationError: if any name contains ``CONN``.
        """
        names = list(names)
        # Check each name on its own so the separator can never be matched
        # across the boundary of two adjacent names.
        if any(CONN in name for name in names):
            raise ValidationError(
                "{} names must not include the character "
                "'{}'.".format(label, CONN))
        return CONN.join(names)

    def prepare_classifications(self, instance):
        """Return the legislation's classification names joined by CONN."""
        names = instance.classifications.all().values_list('name', flat=True)
        return self._join_names(names, "Classification")

    def prepare_article_classifications(self, instance):
        """Return the distinct classification names of all articles."""
        names = {
            classification.name
            for article in instance.articles.all()
            for classification in article.classifications.all()
        }
        return self._join_names(names, "Classification")

    def prepare_tags(self, instance):
        """Return the legislation's tag names joined by CONN."""
        names = instance.tags.all().values_list('name', flat=True)
        return self._join_names(names, "Tag")

    def prepare_article_tags(self, instance):
        """Return the distinct tag names of all articles, joined by CONN."""
        names = {
            tag.name
            for article in instance.articles.all()
            for tag in article.tags.all()
        }
        return self._join_names(names, "Tag")

    def prepare_country(self, instance):
        """Return the ``iso`` attribute of the legislation's country."""
        return instance.country.iso

    def prepare_pdf_text(self, instance):
        """Return the text of all pages separated by blank lines."""
        return '\n\n'.join(page.page_text for page in instance.pages.all())

    def prepare_year_mentions(self, instance):
        """Return the four-digit numbers in ``year_mention`` that fall
        within ``settings.MIN_YEAR``..``settings.MAX_YEAR`` (inclusive)."""
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        candidates = (
            int(match)
            for match in re.findall(r'\d{4}', instance.year_mention or '')
        )
        return [
            year for year in candidates
            if settings.MIN_YEAR <= year <= settings.MAX_YEAR
        ]

    def get_instances_from_related(self, related_instance):
        """Return the legislation(s) affected by a changed related object."""
        if isinstance(related_instance, LegislationArticle):
            return related_instance.legislation
        # Otherwise it's a TaxonomyClassification or TaxonomyTag.
        return related_instance.legislation_set.all()

    class Meta:
        # The model associated with this DocType
        model = Legislation

        # The fields of the model to be indexed in Elasticsearch
        fields = [
            'id',
            'title',
            'abstract',
            'year',
            'year_amendment',
        ]
        related_models = [
            LegislationArticle,
            TaxonomyClassification,
            TaxonomyTag,
        ]
class MappingDocument(Document):
    """Search document for ``Mapping``, stored in the ``mappings`` index."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")

    # Concept related fields; the list fields are filled by prepare_* below.
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # Source / collection membership.
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())

    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField(dynamic=True)
    created_by = fields.KeywordField(attr='created_by.username')

    @staticmethod
    def prepare_from_concept(instance):
        """Return ``[url, code, name]`` of the mapping's from-concept.

        The name is ``from_concept_name`` when truthy, otherwise the
        from-concept's ``display_name``.
        """
        name = get(instance, 'from_concept_name')
        if not name:
            name = get(instance, 'from_concept.display_name')
        return [instance.from_concept_url, instance.from_concept_code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return ``[code, name]`` of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        from_values = self.prepare_from_concept(instance)
        to_values = self.prepare_to_concept(instance)
        return from_values + to_values

    @staticmethod
    def prepare_concept_source(instance):
        """Return the from- and to-source names."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the from- and to-source owners."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the from- and to-source owner types."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of the related collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        return list({collection.parent_url for collection in collections})

    @staticmethod
    def prepare_extras(instance):
        """Return the JSON-safe extras, flattened when they form a dict.

        Falls back to an empty dict when there are no extras.
        """
        if not instance.extras:
            return {}
        extras = jsonify_safe(instance.extras)
        if isinstance(extras, dict):
            extras = flatten_dict(extras)
        return extras or {}
class VariableDocument(GenericDataDocument):
    """Search document for data.Variable."""

    name = fields.TextField(analyzer=n_gram_analyzer)
    dataset = fields.ObjectField(
        properties={
            "name": fields.TextField(),
            "label": fields.TextField(),
            "label_de": fields.TextField(),
        })
    categories = fields.ObjectField(
        properties={
            "labels": fields.ListField(fields.TextField(analyzer="english")),
            "labels_de": fields.ListField(fields.TextField(analyzer="german")),
        })
    conceptual_dataset = fields.KeywordField()

    @staticmethod
    def _get_study(model_object: Variable) -> Study:
        """Return the study of the variable's dataset."""
        return model_object.dataset.study

    def prepare_analysis_unit(self, variable: Variable) -> Optional[str]:
        """Return the dataset's analysis_unit, passed through the
        missing-content handler."""
        analysis_unit = variable.dataset.analysis_unit
        return self._handle_missing_content(analysis_unit)

    @staticmethod
    def prepare_categories(variable: Variable) -> Dict[str, List[str]]:
        """Return the variable's ``labels`` and ``labels_de`` categories.

        Labels that start with a bracketed negative number (e.g. ``[-1] ...``)
        are left out; a key without labels is omitted from the result.
        """
        output: Dict[str, List[str]] = {}
        for key in ("labels", "labels_de"):
            labels = variable.categories.get(key)
            if not labels:
                continue
            output[key] = [
                label for label in labels
                if not re.match(r"\[-\d+\].*", label)
            ]
        return output

    def prepare_conceptual_dataset(self, variable: Variable) -> Optional[str]:
        """Return the dataset's conceptual_dataset, passed through the
        missing-content handler."""
        conceptual_dataset = variable.dataset.conceptual_dataset
        return self._handle_missing_content(conceptual_dataset)

    def prepare_period(self, variable: Variable) -> Optional[str]:
        """Return the dataset's period, passed through the missing-content
        handler."""
        period = variable.dataset.period
        return self._handle_missing_content(period)

    class Index:  # pylint: disable=missing-docstring,too-few-public-methods
        name = f"{settings.ELASTICSEARCH_DSL_INDEX_PREFIX}variables"

    class Django:  # pylint: disable=missing-docstring,too-few-public-methods
        model = Variable

    def get_queryset(self) -> QuerySet:
        """
        Return the queryset that should be indexed by this doc type.

        The related concept, dataset and the dataset's analysis_unit,
        conceptual_dataset, period and study are loaded via select_related.
        """
        related = (
            "concept",
            "dataset",
            "dataset__analysis_unit",
            "dataset__conceptual_dataset",
            "dataset__period",
            "dataset__study",
        )
        return super().get_queryset().select_related(*related)
class LegislationDocument(Document):
    """Search document for ``Legislation``.

    In addition to the model fields listed in ``Django.fields``, the document
    stores the classification labels (``"name (code)"``) and tag names
    attached to the legislation and to its sections (each flattened into one
    ``CONN``-joined string), the concatenated page texts, and the years found
    in ``year_mention``.
    """

    classifications = fields.TextField(term_vector="with_positions_offsets")
    section_classifications = fields.TextField(
        term_vector="with_positions_offsets")
    tags = fields.TextField(term_vector="with_positions_offsets")
    section_tags = fields.TextField(term_vector="with_positions_offsets")
    country = fields.KeywordField()
    country_name = fields.KeywordField(attr="country.name")
    law_type = fields.KeywordField()
    pdf_text = fields.TextField()
    year_mentions = fields.ListField(fields.IntegerField())
    sections = fields.NestedField(
        properties={
            "pk": fields.IntegerField(),
            "code": fields.KeywordField(),
            "text": fields.TextField(),
            "classifications_text":
                fields.TextField(term_vector="with_positions_offsets"),
            "parent_classifications":
                fields.TextField(term_vector="with_positions_offsets"),
            "tags_text":
                fields.TextField(term_vector="with_positions_offsets"),
            "parent_tags":
                fields.TextField(term_vector="with_positions_offsets"),
        })

    @staticmethod
    def _join_names(names, kind):
        """Join ``names`` with ``CONN`` after checking none of them embeds it.

        ``kind`` is the noun used in the error message ("Classification" or
        "Tag").

        Raises:
            ValidationError: if ``CONN`` occurs in the concatenated names.
        """
        # Materialise once: ``names`` may be a queryset or a set and is read
        # twice below.
        names = list(names)
        if CONN in "".join(names):
            raise ValidationError(
                "{} names must not include the character "
                "'{}'.".format(kind, CONN))
        return CONN.join(names)

    def prepare_classifications(self, instance):
        """Return the legislation's ``"name (code)"`` labels joined by ``CONN``."""
        labels = [
            "{} ({})".format(name, code)
            for name, code in instance.classifications.all().values_list(
                "name", "code")
        ]
        return self._join_names(labels, "Classification")

    def prepare_section_classifications(self, instance):
        """Return the distinct ``"name (code)"`` labels of all sections."""
        labels = {
            "{} ({})".format(classification.name, classification.code)
            for section in instance.sections.all()
            for classification in section.classifications.all()
        }
        return self._join_names(labels, "Classification")

    def prepare_tags(self, instance):
        """Return the legislation's tag names joined by ``CONN``."""
        names = instance.tags.all().values_list("name", flat=True)
        return self._join_names(names, "Tag")

    def prepare_section_tags(self, instance):
        """Return the distinct tag names of all sections joined by ``CONN``."""
        names = {
            tag.name
            for section in instance.sections.all()
            for tag in section.tags.all()
        }
        return self._join_names(names, "Tag")

    def prepare_country(self, instance):
        """Return the ``iso`` attribute of the legislation's country."""
        return instance.country.iso

    def prepare_pdf_text(self, instance):
        """Return the text of all pages, separated by blank lines."""
        return "\n\n".join([page.page_text for page in instance.pages.all()])

    def prepare_year_mentions(self, instance):
        """Return the 4-digit numbers in ``year_mention`` that are valid years.

        A number is kept when it lies within
        ``settings.MIN_YEAR``..``settings.MAX_YEAR`` (inclusive). A missing
        ``year_mention`` yields an empty list.
        """
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        candidates = (
            int(match)
            for match in re.findall(r"\d{4}", instance.year_mention or "")
        )
        return [
            year for year in candidates
            if settings.MIN_YEAR <= year <= settings.MAX_YEAR
        ]

    def get_instances_from_related(self, related_instance):
        """Return the legislation(s) to re-index for a changed related object."""
        if isinstance(related_instance, LegislationSection):
            return related_instance.legislation
        # Otherwise it's a TaxonomyClassification or TaxonomyTag
        return related_instance.legislation_set.all()

    class Django:
        model = Legislation  # The model associated with this Document

        # The fields of the model to be indexed in Elasticsearch
        fields = [
            "id",
            "title",
            "abstract",
            "year",
            "year_amendment",
        ]
        related_models = [
            LegislationSection,
            TaxonomyClassification,
            TaxonomyTag,
        ]
class QuestionDocument(GenericDataDocument):
    """Search document for instruments.Question."""

    instrument = fields.ObjectField(
        properties={
            "name": fields.TextField(),
            "label": fields.TextField(),
            "label_de": fields.TextField(),
        })
    items = fields.ObjectField(
        properties={
            "en": fields.ListField(fields.TextField(analyzer="english")),
            "de": fields.ListField(fields.TextField(analyzer="german")),
        })

    @staticmethod
    def _get_study(model_object: Question) -> Study:
        """Return the study of the question's instrument."""
        return model_object.instrument.study

    # lookup methods
    @staticmethod
    def prepare_analysis_unit(question: Question) -> Optional[str]:
        """Return the title of the instrument's analysis_unit.

        None is returned when the lookup raises AttributeError.
        """
        try:
            analysis_unit = question.instrument.analysis_unit
            return analysis_unit.title()
        except AttributeError:
            return None

    @staticmethod
    def prepare_period(question: Question) -> Optional[str]:
        """Return the title of the instrument's period.

        None is returned when the lookup raises AttributeError.
        """
        try:
            period = question.instrument.period
            return period.title()
        except AttributeError:
            return None

    @staticmethod
    def prepare_items(question: Question) -> Dict:
        """Collect the texts of the question's items and their answers.

        English strings (item ``text``, answer ``label``) go into ``"en"``,
        German strings (item ``text_de``, answer ``label_de``) into ``"de"``.
        Empty or missing values are skipped.
        """
        english = []
        german = []
        for item in question.items:
            # The item's own text first, then the labels of its answers.
            pairs = [(item.get("text"), item.get("text_de"))]
            pairs.extend(
                (answer.get("label"), answer.get("label_de"))
                for answer in item.get("answers", []))
            for text_en, text_de in pairs:
                if text_en:
                    english.append(text_en)
                if text_de:
                    german.append(text_de)
        return {"en": english, "de": german}

    class Index:  # pylint: disable=too-few-public-methods missing-class-docstring
        name = f"{settings.ELASTICSEARCH_DSL_INDEX_PREFIX}questions"

    class Django:  # pylint: disable=too-few-public-methods missing-class-docstring
        model = Question

    def get_queryset(self) -> QuerySet:
        """
        Return the queryset that should be indexed by this doc type.

        The instrument and its analysis_unit, period and study are loaded via
        select_related, and the loaded columns are restricted with only().
        """
        related = (
            "instrument",
            "instrument__analysis_unit",
            "instrument__period",
            "instrument__study",
        )
        columns = (
            "items",
            "name",
            "label",
            "label_de",
            "instrument__name",
            "instrument__label",
            "instrument__analysis_unit__name",
            "instrument__analysis_unit__label",
            "instrument__period__name",
            "instrument__period__label",
            "instrument__study__name",
            "instrument__study__label",
        )
        queryset = super().get_queryset()
        return queryset.select_related(*related).only(*columns)
class ConceptDocument(Document):
    """Search document for ``Concept``, stored in the ``concepts`` index."""

    class Index:
        name = 'concepts'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    name = fields.TextField()
    _name = fields.KeywordField(attr='display_name', normalizer='lowercase')
    last_update = fields.DateField(attr='updated_at')
    locale = fields.ListField(fields.KeywordField())
    source = fields.KeywordField(
        attr='parent_resource', normalizer="lowercase")
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source_version = fields.ListField(fields.KeywordField())
    collection_version = fields.ListField(fields.KeywordField())
    collection = fields.ListField(fields.KeywordField())
    collection_owner_url = fields.ListField(fields.KeywordField())
    public_can_view = fields.BooleanField(attr='public_can_view')
    datatype = fields.KeywordField(attr='datatype', normalizer="lowercase")
    concept_class = fields.KeywordField(
        attr='concept_class', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    extras = fields.ObjectField(dynamic=True)

    class Django:
        model = Concept
        fields = [
            'version',
            'external_id',
        ]

    @staticmethod
    def prepare_name(instance):
        """Return the display name with every ``-`` replaced by ``_``.

        A falsy display name is returned unchanged.
        """
        display_name = instance.display_name
        if not display_name:
            return display_name
        return display_name.replace('-', '_')

    @staticmethod
    def prepare_locale(instance):
        """Return the distinct non-null locales of the concept's names."""
        named = instance.names.filter(locale__isnull=False)
        locales = named.distinct('locale').values_list('locale', flat=True)
        return list(locales)

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_collection_owner_url(instance):
        """Return the distinct ``parent_url`` values of the related collections."""
        collections = instance.collection_set.select_related(
            'user', 'organization')
        return list({collection.parent_url for collection in collections})

    @staticmethod
    def prepare_extras(instance):
        """Return the JSON-safe extras, flattened when they form a dict.

        Falls back to an empty dict when there are no extras.
        """
        if not instance.extras:
            return {}
        extras = jsonify_safe(instance.extras)
        if isinstance(extras, dict):
            extras = flatten_dict(extras)
        return extras or {}
class PageDocument(DocType):
    """Search document for a single ``Page`` of a document.

    Most values are read from the page's parent ``document``.
    """

    document = fields.IntegerField(attr='document_id')
    title = fields.TextField()
    description = fields.TextField()
    tags = fields.ListField(fields.KeywordField())
    created_at = fields.DateField()
    publicbody = fields.IntegerField(attr='document.publicbody_id')
    jurisdiction = fields.IntegerField(
        attr='document.publicbody.jurisdiction_id')
    foirequest = fields.IntegerField(attr='document.foirequest_id')
    campaign = fields.IntegerField(attr='document.foirequest.campaign_id')
    collections = fields.IntegerField()
    user = fields.IntegerField(attr='document.user_id')
    team = fields.IntegerField(attr='document.team_id')
    public = fields.BooleanField()
    number = fields.IntegerField()
    content = fields.TextField(
        analyzer=analyzer,
        search_analyzer=search_analyzer,
        search_quote_analyzer=search_quote_analyzer,
        index_options='offsets',
    )

    class Meta:
        model = Page
        queryset_chunk_size = 50

    def get_queryset(self):
        """Return the pages to index with their document joined in.

        Not mandatory, but select_related saves one query per page.
        """
        queryset = super().get_queryset()
        return queryset.select_related('document', )

    def prepare_title(self, obj):
        """Return the document title for page 1, otherwise ``''``.

        Titles ending in ``.pdf`` are also replaced by ``''``.
        """
        if obj.number != 1:
            return ''
        if obj.document.title.endswith('.pdf'):
            return ''
        return obj.document.title

    def prepare_description(self, obj):
        """Return the document description for page 1, otherwise ``''``."""
        if obj.number != 1:
            return ''
        return obj.document.description

    def prepare_tags(self, obj):
        """Return the ids of the document's tags."""
        return [tag.id for tag in obj.document.tags.all()]

    def prepare_created_at(self, obj):
        """Return the creation date of the document."""
        return obj.document.created_at

    def prepare_public(self, obj):
        """Return the result of the document's ``is_public()``."""
        return obj.document.is_public()

    def prepare_team(self, obj):
        """Return the document's team id, or None when it is falsy."""
        return obj.document.team_id or None

    def prepare_collections(self, obj):
        """Return the ids from the document's ``document_documentcollection``."""
        related = obj.document.document_documentcollection.all()
        ids = related.values_list('id', flat=True)
        return list(ids)
class MappingDocument(Document):
    """Search document for ``Mapping``, stored in the ``mappings`` index."""

    class Index:
        name = 'mappings'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    class Django:
        model = Mapping
        fields = ['external_id']

    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(attr='owner_name', normalizer="lowercase")
    owner_type = fields.KeywordField(attr='owner_type')
    source = fields.KeywordField(attr='source', normalizer="lowercase")
    retired = fields.KeywordField(attr='retired')
    is_active = fields.KeywordField(attr='is_active')
    is_latest_version = fields.KeywordField(attr='is_latest_version')
    map_type = fields.KeywordField(attr='map_type', normalizer="lowercase")

    # Concept related fields; the list fields are filled by prepare_* below.
    from_concept = fields.ListField(fields.KeywordField())
    to_concept = fields.ListField(fields.KeywordField())
    concept = fields.ListField(fields.KeywordField())
    concept_source = fields.ListField(fields.KeywordField())
    concept_owner = fields.ListField(fields.KeywordField())
    from_concept_owner = fields.KeywordField(attr='from_source_owner')
    to_concept_owner = fields.KeywordField(attr='to_source_owner')
    concept_owner_type = fields.ListField(
        fields.KeywordField(attr='to_source_owner'))
    from_concept_owner_type = fields.KeywordField(
        attr='from_source_owner_type')
    to_concept_owner_type = fields.KeywordField(attr='to_source_owner_type')
    from_concept_source = fields.KeywordField(attr='from_source_name')
    to_concept_source = fields.KeywordField(attr='to_source_name')

    # Source / collection membership.
    source_version = fields.ListField(fields.TextField())
    collection_version = fields.ListField(fields.TextField())
    collection = fields.ListField(fields.KeywordField())

    public_can_view = fields.BooleanField(attr='public_can_view')
    id = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()

    @staticmethod
    def prepare_from_concept(instance):
        """Return ``[url, code, name]`` of the mapping's from-concept."""
        url = instance.from_concept_url
        code = instance.from_concept_code
        name = instance.from_concept_name
        return [url, code, name]

    @staticmethod
    def prepare_to_concept(instance):
        """Return ``[code, name]`` of the mapping's to-concept."""
        code = instance.get_to_concept_code()
        name = instance.get_to_concept_name()
        return [code, name]

    def prepare_concept(self, instance):
        """Return the from-concept values followed by the to-concept values."""
        from_values = self.prepare_from_concept(instance)
        to_values = self.prepare_to_concept(instance)
        return from_values + to_values

    @staticmethod
    def prepare_concept_source(instance):
        """Return the from- and to-source names."""
        return [instance.from_source_name, instance.to_source_name]

    @staticmethod
    def prepare_concept_owner(instance):
        """Return the from- and to-source owners."""
        return [instance.from_source_owner, instance.to_source_owner]

    @staticmethod
    def prepare_concept_owner_type(instance):
        """Return the from- and to-source owner types."""
        return [instance.from_source_owner_type, instance.to_source_owner_type]

    @staticmethod
    def prepare_source_version(instance):
        """Return the ``version`` of every related source."""
        versions = instance.sources.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection_version(instance):
        """Return the ``version`` of every related collection."""
        versions = instance.collection_set.values_list('version', flat=True)
        return list(versions)

    @staticmethod
    def prepare_collection(instance):
        """Return the distinct ``mnemonic`` values of the related collections."""
        mnemonics = instance.collection_set.values_list('mnemonic', flat=True)
        return list(set(mnemonics))

    @staticmethod
    def prepare_extras(instance):
        """Return the mapping's extras, or an empty dict when they are falsy."""
        extras = instance.extras
        return extras if extras else {}
class CollectionItemDocument(DocType):
    """Collection item document.

    Nearly every field reads a ``*_indexing`` attribute of the ``Item`` model.
    Text fields that are searched or filtered on carry ``raw`` (keyword) and
    ``natural`` (language analyzer) sub-fields; only the object type fields
    additionally carry a ``suggest`` completion sub-field. Fields declared
    with ``index='no'`` are meant to be shown on the detail page.
    """

    # ------------------------------------------------------------------
    # Identification
    # ------------------------------------------------------------------
    id = fields.IntegerField(attr='id')
    record_number = KeywordField()
    inventory_number = KeywordField()
    api_url = KeywordField(index="not_analyzed")
    web_url = KeywordField(index="not_analyzed")

    # ------------------------------------------------------------------
    # Main data fields for search and filtering
    # ------------------------------------------------------------------
    importer_uid = KeywordField(attr='importer_uid_indexing')
    language_code_orig = KeywordField(attr='language_code_orig')
    department = StringField(
        attr='department_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )

    # ------------------------------------------------------------------
    # English
    # ------------------------------------------------------------------
    title_en = StringField(
        attr='title_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    description_en = StringField(
        attr='description_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    period_en = StringField(
        attr='period_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    # Period tree, four levels deep (period_1 > period_2 > period_3 > period_4).
    period_1_en = fields.NestedField(
        attr='period_1_en_indexing',
        properties={
            'name': StringField(
                analyzer=html_strip_synonyms_en,
                fields={'raw': KeywordField()},
            ),
            'period_2_en': fields.NestedField(
                properties={
                    'name': StringField(
                        analyzer=html_strip_synonyms_en,
                        fields={'raw': KeywordField()},
                    ),
                    'period_3_en': fields.NestedField(
                        properties={
                            'name': StringField(
                                analyzer=html_strip_synonyms_en,
                                fields={'raw': KeywordField()},
                            ),
                            'period_4_en': fields.NestedField(
                                properties={
                                    'name': StringField(
                                        analyzer=html_strip_synonyms_en,
                                        fields={'raw': KeywordField()},
                                    ),
                                },
                            ),
                        },
                    ),
                },
            ),
        },
    )
    primary_object_type_en = StringField(
        attr='primary_object_type_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            'suggest': fields.CompletionField(),
        },
    )
    object_type_en = StringField(
        attr='object_type_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
            'suggest': fields.CompletionField(),
        },
    )
    # Detail page only.
    object_type_detail_en = fields.TextField(
        attr='object_type_detail_en_indexing', index='no')
    material_en = StringField(
        attr='material_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    # Detail page only.
    material_detail_en = fields.TextField(
        attr='material_detail_en_indexing', index='no')
    city_en = StringField(
        attr='city_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    country_en = StringField(
        attr='country_en_indexing',
        analyzer=html_strip_synonyms_en,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='english'),
        },
    )
    # Everything below in this section is for the detail page only.
    references_en = fields.TextField(
        attr='references_en_indexing', index='no')
    acquired_en = fields.TextField(attr='acquired_en_indexing', index='no')
    site_found_en = fields.TextField(
        attr='site_found_en_indexing', index='no')
    reign_en = fields.TextField(attr='reign_en_indexing', index='no')
    keywords_en = fields.TextField(attr='keywords_en_indexing', index='no')
    dynasty_en = fields.TextField(attr='dynasty_en_indexing', index='no')
    # New fields
    credit_line_en = fields.TextField(
        attr='credit_line_en_indexing', index='no')
    region_en = fields.TextField(attr='region_en_indexing', index='no')
    sub_region_en = fields.TextField(
        attr='sub_region_en_indexing', index='no')
    locale_en = fields.TextField(attr='locale_en_indexing', index='no')
    excavation_en = fields.TextField(
        attr='excavation_en_indexing', index='no')
    museum_collection_en = fields.TextField(
        attr='museum_collection_en_indexing', index='no')
    style_en = fields.TextField(attr='style_en_indexing', index='no')
    culture_en = fields.TextField(attr='culture_en_indexing', index='no')
    inscriptions_en = fields.TextField(
        attr='inscriptions_en_indexing', index='no')
    provenance_en = fields.TextField(
        attr='provenance_en_indexing', index='no')
    exhibitions_en = fields.TextField(
        attr='exhibitions_en_indexing', index='no')

    # ------------------------------------------------------------------
    # Dutch
    # ------------------------------------------------------------------
    title_nl = StringField(
        attr='title_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    description_nl = StringField(
        attr='description_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    period_nl = StringField(
        attr='period_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    # Period tree, four levels deep (period_1 > period_2 > period_3 > period_4).
    period_1_nl = fields.NestedField(
        attr='period_1_nl_indexing',
        properties={
            'name': StringField(
                analyzer=html_strip_synonyms_nl,
                fields={'raw': KeywordField()},
            ),
            'period_2_nl': fields.NestedField(
                properties={
                    'name': StringField(
                        analyzer=html_strip_synonyms_nl,
                        fields={'raw': KeywordField()},
                    ),
                    'period_3_nl': fields.NestedField(
                        properties={
                            'name': StringField(
                                analyzer=html_strip_synonyms_nl,
                                fields={'raw': KeywordField()},
                            ),
                            'period_4_nl': fields.NestedField(
                                properties={
                                    'name': StringField(
                                        analyzer=html_strip_synonyms_nl,
                                        fields={'raw': KeywordField()},
                                    ),
                                },
                            ),
                        },
                    ),
                },
            ),
        },
    )
    primary_object_type_nl = StringField(
        attr='primary_object_type_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            'suggest': fields.CompletionField(),
        },
    )
    object_type_nl = StringField(
        attr='object_type_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
            'suggest': fields.CompletionField(),
        },
    )
    # Detail page only.
    object_type_detail_nl = fields.TextField(
        attr='object_type_detail_nl_indexing', index='no')
    material_nl = StringField(
        attr='material_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    # Detail page only.
    material_detail_nl = fields.TextField(
        attr='material_detail_nl_indexing', index='no')
    city_nl = StringField(
        attr='city_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    country_nl = StringField(
        attr='country_nl_indexing',
        analyzer=html_strip_synonyms_nl,
        fields={
            'raw': KeywordField(),
            'natural': StringField(analyzer='dutch'),
        },
    )
    # Everything below in this section is for the detail page only.
    keywords_nl = fields.TextField(attr='keywords_nl_indexing', index='no')
    acquired_nl = fields.TextField(attr='acquired_nl_indexing', index='no')
    site_found_nl = fields.TextField(
        attr='site_found_nl_indexing', index='no')
    reign_nl = fields.TextField(attr='reign_nl_indexing', index='no')
    references_nl = fields.TextField(
        attr='references_nl_indexing', index='no')
    dynasty_nl = fields.TextField(attr='dynasty_nl_indexing', index='no')
    # New fields
    credit_line_nl = fields.TextField(
        attr='credit_line_nl_indexing', index='no')
    region_nl = fields.TextField(attr='region_nl_indexing', index='no')
    sub_region_nl = fields.TextField(
        attr='sub_region_nl_indexing', index='no')
    locale_nl = fields.TextField(attr='locale_nl_indexing', index='no')
    excavation_nl = fields.TextField(
        attr='excavation_nl_indexing', index='no')
    museum_collection_nl = fields.TextField(
        attr='museum_collection_nl_indexing', index='no')
    style_nl = fields.TextField(attr='style_nl_indexing', index='no')
    culture_nl = fields.TextField(attr='culture_nl_indexing', index='no')
    inscriptions_nl = fields.TextField(
        attr='inscriptions_nl_indexing', index='no')
    provenance_nl = fields.TextField(
        attr='provenance_nl_indexing', index='no')
    exhibitions_nl = fields.TextField(
        attr='exhibitions_nl_indexing', index='no')

    # ------------------------------------------------------------------
    # Language independent
    # ------------------------------------------------------------------
    dimensions = StringField(
        attr='dimensions_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
        },
    )
    object_date_begin = StringField(
        attr='object_date_begin_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
        },
    )
    object_date_end = StringField(
        attr='object_date_end_indexing',
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'natural': StringField(),
        },
    )
    location = fields.GeoPointField(attr='geo_location_indexing')

    # List of 32x32 PNG versions of the images (full paths).
    images = fields.ListField(StringField(attr='images_indexing'))

    # List of image URLs.
    images_urls = fields.ListField(
        fields.ObjectField(
            attr='images_urls_indexing',
            properties={
                'th': KeywordField(index="not_analyzed"),
                'lr': KeywordField(index="not_analyzed"),
            },
        ))

    # Classified as by our AI
    classified_as = fields.ListField(
        StringField(
            attr='classified_as_indexing',
            fields={'raw': KeywordField()},
        ))
    # First, second and third classification element.
    classified_as_1 = StringField(
        attr='classified_as_1_indexing',
        fields={'raw': KeywordField()},
    )
    classified_as_2 = StringField(
        attr='classified_as_2_indexing',
        fields={'raw': KeywordField()},
    )
    classified_as_3 = StringField(
        attr='classified_as_3_indexing',
        fields={'raw': KeywordField()},
    )

    class Meta(object):
        """Meta options."""

        model = Item  # The model associated with this DocType

    def get_queryset(self):
        """Filter out items that are not eligible for indexing.

        Items are dropped when a title is NULL or empty in English or Dutch,
        or when either ``object_type_*_indexing`` value is falsy.
        """
        queryset = super(CollectionItemDocument, self).get_queryset()

        title_missing = []
        for field in ['title']:
            for language in ['en', 'nl']:
                title_missing += [
                    Q(**{"{}_{}__isnull".format(field, language): True}),
                    Q(**{"{}_{}__exact".format(field, language): ''}),
                ]
        if title_missing:
            queryset = queryset.exclude(
                six.moves.reduce(operator.or_, title_missing))

        # ``object_type`` and ``classification`` are concatenated after
        # cleaning, so this check cannot be done in the database.
        without_object_type = [
            item.pk
            for item in queryset
            if not (item.object_type_en_indexing
                    and item.object_type_nl_indexing)
        ]
        return queryset.exclude(id__in=without_object_type)

    def prepare_department(self, instance):
        """Prepare department, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.department_indexing or VALUE_NOT_SPECIFIED

    def prepare_object_date_begin(self, instance):
        """Prepare object begin date."""
        return instance.object_date_begin_indexing

    def prepare_object_date_end(self, instance):
        """Prepare object end date."""
        return instance.object_date_end_indexing

    # ------------------------------------------------------------------
    # English
    # ------------------------------------------------------------------

    def prepare_material_en(self, instance):
        """Prepare English material, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.material_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_en(self, instance):
        """Prepare English period, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.period_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_en(self, instance):
        """Prepare English dynasty, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.dynasty_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_en(self, instance):
        """Prepare English description, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.description_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_en(self, instance):
        """Prepare English city, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.city_en_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_en(self, instance):
        """Prepare English country, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.country_en_indexing or VALUE_NOT_SPECIFIED

    # ------------------------------------------------------------------
    # Dutch
    # ------------------------------------------------------------------

    def prepare_material_nl(self, instance):
        """Prepare Dutch material, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.material_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_period_nl(self, instance):
        """Prepare Dutch period, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.period_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_dynasty_nl(self, instance):
        """Prepare Dutch dynasty, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.dynasty_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_description_nl(self, instance):
        """Prepare Dutch description, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.description_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_city_nl(self, instance):
        """Prepare Dutch city, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.city_nl_indexing or VALUE_NOT_SPECIFIED

    def prepare_country_nl(self, instance):
        """Prepare Dutch country, defaulting to VALUE_NOT_SPECIFIED."""
        return instance.country_nl_indexing or VALUE_NOT_SPECIFIED
class SourceDocument(Document):
    """Search document definition for the ``Source`` model."""

    class Index:
        name = 'sources'
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    # Fields whose values come from a model attribute or a ``prepare_*`` hook.
    locale = fields.ListField(fields.KeywordField())
    last_update = fields.DateField(attr='updated_at')
    owner = fields.KeywordField(
        attr='parent_resource',
        normalizer='lowercase',
    )
    owner_type = fields.KeywordField(attr='parent_resource_type')
    public_can_view = fields.TextField(attr='public_can_view')
    source_type = fields.KeywordField(
        attr='source_type',
        normalizer='lowercase',
    )
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url',
        normalizer='lowercase',
    )
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')
    extras = fields.ObjectField(dynamic=True)
    identifier = fields.ObjectField()
    jurisdiction = fields.ObjectField()
    publisher = fields.KeywordField(attr='publisher', normalizer='lowercase')
    content_type = fields.KeywordField(
        attr='content_type',
        normalizer='lowercase',
    )
    custom_validation_schema = fields.KeywordField(
        attr='custom_validation_schema',
        normalizer='lowercase',
    )
    hierarchy_meaning = fields.KeywordField(
        attr='hierarchy_meaning',
        normalizer='lowercase',
    )
    created_by = fields.KeywordField()

    class Django:
        model = Source
        fields = [
            'full_name',
            'revision_date',
            'retired',
            'experimental',
            'case_sensitive',
            'compositional',
            'version_needed',
            'external_id',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the value indexed as ``locale``."""
        # NOTE(review): ``get`` is defined outside this class; presumably it
        # yields ``[]`` when ``supported_locales`` is unset -- confirm.
        locales = instance.supported_locales
        return get(locales, [])

    @staticmethod
    def prepare_extras(instance):
        """Return the instance's extras as a flattened dict, or ``{}``."""
        if not instance.extras:
            return {}

        extras = jsonify_safe(instance.extras)
        # Only dict results are flattened; anything else is passed through.
        if isinstance(extras, dict):
            extras = flatten_dict(extras)
        return extras or {}

    @staticmethod
    def prepare_identifier(instance):
        """Return the instance's identifier via ``jsonify_safe``, or ``{}``."""
        if not instance.identifier:
            return {}
        return jsonify_safe(instance.identifier) or {}

    @staticmethod
    def prepare_jurisdiction(instance):
        """Return the instance's jurisdiction via ``jsonify_safe``, or ``{}``."""
        if not instance.jurisdiction:
            return {}
        return jsonify_safe(instance.jurisdiction) or {}

    @staticmethod
    def prepare_created_by(instance):
        """Return the username of the user who created the instance."""
        creator = instance.created_by
        return creator.username