class NoteDocument(Document):
    """
    Index document describing how a ``Note`` is mapped.
    """

    # Identifiers; ``user`` is populated from the ``user_id`` attribute.
    id = fields.IntegerField(attr='id')
    user = fields.KeywordField(attr='user_id')
    course_id = fields.KeywordField()
    usage_id = fields.KeywordField()

    # Free-text content, analyzed with ``html_strip``.
    quote = fields.TextField(analyzer=html_strip)
    text = fields.TextField(analyzer=html_strip)

    ranges = fields.KeywordField()
    created = fields.DateField()
    updated = fields.DateField()
    tags = fields.TextField(multi=True, analyzer=case_insensitive_keyword)

    def prepare_data(self, instance):
        """
        Return the note text immediately followed by the raw tags value.

        NOTE(review): no ``data`` field is declared on this document --
        confirm this hook is still invoked.
        """
        note_text = instance.text
        raw_tags = instance.tags
        return '{0}{1}'.format(note_text, raw_tags)

    def prepare_tags(self, instance):
        """
        Decode the JSON-encoded ``tags`` attribute of ``instance``.
        """
        decoded_tags = json.loads(instance.tags)
        return decoded_tags

    class Django:
        model = Note

    class Meta:
        # NOTE(review): confirm these options are read from ``Meta`` and not
        # expected on the ``Django`` inner class by the library version in use.
        parallel_indexing = True
        queryset_pagination = 50
class ExtendedDocMixin(CommonDocMixin):
    """Extend ``CommonDocMixin`` with slug, uuid, status, view count and lifecycle dates."""

    slug = TranslatedKeywordField('slug')
    uuid = fields.TextField()
    status = fields.KeywordField()
    views_count = fields.IntegerField()

    # Lifecycle timestamps.
    published_at = fields.DateField()
    removed_at = fields.DateField()
class CarDocument(Document):
    """Elasticsearch document backed by the ``PackageShort`` model."""

    id = fields.IntegerField(attr='id')

    # Analyzed text with a ``raw`` sub-field using the ``keyword`` analyzer.
    name = fields.TextField(
        attr="name",
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword'),
        },
    )

    description = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword'),
            'tokenized': fields.TextField(analyzer=html_strip),
        },
    )

    hosted = fields.DateField(
        fields={"raw": fields.DateField()},
    )

    version = fields.TextField(
        analyzer=html_strip,
        fields={
            'raw': fields.TextField(analyzer='keyword'),
        },
    )

    url = fields.TextField(attr="url")

    class Django(object):
        """Meta options."""

        model = PackageShort
def datasets_field(**kwargs):
    """Build the nested field describing a collection of datasets.

    All keyword arguments are forwarded unchanged to ``fields.NestedField``
    alongside the fixed ``properties`` mapping defined here.
    """
    dataset_properties = {
        'id': fields.IntegerField(),
        'title': TranslatedTextField('title'),
        'notes': TranslatedTextField('notes'),
        'category': fields.KeywordField(attr='category.title'),
        'formats': fields.KeywordField(attr='formats', multi=True),
        'downloads_count': fields.IntegerField(attr='computed_downloads_count'),
        'views_count': fields.IntegerField(attr='computed_views_count'),
        'openness_scores': fields.IntegerField(attr='openness_scores'),
        'modified': fields.DateField(),
        'slug': TranslatedKeywordField('slug'),
        'verified': fields.DateField(),
    }
    return fields.NestedField(properties=dataset_properties, **kwargs)
class MotionDocument(DocType):
    """Index document for the ``Motion`` model."""

    # Single related objects, mapped as plain objects.
    convention = fields.ObjectField(
        properties={
            'label': fields.KeywordField(),
            'slot': fields.KeywordField(),
            'year': fields.DateField(),
        },
    )
    section = fields.ObjectField(
        properties={
            'name': fields.KeywordField(),
        },
    )

    identifier = fields.KeywordField()

    # Collections, mapped as nested documents.
    submitters = fields.NestedField(
        properties={'name': fields.KeywordField()},
    )
    referrals = fields.NestedField(
        properties={'name': fields.KeywordField()},
    )

    status = fields.KeywordField()
    created = fields.DateField()

    class Meta:
        model = Motion
        # Model fields listed here in addition to the ones declared above.
        fields = [
            'title',
            'body',
        ]
class ApplicationsDoc(DocType):
    """Index document for ``Application`` objects with status ``published``."""

    id = fields.IntegerField()
    slug = TranslatedKeywordField('slug')
    title = TranslatedTextField(
        'title',
        common_params={'suggest': fields.CompletionField()},
    )
    notes = TranslatedTextField('notes')
    author = fields.KeywordField()

    # Links.
    url = fields.KeywordField()
    image_url = fields.KeywordField(attr='image_url')
    image_thumb_url = fields.KeywordField(attr='image_thumb_url')

    # Related collections.
    datasets = datasets_field(attr='published_datasets')
    users_following = fields.KeywordField(
        attr='users_following_list',
        multi=True,
    )
    tags = TranslatedKeywordsList(attr='tags_list')

    views_count = fields.IntegerField()
    status = fields.KeywordField()
    modified = fields.DateField()
    created = fields.DateField()

    class Meta:
        doc_type = 'application'
        model = Application

    def get_queryset(self):
        """Return the model's objects whose status is ``published``."""
        model_cls = self._doc_type.model
        return model_cls.objects.filter(status='published')
class CompetitionDocument(DocType):
    """Index document for the ``Competition`` model."""

    class Meta:
        model = Competition

    remote_id = fields.IntegerField()
    created_by = fields.TextField()

    # Descriptive text.
    title = fields.TextField()
    description = fields.TextField()
    html_text = fields.TextField()

    participant_count = fields.IntegerField()
    is_active = fields.BooleanField()
    prize = fields.IntegerField()
    current_phase_deadline = fields.DateField()
    url = fields.TextField()
    logo = fields.TextField()
    start = fields.DateField()
    end = fields.DateField()

    producer = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'url': fields.TextField(),
            'name': fields.TextField(),
        },
    )

    # TODO: add "active" boolean field so we can add this to queries and not have a special case

    def prepare_created_by(self, instance):
        """Return the ``created_by`` attribute of ``instance`` unchanged."""
        creator = instance.created_by
        return creator
class ExperimentDocument(Document):
    """Index document for the ``Experiment`` model, stored in the ``experiments`` index."""

    class Index:
        name = 'experiments'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
        }

    id = fields.IntegerField()

    # Analyzed text, each with an un-analyzed ``raw`` keyword sub-field.
    title = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )
    description = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )

    public_access = fields.IntegerField()
    created_time = fields.DateField()
    start_time = fields.DateField()
    end_time = fields.DateField()
    update_time = fields.DateField()
    institution_name = fields.StringField()

    created_by = fields.ObjectField(
        properties={
            'username': fields.StringField(
                fields={'raw': fields.KeywordField()},
            ),
        },
    )
    objectacls = fields.ObjectField(
        properties={
            'pluginId': fields.StringField(),
            'entityId': fields.StringField(),
        },
    )

    class Django:
        model = Experiment
        related_models = [User, ObjectACL]

    def get_instances_from_related(self, related_instance):
        """Map a changed ``User`` or ``ObjectACL`` to what should be re-indexed.

        A ``User`` yields its ``experiment_set``; an ``ObjectACL`` yields its
        ``content_object``; anything else yields ``None``.
        """
        affected = None
        if isinstance(related_instance, User):
            affected = related_instance.experiment_set.all()
        elif isinstance(related_instance, ObjectACL):
            affected = related_instance.content_object
        return affected
class AssetDocument(Document):
    """Elasticsearch document for the ``Asset`` model, stored in the ``assets`` index."""

    class Index:
        # Name of the Elasticsearch index
        name = "assets"
        # See Elasticsearch Indices API reference for available settings
        settings = {
            "number_of_shards": 1,
            "number_of_replicas": 0,
        }

    # item -> project -> (campaign, topics)
    item = fields.ObjectField(
        properties={
            "item_id": fields.KeywordField(),
            "project": fields.ObjectField(
                properties={
                    "slug": fields.KeywordField(),
                    "campaign": fields.ObjectField(
                        properties={"slug": fields.KeywordField()},
                    ),
                    "topics": fields.NestedField(
                        properties={"slug": fields.KeywordField()},
                    ),
                },
            ),
        },
    )

    transcription_status = fields.KeywordField()

    latest_transcription = fields.ObjectField(
        properties={
            "created_on": fields.DateField(),
            "updated_on": fields.DateField(),
            "accepted": fields.DateField(),
            "rejected": fields.DateField(),
            "submitted": fields.DateField(),
        },
    )

    submission_count = fields.IntegerField()

    def prepare_submission_count(self, instance):
        """Count the asset's transcriptions whose ``submitted`` value is null.

        NOTE(review): the filter selects rows where ``submitted`` IS null,
        which reads as the opposite of the field name -- confirm the intent.
        """
        matching = Transcription.objects.filter(
            asset=instance,
            submitted__isnull=True,
        )
        return matching.count()

    class Django:
        model = Asset
        fields = ["published", "difficulty", "slug", "sequence", "year"]

    def get_queryset(self):
        """Return the base queryset ordered by ``pk`` with item relations prefetched."""
        ordered = super().get_queryset().order_by("pk")
        return ordered.prefetch_related(
            "item",
            "item__project",
            "item__project__topics",
            "item__project__campaign",
        )
class ActDocument(Document):
    """Elasticsearch document for the ``Act`` model, stored in the ``acts`` index.

    Each act embeds its subject, the subject's item and the item's period.
    """

    # subject -> item -> period
    subject = fields.ObjectField(
        properties={
            'subject_title': fields.TextField(),
            'subject_url': fields.TextField(),
            'item': fields.ObjectField(
                properties={
                    'item_title': fields.TextField(),
                    'item_number': fields.IntegerField(),
                    'item_text': fields.TextField(),
                    'period': fields.ObjectField(
                        properties={
                            'period_text': fields.TextField(),
                            'start_date': fields.DateField(),
                            'end_date': fields.DateField(),
                            'parent_url': fields.TextField(),
                            'period_url': fields.TextField(),
                        },
                    ),
                },
            ),
        },
    )

    class Index:
        # Name of the Elasticsearch index
        name = 'acts'
        # See Elasticsearch Indices API reference for available settings
        settings = {'number_of_shards': 1, 'number_of_replicas': 0}

    content = fields.TextField(analyzer=act_analyzer)

    class Django:
        model = Act
        fields = [
            'id',
            'title',
            'content_url',
            'file_type',
            'city',
        ]
        # Models whose changes should trigger a re-index of the related acts.
        related_models = [Subject, Item, Period]

    # Optional performance tweak (kept disabled):
    # def get_queryset(self):
    #     """Not mandatory but to improve performance we can select related in one sql request"""
    #     return super(ActDocument, self).get_queryset().select_related(
    #         'subject', 'subject__item', 'subject__item__period',
    #     )

    def get_instances_from_related(self, related_instance):
        """Return the acts to re-index after a related model instance changed.

        Every model listed in ``Django.related_models`` is handled. Previously
        only ``Subject`` was, so changes to an ``Item`` or ``Period`` left the
        data embedded in the act documents stale.

        Returns ``None`` for any other type, meaning nothing is re-indexed.
        """
        if isinstance(related_instance, Subject):
            return related_instance.act_set.all()
        if isinstance(related_instance, Item):
            # Follow act -> subject -> item, the same path the mapping embeds.
            return Act.objects.filter(subject__item=related_instance)
        if isinstance(related_instance, Period):
            # Follow act -> subject -> item -> period.
            return Act.objects.filter(subject__item__period=related_instance)
        return None
class ApplicationsDoc(DocType):
    """Index document for ``Application`` objects with status ``published``."""

    id = fields.IntegerField()
    slug = fields.KeywordField()

    title = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    notes = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
        },
    )
    author = fields.TextField(
        analyzer=standard_analyzer,
        fields={
            'raw': fields.KeywordField(),
            # TODO: fix author data before this
            # 'suggest': fields.CompletionField()
        },
    )

    url = fields.KeywordField()
    image_url = fields.KeywordField(attr='image_url')

    # Related collections.
    datasets = datasets_field(attr='published_datasets')
    users_following = fields.KeywordField(
        attr='users_following_list',
        multi=True,
    )
    tags = fields.KeywordField(attr='tags_list', multi=True)

    views_count = fields.IntegerField()
    status = fields.KeywordField()
    modified = fields.DateField()
    created = fields.DateField()

    class Meta:
        doc_type = 'application'
        model = Application
        related_models = [Tag, Dataset, UserFollowingApplication]

    def get_instances_from_related(self, related_instance):
        """Return the applications affected by a change to ``related_instance``.

        A ``UserFollowingApplication`` yields its follower's followed
        applications; a ``Dataset`` or ``Tag`` yields its ``applications``.
        Any other type yields ``None``.
        """
        if isinstance(related_instance, UserFollowingApplication):
            follower = related_instance.follower
            return follower.followed_applications.all()
        if isinstance(related_instance, (Dataset, Tag)):
            return related_instance.applications.all()
        return None

    def get_queryset(self):
        """Return the model's objects whose status is ``published``."""
        model_cls = self._doc_type.model
        return model_cls.objects.filter(status='published')
class ArticleDoc(DocType):
    """Index document for ``Article`` objects with status ``published`` or ``draft``."""

    id = fields.IntegerField()
    slug = fields.KeywordField()

    title = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    notes = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.TextField(),
        },
    )
    author = fields.TextField(
        analyzer=standard_analyzer,
        fields={
            'raw': fields.KeywordField(),
            # TODO: fix author data before this
            # 'suggest': fields.CompletionField()
        },
    )

    datasets = datasets_field(attr='published_datasets')
    license = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'name': fields.TextField(),
            'title': fields.TextField(),
            'url': fields.TextField(),
        },
    )
    tags = fields.KeywordField(attr='tags_list', multi=True)
    views_count = fields.IntegerField()
    users_following = fields.KeywordField(
        attr='users_following_list',
        multi=True,
    )
    status = fields.TextField()
    modified = fields.DateField()
    created = fields.DateField()

    class Meta:
        doc_type = 'article'
        model = Article
        related_models = [Tag, Dataset, UserFollowingArticle]

    def get_instances_from_related(self, related_instance):
        """Return the articles affected by a change to ``related_instance``.

        A ``UserFollowingArticle`` yields the follower's followed articles; a
        ``Dataset`` or ``Tag`` yields its ``articles``. Any other type yields
        ``None``.
        """
        if isinstance(related_instance, UserFollowingArticle):
            # This branch used to return ``followed_applications``, which hands
            # non-article rows to the article index.
            # NOTE(review): ``followed_articles`` is assumed to be the matching
            # relation on the follower -- confirm against the user model.
            return related_instance.follower.followed_articles.all()
        if isinstance(related_instance, (Dataset, Tag)):
            return related_instance.articles.all()
        return None

    def get_queryset(self):
        """Return the model's objects whose status is ``published`` or ``draft``."""
        indexed_statuses = ['published', 'draft']
        return self._doc_type.model.objects.filter(status__in=indexed_statuses)
class NewsDocument(DocType):
    """Elasticsearch document for the ``News`` model."""

    id = fields.IntegerField(attr='id')

    # Analyzed strings, each with a ``raw`` sub-field using the ``keyword`` analyzer.
    title = fields.StringField(
        analyzer=html_strip,
        fields={
            'raw': fields.StringField(analyzer='keyword', fielddata=True),
        },
    )
    content = fields.StringField(
        analyzer=html_strip,
        fields={
            'raw': fields.StringField(analyzer='keyword'),
        },
    )

    news_date = fields.DateField()
    entry_date = fields.DateField()

    stock = fields.StringField(
        attr='stock_indexing',
        analyzer=html_strip,
        fields={
            'raw': fields.StringField(analyzer='keyword'),
        },
    )
    author = fields.StringField(
        attr='author_indexing',
        analyzer=html_strip,
        fields={
            'raw': fields.StringField(analyzer='keyword'),
        },
    )
    sec_code = fields.StringField(
        analyzer=html_strip,
        fields={
            'raw': fields.StringField(analyzer='keyword'),
        },
    )

    # Flags.
    is_featured = fields.BooleanField()
    has_downloadable = fields.BooleanField()
    is_main = fields.BooleanField()

    class Meta(object):
        """Meta options."""

        model = News  # The model associated with this DocType
class JobDocument(Document):
    """Elasticsearch document for the ``Job`` model, stored in the ``jobs`` index."""

    # Object/List fields
    analyzers_to_execute = fields.ListField(fields.KeywordField())
    connectors_to_execute = fields.ListField(fields.KeywordField())

    # Normal fields
    errors = fields.TextField()

    # Keyword fields to allow aggregations/visualizations
    source = fields.KeywordField()
    status = fields.KeywordField()
    md5 = fields.KeywordField()
    tlp = fields.KeywordField()
    observable_name = fields.KeywordField()
    observable_classification = fields.KeywordField()
    file_name = fields.KeywordField()
    file_mimetype = fields.KeywordField()

    # Nested (ForeignKey) fields
    tags = fields.NestedField(
        properties={
            "label": fields.KeywordField(),
            "color": fields.TextField(),
        },
    )
    analyzer_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        },
    )
    connector_reports = fields.NestedField(
        properties={
            "name": fields.KeywordField(),
            "status": fields.KeywordField(),
            "report": fields.ObjectField(),
            "errors": fields.TextField(),
            "start_time": fields.DateField(),
            "end_time": fields.DateField(),
        },
    )

    class Index:
        # Name of the Elasticsearch index
        name = "jobs"

    class Django:
        # The model associated with this Document
        model = Job

        # The fields of the model you want to be indexed in Elasticsearch
        fields = [
            "is_sample",
            "received_request_time",
            "finished_analysis_time",
        ]
class AcceptedDatasetSubmissionDoc(Document):
    """Index document for ``AcceptedDatasetSubmission`` objects in a published status."""

    id = fields.IntegerField()
    is_active = fields.BooleanField()

    # Translated text.
    title = TranslatedTextField('title')
    notes = TranslatedTextField('notes')
    organization_name = TranslatedTextField('organization_name')
    potential_possibilities = TranslatedTextField('potential_possibilities')

    data_link = fields.TextField()
    comment = fields.TextField()
    submission_date = fields.DateField()
    decision = fields.TextField()
    decision_date = fields.DateField()
    published_at = fields.DateField()
    is_published_for_all = fields.BooleanField()

    feedback = fields.NestedField(
        properties={
            'user_id': fields.IntegerField(),
            'opinion': fields.StringField(),
        },
    )
    feedback_counters = fields.NestedField(
        properties={
            'plus': fields.IntegerField(),
            'minus': fields.IntegerField(),
        },
    )
    status = fields.TextField()

    def prepare_feedback(self, instance):
        """Return one ``{'user_id', 'opinion'}`` dict per feedback entry of ``instance``."""
        entries = instance.feedback.all()
        return [
            {'user_id': entry.user.id, 'opinion': entry.opinion}
            for entry in entries
        ]

    def prepare_feedback_counters(self, instance):
        """Return the ``feedback_counters`` attribute of ``instance`` unchanged."""
        counters = instance.feedback_counters
        return counters

    class Index:
        name = mcs.ELASTICSEARCH_INDEX_NAMES['accepted_dataset_submissions']
        settings = mcs.ELASTICSEARCH_DSL_INDEX_SETTINGS

    class Django:
        model = AcceptedDatasetSubmission
        related_models = [
            SubmissionFeedback,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the submission a changed ``SubmissionFeedback`` belongs to, else ``None``."""
        if not isinstance(related_instance, SubmissionFeedback):
            return None
        return related_instance.submission

    def get_queryset(self):
        """Restrict the base queryset to ``AcceptedDatasetSubmission.PUBLISHED_STATUSES``."""
        published = AcceptedDatasetSubmission.PUBLISHED_STATUSES
        return super().get_queryset().filter(status__in=published)
class PostDocument(BaseDocument):
    """Index document for the ``ResearchhubPost`` model, stored in the ``post`` index."""

    auto_refresh = True

    hubs_flat = es_fields.TextField(attr='hubs_indexing_flat')

    # Numeric attributes.
    hot_score = es_fields.IntegerField(attr='hot_score')
    score = es_fields.IntegerField(attr='score')
    discussion_count = es_fields.IntegerField(attr='discussion_count')
    unified_document_id = es_fields.IntegerField(attr='unified_document_id')

    title = es_fields.TextField(analyzer=title_analyzer)
    created_date = es_fields.DateField(attr='created_date')
    updated_date = es_fields.DateField(attr='updated_date')
    preview_img = es_fields.TextField(attr='preview_img')
    renderable_text = es_fields.TextField(
        attr='renderable_text',
        analyzer=content_analyzer,
    )
    created_by_id = es_fields.IntegerField(attr='created_by_id')

    authors = es_fields.ObjectField(
        attr='authors_indexing',
        properties={
            'first_name': es_fields.TextField(),
            'last_name': es_fields.TextField(),
            'full_name': es_fields.TextField(),
        },
    )
    hubs = es_fields.ObjectField(
        attr='hubs_indexing',
        properties={
            'hub_image': es_fields.TextField(),
            'id': es_fields.IntegerField(),
            'is_locked': es_fields.TextField(),
            'is_removed': es_fields.TextField(),
            'name': es_fields.KeywordField(),
        },
    )

    class Index:
        name = 'post'

    class Django:
        model = ResearchhubPost
        queryset_pagination = 250
        fields = [
            'id',
            'document_type',
        ]

    def should_remove_from_index(self, obj):
        """Return ``True`` when ``obj.is_removed`` is truthy, otherwise ``False``."""
        return bool(obj.is_removed)
class CalculationDocument(Document):
    """Elasticsearch document for the ``Calculation`` model."""

    id = fields.IntegerField(attr='id')

    # Analyzed text with an un-analyzed ``raw`` keyword sub-field.
    title = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()},
    )
    body = fields.TextField(
        analyzer=html_strip,
        fields={'raw': fields.KeywordField()},
    )

    # Populated from the ``author_id`` attribute.
    author = fields.IntegerField(attr='author_id')

    created = fields.DateField()
    modified = fields.DateField()
    pub_date = fields.DateField()

    class Django:
        model = calculations_models.Calculation
class DataFileDocument(Document):
    """Index document for the ``DataFile`` model, stored in the ``datafile`` index."""

    def parallel_bulk(self, actions, **kwargs):
        """Run the base ``parallel_bulk`` with the project-wide parallel settings.

        Caller-supplied ``kwargs`` are not forwarded, as before; only
        ``elasticsearch_parallel_index_settings`` is passed on.
        NOTE(review): confirm that dropping ``kwargs`` is intentional.

        Returns whatever the base implementation returns. The override used
        to discard that value and always return ``None``.
        """
        return Document.parallel_bulk(
            self,
            actions=actions,
            **elasticsearch_parallel_index_settings
        )

    class Index:
        name = 'datafile'
        settings = elasticsearch_index_settings

    filename = fields.TextField(
        fields={'raw': fields.KeywordField()},
        analyzer=analyzer,
    )
    created_time = fields.DateField()
    modification_time = fields.DateField()
    dataset = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
        },
    )
    experiments = fields.ObjectField()

    def prepare_experiments(self, instance):
        """Return one dict per experiment of the datafile's dataset.

        Each dict holds the experiment ``id``, its ``public_access`` value and
        the list of its ACLs reduced to ``entityId`` / ``pluginId``.
        """
        return [
            {
                'id': exp.id,
                'public_access': exp.public_access,
                'objectacls': list(
                    exp.objectacls.all().values('entityId', 'pluginId')
                ),
            }
            for exp in instance.dataset.experiments.all()
        ]

    class Django:
        model = DataFile
        related_models = [Dataset, Experiment]
        queryset_pagination = 5000  # same as chunk_size

    def get_queryset(self):
        """Return the base queryset with ``dataset`` joined via ``select_related``."""
        return super(DataFileDocument, self).get_queryset().select_related('dataset')

    def get_instances_from_related(self, related_instance):
        """Return the datafiles affected by a changed ``Dataset`` or ``Experiment``.

        Any other type yields ``None``.
        """
        if isinstance(related_instance, Dataset):
            return related_instance.datafile_set.all()
        if isinstance(related_instance, Experiment):
            return DataFile.objects.filter(
                dataset__experiments=related_instance)
        return None
class ResourceDoc(DocType):
    """Index document for ``Resource`` objects with status ``published``."""

    id = fields.IntegerField()
    uuid = fields.TextField()

    title = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    description = fields.TextField(
        analyzer=polish_analyzer,
        fields={
            'raw': fields.KeywordField(),
        },
    )

    file_url = fields.TextField(attr='file_url')
    link = fields.TextField()
    format = fields.TextField()
    type = fields.TextField()
    openness_score = fields.IntegerField()

    dataset = fields.NestedField(
        properties={
            'id': fields.IntegerField(),
            'title': fields.TextField(
                analyzer=polish_analyzer,
                fields={'raw': fields.KeywordField()},
            ),
        },
    )

    # data = data_field

    views_count = fields.IntegerField()
    downloads_count = fields.IntegerField()
    status = fields.TextField()
    modified = fields.DateField()
    created = fields.DateField()

    class Meta:
        doc_type = 'resource'
        model = Resource
        related_models = [
            Dataset,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the resources of a changed ``Dataset``; ``None`` for other types."""
        if not isinstance(related_instance, Dataset):
            return None
        return related_instance.resources.all()

    def get_queryset(self):
        """Return the model's objects whose status is ``published``."""
        model_cls = self._doc_type.model
        return model_cls.objects.filter(status='published')
class CollectionDocument(Document):
    """Index document for the ``Collection`` model, stored in the ``collections`` index."""

    class Index:
        name = 'collections'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
        }

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.TextField(attr='public_can_view')
    locale = fields.ListField(fields.KeywordField())

    # Keyword fields; several are normalized with the ``lowercase`` normalizer.
    owner = fields.KeywordField(
        attr='parent_resource',
        normalizer='lowercase',
    )
    owner_type = fields.KeywordField(attr='parent_resource_type')
    collection_type = fields.KeywordField(
        attr='collection_type',
        normalizer='lowercase',
    )
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name', normalizer='lowercase')
    canonical_url = fields.KeywordField(
        attr='canonical_url',
        normalizer='lowercase',
    )
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer='lowercase')

    extras = fields.ObjectField()

    class Django:
        model = Collection
        fields = [
            'full_name',
            'custom_validation_schema',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the result of ``get(instance.supported_locales, [])``.

        NOTE(review): ``get`` is defined outside this block -- confirm that
        the second argument acts as the intended empty-list default.
        """
        locales = get(instance.supported_locales, [])
        return locales

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        extras = instance.extras
        return extras or {}
class OrganizationDocument(Document):
    """Index document for the ``Organization`` model, stored in the ``organizations`` index."""

    class Index:
        name = 'organizations'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
        }

    last_update = fields.DateField(attr='updated_at')
    public_can_view = fields.BooleanField(attr='public_can_view')
    name = fields.KeywordField(attr='name', normalizer="lowercase")
    mnemonic = fields.KeywordField(attr='mnemonic', normalizer="lowercase")
    extras = fields.ObjectField()
    user = fields.ListField(fields.KeywordField())

    class Django:
        model = Organization
        fields = [
            'is_active',
            'company',
            'location',
        ]

    @staticmethod
    def prepare_extras(instance):
        """Return ``instance.extras``, or an empty dict when it is falsy."""
        extras = instance.extras
        return extras or {}

    @staticmethod
    def prepare_user(instance):
        """Return the usernames of ``instance.members`` as a list."""
        usernames = instance.members.values_list('username', flat=True)
        return list(usernames)
class UserDocument(Document):
    """Index document for the ``User`` model, embedding post data under ``post``."""

    post = fields.ObjectField(
        properties={
            'id': fields.IntegerField(),
            'statement': fields.TextField(),
            'location': fields.ObjectField(
                properties={
                    'city': fields.TextField(),
                    'country': fields.TextField(),
                },
            ),
            'file': fields.ObjectField(
                properties={
                    'file': fields.FileField(),
                },
            ),
            'created_date': fields.DateField(),
        },
    )

    class Django:
        model = User
        fields = [
            'id',
            'username',
            'birthday',
            'avatar',
            'info',
            'web_page',
            'active',
            'timestamp',
        ]
        # NOTE(review): no ``get_instances_from_related`` is visible on this
        # document -- confirm how changes to ``Post`` are mapped back to users.
        related_models = [Post]
class ReferenceDocument(DocType):
    """Elasticsearch document for the ``Reference`` model."""

    # Identifiers.
    pk = fields.IntegerField(attr='pk')
    sid = fields.IntegerField(attr='sid')
    pmid = string_field(attr='pmid')

    study = ObjectField(
        properties={
            "pk": fields.IntegerField(),
            "name": string_field('name'),
        },
    )

    # Bibliographic data.
    name = string_field("name")
    doi = string_field("doi")
    title = string_field("title")
    abstract = text_field("abstract")
    journal = text_field("journal")
    date = fields.DateField()
    pdf = fields.FileField(fielddata=True)

    authors = ObjectField(
        properties={
            'first_name': string_field("first_name"),
            'last_name': string_field("last_name"),
            'pk': fields.IntegerField(),
        },
    )

    class Meta(object):
        model = Reference

        # Ignore auto updating of Elasticsearch when a model is saved
        # or deleted:
        ignore_signals = True

        # Don't perform an index refresh after every update (overrides global setting):
        auto_refresh = False
class UniversityDocument(Document):
    """Elasticsearch document for the ``University`` model."""

    id = fields.IntegerField(attr='id')

    # Text fields, each with a ``raw`` keyword sub-field normalized by
    # ``keyword_normalizer``.
    name = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )
    domain = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )
    web_page = fields.TextField(
        fields={
            'raw': fields.KeywordField(normalizer=keyword_normalizer),
        },
    )

    country = fields.ObjectField(
        properties={
            'name': fields.TextField(),
            'code': fields.TextField(
                fields={
                    'raw': fields.KeywordField(normalizer=keyword_normalizer),
                },
            ),
        },
    )

    createdAt = fields.DateField()
    isDelete = fields.BooleanField()

    class Django(object):
        """Meta options."""

        model = University  # The model associated with this DocType
class StartupDocument(DocType):
    """Index document for the ``Founder`` model, embedding its user and jobs."""

    user = fields.ObjectField(
        properties={
            'is_active': fields.BooleanField(),
            'is_founder': fields.BooleanField(),
            'is_account_disabled': fields.BooleanField(),
            'first_name': fields.StringField(),
            'last_name': fields.StringField(),
            'last_activity': fields.DateField(attr='last_activity.date'),
        },
    )

    job_set = fields.NestedField(
        properties={
            'title': fields.StringField(),
            'description': fields.StringField(),
            'level': fields.StringField(attr='get_level_display'),
            'pay': fields.StringField(attr='get_pay_display'),
        },
    )

    logo = fields.StringField(attr='logo_url')
    # logo_thumbnail = fields.StringField(attr='logo_thumbnail_url')

    # ``get_*_display`` values next to the underlying ``stage`` / ``field`` values.
    get_stage_display = fields.StringField(attr='get_stage_display')
    get_field_display = fields.StringField(attr='get_field_display')
    stage = fields.StringField(attr='stage', analyzer=leave_default)
    field = fields.StringField(attr='field', analyzer=leave_default)

    class Meta:
        model = Founder
        related_models = [MyUser, Job]
        fields = ['startup_name', 'description', 'is_filled', 'employee_count']

    def get_instances_from_related(self, related_instance):
        """Return the founder linked to a changed ``MyUser`` or ``Job``.

        A ``MyUser`` without a ``founder`` attribute, or any other type,
        yields ``None``.
        """
        is_user_with_founder = (
            isinstance(related_instance, MyUser)
            and hasattr(related_instance, 'founder')
        )
        if is_user_with_founder:
            return related_instance.founder
        if isinstance(related_instance, Job):
            return related_instance.founder
        return None
class UserDocument(DocType):
    """Index document for the ``User`` model; for testing purposes."""

    id = fields.IntegerField(attr='id')

    # Name-like strings with ``raw`` keyword and ``suggest`` completion sub-fields.
    username = StringField(
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    first_name = StringField(
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )
    last_name = StringField(
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    email = StringField(
        fields={
            'raw': KeywordField(),
        },
    )

    is_staff = fields.BooleanField()
    is_active = fields.BooleanField()
    date_joined = fields.DateField()

    class Meta(object):
        """Meta options."""

        model = User  # The model associated with this DocType
class MemberDocument(Document):
    """Elasticsearch document for the ``Members`` model."""

    id = fields.IntegerField(attr='id')

    full_name = StringField(
        analyzer=html_strip,
        fields={
            'raw': KeywordField(),
            'suggest': fields.CompletionField(),
        },
    )

    birthday = fields.DateField()

    # Each role contributes a ``title`` taken from its ``role`` attribute.
    roles = fields.NestedField(
        properties={
            'title': fields.TextField(
                analyzer=html_strip,
                attr='role',
                fields={
                    'raw': KeywordField(),
                },
            ),
        },
    )

    member_url = fields.TextField(attr='get_absolute_url')
    image = fields.FileField(attr="image")

    class Django(object):
        """Inner nested class Django."""

        model = Members  # The model associated with this Document
class ExperimentDocument(Document):
    """Elasticsearch document corresponding to the ``Experiment`` model."""

    # Analyzed text fields (fielddata enabled), each with a ``raw`` sub-field.
    title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_title = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    description = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    publication_authors = fields.TextField(
        analyzer=html_strip,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    technology = fields.TextField(
        analyzer=html_strip_no_stop,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    organism_names = fields.TextField(
        analyzer=html_strip_no_ngram,
        fielddata=True,
        fields={"raw": fields.KeywordField()},
    )
    platform_names = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )
    platform_accession_codes = fields.TextField(
        analyzer=standard_keyword,
        fielddata=True,
        fields={"raw": fields.TextField()},
    )

    # Basic Fields
    accession_code = fields.KeywordField()
    alternate_accession_code = fields.KeywordField()
    submitter_institution = fields.TextField()
    publication_doi = fields.TextField()
    has_publication = fields.BooleanField()
    sample_metadata_fields = fields.TextField()
    pubmed_id = fields.TextField()
    num_total_samples = fields.IntegerField()
    num_processed_samples = fields.IntegerField()
    num_downloadable_samples = fields.IntegerField()
    source_first_published = fields.DateField()

    # Index all downloadable samples as keywords so that we can calculate
    # unique counts on the facets
    downloadable_samples = fields.ListField(fields.KeywordField())

    class Django:
        model = Experiment
        parallel_indexing = True
        queryset_pagination = 3000
        fields = [
            "id",
        ]

    def get_queryset(self):
        """Return the default queryset ordered by ``id``."""
        base_queryset = super(ExperimentDocument, self).get_queryset()
        return base_queryset.order_by("id")
class LabEventDoc(Document):
    """Index document for ``LabEvent`` objects with status ``published``."""

    id = fields.IntegerField()
    title = TranslatedTextField('title')
    notes = TranslatedTextField('notes')
    event_type = fields.KeywordField()
    execution_date = fields.DateField()

    reports = fields.NestedField(
        properties={
            'type': fields.KeywordField(attr='report_type'),
            'download_url': fields.TextField(),
            'link': fields.StringField(),
        },
    )

    class Index:
        # ``name`` must be assigned first: once ``settings`` is bound as a
        # class attribute below, that name no longer resolves to the
        # module-level ``settings`` object inside this class body.
        name = settings.ELASTICSEARCH_INDEX_NAMES['lab_events']
        settings = settings.ELASTICSEARCH_DSL_INDEX_SETTINGS

    class Django:
        model = LabEvent
        related_models = [
            LabReport,
        ]

    def get_instances_from_related(self, related_instance):
        """Return the lab event of a changed ``LabReport``; ``None`` for other types."""
        if not isinstance(related_instance, LabReport):
            return None
        return related_instance.lab_event

    def get_queryset(self):
        """Restrict the base queryset to objects whose status is ``published``."""
        base_queryset = super().get_queryset()
        return base_queryset.filter(status='published')
class SourceDocument(Document):
    """Index document for the ``Source`` model, stored in the ``sources`` index."""

    class Index:
        name = 'sources'
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 0,
        }

    locale = fields.ListField(fields.KeywordField())
    last_update = fields.DateField(attr='updated_at')

    # Ownership, read from the ``parent_resource*`` attributes.
    owner = fields.KeywordField(attr='parent_resource')
    owner_type = fields.KeywordField(attr='parent_resource_type')

    public_can_view = fields.TextField(attr='public_can_view')
    source_type = fields.KeywordField(attr='source_type')
    is_active = fields.KeywordField(attr='is_active')
    version = fields.KeywordField(attr='version')
    name = fields.KeywordField(attr='name')

    class Django:
        model = Source
        fields = [
            'full_name',
            'custom_validation_schema',
        ]

    @staticmethod
    def prepare_locale(instance):
        """Return the result of ``get(instance.supported_locales, [])``.

        NOTE(review): ``get`` is defined outside this block -- confirm that
        the second argument acts as the intended empty-list default.
        """
        locales = get(instance.supported_locales, [])
        return locales