class Repository(DocType):
    """Repository document (legacy DocType API), stored in the 'git' index."""

    owner = Object(User)
    created_at = Date()
    description = Text(analyzer='snowball')
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return a search scoped to documents with commit_repo == 'repo'."""
        base_search = super(Repository, cls).search()
        return base_search.filter('term', commit_repo='repo')

    class Meta:
        index = 'git'
        doc_type = 'doc'
class InnerStructureDocument(InnerDoc):
    """Inner document describing a structure, embedded in other documents."""

    id = Keyword()
    name = Keyword()
    structure_type = Object(StructureTypeDocument)

    @classmethod
    def from_obj(cls, obj):
        """Build a document from a model instance.

        Bug fix: the original passed ``type=`` to the constructor, but the
        mapped field is ``structure_type`` — the value went to an unmapped
        attribute and the mapped field stayed empty. Also uses ``cls`` so
        subclasses produce instances of their own type.
        """
        return cls(
            id=str(obj.pk),
            name=obj.name,
            structure_type=StructureTypeDocument.from_obj(obj.type),
        )
class Repository(Document):
    """Repository document (new Document API), stored in the 'git' index."""

    owner = Object(User)
    created_at = Date()
    # Bug fix: the analyzer was given as the bare identifier ``snowball``
    # (a NameError unless such an object exists at module level); the
    # sibling DocType variant uses the string 'snowball', so match it.
    description = Text(analyzer='snowball')
    tags = Keyword()

    @classmethod
    def search(cls):
        """Return a search scoped to documents with commit_repo == 'repo'."""
        return super(Repository, cls).search().filter(
            'term', commit_repo='repo')

    class Index:
        name = 'git'
class Declaration(DocType):
    """Declaration document.

    Assumes there's a dynamic mapping with all fields not indexed by
    default.
    """

    # Personal data about the declarant: full name (with a completion
    # suggester), family members and position held.
    general = Object(
        properties={
            'full_name_suggest': Completion(preserve_separators=False),
            'full_name': String(index='analyzed'),
            'family_raw': String(index='analyzed'),
            'family': Nested(
                properties={
                    'name': String(index='analyzed'),
                    # Stored but not searchable (index='no').
                    'relations': String(index='no'),
                    'inn': String(index='no')
                }
            ),
            'post_raw': String(index='analyzed'),
            'post': Object(
                properties={
                    # Exact-match filters; the post itself is full-text.
                    'region': String(index='not_analyzed'),
                    'office': String(index='not_analyzed'),
                    'post': String(index='analyzed')
                }
            )
        }
    )
    # Metadata about the declaration itself; most flags are stored only.
    declaration = Object(
        properties={
            'date': NoneAwareDate(),
            'notfull': Boolean(index='no'),
            'notfull_lostpages': String(index='no'),
            'additional_info': Boolean(index='no'),
            'additional_info_text': String(index='no'),
            'needs_scancopy_check': Boolean(index='no')
        }
    )

    class Meta:
        index = 'catalog'
class Course(DocType):
    """Course document: catalogue entry with its textbooks and sections."""

    title = Text()
    dept = Text()
    code = Keyword()
    # Textbooks for the course; doc_class provides attribute access.
    books = Object(doc_class=TextBook, properties={
        "author": Text(),
        "title": Text(),
        "price": Text()
    })
    # Scheduled sections: semester, professor, location, time and day.
    sections = Object(doc_class=Section, properties={
        "sem": Keyword(),
        "prof": Text(),
        "loc": Text(),
        "time": Text(),
        "day": Text()
    })

    class Meta:
        index = "course_test"
class Schema(DiscoveryUserDoc):
    """A registered schema document.

    Example:
    {
        "_id": "cvisb",
        "_meta": {
            "url": "https://.../cvisb_dataset.json",
            "username": ... ,
            "timestamp": "2019-08-28T21:48:04.116339"
        },
        "@context": {
            "schema": "http://schema.org/",
            "cvisb": "https://data.cvisb.org/schema/",
            ...
        },
        "@graph": [ ... ]
    }
    """

    _meta = Object(SchemaMeta, required=True)
    # _id: the schema namespace, provided by the front-end when
    # registering; accessible through constructor argument 'id' or
    # schema.meta.id.

    class Index:
        """Associated ES Index."""
        name = 'discover_schema'
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    def save(self, *args, **kwargs):
        """Record a timestamp when the document is saved."""
        if not self.meta.id:
            raise ValidationException("namespace/_id is a required field.")
        self._meta.timestamp = datetime.now()
        return super().save(*args, **kwargs)

    @classmethod
    def gather_field(cls, field, reduce_func=mergeDict):
        """Aggregate the values of *field* across all documents.

        The default reduce function merges dicts — e.g. call it on the
        '@context' field.
        """
        values = (
            hit[field].to_dict()
            for hit in cls.search().source(field).scan()
        )
        return functools.reduce(reduce_func, values, {})
class AuthorInfo(DocType):
    """Author document."""

    # meta.id = hash(NormalizedName)
    NormalizedName = Text(required=True, analyzer="standard")
    DisplayName = Text(required=True, analyzer="standard")
    # All author ids grouped under this normalized name.
    AuthorIds = Long(multi=True)
    # Papers per underlying author id.
    Papers = Object(multi=True, properties={
        "AuthorId": Long(required=True),
        "PaperIds": Long(multi=True)
    })
    CreatedDate = Date(required=True)

    class Meta:
        index = "author_info"
class PassiveDNSUnique(GenericDoc):
    """PassiveTotal unique passive-DNS result document."""

    class Meta:
        doc_type = "passivetotal_passive_dns_unique"

    result_ips = Ip()
    frequency = Nested(
        properties={
            "entry": Nested(
                properties={
                    "ip": Ip(),
                    "count": Integer(index="not_analyzed")
                }
            )
        }
    )
    total = Integer()
    queryType = String()
    queryValue = String()
    pager = Object()

    def __setattr__(self, key, value):
        # The API returns the IP list under "results"; redirect it into
        # the mapped field "result_ips".
        if key == "results":
            key = "result_ips"
        GenericDoc.__setattr__(self, key, value)

    def __init__(self, jdata=None):
        """Populate the document from a raw API response dict.

        Bug fix: ``jdata`` used a mutable default argument (``{}``),
        which is shared across calls; it now defaults to ``None``.
        """
        GenericDoc.__init__(self)
        if jdata is None:
            jdata = {}
        for k, v in jdata.items():
            if k == "frequency":
                continue  # handled entry-by-entry below
            if v is None:
                v = {}
            setattr(self, k, v)
        if jdata.get("frequency") is not None:
            # frequency arrives as iterable (ip, count) pairs —
            # TODO confirm against the PassiveTotal response shape.
            for ip, count in jdata.get("frequency"):
                self.frequency.append(
                    {
                        "entry": {
                            "ip": ip,
                            "count": count
                        }
                    }
                )
def company_field():
    """Company field with id, name, trading_names and trigrams."""
    name_subfields = {
        'trigram': TrigramText(),
        'keyword': NormalizedKeyword(),
    }
    return Object(
        properties={
            'id': Keyword(),
            'name': Text(fields=name_subfields),
            'trading_names': TextWithTrigram(),
        },
    )
class DigitalFileDoc(DocType):
    """Digital file record."""

    uuid = Text()
    # Full path; 'raw' keyword sub-field enables exact matching/sorting.
    filepath = Text(fields={'raw': Keyword()})
    fileformat = Text()
    size_bytes = Long()
    datemodified = Text()
    # Parent DIP reference.
    dip = Object(properties={
        'id': Integer(),
        'identifier': Text(),
    })

    class Meta:
        index = 'accesspoc_digital_files'
        # Reject documents containing unmapped fields.
        dynamic = MetaField('strict')
class _AggregateReportDoc(Document):
    """DMARC aggregate report document."""

    class Index:
        name = "dmarc_aggregate"

    xml_schema = Text()
    org_name = Text()
    org_email = Text()
    org_extra_contact_info = Text()
    report_id = Text()
    date_range = Date()
    date_begin = Date()
    date_end = Date()
    errors = Text()
    published_policy = Object(_PublishedPolicy)
    source_ip_address = Ip()
    source_country = Text()
    source_reverse_dns = Text()
    source_Base_domain = Text()
    # Bug fix: was ``Integer`` (the field class itself, not an instance),
    # so the attribute was never mapped as a field.
    message_count = Integer()
    disposition = Text()
    dkim_aligned = Boolean()
    spf_aligned = Boolean()
    passed_dmarc = Boolean()
    policy_overrides = Nested(_PolicyOverride)
    header_from = Text()
    envelope_from = Text()
    envelope_to = Text()
    dkim_results = Nested(_DKIMResult)
    spf_results = Nested(_SPFResult)

    def add_policy_override(self, type_, comment):
        """Append a policy override entry."""
        self.policy_overrides.append(
            _PolicyOverride(type=type_, comment=comment))

    def add_dkim_result(self, domain, selector, result):
        """Append a DKIM authentication result."""
        self.dkim_results.append(
            _DKIMResult(domain=domain, selector=selector, result=result))

    def add_spf_result(self, domain, scope, result):
        """Append an SPF authentication result."""
        self.spf_results.append(
            _SPFResult(domain=domain, scope=scope, result=result))

    def save(self, **kwargs):
        """Derive passed_dmarc (either alignment passing) before saving."""
        # The original assigned ``passed_dmarc = False`` first and then
        # immediately overwrote it — the value depends only on the
        # alignment flags.
        self.passed_dmarc = self.spf_aligned or self.dkim_aligned
        return super().save(**kwargs)
class Log(DocType):
    """Log entry."""

    timestamp = Date()
    tool = Keyword()
    description = Keyword()
    stats = Object()

    def save(self, **kwargs):
        """Stamp the entry with the current time, then persist it."""
        self.timestamp = datetime.now()
        return super(Log, self).save(**kwargs)

    class Meta:
        # The index name derives from the configured base index.
        index = "{}-log".format(config.get('jackal', 'index'))
class _ForensicSampleDoc(InnerDoc):
    """Sampled message embedded in a forensic report."""

    raw = Text()
    headers = Object()
    headers_only = Boolean()
    to = Nested(_EmailAddressDoc)
    subject = Text()
    filename_safe_subject = Text()
    _from = Object(_EmailAddressDoc)
    date = Date()
    reply_to = Nested(_EmailAddressDoc)
    cc = Nested(_EmailAddressDoc)
    bcc = Nested(_EmailAddressDoc)
    body = Text()
    attachments = Nested(_EmailAttachmentDoc)

    def _append_address(self, field, display_name, address):
        # Shared helper for the address-list appenders below.
        field.append(
            _EmailAddressDoc(display_name=display_name, address=address))

    def add_to(self, display_name, address):
        """Append a To: recipient."""
        self._append_address(self.to, display_name, address)

    def add_reply_to(self, display_name, address):
        """Append a Reply-To: address."""
        self._append_address(self.reply_to, display_name, address)

    def add_cc(self, display_name, address):
        """Append a Cc: recipient."""
        self._append_address(self.cc, display_name, address)

    def add_bcc(self, display_name, address):
        """Append a Bcc: recipient."""
        self._append_address(self.bcc, display_name, address)

    def add_attachment(self, filename, content_type, sha256):
        """Append an attachment record."""
        self.attachments.append(
            _EmailAttachmentDoc(filename=filename,
                                content_type=content_type,
                                sha256=sha256))
class ZiroomDoc(Document):
    """Ziroom rental listing (comments translated from Chinese)."""

    # price
    price = Integer()
    # city
    city = Text()
    # location / address
    address = Text()
    # details
    detail = Text()
    # latitude/longitude
    location = GeoPoint()
    # title
    title = Text()
    # house layout
    house_type = Text()
    # floor area
    area = Float()
    # orientation (which way the unit faces)
    orientation = Text()
    # floor
    floor = Integer()
    # top floor of the building
    top_floor = Integer()
    # tags
    tags = Text(multi=True)
    # image URLs
    image_urls = Text(multi=True)
    # furnishings / amenities
    deploy = Object()
    # url
    url = Text()
    # transport links
    traffic = Text(multi=True)
    # roommates
    chums = Object(multi=True)
    # payment options
    payment = Object(multi=True)
    # recommendations
    recommend = Object(multi=True)
    # promotions / activities
    activity = Object(multi=True)
    # air-quality inspection
    air_part = Object()
    # VR video
    vr_video = Object()

    class Index:
        name = 'hrent'

    class Meta:
        doc_type = 'ziroom'
class ComponentStructureUnitDocument(InnerDoc):
    """Inner document for a structure unit belonging to a component."""

    id = Keyword()
    name = Keyword()
    reference_code = Keyword()
    structure = Object(InnerStructureDocument)

    @classmethod
    def from_obj(cls, obj):
        """Build a document from a model instance.

        Uses ``cls`` (instead of hard-coding the class name, as the
        original did) so that subclasses produce instances of their
        own type.
        """
        return cls(
            id=str(obj.pk),
            name=obj.name,
            reference_code=obj.reference_code,
            structure=InnerStructureDocument.from_obj(obj.structure),
        )
class MilitaryType(Document):
    """Military type record (comments translated from Chinese)."""

    # raw information payload
    information = Object()
    # name
    name = Keyword()
    # url
    url = Keyword()
    # date
    date = Date()
    # type
    type = Keyword()

    class Index:
        # Index name (original comment: "database and table name").
        name = "record"
class BuildDoc(Document):
    """Build record document."""

    id = Keyword(required=True)
    # Note! The reason for using Object() instead of Nested() is because
    # SearchKit doesn't work if it's nested. This works though.
    build = Object(_Build)
    source = Object(_Source)
    target = Object(_Target)
    download = Object(_Download)

    class Index:
        name = settings.ES_BUILD_INDEX
        settings = settings.ES_BUILD_INDEX_SETTINGS

    @classmethod
    def create(cls, id, **doc):
        """Create a BuildDoc from raw sub-document dicts.

        :param id: positive integer id, also used as meta.id
        :param doc: dicts under 'build', 'source', 'target', 'download'
        :raises ValueError: if ``id`` is not a positive int
        """
        # Validate explicitly instead of ``assert`` — asserts are
        # stripped under ``python -O``, silently disabling the check.
        if not (isinstance(id, int) and id > 0):
            raise ValueError("id must be a positive integer, got %r" % (id,))
        return cls(
            meta={"id": id},
            id=id,
            build=_Build(**doc["build"]),
            source=_Source(**doc["source"]),
            target=_Target(**doc["target"]),
            download=_Download(**doc["download"]),
        )
class Spam(DocType):
    """Spam message document built from parsed mail headers."""

    # Parsed envelope sender, split into components and geolocated.
    X_Envelope_From = Object(
        properties={
            'email': String(index='not_analyzed'),
            'header': String(index='not_analyzed'),
            'localpart': String(index='not_analyzed'),
            'domain': String(index='not_analyzed'),
            'location': GeoPoint(),
            'domain_type': String(index='not_analyzed'),
            'country_code': String(index='not_analyzed')
        }
    )
    X_Envelope_To = String(index='not_analyzed')
    X_Spam_Flag = Boolean()
    X_Spam_Score = Float()
    To = String(multi=True, index='not_analyzed')
    Date = Date()
    From = String(index='not_analyzed')
    Reply_To = String(index='not_analyzed')
    X_Priority = Integer()
    # X_Mailer = String()
    MIME_Version = String(index='not_analyzed')
    Subject = String()
    Content_Transfer_Encoding = String(index='not_analyzed')
    Content_Type = String(index='not_analyzed')
    Charset = String(index='not_analyzed')
    Received = String(index='not_analyzed')
    Hops = Integer()
    Received_SPF = String(index='not_analyzed')
    DKIM_Signature = String(index='not_analyzed')
    ##### HEADERS ADDED AFTER PROCESSING ####
    # (translated from French: "HEADERS RAJOUTES SUITE A TRAITEMENT")
    spfResult = String(index='not_analyzed')
    spfTrue = String(index='not_analyzed')
    DKIM_Result = String(index='not_analyzed')
    DKIM_KeyLength = Integer()
    #############################################
    # Message = String()
    phoneNumbers = String(multi=True, index='not_analyzed')
    URLs = String(multi=True, index='not_analyzed')
    attachmentsTypes = String(multi=True, index='not_analyzed')
    attachmentsSizes = Integer(multi=True)

    class Meta:
        index = 'default_index'
        doc_type = 'spam'

    def save(self, **kwargs):
        # Plain delegation; kept as an explicit hook point.
        return super().save(**kwargs)
class BlogPostIndex(DocType):
    """Search index document for a blog post."""

    author = Text()
    posted_date = Date()
    title = Text()
    text = Text()
    blog = Text()
    metadata = Object()

    @classmethod
    def trigger_delete(cls, instance):
        """Delete the index entry for *instance* from Elasticsearch.

        :param instance: object to be deleted
        """
        index_name = instance.blog.index_name()
        es_client.delete(index_name, 'blog_post_index', instance.id)
class Log(LogShortIdMixin, Document):
    """Generic log document."""

    extra_data = Object()
    slug = Keyword()
    related_objects = Keyword()
    parent_log = Keyword()
    start = Date()
    stop = Date()
    time = Float()
    release = Keyword()

    def __str__(self):
        # NOTE(review): returns self.id, which is None for unsaved docs —
        # str() would then raise TypeError; confirm callers only use this
        # on persisted instances.
        return self.id

    @classmethod
    def _get_using(cls, using=None):
        # Ensure a connection is configured before resolving it.
        set_connection()
        return super()._get_using(using)

    @property
    def id(self):
        """Elasticsearch document id."""
        return self.meta.id

    @property
    def pk(self):
        """Alias for :attr:`id`."""
        return self.id

    def update(self, using=None, index=None, detect_noop=True,
               doc_as_upsert=False, refresh=False, retry_on_conflict=None,
               script=None, script_id=None, scripted_upsert=False,
               upsert=None, return_doc_meta=False,
               update_only_changed_fields=False, **fields):
        """Partially update the document.

        :param update_only_changed_fields: drop fields whose new value
            equals the current attribute value; skip the request entirely
            when nothing would change.
        """
        if update_only_changed_fields:
            fields = {k: v for k, v in fields.items()
                      if getattr(self, k) != v}
        if fields:
            # Fix: forward by keyword — the original passed every option
            # positionally, which silently misassigns arguments if the
            # upstream signature ever changes.
            super().update(
                using=using, index=index, detect_noop=detect_noop,
                doc_as_upsert=doc_as_upsert, refresh=refresh,
                retry_on_conflict=retry_on_conflict, script=script,
                script_id=script_id, scripted_upsert=scripted_upsert,
                upsert=upsert, return_doc_meta=return_doc_meta,
                **fields)
class File(Component):
    """File component stored in the 'document' index."""

    filename = Keyword()
    extension = Keyword()
    href = Keyword()  # @href
    size = Long()
    modified = Date()
    # Attachment data; only the date sub-field is mapped explicitly.
    attachment = Object(
        properties={
            'date': Date()
        })

    class Index:
        name = 'document'
        analyzers = [autocomplete_analyzer]

    class Meta:
        # Keep ES from auto-detecting dates in free-form strings.
        date_detection = MetaField('false')
class Document(DocType):
    """Class for define mapping in ES.

    This class partially aggregates the above classes, using them as
    attributes.
    """

    keywords = Text(fields={'raw': Keyword()})
    entity = Nested(Entity)
    # NOTE(review): 'Coreferenc' looks like a truncated 'Coreference' —
    # confirm against the class actually defined elsewhere in this module.
    cluster = Object(Coreferenc)
    type = Integer()

    class Meta:
        index = 'hard_mapping'

    def save(self, **kwargs):
        """Persist the document (plain delegation to DocType.save)."""
        return super(Document, self).save(**kwargs)
class WebPage(Document):
    """Crawled web page, stored in a per-language 'page-<lang>' index."""

    url = Keyword()
    title = Text(analyzer=text_analyzer)
    description = Text(fields={'raw': Keyword()}, analyzer=text_analyzer)
    body = Text(fields={'raw': Keyword()}, analyzer=text_analyzer)
    web = Object()
    weight = Keyword()

    def save(self, **kwargs):
        """Save into 'page-<lang>' based on the detected body language,
        falling back to the English index when detection fails.

        Fix: the original wrapped the save call itself in the try block,
        so a failing save (e.g. validation error) was re-attempted
        against 'page-en'; now only language detection is guarded.
        """
        # self.set_name()
        try:
            lang = url.detect_language(self.body)
            index_name = 'page-%s' % lang
        except Exception:
            # Best-effort: any detection failure routes to the English index.
            index_name = 'page-en'
        # Pass by keyword instead of positionally for clarity.
        return super(WebPage, self).save(
            using='default', index=index_name, validate=True, **kwargs)
class GeneralDoc(Document):
    """Document stored in the 'general_idx' index."""

    # Field definitions.
    start = Double()
    duration = Double()
    content = Text()
    prev_id = Keyword()
    next_id = Keyword()
    context = Text()
    caption = Object(CaptionInnerDoc)

    class Index:
        name = "general_idx"
        # Using the default settings for now.

    def save(self, **kwargs):
        """Hook for pre-save work; currently a plain delegation."""
        return super().save(**kwargs)
class Reference(Document):
    """A bibliographic reference with its citation links."""

    title = Text()
    authors = Object(Author)
    abstract = Text()
    in_citations = Keyword()
    out_citations = Keyword()
    urls = Keyword()
    is_ml = Boolean()

    class Meta:
        doc_type = '_doc'

    class Index:
        doc_type = '_doc'
        name = 'references'

    def __repr__(self):
        """Human-readable 'title / authors' form."""
        return f"{self.title} / {self.authors}"
def contact_or_adviser_field(include_dit_team=False):
    """Object field for advisers and contacts.

    :param include_dit_team: also embed the adviser's DIT team sub-object
    """
    properties = {
        'id': Keyword(),
        'first_name': NormalizedKeyword(),
        'last_name': NormalizedKeyword(),
        'name': Text(
            fields={
                'keyword': NormalizedKeyword(),
                'trigram': TrigramText(),
            },
        ),
    }
    if include_dit_team:
        properties['dit_team'] = id_name_field()
    return Object(properties=properties)
class Device(MyDocType):
    """Device document in the 'bluetooth' index."""

    device_type = Keyword()
    pos_x = Integer()
    pos_y = Integer()
    radius = Integer()
    key = Keyword()
    # MQTT connection settings for the device.
    mqtt_account = Object(doc_class=MQTTAccount, properties={
        'username': Keyword(),
        'password': Keyword(),
        'server': Keyword(),
        'port': Integer(),
        'keep_alive': Keyword(),
        'clients_topic': Keyword(),
        'response_topic': Keyword()
    })

    class Meta:
        index = 'bluetooth'
class File(DocType):
    """Stored file record keyed by bucket and path."""

    bucket = Text()
    path = Text()
    timestamp = Date()
    user = Text()
    mimetype = Text()
    metadata = Object(include_in_all=True)  # untyped dictionary

    class Meta:
        index = 'files'
        doc_type = 'file'

    @staticmethod
    def get_index(bucket, path):
        """Return the document id: bucket followed by the path with
        every '/' replaced by '_'."""
        return bucket + path.replace('/', '_')

    def save(self, **kwargs):
        """Derive meta.id from bucket/path, then persist."""
        self.meta.id = File.get_index(self.bucket, self.path)
        return super(File, self).save(**kwargs)
class BrowseCache(DocType):
    """Pre-computed browse-page entry."""

    Type = Keyword(required=True)
    DisplayName = Text(required=True)
    # Ids of every entity kind this cache row can reference.
    EntityIds = Object(required=True, properties={
        "AuthorIds": Long(multi=True),
        "ConferenceIds": Long(multi=True),
        "JournalIds": Long(multi=True),
        "AffiliationIds": Long(multi=True),
        "PaperIds": Long(multi=True)
    })
    Citation = Text(analyzer="standard")
    Year = Integer()
    Field = Text(analyzer="standard")
    Affiliations = Text(analyzer="standard", multi=True)
    Url = Keyword()
    PhotoUrl = Keyword()

    class Meta:
        index = "browse_cache"
class _ForensicReportDoc(Document):
    """DMARC forensic report document."""

    class Index:
        name = "dmarc_forensic"

    feedback_type = Text()
    user_agent = Text()
    version = Text()
    original_mail_from = Text()
    arrival_date = Date()
    domain = Text()
    original_envelope_id = Text()
    authentication_results = Text()
    delivery_results = Text()
    source_ip_address = Ip()
    source_country = Text()
    source_reverse_dns = Text()
    source_authentication_mechanisms = Text()
    source_auth_failures = Text()
    dkim_domain = Text()
    original_rcpt_to = Text()
    # Embedded copy of the sampled message.
    sample = Object(_ForensicSampleDoc)
def base_serialized_field():
    """Base Object mapping shared by serialized resources."""
    not_analyzed = dict(index="not_analyzed")
    mapping = Object()
    mapping.field("url", String(**not_analyzed))
    mapping.field("last_updated", Date())
    mapping.field("created", Date())
    mapping.field("updaters", String(**not_analyzed))
    # URL references
    mapping.field("project", String(**not_analyzed))
    for ref_name in ("related_topics", "references", "referenced_by"):
        mapping.field(ref_name, String(multi=True, **not_analyzed))
    return mapping