class DocTestSSLResult(Document):
    source = Text(fields={'raw': Keyword()})
    result = Boolean()
    timestamp = Date()
    ip = Keyword()
    hostname = Keyword()
    port = Integer()
    svcid = Keyword()
    protocols = Keyword(multi=True)
    ciphers = Text(multi=True, fields={'raw': Keyword()})
    ciphertests = Keyword(multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": Keyword(),
            "cipher": Text(fields={'raw': Keyword()})
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": Text(fields={'raw': Keyword()}),
            "md5_fingerprint": Keyword(),
            "sha1_fingerprint": Keyword(),
            "sha256_fingerprint": Keyword(),
            "cn": Text(fields={'raw': Keyword()}),
            "san": Text(multi=True, fields={'raw': Keyword()}),
            "issuer": Text(fields={'raw': Keyword()}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": Text(fields={'raw': Keyword()}),
            "Crl_url": Text(fields={'raw': Keyword()}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = Keyword(multi=True)

    def parseCSVLine(self, line):
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:  # host, ip and port
            m = reIpHostColumn.search(line['fqdn/ip'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):  # protocols
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif reCipherTests.search(line['id']) and reVulnerable.search(line['finding']):  # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))

        if line['id'] == "cipher_order":  # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['severity']))
        elif line['id'] == "protocol_negotiated":  # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "cipher_negotiated":  # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "cert_keySize":  # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "cert_signatureAlgorithm":  # certificate signature algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "cert_fingerprintSHA1":  # certificate SHA1 fingerprint
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
        elif line['id'] == "cert_fingerprintSHA256":  # certificate SHA256 fingerprint
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cert_fingerprintMD5":  # certificate MD5 fingerprint
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
        elif line['id'] == "cert_commonName":  # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "cert_subjectAltName":  # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                self.cert.san = m.group(1)
        elif line['id'] == "cert_caIssuers":  # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":  # certificate extended validation
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "cert_notAfter":  # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(unparsedDate, "%Y-%m-%d %H:%M")
        elif line['id'] == "cert_ocspURL":  # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"
        elif line['id'] == "cert_crlDistributionPoints":  # certificate CRL
            m = reAll.search(line['finding'])
            if m:
                self.cert.Crl_url = m.group(1)
            else:
                self.cert.Crl_url = "-"
        elif line['id'] == "OCSP_stapling":  # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "CCS", "secure_renego", "secure_client_renego",
                            "CRIME_TLS", "SWEET32", "POODLE_SSL", "fallback_SCSV",
                            "FREAK", "DROWN", "LOGJAM", "BEAST", "LUCKY13",
                            "RC4") and reVulnerable.search(line['severity']):
            m = reVulnerable.search(line['severity'])
            if str(m.group(1)) != '':
                self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(
            csvfile,
            fieldnames=("id", "fqdn/ip", "port", "severity", "finding", "cve", "cwe"),
            delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            self.result = False

        if 'debug' in kwargs and kwargs['debug']:
            pp.pprint(self.to_dict())
        return super().save()
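# A minimal usage sketch for the document above; the connection host, index
# binding, and report file name are assumptions (testssl.sh can emit one CSV
# per scan via --csvfile).
from elasticsearch_dsl.connections import connections

connections.create_connection(hosts=['localhost'])  # assumed ES endpoint
DocTestSSLResult.init()  # assumes a default index is declared for the class elsewhere

doc = DocTestSSLResult(source='example.com_p443-20240101-1200.csv')  # illustrative name
with open(doc.source, newline='') as csvfile:
    doc.parseCSV(csvfile)
doc.save()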
class Live(DocType):
    id = Long()
    speaker_id = Integer()
    speaker_name = Text(analyzer='ik_max_word')
    feedback_score = Float()  # rating
    topic_names = Text(analyzer='ik_max_word')  # topic tag names
    seats_taken = Integer()  # number of participants
    subject = Text(analyzer='ik_max_word')  # title
    amount = Float()  # price (RMB)
    description = Text(analyzer='ik_max_word')
    status = Boolean()  # public (True) / ended (False)
    starts_at = Date()
    outline = Text(analyzer='ik_max_word')  # live session outline
    speaker_message_count = Integer()
    tag_names = Text(analyzer='ik_max_word')
    liked_num = Integer()
    topics = Keyword()
    live_suggest = Completion(analyzer=ik_analyzer)
    cover = Text(index='not_analyzed')
    zhuanlan_url = Text(index='not_analyzed')

    @property
    def id(self):  # note: shadows the ``id = Long()`` field above
        return self._id

    @property
    def speaker(self):
        return session.query(User).get(self.speaker_id)

    @property
    def url(self):
        return LIVE_URL.format(self.id)

    class Meta:
        index = 'live130'

    def to_dict(self, include_extended=True):
        d = super().to_dict()
        if include_extended:
            d.update({
                'id': self._id,
                'type': 'live',
                'speaker': self.speaker.to_dict(),
                'url': self.url
            })
        return d

    @classmethod
    async def add(cls, **kwargs):
        id = kwargs.pop('id', None)
        if id is None:
            return False
        live = cls(meta={'id': int(id)}, **kwargs)
        await live.save()
        return live

    @classmethod
    async def _execute(cls, s, order_by=None):
        if order_by is not None:
            s = s.sort(order_by)
        lives = await s.execute()
        return [live.to_dict() for live in lives]

    @classmethod
    def apply_weight(cls, s, start, limit):
        return s.query(Q('function_score',
                         functions=[gauss_sf, log_sf])).extra(
                             **{'from': start, 'size': limit})

    @classmethod
    async def ik_search(cls, query, status=None, start=0, limit=10):
        s = cls.search()
        s = s.query('multi_match', query=query, fields=SEARCH_FIELDS)
        if status is not None:
            s = s.query('match', status=status)
        s = cls.apply_weight(s, start, limit)
        return await cls._execute(s)

    @classmethod
    async def explore(cls, from_date=None, to_date=None, order_by=None,
                      start=0, limit=10, topic=None):
        s = cls.search()
        if topic is not None:
            s = s.query(Q('term', topic_names=topic))
        starts_at = {}
        if from_date is not None:
            starts_at['from'] = from_date
        if to_date is not None:
            starts_at['to'] = to_date
        if starts_at:
            s = s.query(Q('range', starts_at=starts_at))
        if order_by is None:
            s = cls.apply_weight(s, start, limit)
        return await cls._execute(s, order_by)

    @classmethod
    async def get_hot_weekly(cls):
        today = date.today()
        return await cls.explore(from_date=today - timedelta(days=7),
                                 to_date=today, limit=20)

    @classmethod
    async def get_hot_monthly(cls):
        today = date.today()
        return await cls.explore(from_date=today - timedelta(days=30),
                                 to_date=today, limit=50)

    @classmethod
    async def ik_search_by_speaker_id(cls, speaker_id, order_by='-starts_at'):
        s = cls.search()
        s = s.query(Q('bool', should=Q('match', speaker_id=speaker_id)))
        return await cls._execute(s, order_by)

    @classmethod
    async def get_hot_topics(cls, size=50):
        s = cls.search()
        s.aggs.bucket('topics', A('terms', field='topics', size=size))
        rs = await s.execute()
        buckets = rs.aggregations.topics.buckets
        topic_names = [r['key'] for r in buckets]
        topics = session.query(Topic).filter(Topic.name.in_(topic_names)).all()
        topics = sorted(topics, key=lambda t: topic_names.index(t.name))
        return [topic.to_dict() for topic in topics]

    @classmethod
    async def ik_suggest(cls, query, size=10):
        s = cls.search()
        s = s.suggest('live_suggestion', query, completion={
            'field': 'live_suggest',
            'fuzzy': {'fuzziness': 2},
            'size': size
        })
        suggestions = await s.execute_suggest()
        matches = suggestions.live_suggestion[0].options
        ids = [match._id for match in matches]
        lives = await Live.mget(ids)
        return [live.to_dict() for live in lives]
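# Every query helper on Live is a coroutine (the snippet appears to target an
# async fork of elasticsearch-dsl), so callers drive it from an event loop.
# A sketch with illustrative payload values:
import asyncio

async def demo():
    await Live.add(id=1000, subject='Intro to Python', amount=9.9,
                   status=True, speaker_id=1)
    for live in await Live.ik_search('Python', status=True, limit=5):
        print(live['subject'], live['url'])

asyncio.get_event_loop().run_until_complete(demo())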
class Repos(DocType):
    is_public = Boolean()
    created_at = Date()
class UserSearch(ModelSearchAdapter):
    model = User
    fuzzy = True

    class Meta:
        doc_type = 'User'

    first_name = String()
    last_name = String()
    about = String(analyzer=i18n_analyzer)
    organizations = String(index='not_analyzed')
    visible = Boolean()
    metrics = User.__search_metrics__
    created = Date(format='date_hour_minute_second')
    user_suggest = Completion(analyzer=simple,
                              search_analyzer=simple,
                              payloads=True)

    sorts = {
        'last_name': 'last_name',
        'first_name': 'first_name',
        'datasets': 'metrics.datasets',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
    }

    facets = {
        'organization': ModelTermsFacet(field='organizations',
                                        model=Organization),
        'datasets': RangeFacet(field='metrics.datasets',
                               ranges=[('none', (None, 1)),
                                       ('few', (1, 5)),
                                       ('many', (5, None))],
                               labels={
                                   'none': _('No datasets'),
                                   'few': _('Few datasets'),
                                   'many': _('Many datasets'),
                               }),
        'followers': RangeFacet(field='metrics.followers',
                                ranges=[('none', (None, 1)),
                                        ('few', (1, 5)),
                                        ('many', (5, None))],
                                labels={
                                    'none': _('No followers'),
                                    'few': _('Few followers'),
                                    'many': _('Many followers'),
                                }),
    }

    boosters = [
        GaussDecay('metrics.reuses', 50, decay=0.8),
        GaussDecay('metrics.datasets', 50, decay=0.8),
        GaussDecay('metrics.followers', 200, 200, decay=0.8),
    ]

    @classmethod
    def serialize(cls, user):
        return {
            'first_name': user.first_name,
            'last_name': user.last_name,
            'about': user.about,
            'organizations': [str(o.id) for o in user.organizations],
            'metrics': user.metrics,
            'created': to_iso_datetime(user.created_at),
            'user_suggest': {
                'input': cls.completer_tokenize(user.fullname) + [user.id],
                'output': str(user.id),
                'payload': {
                    'avatar_url': user.avatar(40, external=True),
                    'first_name': user.first_name,
                    'last_name': user.last_name,
                    'slug': user.slug,
                },
            },
            'visible': user.visible,
        }
class WhoisDomainRecordDoc(GenericDoc):
    class Meta:
        doc_type = "opendns_whois_record"

    addresses = String()
    administrativeContactCity = String()
    administrativeContactCountry = String()
    administrativeContactEmail = String(analyzer=email_analyzer)
    administrativeContactFax = String()
    administrativeContactFaxExt = String()
    administrativeContactName = String()
    administrativeContactOrganization = String()
    administrativeContactPostalCode = String()
    administrativeContactState = String()
    administrativeContactStreet = String()
    administrativeContactTelephone = String()
    administrativeContactTelephoneExt = String()
    auditUpdatedDate = Date()
    billingContactCity = String()
    billingContactCountry = String()
    billingContactEmail = String(analyzer=email_analyzer)
    billingContactFax = String()
    billingContactFaxExt = String()
    billingContactName = String()
    billingContactOrganization = String()
    billingContactPostalCode = String()
    billingContactState = String()
    billingContactStreet = String()
    billingContactTelephone = String()
    billingContactTelephoneExt = String()
    created = Date()
    domainName = String()
    emails = String(analyzer=email_analyzer)
    expires = Date()
    hasRawText = Boolean()
    nameServers = String()
    recordExpired = Boolean()
    registrantCity = String()
    registrantCountry = String()
    registrantEmail = String(analyzer=email_analyzer)
    registrantFax = String()
    registrantFaxExt = String()
    registrantName = String()
    registrantOrganization = String()
    registrantPostalCode = String()
    registrantState = String()
    registrantStreet = String()
    registrantTelephone = String()
    registrantTelephoneExt = String()
    registrarIANAID = Integer()
    registrarName = String()
    record_status = String()
    technicalContactCity = String()
    technicalContactCountry = String()
    technicalContactEmail = String(analyzer=email_analyzer)
    technicalContactFax = String()
    technicalContactFaxExt = String()
    technicalContactName = String()
    technicalContactOrganization = String()
    technicalContactPostalCode = String()
    technicalContactState = String()
    technicalContactStreet = String()
    technicalContactTelephone = String()
    technicalContactTelephoneExt = String()
    timeOfLatestRealtimeCheck = BetterDate(format="epoch_millis")
    timestamp = Date()
    updated = Date()
    whoisServers = String()
    zoneContactCity = String()
    zoneContactCountry = String()
    zoneContactEmail = String(analyzer=email_analyzer)
    zoneContactFax = String()
    zoneContactFaxExt = String()
    zoneContactName = String()
    zoneContactOrganization = String()
    zoneContactPostalCode = String()
    zoneContactState = String()
    zoneContactStreet = String()
    zoneContactTelephone = String()
    zoneContactTelephoneExt = String()

    def __init__(self, jdata=None):
        # None instead of a mutable default dict
        GenericDoc.__init__(self)
        if jdata is None:
            jdata = {}
        for k, v in jdata.items():
            if v is None:
                v = {}
            setattr(self, k, v)
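# A short sketch of feeding the constructor, assuming a WHOIS record already
# fetched as JSON and an index/connection wired up by GenericDoc; the file
# name is hypothetical.
import json

with open('whois_record.json') as fh:
    doc = WhoisDomainRecordDoc(jdata=json.load(fh))
doc.save()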
class IndexedPublicationLegacy(Document):
    startDate = Date()
    endDate = Date()
    description = Text(analyzer='english')
    facility = Nested(
        properties={
            'country': Text(analyzer='english'),
            'name': Text(analyzer='english'),
            'state': Text(analyzer='english')
        })
    deleted = Boolean()
    path = Text(fields={
        '_exact': Keyword(),
        '_path': Text(analyzer=path_analyzer)
    })
    title = Text(analyzer='english', fields={'_exact': Keyword()})
    name = Text(fields={'_exact': Keyword()})
    equipment = Nested(
        properties={
            'component': Text(analyzer='english'),
            'equipment': Text(analyzer='english'),
            'equipmentClass': Text(analyzer='english'),
            'facility': Text(analyzer='english')
        })
    system = Text(fields={'_exact': Keyword()})
    organization = Nested(
        properties={
            'country': Text(analyzer='english'),
            'name': Text(analyzer='english'),
            'state': Text(analyzer='english')
        })
    pis = Nested(
        properties={
            'lastName': Text(analyzer='english'),
            'firstName': Text(analyzer='english')
        })
    project = Text(fields={'_exact': Keyword()})
    sponsor = Nested(properties={
        'name': Text(analyzer='english'),
        'url': Text()
    })
    fundorg = Text(analyzer='english', fields={'_exact': Keyword()})
    fundorgprojid = Text(fields={'_exact': Keyword()})
    publications = Nested(
        properties={
            'authors': Text(analyzer='english', multi=True),
            'title': Text(analyzer='english')
        })
    experiments = Nested(
        properties={
            'startDate': Date(),
            'endDate': Date(),
            'doi': Keyword(),
            'description': Text(analyzer='english'),
            'facility': Nested(
                properties={
                    'country': Text(analyzer='english'),
                    'state': Text(analyzer='english'),
                    'name': Text(analyzer='english'),
                }),
            'deleted': Boolean(),
            'path': Text(fields={
                '_exact': Keyword(),
                '_path': Text(analyzer=path_analyzer)
            }),
            'material': Nested(
                properties={
                    'materials': Text(analyzer='english', multi=True),
                    'component': Text(analyzer='english')
                }),
            'equipment': Nested(
                properties={
                    'component': Text(analyzer='english'),
                    'equipment': Text(analyzer='english'),
                    'equipmentClass': Text(analyzer='english'),
                    'facility': Text(analyzer='english')
                }),
            'title': Text(analyzer='english'),
            'sensors': Text(analyzer='english', multi=True),
            'type': Text(analyzer='english'),
            'specimenType': Nested(
                properties={
                    'name': Text(analyzer='english'),
                    'description': Text(analyzer='english')
                }),
            'name': Text(analyzer='english'),
            'creators': Nested(
                properties={
                    'lastName': Text(analyzer='english'),
                    'firstName': Text(analyzer='english')
                })
        })

    @classmethod
    def from_id(cls, project_id):
        if project_id is None:
            raise DocumentNotFound()
        id_filter = Q('term', **{'name._exact': project_id})
        search = cls.search().filter(id_filter)
        res = search.execute()
        if res.hits.total.value > 1:
            first_hit_filter = Q('term', **{'_id': res[0].meta.id})
            # Delete all duplicates indexed for the same project id,
            # keeping only the first result
            delete_query = id_filter & ~first_hit_filter
            cls.search().filter(delete_query).delete()
            return cls.get(res[0].meta.id)
        elif res.hits.total.value == 1:
            return cls.get(res[0].meta.id)
        else:
            raise DocumentNotFound("No document found for "
                                   "{}".format(project_id))

    class Index:
        name = settings.ES_INDICES['publications_legacy']['alias']

    class Meta:
        dynamic = MetaField('strict')
class ReuseSearch(ModelSearchAdapter):
    model = Reuse
    fuzzy = True

    class Meta:
        doc_type = 'Reuse'

    title = String(analyzer=i18n_analyzer,
                   fields={'raw': String(index='not_analyzed')})
    description = String(analyzer=i18n_analyzer)
    url = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    type = String(index='not_analyzed')
    tags = String(index='not_analyzed',
                  fields={'i18n': String(index='not_analyzed')})
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    datasets = Object(
        properties={
            'id': String(index='not_analyzed'),
            'title': String(),
        })
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Reuse)
    featured = Boolean()
    reuse_suggest = Completion(analyzer=simple,
                               search_analyzer=simple,
                               payloads=True)
    extras = Object()

    facets = {
        'tag': TermsFacet(field='tags'),
        'organization': ModelTermsFacet(field='organization',
                                        model=Organization),
        'owner': ModelTermsFacet(field='owner', model=User),
        'dataset': ModelTermsFacet(field='dataset.id', model=Dataset),
        'type': TermsFacet(field='type', labelizer=reuse_type_labelizer),
        'datasets': RangeFacet(field='metrics.datasets',
                               ranges=[('none', (None, 1)),
                                       ('few', (1, 5)),
                                       ('many', (5, None))],
                               labels={
                                   'none': _('No datasets'),
                                   'few': _('Few datasets'),
                                   'many': _('Many datasets'),
                               }),
        'followers': RangeFacet(field='metrics.followers',
                                ranges=[('none', (None, 1)),
                                        ('few', (1, 5)),
                                        ('many', (5, None))],
                                labels={
                                    'none': _('No followers'),
                                    'few': _('Few followers'),
                                    'many': _('Many followers'),
                                }),
        'badge': TermsFacet(field='badges', labelizer=reuse_badge_labelizer),
        'featured': BoolFacet(field='featured'),
    }

    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'datasets': 'metrics.datasets',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }

    boosters = [
        BoolBooster('featured', lazy('featured_boost')),
        GaussDecay('metrics.datasets', max_datasets,
                   decay=lazy('datasets_decay')),
        GaussDecay('metrics.followers', max_followers,
                   decay=lazy('followers_decay')),
    ]

    @classmethod
    def is_indexable(cls, reuse):
        return (reuse.deleted is None
                and len(reuse.datasets) > 0
                and not reuse.private)

    @classmethod
    def serialize(cls, reuse):
        """By default use the ``to_dict`` method
        and exclude ``_id``, ``_cls`` and ``owner`` fields.
        """
        datasets = Dataset.objects(id__in=[r.id for r in reuse.datasets])
        datasets = list(datasets.only('id', 'title').no_dereference())
        organization = None
        owner = None
        if reuse.organization:
            organization = Organization.objects(id=reuse.organization.id).first()
        elif reuse.owner:
            owner = User.objects(id=reuse.owner.id).first()
        return {
            'title': reuse.title,
            'description': reuse.description,
            'url': reuse.url,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'type': reuse.type,
            'tags': reuse.tags,
            'tag_suggest': reuse.tags,
            'badges': [badge.kind for badge in reuse.badges],
            'created': to_iso_datetime(reuse.created_at),
            'last_modified': to_iso_datetime(reuse.last_modified),
            'dataset': [{
                'id': str(d.id),
                'title': d.title
            } for d in datasets],
            'metrics': reuse.metrics,
            'featured': reuse.featured,
            'extras': reuse.extras,
            'reuse_suggest': {
                'input': cls.completer_tokenize(reuse.title) + [reuse.id],
                'output': str(reuse.id),
                'payload': {
                    'title': reuse.title,
                    'slug': reuse.slug,
                    'image_url': reuse.image(40, external=True),
                },
            },
        }
class IndexedFile(Document):
    """
    Elasticsearch document representing an indexed file.

    Thin wrapper around `elasticsearch_dsl.Document`.
    """
    name = Text(analyzer=file_analyzer,
                fields={
                    '_exact': Keyword(),
                    '_pattern': Text(analyzer=file_pattern_analyzer),
                    '_reverse': Text(analyzer=reverse_file_analyzer)
                })
    path = Text(fields={
        '_comps': Text(analyzer=path_analyzer),
        '_exact': Keyword(),
        '_reverse': Text(analyzer=reverse_file_analyzer)
    })
    lastModified = Date()
    length = Long()
    format = Text()
    mimeType = Keyword()
    type = Text()
    system = Text(fields={'_exact': Keyword()})
    basePath = Text(fields={
        '_comps': Text(analyzer=path_analyzer),
        '_exact': Keyword()
    })
    lastUpdated = Date()
    pems = Object(
        properties={
            'username': Keyword(),
            'recursive': Boolean(),
            'permission': Object(properties={
                'read': Boolean(),
                'write': Boolean(),
                'execute': Boolean()
            })
        })

    def save(self, *args, **kwargs):
        """
        Sets `lastUpdated` attribute on save.

        Otherwise see elasticsearch_dsl.Document.save()
        """
        self.lastUpdated = datetime.datetime.now()
        return super(IndexedFile, self).save(*args, **kwargs)

    def update(self, *args, **kwargs):
        """
        Sets `lastUpdated` attribute on update.

        Otherwise see elasticsearch_dsl.Document.update()
        """
        lastUpdated = datetime.datetime.now()
        return super(IndexedFile, self).update(lastUpdated=lastUpdated,
                                               *args, **kwargs)

    @classmethod
    def from_path(cls, system, path):
        """
        Fetches an IndexedFile with the specified system and path.

        Parameters
        ----------
        system: str
            System attribute of the indexed file.
        path: str
            Path attribute of the indexed file.

        Returns
        -------
        IndexedFile

        Raises
        ------
        elasticsearch.exceptions.NotFoundError
        """
        uuid = file_uuid_sha256(system, path)
        return cls.get(uuid)

    def children(self):
        """
        Yields all children of the indexed file. Non-recursive.

        Yields
        ------
        IndexedFile
        """
        search = self.search()
        search = search.filter('term', **{'basePath._exact': self.path})
        search = search.filter('term', **{'system._exact': self.system})
        for hit in search.scan():
            yield self.get(hit.meta.id)

    def delete_recursive(self):
        """
        Recursively delete an indexed file and all of its children.

        Returns
        -------
        Void
        """
        for child in self.children():
            child.delete_recursive()
        self.delete()

    class Index:
        name = settings.ES_INDEX_PREFIX.format('files')
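# A usage sketch, assuming documents were indexed under ids produced by
# file_uuid_sha256(system, path); the system and path values are illustrative.
f = IndexedFile.from_path('project.storage', '/data/report.txt')
print(f.name, f.length, f.lastModified)

folder = IndexedFile.from_path('project.storage', '/data')
for child in folder.children():  # non-recursive listing
    print(child.path)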
class Repos(DocType):
    is_public = Boolean()
class Order(BaseESModel):
    """Elasticsearch representation of Order model."""

    id = Keyword()
    reference = fields.NormalizedKeyword(
        fields={'trigram': fields.TrigramText()},
    )
    status = fields.NormalizedKeyword()
    company = fields.company_field()
    contact = fields.contact_or_adviser_field()
    created_by = fields.contact_or_adviser_field(include_dit_team=True)
    created_on = Date()
    modified_on = Date()
    primary_market = fields.id_name_field()
    sector = fields.sector_field()
    uk_region = fields.id_name_field()
    description = fields.EnglishText()
    contacts_not_to_approach = Text()
    further_info = Text()
    existing_agents = Text(index=False)
    delivery_date = Date()
    service_types = fields.id_name_field()
    contact_email = fields.NormalizedKeyword()
    contact_phone = Keyword()
    subscribers = fields.contact_or_adviser_field(include_dit_team=True)
    assignees = fields.contact_or_adviser_field(include_dit_team=True)
    po_number = Keyword(index=False)
    discount_value = Integer(index=False)
    vat_status = Keyword(index=False)
    vat_number = Keyword(index=False)
    vat_verified = Boolean(index=False)
    net_cost = Integer(index=False)
    subtotal_cost = Integer(fields={'keyword': Keyword()})
    vat_cost = Integer(index=False)
    total_cost = Integer(fields={'keyword': Keyword()})
    payment_due_date = Date()
    paid_on = Date()
    completed_by = fields.contact_or_adviser_field()
    completed_on = Date()
    cancelled_by = fields.contact_or_adviser_field()
    cancelled_on = Date()
    cancellation_reason = fields.id_name_field()
    billing_company_name = Text()
    billing_contact_name = Text()
    billing_email = fields.NormalizedKeyword()
    billing_phone = fields.NormalizedKeyword()
    billing_address_1 = Text()
    billing_address_2 = Text()
    billing_address_town = fields.NormalizedKeyword()
    billing_address_county = fields.NormalizedKeyword()
    billing_address_postcode = Text()
    billing_address_country = fields.id_name_field()

    MAPPINGS = {
        'company': dict_utils.company_dict,
        'contact': dict_utils.contact_or_adviser_dict,
        'created_by': dict_utils.adviser_dict_with_team,
        'primary_market': dict_utils.id_name_dict,
        'sector': dict_utils.sector_dict,
        'uk_region': dict_utils.id_name_dict,
        'service_types': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'subscribers': lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser, include_dit_team=True)
            for c in col.all()
        ],
        'assignees': lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser, include_dit_team=True)
            for c in col.all()
        ],
        'billing_address_country': dict_utils.id_name_dict,
        'completed_by': dict_utils.contact_or_adviser_dict,
        'cancelled_by': dict_utils.contact_or_adviser_dict,
        'cancellation_reason': dict_utils.id_name_dict,
    }

    COMPUTED_MAPPINGS = {
        'payment_due_date': lambda x: x.invoice.payment_due_date if x.invoice else None,
    }

    SEARCH_FIELDS = (
        'id',
        'reference.trigram',
        'company.name',
        'company.name.trigram',
        'contact.name',
        'contact.name.trigram',
        'total_cost.keyword',
        'subtotal_cost.keyword',
    )

    class Meta:
        """Default document meta data."""
        doc_type = DEFAULT_MAPPING_TYPE

    class Index:
        doc_type = DEFAULT_MAPPING_TYPE
class Contact(BaseESModel):
    """Elasticsearch representation of Contact model."""

    id = Keyword()
    address_1 = Text()
    address_2 = Text()
    address_town = fields.NormalizedKeyword()
    address_county = fields.NormalizedKeyword()
    address_postcode = Text()
    address_country = fields.id_name_field()
    address_same_as_company = Boolean()
    adviser = fields.contact_or_adviser_field()
    archived = Boolean()
    archived_by = fields.contact_or_adviser_field()
    archived_on = Date()
    archived_reason = Text()
    company = fields.company_field()
    company_sector = fields.sector_field()
    company_uk_region = fields.id_name_field()
    created_by = fields.contact_or_adviser_field(include_dit_team=True)
    created_on = Date()
    email = fields.NormalizedKeyword()
    email_alternative = Text()
    first_name = Text(fields={'keyword': fields.NormalizedKeyword()})
    job_title = fields.NormalizedKeyword()
    last_name = Text(fields={'keyword': fields.NormalizedKeyword()})
    modified_on = Date()
    name = Text(fields={
        'keyword': fields.NormalizedKeyword(),
        'trigram': fields.TrigramText(),
    })
    notes = fields.EnglishText()
    primary = Boolean()
    telephone_alternative = Text()
    telephone_countrycode = Keyword()
    telephone_number = Keyword()
    title = fields.id_name_field()

    MAPPINGS = {
        'adviser': dict_utils.contact_or_adviser_dict,
        'archived_by': dict_utils.contact_or_adviser_dict,
        'company': dict_utils.company_dict,
        'created_by': dict_utils.adviser_dict_with_team,
        'title': dict_utils.id_name_dict,
    }

    COMPUTED_MAPPINGS = {
        'address_1': contact_dict_utils.computed_address_field('address_1'),
        'address_2': contact_dict_utils.computed_address_field('address_2'),
        'address_town': contact_dict_utils.computed_address_field('address_town'),
        'address_county': contact_dict_utils.computed_address_field('address_county'),
        'address_postcode': contact_dict_utils.computed_address_field('address_postcode'),
        'address_country': contact_dict_utils.computed_address_field('address_country'),
        'company_sector': dict_utils.computed_nested_sector_dict('company.sector'),
        'company_uk_region': dict_utils.computed_nested_id_name_dict('company.uk_region'),
    }

    SEARCH_FIELDS = (
        'id',
        'name',
        'name.trigram',
        'email',
        'email_alternative',
        'company.name',
        'company.name.trigram',
    )

    class Meta:
        """Default document meta data."""
        doc_type = DEFAULT_MAPPING_TYPE

    class Index:
        doc_type = DEFAULT_MAPPING_TYPE
class BlogPostIndex(DocType):
    id = Integer()
    title = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    char_num = Integer()
    allow_comments = Boolean()
    vote_num = Integer()
    category = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    publish_date = Date()
    suggestions = Completion()

    class Meta:
        index = "blogpost-index"

    @classmethod
    def add(cls, **kwargs):
        id = kwargs.pop("id", None)
        if id is None:
            return False
        blog = cls(meta={"id": id}, **kwargs)
        blog.save()
        return blog

    @staticmethod
    def search_posts(words, delim="......<br>......<br>......"):
        """
        Assemble the result using a raw query body.

        :param words: search keywords
        :return: list of hits with highlighted title and content
        """
        q = {
            "_source": ["title", "category", "tags", "publish_date"],
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [],
                    "should": [
                        {"term": {"tags": "{}".format(words)}},
                        {"term": {"title": "{}".format(words)}},
                        {"term": {"content": "{}".format(words)}},
                    ],
                }
            },
            "highlight": {
                "number_of_fragments": 3,
                "fragment_size": 150,
                "fields": {
                    "title": {"pre_tags": ["<em>"], "post_tags": ["</em>"]},
                    "content": {"pre_tags": ["<em>"], "post_tags": ["</em>"]},
                },
            },
            "from": 0,
            "size": 50,
            "sort": [],
            "aggs": {},
        }
        response = es_client.search(index="blogpost-index", body=q)
        r = []
        for item in response["hits"]["hits"]:
            if item.get("highlight", None):
                if item["highlight"].get("title", None):
                    title = "".join(item["highlight"]["title"])
                else:
                    title = item["_source"]["title"]
                if item["highlight"].get("content", None):
                    content = delim.join(item["highlight"]["content"]) + "......<br>"
                else:
                    content = ""
                r.append({
                    "origin_title": item["_source"]["title"],
                    "title": title,
                    "content": content,
                })
        return r

    @staticmethod
    def suggest_word(words):
        q = {
            "_source": False,
            "suggest": {
                "search-as-you-type-suggestion": {
                    "prefix": "{}".format(words),
                    "completion": {
                        "field": "suggestions",
                        "size": 10,
                        "fuzzy": {"fuzziness": 2},
                        "skip_duplicates": True,
                    },
                }
            },
        }
        response = es_client.search(index="blogpost-index", body=q)
        tmp = response["suggest"]["search-as-you-type-suggestion"]
        options = []
        if len(tmp) >= 1:
            options = [item["text"] for item in tmp[0]["options"]]
        return options

    @staticmethod
    def similar_recommends_post(words):
        pass
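# A hedged sketch of exercising the helpers above, assuming es_client is the
# same configured elasticsearch.Elasticsearch instance the class uses; field
# values are illustrative.
from datetime import datetime

BlogPostIndex.init()  # create index and mapping once
BlogPostIndex.add(id=1,
                  title="Getting started with Elasticsearch",
                  content="Indexing and querying basics.",
                  tags="elasticsearch",
                  publish_date=datetime.now(),
                  suggestions="Getting started with Elasticsearch")
hits = BlogPostIndex.search_posts("elasticsearch")  # raw-body term search
words = BlogPostIndex.suggest_word("Gett")          # completion suggestions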
class DocTestSSLResult(DocType):
    class Meta:
        doc_type = "TestSSLResult"

    source = String(fields={'raw': String(index='not_analyzed')})
    result = Boolean()
    timestamp = Date()
    ip = String(index='not_analyzed')
    hostname = String(index='not_analyzed')
    port = Integer()
    svcid = String(index='not_analyzed')
    protocols = String(index='not_analyzed', multi=True)
    ciphers = String(multi=True, fields={'raw': String(index='not_analyzed')})
    ciphertests = String(index='not_analyzed', multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": String(index='not_analyzed'),
            "cipher": String(fields={'raw': String(index='not_analyzed')})
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": String(fields={'raw': String(index='not_analyzed')}),
            "md5_fingerprint": String(index='not_analyzed'),
            "sha1_fingerprint": String(index='not_analyzed'),
            "sha256_fingerprint": String(index='not_analyzed'),
            "cn": String(fields={'raw': String(index='not_analyzed')}),
            "san": String(multi=True, fields={'raw': String(index='not_analyzed')}),
            "issuer": String(fields={'raw': String(index='not_analyzed')}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": String(fields={'raw': String(index='not_analyzed')}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = String(index='not_analyzed', multi=True)

    def parseCSVLine(self, line):
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:  # host, ip and port
            m = reIpHostColumn.search(line['host'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):  # protocols
            self.result = True
            m = reProtocol.search(line['id'])
            if m:
                self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif reCipherTests.search(line['id']) and reVulnerable.search(line['finding']):  # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))
        elif line['id'] == "order":  # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['finding']))
        elif line['id'] == "order_proto":  # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "order_cipher":  # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "key_size":  # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "algorithm":  # certificate signature algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "fingerprint":  # certificate fingerprints
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cn":  # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "san":  # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                sans = m.group(1)
                for san in sans.split(" "):
                    if san != "--":
                        self.cert.san.append(san)
        elif line['id'] == "issuer":  # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":  # certificate extended validation
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "expiration":  # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(unparsedDate,
                                                         "%Y-%m-%d %H:%M %z")
        elif line['id'] == "ocsp_uri":  # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"
        elif line['id'] == "ocsp_stapling":  # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "ccs", "secure_renego", "sec_client_renego",
                            "crime", "breach", "poodle_ssl", "fallback_scsv", "freak",
                            "DROWN", "logjam", "beast",
                            "rc4") and reVulnerable.search(line['finding']):
            self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(
            csvfile, fieldnames=("id", "host", "port", "severity", "finding"),
            delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            self.result = False

        if 'debug' in kwargs and kwargs['debug']:
            pp.pprint(self.to_dict())
        return super().save()
class PoeItem(InnerDoc):
    """
    items
    """
    abyssJewel = Boolean()
    additionalProperties = Boolean(multi=True)
    artFilename = Text()
    category = Nested(PoeCategory)
    corrupted = Boolean()
    cosmeticMods = Text(multi=True)
    craftedMods = Text(multi=True)
    descrText = Text()
    duplicated = Boolean()
    elder = Boolean()
    enchantMods = Text(multi=True)
    explicitMods = Text(multi=True)
    flavourText = Text(multi=True)
    frameType = Integer()
    h = Integer()
    icon = Keyword()
    id = Keyword()
    identified = Boolean()
    ilvl = Integer()
    implicitMods = Text(multi=True)
    inventoryId = Text()
    isRelic = Boolean()
    league = Keyword()
    lockedToCharacter = Boolean()
    maxStackSize = Integer()
    name = Text()
    nextLevelRequirements = Nested(PoePropsReqs, multi=True)
    note = Keyword()
    properties = Nested(PoePropsReqs, multi=True)
    prophecyDiffText = Text()
    prophecyText = Text()
    requirements = Nested(PoePropsReqs, multi=True)
    secDescrText = Text()
    shaper = Boolean()
    socketedItems = Nested()
    sockets = Nested(PoeSockets)
    stackSize = Integer()
    support = Boolean()
    talismanTier = Integer()
    typeLine = Text()
    utilityMods = Text(multi=True)
    verified = Boolean()
    w = Integer()
    x = Integer()
    y = Integer()
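# As an InnerDoc, PoeItem only describes a reusable fragment; it becomes
# searchable once embedded in a Document. A minimal hypothetical parent:
class PoeStash(Document):  # hypothetical container for items
    accountName = Keyword()
    league = Keyword()
    items = Nested(PoeItem)

    class Index:
        name = 'poe-stashes'  # illustrative index name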
class Group(Document):
    """
    Meetup.com Group Model with elasticsearch persistence

    Meetup Group doc:
    https://meetup.com/de-DE/meetup_api/docs/:urlname/?uri=%2Fmeetup_api%2Fdocs%2F%3Aurlname%2F#get

    Elasticsearch persistence doc ->
    https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html#persistence

    Raises:
        GroupDoesNotExists: Raised when a requested group does not exist on
                            elasticsearch or on meetup
    """

    class Index:
        """
        Elasticsearch index of the model

        For overriding the default index ->
        https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html#document-life-cycle
        """
        name = "meetup_group"

    # required fields
    meetup_id = Long(required=True)
    urlname = Text(required=True)
    created = Date(default_timezone="UTC", required=True)
    description = Text(analyzer="snowball", required=True)
    name = Text(required=True)
    link = Text(required=True)
    location = GeoPoint(required=True)
    members = Integer(required=True)
    status = Text(required=True)
    timezone = Text(required=True)
    visibility = Text(required=True)

    # optional fields
    nomination_acceptable = Boolean()
    city = Text()
    city_link = Text()
    country = Text()
    fee_options_currencies_code = Text()
    fee_options_currencies_default = Boolean()
    fee_options_type = Text()
    join_mode = Text()
    localized_country_name = Text()
    localized_location = Text()
    member_limit = Integer()
    short_link = Text()
    state = Text()
    untranslated_city = Text()
    welcome_message = Text()
    who = Text()

    # category
    category_id = Long()
    category_name = Text()
    category_shortname = Text()
    category_sort_name = Text()

    # meta_category
    meta_category_id = Long()
    meta_category_shortname = Text()
    meta_category_name = Text()
    meta_category_sort_name = Text()

    # topics
    topics = Nested(Topic)

    # organizer
    organizer_id = Integer()
    organizer_name = Text()
    organizer_bio = Text()

    # events
    events = Nested(Event)

    # suggest fields (auto-filled on save)
    name_suggest = Completion()

    def add_event(self, event: Event):
        """
        Add a single event object to the group.

        Arguments:
            event {Event} -- Event which should be added
        """
        self.events.append(event)

    def add_topic(self, topic: Topic):
        """
        Add a single topic object to the group.

        Arguments:
            topic {Topic} -- Topic which should be added
        """
        self.topics.append(topic)

    def add_events(self, events: List[Event]):
        """
        Add multiple event objects to the group.

        Arguments:
            events {List[Event]} -- Event list which should be added
        """
        self.events.extend(events)

    def event_exists(self, event_meetup_id: str) -> bool:
        """
        Check if an event with this meetup_id exists in this group on elasticsearch

        Arguments:
            event_meetup_id {str} -- meetup_id of the requested event

        Returns:
            bool -- True -> Event exists; False -> Event does not exist
        """
        for event in self.events:
            if event.meetup_id == event_meetup_id:
                return True
        return False

    def save(self, **kwargs):
        """
        Overwrite save method to set suggest fields
        """
        self.name_suggest = self.name
        return super().save(**kwargs)

    @property
    def last_event_time(self) -> Optional[datetime]:
        """
        Get the event time of the last event, if any event exists

        Usage:
            group: Group = Group(...)
            group.last_event_time

        Returns:
            Optional[datetime] -- Last event time when any event exists in this
                                  group, else None
        """
        last_event_time: Optional[datetime] = None
        for event in self.events:
            if last_event_time:
                if event.time > last_event_time:
                    last_event_time = event.time
            else:
                last_event_time = event.time
        return last_event_time

    @staticmethod
    def delete_if_exists(urlname: str) -> bool:
        """
        Delete a group based on the urlname, if it exists.

        Usage:
            Group.delete_if_exists(urlname="MyGroupToDelete")

        Arguments:
            urlname {str} -- The Group URL name

        Returns:
            bool -- True -> Group was deleted; False -> Group doesn't exist on
                    elasticsearch
        """
        try:
            group: Group = Group.get_group(urlname)
            group.delete()
            return True
        except GroupDoesNotExists:
            return False

    @staticmethod
    def get_group(urlname: str) -> Group:
        """
        Get Group from elasticsearch based on urlname

        Arguments:
            urlname {str} -- Group urlname

        Raises:
            GroupDoesNotExists: When the Group does not exist on elasticsearch

        Returns:
            Group -- the requested Group object from elasticsearch
        """
        s: Search = Group.search()
        s = s.query("match", urlname=urlname)
        results: Response = s.execute()
        for group in results:
            return group
        raise GroupDoesNotExists(
            "{} does not exist in elasticsearch!".format(urlname))

    @staticmethod
    def get_or_create_by_urlname(urlname: str, meetup_id: int, created: datetime,
                                 description: str, name: str, link: str,
                                 lat: float, lon: float, members: int,
                                 status: str, timezone: str,
                                 visibility: str) -> Group:
        """
        Get a Group object from elasticsearch based on the urlname and update it
        with all arguments. When the Group does not exist on elasticsearch,
        create a new Group object with all arguments.

        Arguments:
            urlname {str} -- Meetup Group urlname
            meetup_id {int} -- Meetup Group id
            created {datetime} -- creation time of the Meetup Group
            description {str} -- Meetup Group description
            name {str} -- Meetup Group name
            link {str} -- link to the Group Meetup URL
            lat {float} -- Meetup Group location lat
            lon {float} -- Meetup Group location lon
            members {int} -- Meetup Group member count
            status {str} -- Meetup Group status
            timezone {str} -- Meetup Group timezone
            visibility {str} -- Meetup Group visibility

        Returns:
            Group -- Updated or new Group object from elasticsearch
        """
        s: Search = Group.search()
        s = s.query("match", urlname=urlname)
        results: Response = s.execute()
        for group in results:
            group.description = description
            group.name = name
            group.location = {"lat": lat, "lon": lon}
            group.members = members
            group.status = status
            group.timezone = timezone
            group.visibility = visibility
            group.save()
            return group
        return Group(
            urlname=urlname,
            meetup_id=meetup_id,
            created=created,
            description=description,
            name=name,
            link=link,
            location={"lat": lat, "lon": lon},
            members=members,
            status=status,
            timezone=timezone,
            visibility=visibility,
        )

    @staticmethod
    def get_all_groups() -> List[Group]:
        """
        Get all groups from elasticsearch

        Returns:
            List[Group] -- all groups from elasticsearch
        """
        s: Search = Group.search()
        s = s.query("match_all")
        results: Response = s.execute()

        groups: List[Group] = []
        for group in results:
            groups.append(group)
        return groups

    @staticmethod
    def add_event_venue_to_list(venue_list: List[dict], event: Event) -> List[dict]:
        """
        Add a venue to the list, if it wasn't already included

        Arguments:
            venue_list {List[dict]} -- list of venue dicts
            event {Event} -- event to add

        Returns:
            List[dict] -- input venue_list with the event venue added
        """
        # check if there is no venue information in event
        if not event.venue_location or not event.venue_name:
            return venue_list

        # exit method if venue already exists
        for venue in venue_list:
            if venue["location"] == event.venue_location:
                return venue_list

        event_dict: dict = event.to_dict()

        # append venue if it does not exist
        venue_list.append({
            "name": event_dict["venue_name"],
            "location": event_dict["venue_location"],
        })
        return venue_list

    @staticmethod
    def get_venue_location_average(venue_list: List[dict]) -> dict:
        """
        Calculate the average location of all venues

        Arguments:
            venue_list {List[dict]} -- venue list to average over

        Returns:
            dict -- {'lat': float, 'lon': float}
        """
        if len(venue_list) == 0:
            raise ValueError("The size of venue_list needs to be larger than 0!")

        lat_average: float = 0
        lon_average: float = 0
        for venue in venue_list:
            lat_average = lat_average + venue["location"]["lat"]
            lon_average = lon_average + venue["location"]["lon"]
        lat_average = lat_average / len(venue_list)
        lon_average = lon_average / len(venue_list)
        return {"lat": lat_average, "lon": lon_average}

    def to_json_dict(self, load_events: bool) -> dict:
        """
        Convert to_dict into a JSON serializable dict object.

        Also add a venue dict to the group for each venue that was used by
        this group in any event.

        Arguments:
            load_events {bool} -- load events into dict

        Returns:
            dict -- JSON serializable dict object
        """
        group_dict: dict = self.to_dict()

        # set group venues
        group_dict["venues"] = []
        for event in self.events:
            group_dict["venues"] = self.add_event_venue_to_list(
                group_dict["venues"], event)
        if len(group_dict["venues"]) > 0:
            group_dict["venue_location_average"] = self.get_venue_location_average(
                group_dict["venues"])

        for field in group_dict:
            if "events" in group_dict:
                for event_dict in group_dict["events"]:
                    # load events into dict
                    if load_events:
                        for event_field in event_dict:
                            # todo: remove duplicated events to reduce bandwidth
                            if isinstance(event_dict[event_field], datetime):
                                event_dict[event_field] = event_dict[
                                    event_field].strftime("%Y-%m-%dT%H:%M:%S%z")
                    else:
                        group_dict["events"] = []
            if isinstance(group_dict[field], datetime):
                group_dict[field] = group_dict[field].strftime("%Y-%m-%dT%H:%M:%S%z")
        return group_dict
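# A sketch of the intended round trip for the model above; all field values
# are illustrative.
from datetime import datetime

group = Group.get_or_create_by_urlname(
    urlname="Python-Users-Demo", meetup_id=123456,
    created=datetime.utcnow(), description="A demo group.",
    name="Python Users Demo", link="https://www.meetup.com/Python-Users-Demo/",
    lat=52.52, lon=13.41, members=250, status="active",
    timezone="Europe/Berlin", visibility="public")
group.save()  # also fills name_suggest
payload = group.to_json_dict(load_events=True)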
class Company(BaseESModel):
    """Elasticsearch representation of Company model."""

    id = Keyword()
    archived = Boolean()
    archived_by = fields.nested_contact_or_adviser_field('archived_by')
    archived_on = Date()
    archived_reason = Text()
    business_type = fields.nested_id_name_field()
    classification = fields.nested_id_name_field()
    companies_house_data = fields.nested_ch_company_field()
    company_number = fields.SortableCaseInsensitiveKeywordText()
    contacts = fields.nested_contact_or_adviser_field('contacts')
    created_on = Date()
    description = fields.EnglishText()
    employee_range = fields.nested_id_name_field()
    export_experience_category = fields.nested_id_name_field()
    export_to_countries = fields.nested_id_name_field()
    future_interest_countries = fields.nested_id_name_field()
    global_headquarters = fields.nested_id_name_field()
    headquarter_type = fields.nested_id_name_field()
    modified_on = Date()
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()
    one_list_account_owner = fields.nested_contact_or_adviser_field(
        'one_list_account_owner')
    reference_code = fields.SortableCaseInsensitiveKeywordText()
    registered_address_1 = Text()
    registered_address_2 = Text()
    registered_address_town = fields.SortableCaseInsensitiveKeywordText()
    registered_address_county = Text()
    registered_address_country = fields.nested_id_name_partial_field(
        'registered_address_country',
    )
    registered_address_postcode = Text(
        copy_to=['registered_address_postcode_trigram'],
    )
    registered_address_postcode_trigram = fields.TrigramText()
    sector = fields.nested_sector_field()
    trading_address_1 = Text()
    trading_address_2 = Text()
    trading_address_town = fields.SortableCaseInsensitiveKeywordText()
    trading_address_county = Text()
    trading_address_postcode = Text(
        copy_to=['trading_address_postcode_trigram'],
    )
    trading_address_postcode_trigram = fields.TrigramText()
    trading_address_country = fields.nested_id_name_partial_field(
        'trading_address_country',
    )
    trading_name = fields.SortableText(
        copy_to=['trading_name_keyword', 'trading_name_trigram'],
    )
    trading_name_keyword = fields.SortableCaseInsensitiveKeywordText()
    trading_name_trigram = fields.TrigramText()
    turnover_range = fields.nested_id_name_field()
    uk_region = fields.nested_id_name_field()
    uk_based = Boolean()
    vat_number = Keyword(index=False)
    website = Text()

    COMPUTED_MAPPINGS = {
        'trading_name': attrgetter('alias'),
    }

    MAPPINGS = {
        'id': str,
        'archived_by': dict_utils.contact_or_adviser_dict,
        'business_type': dict_utils.id_name_dict,
        'classification': dict_utils.id_name_dict,
        'companies_house_data': dict_utils.ch_company_dict,
        'contacts': lambda col: [dict_utils.contact_or_adviser_dict(c) for c in col.all()],
        'employee_range': dict_utils.id_name_dict,
        'export_experience_category': dict_utils.id_name_dict,
        'export_to_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'future_interest_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'global_headquarters': dict_utils.id_name_dict,
        'headquarter_type': dict_utils.id_name_dict,
        'one_list_account_owner': dict_utils.contact_or_adviser_dict,
        'registered_address_country': dict_utils.id_name_dict,
        'sector': dict_utils.sector_dict,
        'trading_address_country': dict_utils.id_name_dict,
        'turnover_range': dict_utils.id_name_dict,
        'uk_based': bool,
        'uk_region': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = (
        'name',
        'name_trigram',
        'company_number',
        'trading_name',
        'trading_name_trigram',
        'reference_code',
        'registered_address_country.name_trigram',
        'registered_address_postcode_trigram',
        'trading_address_country.name_trigram',
        'trading_address_postcode_trigram',
        'uk_region.name_trigram',
    )

    class Meta:
        """Default document meta data."""
        doc_type = 'company'
class IndexedFile(Document):
    name = Text(analyzer=file_analyzer,
                fields={
                    '_exact': Keyword(),
                    '_pattern': Text(analyzer=file_pattern_analyzer),
                    '_reverse': Text(analyzer=reverse_file_analyzer)
                })
    path = Text(fields={
        '_exact': Keyword(),
        '_path': Text(analyzer=path_analyzer)
    })
    lastModified = Date()
    length = Long()
    format = Text()
    mimeType = Keyword()
    type = Text()
    system = Text(fields={'_exact': Keyword()})
    systemId = Text()
    basePath = Text(fields={'_exact': Keyword()})
    dsMeta = Nested()
    permissions = Nested(
        properties={
            'username': Keyword(),
            'recursive': Boolean(),
            'permission': Nested(properties={
                'read': Boolean(),
                'write': Boolean(),
                'execute': Boolean()
            })
        })

    @classmethod
    def _pems_filter(cls, username):
        term_username_query = Q('term', **{'permissions.username': username})
        term_world_query = Q('term', **{'permissions.username': '******'})
        bool_query = Q('bool')
        bool_query.should = [term_username_query, term_world_query]
        nested_query = Q('nested')
        nested_query.path = 'permissions'
        nested_query.query = bool_query
        return nested_query

    @classmethod
    def from_path(cls, system, path):
        Index(settings.ES_INDICES['files']['alias']).refresh()
        search = cls.search()
        sys_filter = Q('term', **{'system._exact': system})
        path_filter = Q('term', **{'path._exact': path})
        search = search.filter(sys_filter & path_filter)
        res = search.execute()
        if res.hits.total.value > 1:
            id_filter = Q('term', **{'_id': res[0].meta.id})
            # Delete all files indexed with the same system/path,
            # except the first result
            delete_query = sys_filter & path_filter & ~id_filter
            cls.search().filter(delete_query).delete()
            return cls.get(res[0].meta.id)
        elif res.hits.total.value == 1:
            return cls.get(res[0].meta.id)
        else:
            raise DocumentNotFound("No document found for "
                                   "{}/{}".format(system, path))

    @classmethod
    def children(cls, username, system, path, limit=100, search_after=None):
        search = cls.search()
        # search = search.filter(cls._pems_filter(username))
        search = search.filter('term', **{'basePath._exact': path})
        search = search.filter('term', **{'system._exact': system})
        search = search.sort('_id')
        search = search.extra(size=limit)
        if search_after:
            search = search.extra(search_after=search_after)
        res = search.execute()
        if len(res.hits) > 0:
            wrapped_children = [cls.get(doc.meta.id) for doc in res]
            sort_key = res.hits.hits[-1]['sort']
            return wrapped_children, sort_key
        else:
            return [], None

    class Index:
        name = settings.ES_INDICES['files']['alias']

    class Meta:
        dynamic = MetaField('strict')
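# children returns a (page, sort_key) pair, so listing a large directory is a
# search_after cursor loop; a sketch with illustrative arguments.
children, cursor = IndexedFile.children('jdoe', 'project.storage', '/jdoe', limit=50)
while children:
    for child in children:
        print(child.path)
    children, cursor = IndexedFile.children('jdoe', 'project.storage', '/jdoe',
                                            limit=50, search_after=cursor)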
class Repos(Document):
    is_public = Boolean()
    created_at = Date()
class Repos(Document):
    is_public = Boolean()
    created_at = Date()

    class Index:
        name = "git"
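# The smallest end-to-end lifecycle for a mapping like this, following the
# standard elasticsearch-dsl persistence pattern; the host and document id
# are assumptions.
from datetime import datetime
from elasticsearch_dsl.connections import connections

connections.create_connection(hosts=['localhost'])  # assumed endpoint
Repos.init()  # create the "git" index with this mapping

repo = Repos(meta={'id': 'elasticsearch-dsl-py'}, is_public=True,
             created_at=datetime.now())
repo.save()
print(Repos.get(id='elasticsearch-dsl-py').is_public)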
class SneakerItem(Document):
    price = Integer(required=True)
    sizes = Keyword(multi=True)
    name = Text()
    brand = Text()
    model = Text()
    colorway = Text()
    item_id = Text(fields={'keyword': Keyword()})
    url = Text(analyzer='simple',
               fields={'keyword': Keyword()},
               required=True)
    img_url = Text(analyzer='simple',
                   fields={'keyword': Keyword()})
    telegram_img_id = Text(analyzer='simple',
                           fields={'keyword': Keyword()})
    last_update = Double()
    new = Boolean()
    new_sizes = Keyword(multi=True)
    price_change = Integer()

    class Index:
        name = index_name
        using = client

    @property
    def descr(self):
        return (self.name or (self.brand + self.model + self.colorway)).upper()

    def get_bulk_update_dict(self):
        d = self.to_dict(include_meta=True)
        del d['_source']
        d['_op_type'] = 'update'
        d['script'] = self.get_update_script()
        d['upsert'] = self.get_upsert_dict()
        return d

    def get_update_script(self):
        return {
            'lang': 'painless',
            'source': update_script,
            'params': {
                'sizes': list(self.sizes),
                'price': self.price,
                'new_update_time': get_time(),
                'img_url': self.img_url,
            }
        }

    def get_upd_dict(self):
        pass

    def get_upsert_dict(self):
        d = self.to_dict()
        d['new'] = True
        return d
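# get_bulk_update_dict produces actions for the elasticsearch bulk helper
# (a scripted update with an upsert fallback); a sketch assuming each item
# carries an explicit document id and that client is the connection declared
# in the Index above.
from elasticsearch.helpers import bulk

items = [
    SneakerItem(meta={'id': 'ab-1234'}, price=120, sizes=['9', '10'],
                name='Demo Runner', url='https://example.com/ab-1234'),
]
bulk(client, (item.get_bulk_update_dict() for item in items))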
def build_mapping(cls):
    """Generate the mapping definition for indexed messages"""
    m = Mapping(cls.doc_type)
    m.meta('_all', enabled=True)

    # attachments
    m.field(
        'attachments',
        Nested(doc_class=IndexedMessageAttachment,
               include_in_all=True,
               properties={
                   "content_type": Keyword(),
                   "file_name": Keyword(),
                   "is_inline": Boolean(),
                   "size": Integer(),
                   "temp_id": Keyword(),
                   "url": Keyword(),
                   "mime_boundary": Keyword()
               }))
    m.field('body_html', 'text',
            fields={
                "normalized": {"type": "text", "analyzer": "text_analyzer"}
            })
    m.field('body_plain', 'text',
            fields={
                "normalized": {"type": "text", "analyzer": "text_analyzer"}
            })
    m.field('date', 'date')
    m.field('date_delete', 'date')
    m.field('date_insert', 'date')
    m.field('date_sort', 'date')
    m.field('discussion_id', 'keyword')

    # external references
    m.field(
        'external_references',
        Nested(doc_class=IndexedExternalReferences,
               include_in_all=True,
               properties={
                   "ancestors_ids": Keyword(),
                   "message_id": Keyword(),
                   "parent_id": Keyword()
               }))

    # identities
    identities = Nested(doc_class=IndexedExternalReferences,
                        include_in_all=True)
    identities.field("identifier", "text",
                     fields={
                         "raw": Keyword(),
                         "parts": {"type": "text", "analyzer": "email_analyzer"}
                     })
    identities.field("type", "keyword")
    m.field('identities', identities)

    m.field('importance_level', 'short')
    m.field('is_answered', 'boolean')
    m.field('is_draft', 'boolean')
    m.field('is_unread', 'boolean')
    m.field('is_received', 'boolean')
    m.field('message_id', 'keyword')
    m.field('parent_id', 'keyword')

    # participants
    participants = Nested(doc_class=IndexedParticipant, include_in_all=True)
    participants.field("address", "text", analyzer="text_analyzer",
                       fields={
                           "raw": {"type": "keyword"},
                           "parts": {"type": "text", "analyzer": "email_analyzer"}
                       })
    participants.field("contact_ids", Keyword(multi=True))
    participants.field("label", "text", analyzer="text_analyzer")
    participants.field("protocol", Keyword())
    participants.field("type", Keyword())
    m.field('participants', participants)

    # PI
    pi = Nested(doc_class=PIIndexModel,
                include_in_all=True,
                properties={
                    "technic": "integer",
                    "comportment": "integer",
                    "context": "integer",
                    "version": "integer",
                    "date_update": "date"
                })
    m.field("pi", pi)

    m.field('privacy_features', Nested(include_in_all=True))
    m.field('raw_msg_id', "keyword")
    m.field('subject', 'text',
            fields={
                "normalized": {"type": "text", "analyzer": "text_analyzer"}
            })
    m.field('tags', Keyword(multi=True))
    m.field('type', 'keyword')

    return m
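# build_mapping only assembles the Mapping object; it still has to be written
# to an index. A sketch, assuming the function lives on a hypothetical
# IndexedMessage class and a default connection is configured; the index name
# is illustrative.
m = IndexedMessage.build_mapping()
m.save('user-messages-index')  # pushes the mapping through the default connection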
class DatasetSearch(ModelSearchAdapter):
    model = Dataset
    fuzzy = True
    exclude_fields = ['spatial.geom', 'spatial.zones.geom']

    class Meta:
        doc_type = 'Dataset'

    title = String(analyzer=i18n_analyzer,
                   fields={'raw': String(index='not_analyzed')})
    # Referenced by the 'acronym^7' boost below and the suggest payload.
    acronym = String(index='not_analyzed')
    description = String(analyzer=i18n_analyzer)
    license = String(index='not_analyzed')
    frequency = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    tags = String(index='not_analyzed',
                  fields={'i18n': String(index='not_analyzed')})
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    resources = Object(properties={
        'title': String(),
        'description': String(),
        'format': String(index='not_analyzed')
    })
    format_suggest = Completion(analyzer=simple,
                                search_analyzer=simple,
                                payloads=False)
    dataset_suggest = Completion(analyzer=simple,
                                 search_analyzer=simple,
                                 payloads=True)
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Dataset)
    featured = Boolean()
    temporal_coverage = Nested(multi=False, properties={
        'start': Long(),
        'end': Long()
    })
    # A stray trailing comma previously turned this mapping into a tuple.
    temporal_weight = Long()
    geozones = Object(properties={
        'id': String(index='not_analyzed'),
        'name': String(index='not_analyzed'),
        'keys': String(index='not_analyzed')
    })
    granularity = String(index='not_analyzed')
    spatial_weight = Long()
    from_certified = Boolean()

    fields = (
        'geozones.keys^9',
        'geozones.name^9',
        'acronym^7',
        'title^6',
        'tags.i18n^3',
        'description',
    )
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }
    facets = {
        'tag': TermsFacet(field='tags'),
        'badge': TermsFacet(field='badges',
                            labelizer=dataset_badge_labelizer),
        'organization': ModelTermsFacet(field='organization',
                                        model=Organization),
        'owner': ModelTermsFacet(field='owner', model=User),
        'license': ModelTermsFacet(field='license', model=License),
        'geozone': ModelTermsFacet(field='geozones.id', model=GeoZone,
                                   labelizer=zone_labelizer),
        'granularity': TermsFacet(field='granularity',
                                  labelizer=granularity_labelizer),
        'format': TermsFacet(field='resources.format'),
        'resource_type': TermsFacet(field='resources.type',
                                    labelizer=resource_type_labelizer),
        'reuses': RangeFacet(field='metrics.reuses',
                             ranges=[('none', (None, 1)),
                                     ('few', (1, 5)),
                                     ('quite', (5, 10)),
                                     ('many', (10, None))],
                             labels={
                                 'none': _('Never reused'),
                                 'few': _('Little reused'),
                                 'quite': _('Quite reused'),
                                 'many': _('Heavily reused'),
                             }),
        'temporal_coverage': TemporalCoverageFacet(field='temporal_coverage'),
        'featured': BoolFacet(field='featured'),
    }
    boosters = [
        BoolBooster('featured', 1.5),
        BoolBooster('from_certified', 1.2),
        ValueFactor('spatial_weight', missing=1),
        ValueFactor('temporal_weight', missing=1),
        GaussDecay('metrics.reuses', max_reuses, decay=0.1),
        GaussDecay('metrics.followers', max_followers, max_followers,
                   decay=0.1),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        return (dataset.deleted is None
                and len(dataset.resources) > 0
                and not dataset.private)

    @classmethod
    def get_suggest_weight(cls, temporal_weight, spatial_weight, featured):
        '''Compute the suggest part of the indexation payload'''
        featured_weight = 1 if not featured else FEATURED_WEIGHT
        return int(temporal_weight * spatial_weight * featured_weight * 10)

    @classmethod
    def serialize(cls, dataset):
        organization = None
        owner = None
        image_url = None
        spatial_weight = DEFAULT_SPATIAL_WEIGHT
        temporal_weight = DEFAULT_TEMPORAL_WEIGHT
        if dataset.organization:
            organization = Organization.objects(
                id=dataset.organization.id).first()
            image_url = organization.logo(40, external=True)
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()
            image_url = owner.avatar(40, external=True)
        certified = organization and organization.certified
        document = {
            'title': dataset.title,
            'description': dataset.description,
            'license': getattr(dataset.license, 'id', None),
            'tags': dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest': dataset.tags,
            'resources': [{
                'title': r.title,
                'description': r.description,
                'format': r.format,
                'type': r.type,
            } for r in dataset.resources],
            'format_suggest': [r.format.lower()
                               for r in dataset.resources if r.format],
            'frequency': dataset.frequency,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'dataset_suggest': {
                'input': cls.completer_tokenize(dataset.title) +
                         [str(dataset.id)],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'acronym': dataset.acronym,
                    'image_url': image_url,
                },
            },
            'created': dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified': dataset.last_modified.strftime(
                '%Y-%m-%dT%H:%M:%S'),
            'metrics': dataset.metrics,
            'featured': dataset.featured,
            'from_certified': certified,
        }
        if (dataset.temporal_coverage is not None
                and dataset.temporal_coverage.start
                and dataset.temporal_coverage.end):
            start = dataset.temporal_coverage.start.toordinal()
            end = dataset.temporal_coverage.end.toordinal()
            temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
            document.update({
                'temporal_coverage': {'start': start, 'end': end},
                'temporal_weight': temporal_weight,
            })
        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            coverage_level = ADMIN_LEVEL_MAX
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents |= set(zone.parents)
                coverage_level = min(coverage_level, admin_levels[zone.level])
            geozones.extend([{'id': p} for p in parents])
            spatial_weight = ADMIN_LEVEL_MAX / coverage_level
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
                'spatial_weight': spatial_weight,
            })
        document['dataset_suggest']['weight'] = cls.get_suggest_weight(
            temporal_weight, spatial_weight, dataset.featured)
        if dataset.acronym:
            document['dataset_suggest']['input'].append(dataset.acronym)
        return document
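# Illustration (not from the original source): how the suggest weight combines
# the three factors. The argument values and the FEATURED_WEIGHT figure used
# in the comment below are assumptions picked for the arithmetic.
weight = DatasetSearch.get_suggest_weight(
    temporal_weight=2.0,   # roughly two years of temporal coverage
    spatial_weight=4.0,    # fine-grained administrative level
    featured=True,
)
# Assuming FEATURED_WEIGHT == 2, this yields int(2.0 * 4.0 * 2 * 10) == 160.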
class InvestmentProject(BaseESModel):
    """Elasticsearch representation of InvestmentProject."""

    id = Keyword()
    actual_land_date = Date()
    actual_uk_regions = fields.nested_id_name_field()
    address_1 = Text()
    address_2 = Text()
    address_town = fields.SortableCaseInsensitiveKeywordText()
    address_postcode = Text()
    approved_commitment_to_invest = Boolean()
    approved_fdi = Boolean()
    approved_good_value = Boolean()
    approved_high_value = Boolean()
    approved_landed = Boolean()
    approved_non_fdi = Boolean()
    allow_blank_estimated_land_date = Boolean(index=False)
    allow_blank_possible_uk_regions = Boolean(index=False)
    anonymous_description = fields.EnglishText()
    archived = Boolean()
    archived_by = fields.nested_contact_or_adviser_field('archived_by')
    archived_on = Date()
    archived_reason = Text()
    associated_non_fdi_r_and_d_project = _nested_investment_project_field()
    average_salary = fields.nested_id_name_field()
    business_activities = fields.nested_id_name_field()
    client_cannot_provide_foreign_investment = Boolean()
    client_cannot_provide_total_investment = Boolean()
    client_contacts = fields.nested_contact_or_adviser_field('client_contacts')
    client_relationship_manager = fields.nested_contact_or_adviser_field(
        'client_relationship_manager',
        include_dit_team=True,
    )
    client_requirements = fields.TextWithKeyword()
    comments = fields.EnglishText()
    country_lost_to = _country_lost_to_mapping()
    created_on = Date()
    created_by = fields.nested_contact_or_adviser_field(
        'created_by',
        include_dit_team=True,
    )
    date_abandoned = Date()
    date_lost = Date()
    delivery_partners = fields.nested_id_name_field()
    description = fields.EnglishText()
    estimated_land_date = Date()
    export_revenue = Boolean()
    fdi_type = fields.nested_id_name_field()
    fdi_value = fields.nested_id_name_field()
    foreign_equity_investment = Double()
    government_assistance = Boolean()
    intermediate_company = fields.nested_id_name_field()
    investor_company = fields.nested_id_name_partial_field('investor_company')
    investor_company_country = fields.nested_id_name_field()
    investment_type = fields.nested_id_name_field()
    investor_type = fields.nested_id_name_field()
    level_of_involvement = fields.nested_id_name_field()
    likelihood_of_landing = Long()
    project_assurance_adviser = fields.nested_contact_or_adviser_field(
        'project_assurance_adviser',
        include_dit_team=True,
    )
    project_manager = fields.nested_contact_or_adviser_field(
        'project_manager',
        include_dit_team=True,
    )
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()
    new_tech_to_uk = Boolean()
    non_fdi_r_and_d_budget = Boolean()
    number_new_jobs = Integer()
    number_safeguarded_jobs = Long()
    modified_on = Date()
    project_arrived_in_triage_on = Date()
    project_code = fields.SortableCaseInsensitiveKeywordText(
        copy_to='project_code_trigram')
    project_code_trigram = fields.TrigramText()
    proposal_deadline = Date()
    other_business_activity = fields.TextWithKeyword()
    quotable_as_public_case_study = Boolean()
    r_and_d_budget = Boolean()
    reason_abandoned = fields.TextWithKeyword()
    reason_delayed = fields.TextWithKeyword()
    reason_lost = fields.TextWithKeyword()
    referral_source_activity = fields.nested_id_name_field()
    referral_source_activity_event = fields.SortableCaseInsensitiveKeywordText()
    referral_source_activity_marketing = fields.nested_id_name_field()
    referral_source_activity_website = fields.nested_id_name_field()
    referral_source_adviser = _referral_source_adviser_mapping()
    sector = fields.nested_sector_field()
    site_decided = Boolean()
    some_new_jobs = Boolean()
    specific_programme = fields.nested_id_name_field()
    stage = fields.nested_id_name_field()
    status = fields.SortableCaseInsensitiveKeywordText()
    team_members = fields.nested_contact_or_adviser_field(
        'team_members', include_dit_team=True)
    total_investment = Double()
    uk_company = fields.nested_id_name_partial_field('uk_company')
    uk_company_decided = Boolean()
    uk_region_locations = fields.nested_id_name_field()
    will_new_jobs_last_two_years = Boolean()

    MAPPINGS = {
        'id': str,
        'actual_uk_regions': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'archived_by': dict_utils.contact_or_adviser_dict,
        'associated_non_fdi_r_and_d_project': dict_utils.investment_project_dict,
        'average_salary': dict_utils.id_name_dict,
        'business_activities': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'client_contacts': lambda col: [dict_utils.contact_or_adviser_dict(c) for c in col.all()],
        'client_relationship_manager': dict_utils.adviser_dict_with_team,
        'country_lost_to': dict_utils.id_name_dict,
        'created_by': dict_utils.adviser_dict_with_team,
        'delivery_partners': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'fdi_type': dict_utils.id_name_dict,
        'fdi_value': dict_utils.id_name_dict,
        'intermediate_company': dict_utils.id_name_dict,
        'investment_type': dict_utils.id_name_dict,
        'investor_company': dict_utils.id_name_dict,
        'investor_type': dict_utils.id_name_dict,
        'level_of_involvement': dict_utils.id_name_dict,
        'project_assurance_adviser': dict_utils.adviser_dict_with_team,
        'project_code': str,
        'project_manager': dict_utils.adviser_dict_with_team,
        'referral_source_activity': dict_utils.id_name_dict,
        'referral_source_activity_marketing': dict_utils.id_name_dict,
        'referral_source_activity_website': dict_utils.id_name_dict,
        'referral_source_adviser': dict_utils.contact_or_adviser_dict,
        'sector': dict_utils.sector_dict,
        'specific_programme': dict_utils.id_name_dict,
        'stage': dict_utils.id_name_dict,
        'team_members': lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser, include_dit_team=True)
            for c in col.all()
        ],
        'uk_company': dict_utils.id_name_dict,
        'uk_region_locations': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
    }

    COMPUTED_MAPPINGS = {
        'investor_company_country': dict_utils.computed_nested_id_name_dict(
            'investor_company.registered_address_country',
        ),
    }

    SEARCH_FIELDS = (
        'name',
        'name_trigram',
        'uk_company.name',
        'uk_company.name_trigram',
        'investor_company.name',
        'investor_company.name_trigram',
        'project_code_trigram',
    )

    class Meta:
        """Default document meta data."""
        doc_type = 'investment_project'
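# Sketch (an assumption, not code from this repository): how MAPPINGS and
# COMPUTED_MAPPINGS might be applied to turn a model instance into an index
# document. The helper name `serialize_instance` and the attribute-access
# pattern are hypothetical.
def serialize_instance(obj, es_model):
    doc = {}
    for field, mapper in es_model.MAPPINGS.items():
        value = getattr(obj, field, None)
        # Collection mappers receive the related manager itself (see col.all()).
        doc[field] = mapper(value) if value is not None else None
    for field, computer in getattr(es_model, 'COMPUTED_MAPPINGS', {}).items():
        # Computed mappings derive a value from the whole object.
        doc[field] = computer(obj)
    return doc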
def build_mapping(cls):
    """Create elasticsearch indexed_contacts mapping object for a user."""
    m = Mapping(cls.doc_type)
    m.meta('_all', enabled=True)
    m.field('additional_name', 'text',
            fields={"normalized": {"type": "text", "analyzer": "text_analyzer"}})
    # addresses
    addresses = Nested(doc_class=IndexedPostalAddress, include_in_all=True,
                       properties={
                           "address_id": "keyword",
                           "label": "text",
                           "type": "keyword",
                           "is_primary": "boolean",
                           "street": "text",
                           "city": "text",
                           "postal_code": "keyword",
                           "country": "text",
                           "region": "text"
                       })
    m.field("addresses", addresses)
    m.field("avatar", "keyword")
    m.field('date_insert', 'date')
    m.field('date_update', 'date')
    m.field('deleted', 'date')
    # emails
    internet_addr = Nested(doc_class=IndexedInternetAddress,
                           include_in_all=True)
    internet_addr.field("address", "text", analyzer="text_analyzer",
                        fields={
                            "raw": {"type": "keyword"},
                            "parts": {"type": "text",
                                      "analyzer": "email_analyzer"}
                        })
    internet_addr.field("email_id", Keyword())
    internet_addr.field("is_primary", Boolean())
    internet_addr.field("label", "text", analyzer="text_analyzer")
    internet_addr.field("type", Keyword())
    m.field("emails", internet_addr)
    m.field('family_name', "text",
            fields={"normalized": {"type": "text", "analyzer": "text_analyzer"}})
    m.field('given_name', 'text',
            fields={"normalized": {"type": "text", "analyzer": "text_analyzer"}})
    m.field("groups", Keyword(multi=True))
    # social ids
    social_ids = Nested(doc_class=IndexedSocialIdentity, include_in_all=True,
                        properties={
                            "name": "text",
                            "type": "keyword",
                            "infos": Nested()
                        })
    m.field("identities", social_ids)
    m.field("ims", internet_addr)
    m.field("infos", Nested())
    m.field('name_prefix', 'keyword')
    m.field('name_suffix', 'keyword')
    # organizations
    organizations = Nested(doc_class=IndexedOrganization, include_in_all=True)
    organizations.field("deleted", Boolean())
    organizations.field("department", "text", analyzer="text_analyzer")
    organizations.field("is_primary", Boolean())
    organizations.field("job_description", "text")
    organizations.field("label", "text", analyzer="text_analyzer")
    organizations.field("name", 'text',
                        fields={"normalized": {"type": "text",
                                               "analyzer": "text_analyzer"}})
    organizations.field("organization_id", Keyword())
    organizations.field("title", Keyword())
    organizations.field("type", Keyword())
    m.field("organizations", organizations)
    # phones
    phones = Nested(doc_class=IndexedPhone, include_in_all=True,
                    properties={
                        "is_primary": "boolean",
                        "number": "text",
                        "normalized_number": "text",
                        "phone_id": "keyword",
                        "type": "keyword",
                        "uri": "keyword"
                    })
    m.field("phones", phones)
    # pi
    pi = Nested(doc_class=PIIndexModel, include_in_all=True,
                properties={
                    "comportment": "integer",
                    "context": "integer",
                    "date_update": "date",
                    "technic": "integer",
                    "version": "integer"
                })
    m.field("pi", pi)
    m.field("privacy_features", Nested(include_in_all=True))
    m.field("public_key", Nested())
    m.field("social_identities", social_ids)
    m.field("tags", Keyword(multi=True))
    m.field('title', 'text')
    return m
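# Hedged example (assumptions: an index named "contacts" already carries this
# mapping, and "email_analyzer" tokenizes addresses into local-part and domain
# parts). Because "emails" is a Nested field, the query must go through a
# nested clause to match on the "parts" subfield.
from elasticsearch_dsl import Q, Search

s = Search(index="contacts").query(
    "nested", path="emails",
    query=Q("match", **{"emails.address.parts": "example.com"}))
response = s.execute()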
class Interaction(BaseESModel):
    """Elasticsearch representation of Interaction model."""

    id = Keyword()
    company = fields.company_field()
    company_sector = fields.sector_field()
    company_one_list_group_tier = fields.id_unindexed_name_field()
    communication_channel = fields.id_unindexed_name_field()
    contacts = _contact_field()
    created_on = Date()
    date = Date()
    dit_participants = Object(_DITParticipant)
    event = fields.id_name_partial_field()
    investment_project = fields.id_unindexed_name_field()
    investment_project_sector = fields.sector_field()
    is_event = Boolean(index=False)
    grant_amount_offered = Double(index=False)
    kind = Keyword()
    modified_on = Date()
    net_company_receipt = Double(index=False)
    notes = fields.Text(index=False)
    policy_areas = fields.id_unindexed_name_field()
    policy_issue_types = fields.id_unindexed_name_field()
    service = fields.id_unindexed_name_field()
    service_delivery_status = fields.id_unindexed_name_field()
    subject = fields.NormalizedKeyword(
        fields={'english': fields.EnglishText()},
    )
    was_policy_feedback_provided = Boolean()

    MAPPINGS = {
        'company': dict_utils.company_dict,
        'communication_channel': dict_utils.id_name_dict,
        'contacts': dict_utils.contact_or_adviser_list_of_dicts,
        'dit_participants': _dit_participant_list,
        'event': dict_utils.id_name_dict,
        'investment_project': dict_utils.id_name_dict,
        'policy_areas': dict_utils.id_name_list_of_dicts,
        'policy_issue_types': dict_utils.id_name_list_of_dicts,
        'service': dict_utils.id_name_dict,
        'service_delivery_status': dict_utils.id_name_dict,
    }

    COMPUTED_MAPPINGS = {
        'company_sector': dict_utils.computed_nested_sector_dict(
            'company.sector'),
        'company_one_list_group_tier': lambda obj: dict_utils.id_name_dict(
            obj.company.get_one_list_group_tier() if obj.company else None,
        ),
        'investment_project_sector': dict_utils.computed_nested_sector_dict(
            'investment_project.sector',
        ),
        'is_event': attrgetter('is_event'),
    }

    SEARCH_FIELDS = (
        'id',
        'company.name',
        'company.name.trigram',
        'contacts.name',  # to find 2-letter words
        'contacts.name.trigram',
        'event.name',
        'event.name.trigram',
        'subject.english',
        'dit_participants.adviser.name',
        'dit_participants.adviser.name.trigram',
        'dit_participants.team.name',
        'dit_participants.team.name.trigram',
    )

    class Meta:
        """Default document meta data."""
        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
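# Sketch (assumed usage, not from the original source): a cross-field search
# over Interaction.SEARCH_FIELDS, roughly the query a BaseESModel-style
# adapter might issue. The index name "interaction" is an assumption.
from elasticsearch_dsl import Search

s = (Search(index="interaction")
     .query("multi_match", query="trade mission",
            fields=list(Interaction.SEARCH_FIELDS)))
response = s.execute()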
class LayerIndex(DocType):
    id = Integer()
    abstract = Text(fields={
        'pattern': field.Text(analyzer=pattern_analyzer),
        'english': field.Text(analyzer='english')
    })
    category__gn_description = Text()
    csw_type = Keyword()
    csw_wkt_geometry = Keyword()
    detail_url = Keyword()
    owner__username = Keyword(fields={'text': field.Text()})
    owner__first_name = Text()
    owner__last_name = Text()
    is_published = Boolean()
    featured = Boolean()
    popular_count = Integer()
    share_count = Integer()
    rating = Integer()
    srid = Keyword()
    supplemental_information = Text()
    source_host = Keyword(fields={'text': field.Text()})
    thumbnail_url = Keyword()
    uuid = Keyword()
    title = Text(fields={
        'pattern': field.Text(analyzer=pattern_analyzer),
        'english': field.Text(analyzer='english')
    })
    date = Date()
    type = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })
    subtype = Keyword(fields={'text': field.Text()})
    typename = Keyword()
    title_sortable = Keyword()
    category = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })
    bbox_left = Float()
    bbox_right = Float()
    bbox_bottom = Float()
    bbox_top = Float()
    temporal_extent_start = Date()
    temporal_extent_end = Date()
    keywords = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })
    regions = Keyword(fields={
        'text': field.Text(),
        'english': field.Text(analyzer='english')
    })
    references = Field(properties={
        'url': Text(),
        'name': Keyword(fields={'text': field.Text()}),
        'scheme': Keyword(fields={
            'text': field.Text(),
            'pattern': field.Text(analyzer=pattern_analyzer)
        })
    })
    num_ratings = Integer()
    num_comments = Integer()
    geogig_link = Keyword()
    has_time = Boolean()

    class Meta:
        index = 'layer-index'
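# Sketch (assumed usage): filtering layers whose bounding box overlaps a query
# box, using the four flat bbox_* fields. The query-box coordinates below are
# sample values; two boxes overlap when each edge clears the opposite edge of
# the other box.
from elasticsearch_dsl import Search

qminx, qminy, qmaxx, qmaxy = -1.0, 48.5, 2.5, 49.2
s = (Search(index='layer-index')
     .filter('range', bbox_left={'lte': qmaxx})
     .filter('range', bbox_right={'gte': qminx})
     .filter('range', bbox_bottom={'lte': qmaxy})
     .filter('range', bbox_top={'gte': qminy}))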
class Declaration(DocType, AbstractDeclaration):
    """Declaration document.
    Assumes there's a dynamic mapping with all fields not indexed by default."""

    general = Object(
        properties={
            'full_name_suggest': Completion(preserve_separators=False),
            'full_name': Text(index=True, analyzer='ukrainian'),
            # only for sorting purposes
            'full_name_for_sorting': Keyword(index=True, ignore_above=100),
            'name': Text(index=True, analyzer='ukrainian'),
            'patronymic': Text(index=True, analyzer='ukrainian'),
            'last_name': Text(index=True, analyzer='ukrainian'),
            'family_raw': Text(index=True, analyzer='ukrainian'),
            'family': Nested(
                properties={
                    'name': Text(index=True, analyzer='ukrainian'),
                    'relations': Keyword(index=False),
                    'inn': Keyword(index=False)
                }),
            'post_raw': Text(index=True, analyzer='ukrainian'),
            'post': Object(
                properties={
                    'region': Text(index=True, analyzer='ukrainian',
                                   fields={'raw': Keyword(index=True)}),
                    'office': Text(index=True, analyzer='ukrainian',
                                   fields={'raw': Keyword(index=True)}),
                    'post': Text(index=True, analyzer='ukrainian',
                                 fields={'raw': Keyword(index=True)})
                }),
            'addresses': Nested(
                properties={
                    'place': Text(index=False),
                    'place_hidden': Boolean(index=False),
                    'place_district': Text(index=False),
                    'place_district_hidden': Boolean(index=False),
                    'place_city': Text(index=False),
                    'place_city_hidden': Boolean(index=False),
                    'place_city_type': Keyword(index=False),
                    'place_city_type_hidden': Boolean(index=False),
                    'place_address': Text(index=False),
                    'place_address_hidden': Boolean(index=False),
                    'place_address_type': Keyword(index=False)
                })
        })
    declaration = Object(
        properties={
            'date': NoneAwareDate(),
            'notfull': Boolean(index=False),
            'notfull_lostpages': Keyword(index=False),
            'additional_info': Boolean(index=False),
            'additional_info_text': Text(index=False),
            'needs_scancopy_check': Boolean(index=False)
        })
    intro = Object(properties={'declaration_year': Keyword(index=True)})
    ft_src = Text(index=True, analyzer='ukrainian')
    # concatenated from a set of fields for regular search (not deepsearch mode)
    index_card = Text(index=True, analyzer='ukrainian')

    INDEX_CARD_FIELDS = [
        "general.last_name", "general.name", "general.patronymic",
        "general.full_name", "general.post.post", "general.post.office",
        "general.post.region", "general.post.actual_region",
        "intro.declaration_year", "intro.doc_type", "declaration.source",
        "declaration.url"
    ]

    INCOME_SINGLE_PROPERTIES = {
        'value': Keyword(index=False),
        'value_unclear': Boolean(index=False),
        'comment': Text(index=False),
        'family': Keyword(index=False),
        'family_unclear': Boolean(index=False),
        'family_comment': Text(index=False)
    }
    INCOME_LIST_PROPERTIES = {
        'country': Keyword(index=False),
        'country_comment': Text(index=False),
        'cur': Keyword(index=False),
        'cur_units': Keyword(index=False),
        'uah_equal': Keyword(index=False)
    }
    # Sections 5-20 are single-value income entries; 21-22 are lists.
    income = Object(properties={
        '5': Object(properties=INCOME_SINGLE_PROPERTIES),
        '6': Object(properties=INCOME_SINGLE_PROPERTIES),
        '7': Object(properties=INCOME_SINGLE_PROPERTIES),
        '8': Object(properties=INCOME_SINGLE_PROPERTIES),
        '9': Object(properties=INCOME_SINGLE_PROPERTIES),
        '10': Object(properties=INCOME_SINGLE_PROPERTIES),
        '11': Object(properties=INCOME_SINGLE_PROPERTIES),
        '12': Object(properties=INCOME_SINGLE_PROPERTIES),
        '13': Object(properties=INCOME_SINGLE_PROPERTIES),
        '14': Object(properties=INCOME_SINGLE_PROPERTIES),
        '15': Object(properties=INCOME_SINGLE_PROPERTIES),
        '16': Object(properties=INCOME_SINGLE_PROPERTIES),
        '17': Object(properties=INCOME_SINGLE_PROPERTIES),
        '18': Object(properties=INCOME_SINGLE_PROPERTIES),
        '19': Object(properties=INCOME_SINGLE_PROPERTIES),
        '20': Object(properties=INCOME_SINGLE_PROPERTIES),
        '21': Nested(properties=INCOME_LIST_PROPERTIES),
        '22': Nested(properties=INCOME_LIST_PROPERTIES)
    })

    ESTATE_PROPERTIES = {
        'region': Text(index=False),
        'address': Text(index=False),
        'space': Keyword(index=False),
        'space_units': Keyword(index=False),
        'space_comment': Text(index=False),
        'costs': Keyword(index=False),
        'costs_comment': Text(index=False),
        'costs_rent': Keyword(index=False),
        'costs_rent_comment': Text(index=False),
        'costs_property': Keyword(index=False),
        'costs_property_comment': Text(index=False)
    }
    estate = Object(properties={
        '23': Nested(properties=ESTATE_PROPERTIES),
        '24': Nested(properties=ESTATE_PROPERTIES),
        '25': Nested(properties=ESTATE_PROPERTIES),
        '26': Nested(properties=ESTATE_PROPERTIES),
        '27': Nested(properties=ESTATE_PROPERTIES),
        '28': Nested(properties=ESTATE_PROPERTIES),
        '29': Nested(properties=ESTATE_PROPERTIES),
        '30': Nested(properties=ESTATE_PROPERTIES),
        '31': Nested(properties=ESTATE_PROPERTIES),
        '32': Nested(properties=ESTATE_PROPERTIES),
        '33': Nested(properties=ESTATE_PROPERTIES),
        '34': Nested(properties=ESTATE_PROPERTIES)
    })

    VEHICLE_PROPERTIES = {
        "brand": Text(index=False),
        "brand_info": Text(index=False),
        "year": Keyword(index=False),
        "sum": Keyword(index=False),
        "sum_comment": Text(index=False),
        "sum_rent": Keyword(index=False),
        "sum_rent_comment": Text(index=False),
        "brand_hidden": Boolean(index=False),
        "brand_info_hidden": Boolean(index=False),
        "brand_info_unclear": Boolean(index=False)
    }
    vehicle = Object(properties={
        '35': Nested(properties=VEHICLE_PROPERTIES),
        '36': Nested(properties=VEHICLE_PROPERTIES),
        '37': Nested(properties=VEHICLE_PROPERTIES),
        '38': Nested(properties=VEHICLE_PROPERTIES),
        '39': Nested(properties=VEHICLE_PROPERTIES),
        '40': Nested(properties=VEHICLE_PROPERTIES),
        '41': Nested(properties=VEHICLE_PROPERTIES),
        '42': Nested(properties=VEHICLE_PROPERTIES),
        '43': Nested(properties=VEHICLE_PROPERTIES),
        '44': Nested(properties=VEHICLE_PROPERTIES)
    })

    BANKS_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_hidden': Boolean(index=False),
        'sum_units': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_units': Keyword(index=False),
        'sum_foreign_comment': Text(index=False)
    }
    banks = Object(properties={
        '45': Nested(properties=BANKS_PROPERTIES),
        '46': Nested(properties=BANKS_PROPERTIES),
        '47': Nested(properties=BANKS_PROPERTIES),
        '48': Nested(properties=BANKS_PROPERTIES),
        '49': Nested(properties=BANKS_PROPERTIES),
        '50': Nested(properties=BANKS_PROPERTIES),
        '51': Nested(properties=BANKS_PROPERTIES),
        '52': Nested(properties=BANKS_PROPERTIES),
        '53': Nested(properties=BANKS_PROPERTIES),
    })

    LIABILITIES_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_units': Keyword(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_comment': Text(index=False)
    }
    liabilities = Object(properties={
        '54': Nested(properties=LIABILITIES_PROPERTIES),
        '55': Nested(properties=LIABILITIES_PROPERTIES),
        '56': Nested(properties=LIABILITIES_PROPERTIES),
        '57': Nested(properties=LIABILITIES_PROPERTIES),
        '58': Nested(properties=LIABILITIES_PROPERTIES),
        '59': Nested(properties=LIABILITIES_PROPERTIES),
        '60': Nested(properties=LIABILITIES_PROPERTIES),
        '61': Nested(properties=LIABILITIES_PROPERTIES),
        '62': Nested(properties=LIABILITIES_PROPERTIES),
        '63': Nested(properties=LIABILITIES_PROPERTIES),
        '64': Nested(properties=LIABILITIES_PROPERTIES),
    })

    def raw_source(self):
        src = self.to_dict()
        return blacklist(src, ["ft_src", "index_card"])

    def infocard(self):
        return {
            "first_name": self.general.name,
            "patronymic": self.general.patronymic,
            "last_name": self.general.last_name,
            "office": self.general.post.office,
            "position": self.general.post.post,
            "source": getattr(self.declaration, "source",
                              getattr(self, "source", "")),
            "id": self.meta.id,
            "url": settings.SITE_URL + reverse(
                "details", kwargs={"declaration_id": self.meta.id}),
            "document_type": "Щорічна",  # "Annual" declaration
            "is_corrected": False,
            "created_date": getattr(self.intro, "date",
                                    getattr(self.declaration, "date", ""))
        }

    def related_entities(self):
        return {
            "people": {
                "family": list(self.get_family_members())
            },
            "documents": {
                "corrected": [],
                "originals": [],
            },
            "companies": {
                "owned": [],
                "related": [],
                "all": [],
            }
        }

    def unified_source(self):
        try:
            doc = self.to_dict()
            doc["id"] = self.meta.id
            converter = PaperToNACPConverter(doc)
            return converter.convert()
        except ConverterError:
            return None

    class Meta:
        index = 'declarations_v2'
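# Sketch (assumed usage): a full-text lookup against ft_src followed by
# rendering infocards, roughly how a declarations listing might be built.
# The connection setup and the sample surname are assumptions.
results = Declaration.search().query(
    "match", ft_src={"query": "Петренко", "operator": "and"}).execute()
infocards = [d.infocard() for d in results]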
class Item(Document):
    id = Integer()
    title = Text()
    kind = Integer()
    content = Text()
    n_likes = Integer()
    n_collects = Integer()
    n_comments = Integer()
    can_show = Boolean()
    tags = Text(fields={"raw": Keyword()})
    created_at = Date()

    class Index:
        name = "test"

    @classmethod
    @cache(MC_KEY_ITEM.format("{id}", "{kind}"))
    def get(cls, id, kind):
        return super().get(f"{id}_{kind}")

    @classmethod
    def add(cls, item):
        obj = cls(**get_item_data(item))
        obj.save()
        obj.clear_mc(item.id, item.kind)
        return obj

    @classmethod
    def update_item(cls, item):
        obj = cls.get(item.id, item.kind)
        if obj is None:
            # Nothing indexed yet: fall back to creating the document.
            return cls.add(item)
        kw = get_item_data(item)
        try:
            obj.update(**kw)
        except ConflictError:
            # Stale cached copy: drop the cache, re-fetch and retry once.
            obj.clear_mc(item.id, item.kind)
            obj = cls.get(item.id, item.kind)
            obj.update(**kw)
        obj.clear_mc(item.id, item.kind)
        return True

    @classmethod
    def clear_mc(cls, id, kind):
        rdb.delete(MC_KEY_ITEM.format(id, kind))

    @classmethod
    def delete(cls, item):
        rs = cls.get(item.id, item.kind)
        if rs:
            super(cls, rs).delete()
            cls.clear_mc(item.id, item.kind)
            return True
        return False

    @classmethod
    def get_es(cls):
        search = cls.search()
        return connections.get_connection(search._using)

    @classmethod
    def bulk_update(cls, items, chunk_size=5000, op_type="update", **kwargs):
        index = cls._index._name
        _type = cls._doc_type.name
        obj = [{
            "_op_type": op_type,
            "_id": f"{doc.id}_{doc.kind}",
            "_index": index,
            "_type": _type,
            "_source": doc.to_dict(),
        } for doc in items]
        client = cls.get_es()
        rs = list(parallel_bulk(client, obj, chunk_size=chunk_size, **kwargs))
        for item in items:
            cls.clear_mc(item.id, item.kind)
        return rs

    @classmethod
    def new_search(cls, query, page, order_by=None, per_page=PER_PAGE):
        s = cls.search()
        s = s.query("multi_match", query=query, fields=SEARCH_FIELDS)
        start = (page - 1) * per_page
        s = s.extra(**{"from": start, "size": per_page})
        s = s if order_by is None else s.sort(order_by)
        rs = s.execute()
        # Group hit ids by kind, then hydrate each group from its own model.
        dct = defaultdict(list)
        for i in rs:
            dct[i.kind].append(i.id)
        items = []
        for kind, ids in dct.items():
            target_cls = TARGET_MAPPER.get(kind)
            if target_cls:
                items_ = target_cls.get_multi(ids)
                items.extend(items_)
        return Pagination(query, page, per_page, rs.hits.total, items)

    @classmethod
    @cache(
        MC_KEY_POST_IDS_BY_TAG.format("{tag}", "{page}", "{order_by}",
                                      "{per_page}"),
        ONE_HOUR,
    )
    def get_post_ids_by_tag(cls, tag, page, order_by=None, per_page=PER_PAGE):
        s = cls.search()
        # The tag filter was previously commented out even though results are
        # cached per tag; both conditions now apply.
        s = s.query(Q("bool", must=[Q("term", tags=tag),
                                    Q("term", kind=K_POST)]))
        start = (page - 1) * per_page
        s = s.extra(**{"from": start, "size": per_page})
        if order_by == "hot":
            s = s.query(Q("function_score", functions=[]))
        elif order_by:
            s = s.sort(order_by)
        rs = s.execute()
        ids = [obj.id for obj in rs]
        return Pagination(tag, page, per_page, rs.hits.total, ids)
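# Usage sketch (assumptions: a configured default connection, a TARGET_MAPPER
# that maps each kind to a model exposing get_multi, and a Pagination helper
# that exposes the hydrated objects as .items). Fetches page 1 of mixed-kind
# results for a query string.
from elasticsearch_dsl import connections

connections.create_connection(hosts=["localhost"])
page = Item.new_search("elasticsearch", page=1)
for obj in page.items:  # model objects grouped back from the ES hits
    print(obj.id)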
class Company(BaseESModel):
    """Elasticsearch representation of Company model."""

    id = Keyword()
    archived = Boolean()
    archived_by = fields.contact_or_adviser_field()
    archived_on = Date()
    archived_reason = Text()
    business_type = fields.id_name_field()
    company_number = fields.NormalizedKeyword()
    created_on = Date()
    description = fields.EnglishText()
    employee_range = fields.id_name_field()
    export_experience_category = fields.id_name_field()
    export_to_countries = fields.id_name_field()
    future_interest_countries = fields.id_name_field()
    global_headquarters = fields.id_name_field()
    headquarter_type = fields.id_name_field()
    modified_on = Date()
    name = Text(
        fields={
            'keyword': fields.NormalizedKeyword(),
            'trigram': fields.TrigramText(),
        },
    )
    reference_code = fields.NormalizedKeyword()
    sector = fields.sector_field()
    address = fields.address_field()
    registered_address = fields.address_field()
    one_list_group_global_account_manager = _adviser_field_with_indexed_id()
    trading_names = fields.TextWithTrigram()
    turnover_range = fields.id_name_field()
    uk_region = fields.id_name_field()
    uk_based = Boolean()
    uk_address_postcode = fields.PostcodeKeyword()
    uk_registered_address_postcode = fields.PostcodeKeyword()
    vat_number = Keyword(index=False)
    duns_number = Keyword()
    website = Text()
    suggest = Completion(
        contexts=[
            {
                'name': 'country',
                'type': 'category',
            },
        ],
    )
    latest_interaction_date = Date()

    COMPUTED_MAPPINGS = {
        'suggest': get_suggestions,
        'address': partial(dict_utils.address_dict, prefix='address'),
        'registered_address': partial(dict_utils.address_dict,
                                      prefix='registered_address'),
        'one_list_group_global_account_manager':
            dict_utils.computed_field_function(
                'get_one_list_group_global_account_manager',
                dict_utils.contact_or_adviser_dict,
            ),
        'latest_interaction_date': lambda obj: obj.latest_interaction_date,
        'uk_address_postcode':
            lambda obj: obj.address_postcode if obj.uk_based else '',
        'uk_registered_address_postcode':
            lambda obj: obj.registered_address_postcode if obj.uk_based else '',
    }

    MAPPINGS = {
        'archived_by': dict_utils.contact_or_adviser_dict,
        'business_type': dict_utils.id_name_dict,
        'employee_range': dict_utils.id_name_dict,
        'export_experience_category': dict_utils.id_name_dict,
        'export_to_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'future_interest_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'global_headquarters': dict_utils.id_name_dict,
        'headquarter_type': dict_utils.id_name_dict,
        'sector': dict_utils.sector_dict,
        'turnover_range': dict_utils.id_name_dict,
        'uk_based': bool,
        'uk_region': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = (
        'id',
        'name',  # to find 2-letter words
        'name.trigram',
        'company_number',
        'trading_names',  # to find 2-letter words
        'trading_names.trigram',
        'reference_code',
        'address.country.name.trigram',
        'address.postcode.trigram',
        'registered_address.country.name.trigram',
        'registered_address.postcode.trigram',
    )

    class Meta:
        """Default document meta data."""
        doc_type = DOC_TYPE

    class Index:
        doc_type = DOC_TYPE
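# Sketch (assumed usage): querying the company completion field with the
# 'country' category context declared above. The index name and the context
# value (a country id) are assumptions for illustration.
from elasticsearch_dsl import Search

s = Search(index='company')
s = s.suggest(
    'company_suggest', 'acme',
    completion={
        'field': 'suggest',
        'contexts': {'country': ['<country-id>']},
    },
)
response = s.execute()
options = response.suggest.company_suggest[0].options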
class Declaration(DocType, RelatedDeclarationsMixin):
    """Declaration document.
    Assumes there's a dynamic mapping with all fields not indexed by default."""

    general = Object(
        properties={
            'full_name_suggest': Completion(preserve_separators=False),
            'full_name': Text(index=True, analyzer='ukrainian'),
            'name': Text(index=True, analyzer='ukrainian'),
            'patronymic': Text(index=True, analyzer='ukrainian'),
            'last_name': Text(index=True, analyzer='ukrainian'),
            'family_raw': Text(index=True, analyzer='ukrainian'),
            'family': Nested(
                properties={
                    'name': Text(index=True, analyzer='ukrainian'),
                    'relations': Keyword(index=False),
                    'inn': Keyword(index=False)
                }),
            'post_raw': Text(index=True, analyzer='ukrainian'),
            'post': Object(
                properties={
                    'region': Text(index=True, analyzer='ukrainian',
                                   fields={'raw': Keyword(index=True)}),
                    'office': Text(index=True, analyzer='ukrainian',
                                   fields={'raw': Keyword(index=True)}),
                    'post': Text(index=True, analyzer='ukrainian',
                                 fields={'raw': Keyword(index=True)})
                }),
            'addresses': Nested(
                properties={
                    'place': Text(index=False),
                    'place_hidden': Boolean(index=False),
                    'place_district': Text(index=False),
                    'place_district_hidden': Boolean(index=False),
                    'place_city': Text(index=False),
                    'place_city_hidden': Boolean(index=False),
                    'place_city_type': Keyword(index=False),
                    'place_city_type_hidden': Boolean(index=False),
                    'place_address': Text(index=False),
                    'place_address_hidden': Boolean(index=False),
                    'place_address_type': Keyword(index=False)
                })
        })
    declaration = Object(
        properties={
            'date': NoneAwareDate(),
            'notfull': Boolean(index=False),
            'notfull_lostpages': Keyword(index=False),
            'additional_info': Boolean(index=False),
            'additional_info_text': Text(index=False),
            'needs_scancopy_check': Boolean(index=False)
        })
    intro = Object(properties={'declaration_year': Keyword(index=True)})
    ft_src = Text(index=True, analyzer='ukrainian')

    INCOME_SINGLE_PROPERTIES = {
        'value': Keyword(index=False),
        'value_unclear': Boolean(index=False),
        'comment': Text(index=False),
        'family': Keyword(index=False),
        'family_unclear': Boolean(index=False),
        'family_comment': Text(index=False)
    }
    INCOME_LIST_PROPERTIES = {
        'country': Keyword(index=False),
        'country_comment': Text(index=False),
        'cur': Keyword(index=False),
        'cur_units': Keyword(index=False),
        'uah_equal': Keyword(index=False)
    }
    # Sections 5-20 are single-value income entries; 21-22 are lists.
    income = Object(properties={
        '5': Object(properties=INCOME_SINGLE_PROPERTIES),
        '6': Object(properties=INCOME_SINGLE_PROPERTIES),
        '7': Object(properties=INCOME_SINGLE_PROPERTIES),
        '8': Object(properties=INCOME_SINGLE_PROPERTIES),
        '9': Object(properties=INCOME_SINGLE_PROPERTIES),
        '10': Object(properties=INCOME_SINGLE_PROPERTIES),
        '11': Object(properties=INCOME_SINGLE_PROPERTIES),
        '12': Object(properties=INCOME_SINGLE_PROPERTIES),
        '13': Object(properties=INCOME_SINGLE_PROPERTIES),
        '14': Object(properties=INCOME_SINGLE_PROPERTIES),
        '15': Object(properties=INCOME_SINGLE_PROPERTIES),
        '16': Object(properties=INCOME_SINGLE_PROPERTIES),
        '17': Object(properties=INCOME_SINGLE_PROPERTIES),
        '18': Object(properties=INCOME_SINGLE_PROPERTIES),
        '19': Object(properties=INCOME_SINGLE_PROPERTIES),
        '20': Object(properties=INCOME_SINGLE_PROPERTIES),
        '21': Nested(properties=INCOME_LIST_PROPERTIES),
        '22': Nested(properties=INCOME_LIST_PROPERTIES)
    })

    ESTATE_PROPERTIES = {
        'region': Text(index=False),
        'address': Text(index=False),
        'space': Keyword(index=False),
        'space_units': Keyword(index=False),
        'space_comment': Text(index=False),
        'costs': Keyword(index=False),
        'costs_comment': Text(index=False),
        'costs_rent': Keyword(index=False),
        'costs_rent_comment': Text(index=False),
        'costs_property': Keyword(index=False),
        'costs_property_comment': Text(index=False)
    }
    estate = Object(properties={
        '23': Nested(properties=ESTATE_PROPERTIES),
        '24': Nested(properties=ESTATE_PROPERTIES),
        '25': Nested(properties=ESTATE_PROPERTIES),
        '26': Nested(properties=ESTATE_PROPERTIES),
        '27': Nested(properties=ESTATE_PROPERTIES),
        '28': Nested(properties=ESTATE_PROPERTIES),
        '29': Nested(properties=ESTATE_PROPERTIES),
        '30': Nested(properties=ESTATE_PROPERTIES),
        '31': Nested(properties=ESTATE_PROPERTIES),
        '32': Nested(properties=ESTATE_PROPERTIES),
        '33': Nested(properties=ESTATE_PROPERTIES),
        '34': Nested(properties=ESTATE_PROPERTIES)
    })

    VEHICLE_PROPERTIES = {
        "brand": Text(index=False),
        "brand_info": Text(index=False),
        "year": Keyword(index=False),
        "sum": Keyword(index=False),
        "sum_comment": Text(index=False),
        "sum_rent": Keyword(index=False),
        "sum_rent_comment": Text(index=False),
        "brand_hidden": Boolean(index=False),
        "brand_info_hidden": Boolean(index=False),
        "brand_info_unclear": Boolean(index=False)
    }
    vehicle = Object(properties={
        '35': Nested(properties=VEHICLE_PROPERTIES),
        '36': Nested(properties=VEHICLE_PROPERTIES),
        '37': Nested(properties=VEHICLE_PROPERTIES),
        '38': Nested(properties=VEHICLE_PROPERTIES),
        '39': Nested(properties=VEHICLE_PROPERTIES),
        '40': Nested(properties=VEHICLE_PROPERTIES),
        '41': Nested(properties=VEHICLE_PROPERTIES),
        '42': Nested(properties=VEHICLE_PROPERTIES),
        '43': Nested(properties=VEHICLE_PROPERTIES),
        '44': Nested(properties=VEHICLE_PROPERTIES)
    })

    BANKS_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_hidden': Boolean(index=False),
        'sum_units': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_units': Keyword(index=False),
        'sum_foreign_comment': Text(index=False)
    }
    banks = Object(properties={
        '45': Nested(properties=BANKS_PROPERTIES),
        '46': Nested(properties=BANKS_PROPERTIES),
        '47': Nested(properties=BANKS_PROPERTIES),
        '48': Nested(properties=BANKS_PROPERTIES),
        '49': Nested(properties=BANKS_PROPERTIES),
        '50': Nested(properties=BANKS_PROPERTIES),
        '51': Nested(properties=BANKS_PROPERTIES),
        '52': Nested(properties=BANKS_PROPERTIES),
        '53': Nested(properties=BANKS_PROPERTIES),
    })

    LIABILITIES_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_units': Keyword(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_comment': Text(index=False)
    }
    liabilities = Object(properties={
        '54': Nested(properties=LIABILITIES_PROPERTIES),
        '55': Nested(properties=LIABILITIES_PROPERTIES),
        '56': Nested(properties=LIABILITIES_PROPERTIES),
        '57': Nested(properties=LIABILITIES_PROPERTIES),
        '58': Nested(properties=LIABILITIES_PROPERTIES),
        '59': Nested(properties=LIABILITIES_PROPERTIES),
        '60': Nested(properties=LIABILITIES_PROPERTIES),
        '61': Nested(properties=LIABILITIES_PROPERTIES),
        '62': Nested(properties=LIABILITIES_PROPERTIES),
        '63': Nested(properties=LIABILITIES_PROPERTIES),
        '64': Nested(properties=LIABILITIES_PROPERTIES),
    })

    class Meta:
        index = 'declarations_v2'
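# Sketch (assumed usage): name autocompletion against the
# general.full_name_suggest completion field. Connection wiring and the
# sample prefix are assumptions.
s = Declaration.search()
s = s.suggest('names', 'Петре',
              completion={'field': 'general.full_name_suggest'})
options = s.execute().suggest.names[0].options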