Пример #1
0
class DocTestSSLResult(Document):
    """Elasticsearch document describing the result of one SSL/TLS scan.

    The document is filled from a CSV report through :meth:`parseCSV`
    (the column layout looks like testssl.sh output -- TODO confirm) and
    persisted with :meth:`save`, which also derives ``svcid``.
    """

    source = Text(fields={'raw': Keyword()})
    result = Boolean()
    timestamp = Date()
    ip = Keyword()
    hostname = Keyword()
    port = Integer()
    svcid = Keyword()
    protocols = Keyword(multi=True)
    ciphers = Text(multi=True, fields={'raw': Keyword()})
    ciphertests = Keyword(multi=True)
    serverpref = Object(
        properties={
            "cipher_order": Boolean(),
            "protocol": Keyword(),
            "cipher": Text(fields={'raw': Keyword()})
        })
    cert = Object(
        properties={
            "keysize": Short(),
            "signalgo": Text(fields={'raw': Keyword()}),
            "md5_fingerprint": Keyword(),
            "sha1_fingerprint": Keyword(),
            "sha256_fingerprint": Keyword(),
            "cn": Text(fields={'raw': Keyword()}),
            "san": Text(multi=True, fields={'raw': Keyword()}),
            "issuer": Text(fields={'raw': Keyword()}),
            "ev": Boolean(),
            "expiration": Date(),
            "ocsp_uri": Text(fields={'raw': Keyword()}),
            "Crl_url": Text(fields={'raw': Keyword()}),
            "ocsp_stapling": Boolean(),
        })
    vulnerabilities = Keyword(multi=True)

    def parseCSVLine(self, line):
        """Fold one CSV row into this document.

        Args:
            line: mapping with the keys ``id``, ``fqdn/ip``, ``port``,
                ``severity`` and ``finding`` (as produced by
                :meth:`parseCSV`).

        Rows whose ``id`` column is literally ``"id"`` (the header row)
        are ignored.
        """
        if line['id'] == "id":
            return

        if not self.ip or not self.hostname or not self.port:  # host, ip and port
            m = reIpHostColumn.search(line['fqdn/ip'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        # First chain: offered protocols, offered ciphers, cipher tests.
        if reProtocol.search(line['id']) and reOffers.search(
                line['finding']):  # protocols
            self.result = True
            self.protocols.append(line['id'].upper())

        elif reCipherColumnName.search(line['id']):  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))

        else:  # cipher tests
            m = reCipherTests.search(line['id'])
            if m and reVulnerable.search(line['finding']):
                self.ciphertests.append(m.group(1))

        # Second chain: evaluated for every row, independently of the
        # chain above.
        if line['id'] == "cipher_order":  # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['severity']))

        elif line['id'] == "protocol_negotiated":  # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)

        elif line['id'] == "cipher_negotiated":  # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)

        elif line['id'] == "cert_keySize":  # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))

        elif line['id'] == "cert_signatureAlgorithm":  # certificate sign algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)

        elif line['id'] == "cert_fingerprintSHA1":  # certificate fingerprint SHA1
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)

        elif line['id'] == "cert_fingerprintSHA256":  # certificate fingerprint SHA256
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)

        elif line['id'] == "cert_fingerprintMD5":  # certificate fingerprint MD5
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)

        elif line['id'] == "cert_commonName":  # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)

        elif line['id'] == "cert_subjectAltName":  # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                # TODO: ``san`` is declared multi-valued but the whole match
                # is stored as a single string; splitting it into separate
                # names still needs revision.
                self.cert.san = m.group(1)

        elif line['id'] == "cert_caIssuers":  # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)

        elif line['id'] == "ev":  # certificate extended validation (unverified)
            self.cert.ev = bool(reYes.search(line['finding']))

        elif line['id'] == "cert_notAfter":  # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(
                    unparsedDate, "%Y-%m-%d %H:%M")

        elif line['id'] == "cert_ocspURL":  # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            # "-" is stored as a placeholder when no URI is found.
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"

        elif line['id'] == "cert_crlDistributionPoints":  # certificate CRL
            m = reAll.search(line['finding'])
            # "-" is stored as a placeholder when nothing matches.
            if m:
                self.cert.Crl_url = m.group(1)
            else:
                self.cert.Crl_url = "-"

        elif line['id'] == "OCSP_stapling":  # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(
                reNotOffered.search(line['finding']))

        elif line['id'] in ("heartbleed", "CCS", "secure_renego",
                            "secure_client_renego", "CRIME_TLS", "SWEET32",
                            "POODLE_SSL", "fallback_SCSV", "FREAK", "DROWN",
                            "LOGJAM", "BEAST", "LUCKY13",
                            "RC4") and reVulnerable.search(line['severity']):
            m = reVulnerable.search(line['severity'])
            if str(m.group(1)) != '':
                self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        """Parse a whole CSV report into this document.

        When ``self.source`` is set and matches ``reDefaultFilename``, the
        IP, port and timestamp are taken from it before the rows are read.

        Args:
            csvfile: iterable of CSV text lines (e.g. an open file).
        """
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'),
                                                   "%Y%m%d-%H%M")
        csvReader = csv.DictReader(csvfile,
                                   fieldnames=("id", "fqdn/ip", "port",
                                               "severity", "finding", "cve",
                                               "cwe"),
                                   delimiter=',',
                                   quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        """Fill in derived fields and persist the document.

        Args:
            **kwargs: ``debug`` (truthy) pretty-prints the document before
                saving; every other keyword is forwarded to the parent
                ``save``.

        Raises:
            ValueError: if no port was recorded (empty scan result).
        """
        # ``debug`` is consumed here and must not reach the parent ``save``.
        debug = kwargs.pop('debug', False)

        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            self.result = False

        if debug:
            pp.pprint(self.to_dict())
        return super().save(**kwargs)
Пример #2
0
class Live(DocType):
    """Search document for a live, with async query helpers.

    All query helpers return lists of plain dicts produced by
    :meth:`to_dict`.
    """
    # NOTE: this name is rebound by the ``id`` property defined further
    # down, so the Long() declared here does not survive in the class body.
    id = Long()
    speaker_id = Integer()
    speaker_name = Text(analyzer='ik_max_word')
    feedback_score = Float()  # rating
    topic_names = Text(analyzer='ik_max_word')  # topic tag names
    seats_taken = Integer()  # number of participants
    subject = Text(analyzer='ik_max_word')  # title
    amount = Float()  # price (RMB)
    description = Text(analyzer='ik_max_word')
    status = Boolean()  # public(True)/ended(False)
    starts_at = Date()
    outline = Text(analyzer='ik_max_word')  # Live content
    speaker_message_count = Integer()
    tag_names = Text(analyzer='ik_max_word')
    liked_num = Integer()
    topics = Keyword()
    live_suggest = Completion(analyzer=ik_analyzer)
    cover = Text(index='not_analyzed')
    zhuanlan_url = Text(index='not_analyzed')

    @property
    def id(self):
        """Return the document id (``self._id``)."""
        return self._id

    @property
    def speaker(self):
        """Return the ``User`` fetched through ``session`` by ``speaker_id``."""
        return session.query(User).get(self.speaker_id)

    @property
    def url(self):
        """Return ``LIVE_URL`` formatted with this document's id."""
        return LIVE_URL.format(self.id)

    class Meta:
        index = 'live130'

    def to_dict(self, include_extended=True):
        """Serialize the document.

        Args:
            include_extended: when true, add ``id``, ``type``, the
                serialized ``speaker`` and ``url`` to the base dict.
        """
        d = super().to_dict()
        if include_extended:
            d.update({
                'id': self._id,
                'type': 'live',
                'speaker': self.speaker.to_dict(),
                'url': self.url
            })
        return d

    @classmethod
    async def add(cls, **kwargs):
        """Create and save a document from ``kwargs``.

        Returns:
            The saved document, or ``False`` when no ``id`` was supplied.
        """
        live_id = kwargs.pop('id', None)
        if live_id is None:
            return False
        live = cls(meta={'id': int(live_id)}, **kwargs)
        await live.save()
        return live

    @classmethod
    async def _execute(cls, s, order_by=None):
        """Run search ``s`` (optionally sorted) and return hits as dicts."""
        if order_by is not None:
            s = s.sort(order_by)
        lives = await s.execute()
        return [live.to_dict() for live in lives]

    @classmethod
    def apply_weight(cls, s, start, limit):
        """Wrap ``s`` in the function-score query and apply pagination."""
        return s.query(Q('function_score',
                         functions=[gauss_sf, log_sf])).extra(**{
                             'from': start,
                             'size': limit
                         })

    @classmethod
    async def ik_search(cls, query, status=None, start=0, limit=10):
        """Full-text search over ``SEARCH_FIELDS``, optionally by status."""
        s = cls.search()
        s = s.query('multi_match', query=query, fields=SEARCH_FIELDS)
        if status is not None:
            s = s.query('match', status=status)
        s = cls.apply_weight(s, start, limit)
        return await cls._execute(s)

    @classmethod
    async def explore(cls,
                      from_date=None,
                      to_date=None,
                      order_by=None,
                      start=0,
                      limit=10,
                      topic=None):
        """List documents filtered by topic and/or ``starts_at`` range.

        Without ``order_by`` the results are ranked through
        :meth:`apply_weight`; with it they are sorted by that key.
        ``start``/``limit`` paginate the results in both cases.
        """
        s = cls.search()
        if topic is not None:
            s = s.query(Q('term', topic_names=topic))
        starts_at = {}
        if from_date is not None:
            starts_at['from'] = from_date
        if to_date is not None:
            starts_at['to'] = to_date
        if starts_at:
            s = s.query(Q('range', starts_at=starts_at))
        if order_by is None:
            s = cls.apply_weight(s, start, limit)
        else:
            # Pagination used to be applied only on the weighted path, so
            # ``start``/``limit`` were silently ignored for sorted queries.
            s = s.extra(**{'from': start, 'size': limit})
        return await cls._execute(s, order_by)

    @classmethod
    async def get_hot_weekly(cls):
        """Return up to 20 documents starting within the last 7 days."""
        today = date.today()
        return await cls.explore(from_date=today - timedelta(days=7),
                                 to_date=today,
                                 limit=20)

    @classmethod
    async def get_hot_monthly(cls):
        """Return up to 50 documents starting within the last 30 days."""
        today = date.today()
        return await cls.explore(from_date=today - timedelta(days=30),
                                 to_date=today,
                                 limit=50)

    @classmethod
    async def ik_search_by_speaker_id(cls, speaker_id, order_by='-starts_at'):
        """Return the documents of one speaker, sorted by ``order_by``."""
        s = cls.search()
        s = s.query(Q('bool', should=Q('match', speaker_id=speaker_id)))
        return await cls._execute(s, order_by)

    @classmethod
    async def get_hot_topics(cls, size=50):
        """Return the most frequent topics as dicts, in bucket order.

        Args:
            size: maximum number of ``topics`` term buckets requested.
        """
        s = cls.search()
        s.aggs.bucket('topics', A('terms', field='topics', size=size))
        rs = await s.execute()
        buckets = rs.aggregations.topics.buckets
        topic_names = [r['key'] for r in buckets]
        if not topic_names:
            # Nothing aggregated: skip the database round-trip entirely.
            return []
        topics = session.query(Topic).filter(Topic.name.in_(topic_names)).all()
        # Precompute each name's rank so sorting does not rescan the list.
        rank = {name: pos for pos, name in enumerate(topic_names)}
        topics = sorted(topics, key=lambda t: rank[t.name])
        return [topic.to_dict() for topic in topics]

    @classmethod
    async def ik_suggest(cls, query, size=10):
        """Return completion suggestions for ``query`` as dicts.

        Args:
            query: text to complete.
            size: maximum number of suggestions requested.
        """
        s = cls.search()
        s = s.suggest('live_suggestion',
                      query,
                      completion={
                          'field': 'live_suggest',
                          'fuzzy': {
                              'fuzziness': 2
                          },
                          'size': size
                      })
        suggestions = await s.execute_suggest()
        matches = suggestions.live_suggestion[0].options
        ids = [match._id for match in matches]
        if not ids:
            # A multi-get with no ids is not a valid request.
            return []
        lives = await Live.mget(ids)
        # Skip entries that could not be fetched instead of failing on None.
        return [live.to_dict() for live in lives if live is not None]
Пример #3
0
class Repos(DocType):
    """Document type declaring a boolean ``is_public`` and a date ``created_at``."""
    is_public = Boolean()
    created_at = Date()
Пример #4
0
class UserSearch(ModelSearchAdapter):
    """Search adapter for the ``User`` model.

    The class body declares the indexed fields, the sort keys, the facets
    and the boosters; :meth:`serialize` builds the dict stored for one user.
    """
    model = User
    fuzzy = True

    class Meta:
        doc_type = 'User'

    # Indexed fields.
    first_name = String()
    last_name = String()
    about = String(analyzer=i18n_analyzer)
    organizations = String(index='not_analyzed')
    visible = Boolean()
    metrics = User.__search_metrics__
    created = Date(format='date_hour_minute_second')
    user_suggest = Completion(analyzer=simple,
                              search_analyzer=simple,
                              payloads=True)

    # Sort name -> document field path (presumably consumed by the base
    # adapter; verify against ModelSearchAdapter).
    sorts = {
        'last_name': 'last_name',
        'first_name': 'first_name',
        'datasets': 'metrics.datasets',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
        'created': 'created',
    }
    # Facet name -> facet definition. Both range facets use the same three
    # buckets: none (< 1), few (1 to 5) and many (5 and above).
    facets = {
        'organization':
        ModelTermsFacet(field='organizations', model=Organization),
        'datasets':
        RangeFacet(field='metrics.datasets',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('many', (5, None))],
                   labels={
                       'none': _('No datasets'),
                       'few': _('Few datasets'),
                       'many': _('Many datasets'),
                   }),
        'followers':
        RangeFacet(field='metrics.followers',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('many', (5, None))],
                   labels={
                       'none': _('No followers'),
                       'few': _('Few followers'),
                       'many': _('Many followers'),
                   }),
    }
    # NOTE(review): the followers booster passes an extra positional
    # argument (200, 200) compared with the two above -- confirm against the
    # GaussDecay signature that this is intended.
    boosters = [
        GaussDecay('metrics.reuses', 50, decay=0.8),
        GaussDecay('metrics.datasets', 50, decay=0.8),
        GaussDecay('metrics.followers', 200, 200, decay=0.8),
    ]

    @classmethod
    def serialize(cls, user):
        """Return the dict of values indexed for ``user``.

        The keys mirror the fields declared on this class. ``user_suggest``
        carries the completion inputs (tokenized full name plus the user
        id), the output (the id as a string) and a display payload.
        """
        return {
            'first_name': user.first_name,
            'last_name': user.last_name,
            'about': user.about,
            'organizations': [str(o.id) for o in user.organizations],
            'metrics': user.metrics,
            'created': to_iso_datetime(user.created_at),
            'user_suggest': {
                'input': cls.completer_tokenize(user.fullname) + [user.id],
                'output': str(user.id),
                'payload': {
                    'avatar_url': user.avatar(40, external=True),
                    'first_name': user.first_name,
                    'last_name': user.last_name,
                    'slug': user.slug,
                },
            },
            'visible': user.visible,
        }
Пример #5
0
class WhoisDomainRecordDoc(GenericDoc):
    """Document for a WHOIS domain record (doc_type ``opendns_whois_record``).

    Fields are declared per contact role (administrative, billing,
    registrant, technical, zone) plus record-level attributes.
    """

    class Meta:
        doc_type = "opendns_whois_record"

    addresses = String()
    administrativeContactCity = String()
    administrativeContactCountry = String()
    administrativeContactEmail = String(analyzer=email_analyzer)
    administrativeContactFax = String()
    administrativeContactFaxExt = String()
    administrativeContactName = String()
    administrativeContactOrganization = String()
    administrativeContactPostalCode = String()
    administrativeContactState = String()
    administrativeContactStreet = String()
    administrativeContactTelephone = String()
    administrativeContactTelephoneExt = String()
    auditUpdatedDate = Date()
    billingContactCity = String()
    billingContactCountry = String()
    billingContactEmail = String(analyzer=email_analyzer)
    billingContactFax = String()
    billingContactFaxExt = String()
    billingContactName = String()
    billingContactOrganization = String()
    billingContactPostalCode = String()
    billingContactState = String()
    billingContactStreet = String()
    billingContactTelephone = String()
    billingContactTelephoneExt = String()
    created = Date()
    domainName = String()
    emails = String(analyzer=email_analyzer)
    expires = Date()
    hasRawText = Boolean()
    nameServers = String()
    recordExpired = Boolean()
    registrantCity = String()
    registrantCountry = String()
    registrantEmail = String(analyzer=email_analyzer)
    registrantFax = String()
    registrantFaxExt = String()
    registrantName = String()
    registrantOrganization = String()
    registrantPostalCode = String()
    registrantState = String()
    registrantStreet = String()
    registrantTelephone = String()
    registrantTelephoneExt = String()
    registrarIANAID = Integer()
    registrarName = String()
    record_status = String()
    technicalContactCity = String()
    technicalContactCountry = String()
    technicalContactEmail = String(analyzer=email_analyzer)
    technicalContactFax = String()
    technicalContactFaxExt = String()
    technicalContactName = String()
    technicalContactOrganization = String()
    technicalContactPostalCode = String()
    technicalContactState = String()
    technicalContactStreet = String()
    technicalContactTelephone = String()
    technicalContactTelephoneExt = String()
    timeOfLatestRealtimeCheck = BetterDate(format="epoch_millis")
    timestamp = Date()
    updated = Date()
    whoisServers = String()
    zoneContactCity = String()
    zoneContactCountry = String()
    zoneContactEmail = String(analyzer=email_analyzer)
    zoneContactFax = String()
    zoneContactFaxExt = String()
    zoneContactName = String()
    zoneContactOrganization = String()
    zoneContactPostalCode = String()
    zoneContactState = String()
    zoneContactStreet = String()
    zoneContactTelephone = String()
    zoneContactTelephoneExt = String()

    def __init__(self, jdata=None):
        """Initialise the document and copy ``jdata`` onto it.

        Args:
            jdata: optional mapping of attribute name to value. Omitting it
                (or passing ``None``) creates an empty document.
        """
        GenericDoc.__init__(self)
        # A fresh dict per call: a ``{}`` default in the signature would be
        # one object shared by every call.
        if jdata is None:
            jdata = {}
        for k, v in jdata.items():
            # None values are stored as an empty dict.
            if v is None:
                v = {}
            setattr(self, k, v)
Пример #6
0
class IndexedPublicationLegacy(Document):
    """Elasticsearch document for a legacy publication.

    The mapping is strict (see ``Meta``); :meth:`from_id` looks a document
    up by its exact ``name``.
    """
    startDate = Date()
    endDate = Date()
    description = Text(analyzer='english')
    facility = Nested(
        properties={
            'country': Text(analyzer='english'),
            'name': Text(analyzer='english'),
            'state': Text(analyzer='english')
        })
    deleted = Boolean()
    path = Text(fields={
        '_exact': Keyword(),
        '_path': Text(analyzer=path_analyzer)
    })
    title = Text(analyzer='english', fields={'_exact': Keyword()})
    name = Text(fields={'_exact': Keyword()})
    equipment = Nested(
        properties={
            'component': Text(analyzer='english'),
            'equipment': Text(analyzer='english'),
            'equipmentClass': Text(analyzer='english'),
            'facility': Text(analyzer='english')
        })
    system = Text(fields={'_exact': Keyword()})
    organization = Nested(
        properties={
            'country': Text(analyzer='english'),
            'name': Text(analyzer='english'),
            'state': Text(analyzer='english')
        })
    pis = Nested(
        properties={
            'lastName': Text(analyzer='english'),
            'firstName': Text(analyzer='english')
        })
    project = Text(fields={'_exact': Keyword()})
    sponsor = Nested(properties={
        'name': Text(analyzer='english'),
        'url': Text()
    })
    fundorg = Text(analyzer='english', fields={'_exact': Keyword()})
    fundorgprojid = Text(fields={'_exact': Keyword()})
    publications = Nested(
        properties={
            'authors': Text(analyzer='english', multi=True),
            'title': Text(analyzer='english')
        })
    experiments = Nested(
        properties={
            'startDate':
            Date(),
            'endDate':
            Date(),
            'doi':
            Keyword(),
            'description':
            Text(analyzer='english'),
            'facility':
            Nested(
                properties={
                    'country': Text(analyzer='english'),
                    'state': Text(analyzer='english'),
                    'name': Text(analyzer='english'),
                }),
            'deleted':
            Boolean(),
            'path':
            Text(fields={
                '_exact': Keyword(),
                '_path': Text(analyzer=path_analyzer)
            }),
            'material':
            Nested(
                properties={
                    'materials': Text(analyzer='english', multi=True),
                    'component': Text(analyzer='english')
                }),
            'equipment':
            Nested(
                properties={
                    'component': Text(analyzer='english'),
                    'equipment': Text(analyzer='english'),
                    'equipmentClass': Text(analyzer='english'),
                    'facility': Text(analyzer='english')
                }),
            'title':
            Text(analyzer='english'),
            'sensors':
            Text(analyzer='english', multi=True),
            'type':
            Text(analyzer='english'),
            'specimenType':
            Nested(
                properties={
                    'name': Text(analyzer='english'),
                    'description': Text(analyzer='english')
                }),
            'name':
            Text(analyzer='english'),
            'creators':
            Nested(
                properties={
                    'lastName': Text(analyzer='english'),
                    'firstName': Text(analyzer='english')
                })
        })

    @classmethod
    def from_id(cls, project_id):
        """Return the document whose exact ``name`` equals ``project_id``.

        When several documents share the name, the first search hit is kept
        and the other documents with that name are deleted from the index.

        Args:
            project_id: value matched against ``name._exact``.

        Raises:
            DocumentNotFound: if ``project_id`` is None or nothing matches.
        """
        if project_id is None:
            raise DocumentNotFound()
        name_filter = Q('term', **{'name._exact': project_id})
        res = cls.search().filter(name_filter).execute()
        total = res.hits.total.value
        if total < 1:
            raise DocumentNotFound("No document found for "
                                   "{}".format(project_id))

        first_id = res[0].meta.id
        if total > 1:
            # Delete every document with the same name except the first
            # result. The two filters must stay distinct objects: combining
            # a filter with its own negation matches nothing, which is why
            # duplicates were never actually removed before.
            keep_filter = Q('term', **{'_id': first_id})
            delete_query = name_filter & ~keep_filter
            cls.search().filter(delete_query).delete()
        return cls.get(first_id)

    class Index:
        name = settings.ES_INDICES['publications_legacy']['alias']

    class Meta:
        dynamic = MetaField('strict')
Пример #7
0
class ReuseSearch(ModelSearchAdapter):
    """Search adapter for the ``Reuse`` model.

    The class body declares the indexed fields, the facets, the sort keys
    and the boosters; :meth:`is_indexable` decides whether a reuse is
    indexed and :meth:`serialize` builds the dict stored for it.
    """
    model = Reuse
    fuzzy = True

    class Meta:
        doc_type = 'Reuse'

    # Indexed fields.
    title = String(analyzer=i18n_analyzer,
                   fields={'raw': String(index='not_analyzed')})
    description = String(analyzer=i18n_analyzer)
    url = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    type = String(index='not_analyzed')
    tags = String(index='not_analyzed',
                  fields={'i18n': String(index='not_analyzed')})
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    # NOTE(review): this mapping is named ``datasets`` while ``serialize``
    # emits a ``dataset`` key and the ``dataset`` facet targets
    # ``dataset.id`` -- confirm which name is intended.
    datasets = Object(properties={
        'id': String(index='not_analyzed'),
        'title': String(),
    })
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Reuse)
    featured = Boolean()
    reuse_suggest = Completion(analyzer=simple,
                               search_analyzer=simple,
                               payloads=True)
    extras = Object()

    # Facet name -> facet definition. Both range facets use the same three
    # buckets: none (< 1), few (1 to 5) and many (5 and above).
    facets = {
        'tag':
        TermsFacet(field='tags'),
        'organization':
        ModelTermsFacet(field='organization', model=Organization),
        'owner':
        ModelTermsFacet(field='owner', model=User),
        'dataset':
        ModelTermsFacet(field='dataset.id', model=Dataset),
        'type':
        TermsFacet(field='type', labelizer=reuse_type_labelizer),
        'datasets':
        RangeFacet(field='metrics.datasets',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('many', (5, None))],
                   labels={
                       'none': _('No datasets'),
                       'few': _('Few datasets'),
                       'many': _('Many datasets'),
                   }),
        'followers':
        RangeFacet(field='metrics.followers',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('many', (5, None))],
                   labels={
                       'none': _('No followers'),
                       'few': _('Few followers'),
                       'many': _('Many followers'),
                   }),
        'badge':
        TermsFacet(field='badges', labelizer=reuse_badge_labelizer),
        'featured':
        BoolFacet(field='featured'),
    }
    # Sort name -> document field path; titles sort on the unanalyzed
    # ``title.raw`` sub-field.
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'datasets': 'metrics.datasets',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }
    boosters = [
        BoolBooster('featured', lazy('featured_boost')),
        GaussDecay('metrics.datasets',
                   max_datasets,
                   decay=lazy('datasets_decay')),
        GaussDecay('metrics.followers',
                   max_followers,
                   decay=lazy('followers_decay')),
    ]

    @classmethod
    def is_indexable(cls, reuse):
        """Return True when ``reuse`` should be indexed.

        A reuse qualifies when it is not deleted (``deleted is None``), has
        at least one dataset and is not private.
        """
        return (reuse.deleted is None and len(reuse.datasets) > 0
                and not reuse.private)

    @classmethod
    def serialize(cls, reuse):
        """Return the dict of values indexed for ``reuse``.

        The related datasets are re-queried with only ``id`` and ``title``
        loaded. The organization takes precedence over the owner: the owner
        is only looked up when the reuse has no organization, so at most
        one of the two keys is non-None.
        """
        datasets = Dataset.objects(id__in=[r.id for r in reuse.datasets])
        datasets = list(datasets.only('id', 'title').no_dereference())
        organization = None
        owner = None
        if reuse.organization:
            organization = Organization.objects(
                id=reuse.organization.id).first()
        elif reuse.owner:
            owner = User.objects(id=reuse.owner.id).first()
        return {
            'title': reuse.title,
            'description': reuse.description,
            'url': reuse.url,
            'organization': str(organization.id) if organization else None,
            'owner': str(owner.id) if owner else None,
            'type': reuse.type,
            'tags': reuse.tags,
            'tag_suggest': reuse.tags,
            'badges': [badge.kind for badge in reuse.badges],
            'created': to_iso_datetime(reuse.created_at),
            'last_modified': to_iso_datetime(reuse.last_modified),
            'dataset': [{
                'id': str(d.id),
                'title': d.title
            } for d in datasets],
            'metrics': reuse.metrics,
            'featured': reuse.featured,
            'extras': reuse.extras,
            'reuse_suggest': {
                'input': cls.completer_tokenize(reuse.title) + [reuse.id],
                'output': str(reuse.id),
                'payload': {
                    'title': reuse.title,
                    'slug': reuse.slug,
                    'image_url': reuse.image(40, external=True),
                },
            },
        }
Пример #8
0
class IndexedFile(Document):
    """
    Elasticsearch document for a single indexed file, layered on top of
    `elasticsearch_dsl.Document`.
    """
    name = Text(
        analyzer=file_analyzer,
        fields={
            '_exact': Keyword(),
            '_pattern': Text(analyzer=file_pattern_analyzer),
            '_reverse': Text(analyzer=reverse_file_analyzer),
        },
    )
    path = Text(
        fields={
            '_comps': Text(analyzer=path_analyzer),
            '_exact': Keyword(),
            '_reverse': Text(analyzer=reverse_file_analyzer),
        },
    )
    lastModified = Date()
    length = Long()
    format = Text()
    mimeType = Keyword()
    type = Text()
    system = Text(fields={'_exact': Keyword()})
    basePath = Text(
        fields={
            '_comps': Text(analyzer=path_analyzer),
            '_exact': Keyword(),
        },
    )
    lastUpdated = Date()
    pems = Object(
        properties={
            'username': Keyword(),
            'recursive': Boolean(),
            'permission': Object(
                properties={
                    'read': Boolean(),
                    'write': Boolean(),
                    'execute': Boolean(),
                },
            ),
        },
    )

    def save(self, *args, **kwargs):
        """
        Stamp `lastUpdated` with the current time, then delegate to
        elasticsearch_dsl.Document.save().
        """
        self.lastUpdated = datetime.datetime.now()
        return super().save(*args, **kwargs)

    def update(self, *args, **kwargs):
        """
        Delegate to elasticsearch_dsl.Document.update(), passing the current
        time as the `lastUpdated` field.
        """
        stamp = datetime.datetime.now()
        return super().update(*args, lastUpdated=stamp, **kwargs)

    @classmethod
    def from_path(cls, system, path):
        """
        Look up the IndexedFile identified by a system/path pair.

        Parameters
        ----------
        system: str
            System attribute of the indexed file.
        path: str
            Path attribute of the indexed file.
        Returns
        -------
        IndexedFile

        Raises
        ------
        elasticsearch.exceptions.NotFoundError
        """
        # The document id is derived from the (system, path) pair.
        return cls.get(file_uuid_sha256(system, path))

    def children(self):
        """
        Generate the direct children of this file (one level only).

        Yields
        ------
        IndexedFile
        """
        # Children are the documents whose basePath is this file's path,
        # on the same system.
        query = (self.search()
                 .filter('term', **{'basePath._exact': self.path})
                 .filter('term', **{'system._exact': self.system}))

        for match in query.scan():
            yield self.get(match.meta.id)

    def delete_recursive(self):
        """
        Delete this file's whole subtree (depth first), then the file itself.

        Returns
        -------
        Void
        """
        for descendant in self.children():
            descendant.delete_recursive()
        self.delete()

    class Index:
        name = settings.ES_INDEX_PREFIX.format('files')
class Repos(DocType):
    """Document type declaring a single boolean field, ``is_public``."""
    is_public = Boolean()
Пример #10
0
class Order(BaseESModel):
    """Elasticsearch representation of Order model.

    Declares the index fields, the per-field conversion functions
    (``MAPPINGS`` / ``COMPUTED_MAPPINGS``) and the field paths listed in
    ``SEARCH_FIELDS``. How those class attributes are consumed is defined by
    ``BaseESModel``, which is not visible here.
    """

    # --- Core order fields ---
    id = Keyword()
    # `reference` additionally exposes a `trigram` sub-field.
    reference = fields.NormalizedKeyword(fields={
        'trigram': fields.TrigramText(),
    }, )
    status = fields.NormalizedKeyword()
    company = fields.company_field()
    contact = fields.contact_or_adviser_field()
    created_by = fields.contact_or_adviser_field(include_dit_team=True)
    created_on = Date()
    modified_on = Date()
    primary_market = fields.id_name_field()
    sector = fields.sector_field()
    uk_region = fields.id_name_field()
    description = fields.EnglishText()
    contacts_not_to_approach = Text()
    further_info = Text()
    # Fields declared with index=False are stored in the document but not indexed.
    existing_agents = Text(index=False)
    delivery_date = Date()
    service_types = fields.id_name_field()
    contact_email = fields.NormalizedKeyword()
    contact_phone = Keyword()
    subscribers = fields.contact_or_adviser_field(include_dit_team=True)
    assignees = fields.contact_or_adviser_field(include_dit_team=True)

    # --- Pricing / VAT fields ---
    po_number = Keyword(index=False)
    discount_value = Integer(index=False)
    vat_status = Keyword(index=False)
    vat_number = Keyword(index=False)
    vat_verified = Boolean(index=False)
    net_cost = Integer(index=False)
    # `subtotal_cost` and `total_cost` also get a `keyword` sub-field; both
    # sub-fields are listed in SEARCH_FIELDS below.
    subtotal_cost = Integer(fields={
        'keyword': Keyword(),
    }, )
    vat_cost = Integer(index=False)
    total_cost = Integer(fields={
        'keyword': Keyword(),
    }, )

    # --- Payment / completion / cancellation fields ---
    payment_due_date = Date()
    paid_on = Date()
    completed_by = fields.contact_or_adviser_field()
    completed_on = Date()
    cancelled_by = fields.contact_or_adviser_field()
    cancelled_on = Date()
    cancellation_reason = fields.id_name_field()

    # --- Billing details ---
    billing_company_name = Text()
    billing_contact_name = Text()
    billing_email = fields.NormalizedKeyword()
    billing_phone = fields.NormalizedKeyword()
    billing_address_1 = Text()
    billing_address_2 = Text()
    billing_address_town = fields.NormalizedKeyword()
    billing_address_county = fields.NormalizedKeyword()
    billing_address_postcode = Text()
    billing_address_country = fields.id_name_field()

    # Field name -> callable applied to that field's value.
    # NOTE(review): presumably used by BaseESModel to turn model attribute
    # values into dicts for indexing -- confirm against BaseESModel.
    MAPPINGS = {
        'company':
        dict_utils.company_dict,
        'contact':
        dict_utils.contact_or_adviser_dict,
        'created_by':
        dict_utils.adviser_dict_with_team,
        'primary_market':
        dict_utils.id_name_dict,
        'sector':
        dict_utils.sector_dict,
        'uk_region':
        dict_utils.id_name_dict,
        # The three entries below receive an object exposing `.all()` and
        # build one dict per element it returns.
        'service_types':
        lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'subscribers':
        lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser,
                                               include_dit_team=True)
            for c in col.all()
        ],
        'assignees':
        lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser,
                                               include_dit_team=True)
            for c in col.all()
        ],
        'billing_address_country':
        dict_utils.id_name_dict,
        'completed_by':
        dict_utils.contact_or_adviser_dict,
        'cancelled_by':
        dict_utils.contact_or_adviser_dict,
        'cancellation_reason':
        dict_utils.id_name_dict,
    }

    # Field name -> callable taking the whole source object.
    # `payment_due_date` is read from the object's invoice, or is None when
    # the object has no invoice.
    COMPUTED_MAPPINGS = {
        'payment_due_date':
        lambda x: x.invoice.payment_due_date if x.invoice else None,
    }

    # Field paths (including sub-fields) exposed for searching.
    # NOTE(review): exact usage is defined outside this class -- confirm.
    SEARCH_FIELDS = (
        'id',
        'reference.trigram',
        'company.name',
        'company.name.trigram',
        'contact.name',
        'contact.name.trigram',
        'total_cost.keyword',
        'subtotal_cost.keyword',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DEFAULT_MAPPING_TYPE

    class Index:
        # Uses the same mapping type constant as Meta above.
        doc_type = DEFAULT_MAPPING_TYPE
Пример #11
0
class Contact(BaseESModel):
    """Elasticsearch representation of Contact model.

    Declares the index fields, the per-field conversion functions
    (``MAPPINGS`` / ``COMPUTED_MAPPINGS``) and the field paths listed in
    ``SEARCH_FIELDS``. How those class attributes are consumed is defined by
    ``BaseESModel``, which is not visible here.
    """

    id = Keyword()
    # Address fields: their values come from COMPUTED_MAPPINGS below rather
    # than from MAPPINGS.
    address_1 = Text()
    address_2 = Text()
    address_town = fields.NormalizedKeyword()
    address_county = fields.NormalizedKeyword()
    address_postcode = Text()
    address_country = fields.id_name_field()
    address_same_as_company = Boolean()
    adviser = fields.contact_or_adviser_field()
    archived = Boolean()
    archived_by = fields.contact_or_adviser_field()
    archived_on = Date()
    archived_reason = Text()
    company = fields.company_field()
    company_sector = fields.sector_field()
    company_uk_region = fields.id_name_field()
    created_by = fields.contact_or_adviser_field(include_dit_team=True)
    created_on = Date()
    email = fields.NormalizedKeyword()
    email_alternative = Text()
    # Name parts are text fields with an additional normalized `keyword`
    # sub-field.
    first_name = Text(fields={
        'keyword': fields.NormalizedKeyword(),
    }, )
    job_title = fields.NormalizedKeyword()
    last_name = Text(fields={
        'keyword': fields.NormalizedKeyword(),
    }, )
    modified_on = Date()
    # `name` also carries a `trigram` sub-field, listed in SEARCH_FIELDS.
    name = Text(fields={
        'keyword': fields.NormalizedKeyword(),
        'trigram': fields.TrigramText(),
    }, )
    notes = fields.EnglishText()
    primary = Boolean()
    telephone_alternative = Text()
    telephone_countrycode = Keyword()
    telephone_number = Keyword()
    title = fields.id_name_field()

    # Field name -> callable applied to that field's value.
    # NOTE(review): presumably used by BaseESModel to turn model attribute
    # values into dicts for indexing -- confirm against BaseESModel.
    MAPPINGS = {
        'adviser': dict_utils.contact_or_adviser_dict,
        'archived_by': dict_utils.contact_or_adviser_dict,
        'company': dict_utils.company_dict,
        'created_by': dict_utils.adviser_dict_with_team,
        'title': dict_utils.id_name_dict,
    }

    # Field name -> callable produced by a helper factory. The address
    # entries are built per address field name; the company_* entries are
    # built from a dotted attribute path on the related company.
    COMPUTED_MAPPINGS = {
        'address_1':
        contact_dict_utils.computed_address_field('address_1'),
        'address_2':
        contact_dict_utils.computed_address_field('address_2'),
        'address_town':
        contact_dict_utils.computed_address_field('address_town'),
        'address_county':
        contact_dict_utils.computed_address_field('address_county'),
        'address_postcode':
        contact_dict_utils.computed_address_field('address_postcode'),
        'address_country':
        contact_dict_utils.computed_address_field('address_country'),
        'company_sector':
        dict_utils.computed_nested_sector_dict('company.sector'),
        'company_uk_region':
        dict_utils.computed_nested_id_name_dict('company.uk_region'),
    }

    # Field paths (including sub-fields) exposed for searching.
    # NOTE(review): exact usage is defined outside this class -- confirm.
    SEARCH_FIELDS = (
        'id',
        'name',
        'name.trigram',
        'email',
        'email_alternative',
        'company.name',
        'company.name.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DEFAULT_MAPPING_TYPE

    class Index:
        # Uses the same mapping type constant as Meta above.
        doc_type = DEFAULT_MAPPING_TYPE
Пример #12
0
class BlogPostIndex(DocType):
    """Search document for a blog post, kept in the "blogpost-index" index."""

    id = Integer()
    title = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    content = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    char_num = Integer()
    allow_comments = Boolean()
    vote_num = Integer()
    category = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    tags = Text(analyzer="ik_max_word", search_analyzer="ik_max_word")
    publish_date = Date()
    suggestions = Completion()

    class Meta:
        index = "blogpost-index"

    @classmethod
    def add(cls, **kwargs):
        """
        Create and save a document from keyword arguments.

        The ``id`` keyword becomes the document's meta id; every other keyword
        is passed to the constructor as a field value.

        :return: the saved document, or ``False`` when no ``id`` was supplied
        """
        doc_id = kwargs.pop("id", None)
        if doc_id is None:
            return False
        post = cls(meta={"id": doc_id}, **kwargs)
        post.save()
        return post

    @staticmethod
    def search_posts(words, delim="......<br>......<br>......"):
        """
        Search posts and assemble the result list by hand from the raw response.

        Only hits that carry highlight data are returned.

        :param words: term looked up in the tags, title and content fields
        :param delim: separator placed between highlighted content fragments
        :return: list of dicts with ``origin_title``, ``title`` and ``content``
        """
        term = "{}".format(words)
        should_clauses = [
            {"term": {field: term}} for field in ("tags", "title", "content")
        ]
        highlight_fields = {
            field: {"pre_tags": ["<em>"], "post_tags": ["</em>"]}
            for field in ("title", "content")
        }
        body = {
            "_source": ["title", "category", "tags", "publish_date"],
            "query": {
                "bool": {
                    "must": [],
                    "must_not": [],
                    "should": should_clauses,
                }
            },
            "highlight": {
                "number_of_fragments": 3,
                "fragment_size": 150,
                "fields": highlight_fields,
            },
            "from": 0,
            "size": 50,
            "sort": [],
            "aggs": {},
        }
        response = es_client.search(index="blogpost-index", body=body)

        found = []
        for hit in response["hits"]["hits"]:
            highlight = hit.get("highlight", None)
            if not highlight:
                # Hits without any highlight are left out of the result.
                continue

            title_parts = highlight.get("title", None)
            if title_parts:
                shown_title = "".join(title_parts)
            else:
                shown_title = hit["_source"]["title"]

            content_parts = highlight.get("content", None)
            shown_content = (delim.join(content_parts) + "......<br>"
                             if content_parts else "")

            found.append({
                "origin_title": hit["_source"]["title"],
                "title": shown_title,
                "content": shown_content,
            })
        return found

    @staticmethod
    def suggest_word(words):
        """
        Return up to 10 completion suggestions for the prefix ``words``.

        Runs a fuzzy (fuzziness 2) completion suggester on the ``suggestions``
        field with duplicates skipped.

        :return: list of suggested texts; empty when nothing is suggested
        """
        suggester = "search-as-you-type-suggestion"
        body = {
            "_source": False,
            "suggest": {
                suggester: {
                    "prefix": "{}".format(words),
                    "completion": {
                        "field": "suggestions",
                        "size": 10,
                        "fuzzy": {
                            "fuzziness": 2
                        },
                        "skip_duplicates": True,
                    },
                }
            },
        }
        response = es_client.search(index="blogpost-index", body=body)
        entries = response["suggest"][suggester]
        if len(entries) < 1:
            return []
        return [option["text"] for option in entries[0]["options"]]

    @staticmethod
    def similar_recommends_post(words):
        """Placeholder: not implemented, always returns None."""
        return None
Пример #13
0
class DocTestSSLResult(DocType):
    """
    Elasticsearch document holding one TestSSL scan result.

    The document is filled from a CSV result file via parseCSV() and
    parseCSVLine() and is completed with derived fields in save().
    """

    class Meta:
        doc_type = "TestSSLResult"

    source = String(fields={'raw': String(index='not_analyzed')})
    result = Boolean()
    timestamp = Date()
    ip = String(index='not_analyzed')
    hostname = String(index='not_analyzed')
    port = Integer()
    svcid = String(index='not_analyzed')
    protocols = String(index='not_analyzed', multi=True)
    ciphers = String(multi=True, fields={'raw': String(index='not_analyzed')})
    ciphertests = String(index='not_analyzed', multi=True)
    serverpref = Object(
            properties = {
                "cipher_order": Boolean(),
                "protocol": String(index='not_analyzed'),
                "cipher": String(fields={'raw': String(index='not_analyzed')})
                })
    cert = Object(
            properties = {
                "keysize": Short(),
                "signalgo": String(fields={'raw': String(index='not_analyzed')}),
                "md5_fingerprint": String(index='not_analyzed'),
                "sha1_fingerprint": String(index='not_analyzed'),
                "sha256_fingerprint": String(index='not_analyzed'),
                "cn": String(fields={'raw': String(index='not_analyzed')}),
                "san": String(multi=True, fields={'raw': String(index='not_analyzed')}),
                "issuer": String(fields={'raw': String(index='not_analyzed')}),
                "ev": Boolean(),
                "expiration": Date(),
                "ocsp_uri": String(fields={'raw': String(index='not_analyzed')}),
                "ocsp_stapling": Boolean(),
                })
    vulnerabilities = String(index='not_analyzed', multi=True)

    def parseCSVLine(self, line):
        """
        Merge one CSV row into this document.

        Arguments:
            line -- mapping with the keys "id", "host", "port" and "finding"
                    (as produced by the DictReader in parseCSV()).

        The row's "id" selects which attribute is updated; rows with an
        unknown id, or whose finding does not match the expected pattern, are
        ignored. The header row (id == "id") is skipped.
        """
        if line['id'] == "id":
            return
        if not self.ip or not self.hostname or not self.port:   # host, ip and port
            m = reIpHostColumn.search(line['host'])
            if m:
                self.hostname, self.ip = m.groups()
            self.port = int(line['port'])

        if reProtocol.search(line['id']) and reOffers.search(line['finding']):     # protocols
            # At least one offered protocol marks the scan as successful.
            self.result = True
            self.protocols.append(line['id'].upper())
        elif reCipherColumnName.search(line['id']):                  # ciphers
            m = reCipherDetails.search(line['finding'])
            if m:
                self.ciphers.append(m.group(1))
        elif reCipherTests.search(line['id']) and reVulnerable.search(line['finding']):                       # cipher tests
            m = reCipherTests.search(line['id'])
            if m:
                self.ciphertests.append(m.group(1))
        elif line['id'] == "order":                                 # server prefers cipher
            self.serverpref.cipher_order = bool(reOk.search(line['finding']))
        elif line['id'] == "order_proto":                           # preferred protocol
            m = reDefaultProtocol.search(line['finding'])
            if m:
                self.serverpref.protocol = m.group(1)
        elif line['id'] == "order_cipher":                          # preferred cipher
            m = reDefaultCipher.search(line['finding'])
            if m:
                self.serverpref.cipher = m.group(1)
        elif line['id'] == "key_size":                              # certificate key size
            m = reKeySize.search(line['finding'])
            if m:
                self.cert.keysize = int(m.group(1))
        elif line['id'] == "algorithm":                             # certificate sign algorithm
            m = reSignAlgorithm.search(line['finding'])
            if m:
                self.cert.signalgo = m.group(1)
        elif line['id'] == "fingerprint":                           # certificate fingerprints
            # A single finding may carry several fingerprints; check each one.
            m = reFPMD5.search(line['finding'])
            if m:
                self.cert.md5_fingerprint = m.group(1)
            m = reFPSHA1.search(line['finding'])
            if m:
                self.cert.sha1_fingerprint = m.group(1)
            m = reFPSHA256.search(line['finding'])
            if m:
                self.cert.sha256_fingerprint = m.group(1)
        elif line['id'] == "cn":                                    # certificate CN
            m = reCN.search(line['finding'])
            if m:
                self.cert.cn = m.group(1)
        elif line['id'] == "san":                                   # certificate SAN
            m = reSAN.search(line['finding'])
            if m:
                sans = m.group(1)
                for san in sans.split(" "):
                    # "--" entries are skipped rather than stored as a SAN.
                    if san != "--":
                        self.cert.san.append(san)
        elif line['id'] == "issuer":                                # certificate issuer
            m = reIssuer.search(line['finding'])
            if m:
                self.cert.issuer = m.group(1)
        elif line['id'] == "ev":                                    # certificate extended validation
            self.cert.ev = bool(reYes.search(line['finding']))
        elif line['id'] == "expiration":                            # certificate expiration
            m = reExpiration.search(line['finding'])
            if m:
                unparsedDate = m.group(1)
                self.cert.expiration = datetime.strptime(unparsedDate, "%Y-%m-%d %H:%M %z")
        elif line['id'] == "ocsp_uri":                              # certificate OCSP URI
            m = reOCSPURI.search(line['finding'])
            if m:
                self.cert.ocsp_uri = m.group(1)
            else:
                self.cert.ocsp_uri = "-"
        elif line['id'] == "ocsp_stapling":                         # certificate OCSP stapling
            self.cert.ocsp_stapling = not bool(reNotOffered.search(line['finding']))
        elif line['id'] in ("heartbleed", "ccs", "secure_renego", "sec_client_renego", "crime", "breach", "poodle_ssl", "fallback_scsv", "freak", "DROWN", "logjam", "beast", "rc4") and reVulnerable.search(line['finding']):
            self.vulnerabilities.append(line['id'].upper())

    def parseCSV(self, csvfile):
        """
        Fill this document from a CSV result file.

        Arguments:
            csvfile -- iterable of CSV text lines (e.g. an open file) with the
                       columns id, host, port, severity, finding.

        When `source` is set and matches reDefaultFilename, ip, port and
        timestamp are taken from it before the rows are parsed.
        """
        if self.source:
            m = reDefaultFilename.search(self.source)
            if m:
                self.ip = m.group('ip')
                self.port = int(m.group('port') or 0)
                self.timestamp = datetime.strptime(m.group('datetime'), "%Y%m%d-%H%M")
        csvReader = csv.DictReader(csvfile, fieldnames=("id", "host", "port", "severity", "finding"), delimiter=',', quotechar='"')
        for line in csvReader:
            self.parseCSVLine(line)

    def save(self, **kwargs):
        """
        Complete derived fields and persist the document.

        Arguments:
            debug -- optional keyword; when truthy the document is
                     pretty-printed before saving. It is consumed here and not
                     passed on.
            **kwargs -- every other keyword is forwarded to the parent save().

        Raises:
            ValueError -- when no port was set (empty scan result).
        """
        # `debug` is only meaningful here; remove it so the remaining keywords
        # can be forwarded to the parent save() instead of being dropped.
        debug = kwargs.pop('debug', False)

        if not self.timestamp:
            self.timestamp = datetime.now(tz)
        if not self.port:
            raise ValueError("Empty scan result")

        self.svcid = "%s:%d" % (self.ip, int(self.port) or 0)
        if not self.result:
            # Store an explicit False instead of leaving the field unset.
            self.result = False

        if debug:
            pp.pprint(self.to_dict())
        return super().save(**kwargs)
Пример #14
0
class PoeItem(InnerDoc):
    """
    Inner document describing a single item.

    Pure field declaration: every attribute below is one mapped field. Fields
    declared with ``multi=True`` hold a list of values.
    """

    abyssJewel = Boolean()
    additionalProperties = Boolean(multi=True)
    artFilename = Text()
    # Nested sub-document of type PoeCategory.
    category = Nested(PoeCategory)
    corrupted = Boolean()
    cosmeticMods = Text(multi=True)
    craftedMods = Text(multi=True)
    descrText = Text()
    duplicated = Boolean()
    elder = Boolean()
    enchantMods = Text(multi=True)
    explicitMods = Text(multi=True)
    flavourText = Text(multi=True)
    frameType = Integer()
    h = Integer()
    icon = Keyword()
    id = Keyword()
    identified = Boolean()
    ilvl = Integer()
    implicitMods = Text(multi=True)
    inventoryId = Text()
    isRelic = Boolean()
    league = Keyword()
    lockedToCharacter = Boolean()
    maxStackSize = Integer()
    name = Text()
    # nextLevelRequirements, properties and requirements all use the same
    # nested PoePropsReqs sub-document.
    nextLevelRequirements = Nested(PoePropsReqs, multi=True)
    note = Keyword()
    properties = Nested(PoePropsReqs, multi=True)
    prophecyDiffText = Text()
    prophecyText = Text()
    requirements = Nested(PoePropsReqs, multi=True)
    secDescrText = Text()
    shaper = Boolean()
    # Nested field declared without a document class.
    socketedItems = Nested()
    sockets = Nested(PoeSockets)
    stackSize = Integer()
    support = Boolean()
    talismanTier = Integer()
    typeLine = Text()
    utilityMods = Text(multi=True)
    verified = Boolean()
    w = Integer()
    x = Integer()
    y = Integer()
Пример #15
0
class Group(Document):
    """
    Meetup.com Group Model with elasticsearch persistence

    Meetup Group doc:
    https://meetup.com/de-DE/meetup_api/docs/:urlname/?uri=%2Fmeetup_api%2Fdocs%2F%3Aurlname%2F#get

    Elasticsearch persistence doc ->
    https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html#persistence

    Raises:
        GroupDoesNotExists: Raised when a requested group does not exist on elasticsearch
    """
    class Index:
        """
        Elasticsearch index of the model

        for override the default index ->
        https://elasticsearch-dsl.readthedocs.io/en/latest/persistence.html#document-life-cycle
        """

        name = "meetup_group"

    # required fields
    meetup_id = Long(required=True)
    urlname = Text(required=True)
    created = Date(default_timezone="UTC", required=True)
    description = Text(analyzer="snowball", required=True)
    name = Text(required=True)
    link = Text(required=True)
    location = GeoPoint(required=True)
    members = Integer(required=True)
    status = Text(required=True)
    timezone = Text(required=True)
    visibility = Text(required=True)

    # optional fields
    nomination_acceptable = Boolean()
    city = Text()
    city_link = Text()
    country = Text()
    fee_options_currencies_code = Text()
    fee_options_currencies_default = Boolean()
    fee_options_type = Text()
    join_mode = Text()
    localized_country_name = Text()
    localized_location = Text()
    member_limit = Integer()
    short_link = Text()
    state = Text()
    untranslated_city = Text()
    welcome_message = Text()
    who = Text()

    # category
    category_id = Long()
    category_name = Text()
    category_shortname = Text()
    category_sort_name = Text()

    # meta_category
    meta_category_id = Long()
    meta_category_shortname = Text()
    meta_category_name = Text()
    meta_category_sort_name = Text()

    # topics
    topics = Nested(Topic)

    # organizer
    organizer_id = Integer()
    organizer_name = Text()
    organizer_bio = Text()

    # events
    events = Nested(Event)

    # suggest fields (auto fill on save)
    name_suggest = Completion()

    def add_event(self, event: Event):
        """
        Add a single event object to the group.

        Arguments:
            event {Event} -- Event which should be added
        """
        self.events.append(event)

    def add_topic(self, topic: Topic):
        """
        Add a single topic object to the group.

        Arguments:
            topic {Topic} -- Topic which should be added
        """
        self.topics.append(topic)

    def add_events(self, events: List[Event]):
        """
        Add multiple event objects to the group.

        Arguments:
            events {List[Event]} -- Event list which should be added
        """
        self.events.extend(events)

    def event_exists(self, event_meetup_id: str) -> bool:
        """
        Check if an event with the meetup_id exists among this group's events.

        Arguments:
            event_meetup_id {str} -- meetup_id of the requested event

        Returns:
            bool -- True -> Event exists; False -> Event does not exist
        """
        return any(event.meetup_id == event_meetup_id
                   for event in self.events)

    def save(self, **kwargs):
        """
        Overwrite save method to set suggest fields

        The group name is copied into `name_suggest` before the document is
        saved; all keyword arguments are forwarded to the parent save().
        """
        self.name_suggest = self.name

        return super().save(**kwargs)

    @property
    def last_event_time(self) -> Optional[datetime]:
        """
        Get the latest event time of the group, if any event exists

        Usage:
            group: Group = Group(...)

            group.last_event_time

        Returns:
            Optional[datetime] -- Latest event time when any event exists in this group, else
                                  None
        """
        last_event_time: Optional[datetime] = None
        for event in self.events:
            if last_event_time is None or event.time > last_event_time:
                last_event_time = event.time
        return last_event_time

    @staticmethod
    def delete_if_exists(urlname: str) -> bool:
        """
        Delete a group based on the urlname if it exists.

        Usage:
            Group.delete_if_exists(urlname="MyGroupToDelete")

        Arguments:
            urlname {str} -- The Group URL name

        Returns:
            bool -- True -> Group was deleted; False -> Group doesn't exist on elasticsearch
        """
        try:
            group: Group = Group.get_group(urlname)
            group.delete()
            return True
        except GroupDoesNotExists:
            return False

    @staticmethod
    def get_group(urlname: str) -> Group:
        """
        Get Group from elasticsearch based on urlname

        The first hit of a "match" query on `urlname` is returned.

        Arguments:
            urlname {str} -- Group urlname

        Raises:
            GroupDoesNotExists: When a Group does not exist on elasticsearch

        Returns:
            Group -- the requested Group object from elasticsearch
        """
        s: Search = Group.search()
        s = s.query("match", urlname=urlname)
        results: Response = s.execute()
        for group in results:
            return group
        raise GroupDoesNotExists(
            "{} does not exists in elasticsearch!".format(urlname))

    @staticmethod
    def get_or_create_by_urlname(
        urlname: str,
        meetup_id: int,
        created: datetime,
        description: str,
        name: str,
        link: str,
        lat: float,
        lon: float,
        members: int,
        status: str,
        timezone: str,
        visibility: str,
    ) -> Group:
        """
        Get a Group Object from elasticsearch based on the urlname and update the Group Object
        with the arguments.

        An existing Group gets description, name, location, members, status, timezone and
        visibility updated and is saved; meetup_id, created and link are left untouched.

        When the Group does not exist on elasticsearch, a new Group Object is built from all
        arguments. The new object is returned without being saved.

        Arguments:
            urlname {str} -- Meetup Group urlname
            meetup_id {int} -- Meetup Group id
            created {datetime} -- create time of the Meetup Group
            description {str} -- Meetup Group description
            name {str} -- Meetup Group name
            link {str} -- link to the Group Meetup URL
            lat {float} -- Meetup Group location lat
            lon {float} -- Meetup Group location lon
            members {int} -- Meetup Group members amount
            status {str} -- Meetup Group status
            timezone {str} -- Meetup Group timezone
            visibility {str} -- Meetup Group visibility

        Returns:
            Group -- Updated (and saved) or new (unsaved) Group Object
        """

        s: Search = Group.search()
        s = s.query("match", urlname=urlname)
        results: Response = s.execute()

        # Only the first hit is updated and returned.
        for group in results:
            group.description = description
            group.name = name
            group.location = {"lat": lat, "lon": lon}
            group.members = members
            group.status = status
            group.timezone = timezone
            group.visibility = visibility
            group.save()
            return group

        return Group(
            urlname=urlname,
            meetup_id=meetup_id,
            created=created,
            description=description,
            name=name,
            link=link,
            location={
                "lat": lat,
                "lon": lon
            },
            members=members,
            status=status,
            timezone=timezone,
            visibility=visibility,
        )

    @staticmethod
    def get_all_groups() -> List[Group]:
        """
        Get all groups from Elasticsearch

        The hits are read with scan() instead of a single execute(): a plain
        search only returns the first page of hits (10 by default), which
        silently truncated the result to a handful of groups.

        Returns:
            List[Group] -- all groups from elasticsearch, in no particular order
        """
        s: Search = Group.search()
        s = s.query("match_all")
        return list(s.scan())

    @staticmethod
    def add_event_venue_to_list(venue_list: List[dict],
                                event: Event) -> List[dict]:
        """
        Add the event's venue to the list, if it wasn't already included

        The list is modified in place and also returned. Two venues count as
        the same when their locations are equal.

        Arguments:
            venue_list: List[dict] -- list of venue dicts
            event {Event} -- event to add

        Returns:
            List[dict] -- input venue_list with added event venue
        """

        # check if there is no venue information in event
        if not event.venue_location or not event.venue_name:
            return venue_list

        # exit method if venue already exists
        for venue in venue_list:
            if venue["location"] == event.venue_location:
                return venue_list

        event_dict: dict = event.to_dict()

        # append venue if it does not exist
        venue_list.append({
            "name": event_dict["venue_name"],
            "location": event_dict["venue_location"],
        })

        return venue_list

    @staticmethod
    def get_venue_location_average(venue_list: List[dict]) -> dict:
        """
        Calc the average location of all venues

        Arguments:
            venue_list {List[dict]} -- venue list for calc the average

        Raises:
            ValueError: When venue_list is empty

        Returns:
            dict -- {'lat': float, 'lon': float}
        """

        if len(venue_list) == 0:
            raise ValueError(
                "The size of venue_list need to be larger than 0!")

        venue_count: int = len(venue_list)
        lat_average: float = sum(venue["location"]["lat"]
                                 for venue in venue_list) / venue_count
        lon_average: float = sum(venue["location"]["lon"]
                                 for venue in venue_list) / venue_count

        return {"lat": lat_average, "lon": lon_average}

    def to_json_dict(self, load_events: bool) -> dict:
        """
        Convert to_dict into a JSON serializable dict object.
        Also add a venue dict to the group, for each venue that was used by that group in any event.

        Arguments:
            load_events {bool} -- True -> keep the events (with datetimes converted to strings);
                                  False -> replace the events with an empty list

        Returns:
            dict -- JSON serializable dict object
        """
        date_format: str = "%Y-%m-%dT%H:%M:%S%z"

        group_dict: dict = self.to_dict()

        # set group venue (collected from the events even when the events
        # themselves are dropped below)
        venues: List[dict] = []
        for event in self.events:
            venues = self.add_event_venue_to_list(venues, event)
        group_dict["venues"] = venues

        if len(venues) > 0:
            group_dict[
                "venue_location_average"] = self.get_venue_location_average(
                    venues)

        # handle the events once, instead of once per top-level field
        if "events" in group_dict:
            if load_events:
                for event_dict in group_dict["events"]:
                    # todo remove double events to reduce bandwith
                    for event_field, value in event_dict.items():
                        if isinstance(value, datetime):
                            event_dict[event_field] = value.strftime(
                                date_format)
            else:
                group_dict["events"] = []

        # only values are replaced, so iterating the dict while assigning is safe
        for field, value in group_dict.items():
            if isinstance(value, datetime):
                group_dict[field] = value.strftime(date_format)

        return group_dict
Пример #16
0
class Company(BaseESModel):
    """Elasticsearch representation of Company model.

    The class declares the index fields and three lookup tables
    (COMPUTED_MAPPINGS, MAPPINGS, SEARCH_FIELDS). How those tables are
    consumed is defined by BaseESModel, which is not part of this class.
    """

    # --- index field declarations ---
    id = Keyword()
    archived = Boolean()
    archived_by = fields.nested_contact_or_adviser_field('archived_by')
    archived_on = Date()
    archived_reason = Text()
    business_type = fields.nested_id_name_field()
    classification = fields.nested_id_name_field()
    companies_house_data = fields.nested_ch_company_field()
    company_number = fields.SortableCaseInsensitiveKeywordText()
    contacts = fields.nested_contact_or_adviser_field('contacts')
    created_on = Date()
    description = fields.EnglishText()
    employee_range = fields.nested_id_name_field()
    export_experience_category = fields.nested_id_name_field()
    export_to_countries = fields.nested_id_name_field()
    future_interest_countries = fields.nested_id_name_field()
    global_headquarters = fields.nested_id_name_field()
    headquarter_type = fields.nested_id_name_field()
    modified_on = Date()
    # name is copied (copy_to) into the name_keyword and name_trigram fields
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()
    one_list_account_owner = fields.nested_contact_or_adviser_field('one_list_account_owner')
    reference_code = fields.SortableCaseInsensitiveKeywordText()
    registered_address_1 = Text()
    registered_address_2 = Text()
    registered_address_town = fields.SortableCaseInsensitiveKeywordText()
    registered_address_county = Text()
    registered_address_country = fields.nested_id_name_partial_field(
        'registered_address_country',
    )
    # the postcode is copied (copy_to) into its *_trigram companion field
    registered_address_postcode = Text(
        copy_to=[
            'registered_address_postcode_trigram',
        ],
    )
    registered_address_postcode_trigram = fields.TrigramText()
    sector = fields.nested_sector_field()
    trading_address_1 = Text()
    trading_address_2 = Text()
    trading_address_town = fields.SortableCaseInsensitiveKeywordText()
    trading_address_county = Text()
    # same copy_to arrangement as the registered address postcode
    trading_address_postcode = Text(
        copy_to=['trading_address_postcode_trigram'],
    )
    trading_address_postcode_trigram = fields.TrigramText()
    trading_address_country = fields.nested_id_name_partial_field(
        'trading_address_country',
    )
    # trading_name is copied (copy_to) into keyword and trigram companions
    trading_name = fields.SortableText(
        copy_to=[
            'trading_name_keyword',
            'trading_name_trigram',
        ],
    )
    trading_name_keyword = fields.SortableCaseInsensitiveKeywordText()
    trading_name_trigram = fields.TrigramText()
    turnover_range = fields.nested_id_name_field()
    uk_region = fields.nested_id_name_field()
    uk_based = Boolean()
    # declared with index=False
    vat_number = Keyword(index=False)
    website = Text()

    # trading_name is produced by reading the `alias` attribute of whatever
    # object the getter is applied to.
    # NOTE(review): presumably the source model instance - confirm in BaseESModel.
    COMPUTED_MAPPINGS = {
        'trading_name': attrgetter('alias'),
    }

    # Per-field converter callables, keyed by field name. The lambda entries
    # call .all() on their argument and convert every item to a dict.
    # NOTE(review): presumably each converter receives the same-named
    # attribute of the source model - confirm in BaseESModel.
    MAPPINGS = {
        'id': str,
        'archived_by': dict_utils.contact_or_adviser_dict,
        'business_type': dict_utils.id_name_dict,
        'classification': dict_utils.id_name_dict,
        'companies_house_data': dict_utils.ch_company_dict,
        'contacts': lambda col: [dict_utils.contact_or_adviser_dict(c) for c in col.all()],
        'employee_range': dict_utils.id_name_dict,
        'export_experience_category': dict_utils.id_name_dict,
        'export_to_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'future_interest_countries': lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'global_headquarters': dict_utils.id_name_dict,
        'headquarter_type': dict_utils.id_name_dict,
        'one_list_account_owner': dict_utils.contact_or_adviser_dict,
        'registered_address_country': dict_utils.id_name_dict,
        'sector': dict_utils.sector_dict,
        'trading_address_country': dict_utils.id_name_dict,
        'turnover_range': dict_utils.id_name_dict,
        'uk_based': bool,
        'uk_region': dict_utils.id_name_dict,
    }

    # Field paths listed for searching.
    # NOTE(review): presumably consumed by the search query builder - confirm.
    SEARCH_FIELDS = (
        'name',
        'name_trigram',
        'company_number',
        'trading_name',
        'trading_name_trigram',
        'reference_code',
        'registered_address_country.name_trigram',
        'registered_address_postcode_trigram',
        'trading_address_country.name_trigram',
        'trading_address_postcode_trigram',
        'uk_region.name_trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = 'company'
Пример #17
0
class IndexedFile(Document):
    """Elasticsearch document describing one indexed file entry.

    Documents live in the index named by
    ``settings.ES_INDICES['files']['alias']`` and use a strict dynamic
    mapping (see ``Meta``).
    """

    name = Text(analyzer=file_analyzer,
                fields={
                    '_exact': Keyword(),
                    '_pattern': Text(analyzer=file_pattern_analyzer),
                    '_reverse': Text(analyzer=reverse_file_analyzer)
                })
    path = Text(fields={
        '_exact': Keyword(),
        '_path': Text(analyzer=path_analyzer)
    })
    lastModified = Date()
    length = Long()
    format = Text()
    mimeType = Keyword()
    type = Text()
    system = Text(fields={'_exact': Keyword()})
    systemId = Text()
    basePath = Text(fields={'_exact': Keyword()})
    dsMeta = Nested()
    permissions = Nested(
        properties={
            'username':
            Keyword(),
            'recursive':
            Boolean(),
            'permission':
            Nested(properties={
                'read': Boolean(),
                'write': Boolean(),
                'execute': Boolean()
            })
        })

    @classmethod
    def _pems_filter(cls, username):
        """Build a nested ``permissions`` query for ``username``.

        The query matches documents having a permission entry whose
        ``username`` is either the given user or the literal ``'******'``
        entry (the "world" term in this code).

        The previous implementation read ``self.username`` inside a
        classmethod, which cannot work because the class has no such
        attribute; the username is now an explicit argument, matching the
        (currently disabled) call site in ``children``.

        Arguments:
            username -- user name to match in ``permissions.username``

        Returns:
            the nested query object
        """
        term_username_query = Q('term',
                                **{'permissions.username': username})
        term_world_query = Q('term', **{'permissions.username': '******'})
        bool_query = Q('bool')
        bool_query.should = [term_username_query, term_world_query]
        nested_query = Q('nested')
        nested_query.path = 'permissions'
        nested_query.query = bool_query
        return nested_query

    @classmethod
    def from_path(cls, system, path):
        """Return the document indexed for ``system``/``path``.

        The files index is refreshed first. When several documents share the
        same system and path, every one except the first hit is deleted and
        the first hit is returned.

        Arguments:
            system -- value matched against ``system._exact``
            path -- value matched against ``path._exact``

        Returns:
            the matching document, fetched with ``cls.get``

        Raises:
            DocumentNotFound -- when no document matches
        """
        Index(settings.ES_INDICES['files']['alias']).refresh()
        sys_filter = Q('term', **{'system._exact': system})
        path_filter = Q('term', **{'path._exact': path})
        # Errors from execute() propagate unchanged; the former
        # ``except Exception as exc: raise exc`` wrapper added nothing.
        res = cls.search().filter(sys_filter & path_filter).execute()

        total = res.hits.total.value
        if total < 1:
            raise DocumentNotFound("No document found for "
                                   "{}/{}".format(system, path))

        first_id = res[0].meta.id
        if total > 1:
            id_filter = Q('term', **{'_id': first_id})
            # Delete all files indexed with the same system/path, except the first result
            delete_query = sys_filter & path_filter & ~id_filter
            cls.search().filter(delete_query).delete()
        return cls.get(first_id)

    @classmethod
    def children(cls, username, system, path, limit=100, search_after=None):
        """Return one page of documents whose ``basePath`` is ``path``.

        Results are sorted by ``_id`` so that ``search_after`` paging works.

        Arguments:
            username -- currently unused: the permission filter is disabled
            system -- value matched against ``system._exact``
            path -- value matched against ``basePath._exact``
            limit -- page size (default 100)
            search_after -- sort key returned by a previous call, if any

        Returns:
            tuple -- ``(documents, sort_key)`` where ``sort_key`` is the sort
            value of the last hit, or ``([], None)`` when nothing matched

        Raises:
            TransportError -- propagated unchanged from the search call
        """
        search = cls.search()
        # search = search.filter(cls._pems_filter(username))
        search = search.filter('term', **{'basePath._exact': path})
        search = search.filter('term', **{'system._exact': system})
        search = search.sort('_id')
        search = search.extra(size=limit)
        if search_after:
            search = search.extra(search_after=search_after)
        # Let the original TransportError propagate: re-raising the bare
        # class (as before) discarded the status code and error details.
        res = search.execute()
        if len(res.hits) > 0:
            wrapped_children = [cls.get(doc.meta.id) for doc in res]
            sort_key = res.hits.hits[-1]['sort']
            return wrapped_children, sort_key
        return [], None

    class Index:
        name = settings.ES_INDICES['files']['alias']

    class Meta:
        dynamic = MetaField('strict')
Пример #18
0
class Repos(Document):
    """Document with a public flag and a creation date, stored in index "git".

    The class used to be declared twice in a row; the first declaration was
    immediately shadowed by the second (same fields plus ``Index``), so only
    the effective definition is kept.
    """

    is_public = Boolean()
    created_at = Date()

    class Index:
        name = "git"
Пример #20
0
class SneakerItem(Document):
    """Indexed sneaker listing plus helpers that build bulk-update actions."""

    price = Integer(required=True)
    sizes = Keyword(multi=True)
    name = Text()
    brand = Text()
    model = Text()
    colorway = Text()
    item_id = Text(fields={'keyword': Keyword()})
    url = Text(analyzer='simple', fields={'keyword': Keyword()}, required=True)
    img_url = Text(analyzer='simple', fields={'keyword': Keyword()})
    telegram_img_id = Text(analyzer='simple', fields={'keyword': Keyword()})
    last_update = Double()

    new = Boolean()
    new_sizes = Keyword(multi=True)
    price_change = Integer()

    class Index:
        name = index_name
        using = client

    @property
    def descr(self):
        """Upper-cased name, or brand + model + colorway when name is falsy."""
        label = self.name
        if not label:
            label = self.brand + self.model + self.colorway
        return label.upper()

    def get_bulk_update_dict(self):
        """Return a bulk 'update' action: meta fields, script and upsert body."""
        action = self.to_dict(include_meta=True)
        # The document body is carried by 'upsert', not by '_source'.
        action.pop('_source')
        action.update({
            '_op_type': 'update',
            'script': self.get_update_script(),
            'upsert': self.get_upsert_dict(),
        })
        return action

    def get_update_script(self):
        """Return the painless script definition with this item's parameters."""
        script_params = {
            'sizes': list(self.sizes),
            'price': self.price,
            'new_update_time': get_time(),
            'img_url': self.img_url,
        }
        return {
            'lang': 'painless',
            'source': update_script,
            'params': script_params,
        }

    def get_upd_dict(self):
        """Placeholder; not implemented, returns None."""
        return None

    def get_upsert_dict(self):
        """Return the document body with 'new' forced to True."""
        body = self.to_dict()
        body['new'] = True
        return body
Пример #21
0
    def build_mapping(cls):
        """Generate the mapping definition for indexed messages.

        Builds a ``Mapping`` for ``cls.doc_type`` with ``_all`` enabled and
        registers every message field on it.

        Returns:
            the populated ``Mapping`` object
        """
        m = Mapping(cls.doc_type)
        m.meta('_all', enabled=True)
        # attachments
        m.field(
            'attachments',
            Nested(doc_class=IndexedMessageAttachment,
                   include_in_all=True,
                   properties={
                       "content_type": Keyword(),
                       "file_name": Keyword(),
                       "is_inline": Boolean(),
                       "size": Integer(),
                       "temp_id": Keyword(),
                       "url": Keyword(),
                       "mime_boundary": Keyword()
                   }))
        m.field('body_html',
                'text',
                fields={
                    "normalized": {
                        "type": "text",
                        "analyzer": "text_analyzer"
                    }
                })
        m.field('body_plain',
                'text',
                fields={
                    "normalized": {
                        "type": "text",
                        "analyzer": "text_analyzer"
                    }
                })
        m.field('date', 'date')
        m.field('date_delete', 'date')
        m.field('date_insert', 'date')
        m.field('date_sort', 'date')
        m.field('discussion_id', 'keyword')
        # external references
        m.field(
            'external_references',
            Nested(doc_class=IndexedExternalReferences,
                   include_in_all=True,
                   properties={
                       "ancestors_ids": Keyword(),
                       "message_id": Keyword(),
                       "parent_id": Keyword()
                   }))
        # identities
        # NOTE(review): doc_class is IndexedExternalReferences here as well;
        # this looks copied from the block above - confirm the intended class.
        identities = Nested(doc_class=IndexedExternalReferences,
                            include_in_all=True)
        identities.field("identifier",
                         "text",
                         fields={
                             "raw": Keyword(),
                             "parts": {
                                 "type": "text",
                                 "analyzer": "email_analyzer"
                             }
                         })
        identities.field("type", "keyword")
        m.field('identities', identities)

        m.field('importance_level', 'short')
        m.field('is_answered', 'boolean')
        m.field('is_draft', 'boolean')
        m.field('is_unread', 'boolean')
        m.field('is_received', 'boolean')
        m.field('message_id', 'keyword')
        m.field('parent_id', 'keyword')
        # participants
        participants = Nested(doc_class=IndexedParticipant,
                              include_in_all=True)
        participants.field("address",
                           "text",
                           analyzer="text_analyzer",
                           fields={
                               "raw": {
                                   "type": "keyword"
                               },
                               "parts": {
                                   "type": "text",
                                   "analyzer": "email_analyzer"
                               }
                           })
        participants.field("contact_ids", Keyword(multi=True))
        participants.field("label", "text", analyzer="text_analyzer")
        participants.field("protocol", Keyword())
        participants.field("type", Keyword())
        m.field('participants', participants)
        # PI
        pi = Nested(doc_class=PIIndexModel,
                    include_in_all=True,
                    properties={
                        "technic": "integer",
                        "comportment": "integer",
                        "context": "integer",
                        "version": "integer",
                        "date_update": "date"
                    })
        m.field("pi", pi)
        m.field('privacy_features', Nested(include_in_all=True))
        m.field('raw_msg_id', "keyword")
        # 'subject' and 'tags' are registered exactly once. A second
        # m.field('subject', 'text') used to follow and replaced this
        # definition, silently dropping the "normalized" sub-field.
        m.field('subject',
                'text',
                fields={
                    "normalized": {
                        "type": "text",
                        "analyzer": "text_analyzer"
                    }
                })
        m.field('tags', Keyword(multi=True))
        m.field('type', 'keyword')

        return m
Пример #22
0
class DatasetSearch(ModelSearchAdapter):
    """Search adapter for ``Dataset``: mapping, facets, boosters, serializer.

    The class attributes declare the indexed fields, the searchable field
    list with boosts (``fields``), the sort aliases (``sorts``), the facets
    and the score boosters. ``serialize`` builds the document body that is
    indexed for a dataset.
    """

    model = Dataset
    fuzzy = True
    exclude_fields = ['spatial.geom', 'spatial.zones.geom']

    class Meta:
        doc_type = 'Dataset'

    title = String(analyzer=i18n_analyzer,
                   fields={'raw': String(index='not_analyzed')})
    description = String(analyzer=i18n_analyzer)
    license = String(index='not_analyzed')
    frequency = String(index='not_analyzed')
    organization = String(index='not_analyzed')
    owner = String(index='not_analyzed')
    tags = String(index='not_analyzed',
                  fields={'i18n': String(index='not_analyzed')})
    badges = String(index='not_analyzed')
    tag_suggest = Completion(analyzer=simple,
                             search_analyzer=simple,
                             payloads=False)
    resources = Object(
        properties={
            'title': String(),
            'description': String(),
            'format': String(index='not_analyzed')
        })
    format_suggest = Completion(analyzer=simple,
                                search_analyzer=simple,
                                payloads=False)
    dataset_suggest = Completion(analyzer=simple,
                                 search_analyzer=simple,
                                 payloads=True)
    created = Date(format='date_hour_minute_second')
    last_modified = Date(format='date_hour_minute_second')
    metrics = metrics_mapping_for(Dataset)
    featured = Boolean()
    temporal_coverage = Nested(multi=False,
                               properties={
                                   'start': Long(),
                                   'end': Long()
                               })
    # No trailing comma here: `Long(),` would bind a 1-tuple instead of a
    # field, leaving temporal_weight out of the declared mapping.
    temporal_weight = Long()
    geozones = Object(
        properties={
            'id': String(index='not_analyzed'),
            'name': String(index='not_analyzed'),
            'keys': String(index='not_analyzed')
        })
    granularity = String(index='not_analyzed')
    spatial_weight = Long()
    from_certified = Boolean()

    # Searched fields; the ^N suffix is the per-field boost.
    fields = (
        'geozones.keys^9',
        'geozones.name^9',
        'acronym^7',
        'title^6',
        'tags.i18n^3',
        'description',
    )
    # Public sort name -> indexed field used for sorting.
    sorts = {
        'title': 'title.raw',
        'created': 'created',
        'last_modified': 'last_modified',
        'reuses': 'metrics.reuses',
        'followers': 'metrics.followers',
        'views': 'metrics.views',
    }

    facets = {
        'tag':
        TermsFacet(field='tags'),
        'badge':
        TermsFacet(field='badges', labelizer=dataset_badge_labelizer),
        'organization':
        ModelTermsFacet(field='organization', model=Organization),
        'owner':
        ModelTermsFacet(field='owner', model=User),
        'license':
        ModelTermsFacet(field='license', model=License),
        'geozone':
        ModelTermsFacet(field='geozones.id',
                        model=GeoZone,
                        labelizer=zone_labelizer),
        'granularity':
        TermsFacet(field='granularity', labelizer=granularity_labelizer),
        'format':
        TermsFacet(field='resources.format'),
        'resource_type':
        TermsFacet(field='resources.type', labelizer=resource_type_labelizer),
        'reuses':
        RangeFacet(field='metrics.reuses',
                   ranges=[('none', (None, 1)), ('few', (1, 5)),
                           ('quite', (5, 10)), ('many', (10, None))],
                   labels={
                       'none': _('Never reused'),
                       'few': _('Little reused'),
                       'quite': _('Quite reused'),
                       'many': _('Heavily reused'),
                   }),
        'temporal_coverage':
        TemporalCoverageFacet(field='temporal_coverage'),
        'featured':
        BoolFacet(field='featured'),
    }
    boosters = [
        BoolBooster('featured', 1.5),
        BoolBooster('from_certified', 1.2),
        ValueFactor('spatial_weight', missing=1),
        ValueFactor('temporal_weight', missing=1),
        GaussDecay('metrics.reuses', max_reuses, decay=0.1),
        GaussDecay('metrics.followers',
                   max_followers,
                   max_followers,
                   decay=0.1),
    ]

    @classmethod
    def is_indexable(cls, dataset):
        """Return True for a dataset that is not deleted, not private and has resources."""
        return (dataset.deleted is None and len(dataset.resources) > 0
                and not dataset.private)

    @classmethod
    def get_suggest_weight(cls, temporal_weight, spatial_weight, featured):
        '''Compute the suggest part of the indexation payload'''
        featured_weight = 1 if not featured else FEATURED_WEIGHT
        return int(temporal_weight * spatial_weight * featured_weight * 10)

    @classmethod
    def serialize(cls, dataset):
        """Build the document body indexed for ``dataset``.

        Arguments:
            dataset -- the dataset to serialize

        Returns:
            dict -- the document, including the ``dataset_suggest`` payload
            and, when available, temporal and spatial coverage information
        """
        organization = None
        owner = None
        image_url = None
        # Defaults used for the suggest weight when the dataset has no
        # usable temporal coverage / no spatial information.
        spatial_weight = DEFAULT_SPATIAL_WEIGHT
        temporal_weight = DEFAULT_TEMPORAL_WEIGHT

        if dataset.organization:
            organization = Organization.objects(
                id=dataset.organization.id).first()
            image_url = organization.logo(40, external=True)
        elif dataset.owner:
            owner = User.objects(id=dataset.owner.id).first()
            image_url = owner.avatar(40, external=True)

        certified = organization and organization.certified

        document = {
            'title':
            dataset.title,
            'description':
            dataset.description,
            'license':
            getattr(dataset.license, 'id', None),
            'tags':
            dataset.tags,
            'badges': [badge.kind for badge in dataset.badges],
            'tag_suggest':
            dataset.tags,
            'resources': [{
                'title': r.title,
                'description': r.description,
                'format': r.format,
                'type': r.type,
            } for r in dataset.resources],
            'format_suggest':
            [r.format.lower() for r in dataset.resources if r.format],
            'frequency':
            dataset.frequency,
            'organization':
            str(organization.id) if organization else None,
            'owner':
            str(owner.id) if owner else None,
            'dataset_suggest': {
                'input':
                cls.completer_tokenize(dataset.title) + [str(dataset.id)],
                'output': dataset.title,
                'payload': {
                    'id': str(dataset.id),
                    'slug': dataset.slug,
                    'acronym': dataset.acronym,
                    'image_url': image_url,
                },
            },
            'created':
            dataset.created_at.strftime('%Y-%m-%dT%H:%M:%S'),
            'last_modified':
            dataset.last_modified.strftime('%Y-%m-%dT%H:%M:%S'),
            'metrics':
            dataset.metrics,
            'featured':
            dataset.featured,
            'from_certified':
            certified,
        }
        if (dataset.temporal_coverage is not None
                and dataset.temporal_coverage.start
                and dataset.temporal_coverage.end):
            # Coverage bounds are indexed as ordinal day numbers; the weight
            # is the covered span in years, capped at MAX_TEMPORAL_WEIGHT.
            start = dataset.temporal_coverage.start.toordinal()
            end = dataset.temporal_coverage.end.toordinal()
            temporal_weight = min((end - start) / 365, MAX_TEMPORAL_WEIGHT)
            document.update({
                'temporal_coverage': {
                    'start': start,
                    'end': end
                },
                'temporal_weight': temporal_weight,
            })

        if dataset.spatial is not None:
            # Index precise zone labels and parents zone identifiers
            # to allow fast filtering.
            zone_ids = [z.id for z in dataset.spatial.zones]
            zones = GeoZone.objects(id__in=zone_ids).exclude('geom')
            parents = set()
            geozones = []
            coverage_level = ADMIN_LEVEL_MAX
            for zone in zones:
                geozones.append({
                    'id': zone.id,
                    'name': zone.name,
                    'keys': zone.keys_values
                })
                parents |= set(zone.parents)
                coverage_level = min(coverage_level, admin_levels[zone.level])

            # Parent zones are indexed by identifier only.
            geozones.extend([{'id': p} for p in parents])

            spatial_weight = ADMIN_LEVEL_MAX / coverage_level
            document.update({
                'geozones': geozones,
                'granularity': dataset.spatial.granularity,
                'spatial_weight': spatial_weight,
            })

        document['dataset_suggest']['weight'] = cls.get_suggest_weight(
            temporal_weight, spatial_weight, dataset.featured)

        if dataset.acronym:
            document['dataset_suggest']['input'].append(dataset.acronym)

        return document
Пример #23
0
class InvestmentProject(BaseESModel):
    """Elasticsearch representation of InvestmentProject.

    The class declares the index fields and three lookup tables
    (MAPPINGS, COMPUTED_MAPPINGS, SEARCH_FIELDS). How those tables are
    consumed is defined by BaseESModel, which is not part of this class.
    """

    # --- index field declarations ---
    id = Keyword()
    actual_land_date = Date()
    actual_uk_regions = fields.nested_id_name_field()
    address_1 = Text()
    address_2 = Text()
    address_town = fields.SortableCaseInsensitiveKeywordText()
    address_postcode = Text()
    approved_commitment_to_invest = Boolean()
    approved_fdi = Boolean()
    approved_good_value = Boolean()
    approved_high_value = Boolean()
    approved_landed = Boolean()
    approved_non_fdi = Boolean()
    # the two allow_blank_* flags are declared with index=False
    allow_blank_estimated_land_date = Boolean(index=False)
    allow_blank_possible_uk_regions = Boolean(index=False)
    anonymous_description = fields.EnglishText()
    archived = Boolean()
    archived_by = fields.nested_contact_or_adviser_field('archived_by')
    archived_on = Date()
    archived_reason = Text()
    associated_non_fdi_r_and_d_project = _nested_investment_project_field()
    average_salary = fields.nested_id_name_field()
    business_activities = fields.nested_id_name_field()
    client_cannot_provide_foreign_investment = Boolean()
    client_cannot_provide_total_investment = Boolean()
    client_contacts = fields.nested_contact_or_adviser_field('client_contacts')
    client_relationship_manager = fields.nested_contact_or_adviser_field(
        'client_relationship_manager',
        include_dit_team=True,
    )
    client_requirements = fields.TextWithKeyword()
    comments = fields.EnglishText()
    country_lost_to = _country_lost_to_mapping()
    created_on = Date()
    created_by = fields.nested_contact_or_adviser_field(
        'created_by',
        include_dit_team=True,
    )
    date_abandoned = Date()
    date_lost = Date()
    delivery_partners = fields.nested_id_name_field()
    description = fields.EnglishText()
    estimated_land_date = Date()
    export_revenue = Boolean()
    fdi_type = fields.nested_id_name_field()
    fdi_value = fields.nested_id_name_field()
    foreign_equity_investment = Double()
    government_assistance = Boolean()
    intermediate_company = fields.nested_id_name_field()
    investor_company = fields.nested_id_name_partial_field('investor_company')
    investor_company_country = fields.nested_id_name_field()
    investment_type = fields.nested_id_name_field()
    investor_type = fields.nested_id_name_field()
    level_of_involvement = fields.nested_id_name_field()
    likelihood_of_landing = Long()
    project_assurance_adviser = fields.nested_contact_or_adviser_field(
        'project_assurance_adviser',
        include_dit_team=True,
    )
    project_manager = fields.nested_contact_or_adviser_field(
        'project_manager',
        include_dit_team=True,
    )
    # name is copied (copy_to) into the name_keyword and name_trigram fields
    name = fields.SortableText(copy_to=['name_keyword', 'name_trigram'])
    name_keyword = fields.SortableCaseInsensitiveKeywordText()
    name_trigram = fields.TrigramText()
    new_tech_to_uk = Boolean()
    non_fdi_r_and_d_budget = Boolean()
    number_new_jobs = Integer()
    number_safeguarded_jobs = Long()
    modified_on = Date()
    project_arrived_in_triage_on = Date()
    # project_code is copied (copy_to) into project_code_trigram
    project_code = fields.SortableCaseInsensitiveKeywordText(
        copy_to='project_code_trigram')
    project_code_trigram = fields.TrigramText()
    proposal_deadline = Date()
    other_business_activity = fields.TextWithKeyword()
    quotable_as_public_case_study = Boolean()
    r_and_d_budget = Boolean()
    reason_abandoned = fields.TextWithKeyword()
    reason_delayed = fields.TextWithKeyword()
    reason_lost = fields.TextWithKeyword()
    referral_source_activity = fields.nested_id_name_field()
    referral_source_activity_event = fields.SortableCaseInsensitiveKeywordText(
    )
    referral_source_activity_marketing = fields.nested_id_name_field()
    referral_source_activity_website = fields.nested_id_name_field()
    referral_source_adviser = _referral_source_adviser_mapping()
    sector = fields.nested_sector_field()
    site_decided = Boolean()
    some_new_jobs = Boolean()
    specific_programme = fields.nested_id_name_field()
    stage = fields.nested_id_name_field()
    status = fields.SortableCaseInsensitiveKeywordText()
    team_members = fields.nested_contact_or_adviser_field(
        'team_members', include_dit_team=True)
    total_investment = Double()
    uk_company = fields.nested_id_name_partial_field('uk_company')
    uk_company_decided = Boolean()
    uk_region_locations = fields.nested_id_name_field()
    will_new_jobs_last_two_years = Boolean()

    # Per-field converter callables, keyed by field name. The lambda entries
    # call .all() on their argument and convert every item to a dict.
    # NOTE(review): presumably each converter receives the same-named
    # attribute of the source model - confirm in BaseESModel.
    MAPPINGS = {
        'id':
        str,
        'actual_uk_regions':
        lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'archived_by':
        dict_utils.contact_or_adviser_dict,
        'associated_non_fdi_r_and_d_project':
        dict_utils.investment_project_dict,
        'average_salary':
        dict_utils.id_name_dict,
        'business_activities':
        lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'client_contacts':
        lambda col: [dict_utils.contact_or_adviser_dict(c) for c in col.all()],
        'client_relationship_manager':
        dict_utils.adviser_dict_with_team,
        'country_lost_to':
        dict_utils.id_name_dict,
        'created_by':
        dict_utils.adviser_dict_with_team,
        'delivery_partners':
        lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
        'fdi_type':
        dict_utils.id_name_dict,
        'fdi_value':
        dict_utils.id_name_dict,
        'intermediate_company':
        dict_utils.id_name_dict,
        'investment_type':
        dict_utils.id_name_dict,
        'investor_company':
        dict_utils.id_name_dict,
        'investor_type':
        dict_utils.id_name_dict,
        'level_of_involvement':
        dict_utils.id_name_dict,
        'project_assurance_adviser':
        dict_utils.adviser_dict_with_team,
        'project_code':
        str,
        'project_manager':
        dict_utils.adviser_dict_with_team,
        'referral_source_activity':
        dict_utils.id_name_dict,
        'referral_source_activity_marketing':
        dict_utils.id_name_dict,
        'referral_source_activity_website':
        dict_utils.id_name_dict,
        'referral_source_adviser':
        dict_utils.contact_or_adviser_dict,
        'sector':
        dict_utils.sector_dict,
        'specific_programme':
        dict_utils.id_name_dict,
        'stage':
        dict_utils.id_name_dict,
        # each team member is converted through its `adviser` attribute
        'team_members':
        lambda col: [
            dict_utils.contact_or_adviser_dict(c.adviser,
                                               include_dit_team=True)
            for c in col.all()
        ],
        'uk_company':
        dict_utils.id_name_dict,
        'uk_region_locations':
        lambda col: [dict_utils.id_name_dict(c) for c in col.all()],
    }

    # investor_company_country is built from the dotted path
    # 'investor_company.registered_address_country'.
    # NOTE(review): presumably resolved against the source model instance -
    # confirm in dict_utils.computed_nested_id_name_dict.
    COMPUTED_MAPPINGS = {
        'investor_company_country':
        dict_utils.computed_nested_id_name_dict(
            'investor_company.registered_address_country', ),
    }

    # Field paths listed for searching.
    # NOTE(review): presumably consumed by the search query builder - confirm.
    SEARCH_FIELDS = (
        'name',
        'name_trigram',
        'uk_company.name',
        'uk_company.name_trigram',
        'investor_company.name',
        'investor_company.name_trigram',
        'project_code_trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = 'investment_project'
Пример #24
0
    def build_mapping(cls):
        """Build and return the elasticsearch mapping for indexed contacts.

        A ``Mapping`` for ``cls.doc_type`` is created with ``_all`` enabled,
        then every contact field is registered on it in a fixed order.
        """

        def normalized_subfield():
            # A fresh dict per call, so no two fields share one options object.
            return {
                "normalized": {
                    "type": "text",
                    "analyzer": "text_analyzer"
                }
            }

        def address_subfields():
            # Sub-fields of an internet address: raw keyword + analyzed parts.
            return {
                "raw": {
                    "type": "keyword"
                },
                "parts": {
                    "type": "text",
                    "analyzer": "email_analyzer"
                }
            }

        mapping = Mapping(cls.doc_type)
        mapping.meta('_all', enabled=True)
        mapping.field('additional_name', 'text', fields=normalized_subfield())

        # postal addresses
        postal_properties = {
            "address_id": "keyword",
            "label": "text",
            "type": "keyword",
            "is_primary": "boolean",
            "street": "text",
            "city": "text",
            "postal_code": "keyword",
            "country": "text",
            "region": "text"
        }
        postal_addresses = Nested(doc_class=IndexedPostalAddress,
                                  include_in_all=True,
                                  properties=postal_properties)
        mapping.field("addresses", postal_addresses)
        mapping.field("avatar", "keyword")
        for date_field in ('date_insert', 'date_update', 'deleted'):
            mapping.field(date_field, 'date')

        # internet addresses: one nested definition shared by emails and ims
        net_address = Nested(doc_class=IndexedInternetAddress,
                             include_in_all=True)
        net_address.field("address",
                          "text",
                          analyzer="text_analyzer",
                          fields=address_subfields())
        net_address.field("email_id", Keyword())
        net_address.field("is_primary", Boolean())
        net_address.field("label", "text", analyzer="text_analyzer")
        net_address.field("type", Keyword())
        mapping.field("emails", net_address)

        mapping.field('family_name', "text", fields=normalized_subfield())
        mapping.field('given_name', 'text', fields=normalized_subfield())
        mapping.field("groups", Keyword(multi=True))

        # social identities: one nested definition shared by two fields
        social = Nested(doc_class=IndexedSocialIdentity,
                        include_in_all=True,
                        properties={
                            "name": "text",
                            "type": "keyword",
                            "infos": Nested()
                        })
        mapping.field("identities", social)
        mapping.field("ims", net_address)
        mapping.field("infos", Nested())
        for name_part in ('name_prefix', 'name_suffix'):
            mapping.field(name_part, 'keyword')

        # organizations
        orgs = Nested(doc_class=IndexedOrganization, include_in_all=True)
        orgs.field("deleted", Boolean())
        orgs.field("department", "text", analyzer="text_analyzer")
        orgs.field("is_primary", Boolean())
        orgs.field("job_description", "text")
        orgs.field("label", "text", analyzer="text_analyzer")
        orgs.field("name", 'text', fields=normalized_subfield())
        for keyword_field in ("organization_id", "title", "type"):
            orgs.field(keyword_field, Keyword())
        mapping.field("organizations", orgs)

        # phones
        phone_properties = {
            "is_primary": "boolean",
            "number": "text",
            "normalized_number": "text",
            "phone_id": "keyword",
            "type": "keyword",
            "uri": "keyword"
        }
        mapping.field(
            "phones",
            Nested(doc_class=IndexedPhone,
                   include_in_all=True,
                   properties=phone_properties))

        # pi
        pi_properties = {
            "comportment": "integer",
            "context": "integer",
            "date_update": "date",
            "technic": "integer",
            "version": "integer"
        }
        mapping.field(
            "pi",
            Nested(doc_class=PIIndexModel,
                   include_in_all=True,
                   properties=pi_properties))
        mapping.field("privacy_features", Nested(include_in_all=True))
        mapping.field("public_key", Nested())
        mapping.field("social_identities", social)
        mapping.field("tags", Keyword(multi=True))
        mapping.field('title', 'text')

        return mapping
Пример #25
0
class Interaction(BaseESModel):
    """Elasticsearch document for the Interaction model.

    Besides the field declarations the class carries three lookup tables:
    ``MAPPINGS`` and ``COMPUTED_MAPPINGS`` pair a field name with a callable,
    and ``SEARCH_FIELDS`` lists field paths. How the tables are consumed is
    not visible here (presumably by ``BaseESModel`` -- confirm there).
    """

    # --- field declarations (order kept as declared) ---
    id = Keyword()
    company = fields.company_field()
    company_sector = fields.sector_field()
    company_one_list_group_tier = fields.id_unindexed_name_field()
    communication_channel = fields.id_unindexed_name_field()
    contacts = _contact_field()
    created_on = Date()
    date = Date()
    dit_participants = Object(_DITParticipant)
    event = fields.id_name_partial_field()
    investment_project = fields.id_unindexed_name_field()
    investment_project_sector = fields.sector_field()
    is_event = Boolean(index=False)
    grant_amount_offered = Double(index=False)
    kind = Keyword()
    modified_on = Date()
    net_company_receipt = Double(index=False)
    notes = fields.Text(index=False)
    policy_areas = fields.id_unindexed_name_field()
    policy_issue_types = fields.id_unindexed_name_field()
    service = fields.id_unindexed_name_field()
    service_delivery_status = fields.id_unindexed_name_field()
    # Keyword field with an additional 'english' text sub-field.
    subject = fields.NormalizedKeyword(
        fields={'english': fields.EnglishText()},
    )
    was_policy_feedback_provided = Boolean()

    # Field name -> callable from dict_utils (or a local helper).
    MAPPINGS = {
        'company': dict_utils.company_dict,
        'communication_channel': dict_utils.id_name_dict,
        'contacts': dict_utils.contact_or_adviser_list_of_dicts,
        'dit_participants': _dit_participant_list,
        'event': dict_utils.id_name_dict,
        'investment_project': dict_utils.id_name_dict,
        'policy_areas': dict_utils.id_name_list_of_dicts,
        'policy_issue_types': dict_utils.id_name_list_of_dicts,
        'service': dict_utils.id_name_dict,
        'service_delivery_status': dict_utils.id_name_dict,
    }

    # Field name -> callable taking the whole object.
    COMPUTED_MAPPINGS = {
        'company_sector': dict_utils.computed_nested_sector_dict(
            'company.sector'
        ),
        # An interaction without a company yields id_name_dict(None).
        'company_one_list_group_tier': lambda obj: dict_utils.id_name_dict(
            obj.company.get_one_list_group_tier() if obj.company else None
        ),
        'investment_project_sector': dict_utils.computed_nested_sector_dict(
            'investment_project.sector'
        ),
        'is_event': attrgetter('is_event'),
    }

    SEARCH_FIELDS = (
        'id',
        'company.name',
        'company.name.trigram',
        'contacts.name',  # to find 2-letter words
        'contacts.name.trigram',
        'event.name',
        'event.name.trigram',
        'subject.english',
        'dit_participants.adviser.name',
        'dit_participants.adviser.name.trigram',
        'dit_participants.team.name',
        'dit_participants.team.name.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        # Same doc type as declared in Meta.
        doc_type = DOC_TYPE
Пример #26
0
class LayerIndex(DocType):
    """Search document for a layer, stored in the 'layer-index' index.

    Several keyword fields declare extra text sub-fields ('text', 'english',
    'pattern') so the same value is also indexed as analysed text.
    """

    id = Integer()
    abstract = Text(fields={
        'pattern': field.Text(analyzer=pattern_analyzer),
        'english': field.Text(analyzer='english'),
    })
    category__gn_description = Text()
    csw_type = Keyword()
    csw_wkt_geometry = Keyword()
    detail_url = Keyword()
    owner__username = Keyword(fields={'text': field.Text()})
    owner__first_name = Text()
    owner__last_name = Text()
    is_published = Boolean()
    featured = Boolean()
    popular_count = Integer()
    share_count = Integer()
    rating = Integer()
    srid = Keyword()
    supplemental_information = Text()
    source_host = Keyword(fields={'text': field.Text()})
    thumbnail_url = Keyword()
    uuid = Keyword()
    title = Text(fields={
        'pattern': field.Text(analyzer=pattern_analyzer),
        'english': field.Text(analyzer='english'),
    })
    date = Date()
    type = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )
    subtype = Keyword(fields={'text': field.Text()})
    typename = Keyword()
    title_sortable = Keyword()
    category = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )

    # Bounding box edges.
    bbox_left = Float()
    bbox_right = Float()
    bbox_bottom = Float()
    bbox_top = Float()

    temporal_extent_start = Date()
    temporal_extent_end = Date()
    keywords = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )
    regions = Keyword(
        fields={
            'text': field.Text(),
            'english': field.Text(analyzer='english'),
        },
    )
    # Generic Field carrying three sub-properties: url, name and scheme.
    references = Field(properties={
        'url': Text(),
        'name': Keyword(fields={'text': field.Text()}),
        'scheme': Keyword(fields={
            'text': field.Text(),
            'pattern': field.Text(analyzer=pattern_analyzer),
        }),
    })
    num_ratings = Integer()
    num_comments = Integer()
    geogig_link = Keyword()
    has_time = Boolean()

    class Meta:
        index = 'layer-index'
Пример #27
0
class Declaration(DocType, AbstractDeclaration):
    """Declaration document.

    Assumes there's a dynamic mapping with all fields not indexed by default.
    The numbered sub-fields ('5' .. '64') are generated by small loops in the
    class body; the loop temporaries are deleted afterwards so they never
    become class attributes.
    """
    general = Object(properties={
        'full_name_suggest': Completion(preserve_separators=False),
        'full_name': Text(index=True, analyzer='ukrainian'),
        # only for sorting purposes
        'full_name_for_sorting': Keyword(index=True, ignore_above=100),
        'name': Text(index=True, analyzer='ukrainian'),
        'patronymic': Text(index=True, analyzer='ukrainian'),
        'last_name': Text(index=True, analyzer='ukrainian'),
        'family_raw': Text(index=True, analyzer='ukrainian'),
        'family': Nested(properties={
            'name': Text(index=True, analyzer='ukrainian'),
            'relations': Keyword(index=False),
            'inn': Keyword(index=False),
        }),
        'post_raw': Text(index=True, analyzer='ukrainian'),
        'post': Object(properties={
            'region': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
            'office': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
            'post': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
        }),
        'addresses': Nested(properties={
            'place': Text(index=False),
            'place_hidden': Boolean(index=False),
            'place_district': Text(index=False),
            'place_district_hidden': Boolean(index=False),
            'place_city': Text(index=False),
            'place_city_hidden': Boolean(index=False),
            'place_city_type': Keyword(index=False),
            'place_city_type_hidden': Boolean(index=False),
            'place_address': Text(index=False),
            'place_address_hidden': Boolean(index=False),
            'place_address_type': Keyword(index=False),
        }),
    })
    declaration = Object(properties={
        'date': NoneAwareDate(),
        'notfull': Boolean(index=False),
        'notfull_lostpages': Keyword(index=False),
        'additional_info': Boolean(index=False),
        'additional_info_text': Text(index=False),
        'needs_scancopy_check': Boolean(index=False),
    })
    intro = Object(properties={'declaration_year': Keyword(index=True)})
    ft_src = Text(index=True, analyzer='ukrainian')

    # concatenated from a set of fields for regular search (not deepsearch
    # mode)
    index_card = Text(index=True, analyzer='ukrainian')

    INDEX_CARD_FIELDS = [
        "general.last_name",
        "general.name",
        "general.patronymic",
        "general.full_name",
        "general.post.post",
        "general.post.office",
        "general.post.region",
        "general.post.actual_region",
        "intro.declaration_year",
        "intro.doc_type",
        "declaration.source",
        "declaration.url",
    ]

    INCOME_SINGLE_PROPERTIES = {
        'value': Keyword(index=False),
        'value_unclear': Boolean(index=False),
        'comment': Text(index=False),
        'family': Keyword(index=False),
        'family_unclear': Boolean(index=False),
        'family_comment': Text(index=False),
    }
    INCOME_LIST_PROPERTIES = {
        'country': Keyword(index=False),
        'country_comment': Text(index=False),
        'cur': Keyword(index=False),
        'cur_units': Keyword(index=False),
        'uah_equal': Keyword(index=False),
    }
    # '5'..'20' are Object sub-fields, '21' and '22' are Nested ones.
    _numbered = {}
    for _key in range(5, 21):
        _numbered[str(_key)] = Object(properties=INCOME_SINGLE_PROPERTIES)
    for _key in range(21, 23):
        _numbered[str(_key)] = Nested(properties=INCOME_LIST_PROPERTIES)
    income = Object(properties=_numbered)

    ESTATE_PROPERTIES = {
        'region': Text(index=False),
        'address': Text(index=False),
        'space': Keyword(index=False),
        'space_units': Keyword(index=False),
        'space_comment': Text(index=False),
        'costs': Keyword(index=False),
        'costs_comment': Text(index=False),
        'costs_rent': Keyword(index=False),
        'costs_rent_comment': Text(index=False),
        'costs_property': Keyword(index=False),
        'costs_property_comment': Text(index=False),
    }
    # Nested sub-fields '23'..'34'.
    _numbered = {}
    for _key in range(23, 35):
        _numbered[str(_key)] = Nested(properties=ESTATE_PROPERTIES)
    estate = Object(properties=_numbered)

    VEHICLE_PROPERTIES = {
        "brand": Text(index=False),
        "brand_info": Text(index=False),
        "year": Keyword(index=False),
        "sum": Keyword(index=False),
        "sum_comment": Text(index=False),
        "sum_rent": Keyword(index=False),
        "sum_rent_comment": Text(index=False),
        "brand_hidden": Boolean(index=False),
        "brand_info_hidden": Boolean(index=False),
        "brand_info_unclear": Boolean(index=False),
    }
    # Nested sub-fields '35'..'44'.
    _numbered = {}
    for _key in range(35, 45):
        _numbered[str(_key)] = Nested(properties=VEHICLE_PROPERTIES)
    vehicle = Object(properties=_numbered)

    BANKS_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_hidden': Boolean(index=False),
        'sum_units': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_units': Keyword(index=False),
        'sum_foreign_comment': Text(index=False),
    }
    # Nested sub-fields '45'..'53'.
    _numbered = {}
    for _key in range(45, 54):
        _numbered[str(_key)] = Nested(properties=BANKS_PROPERTIES)
    banks = Object(properties=_numbered)

    LIABILITIES_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_units': Keyword(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_comment': Text(index=False),
    }
    # Nested sub-fields '54'..'64'.
    _numbered = {}
    for _key in range(54, 65):
        _numbered[str(_key)] = Nested(properties=LIABILITIES_PROPERTIES)
    liabilities = Object(properties=_numbered)

    # Drop the loop temporaries so they do not end up on the class.
    del _numbered, _key

    def raw_source(self):
        """Return the document as a dict without ft_src and index_card."""
        return blacklist(self.to_dict(), ["ft_src", "index_card"])

    def infocard(self):
        """Return a flat summary dict of the declaration."""
        general = self.general
        card = {
            "first_name": general.name,
            "patronymic": general.patronymic,
            "last_name": general.last_name,
            "office": general.post.office,
            "position": general.post.post,
        }
        # Prefer declaration.source, then a top-level source, then "".
        card["source"] = getattr(
            self.declaration, "source", getattr(self, "source", ""))
        card["id"] = self.meta.id
        card["url"] = settings.SITE_URL + reverse(
            "details", kwargs={"declaration_id": self.meta.id})
        card["document_type"] = "Щорічна"
        card["is_corrected"] = False
        # Prefer intro.date, then declaration.date, then "".
        card["created_date"] = getattr(
            self.intro, "date", getattr(self.declaration, "date", ""))
        return card

    def related_entities(self):
        """Return related people, documents and companies.

        Only the family list is populated; every other list is empty.
        """
        people = {"family": list(self.get_family_members())}
        documents = {"corrected": [], "originals": []}
        companies = {"owned": [], "related": [], "all": []}
        return {
            "people": people,
            "documents": documents,
            "companies": companies,
        }

    def unified_source(self):
        """Return the PaperToNACPConverter output for this document.

        Returns None when the conversion raises ConverterError.
        """
        try:
            payload = self.to_dict()
            payload["id"] = self.meta.id
            return PaperToNACPConverter(payload).convert()
        except ConverterError:
            return None

    class Meta:
        index = 'declarations_v2'
Пример #28
0
class Item(Document):
    """Search document for an item, stored under the id ``"<id>_<kind>"``.

    Lookups through :meth:`get` go through the ``cache`` decorator; every
    write path calls :meth:`clear_mc` so the cached entry is dropped.
    """

    id = Integer()
    title = Text()
    kind = Integer()
    content = Text()
    n_likes = Integer()
    n_collects = Integer()
    n_comments = Integer()
    can_show = Boolean()
    # NOTE(review): the sub-field is named "row" (possibly meant "raw");
    # left untouched because renaming it would change the mapping.
    tags = Text(fields={"row": Keyword()})
    created_at = Date()

    class Index:
        name = "test"

    @classmethod
    @cache(MC_KEY_ITEM.format("{id}", "{kind}"))
    def get(cls, id, kind):
        """Fetch the document whose id is ``f"{id}_{kind}"`` (cached)."""
        return super().get(f"{id}_{kind}")

    @classmethod
    def add(cls, item):
        """Create and save a document from ``item``; return the document."""
        obj = cls(**get_item_data(item))
        obj.save()
        obj.clear_mc(item.id, item.kind)
        return obj

    @classmethod
    def update_item(cls, item):
        """Update the document for ``item``, creating it when missing.

        Returns the new document when one had to be created, ``True`` after
        a successful update, and ``None`` when the lookup returned a falsy
        value other than ``None``.
        """
        obj = cls.get(item.id, item.kind)
        if obj is None:
            # Nothing indexed yet: index the item itself. (The previous code
            # passed ``obj`` -- i.e. None -- to ``add``.)
            return cls.add(item)
        if not obj:
            return

        kw = get_item_data(item)
        try:
            obj.update(**kw)
        except ConflictError:
            # Drop the cached copy, re-read the document and retry once.
            obj.clear_mc(item.id, item.kind)
            obj = cls.get(item.id, item.kind)
            obj.update(**kw)
        obj.clear_mc(item.id, item.kind)
        return True

    @classmethod
    def clear_mc(cls, id, kind):
        """Delete the cache entry for the given id/kind pair."""
        rdb.delete(MC_KEY_ITEM.format(id, kind))

    @classmethod
    def delete(cls, item):
        """Delete the document for ``item``; return whether one existed."""
        rs = cls.get(item.id, item.kind)
        if rs:
            # This classmethod shadows the parent's instance-level delete,
            # so reach it explicitly through super().
            super(cls, rs).delete()
            cls.clear_mc(item.id, item.kind)
            return True
        return False

    @classmethod
    def get_es(cls):
        """Return the connection used by this document's searches."""
        search = cls.search()
        return connections.get_connection(search._using)

    @classmethod
    def bulk_update(cls, items, chunk_size=5000, op_type="update", **kwargs):
        """Send one bulk action per item and clear their cache entries.

        Extra keyword arguments are forwarded to ``parallel_bulk``. Returns
        the list of results produced by ``parallel_bulk``.
        """
        index = cls._index._name
        _type = cls._doc_type.name
        actions = [{
            "_op_type": op_type,
            "_id": f"{doc.id}_{doc.kind}",
            "_index": index,
            "_type": _type,
            "_source": doc.to_dict(),
        } for doc in items]
        client = cls.get_es()
        # parallel_bulk is lazy; list() forces the requests to be sent.
        rs = list(
            parallel_bulk(client, actions, chunk_size=chunk_size, **kwargs))
        for item in items:
            cls.clear_mc(item.id, item.kind)
        return rs

    @classmethod
    def new_search(cls, query, page, order_by=None, per_page=PER_PAGE):
        """Run a multi_match search and return a Pagination of target objects.

        Hits are grouped by ``kind`` and resolved through ``TARGET_MAPPER``;
        kinds without a mapped class are dropped.
        """
        s = cls.search()
        s = s.query("multi_match", query=query, fields=SERACH_FIELDS)
        # Offset must follow the requested page size, not the global default.
        start = (page - 1) * per_page
        s = s.extra(**{"from": start, "size": per_page})
        s = s if order_by is None else s.sort(order_by)
        rs = s.execute()
        ids_by_kind = defaultdict(list)
        for hit in rs:
            ids_by_kind[hit.kind].append(hit.id)

        items = []
        for kind, ids in ids_by_kind.items():
            target_cls = TARGET_MAPPER.get(kind)
            if target_cls:
                items.extend(target_cls.get_multi(ids))

        return Pagination(query, page, per_page, rs.hits.total, items)

    @classmethod
    @cache(
        MC_KEY_POST_IDS_BY_TAG.format("{tag}", "{page}", "{order_by}",
                                      "{per_page}"),
        ONE_HOUR,
    )
    def get_post_ids_by_tag(cls, tag, page, order_by=None, per_page=PER_PAGE):
        """Return a Pagination of ids of documents whose kind is ``K_POST``.

        NOTE(review): ``tag`` is only used for the cache key and the
        Pagination; the tag filter itself is commented out below.
        """
        s = cls.search()
        # s = s.query(Q("bool", must=Q("term", tags=tag)))
        s = s.query(Q("bool", must=Q("term", kind=K_POST)))
        # Offset must follow the requested page size, not the global default.
        start = (page - 1) * per_page
        s = s.extra(**{"from": start, "size": per_page})
        if order_by == "hot":
            s = s.query(Q("function_score", functions=[]))
        elif order_by is not None:
            # Same guard as new_search: never pass None to sort().
            s = s.sort(order_by)
        rs = s.execute()
        ids = [obj.id for obj in rs]
        return Pagination(tag, page, per_page, rs.hits.total, ids)
Пример #29
0
class Company(BaseESModel):
    """Elasticsearch document for the Company model.

    ``MAPPINGS`` and ``COMPUTED_MAPPINGS`` pair a field name with a callable
    and ``SEARCH_FIELDS`` lists field paths. How the tables are consumed is
    not visible here (presumably by ``BaseESModel`` -- confirm there).
    """

    # --- field declarations (order kept as declared) ---
    id = Keyword()
    archived = Boolean()
    archived_by = fields.contact_or_adviser_field()
    archived_on = Date()
    archived_reason = Text()
    business_type = fields.id_name_field()
    company_number = fields.NormalizedKeyword()
    created_on = Date()
    description = fields.EnglishText()
    employee_range = fields.id_name_field()
    export_experience_category = fields.id_name_field()
    export_to_countries = fields.id_name_field()
    future_interest_countries = fields.id_name_field()
    global_headquarters = fields.id_name_field()
    headquarter_type = fields.id_name_field()
    modified_on = Date()
    name = Text(
        fields={
            'keyword': fields.NormalizedKeyword(),
            'trigram': fields.TrigramText(),
        },
    )
    reference_code = fields.NormalizedKeyword()
    sector = fields.sector_field()
    address = fields.address_field()
    registered_address = fields.address_field()
    one_list_group_global_account_manager = _adviser_field_with_indexed_id()
    trading_names = fields.TextWithTrigram()
    turnover_range = fields.id_name_field()
    uk_region = fields.id_name_field()
    uk_based = Boolean()
    uk_address_postcode = fields.PostcodeKeyword()
    uk_registered_address_postcode = fields.PostcodeKeyword()
    vat_number = Keyword(index=False)
    duns_number = Keyword()
    website = Text()
    # Completion field with a single 'country' category context.
    suggest = Completion(
        contexts=[{'name': 'country', 'type': 'category'}],
    )
    latest_interaction_date = Date()

    # Field name -> callable taking the whole object.
    COMPUTED_MAPPINGS = {
        'suggest': get_suggestions,
        'address': partial(dict_utils.address_dict, prefix='address'),
        'registered_address': partial(
            dict_utils.address_dict,
            prefix='registered_address',
        ),
        'one_list_group_global_account_manager':
            dict_utils.computed_field_function(
                'get_one_list_group_global_account_manager',
                dict_utils.contact_or_adviser_dict,
            ),
        'latest_interaction_date': lambda obj: obj.latest_interaction_date,
        # Both postcode fields are '' unless the company is UK based.
        'uk_address_postcode': (
            lambda obj: obj.address_postcode if obj.uk_based else ''
        ),
        'uk_registered_address_postcode': (
            lambda obj: obj.registered_address_postcode if obj.uk_based else ''
        ),
    }

    # Field name -> callable applied to that field's value.
    MAPPINGS = {
        'archived_by': dict_utils.contact_or_adviser_dict,
        'business_type': dict_utils.id_name_dict,
        'employee_range': dict_utils.id_name_dict,
        'export_experience_category': dict_utils.id_name_dict,
        'export_to_countries': lambda col: [
            dict_utils.id_name_dict(country) for country in col.all()
        ],
        'future_interest_countries': lambda col: [
            dict_utils.id_name_dict(country) for country in col.all()
        ],
        'global_headquarters': dict_utils.id_name_dict,
        'headquarter_type': dict_utils.id_name_dict,
        'sector': dict_utils.sector_dict,
        'turnover_range': dict_utils.id_name_dict,
        'uk_based': bool,
        'uk_region': dict_utils.id_name_dict,
    }

    SEARCH_FIELDS = (
        'id',
        'name',  # to find 2-letter words
        'name.trigram',
        'company_number',
        'trading_names',  # to find 2-letter words
        'trading_names.trigram',
        'reference_code',
        'address.country.name.trigram',
        'address.postcode.trigram',
        'registered_address.country.name.trigram',
        'registered_address.postcode.trigram',
    )

    class Meta:
        """Default document meta data."""

        doc_type = DOC_TYPE

    class Index:
        # Same doc type as declared in Meta.
        doc_type = DOC_TYPE
Пример #30
0
class Declaration(DocType, RelatedDeclarationsMixin):
    """Declaration document.

    Assumes there's a dynamic mapping with all fields not indexed by default.
    The numbered sub-fields ('5' .. '64') are generated by small loops in the
    class body; the loop temporaries are deleted afterwards so they never
    become class attributes.
    """
    general = Object(properties={
        'full_name_suggest': Completion(preserve_separators=False),
        'full_name': Text(index=True, analyzer='ukrainian'),
        'name': Text(index=True, analyzer='ukrainian'),
        'patronymic': Text(index=True, analyzer='ukrainian'),
        'last_name': Text(index=True, analyzer='ukrainian'),
        'family_raw': Text(index=True, analyzer='ukrainian'),
        'family': Nested(properties={
            'name': Text(index=True, analyzer='ukrainian'),
            'relations': Keyword(index=False),
            'inn': Keyword(index=False),
        }),
        'post_raw': Text(index=True, analyzer='ukrainian'),
        'post': Object(properties={
            'region': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
            'office': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
            'post': Text(
                index=True, analyzer='ukrainian',
                fields={'raw': Keyword(index=True)}),
        }),
        'addresses': Nested(properties={
            'place': Text(index=False),
            'place_hidden': Boolean(index=False),
            'place_district': Text(index=False),
            'place_district_hidden': Boolean(index=False),
            'place_city': Text(index=False),
            'place_city_hidden': Boolean(index=False),
            'place_city_type': Keyword(index=False),
            'place_city_type_hidden': Boolean(index=False),
            'place_address': Text(index=False),
            'place_address_hidden': Boolean(index=False),
            'place_address_type': Keyword(index=False),
        }),
    })
    declaration = Object(properties={
        'date': NoneAwareDate(),
        'notfull': Boolean(index=False),
        'notfull_lostpages': Keyword(index=False),
        'additional_info': Boolean(index=False),
        'additional_info_text': Text(index=False),
        'needs_scancopy_check': Boolean(index=False),
    })
    intro = Object(properties={'declaration_year': Keyword(index=True)})
    ft_src = Text(index=True, analyzer='ukrainian')

    INCOME_SINGLE_PROPERTIES = {
        'value': Keyword(index=False),
        'value_unclear': Boolean(index=False),
        'comment': Text(index=False),
        'family': Keyword(index=False),
        'family_unclear': Boolean(index=False),
        'family_comment': Text(index=False),
    }
    INCOME_LIST_PROPERTIES = {
        'country': Keyword(index=False),
        'country_comment': Text(index=False),
        'cur': Keyword(index=False),
        'cur_units': Keyword(index=False),
        'uah_equal': Keyword(index=False),
    }
    # '5'..'20' are Object sub-fields, '21' and '22' are Nested ones.
    _numbered = {}
    for _key in range(5, 21):
        _numbered[str(_key)] = Object(properties=INCOME_SINGLE_PROPERTIES)
    for _key in range(21, 23):
        _numbered[str(_key)] = Nested(properties=INCOME_LIST_PROPERTIES)
    income = Object(properties=_numbered)

    ESTATE_PROPERTIES = {
        'region': Text(index=False),
        'address': Text(index=False),
        'space': Keyword(index=False),
        'space_units': Keyword(index=False),
        'space_comment': Text(index=False),
        'costs': Keyword(index=False),
        'costs_comment': Text(index=False),
        'costs_rent': Keyword(index=False),
        'costs_rent_comment': Text(index=False),
        'costs_property': Keyword(index=False),
        'costs_property_comment': Text(index=False),
    }
    # Nested sub-fields '23'..'34'.
    _numbered = {}
    for _key in range(23, 35):
        _numbered[str(_key)] = Nested(properties=ESTATE_PROPERTIES)
    estate = Object(properties=_numbered)

    VEHICLE_PROPERTIES = {
        "brand": Text(index=False),
        "brand_info": Text(index=False),
        "year": Keyword(index=False),
        "sum": Keyword(index=False),
        "sum_comment": Text(index=False),
        "sum_rent": Keyword(index=False),
        "sum_rent_comment": Text(index=False),
        "brand_hidden": Boolean(index=False),
        "brand_info_hidden": Boolean(index=False),
        "brand_info_unclear": Boolean(index=False),
    }
    # Nested sub-fields '35'..'44'.
    _numbered = {}
    for _key in range(35, 45):
        _numbered[str(_key)] = Nested(properties=VEHICLE_PROPERTIES)
    vehicle = Object(properties=_numbered)

    BANKS_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_hidden': Boolean(index=False),
        'sum_units': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_units': Keyword(index=False),
        'sum_foreign_comment': Text(index=False),
    }
    # Nested sub-fields '45'..'53'.
    _numbered = {}
    for _key in range(45, 54):
        _numbered[str(_key)] = Nested(properties=BANKS_PROPERTIES)
    banks = Object(properties=_numbered)

    LIABILITIES_PROPERTIES = {
        'sum': Keyword(index=False),
        'sum_comment': Text(index=False),
        'sum_units': Keyword(index=False),
        'sum_foreign': Keyword(index=False),
        'sum_foreign_comment': Text(index=False),
    }
    # Nested sub-fields '54'..'64'.
    _numbered = {}
    for _key in range(54, 65):
        _numbered[str(_key)] = Nested(properties=LIABILITIES_PROPERTIES)
    liabilities = Object(properties=_numbered)

    # Drop the loop temporaries so they do not end up on the class.
    del _numbered, _key

    class Meta:
        index = 'declarations_v2'