Пример #1
0
class InitiativeDoc(SerializedDoc):
    """Search document declaring the indexed fields of an initiative."""
    identifier = field.String()
    name = field.String()
    # multi=False: a single nested object (not a list) exposing only 'name'.
    principal_agent = field.Nested(multi=False, properties={'name': field.String()})
    member_countries = field.Nested(doc_class=CountryDoc)
    # Nested CountryDoc entries; 'name' additionally gets a not_analyzed
    # 'raw' sub-field next to the analyzed string.
    geographic_scope = field.Nested(
        doc_class=CountryDoc,
        properties={
            'name': field.String(fields={'raw': field.String(index='not_analyzed')})
        }
    )
    # Plain object (not nested) whose 'name' also carries a 'raw' sub-field.
    initiative_type = field.Object(properties={'name': field.String(fields={'raw': field.String(index='not_analyzed')})})
    start_year = field.Integer()

    def get_display_name(self):
        """Return the value of ``name`` as the display label."""
        return self.name
    def prepare_doc(self):
        """Build and return a ``Document`` subclass for this object's index.

        Each entry of ``self.schema['fields']`` becomes a ``colN`` attribute
        (N starting at 1) whose field object is looked up in
        ``self._schema2doc_map`` by the entry's ``type``. When
        ``self.has_geo_data`` is truthy, geo-related fields are added too.
        The ``colN`` -> original column name mapping is stored in the
        mapping's ``_meta`` under the ``headers`` key.
        """
        headers = {}
        attrs = {}
        for position, column in enumerate(self.schema['fields'], 1):
            original_name = column['name']
            column_key = 'col{}'.format(position)
            column_field = self._schema2doc_map[column['type']]
            headers[column_key] = original_name
            attrs[column_key] = column_field

        if self.has_geo_data:
            attrs.update({
                'shape': dsl_field.GeoShape(),
                'point': dsl_field.GeoPoint(),
                'label': dsl_field.Text(),
                'shape_type': dsl_field.Integer(),
            })

        # Fixed fields present on every generated document class.
        resource_title = dsl_field.Text(
            analyzer=polish_analyzer,
            fields={'raw': dsl_field.Keyword()},
        )
        attrs['resource'] = dsl_field.Nested(
            properties={'id': dsl_field.Integer(), 'title': resource_title},
        )
        attrs['updated_at'] = dsl_field.Date()
        attrs['row_no'] = dsl_field.Long()

        # Inner ``Index`` class carrying the target index name.
        attrs['Index'] = type('Index', (type, ), {'name': self.idx_name})

        document_cls = type(self.idx_name, (Document, ), attrs)
        document_cls._doc_type.mapping._meta['_meta'] = {'headers': headers}
        return document_cls
Пример #3
0
    def doc(self):
        """Return the dynamically built ``DocType`` class, creating it once.

        On first use a ``DocType`` subclass named ``self.idx_name`` is built
        with one ``colN`` attribute (N starting at 1) per entry of
        ``self.schema['fields']``, using the field object that
        ``_schema2doc_map`` holds for the entry's ``type``, plus the fixed
        ``resource``, ``updated_at`` and ``row_no`` fields. The ``colN`` ->
        original column name mapping is stored in the mapping's ``_meta``
        under ``headers``. The class is cached in ``self._doc_cache`` and
        reused while that attribute is truthy.
        """
        if self._doc_cache:
            return self._doc_cache

        _fields, _map = {}, {}
        for idx, _f in enumerate(self.schema['fields'], 1):
            alias_name = _f['name']
            field_name = 'col{}'.format(idx)
            _field = _schema2doc_map[_f['type']]
            _map[field_name] = alias_name
            _fields[field_name] = _field

        _fields['resource'] = dsl_field.Nested(
            properties={
                'id': dsl_field.Integer(),
                'title': dsl_field.Text(
                    analyzer=polish_analyzer,
                    fields={'raw': dsl_field.Keyword()})
            }
        )

        _fields['updated_at'] = dsl_field.Date()
        _fields['row_no'] = dsl_field.Long()

        doc = type(self.idx_name, (DocType,), _fields)
        doc._doc_type.index = self.idx_name
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        self._doc_cache = doc
        return self._doc_cache
Пример #4
0
class DocWithNested(document.DocType):
    """Document type with a nested ``comments`` field."""
    # Each nested comment maps a text 'title' and a multi-valued keyword 'tags'.
    comments = field.Nested(
        properties={
            'title': field.Text(),
            'tags': field.Keyword(multi=True)
        }
    )
Пример #5
0
    class Mapping(Content.Mapping):
        """Mapping that extends ``Content.Mapping`` with a nested ``tags`` field."""

        # contributions = ContributionField()
        tags = field.Nested()

        class Meta:
            # NOTE(review): the meaning of these options is defined by the
            # project's mapping base class, which is not visible here.
            orphaned = True
            includes = ('tags',)
Пример #6
0
def test_nested_provides_direct_access_to_its_fields():
    """A Nested field supports ``in`` and item access for its sub-fields."""
    name_spec = {'type': 'text', 'index': 'not_analyzed'}
    nested = field.Nested(properties={'name': name_spec})

    assert 'name' in nested
    assert nested['name'] == field.Text(index='not_analyzed')
Пример #7
0
def test_nested_provides_direct_access_to_its_fields():
    """Sub-fields given as plain dicts are reachable directly on the Nested field."""
    name_definition = {"type": "text", "index": "not_analyzed"}
    nested_field = field.Nested(properties={"name": name_definition})

    assert "name" in nested_field
    assert nested_field["name"] == field.Text(index="not_analyzed")
Пример #8
0
class Entry(document.Document):
    """Document with nested ``Form`` objects, a creation date and a text field."""
    forms = field.Nested(Form)
    created = field.Date()
    superentry = field.Text()

    def save(self, **kwargs):
        """Save the entry by forwarding all keyword arguments to the parent."""
        parent = super(Entry, self)
        return parent.save(**kwargs)

    def is_published(self):
        """Return whether ``datetime.now()`` is later than ``created``."""
        current_time = datetime.now()
        return current_time > self.created
Пример #9
0
def test_modifying_nested():
    """A sub-field added via ``.field()`` appears in the serialized mapping."""
    nested = field.Nested().field('name', 'string', index='not_analyzed')

    expected_mapping = {
        'type': 'nested',
        'properties': {
            'name': {'type': 'string', 'index': 'not_analyzed'}
        },
    }
    assert expected_mapping == nested.to_dict()
Пример #10
0
class ProjectDoc(SerializedDoc):
    """Search document declaring the indexed fields of a project."""
    identifier = field.String()
    name = field.String()
    alternate_name = field.String()
    # Analyzed with html_strip; the 'raw' sub-field keeps a not_analyzed copy.
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    status = field.String(fields={'raw': field.String(index='not_analyzed')})
    start_year = field.Integer()
    countries = field.Nested(
        doc_class=CountryDoc,  # project_location aggregation/facet uses the raw multifield
        properties={
            'name': field.String(fields={'raw': field.String(index='not_analyzed')})
        }
    )
    infrastructure_type = field.Object(
        properties={'name': field.String(fields={'raw': field.String(index='not_analyzed')})}
    )
    # Providing a doc_class for initiatives produced errors, so keep it simple!
    initiatives = field.Nested(properties={'name': field.String()})
    # Multi-valued objects, each holding a multi-valued 'sources' object
    # whose 'name' has a not_analyzed 'raw' sub-field.
    funding = field.Object(
        multi=True,
        properties={
            'sources': field.Object(
                multi=True,
                properties={
                    'name': field.String(fields={'raw': field.String(index='not_analyzed')}),
                }
            )
        }
    )
    regions = field.Nested(
        doc_class=RegionDoc,
        properties={
            'name': field.String(fields={'raw': field.String(index='not_analyzed')})
        }
    )

    def get_display_name(self):
        """Return the value of ``name`` as the display label."""
        return self.name
Пример #11
0
class PersonDoc(SerializedDoc):
    """Search document declaring the indexed fields of a person."""
    identifier = field.String()
    given_name = field.String()
    additional_name = field.String()
    family_name = field.String()
    # Analyzed with html_strip; the 'raw' sub-field keeps a not_analyzed copy.
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    citizenships = field.Nested(doc_class=CountryDoc)
    # Nested PositionDoc entries exposing the title and the organization's name.
    position_set = field.Nested(
        doc_class=PositionDoc,
        properties={
            'title': field.String(),
            'organization': field.Object(properties={'name': field.String()})
        }
    )
    events = field.Nested(properties={'name': field.String()})

    def get_display_name(self):
        """Return ``given_name`` and ``family_name`` joined by a single space."""
        # NOTE(review): str.join raises TypeError if either part is None;
        # confirm both names are always populated.
        return " ".join((self.given_name, self.family_name))
Пример #12
0
class CompanyDocument(Document):
    """Company search document with serialization helpers."""
    # Address parts are stored but not indexed.
    address = field.Nested(
        properties={
            'care_of': field.Keyword(index=False, store=True),
            'po_box': field.Keyword(index=False, store=True),
            'address_line_1': field.Keyword(index=False, store=True),
            'address_line_2': field.Keyword(index=False, store=True),
            'locality': field.Keyword(index=False, store=True),
            'region': field.Keyword(index=False, store=True),
            'country': field.Keyword(index=False, store=True),
            'postal_code': field.Keyword(index=False, store=True),
        },
    )
    country_of_origin = field.Keyword(index=False, store=True)
    address_snippet = field.Keyword(index=False, store=True)
    company_name = field.Text()
    company_number = field.Text()
    company_status = field.Keyword(index=False, store=True)
    type = field.Keyword(index=False, store=True)
    date_of_cessation = field.Date(index=False, format='yyyy-MM-dd')
    date_of_creation = field.Date(index=False, format='yyyy-MM-dd')
    sic_codes = field.Keyword(index=False, store=True)

    class Meta:
        index = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS

    def to_dict(self, include_meta=False):
        """Serialize the document, enriching ``_source`` when it is present.

        When the parent's result contains a ``_source`` key, ``title``,
        ``address.country`` and ``company_type`` are copied from existing
        values and the date fields are rendered as strings; otherwise the
        parent's result is returned untouched.
        """
        serialized = super().to_dict(include_meta)
        if '_source' not in serialized:
            return serialized

        source = serialized['_source']
        source['title'] = source['company_name']
        source['address']['country'] = source['country_of_origin']
        source['company_type'] = source['type']
        serialized['_source'] = self.reformat_date(source)
        return serialized

    def to_profile_dict(self):
        """Return ``to_dict()`` with the address duplicated and dates as strings.

        The address is also exposed under ``registered_office_address``.
        """
        profile = self.to_dict()
        profile['registered_office_address'] = profile['address']
        return self.reformat_date(profile)

    @staticmethod
    def reformat_date(company):
        """Replace the date values present in ``company`` with ``YYYY-MM-DD`` text.

        The dict is modified in place and also returned.
        """
        for date_key in ('date_of_creation', 'date_of_cessation'):
            if date_key in company:
                company[date_key] = company[date_key].strftime('%Y-%m-%d')
        return company
Пример #13
0
class Profile(Document):
    """Profile document holding nested SSN traces and sub-profile entries."""
    created_at = field.Date()
    ssn_trace = field.Nested(Ssn_trace)

    meta_sub_profiles = field.Nested(Inner_sub_profile)

    class Index:
        name = "profile"

    @property
    def pk(self):
        """The document id taken from ``self.meta``."""
        return self.meta.id

    @property
    def sub_profiles(self):
        """List of the ``sub_profile`` attribute of every nested entry."""
        return [entry.sub_profile for entry in self.meta_sub_profiles]

    def validate_sub_profiles(self, *sub_profiles_ids):
        """Raise ``ValueError`` for the first id not attached to this profile."""
        known_ids = {
            entry.sub_profile_id
            for entry in self.meta_sub_profiles
        }
        for candidate_id in sub_profiles_ids:
            if candidate_id in known_ids:
                continue
            raise ValueError(
                "the profile: '{}' dont have the sub profile: '{}'".format(
                    self.meta.id, candidate_id))

    def attach_sub_profiles(self, sub_profiles):
        """Append an entry with status ``'unknown'`` for each given sub-profile."""
        # NOTE(review): if the nested value is wrapped in a non-``list``
        # container this check resets existing entries -- confirm intent.
        if not isinstance(self.meta_sub_profiles, list):
            self.meta_sub_profiles = []
        for attached in sub_profiles:
            entry = {
                'sub_profile_id': attached.meta.id,
                'status': 'unknown'
            }
            self.meta_sub_profiles.append(entry)
Пример #14
0
class EventDoc(SerializedDoc):
    """Search document declaring the indexed fields of an event."""
    name = field.String()
    # Analyzed with html_strip; the 'raw' sub-field keeps a not_analyzed copy.
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    event_type = field.Object(properties={'name': field.String(fields={'raw': field.String(index='not_analyzed')})})
    start_year = field.Integer()
    # Nested PlaceDoc entries; 'location_display' has a not_analyzed 'raw' copy.
    places = field.Nested(
        doc_class=PlaceDoc,
        properties={'location_display': field.String(fields={'raw': field.String(index='not_analyzed')})}
    )

    def get_display_name(self):
        """Return the value of ``name`` as the display label."""
        return self.name
Пример #15
0
class OrganizationDoc(SerializedDoc):
    """Search document declaring the indexed fields of an organization."""
    name = field.String()
    # Analyzed with html_strip; the 'raw' sub-field keeps a not_analyzed copy.
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    mission = field.String()
    countries = field.Nested(doc_class=CountryDoc)
    headquarters_location = field.String(fields={'raw': field.String(index='not_analyzed')})
    # Multi-valued string with a not_analyzed 'raw' sub-field.
    scope_of_operations = field.String(
        multi=True,
        fields={'raw': field.String(index='not_analyzed')}
    )
    start_year = field.Integer()

    def get_display_name(self):
        """Return the value of ``name`` as the display label."""
        return self.name
Пример #16
0
class EntryDoc(SerializedDoc):
    """Search document declaring the indexed fields of an entry."""
    title = field.String()
    author = field.String()
    # Both long-text fields are analyzed with html_strip and keep a
    # not_analyzed 'raw' sub-field.
    content = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    description = field.String(
        analyzer=html_strip,
        fields={'raw': field.String(index='not_analyzed')}
    )
    publication_date = field.Date()
    # Nested CategoryDoc entries; 'name' has a not_analyzed 'raw' copy.
    categories = field.Nested(
        doc_class=CategoryDoc,
        properties={'name': field.String(fields={'raw': field.String(index='not_analyzed')})}
    )

    def get_display_name(self):
        """Return the value of ``title`` as the display label."""
        return self.title
    def prepare_doc(self):
        """Build and return a ``Document`` subclass for this object's index.

        The class always carries the fixed geo, ``resource``, ``updated_at``
        and ``row_no`` fields. Each entry of ``self.schema`` whose ``type`` is
        a key of ``self._schema2doc_map`` becomes a ``colN`` attribute, where
        N is the entry's 1-based position in the schema; entries with an
        unsupported type are skipped (their position number stays unused).
        The ``colN`` -> original column name mapping is stored in the
        mapping's ``_meta`` under the ``headers`` key.
        """
        _fields = {
            'shape': dsl_field.GeoShape(),
            'point': dsl_field.GeoPoint(),
            'shape_type': dsl_field.Integer(),
            'label': dsl_field.Text(),
            'resource': dsl_field.Nested(
                properties={
                    'id': dsl_field.Integer(),
                    'title': dsl_field.Text(
                        analyzer=polish_analyzer,
                        fields={'raw': dsl_field.Keyword()}),
                }),
            'updated_at': dsl_field.Date(),
            'row_no': dsl_field.Long(),
        }
        _map = {}

        for idx, _f in enumerate(self.schema, 1):
            if _f.type not in self._schema2doc_map:
                continue
            field_name = f'col{idx}'
            _map[field_name] = _f.name
            _fields[field_name] = self._schema2doc_map[_f.type]

        # Set the index name unconditionally: doing this inside the loop
        # rebuilt the class for every column and left the document without
        # an index name when no schema column had a supported type.
        _fields['Index'] = type('Index', (type, ), {'name': self.idx_name})

        doc = type(self.idx_name, (Document, ), _fields)
        doc._doc_type.mapping._meta['_meta'] = {'headers': _map}
        return doc
Пример #18
0
class CompanyDocType(DocType):
    """Document type declaring the indexed fields of a company."""
    # FormattedDate is a project field; the format string passed to it is
    # strftime-style -- presumably used when serializing, TODO confirm.
    date_of_creation = FormattedDate(date_format='%Y-%m-%d')
    description = field.Text()
    employees = field.Text()
    facebook_url = field.Text()
    pk = field.Integer()
    keywords = field.Text()
    linkedin_url = field.Text()
    logo = field.Text()
    has_single_sector = field.Boolean()
    modified = FormattedDate(date_format='%Y-%m-%dT%H:%M:%S.%fZ')
    name = field.Text()
    number = field.Text()
    # Multi-valued text fields.
    sectors = field.Text(multi=True)
    sectors_label = field.Text(multi=True)
    slug = field.Text()
    summary = field.Text()
    twitter_url = field.Text()
    website = field.Text()
    # Nested case studies, each with its own set of text fields.
    supplier_case_studies = field.Nested(
        properties={
            'pk': field.Integer(),
            'title': field.Text(),
            'short_summary': field.Text(),
            'description': field.Text(),
            'sector': field.Text(),
            'keywords': field.Text(),
            'image_one_caption': field.Text(),
            'image_two_caption': field.Text(),
            'image_three_caption': field.Text(),
            'testimonial': field.Text(),
            'slug': field.Text(),
        })

    class Meta:
        index = 'company'
Пример #19
0
class DocWithNested(document.DocType):
    # Nested 'comments' objects, each mapping a single string 'title'.
    comments = field.Nested(properties={'title': field.String()})
class OptionalObjectWithRequiredField(document.Document):
    # The nested field itself is not marked required, but the 'title'
    # keyword inside each nested object is.
    comments = field.Nested(properties={'title': field.Keyword(required=True)})
Пример #21
0
class CompanyDocument(Document):
    """Company search document.

    Several fields declare ``copy_to`` targets pointing at the three
    ``*wildcard`` fields declared first.
    """
    # copy_to targets referenced by the fields declared below.
    wildcard = field.Text(analyzer=american_english_analyzer)
    casestudy_wildcard = field.Text(analyzer=american_english_analyzer)
    keyword_wildcard = field.Keyword()

    case_study_count = field.Integer()
    date_of_creation = field.Date(index=False)
    description = field.Text(
        copy_to='wildcard', analyzer=american_english_analyzer
    )
    has_description = field.Boolean()
    employees = field.Keyword(index=False, store=True)
    facebook_url = field.Keyword(index=False, store=True)
    pk = field.Integer(index=False)
    keywords = field.Text(copy_to='wildcard')
    linkedin_url = field.Keyword(index=False, store=True)
    logo = field.Keyword(index=False, store=True)
    has_single_sector = field.Boolean()
    modified = field.Date(index=False)
    # Keyword copy of 'name' (filled via copy_to below).
    ordering_name = field.Keyword()
    name = field.Text(copy_to=['wildcard', 'ordering_name'])
    number = field.Keyword(copy_to='keyword_wildcard',)
    sectors = field.Keyword(multi=True, copy_to='keyword_wildcard', store=True)
    sectors_label = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    expertise_industries = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    expertise_regions = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    expertise_languages = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    expertise_countries = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    # Represents Dict as it's the primitive datatype for this field
    expertise_products_services = field.Object()
    expertise_products_services_labels = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    expertise_labels = field.Keyword(
        multi=True, copy_to='keyword_wildcard', store=True
    )
    slug = field.Keyword(copy_to='keyword_wildcard', store=True)
    summary = field.Text(
        copy_to='wildcard', analyzer=american_english_analyzer
    )
    twitter_url = field.Keyword(index=False, store=True)
    website = field.Keyword(copy_to='keyword_wildcard', store=True)
    # Nested case studies; their text is copied to 'casestudy_wildcard',
    # while 'sector' and 'slug' are copied to 'keyword_wildcard'.
    supplier_case_studies = field.Nested(
        properties={
            'pk': field.Integer(index=False),
            'title': field.Text(copy_to='casestudy_wildcard'),
            'short_summary': field.Text(copy_to='casestudy_wildcard'),
            'description': field.Text(copy_to='casestudy_wildcard'),
            'sector': field.Keyword(copy_to='keyword_wildcard', store=True),
            'keywords': field.Text(copy_to='casestudy_wildcard'),
            'image_one_caption': field.Text(copy_to='casestudy_wildcard'),
            'image_two_caption': field.Text(copy_to='casestudy_wildcard'),
            'image_three_caption': field.Text(copy_to='casestudy_wildcard'),
            'testimonial': field.Text(copy_to='casestudy_wildcard'),
            'website': field.Keyword(copy_to='casestudy_wildcard', store=True),
            'slug': field.Keyword(copy_to='keyword_wildcard', store=True),
            'testimonial_name': field.Keyword(
                copy_to='casestudy_wildcard', store=True
            ),
            'testimonial_company': field.Text(copy_to='casestudy_wildcard'),
            'testimonial_job_title': field.Text(copy_to='casestudy_wildcard'),
        }
    )
    is_showcase_company = field.Boolean()
    is_published_investment_support_directory = field.Boolean()
    is_published_find_a_supplier = field.Boolean()

    class Meta:
        index = settings.ELASTICSEARCH_COMPANY_INDEX_ALIAS
class DocWithNested(document.Document):
    # Nested field built from the Comment class instead of inline properties.
    comments = field.Nested(Comment)
class NestedSecret(document.Document):
    # Nested field built from the SecretDoc class.
    secrets = field.Nested(SecretDoc)
Пример #24
0
 class MySubDocWithNested(MyDoc):
     # Adds a nested field built from MyInner on top of what MyDoc declares.
     nested_inner = field.Nested(MyInner)
Пример #25
0
def test_nested_provides_direct_access_to_its_fields():
    """A sub-field added in place via ``.field()`` is reachable on the Nested field."""
    nested = field.Nested()
    # The return value is deliberately ignored: the check relies on the
    # field object having been updated in place.
    nested.field('name', 'string', index='not_analyzed')

    expected = field.String(index='not_analyzed')
    assert 'name' in nested
    assert nested['name'] == expected
Пример #26
0
class DocWithNested(document.Document):
    """Document with a nested ``comments`` field built from ``Comment``."""
    comments = field.Nested(Comment)

    class Index:
        # Name of the index this document is bound to.
        name = "test-doc-with-nested"
Пример #27
0
class OptionalObjectWithRequiredField(document.Document):
    """Document whose nested ``comments`` objects each declare a required ``title``."""
    # The nested field itself is not marked required; only the inner keyword is.
    comments = field.Nested(properties={"title": field.Keyword(required=True)})

    class Index:
        # Name of the index this document is bound to.
        name = "test-required"
Пример #28
0
class NestedSecret(document.Document):
    """Document with a nested ``secrets`` field built from ``SecretDoc``."""
    secrets = field.Nested(SecretDoc)

    class Index:
        # Name of the index this document is bound to.
        name = "test-nested-secret"
Пример #29
0
class WikiDocumentType(document.DocType):
    """Elasticsearch document type for wiki documents.

    Declares the field mapping and analysis settings, converts Django model
    instances into indexable dicts, and offers bulk index/delete, search and
    full-reindex helpers.
    """
    # Highlight fields consulted, in this order, when building an excerpt.
    excerpt_fields = ['summary', 'content']
    # Slug fragments that mark documents which must not be indexed.
    exclude_slugs = [
        'Talk:', 'User:', 'User_talk:', 'Template_talk:', 'Project_talk:'
    ]

    boost = field.Float(null_value=1.0)
    content = field.String(analyzer='kuma_content',
                           term_vector='with_positions_offsets')
    css_classnames = field.String(analyzer='case_insensitive_keyword')
    html_attributes = field.String(analyzer='case_insensitive_keyword')
    id = field.Long()
    kumascript_macros = field.String(analyzer='case_insensitive_keyword')
    locale = field.String(index='not_analyzed')
    modified = field.Date()
    parent = field.Nested(
        properties={
            'id': field.Long(),
            'title': field.String(analyzer='kuma_title'),
            'slug': field.String(index='not_analyzed'),
            'locale': field.String(index='not_analyzed'),
        })
    slug = field.String(index='not_analyzed')
    summary = field.String(analyzer='kuma_content',
                           term_vector='with_positions_offsets')
    tags = field.String(analyzer='case_sensitive')
    title = field.String(analyzer='kuma_title', boost=1.2)

    class Meta(object):
        mapping = Mapping('wiki_document')
        # Disable the catch-all ``_all`` field. The keyword was previously
        # misspelled, so the ``enabled`` setting never reached the mapping.
        mapping.meta('_all', enabled=False)

    @classmethod
    def get_connection(cls, alias='default'):
        """Return the Elasticsearch connection registered under ``alias``."""
        return connections.get_connection(alias)

    @classmethod
    def get_doc_type(cls):
        """Return the document type name of this class."""
        return cls._doc_type.name

    @classmethod
    def from_django(cls, obj):
        """Convert the model instance ``obj`` into a dict ready for indexing.

        The ``boost`` value is 8.0 for documents with a zone, 4.0 for
        documents whose slug contains exactly one ``/``, and 1.0 otherwise.
        ``parent`` is a dict of the parent's id/title/locale/slug, or an
        empty dict when there is no parent.
        """
        doc = {
            'id': obj.id,
            'title': obj.title,
            'slug': obj.slug,
            'summary': obj.get_summary(strip_markup=True),
            'locale': obj.locale,
            'modified': obj.modified,
            'content': strip_tags(obj.rendered_html),
            'tags': list(obj.tags.values_list('name', flat=True)),
            'kumascript_macros': obj.extract_kumascript_macro_names(),
            'css_classnames': obj.extract_css_classnames(),
            'html_attributes': obj.extract_html_attributes(),
        }

        # Check if the document has a document zone attached
        try:
            is_zone = bool(obj.zone)
        except ObjectDoesNotExist:
            is_zone = False

        if is_zone:
            # boost all documents that are a zone
            doc['boost'] = 8.0
        elif obj.slug.count('/') == 1:
            # a little boost if no zone but still first level
            doc['boost'] = 4.0
        else:
            doc['boost'] = 1.0
        if obj.parent:
            doc['parent'] = {
                'id': obj.parent.id,
                'title': obj.parent.title,
                'locale': obj.parent.locale,
                'slug': obj.parent.slug,
            }
        else:
            doc['parent'] = {}

        return doc

    @classmethod
    def get_mapping(cls):
        """Return the field mapping of this document type as a dict."""
        return cls._doc_type.mapping.to_dict()

    @classmethod
    def get_analysis(cls):
        """Return the custom filter and analyzer definitions used by the mapping."""
        return {
            'filter': {
                'kuma_word_delimiter': {
                    'type': 'word_delimiter',
                    'preserve_original': True,  # hi-fi -> hifi, hi-fi
                    'catenate_words': True,  # hi-fi -> hifi
                    'catenate_numbers': True,  # 90-210 -> 90210
                }
            },
            'analyzer': {
                'default': {
                    'tokenizer': 'standard',
                    'filter': ['standard', 'elision']
                },
                # a custom analyzer that strips html and uses our own
                # word delimiter filter and the elision filter
                # (e.g. L'attribut -> attribut). The rest is the same as
                # the snowball analyzer
                'kuma_content': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'char_filter': ['html_strip'],
                    'filter': [
                        'elision',
                        'kuma_word_delimiter',
                        'lowercase',
                        'standard',
                        'stop',
                        'snowball',
                    ],
                },
                'kuma_title': {
                    'type': 'custom',
                    'tokenizer': 'standard',
                    'filter': [
                        'elision',
                        'kuma_word_delimiter',
                        'lowercase',
                        'standard',
                        'snowball',
                    ],
                },
                'case_sensitive': {
                    'type': 'custom',
                    'tokenizer': 'keyword'
                },
                'case_insensitive_keyword': {
                    'type': 'custom',
                    'tokenizer': 'keyword',
                    'filter': 'lowercase'
                }
            },
        }

    @classmethod
    def get_settings(cls):
        """Return the index creation body: mappings plus analysis/shard settings."""
        return {
            'mappings': cls.get_mapping(),
            'settings': {
                'analysis': cls.get_analysis(),
                'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
                'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
            }
        }

    @classmethod
    def bulk_index(cls, documents, id_field='id', es=None, index=None):
        """Index a bunch of documents in one bulk request.

        :arg documents: iterable of dicts as produced by ``from_django``.
        :arg id_field: key of each dict whose value becomes the ``_id``.
        :arg es: connection to use; defaults to ``get_connection()``.
        :arg index: index name; defaults to ``get_index()``.
        """
        es = es or cls.get_connection()
        index = index or cls.get_index()
        doc_type = cls.get_doc_type()

        actions = [{
            '_index': index,
            '_type': doc_type,
            # Honour ``id_field`` (it used to be ignored in favour of 'id').
            '_id': d[id_field],
            '_source': d
        } for d in documents]

        bulk(es, actions)

    @classmethod
    def bulk_delete(cls, ids, es=None, index=None):
        """Delete a bunch of documents, given their ids, in one bulk request.

        :arg ids: iterable of document ids to delete.
        :arg es: connection to use; defaults to ``get_connection()``.
        :arg index: index name; defaults to ``get_index()``.
        """
        es = es or cls.get_connection()
        index = index or cls.get_index()
        doc_type = cls.get_doc_type()

        actions = [{
            '_op_type': 'delete',
            '_index': index,
            '_type': doc_type,
            '_id': _id
        } for _id in ids]

        bulk(es, actions)

    @classmethod
    def get_index(cls):
        """Return the prefixed name of the current ``Index`` record."""
        # Imported locally, as in the other helpers of this class.
        from kuma.search.models import Index
        return Index.objects.get_current().prefixed_name

    @classmethod
    def search(cls, **kwargs):
        """Return a ``Search`` for the current index that builds hits via ``from_es``.

        Keyword arguments override the default ``using``, ``index`` and
        ``doc_type`` options.
        """
        options = {
            'using': connections.get_connection(),
            'index': cls.get_index(),
            'doc_type': {
                cls._doc_type.name: cls.from_es
            },
        }
        options.update(kwargs)
        sq = Search(**options)

        return sq

    @classmethod
    def get_model(cls):
        """Return the Django model class backing this document type."""
        from kuma.wiki.models import Document
        return Document

    @classmethod
    def get_indexable(cls, percent=100):
        """
        For this mapping type return a list of model IDs that should be
        indexed with the management command, in a full reindex.

        WARNING: When changing this code make sure to update the
                 ``should_update`` method below, too!

        """
        model = cls.get_model()

        excludes = [Q(slug__icontains=exclude)
                    for exclude in cls.exclude_slugs]

        qs = (model.objects.filter(is_template=False,
                                   is_redirect=False,
                                   deleted=False).exclude(
                                       reduce(operator.or_, excludes)))

        # Explicit float division so the fraction is never truncated to 0.
        fraction = percent / 100.0
        if fraction < 1:
            qs = qs[:int(qs.count() * fraction)]

        return qs.values_list('id', flat=True)

    @classmethod
    def should_update(cls, obj):
        """
        Given a Document instance should return boolean value
        whether the instance should be indexed or not.

        WARNING: This *must* mirror the logic of the ``get_indexable``
                 method above!
        """
        # NOTE(review): this substring test is case-sensitive while
        # ``get_indexable`` filters with ``icontains`` -- confirm intent.
        return (not obj.is_template and not obj.is_redirect and not obj.deleted
                and
                not any(exclude in obj.slug
                        for exclude in cls.exclude_slugs))

    def get_excerpt(self):
        """Return joined highlight fragments if available, else the summary.

        The first entry of ``excerpt_fields`` present in ``self.highlight``
        wins; its fragments are joined with an ellipsis character.
        """
        if getattr(self, 'highlight', False):
            for excerpt_field in self.excerpt_fields:
                if excerpt_field in self.highlight:
                    return u'…'.join(self.highlight[excerpt_field])
        return self.summary

    @classmethod
    def reindex_all(cls, chunk_size=500, index=None, percent=100):
        """Rebuild ElasticSearch indexes.

        :arg chunk_size: how many documents to bulk index as a single chunk.
        :arg index: the `Index` object to reindex into. Uses the current
            promoted index if none provided.
        :arg percent: 1 to 100--the percentage of the db to index.

        Returns a message describing the scheduled work.
        """
        from kuma.search.models import Index
        from kuma.search.tasks import prepare_index, finalize_index
        from kuma.wiki.tasks import index_documents

        index = index or Index.objects.get_current()

        # Get the list of document IDs to index.
        indexable = cls.get_indexable(percent)

        total = len(indexable)
        total_chunks = int(ceil(total / float(chunk_size)))

        pre_task = prepare_index.si(index.pk)
        post_task = finalize_index.si(index.pk)

        if not total:
            # If there's no data we still create the index and finalize it.
            chain(pre_task, post_task).apply_async()
        else:
            index_tasks = [
                index_documents.si(chunk, index.pk)
                for chunk in chunked(indexable, chunk_size)
            ]
            chord_flow(pre_task, index_tasks, post_task).apply_async()

        # Translate the template first and interpolate afterwards; formatting
        # before the lookup would make the translation catalog never match.
        message = _(
            'Indexing {total} documents into {n} chunks of size {size} into '
            'index {index}.').format(total=total,
                                     n=total_chunks,
                                     size=chunk_size,
                                     index=index.prefixed_name)
        return message
Пример #30
0
class NestedSecret(document.DocType):
    # Nested 'secrets' objects whose 'title' uses the custom SecretField.
    secrets = field.Nested(properties={'title': SecretField()})