def preprocessing_raw_data(self):
    """Clean raw documents in two passes over the whole Document table.

    Pass 1: strip HTML markup (and newlines) from ``text``/``title`` of any
    document that contains angle brackets, persisting each batch with
    ``bulk_update``.
    Pass 2: delete documents whose combined text+title is predominantly
    Kazakh (more than 7% Kazakh-specific Cyrillic letters).
    """
    batch_size = 10000

    def is_kazakh(text):
        # BUGFIX: the original compared the raw *count* of Kazakh letters to
        # 0.07, which is equivalent to "at least one Kazakh char" and defeats
        # the 7% threshold the constant implies. Compare the fraction instead.
        if not text:
            return False
        kazakh_chars = sum(c in "ӘәҒғҚқҢңӨөҰұҮүІі" for c in text)
        return kazakh_chars / len(text) > 0.07

    qs = Document.objects.filter(id__gt=0).order_by('id')
    number_of_documents = qs.count()

    # Pass 1: HTML stripping, batched to bound memory and DB round-trips.
    for i, batch in enumerate(batch_qs(qs, batch_size=batch_size)):
        print(f"Processing {i*batch_size}/{number_of_documents}")
        for j, doc in enumerate(batch):
            if i == 0:
                # Per-row progress only for the first batch, as a liveness probe.
                print(f"{j}/{batch_size}")
            if "<" in doc.text or ">" in doc.text or "<" in doc.title or ">" in doc.title:
                doc.text = BeautifulSoup(
                    doc.text, "html.parser").text.strip().replace('\n', '')
                doc.title = BeautifulSoup(
                    doc.title, "html.parser").text.strip().replace('\n', '')
        Document.objects.bulk_update(batch, fields=['text', 'title'])

    # Pass 2: remove Kazakh-language documents (now ratio-based, see is_kazakh).
    for i, batch in enumerate(batch_qs(qs, batch_size=batch_size)):
        print(f"Deleting {i*batch_size}/{number_of_documents}")
        for doc in batch:
            if is_kazakh(doc.text + doc.title):
                doc.delete()
def document_generator(qs):
    """Yield Elasticsearch-ready dicts for every document in *qs*.

    Each record's ``corpus`` is tagged ``hate_<class_label>``; a random
    *percent_test* share of records is routed to the ``hate_test`` corpus
    instead.
    """
    for chunk in batch_qs(qs, batch_size=batch_size):
        for model_doc in chunk:
            es_doc = ESDocument()
            es_doc.init_from_model(model_doc)
            payload = es_doc.to_dict()
            in_test_split = random.randint(1, 100) <= percent_test
            if in_test_split:
                payload['corpus'] = "hate_test"
            else:
                payload['corpus'] = f"hate_{payload['class_label']}"
            yield payload
def init_document_datetime_activity_parsed(apps, schema_editor):
    """Data migration: seed ``datetime_activity_parsed`` from ``datetime_created``.

    Operates on documents that have both a ``datetime`` and ``num_views``,
    processing them in fixed-size batches and persisting with ``bulk_update``.
    """
    DocumentModel = apps.get_model('mainapp', 'Document')
    batch_size = 10000
    qs = (
        DocumentModel.objects
        .exclude(datetime=None)
        .order_by('id')
        .only('datetime_activity_parsed', 'datetime_created',)
        .exclude(num_views=None)
        .order_by('id')
    )
    number_of_documents = qs.count()
    for batch_no, batch in enumerate(batch_qs(qs, batch_size=batch_size)):
        print(f"Processing {batch_no * batch_size}/{number_of_documents}")
        for row_no, doc in enumerate(batch):
            if batch_no == 0:
                # Per-row progress only for the first batch, as a liveness probe.
                print(f"{row_no}/{batch_size}")
            doc.datetime_activity_parsed = doc.datetime_created
        DocumentModel.objects.bulk_update(batch, fields=['datetime_activity_parsed'])
def copy_date(apps, schema_editor):
    """Data migration: populate the ``date`` field from ``datetime``'s date part.

    Skips rows with no ``datetime``; processes in fixed-size batches and
    persists each batch with ``bulk_update``.
    """
    DocumentModel = apps.get_model('mainapp', 'Document')
    batch_size = 10000
    qs = (
        DocumentModel.objects
        .exclude(datetime=None)
        .order_by('id')
        .only('datetime', 'date',)
    )
    number_of_documents = qs.count()
    for batch_no, batch in enumerate(batch_qs(qs, batch_size=batch_size)):
        print(f"Processing {batch_no * batch_size}/{number_of_documents}")
        for row_no, doc in enumerate(batch):
            if batch_no == 0:
                # Per-row progress only for the first batch, as a liveness probe.
                print(f"{row_no}/{batch_size}")
            doc.date = doc.datetime.date()
        DocumentModel.objects.bulk_update(batch, fields=['date'])
import datetime

from mainapp.models import *
from mainapp.services import batch_qs

# One-off repair script: a document datetime that lies in the future almost
# certainly had its day and month swapped at parse time. After shifting by
# +6 hours (local offset — presumably UTC+6/Almaty; TODO confirm), if the
# shifted day is a valid month number (<= 12), swap day and month back.
batch_size = 10000
qs = Document.objects.filter(id__gt=0).order_by('id')
number_of_documents = qs.count()
for i, batch in enumerate(batch_qs(qs, batch_size=batch_size)):
    print(f"Processing {i*batch_size}/{number_of_documents}")
    for j, doc in enumerate(batch):
        if i == 0:
            # Per-row progress only for the first batch, as a liveness probe.
            print(f"{j}/{batch_size}")
        if not doc.datetime:
            continue
        # now() is evaluated per row on purpose: behavior is preserved even
        # if the script runs across midnight.
        if doc.datetime.date() <= datetime.datetime.now().date():
            continue
        shifted = doc.datetime + datetime.timedelta(hours=6)
        if shifted.day <= 12:
            # shifted.month is always <= 12, so using it as the day is safe.
            doc.datetime = doc.datetime.replace(month=shifted.day, day=shifted.month)
    Document.objects.bulk_update(batch, fields=['datetime'])
def document_generator(self, qs):
    """Yield each document in *qs* as an Elasticsearch-ready dict.

    Streams the queryset in ``self.batch_size``-sized batches so the whole
    result set is never materialized at once.
    """
    for page in batch_qs(qs, batch_size=self.batch_size):
        for model_obj in page:
            es_doc = ESDocument()
            es_doc.init_from_model(model_obj)
            yield es_doc.to_dict()