Exemplo n.º 1
0
    def migrate(self, filename=None, reset=False, max_retry=2, chunk_size=100, **kw):
        """Migrate every document supplied by the document provider.

        Each successfully migrated document advances the progress counter;
        a failed document is re-queued on the iterator up to ``max_retry``
        times and recorded as skipped once retries are exhausted. The
        migration is marked complete only when nothing was skipped.

        :param filename: optional output filename passed to the doc migrator.
        :param reset: when True, restart progress from the beginning.
        :param max_retry: how many times a failing document is retried.
        :param chunk_size: number of documents fetched per batch.
        :returns: ``(total, skipped)`` counts from the progress manager.
        """
        provider = self.get_document_provider()
        docs = provider.get_document_iterator(chunk_size)
        tracker = ProgressManager(
            docs,
            total=provider.get_total_document_count(),
            reset=reset,
            chunk_size=chunk_size,
            logger=CouchProcessorProgressLogger(self.doc_types),
        )
        if self.has_worker_pool:
            # A worker-pool migrator consumes the iterator itself, so it is
            # handed over via keyword arguments.
            assert "iterable" not in kw, kw
            kw.update(iterable=docs, max_retry=max_retry)

        with self.get_doc_migrator(filename, **kw) as migrator, tracker:
            for doc in docs:
                if migrator.migrate(doc):
                    tracker.add()
                    continue
                try:
                    docs.retry(doc, max_retry)
                except TooManyRetries:
                    tracker.skip(doc)

        if not tracker.skipped:
            self.write_migration_completed_state()

        return tracker.total, tracker.skipped
Exemplo n.º 2
0
    def _get_couch_form_chunks(run_config):
        """Yield chunks of couch-form records matching ``run_config``.

        Builds one couch view key range per (domain, doc type) pair over
        ``[run_config.start_date, run_config.end_date]``, then iterates the
        view resumably and yields lists of
        ``(form_id, doc_type, 'COUCH_XMLNS_NOT_SUPPORTED', received_on,
        domain)`` tuples, at most ``CHUNK_SIZE`` per chunk. Yields nothing
        when no domain matches.
        """
        db = XFormInstance.get_db()
        view_name = 'by_domain_doc_type_date/view'

        keys = [{
            'startkey': [
                couch_domain, doc_type,
                json_format_datetime(run_config.start_date)
            ],
            'endkey': [
                couch_domain, doc_type,
                json_format_datetime(run_config.end_date)
            ],
        } for couch_domain in _get_matching_couch_domains(run_config)
                for doc_type in ['XFormArchived', 'XFormInstance']]
        if not keys:
            # No matching domains: end the generator without yielding.
            return

        def _get_length():
            # Total matching forms: sum the reduce value of each key range.
            length = 0
            for key in keys:
                result = db.view(view_name, reduce=True, **key).one()
                if result:
                    length += result['value']
            return length

        iteration_key = f'couch_forms-{run_config.iteration_key}'
        # Reuse the db handle fetched above rather than calling
        # XFormInstance.get_db() a second time.
        iterable = resumable_view_iterator(db,
                                           iteration_key,
                                           view_name,
                                           keys,
                                           chunk_size=CHUNK_SIZE,
                                           full_row=True)
        progress = ProgressManager(iterable,
                                   total=_get_length(),
                                   reset=False,
                                   chunk_size=CHUNK_SIZE,
                                   logger=ProcessorProgressLogger(
                                       '[Couch Forms] ', sys.stderr))
        with progress:
            for chunk in chunked(iterable, CHUNK_SIZE):
                records = []
                for row in chunk:
                    form_id = row['id']
                    domain, doc_type, received_on = row['key']
                    received_on = iso_string_to_datetime(received_on)
                    assert run_config.domain in (domain, ALL_DOMAINS)
                    records.append(
                        (form_id, doc_type, 'COUCH_XMLNS_NOT_SUPPORTED',
                         received_on, domain))
                yield records
                progress.add(len(chunk))
Exemplo n.º 3
0
def _get_resumable_chunked_iterator(dbaccessor, iteration_key, log_prefix):
    """Yield chunks of model instances from all shard databases, resumably.

    :param dbaccessor: accessor exposing ``sql_db_aliases`` and
        ``get_approximate_doc_count``; also drives the model iterator.
    :param iteration_key: key under which resume state is persisted.
    :param log_prefix: prefix for the progress log written to stderr.
    :yields: lists of at most ``CHUNK_SIZE`` items.
    """
    # Approximate total across all shards, used only for progress reporting.
    total_docs = sum(dbaccessor.get_approximate_doc_count(db)
                     for db in dbaccessor.sql_db_aliases)

    iterable = resumable_sql_model_iterator(iteration_key,
                                            dbaccessor,
                                            chunk_size=CHUNK_SIZE,
                                            transform=lambda x: x)
    progress = ProgressManager(iterable,
                               total=total_docs,
                               reset=False,
                               chunk_size=CHUNK_SIZE,
                               logger=ProcessorProgressLogger(
                                   log_prefix, sys.stderr))
    with progress:
        for chunk in chunked(iterable, CHUNK_SIZE):
            yield chunk
            progress.add(len(chunk))
Exemplo n.º 4
0
    def migrate(self,
                filename=None,
                reset=False,
                max_retry=2,
                chunk_size=100,
                **kw):
        """Run the migration over all documents, retrying failures.

        Documents that fail to migrate are put back on the iterator up to
        ``max_retry`` times before being counted as skipped. The completed
        state is written only if every document migrated.

        :param filename: optional filename forwarded to the doc migrator.
        :param reset: restart progress tracking from scratch when True.
        :param max_retry: retry budget per failing document.
        :param chunk_size: batch size for the document iterator.
        :returns: tuple of (total migrated, skipped count).
        """
        document_provider = self.get_document_provider()
        doc_iterator = document_provider.get_document_iterator(chunk_size)
        progress_manager = ProgressManager(
            doc_iterator,
            total=document_provider.get_total_document_count(),
            reset=reset,
            chunk_size=chunk_size,
            logger=CouchProcessorProgressLogger(self.doc_types),
        )
        if self.has_worker_pool:
            # Worker-pool migrators drive the iterator themselves.
            assert "iterable" not in kw, kw
            kw.update(iterable=doc_iterator, max_retry=max_retry)

        migrator_cm = self.get_doc_migrator(filename, **kw)
        with migrator_cm as migrator, progress_manager:
            for document in doc_iterator:
                if not migrator.migrate(document):
                    try:
                        doc_iterator.retry(document, max_retry)
                    except TooManyRetries:
                        progress_manager.skip(document)
                else:
                    progress_manager.add()

        if not progress_manager.skipped:
            self.write_migration_completed_state()

        return progress_manager.total, progress_manager.skipped
Exemplo n.º 5
0
    def migrate(self,
                filename=None,
                reset=False,
                max_retry=2,
                chunk_size=100,
                **kw):
        """Migrate all documents from a possibly-filtered document provider.

        ``date_range`` and ``domain`` keyword arguments, when present, are
        removed from ``kw`` and forwarded to the document provider. Unlike
        the retrying variant, a document that fails to migrate is recorded
        as skipped immediately. Completion state is written only when no
        document was skipped.

        :param filename: optional filename forwarded to the doc migrator.
        :param reset: restart progress tracking from scratch when True.
        :param max_retry: forwarded to worker-pool migrators via ``kw``.
        :param chunk_size: batch size for the document iterator.
        :returns: tuple of (total migrated, skipped count).
        """
        # Extract provider-level filters before the rest of kw is handed
        # to the doc migrator.
        provider_kwargs = {name: kw.pop(name)
                           for name in ('date_range', 'domain')
                           if name in kw}
        provider = self.get_document_provider(**provider_kwargs)
        docs = provider.get_document_iterator(chunk_size)
        tracker = ProgressManager(
            docs,
            total=provider.get_total_document_count(),
            reset=reset,
            chunk_size=chunk_size,
            logger=CouchProcessorProgressLogger(self.doc_types),
        )
        if self.has_worker_pool:
            # A worker-pool migrator consumes the iterator itself.
            assert "iterable" not in kw, kw
            kw.update(iterable=docs, max_retry=max_retry)

        with self.get_doc_migrator(filename, **kw) as migrator, tracker:
            for doc in docs:
                if migrator.migrate(doc):
                    tracker.add()
                else:
                    tracker.skip(doc)

        if not tracker.skipped:
            self.write_migration_completed_state()

        return tracker.total, tracker.skipped