Пример #1
0
def iter_id_chunks(domain, doc_type, migration_id, couch_db):
    """Yield lists of document ids, one list per page of view rows.

    Queries the ``by_domain_doc_type_date/view`` view on ``couch_db``
    beginning at ``[domain, doc_type]``. The end key and end doc id come
    from ``get_endkey_docid(domain, doc_type, migration_id)`` and are
    passed with ``inclusive_end=False``. Page size is taken from the
    ``iter_id_chunks.chunk_size`` function attribute.

    Paging arguments are produced by a ``NoSkipArgsProvider``; iteration
    ends when a page is empty or the provider raises ``StopIteration``.
    """
    view_name = 'by_domain_doc_type_date/view'

    def fetch_page(**view_kwargs):
        return couch_db.view(view_name, **view_kwargs)

    stop_key, stop_docid = get_endkey_docid(domain, doc_type, migration_id)
    paginator = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': stop_key,
        'endkey_docid': stop_docid,
        'inclusive_end': False,
        'limit': iter_id_chunks.chunk_size,
        'include_docs': False,
        'reduce': False,
    })
    args, kwargs = paginator.get_initial_args()
    while True:
        page = paginator.adjust_results(
            list(fetch_page(*args, **kwargs)), args, kwargs)
        if not page:
            return
        yield [row["id"] for row in page]
        try:
            # The last row of this page determines where the next one starts.
            args, kwargs = paginator.get_next_args(page[-1], *args, **kwargs)
        except StopIteration:
            return
Пример #2
0
    def __iter__(self):
        """Yield ``get_received_on(row["doc"], self.stats)`` for each view row.

        Saves the current JSON state of ``self.itr`` via
        ``self._save_resume_state`` and then pages through the
        ``by_domain_doc_type_date/view`` view with ``descending=True``,
        starting from the saved ``startkey`` / ``startkey_docid`` and using
        ``[self.domain, self.doc_type]`` as the end key. Pages hold up to
        1000 rows with docs included. Iteration ends when a page is empty or
        the args provider raises ``StopIteration``.
        """
        log.info("preparing to rewind: %s", self.move_to)
        resume_state = self.itr.state.to_json()
        self._save_resume_state(resume_state)
        db = XFormInstance.get_db()

        def fetch_page(**view_kwargs):
            return db.view('by_domain_doc_type_date/view', **view_kwargs)

        saved_kwargs = resume_state["kwargs"]
        paginator = NoSkipArgsProvider({
            'startkey': saved_kwargs["startkey"],
            'startkey_docid': saved_kwargs["startkey_docid"],
            'endkey': [self.domain, self.doc_type],
            'descending': True,
            'limit': 1000,
            'include_docs': True,
            'reduce': False,
        })
        args, kwargs = paginator.get_initial_args()
        while True:
            page = paginator.adjust_results(
                list(fetch_page(*args, **kwargs)), args, kwargs)
            if not page:
                return
            for row in page:
                yield get_received_on(row["doc"], self.stats)
            try:
                # The last row of this page determines where the next one starts.
                args, kwargs = paginator.get_next_args(page[-1], *args, **kwargs)
            except StopIteration:
                return
Пример #3
0
def _iter_missing_ids(db, min_tries, resume_key, view_name, view_params, repair):
    """Build a resumable iterator reporting missing doc ids per view page.

    Returns a ``ResumableFunctionIterator`` keyed by ``resume_key`` whose
    data function queries ``view_name`` on ``db``. For each set of view
    kwargs the data function returns either an empty list (no rows were
    seen) or a one-item list containing the last view row of the page,
    with a ``"missing_info"`` entry set to ``(missing, tries, repaired)``.

    :param db: database object exposing ``view(name, **kwargs)``.
    :param min_tries: forwarded to ``find_missing_ids`` and
        ``repair_couch_docs``.
    :param resume_key: key passed to ``ResumableFunctionIterator``.
    :param view_name: name of the view to query.
    :param view_params: initial view parameters for ``NoSkipArgsProvider``.
    :param repair: when truthy, ``repair_couch_docs`` is called if any ids
        are reported missing.
    """
    def data_function(**view_kwargs):
        @retry_on_couch_error
        def get_doc_ids():
            # Query the view with the current (possibly mutated) kwargs and
            # return the set of row ids.
            results = list(db.view(view_name, **view_kwargs))
            if "limit" in view_kwargs and results:
                # A limited query returned rows: remember the final row and
                # swap "limit" for an explicit end key / doc id. Because
                # view_kwargs is mutated in place, any later call of this
                # function (presumably repeated by find_missing_ids — TODO
                # confirm) queries with that end bound instead of a limit.
                nonlocal last_result
                last_result = results[-1]
                replace_limit_with_endkey(view_kwargs, last_result)
            return {r["id"] for r in results}

        def replace_limit_with_endkey(view_kwargs, last_result):
            # Mutates view_kwargs: drops "limit" and bounds the query at the
            # key and id of last_result. An existing "endkey_docid" is
            # treated as a programming error.
            assert "endkey_docid" not in view_kwargs, view_kwargs
            view_kwargs.pop("limit")
            view_kwargs["endkey"] = last_result["key"]
            view_kwargs["endkey_docid"] = last_result["id"]

        # Set by get_doc_ids (via nonlocal) when a limited query returns rows.
        last_result = None
        missing, tries = find_missing_ids(get_doc_ids, min_tries=min_tries)
        if last_result is None:
            # No row was ever recorded, so there is nothing to report.
            # NOTE(review): indexing raises KeyError if view_kwargs has no
            # 'startkey' or 'endkey' entry — confirm callers always set both.
            log.debug("no results %s - %s", view_kwargs['startkey'], view_kwargs['endkey'])
            assert not missing
            return []
        if missing and repair:
            # repair_couch_docs returns an updated missing value, its own try
            # count (added to the total) and a repaired count.
            missing, tries2, repaired = repair_couch_docs(db, missing, get_doc_ids, min_tries)
            tries += tries2
        else:
            repaired = 0
        log.debug(f"{len(missing)}/{tries} start={view_kwargs['startkey']} {missing or ''}")
        # Attach the outcome to the last row; that row is the only item
        # returned for this page.
        last_result["missing_info"] = missing, tries, repaired
        return [last_result]

    args_provider = NoSkipArgsProvider(view_params)
    return ResumableFunctionIterator(resume_key, data_function, args_provider)
Пример #4
0
def _iter_docs(domain, doc_type, resume_key, stopper):
    def data_function(**view_kwargs):
        return couch_db.view('by_domain_doc_type_date/view', **view_kwargs)

    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")
    else:
        row_key = "doc"

    if stopper.clean_break:
        return []
    couch_db = XFormInstance.get_db()
    args_provider = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    })
    rows = ResumableFunctionIterator(
        resume_key,
        data_function,
        args_provider,
        item_getter=None,
        event_handler=MigrationPaginationEventHandler(domain, stopper))
    return (row[row_key] for row in rows)
Пример #5
0
def _iter_docs(domain, doc_type, resume_key, stopper):
    """Yield one value per row from a resumable iteration over a view.

    ``doc_type`` may be written as ``"<doc_type>.<row_key>"``; the part
    after the dot names the row field to yield. Without a dot the ``"doc"``
    field is yielded and the view is queried with ``include_docs=True``.

    Nothing is yielded when ``stopper.clean_break`` is truthy. Otherwise
    rows come from a ``ResumableFunctionIterator`` (keyed by ``resume_key``)
    paging through ``by_domain_doc_type_date/view`` between
    ``[domain, doc_type]`` and ``[domain, doc_type, {}]`` in pages of
    ``_iter_docs.chunk_size``. Each fetched page is asserted to contain only
    rows whose key starts with ``domain``.

    The iteration state is logged on resume, and the last row seen (minus
    its ``"doc"`` entry) plus the final state are logged when the generator
    finishes or is closed.
    """
    view_name = 'by_domain_doc_type_date/view'

    @retry_on_couch_error
    def data_function(**view_kwargs):
        page = list(couch_db.view(view_name, **view_kwargs))
        assert all(r['key'][0] == domain for r in page), \
            _repr_bad_results(view_name, view_kwargs, page, domain)
        return page

    row_key = "doc"
    if "." in doc_type:
        doc_type, row_key = doc_type.split(".")

    if stopper.clean_break:
        # Inside a generator this simply ends iteration without yielding.
        return []

    couch_db = XFormInstance.get_db()
    args_provider = NoSkipArgsProvider({
        'startkey': [domain, doc_type],
        'endkey': [domain, doc_type, {}],
        'limit': _iter_docs.chunk_size,
        'include_docs': row_key == "doc",
        'reduce': False,
    })
    handler = MigrationPaginationEventHandler(domain, stopper)
    rows = ResumableFunctionIterator(
        resume_key,
        data_function,
        args_provider,
        item_getter=None,
        event_handler=handler,
    )
    if rows.state.is_resume():
        log.info("iteration state: %r", rows.state.to_json())

    last_row = None
    try:
        for last_row in rows:
            yield last_row[row_key]
    finally:
        if last_row is not None:
            # Log the row without its (potentially large) document body.
            summary = dict(last_row)
            summary.pop("doc", None)
            log.info("last item: %r", summary)
        log.info("final iteration state: %r", rows.state.to_json())