def test_get_cases(self):
        """Check ``CaseAccessorSQL.get_cases`` for unknown, single and multiple ids."""
        first = _create_case()
        second = _create_case()

        # An id that matches no case is expected to give an empty result.
        nothing = CaseAccessorSQL.get_cases(['missing_case'])
        self.assertEqual(0, len(nothing))

        only_first = CaseAccessorSQL.get_cases([first.case_id])
        self.assertEqual(1, len(only_first))
        self.assertEqual(first.case_id, only_first[0].case_id)

        # With ``ordered=True`` the result is expected in request order.
        requested = [first.case_id, second.case_id]
        both = CaseAccessorSQL.get_cases(requested, ordered=True)
        self.assertEqual(2, len(both))
        self.assertEqual(first.case_id, both[0].case_id)
        self.assertEqual(second.case_id, both[1].case_id)
    def test_get_cases(self):
        """Fetch zero, one and two cases and verify count, identity and order."""
        case_a = _create_case()
        case_b = _create_case()

        # Unknown id: nothing should come back.
        found = CaseAccessorSQL.get_cases(['missing_case'])
        self.assertEqual(0, len(found))

        # Single known id: exactly that case.
        found = CaseAccessorSQL.get_cases([case_a.case_id])
        self.assertEqual(1, len(found))
        self.assertEqual(case_a.case_id, found[0].case_id)

        # Two known ids, ordered: results follow the order of the ids.
        id_pair = [case_a.case_id, case_b.case_id]
        found = CaseAccessorSQL.get_cases(id_pair, ordered=True)
        self.assertEqual(2, len(found))
        self.assertEqual(case_a.case_id, found[0].case_id)
        self.assertEqual(case_b.case_id, found[1].case_id)
Exemplo n.º 3
0
 def _create_docs(cls, count):
     """Create ``count`` test forms, one per fresh case id, and return the cases.

     The ids are random UUID hex strings; the cases are fetched back with
     ``ordered=True`` using those same ids.
     """
     new_case_ids = [uuid.uuid4().hex for _ in range(count)]
     for new_id in new_case_ids:
         create_form_for_test(cls.domain, case_id=new_id)
     return CaseAccessorSQL.get_cases(new_case_ids, ordered=True)
Exemplo n.º 4
0
def iter_patch_form_diffs(domain, *, kind=None, doc_ids=None, by_kind=None):
    """Yield the doc diffs of patch forms.

    Forms are selected explicitly (``kind`` with ``doc_ids``, or
    ``by_kind``) or, when neither is given, by querying every form in
    ``domain`` whose xmlns is ``PatchForm.xmlns``. Explicitly selected
    forms are also filtered down to that xmlns.

    :param domain: domain name; only used for the domain-wide query.
    :param kind: ``"forms"`` or ``"cases"``. Requires ``doc_ids`` and
        cannot be combined with ``by_kind``.
    :param doc_ids: ids of the documents of the given ``kind``.
    :param by_kind: mapping with optional ``"forms"`` and ``"cases"``
        keys, each holding a collection of ids. For cases, the forms
        listed in each case's ``xform_ids`` are used.
    :raises ValueError: on conflicting, unknown or incomplete selection
        arguments. This is a generator function, so the error surfaces
        when iteration starts, not when the function is called.
    """
    if kind:
        if by_kind:
            raise ValueError("cannot query 'kind' and 'by_kind' together")
        if kind not in ["forms", "cases"]:
            raise ValueError(f"kind must be 'forms' or 'cases'; got {kind}")
        if not doc_ids:
            raise ValueError(f"please specify doc ids: --select={kind}:id,...")
        by_kind = {kind: doc_ids}
    if by_kind:
        if by_kind.keys() - {"forms", "cases"}:
            kinds = list(by_kind)
            raise ValueError(f"valid kinds 'forms' and 'cases'; got {kinds}")
        # Work on a private copy: the list is extended below, and extending
        # the caller's own ``doc_ids`` / ``by_kind["forms"]`` object would
        # leak case form ids back to the caller (and would fail outright
        # for tuples, sets or other non-list collections).
        form_ids = list(by_kind.get("forms") or [])
        case_ids = by_kind.get("cases", [])
        if case_ids:
            # may be inefficient for cases with many forms
            for case in CaseAccessorSQL.get_cases(case_ids):
                form_ids.extend(case.xform_ids)
        forms = (f for f in FormAccessorSQL.get_forms(form_ids)
                 if f.xmlns == PatchForm.xmlns)
    else:
        # based on iter_form_ids_by_xmlns
        q_expr = Q(domain=domain, xmlns=PatchForm.xmlns)
        forms = paginate_query_across_partitioned_databases(
            XFormInstanceSQL, q_expr, load_source='couch_to_sql_migration')
    for form in forms:
        yield from iter_doc_diffs(form)
Exemplo n.º 5
0
 def get_subcases(self, index_identifier=None):
     """Return, as a list, the cases referenced by this object's reverse indices.

     When ``index_identifier`` is given, only reverse indices whose
     ``identifier`` equals it are followed; with ``None`` all are used.
     """
     # Imported inside the method, as in the original code.
     from corehq.form_processor.backends.sql.dbaccessors import CaseAccessorSQL

     def _wanted(index):
         return index_identifier is None or index.identifier == index_identifier

     referenced_ids = [
         index.referenced_id for index in self.reverse_indices if _wanted(index)
     ]
     return list(CaseAccessorSQL.get_cases(referenced_ids))
Exemplo n.º 6
0
    def _diff_cases(self, couch_cases):
        """Diff a batch of Couch cases against the matching SQL cases.

        :param couch_cases: mapping of case id to Couch case JSON.

        For each SQL case found for those ids the JSON diff is computed and
        filtered. If diffs remain and the SQL case is not deleted, the Couch
        case is rebuilt and diffed again. Whatever diffs are left are stored
        in ``self.diff_db``. Ledgers for the same ids are diffed afterwards
        and ``self.processed_docs`` is advanced by the batch size.
        """
        from corehq.apps.tzmigration.timezonemigration import json_diff
        self.log_debug('Calculating case diffs for {} cases'.format(
            len(couch_cases)))
        case_ids = list(couch_cases)
        for sql_case in CaseAccessorSQL.get_cases(case_ids):
            couch_json = couch_cases[sql_case.case_id]
            sql_json = sql_case.to_json()
            unfiltered = json_diff(
                couch_json, sql_json, track_list_indices=False)
            diffs = filter_case_diffs(
                couch_json, sql_json, unfiltered,
                self.forms_that_touch_cases_without_actions)
            if diffs and not sql_case.is_deleted:
                # The rebuild may replace both the Couch JSON and the diffs.
                couch_json, diffs = self._rebuild_couch_case_and_re_diff(
                    couch_json, sql_json)

            if not diffs:
                continue
            self.diff_db.add_diffs(
                couch_json['doc_type'], sql_case.case_id, diffs)

        self._diff_ledgers(case_ids)

        self.processed_docs += len(case_ids)
        self._log_case_diff_count(throttled=True)
Exemplo n.º 7
0
    def _diff_cases(self, couch_cases):
        """Compare Couch case JSON with SQL cases and record any differences.

        :param couch_cases: mapping of case id to Couch case JSON.

        Only ids for which a SQL case is returned are compared. Filtered
        diffs on a non-deleted SQL case trigger a rebuild and re-diff of the
        Couch case before the result is written to ``self.diff_db``. After
        the loop, ledgers are diffed and progress counters are updated.
        """
        from corehq.apps.tzmigration.timezonemigration import json_diff
        self.log_debug('Calculating case diffs for {} cases'.format(len(couch_cases)))
        case_ids = list(couch_cases)
        for sql_case in CaseAccessorSQL.get_cases(case_ids):
            couch_doc = couch_cases[sql_case.case_id]
            sql_doc = sql_case.to_json()
            raw_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
            diffs = filter_case_diffs(
                couch_doc, sql_doc, raw_diffs,
                self.forms_that_touch_cases_without_actions,
            )
            needs_rebuild = diffs and not sql_case.is_deleted
            if needs_rebuild:
                couch_doc, diffs = self._rebuild_couch_case_and_re_diff(couch_doc, sql_doc)

            if diffs:
                self.diff_db.add_diffs(couch_doc['doc_type'], sql_case.case_id, diffs)

        self._diff_ledgers(case_ids)

        self.processed_docs += len(case_ids)
        self._log_case_diff_count(throttled=True)
Exemplo n.º 8
0
def diff_cases(couch_cases, log_cases=False):
    """Diff a batch of Couch cases against SQL and collect the results.

    Requires the module-level ``_diff_state`` to be set; its ``domain`` is
    used to tag the datadog counters. Ledger diffs for the same case ids
    and missing-document records are added to the result as well.

    :param couch_cases: dict `{<case_id>: <case_json>, ...}`
    :param log_cases: when true, log the number of diffs of each case.
    :returns: `DiffData`
    """
    assert isinstance(couch_cases, dict), repr(couch_cases)[:100]
    assert "_diff_state" in globals()
    data = DiffData()
    dd_count = partial(datadog_counter, tags=["domain:" + _diff_state.domain])
    case_ids = list(couch_cases)
    seen_sql_ids = set()
    for sql_case in CaseAccessorSQL.get_cases(case_ids):
        case_id = sql_case.case_id
        seen_sql_ids.add(case_id)
        couch_case, case_diffs, case_changes = diff_case(
            sql_case, couch_cases[case_id], dd_count)
        if case_diffs:
            dd_count("commcare.couchsqlmigration.case.has_diff")
        if case_changes:
            dd_count("commcare.couchsqlmigration.case.did_change")
        data.doc_ids.append(case_id)
        doc_type = couch_case['doc_type']
        data.diffs.append((doc_type, case_id, case_diffs))
        data.changes.append((doc_type, case_id, case_changes))
        if log_cases:
            log.info("case %s -> %s diffs", case_id, len(case_diffs))

    ledger_diffs, ledger_changes = diff_ledgers(case_ids, dd_count)
    data.diffs.extend(ledger_diffs)
    data.changes.extend(ledger_changes)
    add_missing_docs(data, couch_cases, seen_sql_ids, dd_count)
    return data
Exemplo n.º 9
0
 def get_subcases(self, index_identifier=None):
     """Look up the cases that this object's reverse indices point to.

     ``index_identifier`` optionally restricts the lookup to reverse
     indices with that ``identifier``. The result is always a list.
     """
     # Imported inside the method, as in the original code.
     from corehq.form_processor.backends.sql.dbaccessors import CaseAccessorSQL

     matching_ids = []
     for reverse_index in self.reverse_indices:
         if index_identifier is None or reverse_index.identifier == index_identifier:
             matching_ids.append(reverse_index.referenced_id)
     return list(CaseAccessorSQL.get_cases(matching_ids))
Exemplo n.º 10
0
    def handle(self, domain, **options):
        """Check and process forms for specific cases or for whole domains.

        Options read: ``domain``, ``case_id`` (case ids) and ``db``.

        * With case ids: gather the ``xform_ids`` of those cases and process
          them in one call. If a domain is given, every case must belong
          to it.
        * Without case ids: for the given domain, or every domain from
          ``iter_domains()``, collect the ``orig_id`` of all DEPRECATED
          forms from the selected database(s) and process them in chunks
          of 500.

        NOTE: the positional ``domain`` argument is immediately replaced by
        ``options['domain']``.
        """
        domain = options.get('domain')
        case_ids = options.get('case_id')
        db = options.get('db')

        if case_ids:
            form_ids = set()
            for case in CaseAccessorSQL.get_cases(case_ids):
                assert not domain or case.domain == domain, 'Case "%s" not in domain "%s"' % (
                    case.case_id, domain)
                form_ids.update(case.xform_ids)

            check_and_process_forms(form_ids)
            return

        domains = [domain] if domain else iter_domains()
        for domain in domains:
            print(u"Checking domain: %s" % domain)
            form_ids_to_check = set()
            db_aliases = [db] if db else get_db_aliases_for_partitioned_query()
            for alias in db_aliases:
                deprecated_forms = XFormInstanceSQL.objects.using(alias).filter(
                    domain=domain,
                    state=XFormInstanceSQL.DEPRECATED)
                form_ids_to_check.update(
                    deprecated_forms.values_list('orig_id', flat=True))

            print('  Found %s forms to check' % len(form_ids_to_check))
            for id_chunk in chunked(form_ids_to_check, 500):
                check_and_process_forms(id_chunk)
Exemplo n.º 11
0
def add_cases_missing_from_couch(data, case_ids):
    """Record a "missing" diff on ``data`` for every id in ``case_ids``.

    Each id is appended to ``data.doc_ids`` and gets one ``CommCareCase``
    diff whose old value is ``MISSING`` and whose new value is
    ``"present"`` when a SQL case with that id exists, else ``MISSING``.

    :param data: object with list-like ``doc_ids`` and ``diffs``
        attributes; mutated in place.
    :param case_ids: iterable of case ids.
    """
    # Materialize once: the ids are traversed three times below, and a
    # one-shot iterator (e.g. a generator) would be exhausted after the
    # first pass, silently recording nothing.
    case_ids = list(case_ids)
    sql_ids = {c.case_id for c in CaseAccessorSQL.get_cases(case_ids)}
    data.doc_ids.extend(case_ids)
    for case_id in case_ids:
        new = "present" if case_id in sql_ids else MISSING
        data.diffs.append((
            "CommCareCase",
            case_id,
            [Diff("missing", path=["*"], old_value=MISSING, new_value=new)],
        ))
Exemplo n.º 12
0
    def _diff_cases(self, couch_cases):
        """Diff Couch cases against SQL cases and store the filtered diffs.

        :param couch_cases: mapping of case id to Couch case JSON.

        The filtered diffs of every SQL case found are passed to
        ``self.diff_db.add_diffs`` (even when empty), then ledgers for the
        same case ids are diffed.
        """
        from corehq.apps.tzmigration.timezonemigration import json_diff
        self.log_debug('Calculating case diffs for {} cases'.format(len(couch_cases)))
        case_ids = list(couch_cases)
        for sql_case in CaseAccessorSQL.get_cases(case_ids):
            couch_json = couch_cases[sql_case.case_id]
            sql_json = sql_case.to_json()
            raw_diffs = json_diff(couch_json, sql_json, track_list_indices=False)
            self.diff_db.add_diffs(
                couch_json['doc_type'],
                sql_case.case_id,
                filter_case_diffs(
                    couch_json, sql_json, raw_diffs,
                    self.forms_that_touch_cases_without_actions,
                ),
            )

        self._diff_ledgers(case_ids)
Exemplo n.º 13
0
    def _diff_cases(self, couch_cases):
        """Record filtered Couch-vs-SQL diffs for a batch of cases.

        :param couch_cases: mapping of case id to Couch case JSON.

        ``self.diff_db.add_diffs`` is called once per SQL case returned for
        the batch, whether or not any diffs survive filtering. Ledger diffs
        for the batch are computed last.
        """
        from corehq.apps.tzmigration.timezonemigration import json_diff
        self.log_debug('Calculating case diffs for {} cases'.format(len(couch_cases)))
        case_ids = list(couch_cases)
        sql_cases = CaseAccessorSQL.get_cases(case_ids)
        for sql_case in sql_cases:
            couch_doc = couch_cases[sql_case.case_id]
            sql_doc = sql_case.to_json()
            all_diffs = json_diff(couch_doc, sql_doc, track_list_indices=False)
            self.diff_db.add_diffs(
                couch_doc['doc_type'], sql_case.case_id,
                filter_case_diffs(
                    couch_doc, sql_doc, all_diffs, self.forms_that_touch_cases_without_actions
                )
            )

        self._diff_ledgers(case_ids)
Exemplo n.º 14
0
    def handle(self, domain, **options):
        """Check and process forms, writing output to a timestamped log file.

        Options read: ``domain``, ``case_id`` (case ids) and ``db``.

        * With case ids: gather the ``xform_ids`` of those cases and process
          them in one call. If a domain is given, every case must belong
          to it.
        * Without case ids: for the given domain, or every domain from
          ``iter_domains()``, collect the ``orig_id`` of all DEPRECATED
          forms from the selected database(s) and process them in chunks
          of 500.

        ``self`` is passed to ``check_and_process_forms`` and used as a
        context manager that opens and closes the log file.

        NOTE: the positional ``domain`` argument is immediately replaced by
        ``options['domain']``.
        """
        domain = options.get('domain')
        case_ids = options.get('case_id')
        db = options.get('db')

        self.log_filename = 'undo_uuid_clash.{}.log'.format(
            datetime.utcnow().isoformat())
        print('\nWriting output to log file: {}\n'.format(self.log_filename))

        if case_ids:
            form_ids = set()
            for case in CaseAccessorSQL.get_cases(case_ids):
                assert not domain or case.domain == domain, 'Case "%s" not in domain "%s"' % (
                    case.case_id, domain)
                form_ids.update(case.xform_ids)

            with self:
                check_and_process_forms(form_ids, self)
        else:
            if domain:
                domains = [domain]
            else:
                domains = iter_domains()

            for domain in domains:
                print(u"Checking domain: %s" % domain)
                form_ids_to_check = set()
                dbs = [db] if db else get_db_aliases_for_partitioned_query()
                for dbname in dbs:
                    form_ids_to_check.update(
                        XFormInstanceSQL.objects.using(dbname).filter(
                            domain=domain,
                            state=XFormInstanceSQL.DEPRECATED).values_list(
                                'orig_id', flat=True))

                print('  Found %s forms to check' % len(form_ids_to_check))
                with self:
                    for chunk in chunked(form_ids_to_check, 500):
                        check_and_process_forms(chunk, self)

    # The three methods below were previously nested inside ``handle`` as
    # unused local functions. ``with self:`` looks up ``__enter__`` and
    # ``__exit__`` on the class, so they have to be real methods.

    def __enter__(self):
        """Open the log file named by ``self.log_filename``."""
        # Append rather than truncate: ``handle`` enters this context once
        # per domain, and mode 'w' would wipe the output already written
        # for earlier domains in the same run.
        self._log_file = open(self.log_filename, 'a')
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the log file; exceptions are not suppressed."""
        self._log_file.close()

    def log(self, message):
        """Write ``message`` to the open log file as-is (no newline added)."""
        self._log_file.write(message)
Exemplo n.º 15
0
    def handle(self, domain, *, state_dir, commit, debug, **options):
        """Unsort the transactions of SQL cases in ``domain``.

        :param domain: domain name; must exist and use the SQL backend.
        :param state_dir: passed to ``setup_logging``.
        :param commit: passed to ``unsort_transactions``; also selects the
            "COMMIT MODE" or "DRY RUN" log message.
        :param debug: passed to ``setup_logging``.
        :raises CommandError: if the domain does not use the SQL backend.
        """
        if not should_use_sql_backend(domain):
            raise CommandError(
                f'Cannot unsort commits on couch domain: {domain}')

        assert Domain.get_by_name(domain), f'Unknown domain "{domain}"'
        setup_logging(state_dir, "unsort_sql_cases", debug)

        mode_message = (
            "COMMIT MODE: show and save unsorted transactions..."
            if commit else
            "DRY RUN: show but do not save unsorted transactions..."
        )
        log.info(mode_message)

        # Cases are loaded in batches of 100 ids.
        sorted_case_ids = iter_sql_cases_with_sorted_transactions(domain)
        for id_batch in chunked(sorted_case_ids, 100, list):
            for sql_case in CaseAccessorSQL.get_cases(id_batch):
                unsort_transactions(sql_case, commit)
Exemplo n.º 16
0
    def _diff_cases(self, couch_cases):
        """Diff a batch of Couch cases against SQL and update the state db.

        :param couch_cases: mapping of case id to Couch case JSON.

        For each SQL case found, the filtered JSON diff is computed. If
        diffs remain and the SQL case is not deleted, the Couch case is
        rebuilt and re-diffed; a failing rebuild is logged as a warning and
        the pre-rebuild diffs are kept. Couch cases without a SQL case are
        passed through ``self._filter_missing_cases`` and recorded as
        missing docs. Per-doc-type counters and ``self.processed_docs`` are
        updated at the end.
        """
        from corehq.apps.tzmigration.timezonemigration import json_diff
        log.debug('Calculating case diffs for {} cases'.format(
            len(couch_cases)))
        statedb = self.statedb
        counts = defaultdict(int)
        case_ids = list(couch_cases)
        found_sql_ids = set()
        for sql_case in CaseAccessorSQL.get_cases(case_ids):
            found_sql_ids.add(sql_case.case_id)
            couch_json = couch_cases[sql_case.case_id]
            sql_json = sql_case.to_json()
            raw_diffs = json_diff(
                couch_json, sql_json, track_list_indices=False)
            diffs = filter_case_diffs(couch_json, sql_json, raw_diffs, statedb)
            if diffs and not sql_case.is_deleted:
                try:
                    couch_json, diffs = self._rebuild_couch_case_and_re_diff(
                        couch_json, sql_json)
                except Exception as err:
                    # Best effort: keep the diffs computed before the rebuild.
                    log.warning('Case {} rebuild -> {}: {}'.format(
                        sql_case.case_id,
                        type(err).__name__, err))
            if diffs:
                statedb.add_diffs(
                    couch_json['doc_type'], sql_case.case_id, diffs)
            counts[couch_json['doc_type']] += 1

        self._diff_ledgers(case_ids)

        if len(case_ids) != len(found_sql_ids):
            couch_ids = set(case_ids)
            # SQL must never return a case that was not asked for.
            unexpected = found_sql_ids - couch_ids
            assert not unexpected, found_sql_ids - couch_ids
            missing_cases = [
                couch_cases[missing_id]
                for missing_id in couch_ids - found_sql_ids
            ]
            log.debug("Found %s missing SQL cases", len(missing_cases))
            for doc_type, doc_ids in self._filter_missing_cases(missing_cases):
                statedb.add_missing_docs(doc_type, doc_ids)
                counts[doc_type] += len(doc_ids)

        for doc_type, total in six.iteritems(counts):
            statedb.increment_counter(doc_type, total)
        self.processed_docs += len(case_ids)
        self._log_case_diff_count(throttled=True)
Exemplo n.º 17
0
def diff_cases(couch_cases, statedb):
    """Diff a batch of cases and write the results to ``statedb``.

    Concurrency note: two concurrent calls holding copies of the same
    case could, with small probability, write conflicting diffs to the
    state db (worst case: duplicate diffs in the case db). It is even less
    likely that the SQL case also changes at that moment and makes the
    two outcomes differ (worst case: a real diff replaced with none).
    Luckily, a concurrent change to the SQL case queues a later diff of
    that case, which replaces any conflicting case diffs in the state db.

    :param couch_cases: dict `{<case_id>: <case_json>, ...}`
    :param statedb: state db that receives case diffs, missing docs and
        per-doc-type counters.
    """
    log.debug('Calculating case diffs for {} cases'.format(len(couch_cases)))
    counts = defaultdict(int)
    case_ids = list(couch_cases)
    found_sql_ids = set()
    for sql_case in CaseAccessorSQL.get_cases(case_ids):
        found_sql_ids.add(sql_case.case_id)
        couch_case, diffs = diff_case(
            sql_case, couch_cases[sql_case.case_id], statedb)
        statedb.replace_case_diffs(
            couch_case['doc_type'], sql_case.case_id, diffs)
        counts[couch_case['doc_type']] += 1

    diff_ledgers(case_ids, statedb)

    if len(case_ids) != len(found_sql_ids):
        couch_ids = set(case_ids)
        # SQL must never return a case that was not asked for.
        unexpected = found_sql_ids - couch_ids
        assert not unexpected, found_sql_ids - couch_ids
        missing_cases = [
            couch_cases[missing_id] for missing_id in couch_ids - found_sql_ids
        ]
        log.debug("Found %s missing SQL cases", len(missing_cases))
        for doc_type, doc_ids in filter_missing_cases(missing_cases):
            statedb.add_missing_docs(doc_type, doc_ids)
            counts[doc_type] += len(doc_ids)

    for doc_type, total in counts.items():
        statedb.increment_counter(doc_type, total)
Exemplo n.º 18
0
    def handle(self, domain, **options):
        """Check and process forms, writing output to a timestamped log file.

        Options read: ``debug``, ``domain``, ``case_id`` (case ids), ``db``.

        * With case ids: gather the ``xform_ids`` of those cases and process
          them in one call. If a domain is given, every case must belong
          to it.
        * Without case ids: for the given domain, or every domain from
          ``iter_domains()``, collect the ``orig_id`` of all DEPRECATED
          forms from the selected database(s) and process them in chunks
          of 500.

        ``self`` is used as a context manager around processing; the
        ``__enter__`` / ``__exit__`` implementation is not visible here
        (presumably it opens and closes ``self.log_filename``).

        NOTE: the positional ``domain`` argument is immediately replaced by
        ``options['domain']``.
        """
        debug = options.get('debug')
        domain = options.get('domain')
        case_ids = options.get('case_id')
        db = options.get('db')

        self.log_filename = 'undo_uuid_clash.{}.log'.format(datetime.utcnow().isoformat())
        print('\nWriting output to log file: {}\n'.format(self.log_filename))

        if case_ids:
            form_ids = set()
            for case in CaseAccessorSQL.get_cases(case_ids):
                assert not domain or case.domain == domain, 'Case "%s" not in domain "%s"' % (case.case_id, domain)
                form_ids.update(case.xform_ids)

            with self:
                check_and_process_forms(form_ids, self, debug)
            return

        domains = [domain] if domain else iter_domains()
        for domain in domains:
            print("Checking domain: %s" % domain)
            form_ids_to_check = set()
            db_aliases = [db] if db else get_db_aliases_for_partitioned_query()
            for alias in db_aliases:
                deprecated_forms = XFormInstanceSQL.objects.using(alias).filter(
                    domain=domain, state=XFormInstanceSQL.DEPRECATED)
                form_ids_to_check.update(
                    deprecated_forms.values_list('orig_id', flat=True)
                )

            print('  Found %s forms to check' % len(form_ids_to_check))
            with self:
                for id_chunk in chunked(form_ids_to_check, 500):
                    check_and_process_forms(id_chunk, self, debug)
Exemplo n.º 19
0
 def _iter_cases(self, case_ids):
     """Return an iterator over the SQL cases fetched for ``case_ids``."""
     fetched = CaseAccessorSQL.get_cases(case_ids)
     return iter(fetched)
Exemplo n.º 20
0
def get_sql_cases(case_ids):
    """Return whatever ``CaseAccessorSQL.get_cases`` yields for ``case_ids``."""
    sql_cases = CaseAccessorSQL.get_cases(case_ids)
    return sql_cases
Exemplo n.º 21
0
 def get_supply_points(supply_point_ids):
     """Return a list of the SQL cases fetched for ``supply_point_ids``."""
     fetched = CaseAccessorSQL.get_cases(supply_point_ids)
     return list(fetched)
Exemplo n.º 22
0
 def get_supply_points(supply_point_ids):
     """Fetch the cases with the given ids and return them as a list."""
     supply_point_cases = CaseAccessorSQL.get_cases(supply_point_ids)
     return list(supply_point_cases)
Exemplo n.º 23
0
 def get_sql_docs(ids):
     """Return a dict mapping ``case_id`` to the SQL case for each id found."""
     docs_by_id = {}
     for sql_case in CaseAccessorSQL.get_cases(ids):
         docs_by_id[sql_case.case_id] = sql_case
     return docs_by_id
Exemplo n.º 24
0
 def iter_all_changes(self, start_from=None):
     """Yield one change per SQL case in ``self.domain``.

     ``start_from`` is accepted but not used by this implementation.
     """
     domain_case_ids = CaseAccessorSQL.get_case_ids_in_domain(self.domain)
     for sql_case in CaseAccessorSQL.get_cases(domain_case_ids):
         change = _sql_case_to_change(sql_case)
         yield change
Exemplo n.º 25
0
 def _iter_cases(self, case_ids):
     """Fetch the SQL cases for ``case_ids`` and wrap the result in ``iter``."""
     sql_cases = CaseAccessorSQL.get_cases(case_ids)
     return iter(sql_cases)
Exemplo n.º 26
0
 def _create_docs(cls, count):
     """Create ``count`` test forms for new random case ids; return the cases.

     The cases are fetched with ``ordered=True`` using the generated ids.
     """
     generated_ids = [uuid.uuid4().hex for _ in range(count)]
     for generated_id in generated_ids:
         create_form_for_test(cls.domain, case_id=generated_id)
     return CaseAccessorSQL.get_cases(generated_ids, ordered=True)
Exemplo n.º 27
0
def process_ucr_changes(domain, case_ids):
    cases = CaseAccessorSQL.get_cases(case_ids)
    docs = [case.to_json() for case in cases]