Пример #1
0
    def handle(self, log_file, **options):
        """Bulk-update household/household_member cases for the
        hki-nepal-suaahara-2 domain.

        Writes the form id of each successful bulk submission, then the
        case ids of every failed update, to ``log_file``.
        """
        self.domain = 'hki-nepal-suaahara-2'
        # Map level-4 location site codes to location ids so per-case
        # processing can resolve locations without extra queries.
        loc_mapping = {}
        locs = SQLLocation.objects.filter(domain=self.domain, level=4)
        for loc in locs:
            loc_mapping[loc.site_code] = loc.location_id

        failed_updates = []
        # Progress-bar total: both case types are processed together.
        household_cases = CaseES().domain(self.domain).case_type('household').count()
        member_cases = CaseES().domain(self.domain).case_type('household_member').count()
        total_cases = household_cases + member_cases
        with open(log_file, "w", encoding='utf-8') as fh:
            # Fix: terminate each log entry with a newline (the original
            # concatenated headers, form ids and case ids into one long
            # line; the sibling command in this file writes '\n'-separated
            # entries).
            fh.write('--------Successful Form Ids----------\n')
            for cases in chunked(with_progress_bar(self._get_cases_to_process(), total_cases), 100):
                cases_to_update = self._process_cases(cases, failed_updates, loc_mapping)
                try:
                    xform, cases = bulk_update_cases(
                        self.domain, cases_to_update, self.__module__)
                    fh.write(xform.form_id + '\n')
                except LocalSubmissionError as e:
                    # Best-effort: record the failed batch and keep going.
                    print(six.text_type(e))
                    failed_updates.extend(case[0] for case in cases_to_update)
            fh.write('--------Failed Cases--------------\n')
            for case_id in failed_updates:
                fh.write(case_id + '\n')
Пример #2
0
    def test_case_soft_deletion(self):
        """Soft-deleting a case must remove its document from the case index."""
        case_id, _ = self._create_case_and_sync_to_es()

        # The freshly created case starts out searchable.
        self.assertEqual(1, CaseES().run().total)

        # Soft delete it while the case pillow is processing changes.
        with self.process_case_changes:
            CaseAccessors(self.domain).soft_delete_cases([case_id])
        self.elasticsearch.indices.refresh(CASE_INDEX_INFO.index)

        # The index must no longer contain the case.
        self.assertEqual(0, CaseES().run().total)
Пример #3
0
    def _cases_created_per_user_per_month(self, case_type=None):
        """Print a table of average cases created per user for each month.

        Buckets cases opened in [self.date_start, self.date_end) by owner
        (top 100 owners), then by month of ``opened_on``, and averages the
        per-user monthly counts.

        :param case_type: optional case type to restrict the query to.
        """
        query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE).domain(
            self.domain).opened_range(
                gte=self.date_start, lt=self.date_end).aggregation(
                    TermsAggregation('cases_per_user', 'owner_id',
                                     size=100).aggregation(
                                         DateHistogram('cases_by_date',
                                                       'opened_on',
                                                       interval='month'))))
        if case_type:
            query = query.case_type(case_type)

        # size(0): only the aggregations are needed, not the case docs.
        results = query.size(0).run()

        # month key -> list of per-user counts observed for that month
        stats = defaultdict(list)
        cases_per_user = results.aggregations.cases_per_user
        for bucket in cases_per_user.buckets_list:
            counts_by_date = {
                b['key_as_string']: b['doc_count']
                for b in bucket.cases_by_date.normalized_buckets
            }
            for key, count in counts_by_date.items():
                stats[key].append(count)

        # Integer-average the counts per month, sorted by month key.
        final_stats = []
        for month, case_count_list in sorted(list(stats.items()),
                                             key=lambda r: r[0]):
            final_stats.append(
                (month, sum(case_count_list) // len(case_count_list)))

        suffix = ''
        if case_type:
            suffix = '(case type: %s)' % case_type
        self._print_table(
            ['Month', 'Cases created per user %s' % suffix], final_stats)
Пример #4
0
    def test_case_soft_deletion(self):
        """Soft deletion must remove the case document from the case index."""
        case_id, _ = self._create_case_and_sync_to_es()

        # The case starts out present in the index.
        self.assertEqual(1, CaseES().run().total)

        # Soft delete while both pillows are consuming changes.
        with process_pillow_changes('case-pillow', {'skip_ucr': True}):
            with process_pillow_changes('DefaultChangeFeedPillow'):
                CaseAccessors(self.domain).soft_delete_cases([case_id])
        self.elasticsearch.indices.refresh(CASE_INDEX_INFO.index)

        # And is gone afterwards.
        self.assertEqual(0, CaseES().run().total)
Пример #5
0
def scroll_case_names(domain, case_ids):
    """Return a scroll iterator of {'name', '_id'} docs for the given cases."""
    return (
        CaseES()
        .domain(domain)
        .case_ids(case_ids)
        .source(['name', '_id'])
        .size(CASE_SCROLL_SIZE)
        .scroll()
    )
Пример #6
0
def _get_case_counts_by_user(domain,
                             datespan,
                             case_types=None,
                             is_opened=True,
                             user_ids=None,
                             export=False):
    """Return {user_id: case count} for cases opened (or closed) in datespan.

    :param is_opened: count by opened_on/opened_by when True, else
        closed_on/closed_by.
    :param export: route the query to the export ES instance.
    """
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'

    window = filters.date_range(
        date_field,
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )
    query = CaseES(
        es_instance_alias=ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE)
    query = (query
             .domain(domain)
             .filter(window)
             .terms_aggregation(user_field, 'by_user')
             .size(0))

    if case_types:
        query = query.case_type(case_types)
    else:
        # No explicit types: exclude the synthetic commcare-user cases.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if user_ids:
        query = query.filter(filters.term(user_field, user_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
Пример #7
0
    def _cases_updated_per_user_per_month(self):
        """Print a table of average cases updated per user for each month.

        Groups by owner (top 100), then uses a nested aggregation over the
        ``actions`` array so each case action is bucketed by the month of
        its ``server_date``.
        """
        results = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE).domain(
            self.domain).active_in_range(
                gte=self.date_start, lt=self.date_end).aggregation(
                    TermsAggregation(
                        'cases_per_user', 'owner_id', size=100).aggregation(
                            NestedAggregation(
                                'actions', 'actions').aggregation(
                                    DateHistogram(
                                        'cases_by_date',
                                        'server_date',
                                        interval='month')))).size(0).run())

        # month key -> list of per-user action counts for that month
        stats = defaultdict(list)
        cases_per_user = results.aggregations.cases_per_user
        for bucket in cases_per_user.buckets_list:
            counts_by_date = {
                b['key_as_string']: b['doc_count']
                for b in bucket.actions.cases_by_date.normalized_buckets
            }
            for key, count in counts_by_date.items():
                stats[key].append(count)

        # Integer-average the counts per month, sorted by month key.
        final_stats = []
        for month, case_count_list in sorted(list(stats.items()),
                                             key=lambda r: r[0]):
            final_stats.append(
                (month, sum(case_count_list) // len(case_count_list)))

        self._print_table(['Month', 'Cases updated per user'], final_stats)
Пример #8
0
    def options(self):
        """Return (case_id, participant full name) choices for the report.

        Web users are restricted to participants owned by one of their
        groups or by themselves.
        """
        query = (CaseES("report_cases")
                 .domain(self.domain)
                 .case_type("participant"))
        user = self.request.couch_user
        if user.is_web_user():
            group_ids = [g.lower() for g in user.get_group_ids() if g]
            if group_ids:
                query = query.filter(
                    filters.OR(owner(group_ids),
                               user_filter(user._id.lower())))
            else:
                # No groups: fall back to cases the user touched directly.
                query = query.user(user._id.lower())

        return [
            (case['_id'], case['full_name']['#value'])
            for case in query.values("full_name.#value")
        ]
Пример #9
0
    def test_location_restricted_cases(self):
        """A location-restricted user should only see cases owned at or
        below their assigned location."""
        domain_obj = bootstrap_domain(self.domain)
        self.addCleanup(domain_obj.delete)

        # Three-level hierarchy: state -> county -> city.
        location_type_names = ['state', 'county', 'city']
        location_structure = [('Massachusetts', [('Middlesex', [
            ('Cambridge', []),
            ('Somerville', []),
        ]), ('Suffolk', [
            ('Boston', []),
        ])])]
        locations = setup_locations_and_types(self.domain, location_type_names,
                                              [], location_structure)[1]
        middlesex_user = CommCareUser.create(self.domain, 'guy-from-middlesex',
                                             '***', None, None)

        # Restrict the user to the Middlesex subtree.
        middlesex_user.add_to_assigned_locations(locations['Middlesex'])
        restrict_user_by_location(self.domain, middlesex_user)

        fake_request = MagicMock()
        fake_request.domain = self.domain
        fake_request.couch_user = middlesex_user

        # Boston is outside the user's subtree; Middlesex and its child
        # Cambridge are inside it.
        self._send_case_to_es(owner_id=locations['Boston'].get_id)
        middlesex_case = self._send_case_to_es(
            owner_id=locations['Middlesex'].get_id)
        cambridge_case = self._send_case_to_es(
            owner_id=locations['Cambridge'].get_id)

        returned_case_ids = query_location_restricted_cases(
            CaseES().domain(self.domain), fake_request).get_ids()
        # NOTE(review): assertItemsEqual is the Python 2 spelling
        # (assertCountEqual in py3) -- presumably supplied by the test base
        # class; confirm before a py3-only cleanup.
        self.assertItemsEqual(returned_case_ids,
                              [middlesex_case.case_id, cambridge_case.case_id])
Пример #10
0
def get_case_export_base_query(domain, case_type):
    """Return the base ES query for exporting ``case_type`` cases in ``domain``."""
    query = CaseES().domain(domain).case_type(case_type)
    if EXPORT_NO_SORT.enabled(domain):
        # Toggle opts this domain out of the default export ordering.
        return query
    return query.sort("opened_on")
Пример #11
0
 def _get_es_modified_dates(case_ids):
     """Return {case_id: (server_modified_on datetime or None, domain)} from ES.

     Fix: guard against a missing/empty ``server_modified_on`` before
     parsing (the ``for_export`` variant of this helper already does), so
     a case doc without the field does not raise in
     ``iso_string_to_datetime``.
     """
     results = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE).case_ids(
         case_ids).values_list('_id', 'server_modified_on', 'domain'))
     return {
         _id: (iso_string_to_datetime(server_modified_on)
               if server_modified_on else None, domain)
         for _id, server_modified_on, domain in results
     }
Пример #12
0
def _get_case_counts_by_user(domain, datespan, case_types=None, is_opened=True, owner_ids=None):
    """Return {user_id: count} of cases opened (or closed) within ``datespan``."""
    if is_opened:
        date_field, user_field = 'opened_on', 'opened_by'
    else:
        date_field, user_field = 'closed_on', 'closed_by'

    window = filters.date_range(
        date_field,
        gte=datespan.startdate.date(),
        lte=datespan.enddate.date(),
    )
    query = (CaseES()
             .domain(domain)
             .filter(window)
             .terms_aggregation(user_field, 'by_user')
             .size(0))

    if case_types:
        query = query.case_type(case_types)
    else:
        # No explicit types: exclude the synthetic commcare-user cases.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if owner_ids:
        query = query.filter(filters.term(user_field, owner_ids))

    return query.run().aggregations.by_user.counts_by_bucket()
Пример #13
0
 def handle(self, domain, log_file, **options):
     """Bulk-update all cases related to closed 'household' cases in
     ``domain``, logging successful form ids and failed case ids to
     ``log_file``."""
     # Progress-bar total: number of closed household cases per ES.
     total_cases = CaseES().domain(domain).case_type(
         'household').is_closed().count()
     self.case_accessor = CaseAccessors(domain)
     failed_updates = []
     with open(log_file, "w", encoding='utf-8') as fh:
         fh.write('--------Successful Form Ids----------\n')
         for cases in chunked(
                 with_progress_bar(self._get_cases_to_process(domain),
                                   total_cases), 100):
             related_cases = self._get_related_cases(cases)
             # (case_id, update-dict, flag) tuples for bulk_update_cases;
             # the empty dict means no property changes. The third item is
             # presumably a 'close' flag -- confirm against the helper.
             case_tupes = [(case_id, {}, True) for case_id in related_cases]
             try:
                 xform, cases = bulk_update_cases(domain, case_tupes,
                                                  self.__module__)
                 fh.write(xform.form_id + '\n')
             except LocalSubmissionError as e:
                 print('submission error')
                 print(six.text_type(e))
                 failed_updates.extend(related_cases)
             except Exception as e:
                 # Best-effort batch processing: record the batch and
                 # continue rather than aborting the whole run.
                 print('unexpected error')
                 print(six.text_type(e))
                 failed_updates.extend(related_cases)
         fh.write('--------Failed Cases--------------\n')
         for case_id in failed_updates:
             fh.write(case_id)
         print('-------------COMPLETE--------------')
Пример #14
0
class ReportCaseReindexerTest(TestCase):
    """Tests the 'report-case' reindexer against the report_cases index."""

    def setUp(self):
        super(ReportCaseReindexerTest, self).setUp()
        # Start from a clean slate: no forms, no cases, no ES index.
        FormProcessorTestUtils.delete_all_xforms()
        FormProcessorTestUtils.delete_all_cases()
        with trap_extra_setup(ConnectionError):
            self.elasticsearch = get_es_new()
            ensure_index_deleted(REPORT_CASE_INDEX_INFO.index)

    def tearDown(self):
        # Mirror setUp so state never leaks between tests.
        FormProcessorTestUtils.delete_all_xforms()
        FormProcessorTestUtils.delete_all_cases()
        ensure_index_deleted(REPORT_CASE_INDEX_INFO.index)
        super(ReportCaseReindexerTest, self).tearDown()

    @run_with_all_backends
    def test_report_case_reindexer(self):
        """Reindexing picks up DOMAIN's cases and skips other domains."""
        cases_included = set()
        for i in range(3):
            case = create_and_save_a_case(DOMAIN,
                                          uuid.uuid4().hex,
                                          'case_name-{}'.format(i))
            cases_included.add(case.case_id)

        # excluded case: the 'unsupported' domain is presumably not enabled
        # for report cases, so the reindexer should skip it.
        create_and_save_a_case('unsupported', uuid.uuid4().hex, 'unsupported')

        reindex_and_clean('report-case')

        # Exactly the three DOMAIN cases should be in the index.
        results = CaseES("report_cases").run()
        self.assertEqual(3, results.total, results.hits)
        ids_in_es = {doc['_id'] for doc in results.hits}
        self.assertEqual(cases_included, ids_in_es)
Пример #15
0
def _get_case_case_counts_by_owner(domain,
                                   datespan,
                                   case_types,
                                   is_total=False,
                                   owner_ids=None,
                                   export=False):
    """Return {owner_id: case count} for cases open during ``datespan``.

    :param is_total: when False, additionally require the case to have
        been active within the datespan.
    :param export: route the query to the export ES instance.
    """
    es_instance = ES_EXPORT_INSTANCE if export else ES_DEFAULT_INSTANCE
    # Opened on/before the end date AND NOT closed before the start date,
    # i.e. open at some point during the window.
    case_query = (CaseES(
        es_instance_alias=es_instance).domain(domain).opened_range(
            lte=datespan.enddate.date()).NOT(
                closed_range_filter(
                    lt=datespan.startdate.date())).terms_aggregation(
                        'owner_id', 'owner_id').size(0))

    if case_types:
        case_query = case_query.filter({"terms": {"type.exact": case_types}})
    else:
        # No explicit types: exclude the synthetic commcare-user cases.
        case_query = case_query.filter(
            filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        # Restrict to cases actually active within the window.
        case_query = case_query.active_in_range(
            gte=datespan.startdate.date(),
            lte=datespan.enddate.date(),
        )

    if owner_ids:
        case_query = case_query.owner(owner_ids)

    return case_query.run().aggregations.owner_id.counts_by_bucket()
Пример #16
0
 def _get_es_modified_dates(case_ids):
     """Return {case_id: (parsed server_modified_on or None, domain)} from ES."""
     rows = CaseES(for_export=True).case_ids(case_ids).values_list(
         '_id', 'server_modified_on', 'domain')
     dates_by_id = {}
     for case_id, modified_on, domain in rows:
         # Docs may lack server_modified_on; map those to None.
         parsed = iso_string_to_datetime(modified_on) if modified_on else None
         dates_by_id[case_id] = (parsed, domain)
     return dates_by_id
Пример #17
0
def get_case_export_base_query(domain, case_type):
    """Return the base export-instance ES query for ``case_type`` cases."""
    query = (CaseES(es_instance_alias=ES_EXPORT_INSTANCE)
             .domain(domain)
             .case_type(case_type))
    if EXPORT_NO_SORT.enabled(domain):
        # Toggle opts this domain out of the default export ordering.
        return query
    return query.sort("opened_on")
Пример #18
0
 def options(self):
     """Return dropdown options for every distinct care-site display value."""
     res = (CaseES('report_cases')
            .domain(self.domain)
            .exists('care_site_display.#value')
            .source('care_site_display')
            .run())
     # Deduplicate display values across all matching cases.
     care_sites = {hit['care_site_display']['#value'] for hit in res.hits}
     return [{'val': site, 'text': site} for site in care_sites]
Пример #19
0
def get_case_ids_missing_from_elasticsearch(all_case_ids):
    """Return the subset of ``all_case_ids`` that elasticsearch does not have."""
    missing = set()
    for chunk in chunked(all_case_ids, 500):
        wanted = set(chunk)
        found = set(CaseES().doc_id(wanted).get_ids())
        # Sanity check: ES must never return ids we did not ask for.
        assert found - wanted == set()
        missing.update(wanted - found)
    return list(missing)
Пример #20
0
def gen_children_only_ours(domain):
    """Yield wrapped child_gmp cases in ``domain`` whose external_id is empty."""
    result = (CaseES()
              .domain(domain)
              .case_type(CASE_TYPE)
              .empty('external_id')
              .run())
    if not result.total:
        return
    for doc in result.hits:
        yield CommCareCase.wrap(doc)
 def _get_child_cases(self, household_ids):
     """Run a query for open 'person' cases indexed against the households."""
     index_filter = filters.term("indices.referenced_id", household_ids)
     query = CaseES(es_instance_alias='export').domain('icds-cas')
     query = query.case_type('person').is_closed(False)
     query = query.source(SOURCE_FIELDS).filter(index_filter)
     return query.run()
 def _get_closed_hh_cases(self, owners):
     """Scroll closed 'household' cases owned by ``owners`` in icds-cas."""
     wanted_fields = ['case_id', 'closed_on', 'name']
     query = CaseES(es_instance_alias='export').is_closed()
     query = query.domain('icds-cas').case_type('household')
     query = query.owner(owners).source(wanted_fields).size(100)
     return query.scroll()
Пример #23
0
    def test_report_case_pillow(self):
        """A synced case must land in the report_cases index with its fields."""
        case_id, case_name = self._create_case_and_sync_to_es(DOMAIN)

        # The change should have reached elasticsearch.
        results = CaseES('report_cases').run()
        self.assertEqual(1, results.total)

        doc = results.hits[0]
        self.assertEqual(DOMAIN, doc['domain'])
        self.assertEqual(case_id, doc['_id'])
        self.assertEqual(case_name, doc['name'])
Пример #24
0
 def get_duplicate_id_case_info(domain, case_type, limit_debug_to=None):
     """Build a context dict describing duplicate-id cases for ``case_type``."""
     total_cases = CaseES().domain(domain).case_type(case_type).count()
     bad_cases = get_duplicated_case_stubs(domain, case_type)
     add_debug_info_to_cases(bad_cases, limit_debug_to)
     num_bad = len(bad_cases)
     return {
         'case_type': case_type,
         'num_bad_cases': num_bad,
         'num_total_cases': total_cases,
         'num_good_cases': total_cases - num_bad,
         'bad_cases': bad_cases,
     }
Пример #25
0
def run_messaging_rule(domain, rule_id):
    """Fan a messaging rule run out across all partition shards."""
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    # ES count of the rule's case type, used for progress reporting.
    total = CaseES().domain(domain).case_type(rule.case_type).count()
    progress_helper.set_total_cases_to_be_processed(total)

    shards = get_db_aliases_for_partitioned_query()
    progress_helper.set_initial_progress(shard_count=len(shards))
    for shard in shards:
        run_messaging_rule_for_shard.delay(domain, rule_id, shard)
Пример #26
0
 def print_totals(self, domains):
     """Print a tab-aligned table of CaseES vs CaseSearchES counts per domain.

     Column alignment assumes 8-column tab stops; the shared spacer is
     sized from the longest domain name.
     """
     # Enough tabs to clear the longest domain name (8 chars per tab stop).
     max_space = '\t' * (int(max([len(x) for x in domains]) / 8) + 2)
     header = 'Domain{}CaseES\t\tCaseSearchES\n'.format(max_space)
     divider = '{}\n'.format('*' * (len(header) + len(max_space) * 8))
     self.stdout.write(divider)
     self.stdout.write(header)
     self.stdout.write(divider)
     for domain in domains:
         # Drop the tab stops the domain name itself already spans.
         spacer = max_space[int(len(domain) / 8):]
         total_case_es = CaseES().domain(domain).count()
         total_case_search = CaseSearchES().domain(domain).count()
         self.stdout.write('{domain}{spacer}{case_es}\t\t{case_search}\n'.format(
             domain=domain,
             spacer=spacer,
             case_es=total_case_es,
             case_search=total_case_search,
         ))
Пример #27
0
    def test_case_pillow_error_in_case_es(self):
        """When both case transforms raise, neither index gets the doc and a
        single PillowError is recorded for the combined case pillow."""
        self.assertEqual(0, PillowError.objects.filter(pillow='case-pillow').count())
        # Force both the case and case-search transforms to raise while the
        # change is processed.
        with patch('corehq.pillows.case_search.domain_needs_search_index', return_value=True), \
            patch('corehq.pillows.case.transform_case_for_elasticsearch') as case_transform, \
            patch('corehq.pillows.case_search.transform_case_for_elasticsearch') as case_search_transform:
                case_transform.side_effect = Exception('case_transform error')
                case_search_transform.side_effect = Exception('case_search_transform error')
                case_id, case_name = self._create_case_and_sync_to_es()

        # confirm change did not make it to case search index
        results = CaseSearchES().run()
        self.assertEqual(0, results.total)

        # confirm change did not make it to case index
        results = CaseES().run()
        self.assertEqual(0, results.total)

        # One error row for the pillow, despite two failing transforms.
        self.assertEqual(1, PillowError.objects.filter(pillow='case-pillow').count())
Пример #28
0
def get_case_and_action_counts_for_domains(domains):
    """Return {domain: {'cases': n, 'case_actions': m}} via one ES aggregation."""
    agg = aggregations.TermsAggregation('domain', 'domain').aggregation(
        aggregations.NestedAggregation('actions', 'actions'))
    buckets = (CaseES()
               .filter(filters.term('domain', domains))
               .aggregation(agg)
               .size(0)
               .run()).aggregations.domain.buckets_dict

    def _stats(name):
        # Domains with no cases have no bucket at all.
        bucket = buckets.get(name, None)
        if not bucket:
            return {'cases': 0, 'case_actions': 0}
        return {
            'cases': bucket.doc_count,
            'case_actions': bucket.actions.doc_count,
        }

    return {name: _stats(name) for name in domains}
Пример #29
0
def _get_case_case_counts_by_owner(domain, datespan, case_types, is_total=False, owner_ids=None):
    """Return {owner_id: case count} for cases open during ``datespan``."""
    # Opened by the end date and NOT closed before the start date,
    # i.e. open at some point during the window.
    query = (CaseES()
             .domain(domain)
             .opened_range(lte=datespan.enddate)
             .NOT(closed_range_filter(lt=datespan.startdate))
             .terms_aggregation('owner_id', 'owner_id')
             .size(0))

    if case_types:
        query = query.filter({"terms": {"type.exact": case_types}})
    else:
        # No explicit types: exclude the synthetic commcare-user cases.
        query = query.filter(filters.NOT(case_type_filter('commcare-user')))

    if not is_total:
        # Restrict to cases actually active within the window.
        query = query.active_in_range(
            gte=datespan.startdate,
            lte=datespan.enddate
        )

    if owner_ids:
        query = query.owner(owner_ids)

    return query.run().aggregations.owner_id.counts_by_bucket()
Пример #30
0
def run_messaging_rule(domain, rule_id):
    """Kick off an asynchronous messaging-rule run over the domain's cases.

    SQL-backed domains fan out one task per partition shard; other domains
    queue one task per case sequentially.
    """
    rule = _get_cached_rule(domain, rule_id)
    if not rule:
        return
    progress_helper = MessagingRuleProgressHelper(rule_id)
    # ES count of the rule's case type, used for progress reporting.
    total_cases_count = CaseES().domain(domain).case_type(
        rule.case_type).count()
    progress_helper.set_total_cases_to_be_processed(total_cases_count)

    def _run_rule_sequentially():
        # Queue one task per case; every 1000 cases refresh the progress
        # key's expiry and honor a cancellation request.
        incr = 0
        progress_helper.set_initial_progress()
        for case_id in get_case_ids_for_messaging_rule(domain, rule.case_type):
            sync_case_for_messaging_rule.delay(domain, case_id, rule_id)
            incr += 1
            if incr >= 1000:
                incr = 0
                progress_helper.update_total_key_expiry()
                if progress_helper.is_canceled():
                    break

        # By putting this task last in the queue, the rule should be marked
        # complete at about the time that the last tasks are finishing up.
        # This beats saving the task results in the database and using a
        # celery chord which would be more taxing on system resources.
        set_rule_complete.delay(rule_id)

    def _run_rule_on_multiple_shards():
        # One task per partition DB; each shard reports its own progress.
        db_aliases = get_db_aliases_for_partitioned_query()
        progress_helper.set_initial_progress(shard_count=len(db_aliases))
        for db_alias in db_aliases:
            run_messaging_rule_for_shard.delay(domain, rule_id, db_alias)

    if should_use_sql_backend(domain):
        _run_rule_on_multiple_shards()
    else:
        _run_rule_sequentially()
Пример #31
0
def scroll_case_names(domain, case_ids):
    """Scroll over name/_id documents for the requested cases."""
    query = (CaseES()
             .domain(domain)
             .case_ids(case_ids)
             .source(["name", "_id"])
             .size(CASE_SCROLL_SIZE))
    return query.scroll()