Example #1
def populate_report_data(start_date, end_date, domain, runner, locations=None, strict=True):
    # first populate all the warehouse tables for all facilities
    # hard coded to know this is the first date with data
    start_date = max(start_date, default_start_date())

    # For QA purposes, generate reporting data for only a small subset of the data.
    if not ILSGatewayConfig.for_domain(domain).all_stock_data:
        if locations is None:
            locations = _get_test_locations(domain)
        facilities = filter(lambda location: location.location_type == 'FACILITY', locations)
        non_facilities_types = ['DISTRICT', 'REGION', 'MSDZONE', 'MOHSW']
        non_facilities = []
        for location_type in non_facilities_types:
            non_facilities.extend(filter(lambda location: location.location_type == location_type, locations))
    else:
        facilities = Location.filter_by_type(domain, 'FACILITY')
        non_facilities = list(Location.filter_by_type(domain, 'DISTRICT'))
        non_facilities += list(Location.filter_by_type(domain, 'REGION'))
        non_facilities += list(Location.filter_by_type(domain, 'MSDZONE'))
        non_facilities += list(Location.filter_by_type(domain, 'MOHSW'))

    if runner.location:
        if runner.location.location_type.name.upper() != 'FACILITY':
            facilities = []
            non_facilities = itertools.dropwhile(
                lambda location: location._id != runner.location.location_id,
                non_facilities
            )
        else:
            facilities = itertools.dropwhile(
                lambda location: location._id != runner.location.location_id,
                facilities
            )

    facilities_chunked_list = chunked(facilities, 5)
    for chunk in facilities_chunked_list:
        res = chain(process_facility_warehouse_data.si(fac, start_date, end_date, runner) for fac in chunk)()
        res.get()

    non_facilities_chunked_list = chunked(non_facilities, 50)

    # then populate everything above a facility off a warehouse table
    for chunk in non_facilities_chunked_list:
        res = chain(
            process_non_facility_warehouse_data.si(org, start_date, end_date, runner, strict)
            for org in chunk
        )()
        res.get()
    runner.location = None
    runner.save()
    # finally go back through the history and initialize empty data for any
    # newly created facilities
    update_historical_data(domain)
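
Example #1 combines chunked with Celery canvas primitives: each chunk of five facilities becomes a chain of immutable subtask signatures (.si() means a subtask ignores the previous task's return value), and res.get() blocks until that chain finishes before the next chunk is dispatched. A stripped-down sketch of the same pattern follows; process_item and process_in_serial_chunks are hypothetical names, not part of the source.

from celery import chain, shared_task

from dimagi.utils.chunked import chunked


@shared_task
def process_item(item_id, start_date, end_date):
    ...  # per-item work would go here


def process_in_serial_chunks(item_ids, start_date, end_date, chunk_size=5):
    # One chain of immutable signatures per chunk; res.get() waits for the
    # chunk to complete before the next chunk is queued.
    for chunk in chunked(item_ids, chunk_size):
        res = chain(process_item.si(item_id, start_date, end_date)
                    for item_id in chunk)()
        res.get()
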
Example #2
def populate_report_data(start_date, end_date, domain, runner, locations=None, strict=True):
    # first populate all the warehouse tables for all facilities
    # hard coded to know this is the first date with data
    start_date = max(start_date, default_start_date())

    # For QA purposes, generate reporting data for only a small subset of the data.
    if not ILSGatewayConfig.for_domain(domain).all_stock_data:
        if locations is None:
            locations = _get_test_locations(domain)
        facilities = filter(lambda location: location.location_type == "FACILITY", locations)
        non_facilities_types = ["DISTRICT", "REGION", "MSDZONE", "MOHSW"]
        non_facilities = []
        for location_type in non_facilities_types:
            non_facilities.extend(filter(lambda location: location.location_type == location_type, locations))
    else:
        facilities = Location.filter_by_type(domain, "FACILITY")
        non_facilities = list(Location.filter_by_type(domain, "DISTRICT"))
        non_facilities += list(Location.filter_by_type(domain, "REGION"))
        non_facilities += list(Location.filter_by_type(domain, "MSDZONE"))
        non_facilities += list(Location.filter_by_type(domain, "MOHSW"))

    if runner.location:
        if runner.location.location_type.name.upper() != "FACILITY":
            facilities = []
            non_facilities = itertools.dropwhile(
                lambda location: location.location_id != runner.location.location_id, non_facilities
            )
        else:
            facilities = itertools.dropwhile(
                lambda location: location.location_id != runner.location.location_id, facilities
            )

    facilities_chunked_list = chunked(facilities, 5)
    for chunk in facilities_chunked_list:
        res = chain(process_facility_warehouse_data.si(fac, start_date, end_date, runner) for fac in chunk)()
        res.get()

    non_facilities_chunked_list = chunked(non_facilities, 50)

    # then populate everything above a facility off a warehouse table
    for chunk in non_facilities_chunked_list:
        res = chain(
            process_non_facility_warehouse_data.si(org, start_date, end_date, runner, strict) for org in chunk
        )()
        res.get()

    runner.location = None
    runner.save()
Example #3
def update_supply_points(domain):
    device_id = __name__ + ".update_supply_points"
    case_blocks = (case_block(c) for c in get_cases(domain) if needs_update(c))
    # Note: case_blocks is a generator, so a truthiness check on it would always
    # pass; chunked() simply yields nothing when the generator is empty.
    for chunk in chunked(case_blocks, 100):
        submit_case_blocks(chunk, domain, device_id=device_id)
        print("updated {} cases on domain {}".format(len(chunk), domain))
Example #4
def _copy(config):
    # unfortunately the only couch view we have for this needs to go by domain
    # will be a bit slow
    database = Domain.get_db()
    assert database.uri == config.source_db.uri, 'can only use "copy" with the main HQ DB as the source'
    domain_names = Domain.get_all_names()
    for domain in domain_names:
        for doc_type in config.doc_types:
            ids_of_this_type = [row['id'] for row in database.view(
                'domain/docs',
                startkey=[domain, doc_type],
                endkey=[domain, doc_type, {}],
                reduce=False,
                include_docs=False,
            )]
            if ids_of_this_type:
                new_revs = dict([
                    (row['id'], row['value']['rev'])
                    for row in config.dest_db.view('_all_docs', keys=ids_of_this_type, include_docs=False)
                    if 'error' not in row
                ])
                for id_group in chunked(ids_of_this_type, 500):
                    docs = get_docs(database, id_group)
                    for doc in docs:
                        if doc['_id'] in new_revs:
                            doc['_rev'] = new_revs[doc['_id']]
                    config.dest_db.bulk_save(docs)

            print('copied {} {}s from {}'.format(len(ids_of_this_type), doc_type, domain))
    print('copy docs complete')
Example #5
    def _get_view_results(self, total, chunk_size, doc_type="Bar"):
        doc_id_prefix = '{}-'.format(doc_type.lower())
        results = [(
            {"endkey": [doc_type, {}], "group_level": 1, "reduce": True, "startkey": [doc_type]},
            [{"key": doc_type, "value": total}]
        )]
        for chunk in chunked(list(range(total)), chunk_size):
            chunk_rows = [self._get_row(ident, doc_type=doc_type) for ident in chunk]
            if chunk[0] == 0:
                results.append((
                    {
                        'startkey': [doc_type], 'endkey': [doc_type, {}], 'reduce': False,
                        'limit': chunk_size, 'include_docs': True
                    },
                    chunk_rows
                ))
            else:
                previous = '{}{}'.format(doc_id_prefix, chunk[0] - 1)
                results.append((
                    {
                        'endkey': [doc_type, {}], 'skip': 1, 'startkey_docid': previous, 'reduce': False,
                        'startkey': [doc_type, previous], 'limit': chunk_size, 'include_docs': True
                    },
                    chunk_rows
                ))

        return results
Example #6
def _delete_all_cases(domain_name):
    logger.info('Deleting cases...')
    case_accessor = CaseAccessors(domain_name)
    case_ids = case_accessor.get_case_ids_in_domain()
    for case_id_chunk in chunked(with_progress_bar(case_ids, stream=silence_during_tests()), 500):
        case_accessor.soft_delete_cases(list(case_id_chunk))
    logger.info('Deleting cases complete.')
Example #7
 def handle(self, child_file, **options):
     relevant_districts = SQLLocation.objects.filter(domain='icds-cas',
                                                     location_id__in=['d982a6fb4cca0824fbde59db18d2d422',
                                                                      '0ffe4a1f110ffc17bb9b749abdfd697c'])
     owners = SQLLocation.objects.get_queryset_descendants(relevant_districts, include_self=True)
     owner_name_mapping = {loc.location_id: loc.name for loc in owners}
     hh_cases = self._get_closed_hh_cases(list(owner_name_mapping))
     with open(child_file, 'w', encoding='utf-8') as child_csv:
         child_writer = csv.writer(child_csv)
         child_writer.writerow(CSV_HEADERS)
         for cases in chunked(with_progress_bar(hh_cases, hh_cases.count), 500):
             household_ids = []
             hh_map = {}
             for hh in cases:
                 hh_map[hh['case_id']] = (hh['name'].encode('utf-8'), hh.get('closed_on', '').encode('utf-8'))
                 household_ids.append(hh['case_id'])
             child_cases = self._get_child_cases(household_ids)
             ids = set(household_ids)
             for child in child_cases.hits:
                 parent_index = next(
                     (index for index in child['indices']
                      if index['referenced_id'] in ids and index['identifier'] == 'parent'),
                     None
                 )
                 if parent_index:
                     hh_id = parent_index['referenced_id']
                     row = [child.get(prop, '').encode('utf-8') for prop in CHILD_PROPERTIES]
                     row.append(owner_name_mapping.get(child.get('owner_id', ''), '').encode('utf-8'))
                     hh_info = (hh_id, hh_map[hh_id][0], hh_map[hh_id][1])
                     row.extend(hh_info)
                     child_writer.writerow(row)
Example #8
    def handle_one(self, domain, case_type, chunk_size):
        self.log('Copying {case_type} cases in {domain}'
                 .format(case_type=case_type, domain=domain))
        old_db = CommCareCase.get_db()
        new_db = IndicatorCase.get_db()
        assert old_db.uri != new_db.uri
        # this dbaccessor pulls from old_db
        case_ids = get_case_ids_in_domain(domain, case_type)
        self.delete_bad_doc_types(case_ids, chunk_size)
        case_dict_chunks = chunked(iter_docs(old_db, case_ids, chunk_size),
                                   chunk_size)

        for case_dicts in case_dict_chunks:
            for case_dict in case_dicts:
                del case_dict['_rev']
                case_dict.pop('_attachments', None)
                case_dict['doc_type'] = "IndicatorCase"
            try:
                results = new_db.bulk_save(case_dicts)
            except BulkSaveError as error:
                results = error.results
            for result in results:
                if result.get('error') == 'conflict':
                    self.log('- OK: [{id}] is already in the indicator db'
                             .format(id=result.get('id')))
                elif 'error' in result:
                    self.log('- ERROR: [{id}] ({result})'.format(
                        id=result.get('id'),
                        result=json.dumps(result)
                    ))
                else:
                    self.log('- ADDED: [{id}] saved to indicator db'.format(
                        id=result.get('id')
                    ))
Example #9
def recalculate_stagnant_cases():
    domain = 'icds-cas'
    config_ids = [
        'static-icds-cas-static-ccs_record_cases_monthly_v2',
        'static-icds-cas-static-ccs_record_cases_monthly_tableau_v2',
        'static-icds-cas-static-child_cases_monthly_v2',
    ]

    stagnant_cases = set()

    for config_id in config_ids:
        config, is_static = get_datasource_config(config_id, domain)
        adapter = get_indicator_adapter(config)
        case_ids = _find_stagnant_cases(adapter)
        celery_task_logger.info(
            "Found {} stagnant cases in config {}".format(len(case_ids), config_id)
        )
        stagnant_cases = stagnant_cases.union(set(case_ids))
        celery_task_logger.info(
            "Total number of stagant cases is now {}".format(len(stagnant_cases))
        )

    case_accessor = CaseAccessors(domain)
    num_stagnant_cases = len(stagnant_cases)
    current_case_num = 0
    for case_ids in chunked(stagnant_cases, 1000):
        current_case_num += len(case_ids)
        cases = case_accessor.get_cases(list(case_ids))
        for case in cases:
            publish_case_saved(case, send_post_save_signal=False)
        celery_task_logger.info(
            "Resaved {} / {} cases".format(current_case_num, num_stagnant_cases)
        )
Example #10
    def _iter_update(doc_ids, try_num):
        with IterDB(db, chunksize=100) as iter_db:
            for chunk in chunked(set(doc_ids), 100):
                for res in send_keys_to_couch(db, keys=chunk):
                    raw_doc = res.get('doc')
                    doc_id = res.get('id', None)
                    if not raw_doc or not doc_id:
                        results.not_found_ids.add(res['key'])
                    else:
                        # copy the dictionary so we can tell if it changed
                        doc_update = fn(deepcopy(raw_doc))
                        if doc_update is None:
                            results.ignored_ids.add(doc_id)
                        elif (not isinstance(doc_update, DocUpdate)
                              or doc_update.doc.get('_id') != doc_id):
                            results.error_ids.add(doc_id)
                        elif doc_update.delete:
                            iter_db.delete(raw_doc)
                        elif not _is_unchanged(doc_update.doc, raw_doc):
                            iter_db.save(doc_update.doc)
                        else:
                            results.ignored_ids.add(doc_id)

        results.updated_ids.update(iter_db.saved_ids)
        results.deleted_ids.update(iter_db.deleted_ids)

        if iter_db.error_ids:
            if try_num >= max_retries:
                results.error_ids.update(iter_db.error_ids)
                msg = ("The following documents did not correctly save:\n" +
                       ", ".join(results.error_ids))
                raise IterUpdateError(results, msg)
            else:
                _iter_update(iter_db.error_ids, try_num + 1)
Example #11
def rebuild_indicators(indicator_config_id):
    config = _get_config_by_id(indicator_config_id)
    adapter = IndicatorSqlAdapter(config)
    couchdb = _get_db(config.referenced_doc_type)
    redis_client = get_redis_client().client.get_client()
    redis_key = _get_redis_key_for_config(config)

    if not is_static(indicator_config_id):
        # Save the start time now in case anything goes wrong. This way we'll be
        # able to see if the rebuild started a long time ago without finishing.
        config.meta.build.initiated = datetime.datetime.utcnow()
        config.meta.build.finished = False
        config.save()
        redis_key = _get_redis_key_for_config(config)

    adapter.rebuild_table()
    relevant_ids = get_doc_ids_in_domain_by_type(
        config.domain,
        config.referenced_doc_type,
        database=couchdb,
    )
    for docs in chunked(relevant_ids, 1000):
        redis_client.sadd(redis_key, *docs)

    _build_indicators(indicator_config_id, relevant_ids)
Example #12
def iter_location_join_supply_point(all_location_ids, chunksize=100):

    # this function was copy-paste-modified from iter_docs

    database = Location.get_db()
    for location_ids in chunked(all_location_ids, chunksize):
        # sync supply point id
        locations = [row.get('doc')
                     for row in get_docs(database, keys=location_ids)
                     if row.get('doc')
                     and row.get('doc')['domain'] not in EXCLUDE_DOMAINS]

        supply_points = SupplyPointCase.view(
            'commtrack/supply_point_by_loc',
            keys=[[location['domain'], location['_id']]
                  for location in locations],
            include_docs=True,
            classes={'CommCareCase': SupplyPointCase},
        ).all()

        supply_points_index = {}

        for supply_point in supply_points:
            key = (supply_point.domain, supply_point.location_id)
            if key in supply_points_index:
                raise Exception(
                    "Multiple supply points have "
                    "domain={!r}, location_id={!r}".format(*key))
            supply_points_index[key] = supply_point

        for location in locations:
            yield (
                location,
                supply_points_index.get((location['domain'], location['_id']))
            )
Example #13
    def handle(self, log_file, **options):
        self.domain = 'hki-nepal-suaahara-2'
        loc_mapping = {}
        locs = SQLLocation.objects.filter(domain=self.domain, level=4)
        for loc in locs:
            loc_mapping[loc.site_code] = loc.location_id

        failed_updates = []
        household_cases = CaseES().domain(self.domain).case_type('household').count()
        member_cases = CaseES().domain(self.domain).case_type('household_member').count()
        total_cases = household_cases + member_cases
        with open(log_file, "w", encoding='utf-8') as fh:
            fh.write('--------Successful Form Ids----------')
            for cases in chunked(with_progress_bar(self._get_cases_to_process(), total_cases), 100):
                cases_to_update = self._process_cases(cases, failed_updates, loc_mapping)
                try:
                    xform, cases = bulk_update_cases(
                        self.domain, cases_to_update, self.__module__)
                    fh.write(xform.form_id)
                except LocalSubmissionError as e:
                    print(six.text_type(e))
                    failed_updates.extend(case[0] for case in cases_to_update)
            fh.write('--------Failed Cases--------------')
            for case_id in failed_updates:
                fh.write(case_id)
Example #14
 def handle(self, domain, log_file, **options):
     total_cases = CaseES().domain(domain).case_type('household').is_closed().count()
     self.case_accessor = CaseAccessors(domain)
     failed_updates = []
     with open(log_file, "w", encoding='utf-8') as fh:
         fh.write('--------Successful Form Ids----------\n')
         for cases in chunked(with_progress_bar(self._get_cases_to_process(domain), total_cases), 100):
             related_cases = self._get_related_cases(cases)
             case_tuples = [(case_id, {}, True) for case_id in related_cases]
             try:
                 xform, cases = bulk_update_cases(
                     domain, case_tuples, self.__module__)
                 fh.write(xform.form_id + '\n')
             except LocalSubmissionError as e:
                 print('submission error')
                 print(six.text_type(e))
                 failed_updates.extend(related_cases)
             except Exception as e:
                 print('unexpected error')
                 print(six.text_type(e))
                 failed_updates.extend(related_cases)
         fh.write('--------Failed Cases--------------\n')
         for case_id in failed_updates:
             fh.write(case_id)
         print('-------------COMPLETE--------------')
Example #15
    def _delete_couch_data(self):
        for doc_class, doc_ids in get_doc_ids_to_dump(self.domain_name):
            db = doc_class.get_db()
            for docs in chunked(iter_docs(db, doc_ids), 100):
                db.bulk_delete(docs)

            self.assertEqual(0, len(get_docs(db, doc_ids)))
Example #16
def stock_data_task(api_object):
    # checkpoint logic
    start_date = datetime.today()
    default_api = api_object.apis[0][0]
    checkpoint, _ = StockDataCheckpoint.objects.get_or_create(
        domain=api_object.domain,
        defaults={
            "api": default_api,
            "date": None,
            "limit": 1000,
            "offset": 0,
            "location": None,
            "start_date": start_date,
        },
    )

    if not checkpoint.api:
        checkpoint.api = default_api

    if not checkpoint.start_date:
        checkpoint.start_date = start_date
        checkpoint.save()

    if not api_object.all_stock_data:
        facilities = api_object.test_facilities
    else:
        facilities = api_object.get_ids()
    if checkpoint.location:
        external_id = api_object.get_last_processed_location(checkpoint)
        if external_id:
            facilities = list(itertools.dropwhile(lambda x: int(x) != int(external_id), facilities))
            process_facility_task(api_object, facilities[0], start_from=checkpoint.api)
            facilities = facilities[1:]

    if not checkpoint.date or checkpoint.location:
        # use subtasks only during initial migration
        facilities_chunked_list = chunked(facilities, 5)
        for chunk in facilities_chunked_list:
            api_object.process_data(process_facility_task, chunk)
    else:
        offset = checkpoint.offset
        for stock_api in itertools.dropwhile(lambda x: x.name != checkpoint.api, api_object.get_stock_apis_objects()):
            stock_api.add_date_filter(checkpoint.date, checkpoint.start_date)
            synchronization(
                stock_api,
                checkpoint,
                checkpoint.date,
                1000,
                offset,
                params={"domain": api_object.domain},
                domain=api_object.domain,
                atomic=True,
            )
            offset = 0

    checkpoint = StockDataCheckpoint.objects.get(domain=api_object.domain)
    save_stock_data_checkpoint(checkpoint, default_api, 1000, 0, checkpoint.start_date, None, False)
    checkpoint.start_date = None
    checkpoint.save()
Example #17
def filter_doc_ids_by_doc_type(db, doc_ids, doc_types):
    for doc_ids_chunk in chunked(doc_ids, 100):
        keys = [[doc_type, doc_id]
                for doc_id in doc_ids_chunk
                for doc_type in doc_types]
        results = db.view('all_docs/by_doc_type', keys=keys, reduce=False)
        for result in results:
            yield result['id']
Example #18
 def _iter_raw_cases(case_ids):
     if self.strip_history:
         for ids in chunked(case_ids, 100):
             for row in CommCareCase.get_db().view("case/get_lite", keys=ids, include_docs=False):
                 yield row["value"]
     else:
         for raw_case in iter_docs(CommCareCase.get_db(), case_ids):
             yield raw_case
Example #19
def delete_phone_numbers_for_owners(owner_ids):
    for ids in chunked(owner_ids, 50):
        results = VerifiedNumber.get_db().view(
            'sms/verified_number_by_owner_id',
            keys=ids,
            include_docs=True
        )
        soft_delete_docs([row['doc'] for row in results], VerifiedNumber)
Example #20
def get_form_ids_missing_from_elasticsearch(all_form_ids):
    missing_from_elasticsearch = set()
    for form_ids in chunked(all_form_ids, 500):
        form_ids = set(form_ids)
        not_missing = set(FormES().doc_id(form_ids).get_ids())
        missing_from_elasticsearch.update(form_ids - not_missing)
        assert not_missing - form_ids == set()
    return list(missing_from_elasticsearch)
Example #21
def get_case_ids_missing_from_elasticsearch(all_case_ids):
    missing_from_elasticsearch = set()
    for case_ids in chunked(all_case_ids, 500):
        case_ids = set(case_ids)
        not_missing = set(CaseES().doc_id(case_ids).get_ids())
        missing_from_elasticsearch.update(case_ids - not_missing)
        assert not_missing - case_ids == set()
    return list(missing_from_elasticsearch)
Example #22
def generate_user_ids_from_primary_location_ids(domain, location_ids):
    """
    Creates a generator for iterating through the user ids of all the users in the
    given domain whose primary location is given in the list of location_ids.
    """
    for location_ids_chunk in chunked(location_ids, 50):
        for user_id in get_user_ids_from_primary_location_ids(domain, location_ids_chunk).keys():
            yield user_id
Example #23
def iter_forms_with_cases(domain, since, chunksize=500):
    for form_list in chunked(forms_with_cases(domain, since), chunksize):
        case_id_mapping = case_ids_by_xform_id([f["_id"] for f in form_list])
        for form in form_list:
            form_id, f_case_ids, f_domain = form["_id"], form["fields"]["__retrieved_case_ids"], form["fields"]["domain"]
            received_on = form["fields"]["received_on"]
            for case_id in f_case_ids:
                yield form_id, received_on, case_id, case_id in case_id_mapping.get(form_id, []), f_domain
Example #24
def prepare_metadata(doc_ids_by_domain):
    domain_id_rev_list = []
    for domain, all_doc_ids in doc_ids_by_domain.items():
        for doc_ids in chunked(all_doc_ids, 500):
            doc_id_rev_list = _bulk_get_revs(XFormInstance.get_db(), doc_ids)
            assert len(doc_id_rev_list) == len(doc_ids)
            domain_id_rev_list.extend([[domain, doc_id, doc_rev]
                                       for doc_id, doc_rev in doc_id_rev_list])
    return domain_id_rev_list
Example #25
    def _rebuild_queues(self, pool):
        prev_ids = self.queues.get_ids_from_run_timestamp()

        for chunked_ids in chunked(prev_ids, 100):
            chunk = list([_id for _id in chunked_ids if _id])
            for form in FormAccessorCouch.get_forms(chunk):
                self._try_to_process_form(form, pool)

        self._try_to_process_queues(pool)
Example #26
 def iter_cases(self, ids):
     database = CommCareCase.get_db()
     if not self.strip_history:
         for doc in iter_docs(database, ids):
             yield CommCareCase.wrap(doc)
     else:
         for doc_ids in chunked(ids, 100):
             for case in CommCareCase.bulk_get_lite(doc_ids):
                 yield case
Example #27
 def get_filters(self, domain_name):
     """
     :return: A generator of filters each filtering for at most 1000 users.
     """
     from corehq.apps.users.dbaccessors.all_commcare_users import get_all_user_ids_by_domain
     user_ids = get_all_user_ids_by_domain(domain_name, include_web_users=self.include_web_users)
     for chunk in chunked(user_ids, 1000):
         query_kwarg = '{}__in'.format(self.user_id_field)
         yield Q(**{query_kwarg: chunk})
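
The filters yielded above cap each query at 1000 user ids, presumably so that no single query carries an unbounded __in list. A hypothetical consumer (the model class and names below are assumptions, not part of the source) would run one bounded query per yielded Q object and stitch the results together:

def iter_matching_rows(model_cls, filters):
    # Hypothetical sketch: model_cls is any Django model exposing the filtered
    # user-id field; one bounded query is issued per chunk-sized Q object.
    for q in filters:
        yield from model_cls.objects.filter(q).iterator()

For example, iter_matching_rows(SomeAuditModel, dump_filter.get_filters('my-domain')) would stream all matching rows for the domain's users; SomeAuditModel and dump_filter are placeholders.
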
Example #28
def iter_lite_cases_json(case_ids, chunksize=100):
    for case_id_chunk in chunked(case_ids, chunksize):
        rows = CommCareCase.get_db().view(
            'cases_get_lite/get_lite',
            keys=case_id_chunk,
            reduce=False,
        )
        for row in rows:
            yield row['value']
Example #29
def _delete_all_forms(domain_name):
    logger.info('Deleting forms...')
    form_accessor = FormAccessors(domain_name)
    form_ids = list(itertools.chain(*[
        form_accessor.get_all_form_ids_in_domain(doc_type=doc_type)
        for doc_type in doc_type_to_state
    ]))
    for form_id_chunk in chunked(with_progress_bar(form_ids, stream=silence_during_tests()), 500):
        form_accessor.soft_delete_forms(list(form_id_chunk))
    logger.info('Deleting forms complete.')
Example #30
 def bulk_delete(self, metas):
     success = True
     s3_bucket = self._s3_bucket()
     for chunk in chunked(metas, self.bulk_delete_chunksize):
         objects = [{"Key": meta.key} for meta in chunk]
         resp = s3_bucket.delete_objects(Delete={"Objects": objects})
         deleted = set(d["Key"] for d in resp.get("Deleted", []))
         success = success and all(o["Key"] in deleted for o in objects)
         self.metadb.bulk_delete(chunk)
     return success
Example #31
 def iter_documents(self, ids):
     from dimagi.utils.chunked import chunked
     for chunk in chunked(ids, 500):
         chunk = list([_f for _f in chunk if _f])
         for model in self._model_manager.filter(pk__in=chunk):
             yield self._doc_generator_fn(model)
Example #32
def iter_bulk_delete(database, ids, chunksize=100):
    for doc_ids in chunked(ids, chunksize):
        doc_dicts = get_docs(database, keys=doc_ids)
        database.bulk_delete(doc_dicts)
Example #33
def iter_docs(database, ids, chunksize=100):
    for doc_ids in chunked(ids, chunksize):
        for doc in get_docs(database, keys=doc_ids):
            doc_dict = doc.get('doc')
            if doc_dict:
                yield doc_dict
Example #34
 def iter_forms(self, form_ids):
     for chunk in chunked(form_ids, 100):
         chunk = list([_f for _f in chunk if _f])
         for form in self.get_forms(chunk):
             yield form
Example #35
def iter_bulk_delete(database, ids, chunksize=100):
    for doc_ids in chunked(ids, chunksize):
        doc_dicts = [doc.get('doc') for doc in get_docs(database, keys=doc_ids) if doc.get('doc')]
        database.bulk_delete(doc_dicts)
Example #36
def iter_es_docs(index_name, ids):
    """Returns a generator which pulls documents from elasticsearch in chunks"""
    for ids_chunk in chunked(ids, 100):
        for result in mget_query(index_name, ids_chunk, source=True):
            if result['found']:
                yield result['_source']
Example #37
    def get_relevant_person_case_sets(self, person_ids):
        """
        Generator returning all relevant cases for the migration, grouped by person.

        This is a pretty nasty method, but it was the only way I could figure
        out how to group the queries together, rather than performing multiple
        queries per person case.
        """
        for person_chunk in chunked(person_ids, 100):
            person_chunk = list(filter(None, person_chunk))
            all_persons = {}  # case_id: PersonCaseSet
            for person in self.accessor.get_cases(person_chunk):
                # enrolled_in_private is blank/not set AND case_version is blank/not set
                # AND owner_id is within the location set being migrated
                if (person.get_case_property(ENROLLED_IN_PRIVATE) != 'true'
                        and not person.get_case_property(CASE_VERSION)):
                    all_persons[person.case_id] = PersonCaseSet(person)

            referrals_and_occurrences_to_person = {}
            type_to_bucket = {
                CASE_TYPE_OCCURRENCE: 'occurrences',
                CASE_TYPE_REFERRAL: 'referrals',
                CASE_TYPE_TRAIL: 'trails'
            }
            for case in self.accessor.get_reverse_indexed_cases(
                [person_id for person_id in all_persons]):
                bucket = type_to_bucket.get(case.type, None)
                if bucket:
                    for index in case.indices:
                        if index.referenced_id in all_persons:
                            getattr(all_persons[index.referenced_id],
                                    bucket).append(case)
                            if bucket != 'trails':
                                referrals_and_occurrences_to_person[
                                    case.case_id] = index.referenced_id
                            break

            type_to_bucket = {
                CASE_TYPE_EPISODE: 'episodes',
                CASE_TYPE_TEST: 'tests',
                CASE_TYPE_TRAIL: 'trails'
            }
            episodes_to_person = {}
            for case in self.accessor.get_reverse_indexed_cases(
                    referrals_and_occurrences_to_person.keys()):
                bucket = type_to_bucket.get(case.type, None)
                if bucket:
                    for index in case.indices:
                        person_id = referrals_and_occurrences_to_person.get(
                            index.referenced_id)
                        if person_id:
                            getattr(all_persons[person_id],
                                    bucket).append(case)
                            if case.type == CASE_TYPE_EPISODE:
                                episodes_to_person[case.case_id] = person_id
                            break

            for case in self.accessor.get_reverse_indexed_cases(
                    episodes_to_person.keys()):
                if case.type == CASE_TYPE_DRTB_HIV_REFERRAL:
                    for index in case.indices:
                        person_id = episodes_to_person.get(index.referenced_id)
                        if person_id:
                            all_persons[person_id].drtb_hiv.append(case)
                            break
Example #38
 def _iter_active_user_ids(self, groups):
     all_user_ids = {user_id for group in groups for user_id in group.users}
     for user_ids_chunk in chunked(all_user_ids, 1000):
         yield from (UserES().domain(self.domain).user_ids(
             user_ids_chunk).is_active(True).values_list('_id', flat=True))
Example #39
 def __iter__(self):
     for case_ids in chunked(self.all_case_ids, 500):
         for case in wrapped_docs(CommCareCase, case_ids):
             yield case
Example #40
def _publish_docs_for_couch(doc_cls, get_meta, domain, records):
    doc_ids = [r.doc_id for r in records]
    for ids in chunked(doc_ids, 500):
        doc_id_rev_list = bulk_get_revs(doc_cls.get_db(), ids)
        for doc_id, doc_rev in doc_id_rev_list:
            publish_change(get_meta(domain, doc_id, doc_rev))
Example #41
 def iter_cases(self, case_ids):
     for chunk in chunked(case_ids, 100):
         chunk = list(filter(None, chunk))
         for case in self.get_cases(chunk):
             yield case
Example #42
 def iter_forms(self, form_ids):
     for chunk in chunked(form_ids, 100):
         chunk = list(filter(None, chunk))
         for form in self.get_forms(chunk):
             yield form
Example #43
 def stream_cases(all_case_ids):
     for case_ids in chunked(all_case_ids, 1000):
         # for case in CommCareCase.view('_all_docs', keys=case_ids, include_docs=True):
         for case in wrapped_docs(CommCareCase, keys=case_ids):
             yield case
Example #44
def save_locations(location_stubs,
                   types_by_code,
                   domain,
                   delay_updates,
                   excel_importer=None,
                   chunk_size=100):
    """
    :param location_stubs: (list) List of LocationStub objects with
        attributes like 'db_object', 'needs_save', 'do_delete' set
    :param types_by_code: (dict) Mapping of 'code' to LocationType SQL objects
    :param excel_importer: Used for providing progress feedback. Disabled on None

    This recursively saves the tree top to bottom. Note that bulk updates are not
    possible because mptt.Model (inherited by SQLLocation) doesn't support bulk creation.
    """
    def order_by_location_type():
        # returns locations in the order from top to bottom
        types_by_parent = defaultdict(list)
        for _type in types_by_code.values():
            key = _type.parent_type.code if _type.parent_type else ROOT_LOCATION_TYPE
            types_by_parent[key].append(_type)

        location_stubs_by_type = defaultdict(list)
        for l in location_stubs:
            location_stubs_by_type[l.location_type].append(l)

        top_to_bottom_locations = []

        def append_at_bottom(parent_type):
            top_to_bottom_locations.extend(
                location_stubs_by_type[parent_type.code])
            for child_type in types_by_parent[parent_type.code]:
                append_at_bottom(child_type)

        for top_type in types_by_parent[ROOT_LOCATION_TYPE]:
            append_at_bottom(top_type)

        return top_to_bottom_locations

    def _process_locations(locs_to_process, to_be_deleted):
        for loc in locs_to_process:
            if excel_importer:
                excel_importer.add_progress()
            if loc.do_delete:
                if not loc.is_new:
                    # keep track of to be deleted items to delete them in top-to-bottom order
                    to_be_deleted.append(loc.db_object)
            elif loc.needs_save:
                loc_object = loc.db_object
                loc_object.location_type = types_by_code.get(loc.location_type)
                if loc.parent_code and loc.parent_code != ROOT_LOCATION_TYPE:
                    # refetch parent_location object so that mptt related fields are updated consistently,
                    #   since we are saving top to bottom, parent_location would not have any pending
                    #   saves, so this is the right point to refetch the object.
                    loc_object.parent = SQLLocation.objects.get(
                        domain=domain, site_code__iexact=loc.parent_code)
                else:
                    loc_object.parent = None
                loc.db_object.save()

    to_be_deleted = []

    top_to_bottom_locations = order_by_location_type()
    if delay_updates:
        for locs in chunked(top_to_bottom_locations, chunk_size):
            with transaction.atomic():
                with SQLLocation.objects.delay_mptt_updates():
                    _process_locations(locs, to_be_deleted)
    else:
        _process_locations(top_to_bottom_locations, to_be_deleted)

    for locs in chunked(reversed(to_be_deleted), chunk_size):
        # Deletion has to happen bottom to top, otherwise mptt complains
        #   about missing parents
        with transaction.atomic():
            with SQLLocation.objects.delay_mptt_updates():
                for l in locs:
                    l.delete()
Example #45
def iter_es_docs(index_name, ids):
    """Returns a generator which pulls documents from elasticsearch in chunks"""
    for ids_chunk in chunked(ids, 100):
        yield from mget_query(index_name, ids_chunk)
Example #46
def delete_all_docs_by_doc_type(db, doc_types):
    for chunk in chunked(get_all_docs_with_doc_types(db, doc_types), 100):
        db.bulk_delete(chunk)
Example #47
 def test_chunked(self):
     self.assertEqual(list(chunked(range(10), 4)), [
         (0, 1, 2, 3),
         (4, 5, 6, 7),
         (8, 9)
     ])
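
The test above pins down the behavior every example in this collection relies on: chunked walks an iterable lazily and yields tuples of at most n items, with a shorter final tuple. Several call sites (for instance chunked(form_ids, 1000, list) in Examples #54-#57) also pass a third argument that appears to act as a collection constructor applied to each chunk. A minimal sketch consistent with that observed behavior, assuming the real dimagi.utils.chunked.chunked may differ in details:

from itertools import islice


def chunked(iterable, n, collection=tuple):
    # Sketch only: yield successive chunks of at most n items, each wrapped in
    # `collection` (tuples by default, matching test_chunked above).
    it = iter(iterable)
    while True:
        chunk = collection(islice(it, n))
        if not chunk:
            return
        yield chunk

Because the source is consumed lazily, the same helper works whether the ids come from a list, a generator, or a streaming query.
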
Example #48
def track_periodic_data():
    """
    Sync data that is neither event- nor page-based with hubspot/Kissmetrics
    :return:
    """
    # Start by getting a list of web users mapped to their domains
    six_months_ago = date.today() - timedelta(days=180)
    users_to_domains = (UserES().web_users()
                        .last_logged_in(gte=six_months_ago).source(['domains', 'email', 'date_joined'])
                        .analytics_enabled()
                        .run().hits)
    # users_to_domains is a list of dicts
    domains_to_forms = FormES().terms_aggregation('domain', 'domain').size(0).run()\
        .aggregations.domain.counts_by_bucket()
    domains_to_mobile_users = UserES().mobile_users().terms_aggregation('domain', 'domain').size(0).run()\
                                      .aggregations.domain.counts_by_bucket()

    # Keep track of india and www data separately
    env = get_instance_string()

    # Track the number of users and domains with max_forms greater than HUBSPOT_THRESHOLD
    number_of_users = 0
    number_of_domains_with_forms_gt_threshold = 0

    for num_forms in domains_to_forms.values():
        if num_forms > HUBSPOT_THRESHOLD:
            number_of_domains_with_forms_gt_threshold += 1

    # For each web user, iterate through their domains and select the max number of form submissions and
    # max number of mobile workers
    for users in chunked(users_to_domains, 500):
        submit = []
        for user in users:
            email = user.get('email')
            if not _email_is_valid(email):
                continue

            number_of_users += 1
            date_created = user.get('date_joined')
            max_forms = 0
            max_workers = 0
            max_export = 0
            max_report = 0

            for domain in user['domains']:
                if domain in domains_to_forms and domains_to_forms[domain] > max_forms:
                    max_forms = domains_to_forms[domain]
                if domain in domains_to_mobile_users and domains_to_mobile_users[domain] > max_workers:
                    max_workers = domains_to_mobile_users[domain]
                if _get_export_count(domain) > max_export:
                    max_export = _get_export_count(domain)
                if _get_report_count(domain) > max_report:
                    max_report = _get_report_count(domain)

            project_spaces_created = ", ".join(get_domains_created_by_user(email))

            user_json = {
                'email': email,
                'properties': [
                    {
                        'property': '{}max_form_submissions_in_a_domain'.format(env),
                        'value': max_forms
                    },
                    {
                        'property': '{}max_mobile_workers_in_a_domain'.format(env),
                        'value': max_workers
                    },
                    {
                        'property': '{}project_spaces_created_by_user'.format(env),
                        'value': project_spaces_created,
                    },
                    {
                        'property': '{}over_300_form_submissions'.format(env),
                        'value': max_forms > HUBSPOT_THRESHOLD
                    },
                    {
                        'property': '{}date_created'.format(env),
                        'value': date_created
                    },
                    {
                        'property': '{}max_exports_in_a_domain'.format(env),
                        'value': max_export
                    },
                    {
                        'property': '{}max_custom_reports_in_a_domain'.format(env),
                        'value': max_report
                    }
                ]
            }
            submit.append(user_json)

        submit_json = json.dumps(submit)

        submit_data_to_hub_and_kiss(submit_json)
    update_datadog_metrics({
        DATADOG_WEB_USERS_GAUGE: number_of_users,
        DATADOG_DOMAINS_EXCEEDING_FORMS_GAUGE: number_of_domains_with_forms_gt_threshold
    })
Example #49
 def iter_cases(self, case_ids):
     for chunk in chunked(case_ids, 100):
         chunk = list([_f for _f in chunk if _f])
         for case in self.get_cases(chunk):
             yield case
Example #50
 def get_filters(self, domain_name):
     for chunk in chunked(self.get_ids(domain_name), self.chunksize):
         query_kwarg = '{}__in'.format(self.field)
         yield Q(**{query_kwarg: chunk})
Example #51
def iter_docs(database, ids, chunksize=100, **query_params):
    for doc_ids in chunked(ids, chunksize):
        for doc in get_docs(database, keys=doc_ids, **query_params):
            yield doc
Example #52
def save_locations(location_stubs,
                   types_by_code,
                   old_collection,
                   excel_importer=None,
                   chunk_size=100):
    """
    :param location_stubs: (list) List of LocationStub objects with
        attributes like 'db_object', 'needs_save', 'do_delete' set
    :param types_by_code: (dict) Mapping of 'code' to LocationType SQL objects
    :param excel_importer: Used for providing progress feedback. Disabled on None

    This recursively saves tree top to bottom.
    """
    def order_by_location_type():
        # returns locations in the order from top to bottom
        types_by_parent = defaultdict(list)
        for _type in types_by_code.values():
            key = _type.parent_type.code if _type.parent_type else ROOT_LOCATION_TYPE
            types_by_parent[key].append(_type)

        location_stubs_by_type = defaultdict(list)
        for l in location_stubs:
            location_stubs_by_type[l.location_type].append(l)

        top_to_bottom_locations = []

        def append_at_bottom(parent_type):
            top_to_bottom_locations.extend(
                location_stubs_by_type[parent_type.code])
            for child_type in types_by_parent[parent_type.code]:
                append_at_bottom(child_type)

        for top_type in types_by_parent[ROOT_LOCATION_TYPE]:
            append_at_bottom(top_type)

        return top_to_bottom_locations

    # Go through all locations and either flag for deletion or save
    location_stubs_by_code = {stub.site_code: stub for stub in location_stubs}
    to_delete = []
    for stubs in chunked(order_by_location_type(), chunk_size):
        with transaction.atomic():
            for loc in stubs:
                if loc.do_delete:
                    if loc.is_new:
                        if excel_importer:
                            excel_importer.add_progress()
                    else:
                        to_delete.append(loc)
                    continue
                if excel_importer:
                    excel_importer.add_progress()
                if loc.needs_save:
                    # attach location type and parent to location, then save
                    loc_object = loc.db_object
                    loc_object.location_type = types_by_code.get(
                        loc.location_type)
                    parent_code = loc.parent_code
                    if parent_code == ROOT_LOCATION_TYPE:
                        loc_object.parent = None
                    elif parent_code:
                        if parent_code in location_stubs_by_code:
                            loc_object.parent = location_stubs_by_code[
                                parent_code].db_object
                        else:
                            loc_object.parent = old_collection.locations_by_site_code[
                                parent_code]
                    loc_object.save()

    _delete_locations(to_delete, old_collection, excel_importer, chunk_size)
Example #53
def update_current_MALT():
    today = datetime.date.today()
    this_month_dict = {'month': today.month, 'year': today.year}
    domains = Domain.get_all_names()
    for chunk in chunked(domains, 1000):
        update_malt.delay(this_month_dict, chunk)
Example #54
 def hard_delete_cases(domain_name):
     print("Hard-deleting cases...")
     case_ids = iter_ids(CommCareCase, 'case_id', domain_name)
     for chunk in chunked(case_ids, 1000, list):
         CommCareCase.objects.hard_delete_cases(domain_name, chunk)
Example #55
    def do_filter(self, domain):
        def update_doc_diffs(doc_diffs):
            ids = [d[1] for d in doc_diffs]
            sql_docs = get_sql_docs(ids)
            couch_docs = get_couch_docs(ids)
            new_diffs = []
            for kind, doc_id, diffs in doc_diffs:
                couch_json = get_json(kind, doc_id, couch_docs)
                sql_json = get_json(kind, doc_id, sql_docs)
                json_diffs = [json_diff(d) for d in diffs]
                new_diffs = filter_diffs(couch_json, sql_json, json_diffs)
                if len(json_diffs) == len(new_diffs):
                    continue
                if self.dry_run:
                    type_ = "changes" if self.changes else "diffs"
                    print(
                        f"{kind} {doc_id}: {len(diffs)} -> {len(new_diffs)} {type_}"
                    )
                elif self.changes:
                    new_diffs = convert_diffs_to_changes(new_diffs, diffs)
                    statedb.add_changes(kind, doc_id, new_diffs)
                else:
                    statedb.add_diffs(kind, doc_id, new_diffs)

        def convert_diffs_to_changes(new_diffs, planning_diffs):
            reason = {d.reason for d in planning_diffs}
            assert len(reason) == 1, reason
            return diffs_to_changes(new_diffs, reason.pop())

        def get_json(kind, doc_id, docs):
            doc = docs.get(doc_id)
            return doc.to_json() if doc is not None else {"doc_type": kind}

        def json_diff(diff):
            jd = diff.json_diff
            return jd._replace(path=tuple(jd.path))

        statedb = self.open_state_db(domain, readonly=self.dry_run)
        select = self.get_select_kwargs()
        if select and select["kind"] in MissingIds.form_types:

            def get_sql_docs(ids):
                return {f.form_id: f for f in get_sql_forms(ids)}

            def get_couch_docs(ids):
                return {f.form_id: f for f in get_couch_forms(ids)}

            filter_diffs = filter_form_diffs
        elif select and select["kind"] in MissingIds.case_types:

            def get_sql_docs(ids):
                return {c.case_id: c for c in get_sql_cases(ids)}

            def get_couch_docs(ids):
                return {c.case_id: c for c in get_couch_cases(ids)}

            filter_diffs = filter_case_diffs
        else:
            raise NotImplementedError(f"--select={self.select}")
        prompt = self.dry_run and self.stop
        if self.changes:
            doc_diffs = statedb.iter_doc_changes(**select)
        else:
            doc_diffs = statedb.iter_doc_diffs(**select)
        doc_diffs = self.with_progress(doc_diffs, statedb, select)
        for batch in chunked(doc_diffs, self.batch_size, list):
            update_doc_diffs(batch)
            if prompt and not confirm("show more?"):
                break
Example #56
 def hard_delete_forms(domain_name):
     print("Hard-deleting forms...")
     form_ids = iter_ids(XFormInstance, 'form_id', domain_name)
     for chunk in chunked(form_ids, 1000, list):
         XFormInstance.objects.hard_delete_forms(domain_name, chunk)
Example #57
 def _rebuild_queues(self, form_ids):
     for chunk in chunked(form_ids, 100, list):
         for form in FormAccessorCouch.get_forms(chunk):
             self._try_to_process_form(form)
Example #58
class Command(CaseUpdateCommand):
    help = ("Updates all case indices of a specfied case type to use an extension relationship instead of parent.")

    def case_block(self, case):
        index = case.indices[0]
        return ElementTree.tostring(CaseBlock.deprecated_init(
            create=False,
            case_id=case.case_id,
            index={index.identifier: (index.referenced_type, index.referenced_id, "extension")},
        ).as_xml()).decode('utf-8')

    def update_cases(self, domain, case_type, user_id):
        accessor = CaseAccessors(domain)
        case_ids = accessor.get_case_ids_in_domain(case_type)
        print(f"Found {len(case_ids)} {case_type} cases in {domain}")

        case_blocks = []
        skip_count = 0
        for case in accessor.iter_cases(case_ids):
            if should_skip(case):
                skip_count += 1
            elif needs_update(case):
                case_blocks.append(self.case_block(case))
        print(f"{len(case_blocks)} to update in {domain}, {skip_count} cases have skipped due to multiple indices.")

        total = 0
        for chunk in chunked(case_blocks, BATCH_SIZE):
            submit_case_blocks(chunk, domain, device_id=DEVICE_ID, user_id=user_id)
            total += len(chunk)
            print("Updated {} cases on domain {}".format(total, domain))
Example #59
def remove_deleted_ownerships(deleted_fixture_ids, domain):
    from corehq.apps.fixtures.models import FixtureOwnership
    for fixture_ids in chunked(deleted_fixture_ids, 100):
        bad_ownerships = FixtureOwnership.for_all_item_ids(fixture_ids, domain)
        FixtureOwnership.get_db().bulk_delete(bad_ownerships)
Example #60
def build_async_indicators(indicator_doc_ids):
    # written to be used with _queue_indicators, indicator_doc_ids must
    #   be a chunk of 100
    for ids in chunked(indicator_doc_ids, 10):
        _build_async_indicators(ids)