def save_ledger_values(ledger_values, stock_result=None):
    """Persist ledger values and their tracked transactions to the partitioned DBs.

    :param ledger_values: iterable of ledger value models to save
    :param stock_result: optional stock processing result; when it carries
        ``cases_with_deprecated_transactions``, stale transactions for those
        cases (tied to the result's form) are deleted first.
    :raises LedgerSaveError: wraps any database ``InternalError``.
    """
    # Nothing to save and nothing to clean up: bail out early.
    if not ledger_values and not (
            stock_result and stock_result.cases_with_deprecated_transactions):
        return

    try:
        if stock_result and stock_result.cases_with_deprecated_transactions:
            # Remove transactions superseded by this form, per shard DB.
            db_cases = split_list_by_db_partition(
                stock_result.cases_with_deprecated_transactions)
            for db_name, case_ids in db_cases:
                LedgerTransaction.objects.using(db_name).filter(
                    case_id__in=case_ids,
                    form_id=stock_result.xform.form_id
                ).delete()

        for ledger_value in ledger_values:
            # Snapshot the live tracked transactions before saving.
            transactions_to_save = ledger_value.get_live_tracked_models(
                LedgerTransaction)

            # Save the value and its transactions atomically on the value's
            # own shard DB; savepoint=False avoids nested-savepoint overhead.
            with transaction.atomic(using=ledger_value.db, savepoint=False):
                ledger_value.save()
                for trans in transactions_to_save:
                    trans.save()

            ledger_value.clear_tracked_models()
    except InternalError as e:
        raise LedgerSaveError(e)
def drop_sql_ids(self, couch_ids):
    """Filter the given couch ids, removing ids that are in SQL"""
    for db_alias, ids_for_db in split_list_by_db_partition(couch_ids):
        # Run self.sql against the shard that owns this chunk of ids.
        cursor_cm = XFormInstanceSQL.get_cursor_for_partition_db(
            db_alias, readonly=True)
        with cursor_cm as cursor:
            cursor.execute(self.sql, [ids_for_db])
            for row in cursor.fetchall():
                yield row[0]
def hard_delete_forms(self, domain, form_ids, delete_attachments=True):
    """Permanently delete forms (and optionally their attachments).

    :param domain: domain the forms must belong to; forms outside it are
        silently skipped.
    :param form_ids: list of form ids to delete.
    :param delete_attachments: when True, also delete blob metadata for the
        forms that were actually deleted.
    :returns: number of form rows deleted.
    """
    assert isinstance(form_ids, list)
    deleted_count = 0
    for db_name, split_form_ids in split_list_by_db_partition(form_ids):
        # cascade should delete the operations
        _, deleted_models = self.using(db_name).filter(
            domain=domain, form_id__in=split_form_ids
        ).delete()
        # deleted_models maps model label -> row count; count only forms,
        # not cascaded rows.
        deleted_count += deleted_models.get(self.model._meta.label, 0)

    if delete_attachments and deleted_count:
        if deleted_count != len(form_ids):
            # in the unlikely event that we didn't delete all forms (because they weren't all
            # in the specified domain), only delete attachments for forms that were deleted.
            deleted_forms = [
                form_id for form_id in form_ids
                if not self.form_exists(form_id)
            ]
        else:
            deleted_forms = form_ids
        metas = get_blob_db().metadb.get_for_parents(deleted_forms)
        get_blob_db().bulk_delete(metas=metas)
    return deleted_count
def bulk_delete(self, metas):
    """Delete blob metadata in bulk

    Rows are deleted from ``blobs_blobmeta`` and (unless they have an
    ``expires_on``) recorded in ``blobs_deletedblobmeta`` in the same
    statement; a conflicting tombstone for the same id is updated in place
    when its parent/key match. Emits deleted count/bytes metrics.

    :param metas: A list of `BlobMeta` objects.
    """
    if any(meta.id is None for meta in metas):
        raise ValueError("cannot delete unsaved BlobMeta")
    delete_blobs_sql = """
    WITH deleted AS (
        DELETE FROM blobs_blobmeta
        WHERE id IN %s
        RETURNING *
    ), ins AS (
        INSERT INTO blobs_deletedblobmeta (
            "id",
            "domain",
            "parent_id",
            "name",
            "key",
            "type_code",
            "created_on",
            "deleted_on"
        ) (
            SELECT
                "id",
                "domain",
                "parent_id",
                "name",
                "key",
                "type_code",
                "created_on",
                %s AS "deleted_on"
            FROM deleted
            WHERE expires_on IS NULL
        ) ON CONFLICT (id) DO UPDATE SET
            name = EXCLUDED.name,
            key = EXCLUDED.key,
            type_code = EXCLUDED.type_code,
            created_on = EXCLUDED.created_on,
            deleted_on = CLOCK_TIMESTAMP()
        WHERE blobs_deletedblobmeta.parent_id = EXCLUDED.parent_id
            and blobs_deletedblobmeta.key = EXCLUDED.key
    ) SELECT COUNT(*) FROM deleted;
    """
    now = _utcnow()
    # Group meta ids by parent so the parent id (the shard key) routes each
    # group to its partition DB.
    parents = defaultdict(list)
    for meta in metas:
        parents[meta.parent_id].append(meta.id)
    for dbname, split_parent_ids in split_list_by_db_partition(parents):
        # Flatten all meta ids belonging to this partition's parents.
        ids = tuple(m for p in split_parent_ids for m in parents[p])
        with BlobMeta.get_cursor_for_partition_db(dbname) as cursor:
            cursor.execute(delete_blobs_sql, [ids, now])
    deleted_bytes = sum(m.stored_content_length for m in metas)
    metrics_counter('commcare.blobs.deleted.count', value=len(metas))
    metrics_counter('commcare.blobs.deleted.bytes', value=deleted_bytes)
def _drop_sql_form_ids(couch_ids, domain):
    """Yield the ids from couch_ids that have no SQL form row."""
    from corehq.sql_db.util import split_list_by_db_partition
    # Anti-join: unnest the candidate ids and keep those with no match.
    get_missing_forms = """
        SELECT couch.form_id
        FROM (SELECT unnest(%s) AS form_id) AS couch
        LEFT JOIN form_processor_xforminstancesql sql USING (form_id)
        WHERE sql.form_id IS NULL
    """
    for db_alias, ids_chunk in split_list_by_db_partition(couch_ids):
        with XFormInstanceSQL.get_cursor_for_partition_db(
                db_alias, readonly=True) as cursor:
            cursor.execute(get_missing_forms, [ids_chunk])
            for (missing_id,) in cursor.fetchall():
                yield missing_id
def get_forms_to_reprocess(form_ids):
    """Return (live_form, deprecated_form) pairs whose xmlns differ.

    :param form_ids: form ids to inspect; only forms that have a
        ``deprecated_form_id`` (i.e. edited forms) are considered.
    :returns: list of ``(live_form, deprecated_form)`` tuples where the
        edit changed the form's xmlns.
    """
    forms_to_process = []
    edited_forms = {}
    # First pass: collect edited forms (those with a deprecated predecessor),
    # shard by shard.
    for dbname, forms_by_db in split_list_by_db_partition(form_ids):
        edited_forms.update({
            form.form_id: form
            for form in XFormInstanceSQL.objects.using(dbname)
            .filter(form_id__in=forms_by_db)
            .exclude(deprecated_form_id__isnull=True)
        })

    deprecated_form_ids = {
        form.deprecated_form_id
        for form in six.itervalues(edited_forms)
    }
    # Second pass: fetch the deprecated versions and compare xmlns against
    # the live form that replaced each one (linked via orig_id).
    for dbname, forms_by_db in split_list_by_db_partition(deprecated_form_ids):
        deprecated_forms = XFormInstanceSQL.objects.using(dbname).filter(
            form_id__in=forms_by_db)
        for deprecated_form in deprecated_forms:
            live_form = edited_forms[deprecated_form.orig_id]
            if deprecated_form.xmlns != live_form.xmlns:
                forms_to_process.append((live_form, deprecated_form))
    return forms_to_process
def _drop_sql_form_ids(couch_ids, domain):
    """Yield the subset of couch_ids that does not exist in SQL."""
    from django.db import connections
    from corehq.sql_db.util import split_list_by_db_partition

    # LEFT JOIN against the forms table; rows with no match are missing.
    get_missing_forms = """
        SELECT couch.form_id
        FROM (SELECT unnest(%s) AS form_id) AS couch
        LEFT JOIN form_processor_xforminstancesql sql USING (form_id)
        WHERE sql.form_id IS NULL
    """
    for db_alias, ids_chunk in split_list_by_db_partition(couch_ids):
        with connections[db_alias].cursor() as cursor:
            cursor.execute(get_missing_forms, [ids_chunk])
            rows = cursor.fetchall()
        for (missing_id,) in rows:
            yield missing_id
def get_forms_to_reprocess(form_ids):
    """Return (live_form, deprecated_form) pairs where an edit changed xmlns."""
    edited_forms = {}
    # Gather edited forms (ones that deprecated an earlier version).
    for db_alias, ids_chunk in split_list_by_db_partition(form_ids):
        queryset = (
            XFormInstanceSQL.objects.using(db_alias)
            .filter(form_id__in=ids_chunk)
            .exclude(deprecated_form_id__isnull=True)
        )
        for form in queryset:
            edited_forms[form.form_id] = form

    deprecated_ids = {
        form.deprecated_form_id for form in six.itervalues(edited_forms)
    }
    forms_to_process = []
    # Fetch the deprecated versions and compare with their live successors.
    for db_alias, ids_chunk in split_list_by_db_partition(deprecated_ids):
        old_forms = XFormInstanceSQL.objects.using(db_alias).filter(
            form_id__in=ids_chunk)
        for old_form in old_forms:
            current = edited_forms[old_form.orig_id]
            if old_form.xmlns != current.xmlns:
                forms_to_process.append((current, old_form))
    return forms_to_process
def bulk_delete(self, metas):
    """Delete blob metadata in bulk

    Deletes rows per partition DB (parent_id is the shard key) and emits
    deleted count/bytes metrics.

    :param metas: A list of `BlobMeta` objects.
    :raises ValueError: if any meta has not been saved (``id is None``).
    """
    if any(meta.id is None for meta in metas):
        raise ValueError("cannot delete unsaved BlobMeta")
    # Group meta ids by parent so deletes can be routed per shard DB.
    parents = defaultdict(list)
    for meta in metas:
        parents[meta.parent_id].append(meta.id)
    for db_name, split_parent_ids in split_list_by_db_partition(parents):
        ids = chain.from_iterable(parents[x] for x in split_parent_ids)
        BlobMeta.objects.using(db_name).filter(id__in=list(ids)).delete()
    # BUG FIX: previously summed `meta.content_length` — `meta` was the
    # leaked variable from the grouping loop above, so this reported
    # last-meta-size * len(metas) instead of the true total.
    deleted_bytes = sum(m.content_length for m in metas)
    datadog_counter('commcare.blobs.deleted.count', value=len(metas))
    datadog_counter('commcare.blobs.deleted.bytes', value=deleted_bytes)
def bulk_delete(self, metas):
    """Delete blob metadata in bulk

    Deletes rows from ``blobs_blobmeta`` and, for rows without an
    ``expires_on``, records a tombstone in ``blobs_deletedblobmeta``
    in the same statement. Emits deleted count/bytes metrics.

    :param metas: A list of `BlobMeta` objects.
    :raises ValueError: if any meta has not been saved (``id is None``).
    """
    if any(meta.id is None for meta in metas):
        raise ValueError("cannot delete unsaved BlobMeta")
    delete_blobs_sql = """
    WITH deleted AS (
        DELETE FROM blobs_blobmeta
        WHERE id IN %s
        RETURNING *
    ), ins AS (
        INSERT INTO blobs_deletedblobmeta (
            "id",
            "domain",
            "parent_id",
            "name",
            "key",
            "type_code",
            "created_on",
            "deleted_on"
        )
        SELECT
            "id",
            "domain",
            "parent_id",
            "name",
            "key",
            "type_code",
            "created_on",
            %s AS "deleted_on"
        FROM deleted
        WHERE expires_on IS NULL
    ) SELECT COUNT(*) FROM deleted;
    """
    now = _utcnow()
    # Group meta ids by parent: parent_id is the shard key, so this routes
    # each group of ids to its partition DB.
    parents = defaultdict(list)
    for meta in metas:
        parents[meta.parent_id].append(meta.id)
    for dbname, split_parent_ids in split_list_by_db_partition(parents):
        ids = tuple(m for p in split_parent_ids for m in parents[p])
        with connections[dbname].cursor() as cursor:
            cursor.execute(delete_blobs_sql, [ids, now])
    # BUG FIX: previously summed `meta.content_length` — `meta` was the
    # leaked variable from the grouping loop above, so this reported
    # last-meta-size * len(metas) instead of the true total.
    deleted_bytes = sum(m.content_length for m in metas)
    datadog_counter('commcare.blobs.deleted.count', value=len(metas))
    datadog_counter('commcare.blobs.deleted.bytes', value=deleted_bytes)
def iter_docs_not_in_sql(form_ids, couch_db):
    """Yield couch docs for form ids that have not been migrated to SQL."""
    # Anti-join the candidate ids against the migrated forms table.
    sql = f"""
        SELECT maybe_missing.id
        FROM (SELECT UNNEST(%s) AS id) maybe_missing
        LEFT JOIN {XFormInstanceSQL._meta.db_table} migrated_form
            ON migrated_form.form_id = maybe_missing.id
        WHERE migrated_form.id IS NULL
    """
    for db_name, db_form_ids in split_list_by_db_partition(form_ids):
        with connections[db_name].cursor() as cursor:
            cursor.execute(sql, [db_form_ids])
            missing_ids = [row[0] for row in cursor.fetchall()]
        if missing_ids:
            log.debug("missing ids: %s", missing_ids)
            yield from iter_docs(couch_db, missing_ids)
def _ledgers_per_case(self):
    """Print ledger statistics per case for the domain.

    Reports: case types that have ledgers, average case lifespan per type,
    average ledgers per case, and ledger updates per case per month.
    Writes all output via ``self.stdout`` / printing helpers.
    """
    results = (
        LedgerES(es_instance_alias=ES_EXPORT_INSTANCE)
        .domain(self.domain)
        .aggregation(TermsAggregation('by_case', 'case_id', size=100))
        .size(0)
        .run()
    )
    ledgers_per_case = results.aggregations.by_case
    case_ids = set()
    ledger_counts = []
    for case_id, ledger_count in ledgers_per_case.counts_by_bucket().items():
        case_ids.add(case_id)
        ledger_counts.append(ledger_count)

    if not case_ids:
        self.stdout.write("Domain has no ledgers")
        return

    avg_ledgers_per_case = sum(ledger_counts) // len(case_ids)

    case_types_result = CaseES(es_instance_alias=ES_EXPORT_INSTANCE)\
        .domain(self.domain).case_ids(case_ids)\
        .aggregation(TermsAggregation('types', 'type'))\
        .size(0).run()
    case_types = case_types_result.aggregations.types.keys

    self.stdout.write('\nCase Types with Ledgers')
    for type_ in case_types:
        self._print_value(
            'case_type', type_,
            CaseES().domain(self.domain).case_type(type_).count())
        if should_use_sql_backend(self.domain):
            db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
            results = (CommCareCaseSQL.objects.using(db_name).filter(
                domain=self.domain, closed=True, type=type_).annotate(
                    lifespan=F('closed_on') - F('opened_on')).annotate(
                        avg_lifespan=Avg('lifespan')).values(
                            'avg_lifespan', flat=True))
            self._print_value('Average lifespan for "%s" cases' % type_,
                              results[0]['avg_lifespan'])
        self._cases_created_per_user_per_month(type_)

    self._print_value('Average ledgers per case', avg_ledgers_per_case)

    if should_use_sql_backend(self.domain):
        stats = defaultdict(list)
        for db_name, case_ids_p in split_list_by_db_partition(case_ids):
            # BUG FIX: previously filtered on the full `case_ids` set,
            # defeating the per-partition split (every shard got the whole
            # IN clause). Use the partition's own subset.
            transactions_per_case_per_month = (
                LedgerTransaction.objects.using(db_name).filter(
                    case_id__in=case_ids_p).annotate(
                        m=Month('server_date'), y=Year('server_date')).values(
                            'case_id', 'y', 'm').annotate(count=Count('id')))
            for row in transactions_per_case_per_month:
                month = date(row['y'], row['m'], 1)
                stats[month].append(row['count'])
    else:
        transactions_per_case_per_month = (StockTransaction.objects.filter(
            case_id__in=case_ids).annotate(
                m=Month('report__date'), y=Year('report__date')).values(
                    'case_id', 'y', 'm').annotate(count=Count('id')))
        stats = defaultdict(list)
        for row in transactions_per_case_per_month:
            month = date(row['y'], row['m'], 1)
            stats[month].append(row['count'])

    final_stats = []
    for month, transaction_count_list in sorted(list(stats.items()),
                                                key=lambda r: r[0]):
        final_stats.append(
            (month.isoformat(),
             sum(transaction_count_list) // len(transaction_count_list)))
    self._print_table(['Month', 'Ledgers updated per case'], final_stats)
def _ledgers_per_case(self):
    """Print ledger statistics per case for the domain (SQL backend).

    Reports: case types that have ledgers, average case lifespan per type,
    average ledgers per case, and ledger updates per case per month.
    Writes all output via ``self.stdout`` / printing helpers.
    """
    db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
    results = (LedgerValue.objects.using(db_name).filter(
        domain=self.domain).values('case_id').annotate(
            ledger_count=Count('pk')))[:100]
    case_ids = set()
    ledger_count = 0
    for result in results:
        case_ids.add(result['case_id'])
        ledger_count += result['ledger_count']

    if not case_ids:
        self.stdout.write("Domain has no ledgers")
        return

    avg_ledgers_per_case = ledger_count / len(case_ids)

    case_types_result = CaseES(for_export=True)\
        .domain(self.domain).case_ids(case_ids)\
        .aggregation(TermsAggregation('types', 'type.exact'))\
        .size(0).run()
    case_types = case_types_result.aggregations.types.keys

    self.stdout.write('\nCase Types with Ledgers')
    for type_ in case_types:
        self._print_value(
            'case_type', type_,
            CaseES().domain(self.domain).case_type(type_).count())
        db_name = get_db_aliases_for_partitioned_query()[0]  # just query one shard DB
        results = (CommCareCase.objects.using(db_name).filter(
            domain=self.domain, closed=True, type=type_).annotate(
                lifespan=F('closed_on') - F('opened_on')).annotate(
                    avg_lifespan=Avg('lifespan')).values('avg_lifespan',
                                                         flat=True))
        self._print_value('Average lifespan for "%s" cases' % type_,
                          results[0]['avg_lifespan'])
        self._cases_created_per_user_per_month(type_)

    self._print_value('Average ledgers per case', avg_ledgers_per_case)

    stats = defaultdict(list)
    for db_name, case_ids_p in split_list_by_db_partition(case_ids):
        # BUG FIX: previously filtered on the full `case_ids` set, defeating
        # the per-partition split (every shard got the whole IN clause).
        # Use the partition's own subset.
        transactions_per_case_per_month = (
            LedgerTransaction.objects.using(db_name).filter(
                case_id__in=case_ids_p).annotate(
                    m=Month('server_date'), y=Year('server_date')).values(
                        'case_id', 'y', 'm').annotate(count=Count('id')))
        for row in transactions_per_case_per_month:
            month = date(row['y'], row['m'], 1)
            stats[month].append(row['count'])

    final_stats = []
    for month, transaction_count_list in sorted(list(stats.items()),
                                                key=lambda r: r[0]):
        final_stats.append(
            (month.isoformat(),
             sum(transaction_count_list) // len(transaction_count_list)))
    self.stdout.write('Ledger updates per case')
    self._print_table(['Month', 'Ledgers updated per case'], final_stats)
def get_ledger_transactions_for_form(form_id, limit_to_cases):
    """Yield LedgerTransaction rows for form_id, limited to the given cases."""
    for db_name, case_ids in split_list_by_db_partition(limit_to_cases):
        # Query each shard DB with only the case ids it owns.
        yield from LedgerTransaction.objects.using(db_name).filter(
            case_id__in=case_ids, form_id=form_id)