def _init_table(cls, data_source_id):
    """Build the table of one static data source and keep its adapter.

    Looks up the static data source ``data_source_id`` in ``cls.domain``,
    calls ``build_table()`` on its indicator adapter, appends the adapter to
    ``cls.adapters`` and returns it.
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(cls.domain, data_source_id)
    config = StaticDataSourceConfiguration.by_id(doc_id)
    table_adapter = get_indicator_adapter(config)
    table_adapter.build_table()
    cls.adapters.append(table_adapter)
    return table_adapter
def handle(self, file_path, *args, **options):
    """Run ``sql_scripts/nos_of_deaths.sql`` and dump the rows to a CSV file.

    The script is read from the ``sql_scripts`` directory next to this
    module, has the person and AWC location table names of the ``icds-cas``
    domain interpolated into it, and is executed on the session of the
    person data source's engine.

    :param file_path: path of the CSV file to (over)write.
    """
    domain = 'icds-cas'
    data_source_id = StaticDataSourceConfiguration.get_doc_id(domain, PERSON_TABLE_ID)
    config = StaticDataSourceConfiguration.by_id(data_source_id)
    adapter = get_indicator_adapter(config)
    session_helper = connection_manager.get_session_helper(adapter.engine_id)
    person_table_name = get_table_name(domain, PERSON_TABLE_ID)
    awc_location_table_name = get_table_name(domain, AWC_LOCATION_TABLE_ID)
    session = session_helper.Session

    script_path = os.path.join(os.path.dirname(__file__), 'sql_scripts', 'nos_of_deaths.sql')
    with open(script_path, encoding='utf-8') as f:
        sql_script = f.read()

    rows = session.execute(
        sql_script % {
            'person_table_name': person_table_name,
            'awc_location_table_name': awc_location_table_name
        }
    )

    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate '\n' end up with '\r\r\n' row terminators.
    with open(file_path, 'w', encoding='utf-8', newline='') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'State',
            'District',
            'AWC',
            'Month',
            'Deaths',
        ])
        writer.writerows(rows)
def handle(self, domain, file_path, *args, **options):
    """Run ``sql_scripts/select_non_aadhar.sql`` and dump the rows to a CSV file.

    The script is read from the ``sql_scripts`` directory next to this
    module, has the person and AWC location table names of ``domain``
    interpolated into it, and is executed on the session of the person data
    source's engine.

    :param domain: domain whose person / AWC location tables are queried.
    :param file_path: path of the CSV file to (over)write.
    """
    data_source_id = StaticDataSourceConfiguration.get_doc_id(domain, PERSON_TABLE_ID)
    config = StaticDataSourceConfiguration.by_id(data_source_id)
    adapter = get_indicator_adapter(config)
    session_helper = connection_manager.get_session_helper(adapter.engine_id)
    person_table_name = get_table_name(domain, PERSON_TABLE_ID)
    awc_location_table_name = get_table_name(domain, AWC_LOCATION_TABLE_ID)
    session = session_helper.Session

    script_path = os.path.join(os.path.dirname(__file__), 'sql_scripts', 'select_non_aadhar.sql')
    with open(script_path, encoding='utf-8') as f:
        sql_script = f.read()

    rows = session.execute(
        sql_script % {
            'person_table_name': person_table_name,
            'awc_location_table_name': awc_location_table_name
        }
    )

    # csv.writer emits text, so the file must be opened in text mode (a 'wb'
    # handle rejects str on Python 3). newline='' leaves line endings to the
    # csv module, as its documentation requires.
    with open(file_path, 'w', encoding='utf-8', newline='') as file_object:
        writer = csv.writer(file_object)
        writer.writerow([
            'Name of Beneficiary',
            'Date of Birth',
            'AWC',
            'Block',
            'District',
            'State'
        ])
        writer.writerows(rows)
def test_doc_filter_mapping(self, doc_type, data_source_id):
    """Check one dashboard data source against its ``UCR_MAPPING`` entry.

    The static configuration must reference ``doc_type`` and its case type /
    xmlns filter must contain exactly the values listed in
    ``UCR_MAPPING[doc_type][data_source_id]`` (compared as sets).
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(DASHBOARD_DOMAIN, data_source_id)
    config = StaticDataSourceConfiguration.by_id(doc_id)
    expected_filters = set(UCR_MAPPING[doc_type][data_source_id])

    self.assertEqual(doc_type, config.referenced_doc_type)
    actual_filters = set(config.get_case_type_or_xmlns_filter())
    self.assertEqual(expected_filters, actual_filters)
def setUpClass(cls):
    """Load the static data source under test and prepare a case factory.

    Starts ``cls._call_center_domain_mock``, wraps the JSON named by
    ``cls.datasource_filename`` as a static data source, takes its first
    domain as ``cls.domain`` and stores the per-domain data source config
    and a ``CaseFactory`` for that domain on the class.
    """
    super(BaseEnikshayDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()

    raw_datasource = cls.get_json(cls.datasource_filename)
    cls.static_datasource = StaticDataSourceConfiguration.wrap(raw_datasource)
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource, cls.domain)
    cls.factory = CaseFactory(domain=cls.domain)
def setUpClass(cls):
    """Prepare the class-level fixtures shared by the data source tests.

    After starting ``cls._call_center_domain_mock`` this wraps the JSON file
    ``cls.datasource_filename`` into a static data source, uses the first of
    its domains as ``cls.domain``, derives the data source config for that
    domain and creates a ``CaseFactory`` bound to it.
    """
    super(BaseEnikshayDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()

    datasource_json = cls.get_json(cls.datasource_filename)
    cls.static_datasource = StaticDataSourceConfiguration.wrap(datasource_json)
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource,
        cls.domain,
    )
    cls.factory = CaseFactory(domain=cls.domain)
def setUpClass(cls):
    """Load the static data source under test and rebuild its table.

    Starts ``cls._call_center_domain_mock``, wraps the JSON named by
    ``cls.datasource_filename``, takes its first domain as ``cls.domain`` and
    stores the per-domain config, a ``CaseFactory`` and an indicator adapter
    on the class. The adapter's ``rebuild_table()`` is called last.
    """
    super(BaseICDSDatasourceTest, cls).setUpClass()
    cls._call_center_domain_mock.start()

    raw_datasource = cls.get_json(cls.datasource_filename)
    cls.static_datasource = StaticDataSourceConfiguration.wrap(raw_datasource)
    cls.domain = cls.static_datasource.domains[0]
    cls.datasource = StaticDataSourceConfiguration._get_datasource_config(
        cls.static_datasource, cls.domain)
    cls.casefactory = CaseFactory(domain=cls.domain)

    cls.adapter = get_indicator_adapter(cls.datasource)
    cls.adapter.rebuild_table()
def _get_ds(ds_domains, data_source_path):
    """Load a data source JSON file as a static data source configuration.

    The loaded JSON has its ``domains`` replaced by ``ds_domains`` and its
    ``server_environment`` set to a one-element list holding the current
    ``settings.SERVER_ENVIRONMENT``; a deep copy of that dict is wrapped.

    :returns: ``(StaticDataSourceConfiguration, data_source_path)``
    """
    raw = _get_json(data_source_path)
    raw['domains'] = ds_domains
    raw['server_environment'] = [settings.SERVER_ENVIRONMENT]
    wrapped = StaticDataSourceConfiguration.wrap(deepcopy(raw))
    return wrapped, data_source_path
def _setup_ucr_tables():
    """Recreate the test domain's static UCR tables and load the CSV fixtures.

    With the call center provider patched and ``SERVER_ENVIRONMENT``
    overridden, every static data source of ``TEST_DOMAIN`` has its table
    dropped (best effort) and built again. Each file in ``INPUT_PATH`` is
    then copied into the table named by ``FILE_NAME_TO_TABLE_MAPPING`` on
    the ``'aaa-data'`` engine, using the file's first line as column list.
    """
    provider_path = 'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    with mock.patch(provider_path), override_settings(SERVER_ENVIRONMENT=TEST_ENVIRONMENT):
        configs = StaticDataSourceConfiguration.by_domain(TEST_DOMAIN)
        adapters = [get_indicator_adapter(config) for config in configs]
        for table_adapter in adapters:
            try:
                table_adapter.drop_table()
            except Exception:
                # Deliberately ignored: any failure to drop is tolerated and
                # the table is built regardless.
                pass
            table_adapter.build_table()

    engine = connection_manager.get_engine('aaa-data')
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine, extend_existing=True)

    for file_name in os.listdir(INPUT_PATH):
        fixture_path = os.path.join(INPUT_PATH, file_name)
        with open(fixture_path, encoding='utf-8') as f:
            # file_name[:-4] drops the last four characters (the extension).
            table = metadata.tables[FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]]
            # The header line is consumed here, so copy_from only sees data.
            header_cells = f.readline().split(',')
            columns = ['"{}"'.format(cell.strip()) for cell in header_cells]  # quote to preserve case
            postgres_copy.copy_from(
                f, table, engine,
                format='csv' if six.PY3 else b'csv',
                null='' if six.PY3 else b'',
                columns=columns
            )
def tearDownModule():
    """Remove the ``icds-cas`` UCR tables, locations and domain after the tests.

    Does nothing when ``settings.USE_PARTITIONED_DATABASE`` is set. Otherwise
    the call center provider is patched for the duration of the cleanup:
    every static data source table of ``icds-cas`` is dropped (the
    ``static-child_health_cases`` table is only cleared), all rows of
    ``ucr_table_name_mapping`` are deleted, and the domain's location types,
    locations and the domain itself are deleted.
    """
    if settings.USE_PARTITIONED_DATABASE:
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    # try/finally so a failing cleanup step cannot leave the patch active for
    # the rest of the test run.
    try:
        with override_settings(SERVER_ENVIRONMENT='icds'):
            configs = StaticDataSourceConfiguration.by_domain('icds-cas')
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                if adapter.config.table_id == 'static-child_health_cases':
                    # hack because this is in a migration
                    adapter.clear_table()
                    continue
                adapter.drop_table()

            engine = connection_manager.get_engine(ICDS_UCR_ENGINE_ID)
            with engine.begin() as connection:
                metadata = sqlalchemy.MetaData(bind=engine)
                metadata.reflect(bind=engine, extend_existing=True)
                table = metadata.tables['ucr_table_name_mapping']
                delete = table.delete()
                connection.execute(delete)

        LocationType.objects.filter(domain='icds-cas').delete()
        SQLLocation.objects.filter(domain='icds-cas').delete()
        Domain.get_by_name('icds-cas').delete()
    finally:
        _call_center_domain_mock.stop()
def agg_from_household_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy household data onto child rows.

    The statement updates ``cls``'s table from the ``reach-household_cases``
    UCR table of ``domain``, joining on the child's ``household_case_id``.
    The statement text does not reference the window bounds; they are only
    passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-household_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'awc_id = household.awc_owner_id, '
        'village_id = household.village_owner_id, '
        'hh_address = household.hh_address, '
        'hh_religion = household.hh_religion, '
        'hh_caste = household.hh_caste, '
        'hh_bpl_apl = household.hh_bpl_apl '
        'FROM ( '
        'SELECT doc_id, awc_owner_id, village_owner_id, hh_address, hh_religion, hh_caste, hh_bpl_apl '
        'FROM "{household_cases_ucr_tablename}" '
        ') household '
        'WHERE child.household_case_id = household.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        household_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_village_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy village hierarchy ids onto child rows.

    The statement updates ``cls``'s table from the ``reach-village_location``
    UCR table of ``domain``, joining on the child's ``village_id``. The
    statement text does not reference the window bounds; they are only
    passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-village_location')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'sc_id = village.sc_id, '
        'phc_id = village.phc_id, '
        'taluka_id = village.taluka_id, '
        'district_id = village.district_id, '
        'state_id = village.state_id '
        'FROM ( '
        'SELECT doc_id, sc_id, phc_id, taluka_id, district_id, state_id '
        'FROM "{village_location_ucr_tablename}" '
        ') village '
        'WHERE child.village_id = village.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        village_location_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_eligible_couple_forms_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that set ``fp_current_method_ranges``.

    For every ``person_case_id`` in the ``reach-eligible_couple_forms`` UCR
    table of ``domain``, each form is paired with the next form of the same
    person and turned into ``daterange(timeend, next_timeend)``. Ranges whose
    ``fp_current_method`` is ``'none'`` are dropped and the rest aggregated
    into an array that is written to the matching row of ``cls``'s table.
    The statement text does not reference the window bounds; they are only
    passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-eligible_couple_forms')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, form one single-line
    # statement.
    sql_template = (
        ' UPDATE "{woman_tablename}" AS woman SET '
        'fp_current_method_ranges = eligible_couple_fp.fp_current_method_ranges '
        'FROM ( '
        'SELECT person_case_id, array_agg(fp_current_method_range) AS fp_current_method_ranges '
        'FROM ( '
        'SELECT person_case_id, fp_current_method, '
        'daterange(timeend::date, next_timeend::date) AS fp_current_method_range '
        'FROM ( '
        'SELECT person_case_id, fp_current_method, timeend, '
        'LEAD(fp_current_method) OVER w AS next_fp_current_method, '
        'LEAD(timeend) OVER w AS next_timeend '
        'FROM "{eligible_couple_ucr_tablename}" '
        # The window must be ascending: LEAD() then yields the *later* form,
        # so the daterange lower bound never exceeds its upper bound. With a
        # descending order the bounds are reversed, which PostgreSQL rejects.
        'WINDOW w AS (PARTITION BY person_case_id ORDER BY timeend ASC) '
        ') AS _tmp_table '
        ') eligible_couple '
        "WHERE fp_current_method != 'none' "
        'GROUP BY person_case_id '
        ') AS eligible_couple_fp '
        'WHERE woman.person_case_id = eligible_couple_fp.person_case_id '
    )
    sql = sql_template.format(
        woman_tablename=cls._meta.db_table,
        eligible_couple_ucr_tablename=ucr_tablename,
    )
    return sql, {'domain': domain, 'window_start': window_start, 'window_end': window_end}
def _get_old_new_tablenames(engine_id=None):
    """Group old/new table names of all data sources by engine id.

    Static data sources are processed first, then every data source returned
    by the ``userreports/active_data_sources`` view. A data source is skipped
    when ``engine_id`` is given and differs from its own engine, or when its
    old table name was already seen for that engine (a warning is logged).

    :param engine_id: optional engine id to restrict the result to.
    :returns: ``defaultdict`` mapping engine id to a list of ``DSConf``.
    """
    by_engine_id = defaultdict(list)
    seen_tables = defaultdict(set)

    def _register(ds):
        # Shared per-data-source bookkeeping for both the static and the
        # view-backed pass.
        ds_engine_id = ds['engine_id']
        if engine_id and ds_engine_id != engine_id:
            return
        old, new = _table_names(ds.domain, ds.table_id)
        if old in seen_tables[ds_engine_id]:
            logger.warning('Duplicate table: %s - %s', ds.get_id, old)
            return
        seen_tables[ds_engine_id].add(old)
        by_engine_id[ds_engine_id].append(DSConf(old, new, ds))

    for static_ds in StaticDataSourceConfiguration.all():
        _register(static_ds)

    view_rows = DataSourceConfiguration.view(
        'userreports/active_data_sources',
        reduce=False,
        include_docs=False,
    )
    data_source_ids = [row['id'] for row in view_rows]
    for raw_doc in iter_docs(DataSourceConfiguration.get_db(), data_source_ids):
        _register(DataSourceConfiguration.wrap(raw_doc))

    return by_engine_id
def agg_from_eligible_couple_forms_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that upsert family planning history rows.

    Per ``person_case_id`` of the ``reach-eligible_couple_forms`` UCR table
    the statement aggregates ``[timeend, fp_current_method]`` pairs,
    ``[timeend, fp_preferred_method]`` pairs and the form ``timeend`` values
    into arrays and inserts them into ``cls``'s table, overwriting the three
    history columns on a ``person_case_id`` conflict. The statement text
    does not reference the window bounds; they are only passed through in
    the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-eligible_couple_forms')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' INSERT INTO "{woman_history_tablename}" AS woman ( '
        'person_case_id, fp_current_method_history, fp_preferred_method_history, family_planning_form_history '
        ') ( '
        'SELECT person_case_id, '
        'array_agg(fp_current_method) AS fp_current_method_history, '
        'array_agg(fp_preferred_method) AS fp_preferred_method_history, '
        'array_agg(timeend) AS family_planning_form_history '
        'FROM ( '
        'SELECT person_case_id, timeend, '
        'ARRAY[timeend::text, fp_current_method] AS fp_current_method, '
        'ARRAY[timeend::text, fp_preferred_method] AS fp_preferred_method '
        'FROM "{eligible_couple_ucr_tablename}" '
        ') eligible_couple '
        'GROUP BY person_case_id '
        ') '
        'ON CONFLICT (person_case_id) DO UPDATE SET '
        'fp_current_method_history = EXCLUDED.fp_current_method_history, '
        'fp_preferred_method_history = EXCLUDED.fp_preferred_method_history, '
        'family_planning_form_history = EXCLUDED.family_planning_form_history '
    )
    sql = sql_template.format(
        woman_history_tablename=cls._meta.db_table,
        eligible_couple_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_ccs_record_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that set ``pregnant_ranges``.

    From the ``reach-ccs_record_cases`` UCR table of ``domain`` the statement
    builds an inclusive ``daterange(opened_on, add)`` per record (only where
    ``opened_on < add`` or ``add`` is NULL), aggregates the distinct ranges
    per ``person_case_id`` and writes the array to the matching row of
    ``cls``'s table. The statement text does not reference the window
    bounds; they are only passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-ccs_record_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{woman_tablename}" AS woman SET '
        'pregnant_ranges = ccs_record.pregnant_ranges '
        'FROM ( '
        'SELECT person_case_id, array_agg(pregnant_range) as pregnant_ranges '
        'FROM( '
        'SELECT person_case_id, '
        "daterange(opened_on::date, add, '[]') as pregnant_range "
        'FROM "{ccs_record_cases_ucr_tablename}" '
        'WHERE opened_on < add OR add IS NULL '
        'GROUP BY person_case_id, pregnant_range '
        ') AS _tmp_table '
        'GROUP BY person_case_id '
        ') ccs_record '
        'WHERE woman.person_case_id = ccs_record.person_case_id '
    )
    sql = sql_template.format(
        woman_tablename=cls._meta.db_table,
        ccs_record_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_child_health_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that upsert child rows from child health cases.

    Every row of the ``reach-child_health_cases`` UCR table of ``domain`` is
    inserted into ``cls``'s table with ``%(domain)s`` as its domain; on a
    ``child_health_case_id`` conflict the listed columns are overwritten.
    Only the ``domain`` parameter is referenced by the statement text; the
    window bounds are just passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-child_health_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' INSERT INTO "{child_tablename}" AS child ( '
        'domain, person_case_id, child_health_case_id, mother_case_id, opened_on, closed_on, '
        'birth_weight, breastfed_within_first, is_exclusive_breastfeeding, comp_feeding, '
        'diet_diversity, diet_quantity, hand_wash '
        ') ( '
        'SELECT %(domain)s AS domain, person_case_id, doc_id, mother_case_id, opened_on, closed_on, '
        'birth_weight, breastfed_within_first, is_exclusive_breastfeeding, comp_feeding, '
        'diet_diversity, diet_quantity, hand_wash '
        'FROM "{child_health_cases_ucr_tablename}" child_health '
        ') '
        'ON CONFLICT (child_health_case_id) DO UPDATE SET '
        'mother_case_id = EXCLUDED.mother_case_id, '
        'closed_on = EXCLUDED.closed_on, '
        'birth_weight = EXCLUDED.birth_weight, '
        'breastfed_within_first = EXCLUDED.breastfed_within_first, '
        'is_exclusive_breastfeeding = EXCLUDED.is_exclusive_breastfeeding, '
        'comp_feeding = EXCLUDED.comp_feeding, '
        'diet_diversity = EXCLUDED.diet_diversity, '
        'diet_quantity = EXCLUDED.diet_quantity, '
        'hand_wash = EXCLUDED.hand_wash '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        child_health_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_person_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy person data onto child rows.

    The statement updates ``cls``'s table from the ``reach-person_cases`` UCR
    table of ``domain``, joining on the child's ``person_case_id``;
    ``has_aadhar_number`` is derived as "aadhar_number is neither NULL nor
    empty". The statement text does not reference the window bounds; they
    are only passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-person_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'household_case_id = person.household_case_id, '
        'dob = person.dob, '
        'sex = person.sex, '
        'migration_status = person.migration_status, '
        'has_aadhar_number = person.has_aadhar_number, '
        'contact_phone_number = person.contact_phone_number '
        'FROM ( '
        'SELECT household_case_id, doc_id, dob, sex, migration_status, '
        "aadhar_number IS NOT NULL and aadhar_number != '' AS has_aadhar_number, "
        'contact_phone_number '
        'FROM "{person_cases_ucr_tablename}" '
        ') person '
        'WHERE child.person_case_id = person.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        person_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_tasks_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that set the last immunization on child rows.

    The ``reach-tasks_cases`` UCR table of ``domain`` is unpivoted: for each
    ``child`` tasks case one row is produced per entry of ``PRODUCT_CODES``,
    pairing the quoted code with its ``due_list_date_<code>`` column. Rows
    dated ``1970-01-01`` are excluded and the result is joined to ``cls``'s
    table on ``child_health_case_id = parent_case_id``. The statement text
    does not reference the window bounds; they are only passed through in
    the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-tasks_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Both lists are generated from PRODUCT_CODES in the same order, so the
    # two unnest() calls in the statement line up element by element.
    quoted_codes = ["'{}'".format(code) for code in PRODUCT_CODES]
    date_columns = ['due_list_date_{}'.format(code) for code in PRODUCT_CODES]
    product_codes = ', '.join(quoted_codes)
    column_names = ', '.join(date_columns)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'tasks_case_id = tasks.doc_id, '
        'last_immunization_type = tasks.last_immunization_type, '
        'last_immunization_date = tasks.last_immunization_date '
        'FROM ( '
        'SELECT doc_id AS doc_id, parent_case_id AS parent_case_id, '
        'LAST_VALUE(product_code) OVER w AS last_immunization_type, '
        'LAST_VALUE(product_date) OVER w AS last_immunization_date '
        'FROM ( '
        'SELECT doc_id, parent_case_id, '
        'unnest(array[{product_codes}]) AS product_code, '
        'unnest(array[{column_names}]) AS product_date '
        'FROM "{tasks_cases_ucr_tablename}" '
        "WHERE tasks_type = 'child' "
        ') AS _tasks '
        "WHERE product_date != '1970-01-01' "
        'WINDOW w AS (PARTITION BY doc_id, parent_case_id ORDER BY product_date DESC) '
        ') tasks '
        'WHERE child.child_health_case_id = tasks.parent_case_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        tasks_cases_ucr_tablename=ucr_tablename,
        product_codes=product_codes,
        column_names=column_names,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_delivery_forms_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy delivery form data onto child rows.

    The statement reads ``ccs_record_case_id`` and ``child_cried`` from the
    ``reach-delivery_forms`` UCR table of ``domain`` through a window
    partitioned by ``child_health_case_id`` and writes them to the matching
    row of ``cls``'s table. The statement text does not reference the window
    bounds; they are only passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-delivery_forms')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'ccs_record_case_id = delivery_forms.ccs_record_case_id, '
        'child_cried = delivery_forms.child_cried '
        'FROM ( '
        'SELECT child_health_case_id, '
        'LAST_VALUE(ccs_record_case_id) OVER w AS ccs_record_case_id, '
        'LAST_VALUE(child_cried) OVER w as child_cried '
        'FROM "{delivery_form_ucr_tablename}" '
        'WINDOW w AS (PARTITION BY child_health_case_id ORDER BY timeend DESC) '
        ') AS delivery_forms '
        'WHERE child.child_health_case_id = delivery_forms.child_health_case_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        delivery_form_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def rebuild_indicators(indicator_config_id):
    """Rebuild the SQL table of one data source from its source documents.

    The configuration is loaded as a static data source when the id starts
    with the static id prefix, otherwise from the database. The table is
    rebuilt through an ``IndicatorSqlAdapter`` and every document of the
    configured doc type in the configured domain is saved to it, 500 at a
    time. A ``DataError`` on a single document is logged and skipped. For
    non-static configurations the build is finally marked as finished.
    """
    static_prefix = StaticDataSourceConfiguration._datasource_id_prefix
    is_static = indicator_config_id.startswith(static_prefix)
    config = (
        StaticDataSourceConfiguration.by_id(indicator_config_id)
        if is_static
        else DataSourceConfiguration.get(indicator_config_id)
    )

    # Save the start time now in case anything goes wrong. This way we'll be
    # able to see if the rebuild started a long time ago without finishing.
    config.meta.build.initiated = datetime.datetime.utcnow()
    config.save()

    adapter = IndicatorSqlAdapter(config)
    adapter.rebuild_table()

    couchdb = _get_db(config.referenced_doc_type)
    relevant_ids = get_doc_ids(config.domain, config.referenced_doc_type, database=couchdb)
    for doc in iter_docs(couchdb, relevant_ids, chunksize=500):
        try:
            # save is a noop if the filter doesn't match
            adapter.save(doc)
        except DataError as e:
            logging.exception("problem saving document {} to table. {}".format(doc["_id"], e))

    if is_static:
        return
    config.meta.build.finished = True
    config.save()
def setUpModule():
    """Create the ``champ-cameroon`` domain, its UCR tables and CSV fixtures.

    With the call center provider patched, the domain is created, the table
    of each of its static data sources is built, and every file in the
    ``fixtures`` directory next to this module is copied (CSV with header)
    into the UCR table named after the file. If any of the table / fixture
    steps fails, ``tearDownModule()`` is called before the error is re-raised.
    """
    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    # try/finally so the patch is also undone when setup fails; otherwise it
    # would stay active for every test that runs afterwards.
    try:
        domain = create_domain('champ-cameroon')
        try:
            configs = StaticDataSourceConfiguration.by_domain(domain.name)
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                adapter.build_table()

            engine = connection_manager.get_engine(UCR_ENGINE_ID)
            metadata = sqlalchemy.MetaData(bind=engine)
            metadata.reflect(bind=engine, extend_existing=True)

            path = os.path.join(os.path.dirname(__file__), 'fixtures')
            for file_name in os.listdir(path):
                with open(os.path.join(path, file_name), encoding='utf-8') as f:
                    # file_name[:-4] drops the last four characters (the extension).
                    table_name = get_table_name(domain.name, file_name[:-4])
                    table = metadata.tables[table_name]
                    postgres_copy.copy_from(f, table, engine, format='csv', null='', header=True)
        except Exception:
            # Clean up whatever was created before propagating the failure.
            tearDownModule()
            raise
    finally:
        _call_center_domain_mock.stop()
def _setup_ucr_tables():
    """Recreate the test domain's static UCR tables and load the CSV fixtures.

    With the call center provider patched and ``SERVER_ENVIRONMENT``
    overridden, every static data source of ``TEST_DOMAIN`` has its table
    dropped (best effort) and built again. Each file in ``INPUT_PATH`` is
    then copied into the table named by ``FILE_NAME_TO_TABLE_MAPPING`` on
    the ``'aaa-data'`` engine, using the file's first line as column list.
    """
    provider_path = 'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    with mock.patch(provider_path), override_settings(SERVER_ENVIRONMENT=TEST_ENVIRONMENT):
        configs = StaticDataSourceConfiguration.by_domain(TEST_DOMAIN)
        adapters = [get_indicator_adapter(config) for config in configs]
        for table_adapter in adapters:
            try:
                table_adapter.drop_table()
            except Exception:
                # Deliberately ignored: any failure to drop is tolerated and
                # the table is built regardless.
                pass
            table_adapter.build_table()

    engine = connection_manager.get_engine('aaa-data')
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine, extend_existing=True)

    for file_name in os.listdir(INPUT_PATH):
        fixture_path = os.path.join(INPUT_PATH, file_name)
        with open(fixture_path, encoding='utf-8') as f:
            # file_name[:-4] drops the last four characters (the extension).
            table = metadata.tables[FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]]
            # The header line is consumed here, so copy_from only sees data.
            header_cells = f.readline().split(',')
            columns = ['"{}"'.format(cell.strip()) for cell in header_cells]  # quote to preserve case
            postgres_copy.copy_from(f, table, engine, format='csv', null='', columns=columns)
def setup_tables_and_fixtures(domain_name):
    """Recreate a domain's static UCR tables and load the CSV fixtures.

    Every static data source table of ``domain_name`` is dropped (best
    effort) and built again, ``cleanup_misc_agg_tables()`` is run, and each
    file in the ``fixtures`` directory next to this module is copied into the
    table named by ``FILE_NAME_TO_TABLE_MAPPING`` on the Citus UCR engine.
    Files mapped to a table whose name starts with ``icds_dashboard_`` are
    opened but not loaded. Finally ``_distribute_tables_for_citus`` is called
    with the engine.
    """
    configs = StaticDataSourceConfiguration.by_domain(domain_name)
    for table_adapter in [get_indicator_adapter(config) for config in configs]:
        try:
            table_adapter.drop_table()
        except Exception:
            # Deliberately ignored: any failure to drop is tolerated and the
            # table is built regardless.
            pass
        table_adapter.build_table()

    cleanup_misc_agg_tables()

    engine = connection_manager.get_engine(ICDS_UCR_CITUS_ENGINE_ID)
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine, extend_existing=True)

    fixtures_dir = os.path.join(os.path.dirname(__file__), 'fixtures')
    for file_name in os.listdir(fixtures_dir):
        with open(os.path.join(fixtures_dir, file_name), encoding='utf-8') as f:
            # file_name[:-4] drops the last four characters (the extension).
            table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
            table = metadata.tables[table_name]
            if table_name.startswith('icds_dashboard_'):
                continue
            # The header line is consumed here, so copy_from only sees data.
            header_cells = f.readline().split(',')
            columns = ['"{}"'.format(cell.strip()) for cell in header_cells]  # quote to preserve case
            postgres_copy.copy_from(f, table, engine, format='csv', null='', columns=columns)

    _distribute_tables_for_citus(engine)
def handle(self, domains, **options):
    """Rebuild call center data sources whose UCR and ES percentages diverge.

    Either explicit ``domains`` or ``options['all']`` is required. Explicit
    domains must be call center domains that use fixtures. For each domain
    the data source stats are fetched; a data source is rebuilt when the
    absolute difference between its ``ucr_percent`` and ``es_percent``
    exceeds ``options['threshold']``. A failing rebuild is reported on
    stderr and does not stop the run.

    :raises CommandError: when neither domains nor ``--all`` is given.
    """
    if not (domains or options['all']):
        raise CommandError('Specify specific domains or --all')

    all_domains = [
        call_center_domain.name
        for call_center_domain in get_call_center_domains()
        if call_center_domain.use_fixtures
    ]
    if not domains:
        domains = all_domains
    else:
        for requested in domains:
            assert requested in all_domains, "Domain '{}' is not a Call Center domain".format(requested)

    threshold = options['threshold']
    domain_stats = get_call_center_data_source_stats(domains)

    for domain in domains:
        stats = domain_stats[domain]
        print('Checking domain:', domain)
        if stats.error:
            print('Error getting stats:\n', stats.error)
            continue

        for stat in stats.iter_data_source_stats():
            diff = math.fabs(stat.ucr_percent - stat.es_percent)
            if diff > threshold:
                message = "rebuilding data source '{}' in domain '{}': diff = {}".format(
                    stat.name, domain, diff
                )
                print(message)
                try:
                    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, TABLE_IDS[stat.name])
                    rebuild_indicators(doc_id)
                except Exception as e:
                    sys.stderr.write("Error rebuilding data source '{}' in domain '{}':\n{}".format(
                        stat.name, domain, e
                    ))
def get_datasources_for_domain(domain, referenced_doc_type=None, include_static=False, include_aggregate=False):
    """Return the data sources of a domain, optionally with static and aggregate ones.

    Dynamic data sources come from the ``userreports/data_sources_by_build_info``
    view and are sorted by display name. Static data sources (sorted the same
    way) and aggregate table definitions are appended after them on request.

    :param domain: domain to list data sources for.
    :param referenced_doc_type: if given, only data sources referencing this
        doc type are returned (aggregate definitions are not filtered by it).
    :param include_static: also append the domain's static data sources.
    :param include_aggregate: also append the domain's
        ``AggregateTableDefinition`` objects.
    :returns: list of data source objects.
    """
    from corehq.apps.userreports.models import DataSourceConfiguration, StaticDataSourceConfiguration

    def _by_display_name(config):
        # A missing display name sorts as '' so that None never gets compared
        # with a string (a TypeError on Python 3). Applied to both the dynamic
        # and the static list.
        return config.display_name or ''

    key = [domain]
    if referenced_doc_type:
        key.append(referenced_doc_type)

    datasources = sorted(
        DataSourceConfiguration.view(
            'userreports/data_sources_by_build_info',
            startkey=key,
            endkey=key + [{}],
            reduce=False,
            include_docs=True
        ),
        key=_by_display_name
    )

    if include_static:
        static_ds = StaticDataSourceConfiguration.by_domain(domain)
        if referenced_doc_type:
            static_ds = [ds for ds in static_ds if ds.referenced_doc_type == referenced_doc_type]
        datasources.extend(sorted(static_ds, key=_by_display_name))

    if include_aggregate:
        from corehq.apps.aggregate_ucrs.models import AggregateTableDefinition
        datasources.extend(AggregateTableDefinition.objects.filter(domain=domain).all())
    return datasources
def handle(self, **options):
    """Print row count and last insert time of tables flagged for removal.

    For each engine id derived from all dynamic and static data sources
    (optionally narrowed by ``options['engine_id']``), the database is
    compared with ``metadata`` and the tables reported as ``remove_table``
    are collected. Each such table is then queried for ``COUNT(*)`` and
    ``MAX(inserted_at)``; a table on which the query fails is reported as
    probably not being a UCR table.
    """
    data_sources = list(DataSourceConfiguration.all())
    data_sources.extend(list(StaticDataSourceConfiguration.all()))
    engine_ids = self._get_engine_ids(data_sources, options.get('engine_id'))

    tables_to_remove_by_engine = defaultdict(list)
    for engine_id in engine_ids:
        engine = connection_manager.get_engine(engine_id)
        with engine.begin() as connection:
            migration_context = get_migration_context(connection, include_object=_include_object)
            diffs = reformat_alembic_diffs(compare_metadata(migration_context, metadata))
        removable = [diff.table_name for diff in diffs if diff.type == 'remove_table']
        tables_to_remove_by_engine[engine_id] = removable

    for engine_id, tablenames in tables_to_remove_by_engine.items():
        engine = connection_manager.get_engine(engine_id)
        for tablename in tablenames:
            # One transaction per table, so a failing query on one table does
            # not affect the next one.
            with engine.begin() as connection:
                query = 'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'.format(tablename=tablename)
                try:
                    result = connection.execute(query)
                except Exception:
                    print(tablename, "no inserted_at column, probably not UCR")
                else:
                    print(tablename, result.fetchone())
def _ucr(self):
    """Print the number of UCR data sources of ``self.domain`` and a size table.

    The table has one row per static or dynamic data source with its display
    name, an approximate row count taken from ``EXPLAIN``, its referenced doc
    type and the total relation size in bytes; rows are sorted by that size.
    """
    static_datasources = StaticDataSourceConfiguration.by_domain(self.domain)
    dynamic_datasources = DataSourceConfiguration.by_domain(self.domain)
    self._print_value('Static UCR data sources', len(static_datasources))
    self._print_value('Dynamic UCR data sources', len(dynamic_datasources))

    def _get_count(config):
        # Approximate row count of the data source's table.
        table_name = get_table_name(config.domain, config.table_id)
        db_name = connection_manager.get_django_db_alias(config.engine_id)
        return _get_count_from_explain_raw(db_name, 'SELECT * FROM "%s"' % table_name, [])

    def _get_table_size(config):
        # Result of pg_total_relation_size() for the data source's table.
        table_name = get_table_name(config.domain, config.table_id)
        db_name = connection_manager.get_django_db_alias(config.engine_id)
        with connections[db_name].cursor() as cursor:
            cursor.execute("SELECT pg_total_relation_size('\"%s\"')" % table_name, [])
            size_in_bytes = cursor.fetchone()[0]
        return size_in_bytes

    rows = []
    for datasource in static_datasources + dynamic_datasources:
        rows.append((
            datasource.display_name,
            _get_count(datasource),
            datasource.referenced_doc_type,
            _get_table_size(datasource),
        ))
    rows.sort(key=lambda row: row[-1])

    # NOTE(review): five headers are passed but each row has four values —
    # confirm how _print_table handles the 'Size' / 'Size (bytes)' columns.
    headers = ['Datasource name', 'Row count (approximate)', 'Doc type', 'Size', 'Size (bytes)']
    self._print_table(headers, rows)
def setUpModule():
    """Create the ``champ-cameroon`` domain, its UCR tables and CSV fixtures.

    Returns immediately when the ``Domain`` database is a ``Mock``.
    Otherwise, with the call center provider patched and
    ``SERVER_ENVIRONMENT`` overridden to ``'production'``, the domain is
    created, the table of each of its static data sources is built, and
    every file in the ``fixtures`` directory next to this module is copied
    (CSV with header) into the UCR table named after the file.
    """
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    # try/finally so a failing setup step cannot leave the patch active for
    # the rest of the test run.
    try:
        domain = create_domain('champ-cameroon')
        with override_settings(SERVER_ENVIRONMENT='production'):
            configs = StaticDataSourceConfiguration.by_domain(domain.name)
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                adapter.build_table()

            engine = connection_manager.get_engine(UCR_ENGINE_ID)
            metadata = sqlalchemy.MetaData(bind=engine)
            metadata.reflect(bind=engine, extend_existing=True)

            path = os.path.join(os.path.dirname(__file__), 'fixtures')
            for file_name in os.listdir(path):
                with open(os.path.join(path, file_name), encoding='utf-8') as f:
                    # file_name[:-4] drops the last four characters (the extension).
                    table_name = get_table_name(domain.name, file_name[:-4])
                    table = metadata.tables[table_name]
                    postgres_copy.copy_from(
                        f, table, engine,
                        format='csv' if six.PY3 else b'csv',
                        null='' if six.PY3 else b'',
                        header=True
                    )
    finally:
        _call_center_domain_mock.stop()
def test_is_static_positive_yaml(self):
    """A data source loaded from the sample YAML file reports ``is_static``."""
    yaml_path = self.get_path('sample_static_data_source', 'yaml')
    with override_settings(STATIC_DATA_SOURCES=[yaml_path]):
        all_static = list(StaticDataSourceConfiguration.all(use_server_filter=False))
        first_config = all_static[0]
        self.assertTrue(first_config.is_static)
def tearDownModule():
    """Remove the ``icds-cas`` UCR tables, locations and domain after the tests.

    Does nothing when ``settings.USE_PARTITIONED_DATABASE`` is set. Otherwise
    the call center provider is patched for the duration of the cleanup:
    every static data source table of ``icds-cas`` is dropped (the
    ``static-child_health_cases`` table is only cleared),
    ``cleanup_misc_agg_tables()`` is run, and the domain's location types,
    locations and the domain itself are deleted. ``_stop_transaction_exemption()``
    is called once the cleanup has succeeded.
    """
    if settings.USE_PARTITIONED_DATABASE:
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    # try/finally so a failing cleanup step cannot leave the patch active for
    # the rest of the test run.
    try:
        with override_settings(SERVER_ENVIRONMENT='icds'):
            configs = StaticDataSourceConfiguration.by_domain('icds-cas')
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                if adapter.config.table_id == 'static-child_health_cases':
                    # hack because this is in a migration
                    adapter.clear_table()
                    continue
                adapter.drop_table()

            cleanup_misc_agg_tables()

        LocationType.objects.filter(domain='icds-cas').delete()
        SQLLocation.objects.filter(domain='icds-cas').delete()
        Domain.get_by_name('icds-cas').delete()
    finally:
        _call_center_domain_mock.stop()
    # Still only reached when the cleanup above did not raise.
    _stop_transaction_exemption()
def handle(self, **options):
    """Print row count and last insert time of tables flagged for removal.

    For each engine id derived from all dynamic and static data sources
    (optionally narrowed by ``options['engine_id']``), the database is
    compared with ``metadata`` and the tables reported as ``remove_table``
    are collected. Each such table is then queried for ``COUNT(*)`` and
    ``MAX(inserted_at)``; a table on which the query fails is reported as
    probably not being a UCR table.
    """
    data_sources = list(DataSourceConfiguration.all())
    data_sources.extend(list(StaticDataSourceConfiguration.all()))
    engine_ids = self._get_engine_ids(data_sources, options.get('engine_id'))

    tables_to_remove_by_engine = defaultdict(list)
    for engine_id in engine_ids:
        engine = connection_manager.get_engine(engine_id)
        with engine.begin() as connection:
            migration_context = get_migration_context(connection, include_object=_include_object)
            diffs = reformat_alembic_diffs(compare_metadata(migration_context, metadata))
        removable = [diff.table_name for diff in diffs if diff.type == 'remove_table']
        tables_to_remove_by_engine[engine_id] = removable

    for engine_id, tablenames in tables_to_remove_by_engine.items():
        engine = connection_manager.get_engine(engine_id)
        for tablename in tablenames:
            # One transaction per table, so a failing query on one table does
            # not affect the next one.
            with engine.begin() as connection:
                query = 'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'.format(tablename=tablename)
                try:
                    result = connection.execute(query)
                except Exception:
                    print(tablename, "no inserted_at column, probably not UCR")
                else:
                    print(tablename, result.fetchone())
def test_deactivate_noop(self):
    """``deactivate()`` on a static data source must not touch the database."""
    json_path = self.get_path('sample_static_data_source', 'json')
    with override_settings(STATIC_DATA_SOURCES=[json_path]):
        first_config = list(StaticDataSourceConfiguration.all())[0]
        # since this is a SimpleTest, this should fail if the call actually hits the DB
        first_config.deactivate()
def agg_from_household_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy household data onto child rows.

    The statement updates ``cls``'s table from the ``reach-household_cases``
    UCR table of ``domain``, joining on the child's ``household_case_id``.
    The statement text does not reference the window bounds; they are only
    passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-household_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'awc_id = household.awc_owner_id, '
        'village_id = household.village_owner_id, '
        'hh_address = household.hh_address, '
        'hh_religion = household.hh_religion, '
        'hh_caste = household.hh_caste, '
        'hh_bpl_apl = household.hh_bpl_apl '
        'FROM ( '
        'SELECT doc_id, awc_owner_id, village_owner_id, hh_address, hh_religion, hh_caste, hh_bpl_apl '
        'FROM "{household_cases_ucr_tablename}" '
        ') household '
        'WHERE child.household_case_id = household.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        household_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_person_case_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that copy person data onto child rows.

    The statement updates ``cls``'s table from the ``reach-person_cases`` UCR
    table of ``domain``, joining on the child's ``person_case_id``;
    ``has_aadhar_number`` is derived as "aadhar_number is neither NULL nor
    empty". The statement text does not reference the window bounds; they
    are only passed through in the parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-person_cases')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child SET '
        'household_case_id = person.household_case_id, '
        'dob = person.dob, '
        'sex = person.sex, '
        'name = person.name, '
        'migration_status = person.migration_status, '
        'has_aadhar_number = person.has_aadhar_number, '
        'contact_phone_number = person.contact_phone_number '
        'FROM ( '
        'SELECT household_case_id, doc_id, dob, sex, name, migration_status, '
        "aadhar_number IS NOT NULL and aadhar_number != '' AS has_aadhar_number, "
        'contact_phone_number '
        'FROM "{person_cases_ucr_tablename}" '
        ') person '
        'WHERE child.person_case_id = person.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        person_cases_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def agg_from_eligible_couple_forms_ucr(cls, domain, window_start, window_end):
    """Return the SQL and parameters that set ``fp_current_method_ranges``.

    For every ``person_case_id`` in the ``reach-eligible_couple_forms`` UCR
    table of ``domain``, each form is paired with the next form of the same
    person (ascending ``timeend``) and turned into
    ``daterange(timeend, next_timeend)``. Ranges whose ``fp_current_method``
    is ``'none'`` are dropped and the rest aggregated into an array that is
    written to the matching row of ``cls``'s table. The statement text does
    not reference the window bounds; they are only passed through in the
    parameter dict.

    :returns: ``(sql, params)`` tuple
    """
    doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-eligible_couple_forms')
    config, _ = get_datasource_config(doc_id, domain)
    ucr_tablename = get_table_name(domain, config.table_id)

    # Adjacent literals, each ending in one space, so the concatenation is
    # exactly the single-line statement.
    sql_template = (
        ' UPDATE "{woman_tablename}" AS woman SET '
        'fp_current_method_ranges = eligible_couple_fp.fp_current_method_ranges '
        'FROM ( '
        'SELECT person_case_id, array_agg(fp_current_method_range) AS fp_current_method_ranges '
        'FROM ( '
        'SELECT person_case_id, fp_current_method, '
        'daterange(timeend::date, next_timeend::date) AS fp_current_method_range '
        'FROM ( '
        'SELECT person_case_id, fp_current_method, timeend, '
        'LEAD(fp_current_method) OVER w AS next_fp_current_method, '
        'LEAD(timeend) OVER w AS next_timeend '
        'FROM "{eligible_couple_ucr_tablename}" '
        'WINDOW w AS (PARTITION BY person_case_id ORDER BY timeend ASC) '
        ') AS _tmp_table '
        ') eligible_couple '
        "WHERE fp_current_method != 'none' "
        'GROUP BY person_case_id '
        ') AS eligible_couple_fp '
        'WHERE woman.person_case_id = eligible_couple_fp.person_case_id '
    )
    sql = sql_template.format(
        woman_tablename=cls._meta.db_table,
        eligible_couple_ucr_tablename=ucr_tablename,
    )
    params = {'domain': domain, 'window_start': window_start, 'window_end': window_end}
    return sql, params
def get_datasources_for_domain(domain, referenced_doc_type=None, include_static=False, include_aggregate=False):
    """List a domain's data sources, dynamic first, then static and aggregate.

    Dynamic data sources are read from the
    ``userreports/data_sources_by_build_info`` view and sorted by display
    name. When requested, the domain's static data sources (sorted the same
    way) and its aggregate table definitions are appended in that order.

    :param domain: domain to list data sources for.
    :param referenced_doc_type: if given, restricts dynamic and static data
        sources to this doc type (aggregate definitions are not filtered).
    :param include_static: append static data sources.
    :param include_aggregate: append ``AggregateTableDefinition`` objects.
    :returns: list of data source objects.
    """
    from corehq.apps.userreports.models import DataSourceConfiguration, StaticDataSourceConfiguration

    key = [domain]
    if referenced_doc_type:
        key.append(referenced_doc_type)

    datasources = sorted(DataSourceConfiguration.view(
        'userreports/data_sources_by_build_info',
        startkey=key,
        endkey=key + [{}],
        reduce=False,
        include_docs=True),
        key=lambda config: config.display_name or '')

    if include_static:
        static_ds = StaticDataSourceConfiguration.by_domain(domain)
        if referenced_doc_type:
            static_ds = [
                ds for ds in static_ds
                if ds.referenced_doc_type == referenced_doc_type
            ]
        # Same `or ''` guard as the dynamic sort above: a static data source
        # without a display name must not make sorted() compare None with a
        # string, which raises TypeError on Python 3.
        datasources.extend(
            sorted(static_ds, key=lambda config: config.display_name or ''))

    if include_aggregate:
        from corehq.apps.aggregate_ucrs.models import AggregateTableDefinition
        datasources.extend(
            AggregateTableDefinition.objects.filter(domain=domain).all())
    return datasources
def handle(self, **options):
    """Report database tables that no data source expects; optionally drop empty ones.

    For every engine returned by ``self._get_tables_by_engine_id`` the public
    base tables whose names start with ``UCR_TABLE_PREFIX`` or
    ``LEGACY_UCR_TABLE_PREFIX`` are listed, and those not in the expected set
    are printed with their row count. A table with zero rows is dropped when
    ``options['drop_empty_tables']`` is truthy.
    """
    configs = list(DataSourceConfiguration.all())
    configs.extend(list(StaticDataSourceConfiguration.all()))
    expected_by_engine = self._get_tables_by_engine_id(
        configs, options.get('engine_id'))

    orphans_by_engine = defaultdict(list)
    for engine_id, expected_tables in expected_by_engine.items():
        engine = connection_manager.get_engine(engine_id)
        with engine.begin() as connection:
            # Using string formatting rather than execute with %s syntax
            # is acceptable here because the strings we're inserting are static
            # and only templated for DRYness
            listing_sql = (
                " SELECT table_name FROM information_schema.tables "
                "WHERE table_schema='public' AND table_type='BASE TABLE' AND ( "
                f"table_name LIKE '{UCR_TABLE_PREFIX}%%' OR "
                f"table_name LIKE '{LEGACY_UCR_TABLE_PREFIX}%%' "
                "); "
            )
            rows = connection.execute(listing_sql).fetchall()
            existing_tables = {row[0] for row in rows}
        orphans_by_engine[engine_id] = existing_tables - expected_tables

    for engine_id, tablenames in orphans_by_engine.items():
        print("\nTables no longer referenced in database: {}:\n".format(
            engine_id))
        engine = connection_manager.get_engine(engine_id)
        if not tablenames:
            print("\t No tables to prune")
            continue

        for tablename in tablenames:
            with engine.begin() as connection:
                try:
                    result = connection.execute(
                        f'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'
                    )
                except Exception:
                    print(
                        f"\t{tablename}: no inserted_at column, probably not UCR"
                    )
                    continue

                row_count, idle_since = result.fetchone()
                if row_count != 0:
                    print(
                        f"\t{tablename}: {row_count} rows, idle since {idle_since}"
                    )
                    continue

                print(f"\t{tablename}: {row_count} rows")
                if options['drop_empty_tables']:
                    connection.execute(f'DROP TABLE "{tablename}"')
                    print(f'\t^-- deleted {tablename}')
def _shared_context(domain):
    """Return the template context shared by views of ``domain``.

    The dict holds the domain itself plus its reports and data sources, each
    listed as the couch-backed configurations followed by the static ones.
    """
    static_reports = list(StaticReportConfiguration.by_domain(domain))
    static_data_sources = list(StaticDataSourceConfiguration.by_domain(domain))

    all_reports = ReportConfiguration.by_domain(domain) + static_reports
    all_data_sources = DataSourceConfiguration.by_domain(domain) + static_data_sources
    return dict(
        domain=domain,
        reports=all_reports,
        data_sources=all_data_sources,
    )
def _make_data_source_for_domain(data_source_json, domain_name):
    """Wrap a copy of ``data_source_json`` as a config bound to ``domain_name``.

    The input is deep-copied, so the caller's structure is left untouched. The
    copy gets its ``domain`` set and its ``_id`` derived from the domain and
    the document's ``table_id``.
    """
    from corehq.apps.userreports.models import (
        DataSourceConfiguration,
        StaticDataSourceConfiguration,
    )

    doc = deepcopy(data_source_json)
    static_doc_id = StaticDataSourceConfiguration.get_doc_id(domain_name, doc['table_id'])
    doc.update(domain=domain_name, _id=static_doc_id)
    return DataSourceConfiguration.wrap(doc)
def handle(self, domain, **options):
    """Rebuild every static and couch-backed data source table of ``domain``.

    Prints the number of tables, then calls ``tasks.rebuild_indicators`` once
    per data source id.
    """
    # Copy into a fresh list: extending the object returned by by_domain()
    # would mutate it in place and would fail if it is not a list.
    tables = list(StaticDataSourceConfiguration.by_domain(domain))
    tables.extend(DataSourceConfiguration.by_domain(domain))

    print("Rebuilding {} tables".format(len(tables)))

    for table in tables:
        tasks.rebuild_indicators(table._id)
def test_for_table_id_conflicts(self):
    """Fail when two static data source configs share a (table_id, domain) pair."""
    occurrences = Counter(
        (config.table_id, config.domain)
        for config in StaticDataSourceConfiguration.all()
    )
    duplicates = [pair for pair, count in occurrences.items() if count > 1]

    detail_lines = [
        "table_id: {}, domain: {}".format(table_id, domain)
        for table_id, domain in duplicates
    ]
    msg = "The following data source configs have duplicate table_ids on the same domains:\n{}".format(
        "\n".join(detail_lines))
    self.assertEqual(0, len(duplicates), msg)
def call_center_data_source_configuration_provider():
    """Yield ``(config, path)`` for each call center data source definition.

    Each config is wrapped from a deep copy of the JSON loaded from its path
    and has ``domains`` set to the names of the call center domains whose
    ``use_fixtures`` is truthy. The same list object is assigned to every
    yielded config.
    """
    source_paths = (
        FORM_DATA_SOURCE_PATH,
        CASE_DATA_SOURCE_PATH,
        CASE_ACTION_DATA_SOURCE_PATH,
    )

    fixture_domains = []
    for call_center_domain in get_call_center_domains():
        if call_center_domain.use_fixtures:
            fixture_domains.append(call_center_domain.name)

    for data_source_path in source_paths:
        raw_json = _get_json(data_source_path)
        config = StaticDataSourceConfiguration.wrap(deepcopy(raw_json))
        config.domains = fixture_domains
        yield config, data_source_path
def main_context(self):
    """Extend the parent's ``main_context`` with this domain's reports and data sources.

    Both entries list the couch-backed configurations followed by the static
    ones.
    """
    static_reports = list(StaticReportConfiguration.by_domain(self.domain))
    static_data_sources = list(StaticDataSourceConfiguration.by_domain(self.domain))

    context = super(BaseUserConfigReportsView, self).main_context
    all_reports = ReportConfiguration.by_domain(self.domain) + static_reports
    all_data_sources = DataSourceConfiguration.by_domain(self.domain) + static_data_sources
    context.update(reports=all_reports, data_sources=all_data_sources)
    return context
def test_get_all(self):
    """The sample static data source expands to one config per domain."""
    sample_path = self.get_path('sample_static_data_source', 'json')
    with override_settings(STATIC_DATA_SOURCES=[sample_path]):
        configs = list(StaticDataSourceConfiguration.all())
        self.assertEqual(2, len(configs))

        first, second = configs
        self.assertEqual('example', first.domain)
        self.assertEqual('dimagi', second.domain)

        for config in configs:
            self.assertEqual('all_candidates', config.table_id)
def copy_ucr_datasources(self):
    """Copy the existing domain's data sources and map old ids to new ids.

    Couch-backed data sources have their build state reset and are saved as
    copies in the new domain. Static data sources are not copied; their new
    id is derived from the new domain and the table id.

    :return: dict of old data source id -> new data source id.
    """
    id_map = {}

    for datasource in get_datasources_for_domain(self.existing_domain):
        # Reset the build info on the object that is about to be copied.
        datasource.meta.build.finished = False
        datasource.meta.build.initiated = None
        old_id, new_id = self.save_couch_copy(datasource, self.new_domain)
        id_map[old_id] = new_id

    for static_datasource in StaticDataSourceConfiguration.by_domain(self.existing_domain):
        old_prefix = StaticDataSourceConfiguration._datasource_id_prefix + self.existing_domain + "-"
        table_id = static_datasource.get_id.replace(old_prefix, "")
        new_id = StaticDataSourceConfiguration.get_doc_id(self.new_domain, table_id)
        # check that new datasource is in new domain's list of static datasources
        StaticDataSourceConfiguration.by_id(new_id)
        id_map[static_datasource.get_id] = new_id

    return id_map
def __init__(self, domain, *args, **kwargs):
    """Initialise the field with one choice per data source of ``domain``.

    Choices are ``(_id, display_name)`` pairs, couch-backed sources first and
    static sources after them.
    """
    self.domain = domain
    standard_sources = DataSourceConfiguration.by_domain(self.domain)
    custom_sources = list(StaticDataSourceConfiguration.by_domain(domain))

    source_choices = [
        (source._id, source.display_name)
        for source in standard_sources + custom_sources
    ]
    super(ReportDataSourceField, self).__init__(*args, choices=source_choices, **kwargs)
def _data_sources_by_engine_id():
    """Group all data sources by their ``engine_id``.

    :return: defaultdict mapping engine id -> list of data sources, static
        sources first within each engine.
    """
    grouped = defaultdict(list)
    # Static configurations are walked first, matching the list order callers see.
    for config_class in (StaticDataSourceConfiguration, DataSourceConfiguration):
        for data_source in config_class.all():
            grouped[data_source['engine_id']].append(data_source)
    return grouped
def __init__(self, domain, *args, **kwargs):
    """Initialise the field with one choice per data source of ``domain``.

    Choices are ``(data_source_id, display_name)`` pairs: couch-backed
    sources, then static sources, then the domain's aggregate table
    definitions when the AGGREGATE_UCRS toggle is enabled for the domain.
    """
    self.domain = domain
    standard_sources = DataSourceConfiguration.by_domain(self.domain)
    custom_sources = list(StaticDataSourceConfiguration.by_domain(domain))
    # Concatenation builds a new list, so the += below never touches
    # standard_sources itself.
    selectable_sources = standard_sources + custom_sources

    if toggles.AGGREGATE_UCRS.enabled(domain):
        from corehq.apps.aggregate_ucrs.models import AggregateTableDefinition
        aggregate_tables = AggregateTableDefinition.objects.filter(domain=self.domain)
        selectable_sources += aggregate_tables

    source_choices = [
        (source.data_source_id, source.display_name)
        for source in selectable_sources
    ]
    super(ReportDataSourceField, self).__init__(*args, choices=source_choices, **kwargs)
def agg_from_person_case_ucr(cls, domain, window_start, window_end):
    """Build the upsert that loads women from the person-cases UCR table.

    The UCR table name is resolved from the 'reach-person_cases' static data
    source of ``domain``; the target table is ``cls._meta.db_table``. Rows
    are selected where ``sex = 'F'`` and the age in years is between 15 and
    49, and an existing ``person_case_id`` is updated instead of inserted.

    Returns a ``(sql, params)`` tuple. The statement uses the ``domain``
    parameter; ``window_start`` and ``window_end`` are passed along in
    ``params`` but have no placeholder in this statement.
    """
    source_doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-person_cases')
    config, _ = get_datasource_config(source_doc_id, domain)
    source_table = get_table_name(domain, config.table_id)

    # Adjacent literals are concatenated by the parser into one SQL string.
    sql_template = (
        ' INSERT INTO "{woman_tablename}" AS child ( '
        'domain, household_case_id, person_case_id, opened_on, closed_on, '
        'name, dob, marital_status, sex, migration_status, age_marriage, '
        'has_aadhar_number, husband_name, contact_phone_number, '
        'num_male_children_died, num_female_children_died, blood_group '
        ') ( '
        'SELECT %(domain)s, household_case_id, doc_id, opened_on, closed_on, '
        'name, dob, marital_status, sex, migration_status, age_marriage, '
        "aadhar_number IS NOT NULL and aadhar_number != '' AS has_aadhar_number, "
        'husband_name, contact_phone_number, '
        'num_male_children_died, num_female_children_died, blood_group '
        'FROM "{person_cases_ucr_tablename}" person '
        "WHERE sex = 'F' AND date_part('year', age(dob)) BETWEEN 15 AND 49 "
        ') '
        'ON CONFLICT (person_case_id) DO UPDATE SET '
        'closed_on = EXCLUDED.closed_on, '
        'name = EXCLUDED.name, '
        'dob = EXCLUDED.dob, '
        'marital_status = EXCLUDED.marital_status, '
        'sex = EXCLUDED.sex, '
        'migration_status = EXCLUDED.migration_status, '
        'age_marriage = EXCLUDED.age_marriage, '
        'has_aadhar_number = EXCLUDED.has_aadhar_number, '
        'husband_name = EXCLUDED.husband_name, '
        'contact_phone_number = EXCLUDED.contact_phone_number, '
        'num_male_children_died = EXCLUDED.num_male_children_died, '
        'num_female_children_died = EXCLUDED.num_female_children_died, '
        'blood_group = EXCLUDED.blood_group '
    )
    sql = sql_template.format(
        woman_tablename=cls._meta.db_table,
        person_cases_ucr_tablename=source_table,
    )
    query_params = {
        'domain': domain,
        'window_start': window_start,
        'window_end': window_end,
    }
    return sql, query_params
def agg_from_person_case_ucr(cls, domain, window_start, window_end):
    """Build the UPDATE that copies ``household_case_id`` from the person-cases UCR table.

    The UCR table name is resolved from the 'reach-person_cases' static data
    source of ``domain``; the target table is ``cls._meta.db_table``. Rows
    are matched on ``person_case_id = doc_id``.

    Returns a ``(sql, params)`` tuple. ``params`` carries ``domain``,
    ``window_start`` and ``window_end``; this statement itself contains no
    placeholders for them.
    """
    source_doc_id = StaticDataSourceConfiguration.get_doc_id(domain, 'reach-person_cases')
    config, _ = get_datasource_config(source_doc_id, domain)
    source_table = get_table_name(domain, config.table_id)

    # Adjacent literals are concatenated by the parser into one SQL string.
    sql_template = (
        ' UPDATE "{child_tablename}" AS child '
        'SET household_case_id = person.household_case_id '
        'FROM ( '
        'SELECT doc_id, household_case_id '
        'FROM "{person_cases_ucr_tablename}" '
        ') person '
        'WHERE child.person_case_id = person.doc_id '
    )
    sql = sql_template.format(
        child_tablename=cls._meta.db_table,
        person_cases_ucr_tablename=source_table,
    )
    query_params = {
        'domain': domain,
        'window_start': window_start,
        'window_end': window_end,
    }
    return sql, query_params
def handle(self, **options):
    """Print database tables that no data source expects.

    For every engine returned by ``self._get_tables_by_engine_id`` the public
    base tables whose names start with ``UCR_TABLE_PREFIX`` or
    ``LEGACY_UCR_TABLE_PREFIX`` are listed, and those not in the expected set
    are printed. When ``options['show_counts']`` is truthy each table is
    printed with the ``(COUNT(*), MAX(inserted_at))`` row of that table.
    """
    data_sources = list(DataSourceConfiguration.all())
    data_sources.extend(list(StaticDataSourceConfiguration.all()))
    tables_by_engine_id = self._get_tables_by_engine_id(data_sources, options.get('engine_id'))

    tables_to_remove_by_engine = defaultdict(list)
    for engine_id, expected_tables in tables_by_engine_id.items():
        engine = connection_manager.get_engine(engine_id)
        with engine.begin() as connection:
            # The prefixes are module constants, so formatting them into the
            # statement is safe; '%%' is left doubled in the string passed
            # to execute().
            results = connection.execute((
                " SELECT table_name FROM information_schema.tables "
                "WHERE table_schema='public' AND table_type='BASE TABLE' AND ( "
                "table_name LIKE '{}%%' OR table_name LIKE '{}%%' "
                "); "
            ).format(UCR_TABLE_PREFIX, LEGACY_UCR_TABLE_PREFIX)).fetchall()
            tables_in_db = {r[0] for r in results}

        tables_to_remove_by_engine[engine_id] = tables_in_db - expected_tables

    for engine_id, tablenames in tables_to_remove_by_engine.items():
        print("\nTables no longer referenced in database: {}:\n".format(engine_id))
        engine = connection_manager.get_engine(engine_id)
        if not tablenames:
            print("\t No tables to prune")
            continue

        for tablename in tablenames:
            if options['show_counts']:
                with engine.begin() as connection:
                    try:
                        result = connection.execute(
                            'SELECT COUNT(*), MAX(inserted_at) FROM "{tablename}"'.format(tablename=tablename)
                        )
                    except Exception:
                        print("\t{}: no inserted_at column, probably not UCR".format(tablename))
                    else:
                        # Was misspelled `.foramt`, which raised AttributeError
                        # on every table whose count query succeeded.
                        print("\t{}: {}".format(tablename, result.fetchone()))
            else:
                print("\t{}".format(tablename))
def tearDownModule():
    """Drop the 'champ-cameroon' static data source tables and delete the domain.

    The call center data source provider is patched for the duration of the
    cleanup. Using the patcher as a context manager guarantees the patch is
    undone even when dropping a table or deleting the domain raises.
    """
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    with mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    ):
        configs = StaticDataSourceConfiguration.by_domain('champ-cameroon')
        # Build every adapter before dropping anything, as the original did.
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.drop_table()
        Domain.get_by_name('champ-cameroon').delete()