def setUpModule():
    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('champ-cameroon')
    try:
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(f, table, engine, format='csv', null='', header=True)
    except Exception:
        tearDownModule()
        raise
    _call_center_domain_mock.stop()

def _setup_ucr_tables():
    with mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    ):
        with override_settings(SERVER_ENVIRONMENT=TEST_ENVIRONMENT):
            configs = StaticDataSourceConfiguration.by_domain(TEST_DOMAIN)
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                try:
                    adapter.drop_table()
                except Exception:
                    pass
                adapter.build_table()

            engine = connection_manager.get_engine('aaa-data')
            metadata = sqlalchemy.MetaData(bind=engine)
            metadata.reflect(bind=engine, extend_existing=True)
            for file_name in os.listdir(INPUT_PATH):
                with open(os.path.join(INPUT_PATH, file_name), encoding='utf-8') as f:
                    table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                    table = metadata.tables[table_name]
                    columns = [
                        '"{}"'.format(c.strip())  # quote to preserve case
                        for c in f.readline().split(',')
                    ]
                    postgres_copy.copy_from(f, table, engine, format='csv', null='', columns=columns)

def setup_tables_and_fixtures(domain_name):
    configs = StaticDataSourceConfiguration.by_domain(domain_name)
    adapters = [get_indicator_adapter(config) for config in configs]
    for adapter in adapters:
        try:
            adapter.drop_table()
        except Exception:
            pass
        adapter.build_table()

    cleanup_misc_agg_tables()
    engine = connection_manager.get_engine(ICDS_UCR_CITUS_ENGINE_ID)
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine, extend_existing=True)
    path = os.path.join(os.path.dirname(__file__), 'fixtures')
    for file_name in os.listdir(path):
        with open(os.path.join(path, file_name), encoding='utf-8') as f:
            table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
            table = metadata.tables[table_name]
            if not table_name.startswith('icds_dashboard_'):
                columns = [
                    '"{}"'.format(c.strip())  # quote to preserve case
                    for c in f.readline().split(',')
                ]
                postgres_copy.copy_from(f, table, engine, format='csv', null='', columns=columns)

    _distribute_tables_for_citus(engine)

def flush_queue_to_database(self):
    for file_name, data_list, data_insert_function in [
        ('pull_request_data.json', self.pull_request_data, self.insert_pull_requests),
        ('review_decisions_data.json', self.review_decisions_data, self.insert_comments_and_reviews),
        ('labels_data.json', self.labels_data, self.insert_labels)
    ]:
        if not len(data_list):
            continue
        json_path = os.path.join(self.json_data_directory, file_name)
        with open(json_path, 'w') as json_file:
            for item in data_list:
                item = flatten_json(item)
                for key in item.keys():
                    if isinstance(item[key], str) and key not in ('author_login', 'id', 'pull_request_id', 'name'):
                        if key == 'id':
                            print(item[key])
                        input_string = item[key]
                        item[key] = ' '.join(
                            [re.sub(r'\W+', '', s) for s in input_string.split()]
                        ).replace('"', '')
                string = json.dumps(item, ensure_ascii=True, separators=(',', ':'), default=str) + '\n'
                json_file.write(string)
        with session_scope() as db_session:
            db_session.execute('TRUNCATE etl_data;')
            with open(json_path, 'rb') as fp:
                postgres_copy.copy_from(fp, ETLData, db_session.connection(), ['data'])
            data_insert_function()
    self.pull_request_data = []
    self.review_decisions_data = []
    self.labels_data = []

def _setup_ucr_tables():
    with mock.patch('corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'):
        with override_settings(SERVER_ENVIRONMENT=TEST_ENVIRONMENT):
            configs = StaticDataSourceConfiguration.by_domain(TEST_DOMAIN)
            adapters = [get_indicator_adapter(config) for config in configs]
            for adapter in adapters:
                try:
                    adapter.drop_table()
                except Exception:
                    pass
                adapter.build_table()

            engine = connection_manager.get_engine('aaa-data')
            metadata = sqlalchemy.MetaData(bind=engine)
            metadata.reflect(bind=engine, extend_existing=True)
            for file_name in os.listdir(INPUT_PATH):
                with open(os.path.join(INPUT_PATH, file_name), encoding='utf-8') as f:
                    table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                    table = metadata.tables[table_name]
                    columns = [
                        '"{}"'.format(c.strip())  # quote to preserve case
                        for c in f.readline().split(',')
                    ]
                    postgres_copy.copy_from(
                        f, table, engine,
                        format='csv' if six.PY3 else b'csv',
                        null='' if six.PY3 else b'',
                        columns=columns
                    )

def write_rated_frames(frames: List[List]) -> object:
    """
    Bulk write to a temporary table through a csv file, then copy to table.

    :frames (List[List]) A list of list of values (each element contains the
        equivalent of a frame dictionary, as a list)

    Return an object with details about the TRUNCATE query
    """
    merging_frames = """
        INSERT INTO frames
        SELECT DISTINCT * FROM frames_copy
        ON CONFLICT ON CONSTRAINT frames_pkey DO NOTHING
    """
    res = db.engine.execute('TRUNCATE frames_copy')
    connection = db.engine.raw_connection()
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=True) as f:
        write_to_csv(f, frames)
        logging.info(f'wrote to temp file {f.name}')
        f.seek(0)  # rewind before COPY (assumes write_to_csv leaves the cursor at EOF)
        with connection.cursor() as cursor:
            logging.info(f'copying from {f.name} to frames_copy..')
            postgres_copy.copy_from(
                f, TableWrap(schema='public', name='frames_copy'), connection)
            logging.info('merging frames_copy into frames..')
            cursor.execute(merging_frames)
            connection.commit()
    connection.close()
    logging.info('done!')
    return res

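# NOTE: TableWrap is not defined in either write_rated_frames variant (a second,
# untyped one appears below). Because postgres_copy.copy_from only reads the
# destination's .schema and .name attributes when composing the COPY target,
# a plain namedtuple is one plausible stand-in -- a hypothetical sketch, not
# the project's actual helper:
from collections import namedtuple

TableWrap = namedtuple('TableWrap', ['schema', 'name'])
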
def save_db_objects(db_engine, db_objects):
    """Saves a collection of SQLAlchemy model objects to the database using a COPY command

    Args:
        db_engine (sqlalchemy.engine)
        db_objects (iterable) SQLAlchemy model objects, corresponding to a valid table
    """
    db_objects = iter(db_objects)
    first_object = next(db_objects)
    type_of_object = type(first_object)
    columns = [col.name for col in first_object.__table__.columns]
    with PipeTextIO(
        partial(
            _write_csv,
            db_objects=chain((first_object, ), db_objects),
            type_of_object=type_of_object,
        )) as pipe:
        postgres_copy.copy_from(
            source=pipe,
            dest=type_of_object,
            engine_or_conn=db_engine,
            columns=columns,
            format="csv",
        )

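# NOTE: _write_csv is assumed but not shown in the streaming variants above.
# Judging from how it is invoked through partial(), one minimal hypothetical
# sketch (mirroring the tempfile-based variant further down) would be:
import csv

def _write_csv(file_like, db_objects, type_of_object):
    # stream each model object into the pipe's write end as one CSV row,
    # in the table's declared column order
    writer = csv.writer(file_like, quoting=csv.QUOTE_MINIMAL)
    for db_object in db_objects:
        writer.writerow(
            [getattr(db_object, col.name) for col in type_of_object.__table__.columns]
        )
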
def write_rated_frames(frames):
    merging_frames = """
        INSERT INTO frames
        SELECT DISTINCT * FROM frames_copy
        ON CONFLICT ON CONSTRAINT frames_pkey DO NOTHING
    """
    res = db.engine.execute('TRUNCATE frames_copy')
    connection = db.engine.raw_connection()
    with tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=True) as f:
        write_to_csv(f, frames)
        logging.info(f'wrote to temp file {f.name}')
        f.seek(0)  # rewind before COPY (assumes write_to_csv leaves the cursor at EOF)
        with connection.cursor() as cursor:
            logging.info(f'copying from {f.name} to frames_copy..')
            postgres_copy.copy_from(
                f, TableWrap(schema='public', name='frames_copy'), connection)
            logging.info('merging frames_copy into frames..')
            cursor.execute(merging_frames)
            connection.commit()
    connection.close()
    logging.info('done!')
    return res

def setUpModule():
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('champ-cameroon')
    with override_settings(SERVER_ENVIRONMENT='production'):
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(
                    f, table, engine,
                    format='csv' if six.PY3 else b'csv',
                    null='' if six.PY3 else b'',
                    header=True
                )
    _call_center_domain_mock.stop()

def test_copy_model(self, session, objects):
    sio = io.StringIO()
    sio.write(u'\t'.join(['4', 'The Works']))
    sio.seek(0)
    copy_from(sio, Album, session.connection().engine)
    assert session.query(Album).count() == len(objects) + 1
    row = session.query(Album).filter_by(id=4).first()
    assert row.id == 4
    assert row.name == 'The Works'

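# NOTE: the Album fixtures these tests rely on are not shown. For both the
# tab-delimited input above and the 'aid,name' CSV header used below to line
# up, the model presumably maps a Python attribute "id" onto a database column
# named "aid". A hypothetical reconstruction:
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Album(Base):
    __tablename__ = 'album'
    id = sa.Column('aid', sa.Integer, primary_key=True)  # DB column is "aid"
    name = sa.Column(sa.String)
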
def load_samples(self, directory: Path, samples: Iterable[Table]) -> None:
    """Copy the generated sample CSVs into the database"""
    for table in samples:
        with open(directory / Path(table.name).with_suffix(".csv")) as file:
            postgres_copy.copy_from(file, table, self.engine, format="csv", header=True)

def insert_transactions():
    for TransactionType in [
        BankingTransactions, BitfinexTransactions, BitstampTransactions,
        BittrexTransactions, BtceTransactions, CoinapultTransactions,
        CryptsyTransactions, GeminiTransactions, GenesisTransactions,
        HavelockTransactions, KrakenTransactions, MtgoxTransactions,
        PoloniexTransactions
    ]:
        file_names = list(
            glob.iglob(data_directory + TransactionType.file_pattern, recursive=True))
        for file_name in file_names:
            logging.info(file_name)
            user_name = file_name.split('/')[-2]
            with session_scope() as session:
                user = (
                    session.query(Users)
                    .filter(Users.role == user_name)
                    .one()
                )
                user_id = user.id

            records = TransactionType.process_file(file_name=file_name, user_id=user_id)
            logging.info(f'Found {len(records)} records')
            tmp_file_path = data_directory + '/tmp.csv'
            with open(tmp_file_path, 'w', newline='') as csvfile:
                field_names = set().union(*(d.keys() for d in records))
                writer = csv.DictWriter(csvfile, fieldnames=field_names)
                writer.writeheader()
                for record in records:
                    writer.writerow(record)

            flags = {'format': 'csv', 'header': True}
            with session_scope(database='bitcoinadvisory') as session:
                with open(tmp_file_path, 'rb') as fp:
                    try:
                        postgres_copy.copy_from(fp, TransactionType, session.connection(),
                                                field_names, **flags)
                    except psycopg2.IntegrityError as exc:
                        logging.warn(exc)
            os.remove(tmp_file_path)

def _csv_dump_load(df, table_name, engine, csvdir='', keep_csv=False):
    """
    Write a dataframe to CSV and load it into postgresql using COPY FROM.

    The fastest way to load a bunch of records is using the database's native
    text file copy function. This function dumps a given dataframe out to a
    CSV file, and then loads it into the specified table using a sqlalchemy
    wrapper around the postgresql COPY FROM command, called postgres_copy.

    Note that this creates an additional in-memory representation of the data,
    which takes slightly less memory than the DataFrame itself.

    Args:
        df (pandas.DataFrame): The DataFrame which is to be dumped to CSV and
            loaded into the database. All DataFrame columns must have exactly
            the same names as the database fields they are meant to populate,
            and all column data types must be directly compatible with the
            database fields they are meant to populate. Do any cleanup before
            you call this function.
        table_name (str): The exact name of the database table which the
            DataFrame df is going to be used to populate. It will be used both
            to look up an SQLAlchemy table object in the PUDLBase metadata
            object, and to name the CSV file.
        engine (sqlalchemy.engine): SQLAlchemy database engine, which will be
            used to pull the CSV output into the database.
        csvdir (str): Path to the directory into which the CSV file should be
            saved, if it's being kept.
        keep_csv (bool): True if the CSV output should be saved after the data
            has been loaded into the database. False if it should be deleted.
            NOTE: If multiple COPYs are done for the same table_name, only the
            last will be retained by keep_csv, which may be unsatisfying.

    Returns:
        Nothing.
    """
    import io

    import postgres_copy

    tbl = pudl.models.entities.PUDLBase.metadata.tables[table_name]
    with io.StringIO() as f:
        df.to_csv(f, index=False)
        f.seek(0)
        postgres_copy.copy_from(f, tbl, engine, columns=tuple(df.columns),
                                format='csv', header=True, delimiter=',')
        if keep_csv:
            import os
            import shutil

            print(f"DEBUG: writing CSV")
            f.seek(0)
            outfile = os.path.join(csvdir, table_name + '.csv')
            # copyfileobj needs a file object, not a path string
            with open(outfile, 'w') as out:
                shutil.copyfileobj(f, out)

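# Hypothetical usage of _csv_dump_load, assuming a 'plants' table is already
# declared in the PUDLBase metadata with exactly these column names, behind a
# made-up DSN:
import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine('postgresql:///pudl')
df = pd.DataFrame({'plant_id': [1, 2], 'plant_name': ['alpha', 'beta']})
_csv_dump_load(df, 'plants', engine, csvdir='/tmp', keep_csv=True)
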
def test_copy_csv(self, session, objects):
    sio = io.StringIO()
    sio.write(u'\n'.join([
        ','.join(['aid', 'name']),
        ','.join(['4', 'The Works'])
    ]))
    sio.seek(0)
    flags = {'format': 'csv', 'header': True}
    copy_from(sio, Album, session.connection().engine, **flags)
    assert session.query(Album).count() == len(objects) + 1
    row = session.query(Album).filter_by(id=4).first()
    assert row.id == 4
    assert row.name == 'The Works'

def _test_transactions(self, session, conn, sio, objects):
    # Test rollback
    sio.seek(0)
    session.execute('begin;')
    copy_from(sio, Album, conn)
    session.execute('rollback;')
    self._verify_rollback(session, objects)

    # Test commit
    sio.seek(0)
    session.execute('begin;')
    copy_from(sio, Album, conn)
    session.execute('commit;')
    self._verify_commit(session, objects)

def ingest_to_model(file, model):
    """Load data from a CSV file handle into storage for a
    SQLAlchemy model class.
    """
    reader = csv.reader(file)
    columns = tuple('"{}"'.format(n) for n in next(reader))
    postgres_copy.copy_from(file, model, db.engine, columns=columns,
                            format='csv', HEADER=False)
    fix_autoincrement(model.__table__.name)

def ingest_to_model(file, model, engine=None):
    """Load data from a CSV file handle into storage for a
    SQLAlchemy model class.
    """
    if engine is None:
        engine = db.engine
    reader = csv.reader(file)
    columns = tuple('"{}"'.format(n) for n in next(reader))
    postgres_copy.copy_from(file, model, engine, columns=columns,
                            format="csv", HEADER=False)
    fix_autoincrement(engine, model.__table__.name)

def setUpTestData(cls):
    engine = connection_manager.get_session_helper('default').engine
    metadata = sqlalchemy.MetaData(bind=engine)
    metadata.reflect(bind=engine)
    path = os.path.join(os.path.dirname(__file__), 'fixtures')
    for file_name in os.listdir(path):
        with open(os.path.join(path, file_name)) as f:
            table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
            table = metadata.tables[table_name]
            postgres_copy.copy_from(f, table, engine, format='csv', null='', header=True)

def save_db_objects(db_engine, db_objects):
    """Saves a collection of SQLAlchemy model objects to the database using a COPY command

    Args:
        db_engine (sqlalchemy.engine)
        db_objects (list) SQLAlchemy model objects, corresponding to a valid table
    """
    with tempfile.TemporaryFile(mode="w+") as f:
        writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
        for db_object in db_objects:
            writer.writerow(
                [getattr(db_object, col.name) for col in db_object.__table__.columns]
            )
        f.seek(0)
        postgres_copy.copy_from(f, type(db_objects[0]), db_engine, format="csv")

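# Hypothetical usage of save_db_objects, assuming a mapped class whose
# attribute names match its column names (the helper reads attributes via
# col.name) and a made-up DSN:
import sqlalchemy as sa
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class Track(Base):  # hypothetical model for illustration
    __tablename__ = 'track'
    id = sa.Column(sa.Integer, primary_key=True)
    title = sa.Column(sa.String)

engine = sa.create_engine('postgresql:///scratch')
Base.metadata.create_all(engine)
save_db_objects(engine, [Track(id=i, title='track-{}'.format(i)) for i in range(1000)])
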
def save_db_objects(db_engine, db_objects):
    """Saves a collection of SQLAlchemy model objects to the database using a COPY command

    Args:
        db_engine (sqlalchemy.engine)
        db_objects (iterable) SQLAlchemy model objects, corresponding to a valid table
    """
    db_objects = iter(db_objects)
    first_object = next(db_objects)
    type_of_object = type(first_object)
    with PipeTextIO(
        partial(_write_csv,
                db_objects=chain((first_object, ), db_objects),
                type_of_object=type_of_object)) as pipe:
        postgres_copy.copy_from(pipe, type_of_object, db_engine, format="csv")

def setUpModule():
    if settings.USE_PARTITIONED_DATABASE:
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('icds-cas')
    location_type = LocationType.objects.create(
        domain=domain.name,
        name='block',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='b1', location_id='b1',
        location_type=location_type
    )
    with override_settings(SERVER_ENVIRONMENT='icds'):
        configs = StaticDataSourceConfiguration.by_domain('icds-cas')
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            if adapter.config.table_id == 'static-child_health_cases':
                # hack because this is in a migration
                continue
            adapter.build_table()

        engine = connection_manager.get_session_helper(settings.ICDS_UCR_TEST_DATABASE_ALIAS).engine
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name)) as f:
                table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                table = metadata.tables[table_name]
                postgres_copy.copy_from(f, table, engine, format='csv', null='', header=True)

        try:
            move_ucr_data_into_aggregation_tables(datetime(2017, 5, 28), intervals=2)
        except AssertionError:
            pass
    _call_center_domain_mock.stop()

def insert_to_db(engine, rows):
    """
    Encapsulated logic for inserting a new incident into the database
    """
    try:
        with open("/tmp/tmp.csv", "w") as csv_file:
            header = [
                "hour" if h == "hour_" else h
                for h in list(rows[0].keys())
            ]
            w = csv.writer(csv_file, delimiter=",", quotechar="\"")
            for row in rows:
                row["hour"] = row.pop("hour_", None)
                out_row = [row[h] for h in header]
                w.writerow(out_row)
        with open("/tmp/tmp.csv", "r") as csv_file:
            postgres_copy.copy_from(csv_file, CrimeIncident, engine, format="csv")
    except IndexError:
        logging.info("Finished.")
        sys.exit()

def load(self, table: str, filepath: str) -> None:
    self.log.info(f"loading table {table!r} with content at {filepath!r}")
    with open(filepath, "r") as fp:
        reader = csv.reader(fp)
        row = next(iter(reader), None)
        if not row:
            self.log.warning(
                f"failed to load data from file {filepath!r}: File is empty."
            )
            return None
        columns = tuple(row[:])
        fp.seek(0)
        self.log.info(f"found columns: {columns}")
        target_table = self.get_sqlalchemy_table(table)
        self.log.info(f"writing to columns: {target_table.columns.keys()}")
        postgres_copy.copy_from(
            fp,
            target_table,
            self.engine,
            format="csv",
            header=True,
            columns=columns,
        )

def resync_data():
    try:
        clear_data()
        data = urllib.urlopen(
            'http://api.dft.gov.uk/v3/trafficcounts/export/la/Devon.csv').read()
        fd, path = tempfile.mkstemp()
        os.write(fd, data)
        os.close(fd)
        with open(path) as fp:
            postgres_copy.copy_from(fp, Traffic, db.get_engine(),
                                    format='csv', header='true')
        os.remove(path)
        return Traffic.query.count()
    except Exception:
        return 0

def _copy_from_file(self):
    """Import file into DB"""
    table_already_exists = self.table_name in (
        Inspector.from_engine(self.engine).get_table_names(schema=self.schema_name)
    )
    if table_already_exists and self.force:
        self.logger.info("Dropping table '%s.%s' due to force mode",
                         self.schema_name, self.table_name)
        drop_table = "drop table if exists {0}.{1};"
        self._execute_sql(drop_table, self.schema_name, self.table_name)
    elif table_already_exists:
        return

    RawCitation.__table__.schema = self.schema_name
    RawCitation.__table__.create(bind=self.engine)
    # Import a tab-delimited file
    with open(self.input_refids_filename) as fp:
        postgres_copy.copy_from(fp, RawCitation, self.engine,
                                columns=('bibcode', 'payload'))

def test_engine(self, session, objects):
    sio = io.StringIO()
    sio.write(u'\t'.join(['4', 'The Works']))
    sio.seek(0)
    copy_from(sio, Album, session.connection().engine)
    self._verify_commit(session, objects)

def setUpModule():
    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('test-pna')
    region_location_type = LocationType.objects.create(
        domain='test-pna',
        name='R\u00e9gion',
    )
    SQLLocation.objects.create(domain='test-pna', name='Region Test',
                               location_id='8cde73411ddc4488a7f913c99499ead4',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='PASSY',
                               location_id='1991b4dfe166335e342f28134b85fcac',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r1',
                               location_id='0682630532ff25717176320482ff1028',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r2',
                               location_id='582c5d65a307baa7a38e7b5e651fd5fc',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r3',
                               location_id='bd0395ba4a4fbd38c90765bd04208a8f',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r4',
                               location_id='6ed1f958fccd1b8202e8e30851a2b326',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r5',
                               location_id='1991b4dfe166335e342f28134b85f516',
                               location_type=region_location_type)
    district_location_type = LocationType.objects.create(
        domain='test-pna',
        name='District',
    )
    SQLLocation.objects.create(domain='test-pna', name='District Test',
                               location_id='3db74fac2bad4e708e2b03800cc5ab73',
                               location_type=district_location_type)
    pps_location_type = LocationType.objects.create(
        domain='test-pna',
        name='PPS',
    )
    SQLLocation.objects.create(domain='test-pna', name='P2',
                               location_id='ccf4430f5c3f493797486d6ce1c39682',
                               location_type=pps_location_type)
    SQLProduct.objects.create(domain='test-pna', name='Collier', code='product1', product_id='product1')
    SQLProduct.objects.create(domain='test-pna', name='CU', code='product2', product_id='product2')
    SQLProduct.objects.create(domain='test-pna', name='Depo-Provera', code='product3', product_id='product3')
    SQLProduct.objects.create(domain='test-pna', name='DIU', code='product4', product_id='product4')
    SQLProduct.objects.create(domain='test-pna', name='Jadelle', code='product5', product_id='product5')
    SQLProduct.objects.create(domain='test-pna', name='Microgynon/Lof.', code='product6', product_id='product6')
    SQLProduct.objects.create(domain='test-pna', name='Microlut/Ovrette', code='product7', product_id='product7')
    SQLProduct.objects.create(domain='test-pna', name='Preservatif Feminin', code='product8', product_id='product8')
    SQLProduct.objects.create(domain='test-pna', name='Preservatif Masculin', code='product9', product_id='product9')
    SQLProduct.objects.create(domain='test-pna', name='Sayana Press', code='product10', product_id='product10')
    SQLProduct.objects.create(domain='test-pna', name='IMPLANON', code='product11', product_id='product11')
    SQLProduct.objects.create(domain='test-pna', name='Product 7', code='p7', product_id='p7')

    with override_settings(SERVER_ENVIRONMENT='production'):
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(f, table, engine, format='csv', null='', header=True)
    _call_center_domain_mock.stop()

def setUpModule():
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('test-pna')
    region_location_type = LocationType.objects.create(
        domain='test-pna',
        name='R\u00e9gion',
    )
    SQLLocation.objects.create(
        domain='test-pna', name='Region Test',
        location_id='8cde73411ddc4488a7f913c99499ead4',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='PASSY',
        location_id='1991b4dfe166335e342f28134b85fcac',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='r1',
        location_id='0682630532ff25717176320482ff1028',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='r2',
        location_id='582c5d65a307baa7a38e7b5e651fd5fc',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='r3',
        location_id='bd0395ba4a4fbd38c90765bd04208a8f',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='r4',
        location_id='6ed1f958fccd1b8202e8e30851a2b326',
        location_type=region_location_type
    )
    SQLLocation.objects.create(
        domain='test-pna', name='r5',
        location_id='1991b4dfe166335e342f28134b85f516',
        location_type=region_location_type
    )
    district_location_type = LocationType.objects.create(
        domain='test-pna',
        name='District',
    )
    SQLLocation.objects.create(
        domain='test-pna', name='District Test',
        location_id='3db74fac2bad4e708e2b03800cc5ab73',
        location_type=district_location_type
    )
    pps_location_type = LocationType.objects.create(
        domain='test-pna',
        name='PPS',
    )
    SQLLocation.objects.create(
        domain='test-pna', name='P2',
        location_id='ccf4430f5c3f493797486d6ce1c39682',
        location_type=pps_location_type
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Collier', code='product1', product_id='product1'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='CU', code='product2', product_id='product2'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Depo-Provera', code='product3', product_id='product3'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='DIU', code='product4', product_id='product4'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Jadelle', code='product5', product_id='product5'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Microgynon/Lof.', code='product6', product_id='product6'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Microlut/Ovrette', code='product7', product_id='product7'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Preservatif Feminin', code='product8', product_id='product8'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Preservatif Masculin', code='product9', product_id='product9'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Sayana Press', code='product10', product_id='product10'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='IMPLANON', code='product11', product_id='product11'
    )
    SQLProduct.objects.create(
        domain='test-pna', name='Product 7', code='p7', product_id='p7'
    )
    with override_settings(SERVER_ENVIRONMENT='production'):
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(
                    f, table, engine,
                    format='csv' if six.PY3 else b'csv',
                    null='' if six.PY3 else b'',
                    header=True
                )
    _call_center_domain_mock.stop()

def setUpModule():
    if settings.USE_PARTITIONED_DATABASE:
        print(
            '============= WARNING: not running test setup because settings.USE_PARTITIONED_DATABASE is True.'
        )
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('icds-cas')
    location_type = LocationType.objects.create(
        domain=domain.name,
        name='block',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='b1', location_id='b1',
        location_type=location_type
    )
    state_location_type = LocationType.objects.create(
        domain=domain.name,
        name='state',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='st1', location_id='st1',
        location_type=state_location_type
    )
    awc_location_type = LocationType.objects.create(
        domain=domain.name,
        name='awc',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='a7', location_id='a7',
        location_type=awc_location_type
    )
    with override_settings(SERVER_ENVIRONMENT='icds-new'):
        configs = StaticDataSourceConfiguration.by_domain('icds-cas')
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            try:
                adapter.drop_table()
            except Exception:
                pass
            adapter.build_table()

        engine = connection_manager.get_engine(ICDS_UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                table = metadata.tables[table_name]
                if not table_name.startswith('icds_dashboard_'):
                    postgres_copy.copy_from(f, table, engine, format=b'csv', null=b'', header=True)

        _aggregate_child_health_pnc_forms('st1', datetime(2017, 3, 31))
        try:
            move_ucr_data_into_aggregation_tables(datetime(2017, 5, 28), intervals=2)
        except AssertionError as e:
            # we always use soft assert to email when the aggregation has completed
            if "Aggregation completed" not in str(e):
                print(e)
                tearDownModule()
                raise
        except Exception as e:
            print(e)
            tearDownModule()
            raise
        finally:
            _call_center_domain_mock.stop()

    with connections['icds-ucr'].cursor() as cursor:
        create_views(cursor)

def _write_to_db(
    self,
    model_id,
    matrix_store,
    predictions,
    labels,
    misc_db_parameters,
):
    """Writes given predictions to database

    entity_ids, predictions, labels are expected to be in the same order

    Args:
        model_id (int) the id of the model associated with the given predictions
        matrix_store (catwalk.storage.MatrixStore) the matrix and metadata
        entity_ids (iterable) entity ids that predictions were made on
        predictions (iterable) predicted values
        labels (iterable) labels of prediction set
    """
    session = self.sessionmaker()
    self._existing_predictions(session, model_id, matrix_store)\
        .delete(synchronize_session=False)
    session.expire_all()
    db_objects = []
    test_label_timespan = matrix_store.metadata['label_timespan']
    logging.warning(test_label_timespan)

    if 'as_of_date' in matrix_store.matrix.index.names:
        logging.info(
            'as_of_date found as part of matrix index, using index for table as_of_dates'
        )
        session.commit()
        session.close()
        with tempfile.TemporaryFile(mode='w+') as f:
            writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
            for index, score, label in zip(matrix_store.matrix.index, predictions, labels):
                entity_id, as_of_date = index
                prediction = Prediction(
                    model_id=int(model_id),
                    entity_id=int(entity_id),
                    as_of_date=as_of_date,
                    score=float(score),
                    label_value=int(label) if not math.isnan(label) else None,
                    matrix_uuid=matrix_store.uuid,
                    test_label_timespan=test_label_timespan,
                    **misc_db_parameters
                )
                writer.writerow([
                    prediction.model_id, prediction.entity_id,
                    prediction.as_of_date, prediction.score,
                    prediction.label_value, prediction.rank_abs,
                    prediction.rank_pct, prediction.matrix_uuid,
                    prediction.test_label_timespan
                ])
            f.seek(0)
            postgres_copy.copy_from(f, Prediction, self.db_engine, format='csv')
    else:
        logging.info(
            'as_of_date not found as part of matrix index, using matrix metadata end_time as as_of_date'
        )
        temp_df = pandas.DataFrame({'score': predictions})
        rankings_abs = temp_df['score'].rank(method='dense', ascending=False)
        rankings_pct = temp_df['score'].rank(method='dense', ascending=False, pct=True)
        for entity_id, score, label, rank_abs, rank_pct in zip(
                matrix_store.matrix.index, predictions, labels, rankings_abs, rankings_pct):
            db_objects.append(Prediction(
                model_id=int(model_id),
                entity_id=int(entity_id),
                as_of_date=matrix_store.metadata['end_time'],
                score=round(float(score), 10),
                label_value=int(label) if not math.isnan(label) else None,
                rank_abs=int(rank_abs),
                rank_pct=round(float(rank_pct), 10),
                matrix_uuid=matrix_store.uuid,
                test_label_timespan=test_label_timespan,
                **misc_db_parameters
            ))
        session.bulk_save_objects(db_objects)
        session.commit()
        session.close()

def setUpModule():
    if isinstance(Domain.get_db(), Mock):
        # needed to skip setUp for javascript tests thread on Travis
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('test-pna')
    region_location_type = LocationType.objects.create(
        domain='test-pna',
        name='R\u00e9gion',
    )
    SQLLocation.objects.create(domain='test-pna', name='Region Test',
                               location_id='8cde73411ddc4488a7f913c99499ead4',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='PASSY',
                               location_id='1991b4dfe166335e342f28134b85fcac',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r1',
                               location_id='0682630532ff25717176320482ff0d6e',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r2',
                               location_id='0682630532ff25717176320482ff1028',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r3',
                               location_id='1991b4dfe166335e342f28134b85e7df',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r4',
                               location_id='1991b4dfe166335e342f28134b85f516',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r5',
                               location_id='3f720b4e733bea3cc401150231831e95',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r6',
                               location_id='582c5d65a307baa7a38e7b5e651fd5fc',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r7',
                               location_id='6ed1f958fccd1b8202e8e30851a2b326',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r8',
                               location_id='942e078b8dfa9551a9ff799301b0854d',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r9',
                               location_id='942e078b8dfa9551a9ff799301b08642',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r10',
                               location_id='942e078b8dfa9551a9ff799301b08682',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r11',
                               location_id='abb51a7f4ab64b70b899d86e54e62f51',
                               location_type=region_location_type)
    SQLLocation.objects.create(domain='test-pna', name='r12',
                               location_id='bd0395ba4a4fbd38c90765bd04208a8f',
                               location_type=region_location_type)
    district_location_type = LocationType.objects.create(
        domain='test-pna',
        name='District',
    )
    SQLLocation.objects.create(domain='test-pna', name='District Test',
                               location_id='3db74fac2bad4e708e2b03800cc5ab73',
                               location_type=district_location_type)
    pps_location_type = LocationType.objects.create(
        domain='test-pna',
        name='PPS',
    )
    SQLLocation.objects.create(domain='test-pna', name='P2',
                               location_id='ccf4430f5c3f493797486d6ce1c39682',
                               location_type=pps_location_type)
    SQLProduct.objects.create(domain='test-pna', name='Product 7', code='p7', product_id='p7')

    with override_settings(SERVER_ENVIRONMENT='production'):
        configs = StaticDataSourceConfiguration.by_domain(domain.name)
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            adapter.build_table()

        engine = connection_manager.get_engine(UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = get_table_name(domain.name, file_name[:-4])
                table = metadata.tables[table_name]
                postgres_copy.copy_from(f, table, engine, format=b'csv', null=b'', header=True)
    _call_center_domain_mock.stop()

def setUpModule():
    if settings.USE_PARTITIONED_DATABASE:
        print('============= WARNING: not running test setup because settings.USE_PARTITIONED_DATABASE is True.')
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    domain = create_domain('icds-cas')
    location_type = LocationType.objects.create(
        domain=domain.name,
        name='block',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='b1', location_id='b1',
        location_type=location_type
    )
    state_location_type = LocationType.objects.create(
        domain=domain.name,
        name='state',
    )
    for state in ('st1', 'st2', 'st3', 'st4', 'st5', 'st6', 'st7'):
        SQLLocation.objects.create(
            domain=domain.name, name=state, location_id=state,
            location_type=state_location_type
        )
    awc_location_type = LocationType.objects.create(
        domain=domain.name,
        name='awc',
    )
    SQLLocation.objects.create(
        domain=domain.name, name='a7', location_id='a7',
        location_type=awc_location_type
    )
    with override_settings(SERVER_ENVIRONMENT='icds'):
        configs = StaticDataSourceConfiguration.by_domain('icds-cas')
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            try:
                adapter.drop_table()
            except Exception:
                pass
            adapter.build_table()

        engine = connection_manager.get_engine(ICDS_UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                table = metadata.tables[table_name]
                if not table_name.startswith('icds_dashboard_'):
                    columns = [
                        '"{}"'.format(c.strip())  # quote to preserve case
                        for c in f.readline().split(',')
                    ]
                    postgres_copy.copy_from(
                        f, table, engine,
                        format='csv' if six.PY3 else b'csv',
                        null='' if six.PY3 else b'',
                        columns=columns
                    )

        for state_id in ('st1', 'st2'):
            _aggregate_child_health_pnc_forms(state_id, datetime(2017, 3, 31))
            _aggregate_gm_forms(state_id, datetime(2017, 3, 31))
            _aggregate_bp_forms(state_id, datetime(2017, 3, 31))
        try:
            move_ucr_data_into_aggregation_tables(datetime(2017, 5, 28), intervals=2)
            build_incentive_report(agg_date=datetime(2017, 5, 28))
        except Exception as e:
            print(e)
            tearDownModule()
            raise
        finally:
            _call_center_domain_mock.stop()

def setUpModule():
    if settings.USE_PARTITIONED_DATABASE:
        print('============= WARNING: not running test setup because settings.USE_PARTITIONED_DATABASE is True.')
        return

    _call_center_domain_mock = mock.patch(
        'corehq.apps.callcenter.data_source.call_center_data_source_configuration_provider'
    )
    _call_center_domain_mock.start()
    # _use_citus.enable()
    domain = create_domain('icds-cas')
    SQLLocation.objects.all().delete()
    LocationType.objects.all().delete()
    state_location_type = LocationType.objects.create(
        domain=domain.name,
        name='state',
    )
    st1 = SQLLocation.objects.create(
        domain=domain.name, name='st1', location_id='st1',
        location_type=state_location_type
    )
    st2 = SQLLocation.objects.create(
        domain=domain.name, name='st2', location_id='st2',
        location_type=state_location_type
    )
    st3 = SQLLocation.objects.create(
        domain=domain.name, name='st3', location_id='st3',
        location_type=state_location_type
    )
    st4 = SQLLocation.objects.create(
        domain=domain.name, name='st4', location_id='st4',
        location_type=state_location_type
    )
    st5 = SQLLocation.objects.create(
        domain=domain.name, name='st5', location_id='st5',
        location_type=state_location_type
    )
    st6 = SQLLocation.objects.create(
        domain=domain.name, name='st6', location_id='st6',
        location_type=state_location_type
    )
    st7 = SQLLocation.objects.create(
        domain=domain.name, name='st7', location_id='st7',
        location_type=state_location_type
    )
    supervisor_location_type = LocationType.objects.create(
        domain=domain.name,
        name='supervisor',
    )
    s1 = SQLLocation.objects.create(
        domain=domain.name, name='s1', location_id='s1',
        location_type=supervisor_location_type, parent=st1
    )
    block_location_type = LocationType.objects.create(
        domain=domain.name,
        name='block',
    )
    b1 = SQLLocation.objects.create(
        domain=domain.name, name='b1', location_id='b1',
        location_type=block_location_type, parent=s1
    )
    awc_location_type = LocationType.objects.create(
        domain=domain.name,
        name='awc',
    )
    a7 = SQLLocation.objects.create(
        domain=domain.name, name='a7', location_id='a7',
        location_type=awc_location_type
    )
    with override_settings(SERVER_ENVIRONMENT='icds'):
        configs = StaticDataSourceConfiguration.by_domain('icds-cas')
        adapters = [get_indicator_adapter(config) for config in configs]
        for adapter in adapters:
            try:
                adapter.drop_table()
            except Exception:
                pass
            adapter.build_table()

        engine = connection_manager.get_engine(ICDS_UCR_ENGINE_ID)
        metadata = sqlalchemy.MetaData(bind=engine)
        metadata.reflect(bind=engine, extend_existing=True)
        path = os.path.join(os.path.dirname(__file__), 'fixtures')
        for file_name in os.listdir(path):
            with open(os.path.join(path, file_name), encoding='utf-8') as f:
                table_name = FILE_NAME_TO_TABLE_MAPPING[file_name[:-4]]
                table = metadata.tables[table_name]
                if not table_name.startswith('icds_dashboard_'):
                    columns = [
                        '"{}"'.format(c.strip())  # quote to preserve case
                        for c in f.readline().split(',')
                    ]
                    postgres_copy.copy_from(
                        f, table, engine,
                        format='csv' if six.PY3 else b'csv',
                        null='' if six.PY3 else b'',
                        columns=columns
                    )
        _distribute_tables_for_citus(engine)

        for state_id in ('st1', 'st2'):
            _aggregate_child_health_pnc_forms(state_id, datetime(2017, 3, 31))
            _aggregate_gm_forms(state_id, datetime(2017, 3, 31))
            _aggregate_bp_forms(state_id, datetime(2017, 3, 31))
        try:
            move_ucr_data_into_aggregation_tables(datetime(2017, 5, 28), intervals=2)
            build_incentive_report(agg_date=datetime(2017, 5, 28))
        except Exception as e:
            print(e)
            tearDownModule()
            raise
        finally:
            _call_center_domain_mock.stop()