def preprocess(infile, outfile, separator, encoding, drop_columns): """ Preprocess device files """ loader = BaseLoader() loader.preprocess(infile, outfile, encoding=encoding, sep=separator, drop_columns=drop_columns)
def sp(ctx, db_name): #, batch_size, throttle_size, throttle_time): ctx.ensure_object(dict) loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) ctx.obj['loader'] = loader if not db_name: ctx.obj['db_name'] = loader._query("select database()")[0][0] logger.info(f"Using db {ctx.obj['db_name']}") else: ctx.obj['db_name'] = db_name
def load(ctx, infile, table_name, complete): loader = BaseLoader(warnings=ctx.obj['warnings']) if complete: loader.column_type_overrides = { # 'primarydi': (lambda x: int(float(x)) if x else None), 'deviceid': (lambda x: parseIntOrNone(x)), 'dunsnumber': (lambda x: parseIntOrNone(x)), 'containsdinumber': (lambda x: parseIntOrNone(x)), 'pkgquantity': (lambda x: parseIntOrNone(x)), 'rx': (lambda x: True if x.lower() == "true" else False), 'otc': (lambda x: True if x.lower() == "true" else False), 'phoneextension': (lambda x: parseIntOrNone(x)), 'eff_date': (lambda x: convert_date(x)), 'end_eff_date': (lambda x: convert_date(x)) } else: loader.column_type_overrides = { 'rx_id': (lambda x: parseIntOrNone(x)), # 'deviceid': (lambda x: parseInt(x)), 'containsdinumber': (lambda x: parseIntOrNone(x)), 'dunsnumber': (lambda x: parseIntOrNone(x)), 'eff_date': (lambda x: convert_date(x)), 'end_eff_date': (lambda x: convert_date(x)) } loader.connect(**ctx.obj['db_credentials']) loader.csv_loader(INSERT_QUERY, table_name, infile, ctx) print(f"Medical device data loaded to table: {table_name}")
def preprocess(ctx, infile, outfile): ndc_loader = BaseLoader() # xforms performs transformations on the column names. # If the value is callable, call it on the column value (key) # Otherwise, just replace with the given value. xforms = { "Type": "te_type", "drug_id": "ind_drug_id", "status": "ind_status", "phase": "ind_phase", } ndc_loader.preprocess(infile, outfile, column_xforms=xforms) print(outfile)
def prod_ndc(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_NDCMASTER_DDL.format(table_name=table_name) loader._query(q) archive_table_name = table_name + "_archive" stage_table_name = table_name + "_stage" # Create NDC archive table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=archive_table_name, source_table_name=table_name) ) # Create NDC stage table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=stage_table_name, source_table_name=table_name) )
def prod_devicemaster(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) archive_table_name = table_name + "_archive" stage_table_name = table_name + "_stage" q = CREATE_DEVICEMASTER_DDL.format(table_name=table_name) loader._query(q) # Create Device Master archive table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=archive_table_name, source_table_name=table_name) ) # Create Device Master stage table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=stage_table_name, source_table_name=table_name) ) print("Created device master tables.")
def prod_productmaster(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_PRODUCT_MASTER_DDL.format(table_name=table_name) loader._query(q) product_master_archive_table_name = table_name + "_archive" # Create archive table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=product_master_archive_table_name, source_table_name=table_name) )
def refresh_ndc(ctx, table_name): archive_table_name = table_name+"_archive" loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_NDC_DDL.format(table_name=table_name) loader._query(q)
def copy_table(ctx, source_table_name, destination_table_name): """ Create a copy of a source table. """ loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) loader.copy_table(source_table_name, destination_table_name)
def all(ctx): """ Create all product refresh tables using default names. """ loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) indications_table_name = "refresh_indications" ndc_table_name = "refresh_ndc_product" marketing_table_name = "refresh_marketing" orange_table_name = "refresh_orange" devicemaster_table_name = "refresh_devicemaster" guid_devices = "refresh_gudid_devices" guid_identifiers = "refresh_gudid_identifiers" guid_contacts = "refresh_gudid_contacts" q1 = DROP_TABLE_IFE_DDL.format(table_name=indications_table_name) q2 = DROP_TABLE_IFE_DDL.format(table_name=ndc_table_name) q3 = DROP_TABLE_IFE_DDL.format(table_name=marketing_table_name) q4 = DROP_TABLE_IFE_DDL.format(table_name=orange_table_name) q5 = DROP_TABLE_IFE_DDL.format(table_name=devicemaster_table_name) q6 = DROP_TABLE_IFE_DDL.format(table_name=guid_devices) q7 = DROP_TABLE_IFE_DDL.format(table_name=guid_identifiers) q8 = DROP_TABLE_IFE_DDL.format(table_name=guid_contacts) try: loader._query(q1) loader._query(q2) loader._query(q3) loader._query(q4) loader._query(q5) except Exception as e: print(e.args) print("Could not drop table. Does the table exist?") return print( f"DROPPED refresh tables:\n {indications_table_name}\n {ndc_table_name}\n {marketing_table_name}\n {orange_table_name}" )
def drop_table(ctx, table_name): """ Create a copy of a source table. """ loader = BaseLoader(warnings=ctx.obj['warnings']) DROP_TABLE_DDL = f"DROP TABLE {table_name}" loader.connect(**ctx.obj['db_credentials']) loader._submit_single_q(DROP_TABLE_DDL) print(f"Dropped table {table_name}")
def refresh_gudid_all(ctx): """ Create all gudid refresh tables """ loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) guid_devices = "refresh_gudid_devices" guid_identifiers = "refresh_gudid_identifiers" guid_contacts = "refresh_gudid_contacts" q6 = CREATE_GUDID_CONTACTS_DDL.format(table_name=guid_contacts) q7 = CREATE_GUDID_DEVICES_DDL.format(table_name=guid_devices) q8 = CREATE_GUDID_IDENTIFERS_DDL.format(table_name=guid_identifiers) loader._query(q6) loader._query(q7) loader._query(q8) print("Created all gudid tables")
def prod_service(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) if not table_name: table_name = "service_master" q = CREATE_SERVICEMASTER_DDL.format(table_name=table_name) loader._query(q) print("Created service master table.")
def load_gudid_devices(ctx, infile, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.column_type_overrides = { # 'primarydi': (lambda x: int(float(x)) if x else None), 'deviceid': (lambda x: parseIntOrNone(x)), 'dunsnumber': (lambda x: parseIntOrNone(x)), 'containsdinumber': (lambda x: parseIntOrNone(x)), 'pkgquantity': (lambda x: parseIntOrNone(x)), 'rx': (lambda x: True if x.lower() == "true" else False), 'otc': (lambda x: True if x.lower() == "true" else False), } loader.connect(**ctx.obj['db_credentials']) loader.csv_loader(INSERT_QUERY, table_name, infile, ctx) print(f"Medical device data loaded to table: {table_name}")
def product_table(ctx, old_table_name, new_table_name, master_table_name): logger.info( f"Creating new product table `{new_table_name}` from `{old_table_name}`, using master: `{master_table_name}`" ) loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) format_args = { "old_table_name": old_table_name, "new_table_name": new_table_name, "product_master": master_table_name } loader.execute_queries(PRODUCT_TABLE_MERGE, **format_args) logger.info(f"`{new_table_name}` loaded.")
def load(ctx, infile, table_name): """ Generic loader """ batch_size = ctx.obj['batch_size'] throttle_size = ctx.obj['throttle_size'] throttle_time = ctx.obj['throttle_time'] debug = ctx.obj['debug'] args = { 'user': os.environ.get('db_user'), 'password': os.environ.get('db_password'), 'host': os.environ.get('db_host'), 'database': os.environ.get('db_schema'), 'debug': debug } logger.debug( "Loading: query={} table={} infile={} batch_size={} throttle_size={} throttle_time={} \n" .format(INSERT_QUERY, table_name, infile, batch_size, throttle_size, throttle_time)) loader = BaseLoader() # loader.column_type_overrides = { # 'rx': (lambda x: True if x.lower() == "true" else False), # 'otc': (lambda x: True if x.lower() == "true" else False), # 'phoneextension': (lambda x: float(int(x)) if x else None), # 'containsdinumber': (lambda x: float(int(x)) if x else None) # # 'pkgquantity': (lambda x: float(int(x)) if x else None) # } loader.warnings = True logger.info(f"Loading {infile} into {table_name}") loader.connect(**args) loader.load_file(INSERT_QUERY, table_name, infile, batch_size, throttle_size, throttle_time) print(f"Data loaded to table: {table_name}")
def refresh_indications(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_INDICATIONS_DDL.format(table_name=table_name) loader._query(q)
def ndc(ctx): ctx.ensure_object(dict) loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) ctx.obj['loader'] = loader
def masterid_to_ndc(ctx, ndc_table_name, product_table_name): query = MASTER_IDS_TO_NDC_TABLE.format( ndc_table_name=ndc_table_name, product_table_name=product_table_name) loader = BaseLoader() loader.connect(**ctx.obj['db_credentials']) loader._submit_single_q(query)
def refresh_gudid_identifers(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q1 = CREATE_GUDID_IDENTIFERS_DDL.format(table_name=table_name) loader._query(q1)
def refresh_gudid_devices(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q2 = CREATE_GUDID_DEVICES_DDL.format(table_name=table_name) loader._query(q2)
def refresh_gudid_contacts(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q1 = CREATE_GUDID_CONTACTS_DDL.format(table_name=table_name) loader._query(q1)
def refresh_medicaldevice(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_MEDICAL_DEVICE_MASTER_DDL.format(table_name=table_name) loader._query(q)
def refresh_orange(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_ORANGE_DDL.format(table_name=table_name) loader._query(q)
def refresh_marketing(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_MARKETING_DDL.format(table_name=table_name) loader._query(q)
def prod_all(ctx): """ Create all production tables using default names. """ loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) product_table_name = "product" productmaster_table_name = "product_master" devicemaster_table_name = "device_master" servicemaster_table_name = "service_master" ndcmaster_table_name = "ndc_master" q1 = CREATE_PRODUCT_DDL.format(table_name=product_table_name) q2 = CREATE_PRODUCT_MASTER_DDL.format(table_name=productmaster_table_name) q3 = CREATE_DEVICEMASTER_DDL.format(table_name=devicemaster_table_name) q4 = CREATE_NDCMASTER_DDL.format(table_name=ndcmaster_table_name) q5 = CREATE_SERVICEMASTER_DDL.format(table_name=servicemaster_table_name) loader._query(q1) loader._query(q2) loader._query(q3) loader._query(q4) loader._query(q5) product_master_archive_table_name = productmaster_table_name + "_archive" ndc_archive_table_name = ndcmaster_table_name + "_archive" devicemaster_archive_table_name = devicemaster_table_name + "_archive" ndc_stage_table_name = ndcmaster_table_name + "_stage" devicemaster_stage_table_name = devicemaster_table_name + "_stage" # Create product master archive loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=product_master_archive_table_name, source_table_name=productmaster_table_name) ) # Create NDC master archive loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=ndc_archive_table_name, source_table_name=ndcmaster_table_name) ) # Create Device Master archive table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=devicemaster_archive_table_name, source_table_name=devicemaster_table_name) ) # Create NDC stage table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=ndc_stage_table_name, source_table_name=ndcmaster_table_name) ) # Create Device master stage table loader._submit_single_q( CREATE_TABLE_LIKE_IFNE_DDL.format(target_table_name=devicemaster_stage_table_name, source_table_name=devicemaster_table_name) ) print("Create prod tables")
def refresh_all(ctx): """ Create all refresh tables using default names. """ loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) indications_table_name = "refresh_indications" ndc_table_name = "refresh_ndc_product" marketing_table_name = "refresh_marketing_codes" orange_table_name = "refresh_orange" device_master_table_name = "refresh_devicemaster" q1 = CREATE_INDICATIONS_DDL.format(table_name=indications_table_name) q2 = CREATE_NDC_DDL.format(table_name=ndc_table_name) q3 = CREATE_MARKETING_DDL.format(table_name=marketing_table_name) q4 = CREATE_ORANGE_DDL.format(table_name=orange_table_name) # q5 = CREATE_MEDICAL_DEVICE_MASTER_DDL.format(table_name=device_master_table_name) loader._query(q1) loader._query(q2) loader._query(q3) loader._query(q4) # loader._query(q5) guid_devices = "refresh_gudid_devices" guid_identifiers = "refresh_gudid_identifiers" guid_contacts = "refresh_gudid_contacts" q6 = CREATE_GUDID_CONTACTS_DDL.format(table_name=guid_contacts) q7 = CREATE_GUDID_DEVICES_DDL.format(table_name=guid_devices) q8 = CREATE_GUDID_IDENTIFERS_DDL.format(table_name=guid_identifiers) loader._query(q6) loader._query(q7) loader._query(q8) print(f"CREATED refresh tables:\n {indications_table_name}\n {ndc_table_name}\n {marketing_table_name}\n {orange_table_name}") print("...and gudid tables")
def prod_product(ctx, table_name): loader = BaseLoader(warnings=ctx.obj['warnings']) loader.connect(**ctx.obj['db_credentials']) q = CREATE_PRODUCT_DDL.format(table_name=table_name) loader._query(q)