def admission_claims_to_sql(update=True):
    """Load the cleaned admission-claims dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then either updates the existing
    ``admission_claims`` table in place or drops and recreates it with
    the indicated primary and foreign keys.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    claims_frame = pd.read_csv(
        f"{processed_data}\\admit_claims.csv", low_memory=False
    )
    claims_frame = create_sql_dates(claims_frame)

    pk = ["claim_id"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(claims_frame, "admission_claims", conn, pk)
        # update_sql_table presumably stages rows through a temp table;
        # clean it up afterwards.
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("admission_claims updated...")
    else:
        conn.cursor().execute(f"DROP TABLE IF EXISTS admission_claims")
        create_table(
            claims_frame,
            "admission_claims",
            conn,
            pk,
            fk,
            parent_tables,
            parent_cols,
        )
        print("admission_claims created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\admission_claims_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def center_days_to_sql(update=True):
    """Load the cleaned center-days dataset into the SQLite database.

    Reads the processed CSV, normalizes the ``as_of`` column to the SQL
    YYYY-MM-DD format, renames ``center_days`` to ``days``, then either
    updates the existing ``center_days`` table or creates it with the
    indicated primary and foreign keys.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    days_frame = pd.read_csv(f"{processed_data}\\center_days.csv")
    days_frame = create_sql_dates(days_frame, ["as_of"])
    days_frame.rename(columns={"center_days": "days"}, inplace=True)

    pk = ["member_id", "days"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(days_frame, "center_days", conn, pk)
        # Clean up the staging table left behind by update_sql_table
        # (presumably — mirrors the sibling loaders).
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("center_days updated...")
    else:
        create_table(
            days_frame,
            "center_days",
            conn,
            pk,
            fk,
            parent_tables,
            parent_cols,
        )
        print("center_days created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\center_days_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def auths_to_sql(update=True):
    """Load the cleaned authorizations dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then drops and rebuilds the ``authorizations``
    table with the indicated primary and foreign keys.  An update is a
    full drop-and-replace, so create and update share one code path;
    only the progress message differs.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    auths = pd.read_csv(f"{processed_data}\\auths.csv")
    auths = create_sql_dates(auths)

    primary_key = ["member_id", "authorization_number"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # The original update/create branches were byte-identical
    # (drop + recreate), so the duplication is collapsed here.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS authorizations")
    create_table(
        auths, "authorizations", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("authorizations updated...")
    else:
        print("authorizations created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\authorizations_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def er_only_to_sql(update=True):
    """Load the cleaned ER-only visits dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then drops and rebuilds the ``er_only`` table
    with the indicated primary and foreign keys.  An update is a full
    drop-and-replace, so create and update share one code path; only the
    progress message differs.

    NOTE(review): the original docstring claimed that on update any
    visit with an admission date greater than 3 months from today is
    dropped, but no such filtering exists in the code — confirm whether
    that behavior was ever implemented or belongs elsewhere.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    er_only = pd.read_csv(f"{processed_data}\\er_only.csv")
    er_only = create_sql_dates(er_only)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # The original update/create branches were byte-identical
    # (drop + recreate), so the duplication is collapsed here.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS er_only")
    create_table(
        er_only, "er_only", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("er_only updated...")
    else:
        print("er_only created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\er_only_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def pnuemo_to_sql(update=True):
    """Load the cleaned pneumococcal-vaccine dataset into the database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then either updates the existing ``pneumo`` table
    or creates it with the indicated primary and foreign keys.

    (The "pnuemo" spelling in the function name and log filename is a
    long-standing typo, kept because the pipeline presumably refers to
    both.)

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    vaccine_frame = pd.read_csv(f"{processed_data}\\pneumo.csv")
    vaccine_frame = create_sql_dates(vaccine_frame)

    pk = ["member_id", "date_administered"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(vaccine_frame, "pneumo", conn, pk)
        # Clean up the staging table left behind by update_sql_table
        # (presumably — mirrors the sibling loaders).
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("pneumo updated...")
    else:
        create_table(
            vaccine_frame, "pneumo", conn, pk, fk, parent_tables, parent_cols
        )
        print("pneumo created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\pnuemo_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def centers_to_sql(update=True):
    """Load the cleaned centers dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then either updates the existing ``centers``
    table or creates it with the indicated primary and foreign keys.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    centers = pd.read_csv(f"{processed_data}\\centers.csv")
    centers = create_sql_dates(centers)

    primary_key = ["member_id", "center", "start_date"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update:
        update_sql_table(centers, "centers", conn, primary_key)
        # Every sibling loader drops the staging table after
        # update_sql_table; this one previously skipped it, which would
        # leave a stale "temp" table behind.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")
        print("centers updated...")
    else:
        create_table(
            centers, "centers", conn, primary_key, foreign_key, ref_table, ref_col
        )
        print("centers created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\centers_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def inpatient_to_sql(update=True):
    """Load the cleaned inpatient dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then drops and rebuilds the ``inpatient`` table
    with the indicated primary and foreign keys.  An update is a full
    drop-and-replace, so create and update share one code path.

    On first creation, six views over ``inpatient`` are also created:
    acute, psych, nursing_home, skilled, respite, and custodial.  The
    views are NOT recreated on update: views survive a DROP TABLE, and
    CREATE VIEW (without IF NOT EXISTS) would fail if they already
    exist.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    acute = pd.read_csv(f"{processed_data}\\inpatient.csv")
    acute = create_sql_dates(acute)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    c = conn.cursor()
    # The original update/create branches both dropped and recreated the
    # table with identical code; the duplication is collapsed here.
    c.execute("DROP TABLE IF EXISTS inpatient")
    create_table(
        acute, "inpatient", conn, primary_key, foreign_key, ref_table, ref_col
    )
    if update is True:
        print("inpatient updated...")
    else:
        print("inpatient created...")
        # First-time setup: carve the inpatient table into the six views
        # the reporting queries use.
        c.execute(
            """CREATE VIEW acute AS SELECT * FROM inpatient WHERE admission_type = 'Acute Hospital'; """
        )
        c.execute(
            """CREATE VIEW psych AS SELECT * FROM inpatient WHERE admission_type = 'Psych Unit / Facility'; """
        )
        c.execute(
            """CREATE VIEW nursing_home AS SELECT * FROM inpatient WHERE (admission_type = 'Nursing Home' OR admission_type = 'Rehab Unit / Facility' OR admission_type = 'End of Life') """
        )
        c.execute(
            """CREATE VIEW skilled AS SELECT * FROM inpatient WHERE (admission_type = 'Nursing Home' OR admission_type = 'Rehab Unit / Facility') AND admit_reason = 'skilled'; """
        )
        c.execute(
            """CREATE VIEW respite AS SELECT * FROM inpatient WHERE (admission_type = 'Nursing Home' OR admission_type = 'Rehab Unit / Facility') AND admit_reason = 'respite'; """
        )
        c.execute(
            """CREATE VIEW custodial AS SELECT * FROM inpatient WHERE (admission_type = 'Nursing Home' OR admission_type = 'Rehab Unit / Facility' OR admission_type = 'End of Life') AND admit_reason = 'custodial'; """
        )
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\inpatient_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def payments_to_sql(update=True):
    """Load the cleaned payments dataset into the SQLite database.

    Reads the processed CSV, normalizes date columns to the SQL
    YYYY-MM-DD format, then either updates the existing ``payments``
    table or creates it with the indicated primary and foreign keys.

    When updating, only payments with a date greater than or equal to
    the most recent ``date_paid`` already in the table are kept, and
    their surrogate ``id_col`` values continue from the current maximum.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    payments = pd.read_csv(f"{processed_data}\\payments.csv", low_memory=False)

    if update:
        helper = Helpers(database_path)
        # Continue the surrogate key from where the table left off.
        next_id = helper.single_value_query("SELECT MAX(id_col) FROM payments") + 1
        latest_paid = helper.single_value_query("SELECT MAX(date_paid) FROM payments")
        payments = payments[payments["date_paid"] >= latest_paid].copy()
        payments.reset_index(inplace=True, drop=True)
        payments["id_col"] = list(range(next_id, next_id + len(payments)))
    else:
        # Fresh load: the row index becomes the surrogate key.
        payments.reset_index(inplace=True)
        payments.rename(columns={"index": "id_col"}, inplace=True)

    payments = create_sql_dates(payments)

    pk = ["id_col"]
    fk = ["member_id"]
    parent_tables = ["ppts"]
    parent_cols = ["member_id"]

    conn = sqlite3.connect(database_path)
    if update is True:
        update_sql_table(payments, "payments", conn, pk)
        # Clean up the staging table left behind by update_sql_table
        # (presumably — mirrors the sibling loaders).
        conn.cursor().execute("DROP TABLE IF EXISTS temp;")
        print("payments updated...")
    else:
        create_table(payments, "payments", conn, pk, fk, parent_tables, parent_cols)
        print("payments created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\payments_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def claims_detail_to_sql(update=True):
    """Load the cleaned claims-detail dataset into the SQLite database.

    Reads the processed CSV, normalizes the ``first_dos``/``last_dos``
    columns to the SQL YYYY-MM-DD format, then drops and rebuilds the
    ``claims_detail`` table with the indicated primary and foreign keys.
    An update is a full drop-and-replace, so create and update share one
    code path; only the progress message differs.

    Args:
        update(bool): indicates if database table is being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    claims_detail = pd.read_csv(
        f"{processed_data}\\claims_detail.csv", low_memory=False
    )
    claims_detail = create_sql_dates(claims_detail, ["first_dos", "last_dos"])

    primary_key = ["claim_line_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)
    # The original update/create branches were byte-identical
    # (drop + recreate), so the duplication is collapsed here.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS claims_detail")
    create_table(
        claims_detail,
        "claims_detail",
        conn,
        primary_key,
        foreign_key,
        ref_table,
        ref_col,
    )
    if update is True:
        print("claims_detail updated...")
    else:
        print("claims_detail created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\claims_detail_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def monthly_census_to_sql(update=True):
    """Create or update the first-of-month census table.

    This is an aggregate table used in 100-member-month queries.

    If updating: computes the first of the current month and checks for
    an existing census row for that date.  If none exists, a new row is
    derived per center from the most recent census row plus new
    enrollments minus disenrollments, then appended.

    If creating: rebuilds the whole census from the enrollment and
    centers tables, counting active members per center for every month
    from the earliest enrollment to today.

    Args:
        update(bool): is the database being updated or not

    Output:
        creates empty text file in log folder so the Luigi pipeline can
        be told the process is complete.
    """
    conn = sqlite3.connect(database_path)
    primary_key = ["month"]
    if update is True:
        today = pd.to_datetime("today")
        # Zero-pad the month via a format spec rather than the old
        # hand-rolled leading-zero lambda (PEP 8 E731); same string out.
        last_first_of_month = f"{today.year}-{today.month:02d}-01"
        c = conn.cursor()
        month_exists = c.execute(
            "SELECT * FROM monthly_census WHERE month = ?", [last_first_of_month]
        ).fetchone()
        if month_exists is None:
            enrollment = pd.read_csv(f"{processed_data}\\enrollment_for_census.csv")
            enrollment = create_sql_dates(enrollment)
            most_recent_month = pd.read_sql(
                "SELECT * FROM monthly_census ORDER BY month DESC LIMIT 1", conn
            )
            # Members who left between the last census row and this one.
            disenrollments_mask = (
                enrollment["disenrollment_date"] >= most_recent_month["month"][0]
            ) & (enrollment["disenrollment_date"] < last_first_of_month)
            # NOTE(review): only enrollments dated exactly on the first of
            # the month are counted — confirm enrollments always take
            # effect on the first.
            enrollments_mask = enrollment["enrollment_date"] == last_first_of_month
            center_census = {
                center.lower(): {} for center in enrollment.center.unique()
            }
            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                center_census[center.lower()][last_first_of_month] = most_recent_month[
                    center.lower()
                ][0] + (
                    enrollment[enrollments_mask & center_mask].shape[0]
                    - enrollment[disenrollments_mask & center_mask].shape[0]
                )
            monthly_census = pd.DataFrame.from_dict(center_census)
            monthly_census["total"] = monthly_census.sum(axis=1)
            monthly_census.reset_index(inplace=True)
            monthly_census.rename(columns={"index": "month"}, inplace=True)
            monthly_census = create_sql_dates(monthly_census, ["month"])
            monthly_census.to_sql(
                "monthly_census", conn, if_exists="append", index=False
            )
        print("monthly_census updated...")
    else:
        enrollment = pd.read_sql(
            "SELECT * FROM enrollment JOIN centers on enrollment.member_id=centers.member_id",
            conn,
        )
        enrollment["enrollment_date"] = pd.to_datetime(enrollment["enrollment_date"])
        enrollment["disenrollment_date"] = pd.to_datetime(
            enrollment["disenrollment_date"]
        )
        center_census = {center.lower(): {} for center in enrollment.center.unique()}
        # One census point per month-start ("MS") from the first
        # enrollment through today.
        for month_start in pd.date_range(
            enrollment.enrollment_date.min(), pd.to_datetime("today"), freq="MS"
        ):
            enrollment_mask = enrollment["enrollment_date"] <= month_start
            disenrollment_mask = (enrollment["disenrollment_date"] > month_start) | (
                enrollment["disenrollment_date"].isnull()
            )
            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                center_census[center.lower()][month_start] = enrollment[
                    center_mask & enrollment_mask & disenrollment_mask
                ].shape[0]
        monthly_census = pd.DataFrame.from_dict(center_census)
        monthly_census["total"] = monthly_census.sum(axis=1)
        monthly_census.reset_index(inplace=True)
        monthly_census.rename(columns={"index": "month"}, inplace=True)
        monthly_census = create_sql_dates(monthly_census, ["month"])
        create_table(monthly_census, "monthly_census", conn, primary_key)
        print("monthly_census created...")
    conn.commit()
    conn.close()

    # Touch a dated, empty log file so Luigi can tell the task finished.
    open(
        f"{update_logs_folder}\\monthly_census_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()