# Example 1 (stray export marker; converted to a comment to keep the module importable)
def admission_claims_to_sql(update=True):
    """
    Read the cleaned admission-claims dataset, parse its dates to the
    SQL format (YYYY-MM-DD), and create or update the database table
    using the indicated primary and foreign keys.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    admission_claims = pd.read_csv(
        f"{processed_data}\\admit_claims.csv", low_memory=False
    )
    admission_claims = create_sql_dates(admission_claims)

    primary_key = ["claim_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(admission_claims, "admission_claims", conn, primary_key)

        # update_sql_table stages rows through a "temp" table; clean it up.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("admission_claims updated...")

    else:
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS admission_claims")
        create_table(
            admission_claims,
            "admission_claims",
            conn,
            primary_key,
            foreign_key,
            ref_table,
            ref_col,
        )

        print("admission_claims created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\admission_claims_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
# Example 2 (stray export marker; converted to a comment to keep the module importable)
def center_days_to_sql(update=True):
    """
    Read the cleaned center-days dataset, parse its dates to the SQL
    format (YYYY-MM-DD), and create or update the database table using
    the indicated primary and foreign keys.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    center_days = pd.read_csv(f"{processed_data}\\center_days.csv")
    # Only the as_of column needs date normalization here.
    center_days = create_sql_dates(center_days, ["as_of"])
    # Rename to the column name the SQL table (and its primary key) uses.
    center_days.rename(columns={"center_days": "days"}, inplace=True)

    primary_key = ["member_id", "days"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(center_days, "center_days", conn, primary_key)

        # update_sql_table stages rows through a "temp" table; clean it up.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("center_days updated...")

    else:
        create_table(
            center_days,
            "center_days",
            conn,
            primary_key,
            foreign_key,
            ref_table,
            ref_col,
        )

        print("center_days created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\center_days_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def auths_to_sql(update=True):
    """
    Read the cleaned authorizations dataset, parse its dates to the SQL
    format (YYYY-MM-DD), and create or update the database table using
    the indicated primary and foreign keys.

    The table is dropped and rebuilt from the CSV whether it is being
    created or updated; only the log message differs.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    auths = pd.read_csv(f"{processed_data}\\auths.csv")
    auths = create_sql_dates(auths)

    primary_key = ["member_id", "authorization_number"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Both branches performed an identical drop-and-recreate, so the
    # duplication is collapsed; only the printed message depends on update.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS authorizations")
    create_table(
        auths, "authorizations", conn, primary_key, foreign_key, ref_table, ref_col
    )
    print(f"authorizations {'updated' if update else 'created'}...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\authorizations_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def er_only_to_sql(update=True):
    """
    Read the cleaned ER-only dataset, parse its dates to the SQL format
    (YYYY-MM-DD), and create or update the database table using the
    indicated primary and foreign keys.

    The table is dropped and rebuilt from the CSV whether it is being
    created or updated; only the log message differs.

    NOTE(review): an earlier docstring said updates drop visits with an
    admission date more than 3 months old, but no such filtering occurs
    in this function — confirm whether that filter belongs upstream.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    er_only = pd.read_csv(f"{processed_data}\\er_only.csv")
    er_only = create_sql_dates(er_only)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Both branches performed an identical drop-and-recreate, so the
    # duplication is collapsed; only the printed message depends on update.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS er_only")
    create_table(
        er_only, "er_only", conn, primary_key, foreign_key, ref_table, ref_col
    )
    print(f"er_only {'updated' if update else 'created'}...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\er_only_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def pnuemo_to_sql(update=True):
    """
    Read the cleaned pneumococcal-vaccination dataset, parse its dates
    to the SQL format (YYYY-MM-DD), and create or update the database
    table using the indicated primary and foreign keys.

    NOTE(review): the function name and log-file name keep the
    historical "pnuemo" misspelling for compatibility with existing
    callers and the pipeline's log lookup; the table itself is "pneumo".

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    pneumo = pd.read_csv(f"{processed_data}\\pneumo.csv")
    pneumo = create_sql_dates(pneumo)

    primary_key = ["member_id", "date_administered"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(pneumo, "pneumo", conn, primary_key)

        # update_sql_table stages rows through a "temp" table; clean it up.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("pneumo updated...")

    else:
        create_table(
            pneumo, "pneumo", conn, primary_key, foreign_key, ref_table, ref_col
        )

        print("pneumo created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\pnuemo_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def centers_to_sql(update=True):
    """
    Read the cleaned centers dataset, parse its dates to the SQL format
    (YYYY-MM-DD), and create or update the database table using the
    indicated primary and foreign keys.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    centers = pd.read_csv(f"{processed_data}\\centers.csv")
    centers = create_sql_dates(centers)

    primary_key = ["member_id", "center", "start_date"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update:
        update_sql_table(centers, "centers", conn, primary_key)

        print("centers updated...")

    else:
        create_table(
            centers, "centers", conn, primary_key, foreign_key, ref_table, ref_col
        )

        print("centers created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\centers_{str(pd.to_datetime('today').date())}.txt", "a"
    ).close()
def inpatient_to_sql(update=True):
    """
    Read the cleaned inpatient dataset, parse its dates to the SQL
    format (YYYY-MM-DD), and create or update the database table using
    the indicated primary and foreign keys.

    When the table is first created, six filtered views are also
    created: acute, psych, nursing_home, custodial, respite, and
    skilled.

    NOTE(review): an earlier docstring said updates drop visits with an
    admission date more than 3 months old, but no such filtering occurs
    in this function — confirm whether that filter belongs upstream.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    acute = pd.read_csv(f"{processed_data}\\inpatient.csv")
    acute = create_sql_dates(acute)

    primary_key = ["visit_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Create and update share the same drop-and-recreate path.  The views
    # are only built on first creation: DROP TABLE leaves existing views
    # in place, so an update keeps the views already in the database.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS inpatient")
    create_table(
        acute, "inpatient", conn, primary_key, foreign_key, ref_table, ref_col
    )

    if update:
        print("inpatient updated...")

    else:
        print("inpatient created...")

        # One view per admission category, filtered on admission_type
        # (and admit_reason where relevant).
        c.execute(
            """CREATE VIEW acute AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Acute Hospital';
            """
        )

        c.execute(
            """CREATE VIEW psych AS
            SELECT * FROM inpatient
            WHERE admission_type = 'Psych Unit / Facility';
            """
        )

        c.execute(
            """CREATE VIEW nursing_home AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility'
            OR admission_type = 'End of Life')
            """
        )

        c.execute(
            """CREATE VIEW skilled AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility')
            AND admit_reason = 'skilled';
            """
        )

        c.execute(
            """CREATE VIEW respite AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility')
            AND admit_reason = 'respite';
            """
        )

        c.execute(
            """CREATE VIEW custodial AS
            SELECT * FROM inpatient
            WHERE (admission_type = 'Nursing Home'
            OR admission_type = 'Rehab Unit / Facility'
            OR admission_type = 'End of Life')
            AND admit_reason = 'custodial';
            """
        )

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\inpatient_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
def payments_to_sql(update=True):
    """
    Read the cleaned payments dataset, parse its dates to the SQL
    format (YYYY-MM-DD), and create or update the database table using
    the indicated primary and foreign keys.

    If being updated, only payments with a date greater than or equal
    to the most recent date already in the database table are added.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    payments = pd.read_csv(f"{processed_data}\\payments.csv", low_memory=False)
    if update:
        h = Helpers(database_path)
        # Continue the surrogate-key sequence after the table's current max.
        first_id_val = h.single_value_query(
            "SELECT MAX(id_col) FROM payments") + 1
        max_date = h.single_value_query("SELECT MAX(date_paid) FROM payments")

        # NOTE(review): this compares the raw CSV date_paid strings against
        # the stored SQL date BEFORE create_sql_dates runs — assumes the CSV
        # dates are already YYYY-MM-DD so string comparison orders correctly;
        # confirm against the CSV source.  Rows dated exactly max_date may
        # already exist in the table and would be re-added under fresh
        # id_col values — TODO confirm this is intended.
        payments = payments[payments["date_paid"] >= max_date].copy()
        payments.reset_index(inplace=True, drop=True)

        # Assign sequential id_col values continuing from first_id_val.
        payments["id_col"] = list(
            range(first_id_val, first_id_val + payments.shape[0]))

    else:
        # Fresh load: the DataFrame row index becomes the id_col primary key.
        payments.reset_index(inplace=True)
        payments.rename(columns={"index": "id_col"}, inplace=True)

    payments = create_sql_dates(payments)

    primary_key = ["id_col"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    if update is True:

        update_sql_table(payments, "payments", conn, primary_key)

        # update_sql_table stages rows through a "temp" table; clean it up.
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS temp;")

        print("payments updated...")

    else:
        create_table(payments, "payments", conn, primary_key, foreign_key,
                     ref_table, ref_col)

        print("payments created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\payments_{str(pd.to_datetime('today').date())}.txt",
        "a").close()
def claims_detail_to_sql(update=True):
    """
    Read the cleaned claims-detail dataset, parse its dates to the SQL
    format (YYYY-MM-DD), and create or update the database table using
    the indicated primary and foreign keys.

    The table is dropped and rebuilt from the CSV whether it is being
    created or updated; only the log message differs.

    Args:
        update (bool): indicates if the database table is being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    claims_detail = pd.read_csv(f"{processed_data}\\claims_detail.csv",
                                low_memory=False)

    # Only the two date-of-service columns need date normalization.
    claims_detail = create_sql_dates(claims_detail, ["first_dos", "last_dos"])

    primary_key = ["claim_line_id"]
    foreign_key = ["member_id"]
    ref_table = ["ppts"]
    ref_col = ["member_id"]

    conn = sqlite3.connect(database_path)

    # Both branches performed an identical drop-and-recreate, so the
    # duplication is collapsed; only the printed message depends on update.
    c = conn.cursor()
    c.execute("DROP TABLE IF EXISTS claims_detail")
    create_table(
        claims_detail,
        "claims_detail",
        conn,
        primary_key,
        foreign_key,
        ref_table,
        ref_col,
    )
    print(f"claims_detail {'updated' if update else 'created'}...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\claims_detail_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()
# Example 10 (stray export marker; converted to a comment to keep the module importable)
def monthly_census_to_sql(update=True):
    """
    Create or update the monthly census-as-of-the-first table in the
    database.  This is an aggregate table used in 100-member-month
    queries.

    If updating, finds the first of the current month and checks
    whether a census row already exists for that date; if none exists,
    one is computed from the last stored row plus the enrollment
    changes since, and appended.

    Args:
        update (bool): is the database being updated or not

    Output:
        Creates an empty text file in the log folder so the Luigi
        pipeline can be told the process is complete.
    """
    conn = sqlite3.connect(database_path)
    primary_key = ["month"]

    if update:
        today = pd.to_datetime("today")
        # Zero-pad the month with a format spec (replaces a helper lambda
        # that prepended "0" for single-digit months — identical output).
        last_first_of_month = f"{today.year}-{today.month:02d}-01"
        c = conn.cursor()

        month_exists = c.execute(
            "SELECT * FROM monthly_census WHERE month = ?", [last_first_of_month]
        ).fetchone()

        if month_exists is None:

            enrollment = pd.read_csv(f"{processed_data}\\enrollment_for_census.csv")
            enrollment = create_sql_dates(enrollment)

            # Latest stored census row; new counts roll forward from it.
            most_recent_month = pd.read_sql(
                "SELECT * FROM monthly_census ORDER BY month DESC LIMIT 1", conn
            )
            # Disenrollments that happened since the last census row.
            disenrollments_mask = (
                enrollment["disenrollment_date"] >= most_recent_month["month"][0]
            ) & (enrollment["disenrollment_date"] < last_first_of_month)

            # NOTE(review): only enrollments dated exactly on the first of
            # the month are counted — assumes enrollments always start on
            # the 1st; confirm with the enrollment data source.
            enrollments_mask = enrollment["enrollment_date"] == last_first_of_month

            center_census = {}
            for center in enrollment.center.unique():
                center_census[center.lower()] = {}

            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center

                # Previous count + enrollments - disenrollments, per center.
                center_census[center.lower()][last_first_of_month] = most_recent_month[
                    center.lower()
                ][0] + (
                    enrollment[enrollments_mask & center_mask].shape[0]
                    - enrollment[disenrollments_mask & center_mask].shape[0]
                )

            monthly_census = pd.DataFrame.from_dict(center_census)
            monthly_census["total"] = monthly_census.sum(axis=1)
            monthly_census.reset_index(inplace=True)
            monthly_census.rename(columns={"index": "month"}, inplace=True)
            monthly_census = create_sql_dates(monthly_census, ["month"])
            monthly_census.to_sql(
                "monthly_census", conn, if_exists="append", index=False
            )

            print("monthly_census updated...")

    else:

        enrollment = pd.read_sql(
            "SELECT * FROM enrollment JOIN centers on enrollment.member_id=centers.member_id",
            conn,
        )
        enrollment["enrollment_date"] = pd.to_datetime(enrollment["enrollment_date"])
        enrollment["disenrollment_date"] = pd.to_datetime(
            enrollment["disenrollment_date"]
        )

        center_census = {}
        for center in enrollment.center.unique():
            center_census[center.lower()] = {}

        # Walk every month start (freq="MS") from the first enrollment to now.
        for month_start in pd.date_range(
            enrollment.enrollment_date.min(), pd.to_datetime("today"), freq="MS"
        ):
            # Enrolled on or before the month start ...
            enrollment_mask = enrollment["enrollment_date"] <= month_start
            # ... and not yet disenrolled at the month start.
            disenrollment_mask = (enrollment["disenrollment_date"] > month_start) | (
                enrollment["disenrollment_date"].isnull()
            )
            for center in enrollment.center.unique():
                center_mask = enrollment["center"] == center
                center_census[center.lower()][month_start] = enrollment[
                    center_mask & enrollment_mask & disenrollment_mask
                ].shape[0]

        monthly_census = pd.DataFrame.from_dict(center_census)
        monthly_census["total"] = monthly_census.sum(axis=1)
        monthly_census.reset_index(inplace=True)
        monthly_census.rename(columns={"index": "month"}, inplace=True)
        monthly_census = create_sql_dates(monthly_census, ["month"])

        create_table(monthly_census, "monthly_census", conn, primary_key)

        print("monthly_census created...")

    conn.commit()
    conn.close()

    # Touch a dated log file so the Luigi pipeline can see the task finished.
    open(
        f"{update_logs_folder}\\monthly_census_{str(pd.to_datetime('today').date())}.txt",
        "a",
    ).close()