Exemplo n.º 1
0
 def __init__(self, database, release_num, release_date):
     """Keep the release details, connect to the database and run the
     risk categorizer.

     database: name handed to dbconnect.DatabaseConnect
     release_num: release number, forwarded to RiskCategorizer.main
     release_date: release date, forwarded to RiskCategorizer.main
     """
     self.database, self.release_num, self.release_date = (
         database, release_num, release_date)
     self.connector = dbconnect.DatabaseConnect(database)

     # The categorizer is stored on the instance before it is run.
     categorizer = RiskCategorizer()
     self.risk_cat = categorizer
     categorizer.main(release_num, release_date)
Exemplo n.º 2
0
    def __init__(self, release_date, release_num, db):
        """Keep the release details and connect to the database.

        release_date: stored as self.release_date
        release_num: stored as self.release_num
        db: database name, stored as self.db and used for the connection
        """
        # Hard-coded start date kept on the instance.
        self.start_date = '2013-01-01'

        self.release_date, self.release_num = release_date, release_num

        self.db = db
        self.connector = dbconnect.DatabaseConnect(db)
Exemplo n.º 3
0
    def __init__(self, database, release_num, source):
        """Build the load configuration for the extract files and connect.

        database: database name; stored under 'db' and used to open the
            connection
        release_num: release identifier; newlines and surrounding
            whitespace are removed before it is stored
        source: value stored under 'DataSource'

        Side effects: prints a progress message and makes sure a
        'sql_scripts' directory exists in the current working directory.
        """
        print('Loading data from files...')
        # Backslashes are normalised to forward slashes.
        file_paths = (os.getcwd() + '/output_data').replace('\\', '/')

        # Cleaned once and reused for both release entries.
        release_str = str(release_num).replace('\n', '').strip()

        self.info_dict = {
            'path': file_paths,
            'adjustment': 'adjustedclaimextractuick1.out',
            'main_claims': 'claim_finaluick1.out',
            'nips': 'servicenips_finaluick1.out',
            'pharmacy': 'pharmacy_finaluick1.out',
            'procedure': 'serviceproc_finaluick1.out',
            'recipient_flags': 'recipientflags_final_uick1.out',
            'revenue': 'servicerev_finaluick1.out',
            'compound_drug': 'servicepharmndc_finaluick1.out',
            'immunization': 'cornerstone_finaluick1.out',
            'diagnosis': 'servicediag_finaluick1.out',
            'institutional': 'serviceinst_finaluick1.out',
            'lead': 'lead_finaluick1.out',
            'ending': '\n\n',
            'load_date': '{:%Y-%m-%d}'.format(datetime.today()),
            'DataSource': source,
            'db': database,
            'ReleaseNum': release_str,
            # Same value without its first two characters.
            'Cumulative_ReleaseNum': release_str[2:],
        }
        self.load_inline_dict = {}

        # exist_ok avoids the race between listing the directory and
        # creating it.
        os.makedirs('sql_scripts', exist_ok=True)

        self.connection = dbconnect.DatabaseConnect(self.info_dict['db'])
Exemplo n.º 4
0
    def __init__(self, database, release_num, file_path):
        """Build the load configuration for the raw data folder and connect.

        database: database name; stored under 'db' and used to open the
            connection
        release_num: release number; stored as str(int(release_num))
        file_path: folder containing the 'Raw_Data' and 'SQL_Scripts'
            sub-folders
        """
        raw_dir = os.path.join(file_path, 'Raw_Data')
        self.raw_file_path = raw_dir
        self.sql_file_path = os.path.join(file_path, 'SQL_Scripts')

        # Static entries first; the run-specific ones are appended below.
        self.info_dict = {
            'path': raw_dir,
            'adjustment': 'Adjustments',
            'main_claims': 'Main_Claims',
            'nips': 'NIPS',
            'pharmacy': 'Pharmacy',
            'procedure': 'Procedure',
            'recipient_flags': 'Recipient_Flags',
            'revenue': 'Revenue_Codes',
            'compound_drug': 'Pharmacy_Prior_Authorization',
            'immunization': 'Cornerstone_Immunization',
            'diagnosis': 'Diagnosis',
            'institutional': 'Institutional',
            'lead': 'Lead',
            'ending': ';\n\n',
        }
        self.info_dict.update({
            'load_date': '{:%Y-%m-%d}'.format(datetime.today()),
            'db': database,
            'ReleaseNum': str(int(release_num)),
        })
        self.load_inline_dict = {}

        self.connection = dbconnect.DatabaseConnect(database)
Exemplo n.º 5
0
    def __init__(self, database, release_num):
        """Connect to the database and record which tables get updated.

        database: name handed to dbconnect.DatabaseConnect
        release_num: stored as self.release_num
        """
        self.update_tables = [
            'nips',
            'diagnosis',
            'institutional',
            'procedure',
            'revenue_codes',
            'main_claims',
        ]
        self.release_num = release_num
        self.connection = dbconnect.DatabaseConnect(database)
Exemplo n.º 6
0
    def __init__(self, db):
        """Connect to the database and load the categorization CSV files.

        db: name handed to dbconnect.DatabaseConnect

        The CSVs are read from the package's 'categorization_files/'
        resource folder.
        """
        self.connection = dbconnect.DatabaseConnect(db)
        cat_path = pkg_resources.resource_filename(__name__, 'categorization_files/')

        def csv_path(file_name):
            # Full path of one categorization file.
            return os.path.join(cat_path, file_name)

        self.nips_cat_df = pd.read_csv(csv_path('nips_cat.csv'))
        self.op_hcpcs_df = pd.read_csv(csv_path('op_hcpcs_codes_cat.csv'))
        # RevenueCd is read as a string column.
        self.op_rev_df = pd.read_csv(csv_path('op_rev_codes_cat.csv'),
                                     dtype={'RevenueCd': 'str'})
        self.check_cat_df = pd.read_csv(csv_path('check_category.csv'))
Exemplo n.º 7
0
    def __init__(self,
                 release_num,
                 release_date,
                 database='CHECK_CPAR2',
                 data_source='HFS'):
        """Keep the release details and connect to the database.

        release_num: stored as self.release_num
        release_date: stored as self.release_date
        database: database name used for the connection
        data_source: stored as self.data_source
        """
        self.release_num, self.release_date = release_num, release_date
        self.data_source = data_source
        self.database = database
        self.conn = dbconnect.DatabaseConnect(database)
Exemplo n.º 8
0
    def __init__(self, database, release_date, release_num):
        """Connect to the database and set the diagnosis configuration.

        database: name handed to dbconnect.DatabaseConnect
        release_date: accepted but not used in this constructor
        release_num: stored as self.release_num
        """
        self.release_num = release_num
        self.connection = dbconnect.DatabaseConnect(database)

        # Qualifying ratio, i.e. 3 inclusion codes for every 1 exclusion
        # code to be diagnosed.
        self.dx_ratio = {'SCD': .75}

        self.diagnosis_tables = [
            'pat_info_dx_mental_health',
            'pat_info_dx_pregnancy',
            'pat_info_dx_primary',
        ]
Exemplo n.º 9
0
 def __init__(self, pp_n_months, release_num, db_name):
     '''Set up the query helper, the database connection and the cost
     column names.

     pp_n_months: (int) Number of months to select pre and post the
     patients program date
     release_num: (int) Release to select from rid_pre_post_pat_windows
     db_name: name handed to dbconnect.DatabaseConnect
     Currently only works for test CHECK_Categories'''
     self.pp_n_months, self.release_num = pp_n_months, release_num
     self.db_name = db_name

     self.query = conconnect.ConsensusConnect()
     self.connection = dbconnect.DatabaseConnect(db_name)

     # One Pre/Post pair per cost category.
     self.cost_columns = [
         'Inpatient_Pre', 'Inpatient_Post',
         'Outpatient_Pre', 'Outpatient_Post',
         'ED_Pre', 'ED_Post',
         'Other_Pre', 'Other_Post',
         'Pharmacy_Pre', 'Pharmacy_Post',
         'Total_Pre', 'Total_Post',
     ]
Exemplo n.º 10
0
    def __init__(self, database, release_date, release_num):
        """Keep the release details, connect and define the risk columns.

        database: name handed to dbconnect.DatabaseConnect
        release_date: stored as self.release_date
        release_num: stored as self.release_num
        """
        self.release_date, self.release_num = release_date, release_num
        self.connection = dbconnect.DatabaseConnect(database)

        # Maps each date column name to its risk column name.
        date_to_risk = {}
        date_to_risk['Release_Date'] = 'Current_Risk'
        date_to_risk['Engagement_Date'] = 'Engagement_Risk'
        date_to_risk['Enrollment_Date'] = 'Enrollment_Risk'
        date_to_risk['Randomization_Date'] = 'Randomization_Risk'
        date_to_risk['Program_Date'] = 'Program_Risk'
        self.risk_date_columns = date_to_risk
Exemplo n.º 11
0
 def __init__(self, release_num, db_name):
     """Keep the release number, connect and define the column lists.

     release_num: stored as self.release_num
     db_name: database name, stored as self.db_name and used for the
         connection
     """
     self.release_num = release_num
     self.db_name = db_name
     self.connector = dbconnect.DatabaseConnect(db_name)

     self.primary_keys = [
         'DCN',
         'ServiceLineNbr',
         'RejectionStatusCd',
         'RecipientID',
         'AdjudicatedDt',
     ]
     self.output_cols = [
         'RecipientID',
         'RejectionStatusCd',
         'ServiceLineNbr',
         'AdjudicatedDt',
         'DCN',
         'Category1',
         'Category2',
         'Category3',
         'Category2Rank',
         'Category3Rank',
         'NetLiabilityAmt',
         'EncounterPriceAmt',
         'Visit',
         'Service_Count',
         'Procedure_Count',
         'Encounter',
         'Visit_Inpatient_Days',
     ]
Exemplo n.º 12
0
 def connect(self, sql_str, db_name, df_flag=True, parse_dates=None):
     '''Run sql_str against db_name through a new DatabaseConnect.

     sql_str: query text to be sent to db
     db_name: str of the database query is sent to
     df_flag: truthy to return the query result (a pandas dataframe),
         falsy to only run the statement
     parse_dates: forwarded to the query when df_flag is truthy

     Returns the query result when df_flag is truthy, otherwise the
     message "'<sql_str>' successfully ran".'''
     connector = dbconnect.DatabaseConnect(db_name)
     # Truthiness covers every input, so a result is always bound.
     if df_flag:
         return connector.query(sql_str,
                                df_flag=True,
                                parse_dates=parse_dates)
     connector.query(sql_str, df_flag=False)
     return "'{}' successfully ran".format(sql_str)
Exemplo n.º 13
0
    def window_load(self):
        """Compute each patient's pre/post window lengths and store them.

        Reads the patients of self.release_num from pat_info_complete,
        derives whole-month durations before and after Program_Date and
        inserts them into rid_pre_post_pat_windows.

        Returns the string 'Window load complete'.
        """
        connector = dbconnect.DatabaseConnect(self.database)
        # Date when first bills came in from HFS
        check_start_date = '2014-05-01'
        # Mean Gregorian month (365.2425 days / 12). This is the length
        # np.timedelta64(1, 'M') stood for; pandas 2.x refuses to divide
        # timedeltas by the ambiguous 'M' unit, so it is spelled out.
        one_month = pd.Timedelta(seconds=2629746)

        # Pulls patients in most recent ReleaseNum
        pt_df = connector.query("""SELECT RecipientID,Program_Date,
                                   pic.ReleaseNum,
                                   '{}' as CHECK_Start_Date,
                                   '{}' as HFS_Release_Date
                                   from pat_info_complete pic
                                   where pic.ReleaseNum = {}
                                   """.format(check_start_date,
                                              self.release_date,
                                              self.release_num),
                                parse_dates=[
                                    'Program_Date', 'HFS_Release_Date',
                                    'CHECK_Start_Date'
                                ])
        print('pt_df ', pt_df)

        # Goes back 6 months in time so that more bills have been paid.
        pt_df['CHECK_End_Date'] = (
            pd.to_datetime(pt_df['HFS_Release_Date']) -
            pd.DateOffset(months=6))

        # Months after / before the program date, rounded down.
        post_months = (pt_df['CHECK_End_Date'] -
                       pt_df['Program_Date']) / one_month
        pre_months = (pt_df['Program_Date'] -
                      pt_df['CHECK_Start_Date']) / one_month
        pt_df['Positive_Duration'] = post_months.apply(math.floor)
        pt_df['Negative_Duration'] = pre_months.apply(math.floor)

        cols = [
            'RecipientID', 'Program_Date', 'Negative_Duration',
            'Positive_Duration', 'ReleaseNum'
        ]
        connector.insert(pt_df[cols], 'rid_pre_post_pat_windows')

        return 'Window load complete'
Exemplo n.º 14
0
    def __init__(self, database='CHECK_CPAR2'):

        #qualifying ratio i,e, 3 inclusion codes for every 1 exclusion code to be diagnosed
        self.dx_ratio = {'SCD': .75}

        self.connector = dbconnect.DatabaseConnect(database)

        self.diagnosis_tables = [
            'pat_info_dx_mental_health', 'pat_info_dx_pregnancy',
            'pat_info_dx_primary'
        ]

        self.pat_info_query = """SELECT p.RecipientID, p.Enrollment_Age, p.Gender,
                                 if(d.RecipientID is null,'0',
                                 GROUP_CONCAT(Distinct DiagCd separator ',')) ICD_List
                                 FROM pat_info_demo p left join
                                 tsc_hfs_diagnosis d
                                 on p.RecipientID = d.RecipientID
                                 group by RecipientID"""

        self.dx_code_query = '''SELECT RecipientID, DiagCd, count(*) ICD_Count
Exemplo n.º 15
0
    def main(self, release_num, release_date):
        """Load the inpatient/ED encounters and the enrollment and
        engagement dates into instance attributes.

        release_num: accepted but not used in the statements shown here
        release_date: converted to a pandas Timestamp and kept as
            self.max_date
        """
        self.connector = dbconnect.DatabaseConnect('CHECK_CPAR2')
        self.max_date = pd.Timestamp(release_date)

        # One row per RecipientID/ServiceFromDt, one column per Category.
        encounters = self.ip_ed_query()
        pivoted = pd.pivot_table(encounters,
                                 index=['RecipientID', 'ServiceFromDt'],
                                 columns='Category',
                                 values='encounters',
                                 aggfunc='first',
                                 fill_value=0)
        self.ip_ed_df = pivoted.reset_index()

        # Program_date stands in when Initial_Enrollment_Date is null.
        enroll_sql = '''select RecipientID,
                                       if(Initial_Enrollment_Date is null,
                                       Program_date, Initial_Enrollment_Date)
                                       as Initial_Enrollment_Date from
                                       pat_info_demo;'''
        self.enroll_df = self.connector.query(enroll_sql)

        engage_sql = '''select RecipientID,
                                       Engagement_Date from pat_info_demo WHERE
                                       Engagement_Date is not null;'''
        self.engage_df = self.connector.query(engage_sql)
Exemplo n.º 16
0
    def full_run(self, recipient_list=None, to_sql=False):
        """Write rolling-window CSVs per patient batch, then the
        aggregation CSVs.

        recipient_list: RecipientIDs to process; defaults to every unique
            RecipientID returned by self.query.cpar_patient_info()
        to_sql: when truthy, load_files is called on the rolling-window
            output directory after the aggregation files are written

        Patients are processed in batches of 500; each batch is written to
        rolling_window_output/pt_level_<offset>.csv. One file per grouping
        is then written to agg_output/.

        Returns the string 'completed'.
        """
        demo_df = self.query.cpar_patient_info()
        pat_program_dates = demo_df[['RecipientID', 'Program_Date']].copy()
        demo_df = demo_df[[
            'RecipientID', 'Population_Type', 'Diagnosis_Category',
            'Program_Risk', 'Program_Age_Category', 'Gender'
        ]].set_index('RecipientID').rename(columns={
            'Program_Risk': 'Risk',
            'Program_Age_Category': 'Age_Category'
        })

        if recipient_list is None:
            recipient_list = demo_df.index.unique()

        jumper = 500
        cat_col_list = [
            'CHECK_Category', 'Category1', 'Category2', 'Category3'
        ]
        output_columns = [
            'Population_Type', 'Diagnosis_Category', 'Risk',
            'Age_Category', 'Gender', 'Category_Type', 'Category',
            'Window', 'AdjustedPriceAmt', 'Encounter', 'ServiceCount',
            'VisitInpatientDays'
        ]

        output_path = "rolling_window_output/"
        # The per-batch CSVs below need the directory to exist.
        os.makedirs(output_path, exist_ok=True)
        for x in range(0, len(recipient_list), jumper):

            file_name = "{}pt_level_{}.csv".format(output_path, x)
            temp_unique_rins = recipient_list[x:x + jumper]
            pt_costs_df = self.claims_query(pat_program_dates,
                                            temp_unique_rins)
            frames = []
            for cat_col in cat_col_list:
                rolling_win = self.to_rolling_pivot(pt_costs_df, cat_col)
                rolling_win = rolling_win.set_index('RecipientID')
                # Attach the demographic columns by RecipientID.
                frames.append(
                    pd.merge(rolling_win,
                             demo_df,
                             left_index=True,
                             right_index=True))

            rolling_df = pd.concat(frames)[output_columns]
            rolling_df.to_csv(file_name, chunksize=100000)

        print('Completed rolling window calculation')

        grouping_list = [
            [], ['Diagnosis_Category'], ['Risk'], ['Age_Category'],
            ['Diagnosis_Category', 'Risk'],
            ['Diagnosis_Category', 'Age_Category'],
            ['Population_Type', 'Diagnosis_Category'],
            ['Population_Type', 'Diagnosis_Category', 'Age_Category'],
            ['Population_Type', 'Gender'],
            ['Diagnosis_Category', 'Risk', 'Age_Category'],
            ['Population_Type', 'Risk']
        ]

        os.makedirs('agg_output', exist_ok=True)

        df = self.aggregation_df()
        for group in grouping_list:
            print(group)
            group_output = self.window_aggregation(df, group)
            group_output.to_csv('agg_output/Aggregation_' + "_".join(group) +
                                '.csv')

        if to_sql:
            load_files(output_path)

        return 'completed'
Exemplo n.º 17
0
import pandas as pd
import numpy as np
from CHECK.dbconnect import dbconnect

def mc_pivot(mc_df, columns, values, margins_name):
    '''Pivot mc_df to one row per RecipientID holding summed values.

    mc_df: DataFrame with a RecipientID column plus the columns named in
        columns and values
    columns: column whose distinct values become the pivoted columns
    values: list of columns to sum
    margins_name: label used for the totals column of each value

    The hierarchical columns are flattened to "<column value>_<value name>".
    Returns the pivoted DataFrame with RecipientID as a regular column and
    the trailing totals row removed.'''
    # 'sum' replaces np.sum, which pandas 2.x deprecates as an aggfunc.
    pivot_mc_df = pd.pivot_table(mc_df, index=['RecipientID'], columns=columns,
                                 values=values, fill_value=0, aggfunc='sum',
                                 margins=True, margins_name=margins_name)
    pivot_mc_df.columns = [col[1] + "_" + col[0]
                           for col in pivot_mc_df.columns.values]
    pivot_mc_df.reset_index(inplace=True)
    # margins=True appends a totals row as the last row; drop it and keep
    # only the per-recipient rows.
    pivot_mc_df = pivot_mc_df[:-1]
    return pivot_mc_df

connector = dbconnect.DatabaseConnect('CHECK_CPAR2')
# Most recent release recorded in hfs_release_info.
release_info  = connector.query("""Select ReleaseNum, HFS_Release_Date FROM hfs_release_info
WHERE ReleaseNum = (SELECT MAX(ReleaseNum) from hfs_release_info)""")
# Subtracts 1 to get bills since the last release.
relnum = release_info['ReleaseNum'][0] - 1
reldate = release_info['HFS_Release_Date'][0].strftime('%Y-%m-%d')

# Latest service date of the previous release; used as the exclusive lower
# bound of the window queried below.
min_window_dt = connector.query('''Select max(servicefromdt) as min_ser_dt from
                                   tsc_hfs_main_claims where ReleaseNum = {};'''.format(relnum))
min_window_dt = min_window_dt['min_ser_dt'][0].strftime('%Y-%m-%d')

# Per recipient and CHECK_Category totals inside the window. The lower bound
# is min_window_dt; the previous name min_win_dt was never defined.
mc_df = connector.query('''Select RecipientID, CHECK_Category, sum(visit) as Visit, count(*) as Encounters,
min(ServiceFromDt) as 'Min_ServiceFromDt', Max(ServiceFromDt) as 'Max_ServiceFromDt',
sum(AdjustedPriceAmt) as AdjustedPriceAmt from tsc_hfs_main_claims_new where
ServiceFromDt > '{}' and ServiceFromDt <= '{}' group by RecipientID, CHECK_Category;'''.format(min_window_dt,reldate)
                                                    ,parse_dates = ['Max_ServiceFromDt','Min_ServiceFromDt'])

high_util_df = mc_pivot(mc_df,'CHECK_Category',['Visit','Encounters','AdjustedPriceAmt'],'Release')
Exemplo n.º 18
0
    def calculate_ucsd_risk(self):
        """Compute a raw and an age/gender adjusted UCSD risk per patient.

        Loads the lookup files from the ucsd_files folder, runs the
        diagnosis-list query through self.execute_cursor, sums the weight
        of every item yielded by self.report_generator() for each patient
        and then adds an age/gender offset.

        Sets self.connector, self.d1, self.codeSet and self.masterD.

        Returns a DataFrame with the columns RecipientID, UCSD_Risk_Raw
        and UCSD_Risk. UCSD_Risk stays empty for rows that match none of
        the age/gender bands.
        """
        self.connector = dbconnect.DatabaseConnect('CHECK_CPAR2')
        base_dir = "/home/data_upload/.ipython/CHECK/cpar"
        ucsd_dir = base_dir + "/pat_info/ucsd_files"

        # Each lookup file is closed as soon as it has been read.
        self.d1 = {}
        with open(ucsd_dir + "/cdpsfmt1.child.txt", "r") as fmt_file:
            for line in fmt_file:
                x1 = line.split("=")
                key = x1[0].strip("'\r")
                value = x1[1].strip(';\n\r')
                self.d1[key] = value.strip("'")

        # Line number -> whitespace separated tokens of that line.
        with open(ucsd_dir + "/codes.txt", "r") as codes_file:
            self.codeSet = {
                i: line.split() for i, line in enumerate(codes_file)
            }

        # Item -> weight (kept as text, converted when summed).
        d4 = {}
        with open(ucsd_dir + "/cdps_dadc.txt", "r") as weights_file:
            for line in weights_file:
                x1 = line.split("=")
                key = x1[0].strip("'\r")
                value = x1[1].strip(';\n\r')
                d4[key] = value.strip()

        self.masterD = {}

        sql2 = """SELECT REPLACE(FILE,',',"'='") UCSD_RISK
                  FROM (SELECT CONCAT (hfsr.RecipientID,"='",hfsr.ICD_list,"'")
                  FILE FROM CHECK_CPAR2.pat_info_dx_primary  hfsr) tbl1"""

        self.execute_cursor(sql2)

        # Rows are collected in a list and turned into a frame once;
        # growing a DataFrame row by row is quadratic.
        rows = []
        for info in self.report_generator():
            # A falsy item ends the report, as before.
            if not info:
                break
            total = 0.0
            for item in info[1]:
                total = total + float(d4[item])
            line_to_print = info[0][:]
            line_to_print += "\t\t" + str(total)
            # The first nine characters are taken as the RecipientID.
            rows.append((line_to_print[0:9], total))

        risk_raw_df = pd.DataFrame(rows, columns=['RecipientID', 'Risk'])

        demo_df = self.connector.query('''select RecipientID, Gender,
                                     Age from pat_info_demo;''')

        risk_raw_df = pd.merge(risk_raw_df,
                               demo_df,
                               on='RecipientID',
                               how='left')

        age = risk_raw_df['Age']
        is_male = risk_raw_df['Gender'] == 'Male'
        is_female = risk_raw_df['Gender'] == 'Female'
        # (row selector, offset) per age/gender band; the bands do not
        # overlap, so the order of application does not matter.
        adjustments = [
            (age <= 1, 0.398),
            ((age > 1) & (age < 5), -0.068),
            ((age >= 5) & (age < 15) & is_male, -0.06),
            ((age >= 5) & (age < 15) & is_female, -0.105),
            ((age >= 15) & (age < 25) & is_male, -0.026),
            ((age >= 15) & (age < 25) & is_female, 0.051),
            ((age >= 25) & is_male, -0.068),
            ((age >= 25) & is_female, 0.041),
        ]
        for band, offset in adjustments:
            # 0.226 is added to every band on top of its own offset.
            risk_raw_df.loc[band, 'RiskScore'] = (
                risk_raw_df['Risk'] + offset + 0.226)

        risk_raw_df = risk_raw_df.drop(['Gender', 'Age'], axis=1)

        risk_raw_df = risk_raw_df.rename(columns={
            'Risk': 'UCSD_Risk_Raw',
            'RiskScore': 'UCSD_Risk'
        })

        return risk_raw_df
Exemplo n.º 19
0
    def __init__(self, database, release_num):
        """Connect to the database and keep the release number.

        database: name handed to dbconnect.DatabaseConnect
        release_num: stored as self.release_num
        """
        self.release_num = release_num
        self.connection = dbconnect.DatabaseConnect(database)