def initdfmonth(customer_id_string, site_id_string):
    """Return monthly patient-visit counts for a customer/site selection.

    Reads every ``patient_visit`` row matching the given customer and site
    ids, keeps rows with a positive ``visit_id``, counts them per
    ``date_seen`` day, zero-fills the days without visits between the dates
    returned by ``getstartdate`` and ``getenddate``, and sums the daily
    counts per calendar month.

    Args:
        customer_id_string: Text spliced between the quotes of the
            ``customer_id in ("...")`` clause of the query.
        site_id_string: Text spliced between the quotes of the
            ``site_id in ("...")`` clause of the query.

    Returns:
        A DataFrame with one row per month (month-end frequency ``'M'``).
        The ``patient_volume_by_day`` column holds the summed daily visit
        counts; the ``date_seen`` and ``date_seen_index`` columns are
        non-null counts aggregated the same way.  The final month is
        dropped.

    Raises:
        Exception: Any error raised while connecting or querying is printed
            and then re-raised unchanged.
    """
    # Connect to DB and initialize dataframe with required information
    host, user, passwd, db = readdbconfig()
    calibcon = None
    try:
        calibcon = mdb.connect(host, user, passwd, db)
        # NOTE(review): both ids are concatenated into the SQL text without
        # escaping. If they can ever originate from untrusted input this
        # must become a parameterized query -- confirm with the callers.
        queryString = ('select * from patient_visit where customer_id in ("'
                       + customer_id_string + '") and site_id in ("'
                       + site_id_string + '")')
        dfsite = pd.read_sql(queryString, con=calibcon)
    except Exception:
        # Single pre-formatted argument so the output is the same whether
        # the module runs under Python 2 or Python 3.
        print("An unexpected error occurred %s" % (sys.exc_info()[0],))
        raise
    finally:
        # The frame is fully materialized by read_sql, so the connection can
        # be released on success and on failure alike.
        if calibcon is not None:
            calibcon.close()

    # Copy so that adding a column never writes into a slice of dfsite.
    dfvisits = dfsite.loc[:, ['visit_id', 'date_seen']].copy()
    dfvisits['date_seen_index'] = pd.to_datetime(dfvisits['date_seen'],
                                                 format='%Y-%m-%d')
    indexed_dfvisits = dfvisits.set_index(dfvisits['date_seen_index'])
    indexed_dfvisits = indexed_dfvisits[indexed_dfvisits.visit_id > 0]

    # One row per calendar day that has at least one visit.
    dfvolume = indexed_dfvisits.groupby(indexed_dfvisits.index).count()
    dfvolume = dfvolume.rename(columns={'visit_id': 'patient_volume_by_day'})

    # Fill the gaps so days without visits count as zero.
    firstmonth = getstartdate(dfvolume)
    lastmonth = getenddate(dfvolume)
    idx = pd.date_range(firstmonth, lastmonth)
    dfvolume = dfvolume.reindex(idx, fill_value=0)

    # Month-end sums.  Grouper is used instead of resample(..., how='sum')
    # because the ``how`` keyword no longer exists in current pandas, while
    # this spelling yields the same bins on old and new releases.
    dfvolumemonth = dfvolume.groupby(pd.Grouper(freq='M')).sum()

    # Removing last month's information due to insufficient data
    dfvolumemonth = dfvolumemonth.iloc[0:-1]
    return dfvolumemonth
def initdf(customer_id_string):
    """Return per-day, per-provider patient volume and NPS figures.

    Reads every ``patient_visit`` row for the given customer and builds a
    frame indexed by ``(date_seen, provider_id)``.

    Args:
        customer_id_string: Text spliced between the quotes of the
            ``customer_id in ("...")`` clause of the query.

    Returns:
        A DataFrame with one row per day/provider pair that has at least one
        visit with a positive ``visit_id`` and a non-empty ``provider_id``.
        Columns:

        * ``patientvolume`` -- number of such visits,
        * ``score`` -- number of visits carrying a score,
        * ``scoreatt`` -- number of scores equal to ``'9'`` or ``'10'``,
        * ``scoredet`` -- number of scores from ``'0'`` to ``'6'``,
        * ``nps`` -- ``(scoreatt - scoredet) / score * 100``.

        The score columns are NaN for pairs without any scored visit.

    Raises:
        Exception: Any error raised while connecting or querying is printed
            and then re-raised unchanged.
        ValueError: If a non-empty score cannot be converted to an integer.
    """
    # Connect to DB and initialize dataframe with required information
    host, user, passwd, db = readdbconfig()
    calibcon = None
    try:
        calibcon = mdb.connect(host, user, passwd, db)
        # NOTE(review): the id is concatenated into the SQL text without
        # escaping. If it can ever originate from untrusted input this must
        # become a parameterized query -- confirm with the callers.
        queryString = ('select * from patient_visit where customer_id in ("'
                       + customer_id_string + '")')
        dfall = pd.read_sql(queryString, con=calibcon)
    except Exception:
        # Single pre-formatted argument so the output is the same whether
        # the module runs under Python 2 or Python 3.
        print("An unexpected error occurred %s" % (sys.exc_info()[0],))
        raise
    finally:
        # The frame is fully materialized by read_sql, so the connection can
        # be released on success and on failure alike.
        if calibcon is not None:
            calibcon.close()

    # Get information to count patient visits per day
    dfcntpatients = dfall.loc[:, ['visit_id', 'provider_id', 'date_seen']]
    dfcountpatients = dfcntpatients.set_index(['date_seen'])
    # Remove rows without any visit ID
    dfcountpatients = dfcountpatients[dfcountpatients.visit_id > 0]
    dfcountpatients = dfcountpatients[dfcountpatients.provider_id != '']

    # Get raw scores where available
    dfcntscore = dfall.loc[:, ['score', 'provider_id', 'date_seen']]
    dfcntscore = dfcntscore.dropna(axis=0)
    dfcntscore = dfcntscore[dfcntscore.score != '']
    dfcountscore = dfcntscore.set_index(['date_seen'])

    # Validation only: the result is discarded, but a score that is not an
    # integer still raises here instead of being silently counted.
    dfcountscore['score'].astype(int)

    dfcountscoretotalcount = dfcountscore.groupby(
        [dfcountscore.index, 'provider_id']).count()

    # Scores are compared as strings: '9'/'10' versus '0'..'6'.
    dfcountscoreatt = dfcountscore[dfcountscore['score'].isin(['9', '10'])]
    dfcountscoredet = dfcountscore[
        dfcountscore['score'].isin(['0', '1', '2', '3', '4', '5', '6'])]
    dfcountscoreatt1 = dfcountscoreatt.groupby(
        [dfcountscoreatt.index, 'provider_id']).count()
    dfcountscoredet1 = dfcountscoredet.groupby(
        [dfcountscoredet.index, 'provider_id']).count()
    dfcountscoreatt1 = dfcountscoreatt1.rename(columns={'score': 'scoreatt'})
    dfcountscoredet1 = dfcountscoredet1.rename(columns={'score': 'scoredet'})

    # Align both partial counts on the rows of the total count.  Reindexing
    # the inputs replaces concat(..., join_axes=[...]), which current pandas
    # no longer accepts.
    scoreindex = dfcountscoretotalcount.index
    dfnpsscore = pd.concat(
        [dfcountscoretotalcount,
         dfcountscoreatt1.reindex(scoreindex),
         dfcountscoredet1.reindex(scoreindex)],
        axis=1)
    # A scored pair with no 9/10 (or no 0..6) answers contributes zero.
    dfnpsscore['scoreatt'] = dfnpsscore['scoreatt'].fillna(0)
    dfnpsscore['scoredet'] = dfnpsscore['scoredet'].fillna(0)
    dfnpsscore['nps'] = (
        (dfnpsscore.scoreatt - dfnpsscore.scoredet) / dfnpsscore.score * 100)

    dfcountpatientstotalcount = dfcountpatients.groupby(
        [dfcountpatients.index, 'provider_id']).count()
    dfcountpatientstotalcount = dfcountpatientstotalcount.rename(
        columns={'visit_id': 'patientvolume'})

    # Keep every day/provider pair with visits; score columns stay NaN where
    # no scored visit exists for the pair.
    dfvolumescore = pd.concat(
        [dfcountpatientstotalcount,
         dfnpsscore.reindex(dfcountpatientstotalcount.index)],
        axis=1)
    return dfvolumescore