Exemplo n.º 1
0
def calc_by_states(df):
    """Draw one state map of median pay per person for every provider type.

    ``df`` is handed to ``calc_par_group`` to aggregate ``pay_per_person`` and
    ``pay_per_service`` (count and median) grouped by ``provider_type`` and
    ``nppes_provider_state``.  For each entry in the first index level of the
    result, the ``pay_per_person`` / ``median`` values are passed to
    ``make_state_map`` together with a file stem and a title derived from the
    provider name.

    Returns None.
    """
    # NOTE: the per-service maps are currently disabled, but the call is kept
    # so whatever make_plotdir does for this directory still happens.
    make_plotdir(plotdir='cms_state_service_plots/')
    agg_fns = ['count', 'median']
    p_group = calc_par_group(df, agg_fns, ['provider_type', 'nppes_provider_state'], ['pay_per_person', 'pay_per_service'], print_out=False)

    bmap = get_basemap()  # read file once for all maps

    # Splits a provider name on runs of spaces, parentheses and slashes so the
    # pieces can be joined with underscores into a file-name stem.
    patr = re_compile('[ (/)]+')

    plotdir = make_plotdir(plotdir='cms_state_person_plots/')
    for provider in p_group.index.levels[0]:
        # DataFrame.ix was removed in pandas 1.0; provider is an index label,
        # so label-based .loc is the direct replacement.
        im = p_group.loc[provider]['pay_per_person']['median']
        make_state_map(bmap, im, plotdir, 'cost_per_person_%s' % '_'.join(patr.split(provider.lower())), '%s, Median Cost Per Person' % provider)
Exemplo n.º 2
0
def gender_par_groups(df):
    """Group beneficiary gender counts by provider type and bar-plot the sums.

    The female and male beneficiary count columns of ``df`` are aggregated
    (count and sum) per ``provider_type`` through ``calc_par_group``; the
    ``sum`` statistic is then passed to ``make_group_bar_plots`` with the
    legend labels ``female`` / ``male``.

    Returns None.
    """
    gender_cols = ['beneficiary_female_count', 'beneficiary_male_count']
    gender_labels = ['female', 'male']
    out_dir = make_plotdir(plotdir='cms_pop_gender_plots/')
    grouped = calc_par_group(df, ['count', 'sum'], ['provider_type'], gender_cols)
    make_group_bar_plots(
        grouped,
        'provider_type',
        gender_cols,
        gender_labels,
        'sum',
        'Patient Gender',
        out_dir,
    )
Exemplo n.º 3
0
def pop_calc_par_groups(df):
    """Aggregate population columns by provider type and emit six bar plots.

    ``calc_par_group`` computes count, sum and median of the beneficiary,
    service, age and payment columns per ``provider_type``.  For each entry
    of the plot table below a heading is printed, the column's aggregate is
    passed through ``filter_group_by_var`` for the chosen statistic, and the
    resulting statistic column is drawn with ``make_bar_plot``.

    Returns None.
    """
    plotdir = make_plotdir(plotdir='cms_pop_plots/')
    agg_fns = ['count', 'sum', 'median']
    p_group = calc_par_group(df, agg_fns, ['provider_type'], ['total_unique_benes', 'total_services', 'beneficiary_average_age', 'total_medicare_payment_amt'])

    # One row per plot:
    # (heading, column, statistic, extra get_col kwargs, file stem, title, xlim)
    # The first row counts providers, not patients.
    plot_table = [
        ('\ntop total_services count', 'total_services', 'count', {'log': True},
         'total_services_count', 'Log10 Count Total Services', (0.1, 5)),
        ('\ntop total_services sum', 'total_services', 'sum', {'log': True},
         'total_services_sum', 'Log10 Sum Total Services', (1, 9)),
        ('\ntop total_unique_benes sum', 'total_unique_benes', 'sum', {'log': True},
         'total_unique_benes_sum', 'Log10 Sum Total Beneficiaries', (1, 8)),
        ('\ntop beneficiary_average_age median', 'beneficiary_average_age', 'median', {},
         'beneficiary_average_age_median', 'Median Beneficiary Age Per Provider', (50, 100)),
        ('\ntop total_medicare_payment_amt median', 'total_medicare_payment_amt', 'median', {'log': True},
         'median_medicare_payment_amt', 'Log10 Median Medicare Payment Amount Per Provider', (1, 6)),
        ('\ntop total_medicare_payment_amt', 'total_medicare_payment_amt', 'sum', {'log': True},
         'total_medicare_payment_amt', 'Log10 Total Medicare Payment Amount', (1, 10)),
    ]

    for heading, column, stat, col_opts, stem, title, limits in plot_table:
        print(heading)
        selected = filter_group_by_var(p_group[column], agg_fns, stat=stat)
        make_bar_plot(get_col(selected, stat, **col_opts), plotdir, stem, title, xlim=limits)
Exemplo n.º 4
0
def gender_par_groups(df):
    """Plot summed female/male beneficiary counts for each provider type.

    Calls ``calc_par_group`` to aggregate the two beneficiary gender count
    columns of ``df`` (count and sum) by ``provider_type`` and forwards the
    result to ``make_group_bar_plots`` using the ``sum`` statistic.

    Returns None.
    """
    target_dir = make_plotdir(plotdir='cms_pop_gender_plots/')
    stats = ['count', 'sum']
    columns = ['beneficiary_female_count', 'beneficiary_male_count']
    legend = ['female', 'male']

    by_provider = calc_par_group(df, stats, ['provider_type'], columns)
    make_group_bar_plots(by_provider, 'provider_type', columns, legend, 'sum',
                         'Patient Gender', target_dir)
Exemplo n.º 5
0
def pay_calc_par_groups(df):
    """Aggregate pay-per-person / pay-per-service by provider type and plot them.

    Steps, in order:
      1. ``calc_par_group`` computes count, median, mean and std of both pay
         columns per ``provider_type``.
      2. Median bar plots are drawn for ``pay_per_service`` and
         ``pay_per_person``.
      3. A count bar plot and a count-vs-median scatter plot are drawn from
         the same per-person table used for the median plot.
      4. ``pay_per_person`` is regrouped by provider type and provider gender
         and handed to ``process_by_var``.

    Returns None.
    """
    cost_dir = make_plotdir(plotdir='cms_cost_plots/')
    agg_fns = ['count', 'median', 'mean', 'std']
    by_provider = calc_par_group(df, agg_fns, ['provider_type'], ['pay_per_person', 'pay_per_service'])

    print('\ntop pay_per_service')
    service_tbl = filter_group_by_var(by_provider['pay_per_service'], agg_fns, stat='median')
    service_median = get_col(service_tbl, 'median')
    make_bar_plot(service_median, cost_dir, 'pay_per_service', 'Median Log10 Pay Per Service')

    print('\ntop pay_per_person')
    person_tbl = filter_group_by_var(by_provider['pay_per_person'], agg_fns, stat='median')
    person_median = get_col(person_tbl, 'median')
    make_bar_plot(person_median, cost_dir, 'pay_per_person', 'Median Log10 Pay Per Person')

    # The count plots deliberately reuse the table selected with stat='median'.
    person_count = get_col(person_tbl, 'count')
    make_bar_plot(person_count, cost_dir, 'pay_per_person_count', 'Count Per Person')
    scatter_x = get_col(person_tbl, 'count')
    scatter_y = get_col(person_tbl, 'median')
    make_scatter_plot(scatter_x, scatter_y, cost_dir, 'pay_per_person_by_count', 'Count', 'Log10 Cost Per Person', xlim=(-1000, 100000))

    gender_dir = make_plotdir(plotdir='cms_gender_plots/')
    by_gender = calc_par_group(df, agg_fns, ['provider_type', 'nppes_provider_gender'], ['pay_per_person'])
    process_by_var(gender_dir, by_gender, col='pay_per_person', var='nppes_provider_gender')
Exemplo n.º 6
0
def average_age_par_group(df):
    """Compute, print and plot a beneficiary-weighted average age per provider type.

    A ``total_age`` column (``beneficiary_average_age`` times
    ``total_unique_benes``) is added to ``df`` in place.  Both
    ``total_unique_benes`` and ``total_age`` are summed per ``provider_type``
    via ``calc_par_group``; their ratio is stored as ``avg_age``, the table
    is sorted by it in descending order, printed with ``print_all_rows`` and
    drawn with ``make_bar_plot``.

    Returns None.
    """
    # Weight each row's average age by its number of beneficiaries.
    df['total_age'] = df['beneficiary_average_age'] * df['total_unique_benes']

    out_dir = make_plotdir(plotdir='cms_pop_plots/')
    grouped = calc_par_group(df, ['sum'], ['provider_type'], ['total_unique_benes', 'total_age'])

    age_sum = grouped['total_age']['sum']
    bene_sum = grouped['total_unique_benes']['sum']
    grouped['avg_age'] = age_sum / bene_sum

    ranked = grouped.sort_values(by='avg_age', ascending=False)
    print_all_rows(ranked, ['avg_age'])
    avg_age_col = get_col(ranked, 'avg_age')
    make_bar_plot(avg_age_col, out_dir, 'beneficiary_average_age', 'Beneficiary Average Age', xlim=(50, 100))
Exemplo n.º 7
0
def pay_calc_par_groups(df):
    """Plot provider-type pay statistics and hand a gender split to process_by_var.

    ``pay_per_person`` and ``pay_per_service`` are aggregated (count, median,
    mean, std) per ``provider_type`` with ``calc_par_group``.  Median bar
    plots are produced for both columns; the per-person table selected for
    the median plot is also used for a count bar plot and a count-vs-median
    scatter plot.  Finally ``pay_per_person`` is regrouped by provider type
    and provider gender and passed to ``process_by_var``.

    Returns None.
    """
    stats = ['count', 'median', 'mean', 'std']
    cost_plots = make_plotdir(plotdir='cms_cost_plots/')
    pay_group = calc_par_group(df, stats, ['provider_type'],
                               ['pay_per_person', 'pay_per_service'])

    print('\ntop pay_per_service')
    per_service = filter_group_by_var(pay_group['pay_per_service'], stats,
                                      stat='median')
    make_bar_plot(get_col(per_service, 'median'), cost_plots,
                  'pay_per_service', 'Median Log10 Pay Per Service')

    print('\ntop pay_per_person')
    per_person = filter_group_by_var(pay_group['pay_per_person'], stats,
                                     stat='median')
    make_bar_plot(get_col(per_person, 'median'), cost_plots,
                  'pay_per_person', 'Median Log10 Pay Per Person')

    # No separate stat='count' selection: the median-selected table is reused.
    make_bar_plot(get_col(per_person, 'count'), cost_plots,
                  'pay_per_person_count', 'Count Per Person')
    counts = get_col(per_person, 'count')
    medians = get_col(per_person, 'median')
    make_scatter_plot(counts, medians, cost_plots, 'pay_per_person_by_count',
                      'Count', 'Log10 Cost Per Person', xlim=(-1000, 100000))

    gender_plots = make_plotdir(plotdir='cms_gender_plots/')
    gender_group = calc_par_group(df, stats,
                                  ['provider_type', 'nppes_provider_gender'],
                                  ['pay_per_person'])
    process_by_var(gender_plots, gender_group,
                   col='pay_per_person', var='nppes_provider_gender')
Exemplo n.º 8
0
def calc_by_states(df):
    """Draw one state map of median pay per person for every provider type.

    ``df`` is aggregated by ``calc_par_group`` (count and median of
    ``pay_per_person`` and ``pay_per_service``) grouped by ``provider_type``
    and ``nppes_provider_state``.  For every entry in the first index level
    of that result, the ``pay_per_person`` / ``median`` values are handed to
    ``make_state_map`` with a file stem and title built from the provider
    name.

    Returns None.
    """
    # NOTE: per-service maps are currently disabled; the call is kept so
    # whatever make_plotdir does for this directory still happens.
    make_plotdir(plotdir='cms_state_service_plots/')
    agg_fns = ['count', 'median']
    p_group = calc_par_group(df,
                             agg_fns,
                             ['provider_type', 'nppes_provider_state'],
                             ['pay_per_person', 'pay_per_service'],
                             print_out=False)

    bmap = get_basemap()  # read file once for all maps

    # Splits a provider name on runs of spaces, parentheses and slashes so the
    # pieces can be joined with underscores into a file-name stem.
    patr = re_compile('[ (/)]+')

    plotdir = make_plotdir(plotdir='cms_state_person_plots/')
    for provider in p_group.index.levels[0]:
        # DataFrame.ix was removed in pandas 1.0; provider is an index label,
        # so label-based .loc is the direct replacement.
        im = p_group.loc[provider]['pay_per_person']['median']
        make_state_map(
            bmap, im, plotdir,
            'cost_per_person_%s' % '_'.join(patr.split(provider.lower())),
            '%s, Median Cost Per Person' % provider)
Exemplo n.º 9
0
def calc_by_zip(df):
    """Pass the Cardiology pay-per-person aggregate to ``calc_zip_group``.

    ``df`` is aggregated with ``calc_par_group`` (count and median of
    ``pay_per_person`` and ``pay_per_service``) grouped by ``provider_type``
    and ``nppes_provider_state``.  Grouping is by state, not zip code: it is
    a stand-in used to validate the flow first.  The ``pay_per_person``
    columns of the ``'Cardiology'`` rows are then forwarded to
    ``calc_zip_group``.

    Returns None.
    """
    # The returned path is not used here; the call is kept for whatever
    # make_plotdir does for this directory.
    make_plotdir(plotdir='cms_zip_person_plots/')

    # first validate by state, as a mock of zip
    agg_fns = ['count', 'median']
    p_group = calc_par_group(df, agg_fns, ['provider_type', 'nppes_provider_state'], ['pay_per_person', 'pay_per_service'], print_out=False)

    # DataFrame.ix was removed in pandas 1.0; 'Cardiology' is an index label,
    # so label-based .loc is the direct replacement.
    im_group = p_group.loc['Cardiology']['pay_per_person']
    calc_zip_group(im_group, 'Cardiology')
Exemplo n.º 10
0
def age_segment_par_groups(df):
    """Summarise beneficiary counts per age bracket and plot them by provider type.

    The four age-bracket count columns of ``df`` are aggregated (count and
    sum) per ``provider_type`` via ``calc_par_group``.  The overall total of
    each bracket and its percentage of the grand total are printed, then the
    ``sum`` statistic is drawn with ``make_group_bar_plots``.

    Returns None.
    """
    out_dir = make_plotdir(plotdir='cms_pop_age_plots/')
    bracket_cols = [
        'beneficiary_age_less_65_count',
        'beneficiary_age_65_74_count',
        'beneficiary_age_75_84_count',
        'beneficiary_age_greater_84_count',
    ]
    bracket_labels = ['< 65', '65-74', '75-84', '> 84']
    grouped = calc_par_group(df, ['count', 'sum'], ['provider_type'], bracket_cols)

    # Total patients per bracket, summed over all provider types.
    bracket_totals = [grouped[col]['sum'].sum() for col in bracket_cols]
    grand_total = sum(bracket_totals)

    print('\nage_range  number of patients:')
    for name, total in zip(bracket_labels, bracket_totals):
        print('  %5s    %d   (%.2f%%)' % (name, total, 100*total/grand_total))

    make_group_bar_plots(grouped, 'provider_type', bracket_cols, bracket_labels, 'sum', 'Patient Age Group', out_dir)
Exemplo n.º 11
0
def average_age_par_group(df):
    """Print and plot the beneficiary-weighted average age for each provider type.

    Side effect: a ``total_age`` column (``beneficiary_average_age`` times
    ``total_unique_benes``) is written into ``df``.  The sums of
    ``total_age`` and ``total_unique_benes`` per ``provider_type`` come from
    ``calc_par_group``; their quotient becomes the ``avg_age`` column, which
    drives a descending sort, a ``print_all_rows`` listing and a bar plot.

    Returns None.
    """
    weighted_age = df['beneficiary_average_age'] * df['total_unique_benes']
    df['total_age'] = weighted_age

    plot_path = make_plotdir(plotdir='cms_pop_plots/')
    sums = calc_par_group(df, ['sum'], ['provider_type'],
                          ['total_unique_benes', 'total_age'])

    total_age = sums['total_age']['sum']
    total_benes = sums['total_unique_benes']['sum']
    sums['avg_age'] = total_age / total_benes

    by_age = sums.sort_values(by='avg_age', ascending=False)
    print_all_rows(by_age, ['avg_age'])
    make_bar_plot(get_col(by_age, 'avg_age'), plot_path,
                  'beneficiary_average_age', 'Beneficiary Average Age',
                  xlim=(50, 100))
Exemplo n.º 12
0
def calc_by_zip(df):
    """Pass the Cardiology pay-per-person aggregate to ``calc_zip_group``.

    ``df`` is aggregated with ``calc_par_group`` (count and median of
    ``pay_per_person`` and ``pay_per_service``) grouped by ``provider_type``
    and ``nppes_provider_state``.  Grouping is by state, not zip code: it is
    a stand-in used to validate the flow first.  The ``pay_per_person``
    columns of the ``'Cardiology'`` rows are then forwarded to
    ``calc_zip_group``.

    Returns None.
    """
    # The returned path is not used here; the call is kept for whatever
    # make_plotdir does for this directory.
    make_plotdir(plotdir='cms_zip_person_plots/')

    # first validate by state, as a mock of zip
    agg_fns = ['count', 'median']
    p_group = calc_par_group(df,
                             agg_fns,
                             ['provider_type', 'nppes_provider_state'],
                             ['pay_per_person', 'pay_per_service'],
                             print_out=False)

    # DataFrame.ix was removed in pandas 1.0; 'Cardiology' is an index label,
    # so label-based .loc is the direct replacement.
    im_group = p_group.loc['Cardiology']['pay_per_person']
    calc_zip_group(im_group, 'Cardiology')
Exemplo n.º 13
0
def age_segment_par_groups(df):
    """Report and plot beneficiary counts split into four age brackets.

    ``calc_par_group`` aggregates the four age-bracket count columns of
    ``df`` (count and sum) per ``provider_type``.  For each bracket the
    total over all provider types and its share of the grand total are
    printed; the ``sum`` statistic is then plotted with
    ``make_group_bar_plots``.

    Returns None.
    """
    plot_path = make_plotdir(plotdir='cms_pop_age_plots/')
    stats = ['count', 'sum']
    age_cols = [
        'beneficiary_age_less_65_count', 'beneficiary_age_65_74_count',
        'beneficiary_age_75_84_count', 'beneficiary_age_greater_84_count'
    ]
    by_provider = calc_par_group(df, stats, ['provider_type'], age_cols)
    age_labels = ['< 65', '65-74', '75-84', '> 84']

    # Patients per bracket across every provider type, and their grand total.
    per_bracket = [by_provider[col]['sum'].sum() for col in age_cols]
    overall = sum(per_bracket)

    print('\nage_range  number of patients:')
    for bracket, patients in zip(age_labels, per_bracket):
        row = (bracket, patients, 100 * patients / overall)
        print('  %5s    %d   (%.2f%%)' % row)

    make_group_bar_plots(by_provider, 'provider_type', age_cols, age_labels,
                         'sum', 'Patient Age Group', plot_path)
Exemplo n.º 14
0
def pop_calc_par_groups(df):
    """Aggregate population columns by provider type and emit six bar plots.

    ``calc_par_group`` computes count, sum and median of the beneficiary,
    service, age and payment columns per ``provider_type``.  Each plot
    follows the same recipe, wrapped in the local ``_plot_top`` helper:
    print a heading, pass the column's aggregate through
    ``filter_group_by_var`` for one statistic, and draw that statistic with
    ``make_bar_plot``.

    Returns None.
    """
    plotdir = make_plotdir(plotdir='cms_pop_plots/')
    agg_fns = ['count', 'sum', 'median']
    p_group = calc_par_group(df, agg_fns, ['provider_type'], [
        'total_unique_benes', 'total_services', 'beneficiary_average_age',
        'total_medicare_payment_amt'
    ])

    def _plot_top(heading, column, stat, stem, title, xlim, **col_opts):
        # col_opts is forwarded to get_col untouched, so plots that never
        # passed ``log`` still call get_col without that keyword.
        print(heading)
        selected = filter_group_by_var(p_group[column], agg_fns, stat=stat)
        make_bar_plot(get_col(selected, stat, **col_opts),
                      plotdir,
                      stem,
                      title,
                      xlim=xlim)

    # count of number of providers, not patients
    _plot_top('\ntop total_services count', 'total_services', 'count',
              'total_services_count', 'Log10 Count Total Services',
              (0.1, 5), log=True)
    _plot_top('\ntop total_services sum', 'total_services', 'sum',
              'total_services_sum', 'Log10 Sum Total Services',
              (1, 9), log=True)

    _plot_top('\ntop total_unique_benes sum', 'total_unique_benes', 'sum',
              'total_unique_benes_sum', 'Log10 Sum Total Beneficiaries',
              (1, 8), log=True)

    _plot_top('\ntop beneficiary_average_age median',
              'beneficiary_average_age', 'median',
              'beneficiary_average_age_median',
              'Median Beneficiary Age Per Provider', (50, 100))

    _plot_top('\ntop total_medicare_payment_amt median',
              'total_medicare_payment_amt', 'median',
              'median_medicare_payment_amt',
              'Log10 Median Medicare Payment Amount Per Provider',
              (1, 6), log=True)

    _plot_top('\ntop total_medicare_payment_amt',
              'total_medicare_payment_amt', 'sum',
              'total_medicare_payment_amt',
              'Log10 Total Medicare Payment Amount',
              (1, 10), log=True)