Python Surge.clustering示例，covid_surge.Surge.clustering Python示例

示例#1

0

显示文件

文件： test_us_states.py 项目： codacy-badger/covid-surge

def test_main():
    """Regression test: group US states by their fitted surge period.

    Builds a default ``Surge()`` object, fits every entry with
    ``multi_fit_data``, clusters the fits with
    ``clustering(fit_data, 2, 'surge_period')`` and maps each fit to a bin via
    ``get_bin_id``. The resulting groups, ordered by ascending bin id, are
    asserted against the reference grouping for data ending on 5/15/20.
    All plotting calls are issued with ``save=True``.
    """

    # Get US surge data
    us_surge = Surge()

    # Set parameters
    us_surge.end_date = '5/15/20'  # set end date wanted
    us_surge.ignore_last_n_days = 0  # allow for data repo to be corrected/updated
    us_surge.min_n_cases_abs = 500  # min # of absolute cases for analysis
    us_surge.deaths_100k_minimum = 41  # US death per 100,000 for Chronic Lower Respiratory Diseases per year: 41 (2019)

    print('')
    print('# of states/distric: ', len(us_surge.names))
    print('# of days:           ', us_surge.dates.shape[0])

    # Fit data to all states
    fit_data = us_surge.multi_fit_data(verbose=True,
                                       plot=True,
                                       save_plots=True)

    # Plot all data in one plot
    us_surge.plot_multi_fit_data(fit_data, 'experimental', save=True)
    # Plot all fit data in one plot
    us_surge.plot_multi_fit_data(fit_data, 'fit', save=True)

    # Create clustering bins based on surge period
    bins = us_surge.clustering(fit_data, 2, 'surge_period')

    print('')
    print('*****************************************************************')
    print('                             Bins                                ')
    print('*****************************************************************')
    for k in sorted(bins.keys()):
        print(' Bin %i %s' % (k, bins[k]))

    # Use bins to create groups of states based on surge period.
    # Each fit_data entry is a (sort_key, data) pair; data[0] is the name.
    groups_by_bin = dict()
    for (sort_key, data) in fit_data:
        bin_id = us_surge.get_bin_id(sort_key, bins)
        groups_by_bin.setdefault(bin_id, []).append(data[0])

    # Flatten to a list of groups ordered by ascending bin id
    state_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

    print('')
    print('*****************************************************************')
    print('                          State Groups                           ')
    print('*****************************************************************')
    # enumerate gives the positional group number directly; list.index()
    # would rescan the list and return the first equal group.
    for (group_id, group) in enumerate(state_groups):
        print(' Group %i %s' % (group_id, group))

    assert len(state_groups) == 7
    assert state_groups[0] == ['New York', 'Virginia']
    assert state_groups[1] == ['Massachusetts', 'Connecticut', 'Michigan']
    assert state_groups[2] == [
        'New Jersey', 'Pennsylvania', 'Louisiana', 'Minnesota', 'Maryland'
    ]
    assert state_groups[3] == ['North Carolina', 'Indiana']
    assert state_groups[4] == [
        'Florida', 'Georgia', 'Wisconsin', 'Missouri', 'Colorado', 'Ohio'
    ]
    assert state_groups[5] == ['Illinois', 'California', 'Washington']
    assert state_groups[6] == ['Alabama', 'Mississippi']

    # Plot the normalized surge for groups of states
    us_surge.plot_group_fit_data(state_groups, fit_data, save=True)

    # Plot the surge period for all grouped states
    us_surge.plot_group_surge_periods(fit_data, bins, save=True)

示例#2

0

显示文件

文件： test_countries.py 项目： codacy-badger/covid-surge

def test_main():
    """Regression test: group countries by their fitted surge period.

    Builds ``Surge('global')``, fits every entry except the blocked
    ``'China'`` with ``multi_fit_data``, clusters the fits with
    ``clustering(fit_data, 2, 'surge_period')`` and maps each fit to a bin via
    ``get_bin_id``. The resulting groups, ordered by ascending bin id, are
    asserted against the reference grouping for data ending on 5/15/20.
    All plotting calls are issued with ``save=True``.
    """

    # Get global surge data
    g_surge = Surge('global')

    # Set parameters
    g_surge.end_date = '5/15/20'  # set end date wanted
    g_surge.ignore_last_n_days = 0  # allow for data repo to be corrected/updated
    g_surge.min_n_cases_abs = 2500  # min # of absolute cases for analysis

    print('# of countries: ', g_surge.cases.shape[1])
    print('# of days:      ', g_surge.cases.shape[0])

    # Fit data to all countries (China is excluded through blocked_list)
    fit_data = g_surge.multi_fit_data(blocked_list=['China'],
                                      verbose=True,
                                      plot=True,
                                      save_plots=True)

    # Plot all data in one plot
    g_surge.plot_multi_fit_data(fit_data, 'experimental', save=True)
    # Plot all fit data in one plot
    g_surge.plot_multi_fit_data(fit_data, 'fit', save=True)

    # Create clustering bins based on surge period
    bins = g_surge.clustering(fit_data, 2, 'surge_period')

    print('')
    print('*****************************************************************')
    print('                             Bins                                ')
    print('*****************************************************************')
    for k in sorted(bins.keys()):
        print(' Bin %i %s' % (k, bins[k]))

    # Use bins to create groups of countries based on surge period.
    # Each fit_data entry is a (sort_key, data) pair; data[0] is the name.
    groups_by_bin = dict()
    for (sort_key, data) in fit_data:
        bin_id = g_surge.get_bin_id(sort_key, bins)
        groups_by_bin.setdefault(bin_id, []).append(data[0])

    # Flatten to a list of groups ordered by ascending bin id
    country_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

    print('')
    print('*****************************************************************')
    print('                         Country Groups                          ')
    print('*****************************************************************')
    # enumerate gives the positional group number directly; list.index()
    # would rescan the list and return the first equal group.
    for (group_id, group) in enumerate(country_groups):
        print(' Group %i %s' % (group_id, group))

    assert len(country_groups) == 6
    assert country_groups[0] == ['Belgium', 'France']
    assert country_groups[1] == ['Germany', 'Turkey']
    assert country_groups[2] == [
        'Spain', 'Netherlands', 'United Kingdom', 'Canada'
    ]
    assert country_groups[3] == ['Ecuador', 'Sweden']
    assert country_groups[4] == ['US', 'Italy']
    assert country_groups[5] == ['Iran']

    # Plot the normalized surge for groups of countries
    g_surge.plot_group_fit_data(country_groups, fit_data, save=True)

    # Plot the surge period for all grouped countries
    g_surge.plot_group_surge_periods(fit_data, bins, save=True)

示例#3

0

显示文件

文件： run_countries.py 项目： codacy-badger/covid-surge

def main():
    """Fit, cluster and plot the surge period of countries.

    Builds ``Surge('global')``, fits every entry except the blocked
    ``'China'`` with ``multi_fit_data``, clusters the fits with
    ``clustering(fit_data, 2, 'surge_period')`` and maps each fit to a bin via
    ``get_bin_id``. Bins and groups are printed, and all plotting calls are
    issued with ``save=True``.
    """

    # Get global surge data
    g_surge = Surge('global')

    # Set parameters
    # The two end_date lines act as a manual toggle: the later assignment
    # (None) is the one in effect.
    g_surge.end_date = '4/20/20'  # set end date wanted
    g_surge.end_date = None  # get all the data available
    g_surge.ignore_last_n_days = 2  # allow for data repo to be corrected/updated
    g_surge.min_n_cases_abs = 2500  # min # of absolute cases for analysis

    print('# of countries: ', g_surge.cases.shape[1])
    print('# of days:      ', g_surge.cases.shape[0])

    # Fit data to all countries (China is excluded through blocked_list)
    fit_data = g_surge.multi_fit_data(blocked_list=['China'],
                                      verbose=True,
                                      plot=True,
                                      save_plots=True)

    # Plot all data in one plot
    g_surge.plot_multi_fit_data(fit_data, 'experimental', save=True)
    # Plot all fit data in one plot
    g_surge.plot_multi_fit_data(fit_data, 'fit', save=True)

    # Create clustering bins based on surge period
    bins = g_surge.clustering(fit_data, 2, 'surge_period')

    print('')
    print('*****************************************************************')
    print('                             Bins                                ')
    print('*****************************************************************')
    for k in sorted(bins.keys()):
        print(' Bin %i %s' % (k, bins[k]))

    # Use bins to create groups of countries based on surge period.
    # Each fit_data entry is a (sort_key, data) pair; data[0] is the name.
    groups_by_bin = dict()
    for (sort_key, data) in fit_data:
        bin_id = g_surge.get_bin_id(sort_key, bins)
        groups_by_bin.setdefault(bin_id, []).append(data[0])

    # Flatten to a list of groups ordered by ascending bin id
    country_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

    print('')
    print('*****************************************************************')
    print('                         Country Groups                          ')
    print('*****************************************************************')
    # enumerate gives the positional group number directly; list.index()
    # would rescan the list and return the first equal group.
    for (group_id, group) in enumerate(country_groups):
        print(' Group %i %s' % (group_id, group))

    # Plot the normalized surge for groups of countries
    g_surge.plot_group_fit_data(country_groups, fit_data, save=True)

    # Plot the surge period for all grouped countries
    g_surge.plot_group_surge_periods(fit_data, bins, save=True)

示例#4

0

显示文件

文件： run_us_state_counties.py 项目： codacy-badger/covid-surge

def main():
    """Fit, cluster and plot the surge period of the counties of one state.

    Builds ``Surge(locale='US', sub_locale='North Carolina')``, fits every
    entry with ``multi_fit_data`` and returns early when no fit was produced.
    Otherwise the fits are clustered with
    ``clustering(fit_data, 2, 'surge_period')`` and each fit is mapped to a
    bin via ``get_bin_id``. Bins and groups are printed, and all plotting
    calls are issued with ``save=True``.
    """

    # Get US surge data
    sub_locale = 'North Carolina'
    c_surge = Surge(locale='US', sub_locale=sub_locale)

    print('')
    print('State        : ', sub_locale)
    print('# of counties: ', len(c_surge.names))

    # Set parameters
    # The two end_date lines act as a manual toggle: the later assignment
    # (None) is the one in effect.
    c_surge.end_date = '4/20/20'   # set end date wanted
    c_surge.end_date = None        # get all the data available
    c_surge.ignore_last_n_days = 2 # allow for data repo to be corrected/updated
    c_surge.min_n_cases_abs = 25  # min # of absolute cases for analysis
    c_surge.deaths_100k_minimum = 41 # US death per 100,000 for Chronic Lower Respiratory Diseases per year: 41 (2019)

    # Fit data to all counties/cities
    fit_data = c_surge.multi_fit_data(verbose=True, plot=True, save_plots=True)

    print('# of fittings done = ', len(fit_data))

    # Plot all data in one plot
    c_surge.plot_multi_fit_data(fit_data, 'experimental', save=True)

    # Nothing was fitted: there is nothing to cluster or group
    if len(fit_data) == 0:
        print('Done here...')
        return

    # Plot all fit data in one plot
    c_surge.plot_multi_fit_data(fit_data, 'fit', save=True)

    # Create clustering bins based on surge period
    bins = c_surge.clustering(fit_data, 2, 'surge_period')

    print('')
    print('*****************************************************************')
    print('                             Bins                                ')
    print('*****************************************************************')
    for k in sorted(bins.keys()):
        print(' Bin %i %s' % (k, bins[k]))

    # Use bins to create groups of counties/cities based on surge period.
    # Each fit_data entry is a (sort_key, data) pair; data[0] is the name.
    groups_by_bin = dict()
    for (sort_key, data) in fit_data:
        bin_id = c_surge.get_bin_id(sort_key, bins)
        groups_by_bin.setdefault(bin_id, []).append(data[0])

    # Flatten to a list of groups ordered by ascending bin id
    county_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

    print('')
    print('*****************************************************************')
    print('                         County Groups                           ')
    print('*****************************************************************')
    # enumerate gives the positional group number directly; list.index()
    # would rescan the list and return the first equal group.
    for (group_id, group) in enumerate(county_groups):
        print(' Group %i %s' % (group_id, group))

    # Plot the normalized surge for groups of counties
    c_surge.plot_group_fit_data(county_groups, fit_data, save=True)

    # Plot the surge period for all grouped counties
    c_surge.plot_group_surge_periods(fit_data, bins, save=True)

示例#5

0

显示文件

文件： run_us_states.py 项目： codacy-badger/covid-surge

def main():
    """Fit, cluster and plot the surge period of US states.

    Builds a default ``Surge()`` object, fits every entry with
    ``multi_fit_data``, clusters the fits with
    ``clustering(fit_data, 2, 'surge_period')`` and maps each fit to a bin via
    ``get_bin_id``. Bins and groups are printed, and all plotting calls are
    issued with ``save=True``.
    """

    # Get US surge data
    us_surge = Surge()

    # Set parameters
    # The two end_date lines act as a manual toggle: the later assignment
    # (None) is the one in effect.
    us_surge.end_date = '4/20/20'  # set end date wanted
    us_surge.end_date = None  # get all the data available
    us_surge.ignore_last_n_days = 2  # allow for data repo to be corrected/updated
    us_surge.min_n_cases_abs = 500  # min # of absolute cases for analysis
    us_surge.deaths_100k_minimum = 41  # US death per 100,000 for Chronic Lower Respiratory Diseases per year: 41 (2019)

    print('')
    print('# of states/distric: ', len(us_surge.names))
    print('# of days:           ', us_surge.dates.shape[0])

    # Fit data to all states
    fit_data = us_surge.multi_fit_data(verbose=True,
                                       plot=True,
                                       save_plots=True)

    # Plot all data in one plot
    us_surge.plot_multi_fit_data(fit_data, 'experimental', save=True)
    # Plot all fit data in one plot
    us_surge.plot_multi_fit_data(fit_data, 'fit', save=True)

    # Create clustering bins based on surge period
    bins = us_surge.clustering(fit_data, 2, 'surge_period')

    print('')
    print('*****************************************************************')
    print('                             Bins                                ')
    print('*****************************************************************')
    for k in sorted(bins.keys()):
        print(' Bin %i %s' % (k, bins[k]))

    # Use bins to create groups of states based on surge period.
    # Each fit_data entry is a (sort_key, data) pair; data[0] is the name.
    groups_by_bin = dict()
    for (sort_key, data) in fit_data:
        bin_id = us_surge.get_bin_id(sort_key, bins)
        groups_by_bin.setdefault(bin_id, []).append(data[0])

    # Flatten to a list of groups ordered by ascending bin id
    state_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

    print('')
    print('*****************************************************************')
    print('                          State Groups                           ')
    print('*****************************************************************')
    # enumerate gives the positional group number directly; list.index()
    # would rescan the list and return the first equal group.
    for (group_id, group) in enumerate(state_groups):
        print(' Group %i %s' % (group_id, group))

    # Plot the normalized surge for groups of states
    us_surge.plot_group_fit_data(state_groups, fit_data, save=True)

    # Plot the surge period for all grouped states
    us_surge.plot_group_surge_periods(fit_data, bins, save=True)

示例#6

0

显示文件

def main():
    """Sweep the counties of every US state that has a fitted surge.

    First fits the state-level data of a default ``Surge()`` object and
    collects the names of the fitted states. For each of those states it
    builds ``Surge(locale='US', sub_locale=state)``, fits its counties,
    clusters the fits with ``clustering(fit, 2, 'surge_period')``, prints the
    bins and county groups, and calls ``plot_group_surge_periods`` with
    ``save=True``. Finally it prints the number of fitted counties, the mean
    and standard deviation of their surge periods, and the number of counties
    whose last ``cases`` row is positive.
    """

    # Get US surge data
    us_surge = Surge()

    # Set parameters
    # The two end_date lines act as a manual toggle: the later assignment
    # (None) is the one in effect.
    us_surge.end_date = '4/20/20'  # set end date wanted
    us_surge.end_date = None  # get all the data available
    us_surge.ignore_last_n_days = 2  # allow for data repo to be corrected/updated
    us_surge.min_n_cases_abs = 500  # min # of absolute cases for analysis
    us_surge.deaths_100k_minimum = 41  # US death per 100,000 for Chronic Lower Respiratory Diseases per year: 41 (2019)

    print('')
    print('# of states/distric: ', len(us_surge.names))
    print('# of days:           ', us_surge.dates.shape[0])

    # Fit data to all states of fully-evolved surge
    fit_data = us_surge.multi_fit_data()  # silent

    # Names of the states that produced a fit, in fit_data order
    states = list()

    print('')
    for (i, (sort_key, data)) in enumerate(fit_data):
        name = data[0]
        states.append(name)
        print('%2i) %15s: surge period %1.2f [day]' % (i, name, sort_key))

    surge_periods = list()  # collect surge period of all counties/towns

    total_n_counties = 0  # count all counties/towns inspected w/ nonzero cases

    for state in states:

        print('')
        print('***************************************************************')
        print('                          ', state)
        print('***************************************************************')

        c_surge = Surge(locale='US', sub_locale=state)

        print('# of counties: ', len(c_surge.names))

        # Columns whose last row of cases is positive
        (ids, ) = np.where(c_surge.cases[-1, :] > 0)
        total_n_counties += ids.size

        # Set parameters
        c_surge.end_date = '4/20/20'  # set end date wanted
        c_surge.end_date = None  # get all the data available
        c_surge.ignore_last_n_days = 2  # allow for data repo to be corrected/updated
        c_surge.min_n_cases_abs = 100  # min # of absolute cases for analysis
        c_surge.deaths_100k_minimum = 41  # US death per 100,000 for Chronic Lower Respiratory Diseases per year: 41 (2019)

        # Fit data to all counties/cities; kept under its own name so the
        # state-level fit_data above is not shadowed.
        county_fit_data = c_surge.multi_fit_data(verbose=False,
                                                 plot=True,
                                                 save_plots=True)
        print('# of fittings done = ', len(county_fit_data))

        # No county was fitted for this state: move on to the next one
        if len(county_fit_data) == 0:
            continue

        print('')
        for (sort_key, data) in county_fit_data:
            name = data[0]
            surge_periods.append(sort_key)
            print('%15s: surge period %1.2f [day]' % (name, sort_key))

        # Create clustering bins based on surge period
        bins = c_surge.clustering(county_fit_data, 2, 'surge_period')

        print('')
        print('----------------------------------------------------------------')
        print('                            Bins                                ')
        print('----------------------------------------------------------------')
        for k in sorted(bins.keys()):
            print(' Bin %i %s' % (k, bins[k]))

        # Use bins to create groups of counties/cities based on surge period.
        # Each entry is a (sort_key, data) pair; data[0] is the name.
        groups_by_bin = dict()
        for (sort_key, data) in county_fit_data:
            bin_id = c_surge.get_bin_id(sort_key, bins)
            groups_by_bin.setdefault(bin_id, []).append(data[0])

        # Flatten to a list of groups ordered by ascending bin id
        county_groups = [groups_by_bin[k] for k in sorted(groups_by_bin)]

        print('')
        print('----------------------------------------------------------------')
        print('                        County Groups                           ')
        print('----------------------------------------------------------------')
        # enumerate gives the positional group number directly; list.index()
        # would rescan the list and return the first equal group.
        for (group_id, group) in enumerate(county_groups):
            print(' Group %i %s' % (group_id, group))

        # Plot the surge period for all grouped counties
        c_surge.plot_group_surge_periods(county_fit_data, bins, save=True)

        print('')
        print('')

    print('Total # of counties/towns with surge period = ', len(surge_periods))
    # Mean/std of an empty sample is nan and makes numpy emit a
    # RuntimeWarning, so the statistics line is skipped when nothing was fit.
    if surge_periods:
        periods = np.array(surge_periods)
        print('Average surge period %1.2f [day], std %1.2f' %
              (np.mean(periods), np.std(periods)))
    print('Total # of inspected counties/towns w/ non-zero cases = %4i' %
          total_n_counties)