def rf_epidemiologists():
    """Plot averaged AH deviation against epidemiologist-reported onsets.

    Loads AH for the module-level ``CITIES``, derives its deviation from
    the mean, obtains onsets via ``get_onsets_by_epidemiologists`` with a
    single threshold of 0, and then draws one figure per city group
    (Moscow, Saint Petersburg, Novosibirsk, and all three together).
    Each figure path is passed to ``plot_average_ah_dev`` as
    ``save_to_file``.
    """
    # CITIES = ['spb', ]
    thresholds = [0]

    resolver = get_city_resolver()
    humidity = get_ah(CITIES)
    deviation = get_ah_deviation(humidity, get_ah_mean(humidity))
    onsets = get_onsets_by_epidemiologists(CITIES, AH_FILE_PATTERN,
                                           thresholds)

    # (city codes, file-name suffix, plot title)
    city_groups = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )
    for cities, name_suffix, title in city_groups:
        averaged = get_average_ah_vs_onsets(
            deviation, onsets, cities, thresholds, DATE_SHIFT_RANGE,
            resolver)
        plot_average_ah_dev(
            averaged, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
            title=title,
            save_to_file='results/russia/epidemiologists/rf_%s.pdf'
                         % name_suffix)
def main():
    """Plot averaged AH deviation against morbidity-derived onsets.

    Uses thresholds 30, 35, 40 and 45 and a winter window running from
    1 November to 31 March (years taken from the default ``Winter``).
    Onsets come from the relative weekly morbidity excess of the
    module-level ``CITIES``. One figure is drawn per city group
    (Moscow, Saint Petersburg, Novosibirsk, and all three together).
    """
    # Parameters
    thresholds = [30, 35, 40, 45]  # [25, 30, 35, 40, 45, 50]
    # CITIES = ['spb']

    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    resolver = get_city_resolver()
    population = get_population(CITIES)

    humidity = get_ah(CITIES)
    deviation = get_ah_deviation(humidity, get_ah_mean(humidity))

    daily_cases = get_daily_morbidity(CITIES)
    case_excess = get_morbidity_excess(daily_cases,
                                       get_morbidity_mean(daily_cases))
    relative_excess = get_relative_weekly_morbidity_excess(case_excess,
                                                           population)
    onsets = get_onsets_by_morbidity(relative_excess, thresholds,
                                     winter=season)

    # (city codes, file-name suffix, plot title)
    city_groups = (
        (['msk'], 'Moscow', 'Moscow'),
        (['spb'], 'SaintPetersburg', 'Saint Petersburg'),
        (['nsk'], 'Novosibirsk', 'Novosibirsk'),
        (['spb', 'msk', 'nsk'], 'spb,msk,nsk', 'All cities'),
    )
    for cities, name_suffix, title in city_groups:
        averaged = get_average_ah_vs_onsets(
            deviation, onsets, cities, thresholds, DATE_SHIFT_RANGE,
            resolver)
        # The file name records the winter months and the lowest/highest
        # key present in the averaged result.
        target = ('results/russia/morbidity/'
                  'rf_m_%s_winter%d-%d_threshold%s-%s.pdf') % (
            name_suffix,
            season.START.month,
            season.END.month,
            min(averaged.keys()),
            max(averaged.keys()),
        )
        plot_average_ah_dev(averaged, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            title=title, save_to_file=target)
def main_paris():
    """Plot averaged AH deviation against morbidity-derived onsets for PARIS.

    Uses thresholds 9, 10, 20 and 30 and a winter window running from
    1 November to 31 March (years taken from the default ``Winter``).
    A single figure is drawn with fixed y-limits; its file name records
    the winter months and the highest key in the averaged result.
    """
    resolver = get_city_resolver()
    population = get_population(PARIS)

    # Parameters. Other threshold sets remain in the history of this
    # function, e.g. [0, 25, 50, 75, 100] or [10, 20, ..., 80].
    thresholds = [9, 10, 20, 30]  # , 40, 50, ]

    season = Winter()
    season.START = datetime.date(season.START.year, 11, 1)
    season.END = datetime.date(season.END.year, 3, 31)

    humidity = get_ah(PARIS)
    deviation = get_ah_deviation(humidity, get_ah_mean(humidity))

    daily_cases = get_daily_morbidity(PARIS)
    case_excess = get_morbidity_excess(daily_cases,
                                       get_morbidity_mean(daily_cases))
    relative_excess = get_relative_weekly_morbidity_excess(case_excess,
                                                           population)
    onsets = get_onsets_by_morbidity(relative_excess, thresholds,
                                     winter=season)

    averaged = get_average_ah_vs_onsets(
        deviation, onsets, PARIS, thresholds, DATE_SHIFT_RANGE, resolver)

    months = f'{season.START.strftime("%B")} — {season.END.strftime("%B")}'
    heading = 'Île-de-France: outbreaks in ' + months
    target = 'results/paris/paris_winter%d-%d_threshold%s.pdf' % (
        season.START.month, season.END.month, max(averaged.keys()))

    plot_average_ah_dev(averaged, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-11e-4, 15e-4),
                        title=heading, save_to_file=target)
def winter_range_investigation():
    """Plot averaged AH' against onset day for several winter windows.

    For each (start month, end month) pair the winter window is set to
    run from the first day of the start month to the last day of the end
    month, with the years taken from the default ``Winter``. Onsets are
    recomputed for that window and one figure is drawn per window with
    fixed y-limits.

    The last day of the end month is taken from ``calendar.monthrange``
    for the actual end year. The previous hand-written table hard-coded
    February as 29 days, which makes ``datetime.date`` raise
    ``ValueError`` whenever the end year is not a leap year.
    """
    import calendar  # only needed here; kept local to the function

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)

    # (start month, end month) of each winter window to investigate
    month_ranges = (
        (12, 2), (12, 3),
        (11, 2), (11, 3), (11, 4),
        (10, 3), (10, 4), (10, 5),
        (9, 4), (9, 5),
    )
    for start_month, end_month in month_ranges:
        winter = Winter()
        winter.START = datetime.date(winter.START.year, start_month, 1)

        end_year = winter.END.year
        # monthrange returns (weekday of day 1, number of days in month)
        last_day = calendar.monthrange(end_year, end_month)[1]
        winter.END = datetime.date(end_year, end_month, last_day)

        onsets = get_onsets(excess_data, THRESHOLDS, winter)
        average_ah_dev = get_average_ah_vs_onsets(
            ah_dev, onsets, CONTIGUOUS_STATES, THRESHOLDS,
            DATE_SHIFT_RANGE, state_resolver)

        rng = f'{winter.START.strftime("%B")} — {winter.END.strftime("%B")}'
        title = 'AH\' v. Onset Day: outbreaks in ' + rng
        filename = 'results/winter_range_usa/usa_winter%d-%d.pdf' % (
            winter.START.month, winter.END.month)
        plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS,
                            DATE_SHIFT_RANGE,
                            limits=(-3.3e-4, 2.2e-4),
                            title=title, save_to_file=filename)
def distinct_states():
    """Rank single states by how far their averaged AH' dips before onset.

    For every state code in 1, 3..11 and 13..51 the averaged AH'
    deviation is computed for that state alone. Over the day shifts
    -28..-1 the minimum value is taken for each threshold; a state is
    recorded when that minimum is below -0.0003, keeping the lowest
    value across thresholds.

    Prints one line per recorded state and a summary line.

    Returns:
        List of recorded state codes, ordered from the lowest (deepest)
        recorded value upwards.
    """
    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))
    onsets = get_onsets(get_mortality_excess(MORTALITY_EXCESS_FILE),
                        THRESHOLDS)

    dip_cutoff = -0.0003
    window_days = range(-28, 0, 1)  # [-19, -18, -17, -11, -10, -9]
    # Day shifts converted to positions inside each averaged series.
    window_idxs = [day - DATE_SHIFT_RANGE[0] for day in window_days]

    lowest_by_state = {}  # state_code: lowest AH' value below the cutoff
    state_codes = [1] + list(range(3, 12)) + list(range(13, 52))
    for state in state_codes:
        per_threshold = get_average_ah_vs_onsets(
            ah_dev, onsets, [state], THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)

        for _threshold, series in per_threshold.items():
            dip = min(series[idx] for idx in window_idxs)
            if dip < dip_cutoff:
                lowest_by_state[state] = min(
                    dip, lowest_by_state.get(state, dip))

        # plot_average_ah_dev(
        #     per_threshold, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
        #     title=state_resolver[state]['name'],
        #     save_to_file='results/usa_distinct/figure_state%s.png' %
        #                  state_resolver[state]['acronym'])

    ranked = sorted(lowest_by_state.items(), key=lambda x: x[1])
    for state, dip in ranked:
        print('Deep level %f in %s state (%d)' %
              (dip, state_resolver[state]['acronym'], state))

    top_dip = [state for state, _dip in ranked]
    print(f'Top dip {len(top_dip)}: {top_dip}')
    return top_dip
def main():
    """Plot averaged AH' against onset day for groups of US states.

    The winter window runs from 1 October to 30 April (years taken from
    the default ``Winter``). Onsets are computed once for that window and
    one figure with fixed y-limits is drawn per region.

    NOTE(review): another function named ``main`` appears earlier in this
    source; if both live in the same module this definition replaces it
    -- confirm which one is meant to be the entry point.
    """
    season = Winter()
    season.START = datetime.date(season.START.year, 10, 1)
    season.END = datetime.date(season.END.year, 4, 30)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_dev = get_ah_deviation(ah, get_ah_mean(ah))
    onsets = get_onsets(get_mortality_excess(MORTALITY_EXCESS_FILE),
                        THRESHOLDS, season)

    # (file-name suffix, title suffix, state codes)
    regions = (
        # ('all', 'Contiguous States', CONTIGUOUS_STATES),
        ('sw', 'Southwest States', SW_STATES),
        ('ne', 'Northeast States', NE_STATES),
        ('gulf', 'Gulf States', GULF_STATES),
        ('the_rest', 'The Remained States', REST_STATES),
    )
    for name_suffix, title_suffix, sites in regions:
        averaged = get_average_ah_vs_onsets(
            ah_dev, onsets, sites, THRESHOLDS, DATE_SHIFT_RANGE,
            state_resolver)
        heading = 'AH\' v. Onset Day: ' + title_suffix
        target = 'results/usa/usa_winter10-4_%s.pdf' % name_suffix
        plot_average_ah_dev(averaged, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                            limits=(-7e-4, 5e-4),
                            title=heading, save_to_file=target)
def stats_joint():
    """Compare control and epidemic AH' samples for shrinking state sets.

    Steps:

    1. Compute onsets for a winter window of 1 October to 31 March.
    2. Call ``distinct_states()`` to get states ordered by their AH' dip,
       then plot the averaged AH' for all states except the first 24.
    3. For ``i`` in ``0 .. len(top_dip) - 1``: drop the first ``i`` states,
       build the control sample from the per-state control JSON files,
       generate and reload the epidemic sample for the remaining states,
       and print the P-value of Welch's t-test between the two samples.

    Per-state control files under ``results/stats/usa/distinct/`` are
    expected to exist already; states whose file is missing or unreadable
    are skipped.

    Changes from the previous version:

    * The control sample is rebuilt for every state set. It used to be
      created once before the loop, so each iteration still contained the
      samples of every earlier, larger state set.
    * Only ``OSError`` and ``ValueError`` (which covers
      ``json.JSONDecodeError``) are treated as a missing control file,
      instead of a bare ``except`` that hid every error.
    """
    # Assumes stats_distinct_states has already been run for every state.
    winter = Winter()
    winter.START = datetime.date(winter.START.year, 10, 1)
    winter.END = datetime.date(winter.END.year, 3, 31)

    state_resolver = get_state_resolver(STATE_CODES_FILE)
    ah = get_ah(AH_CSV_FILE)
    ah_mean = get_ah_mean(ah)
    ah_dev = get_ah_deviation(ah, ah_mean)
    excess_data = get_mortality_excess(MORTALITY_EXCESS_FILE)
    onsets = get_onsets(excess_data, THRESHOLDS, winter)

    top_dip = distinct_states()

    all_states = [1] + list(range(3, 12)) + list(range(13, 52))

    # Exclude the 24 states with the lowest AH' dip.
    NOT_TOP_STATES = list(set(all_states) - set(top_dip[:24]))
    average_ah_dev = get_average_ah_vs_onsets(
        ah_dev, onsets, NOT_TOP_STATES, THRESHOLDS, DATE_SHIFT_RANGE,
        state_resolver)
    plot_average_ah_dev(average_ah_dev, THRESHOLD_COLORS, DATE_SHIFT_RANGE,
                        limits=(-7e-4, 5e-4),
                        title='AH\' v. Onset Day: The Remained States',
                        save_to_file='results/usa/usa_top.pdf')

    # For joint states test
    threshold = THRESHOLDS[-1]  # The strongest

    for i in range(len(top_dip)):
        # Exclude the first i states of the dip ranking.
        sites = list(set(all_states) - set(top_dip[:i]))

        # Control sample for exactly this set of states.
        ah_sample = []
        for site in sites:
            control_path = (
                f'results/stats/usa/distinct/control.{site}.{threshold}.json')
            try:
                with open(control_path, 'r') as f:
                    site_sample = json.load(f)
            except (OSError, ValueError):
                # No usable control file for this state; skip it.
                continue
            ah_sample += site_sample

        joint_path = (
            f'results/stats/usa/joint/sites_cnt{len(sites)}.{threshold}.json')
        # NOTE(review): a default Winter() is passed here, not the Oct-Mar
        # `winter` used for `onsets` above -- confirm this is intended.
        generate_experimental_sample(
            onsets, threshold, ah_dev, Winter(), sites, state_resolver,
            filename=joint_path)
        with open(joint_path, 'r') as f:
            epidemic_sample = json.load(f)

        print(f'Some {len(sites)} states')
        print(f"AH' sample size = {len(ah_sample)}")
        print(f"Epidemic sample size = {len(epidemic_sample)}")

        # t, prob = stats.ttest_ind(ah_sample, epidemic_sample)
        # print(f"Equal variance (Student's t-test): P-value = {prob}")
        _, prob = stats.ttest_ind(ah_sample, epidemic_sample,
                                  equal_var=False)
        print(f"Not equal variance (Welch’s t-test): P-value = {prob}")
        print()