def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """Validate quarterly new-infection counts against a vaccine-adjusted expectation.

    Expected infections per 3-month bin are base_infectivity scaled by the
    vaccine's (1 - initial_effect); the actual daily counts from start_day
    onward are grouped into the same bins and compared with a chi-squared
    (multinomial) test. The verdict is written to ``report_name``.
    Returns True when all checks pass.
    """
    with open(report_name, "w") as outfile:
        outfile.write("Config_name = {}\n".format(param_obj[KEY_CONFIG_NAME]))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        initial_effect = campaign_obj[KEY_INITIAL_EFFECT]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]

        # Expected infections in one 3-month bin, discounted by the vaccine's initial effect.
        months_per_bin = 3
        bin_expectation = base_infectivity * sft.DAYS_IN_MONTH * months_per_bin * (1.0 - initial_effect)
        # One year's worth of identical bins.
        expected = [bin_expectation] * (sft.MONTHS_IN_YEAR // months_per_bin)

        binned_actual = []
        if len(new_infection) < 2 * sft.DAYS_IN_YEAR:
            # The infected individual is imported at the end of the first year, so the
            # simulation duration needs to be at least 2 years.
            success = False
            outfile.write("BAD: the simulation duration is too short, please make sure it's at least 2 years.\n")
        outfile.write("running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
                      "base_infectivity = {2}, initial_effect = {3}.\n".format(
                          sft.MONTHS_IN_YEAR // months_per_bin, months_per_bin,
                          base_infectivity, initial_effect))

        # Accumulate the daily counts from start_day onward into 3-month bins.
        bin_length_days = months_per_bin * sft.DAYS_IN_MONTH
        running_total = 0
        for day_count, day in enumerate(range(start_day, len(new_infection)), start=1):
            running_total += new_infection[day]
            if day_count % bin_length_days == 0:  # a full bin just completed
                binned_actual.append(running_total)
                running_total = 0

        sft.plot_data(binned_actual, dist2=expected,
                      label1="actual_new_infections", label2="expected_new_infection",
                      title="actual vs. expected new infection for every {} months".format(months_per_bin),
                      xlabel="every {} months".format(months_per_bin),
                      ylabel="# of new infections",
                      category='actual_vs_expected_new_infections', show=True, line=True)
        if sft.test_multinomial(dist=binned_actual, proportions=expected,
                                report_file=outfile, prob_flag=False):
            outfile.write("GOOD: The Chi-squared test for number of new infections in every {} months "
                          "passed.\n".format(months_per_bin))
        else:
            success = False
            outfile.write("BAD: The Chi-squared test for number of new infections in every {} months "
                          "failed.\n".format(months_per_bin))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(drug_start_timestep, disease_deaths, cum_deaths, deaths, infected_individuals, death_times,
                       drug_mortality_rate_HIV, report_name):
    """Validate drug-mortality output and write a pass/fail report.

    First cross-checks cumulative disease deaths between InsetChart.json and
    stdout.txt, then tests the death data against the configured rate: a
    Kolmogorov-Smirnov exponential test when the rate is small (< 0.1), or a
    chi-squared (multinomial) test on deaths-per-day otherwise.

    Fixes vs. original: the "reuslt" typo in two report messages, and a guard
    around ``expected_mortality.pop(0)`` which raised IndexError when no
    expected data was collected.

    :param drug_start_timestep: time step at which the drug intervention starts
    :param disease_deaths: per-timestep cumulative disease deaths (InsetChart.json)
    :param cum_deaths: per-timestep cumulative deaths parsed from stdout.txt
    :param deaths: per-timestep new death counts
    :param infected_individuals: per-timestep infected population counts
    :param death_times: individual death times (days)
    :param drug_mortality_rate_HIV: configured per-day mortality rate on drugs
    :param report_name: path of the report file to write
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        length = len(cum_deaths)
        if sum(disease_deaths) == 0 or sum(cum_deaths) == 0 or len(death_times) == 0:
            success = False
            outfile.write(sft.no_test_data)
        # The two report channels must agree on cumulative deaths at every time step.
        for x in range(length):
            if disease_deaths[x] != cum_deaths[x]:
                success = False
                outfile.write("BAD: at timestep {0}, disease deaths is {1} in InsetChart.json and {2} in "
                              "stdout.txt.\n".format(x + 1, disease_deaths[x], cum_deaths[x]))
        # ks exponential test doesn't work very well with large rate, use chi squared test instead
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_mortality_rate_HIV < 0.1:
            outfile.write("Testing death times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_mortality_rate_HIV, len(death_times)))
            ks_result = sft.test_exponential(death_times, drug_mortality_rate_HIV, report_file=outfile,
                                             integers=True, roundup=True, round_nearest=False)
            if not ks_result:
                success = False
                outfile.write("BAD: ks test result is False.\n")  # typo fix: was "reuslt"
            # Reference exponential sample (numpy) used only for the comparison plots.
            size = len(death_times)
            scale = 1.0 / drug_mortality_rate_HIV
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            sft.plot_data_sorted(death_times, dist_exponential_np, label1="death times", label2="numpy data",
                                 title="death_times_actual_vs_numpy", xlabel="data points", ylabel="death times",
                                 category="death_times", show=True, line=True, overlap=True)
            sft.plot_cdf(death_times, dist_exponential_np, label1="death times", label2="numpy data",
                         title="death_times_cdf", xlabel="days", ylabel="probability",
                         category="death_times_cdf", show=True)
        else:
            outfile.write("Testing death count per day with rate {0}. \n".format(drug_mortality_rate_HIV))
            expected_mortality = []
            for t in range(len(deaths)):
                if t < drug_start_timestep + 1:
                    # No drug-related deaths may occur before the drug takes effect.
                    if deaths[t] > 0:
                        success = False
                        outfile.write("BAD: expected no disease death on drugs before day {0}, get {1} cases at "
                                      "timestep {2}.\n".format(drug_start_timestep + 1, deaths[t], t))
                elif infected_individuals[t] > 0:
                    expected_mortality.append(drug_mortality_rate_HIV * infected_individuals[t])
            if expected_mortality:  # guard against IndexError on empty data
                expected_mortality.pop(0)  # the Infected is off by one day
            test_death_dates = deaths[drug_start_timestep + 1:drug_start_timestep + 1 + len(expected_mortality)]
            sft.plot_data(test_death_dates, expected_mortality, label1="actual death", label2="expected death",
                          title="death per day", xlabel="date after drug start day", ylabel="death per day",
                          category="death_counts", show=True, line=True, overlap=True, sort=False)
            chi_result = sft.test_multinomial(dist=test_death_dates, proportions=expected_mortality,
                                              report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test result is False.\n")  # typo fix: was "reuslt"
        outfile.write(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, report_name, debug):
    """Validate EnvironmentalDiagnostic behavior (no IP restriction) and write a report.

    For each time step the environmental contagion sample logged in stdout is
    compared to base_infectivity * infected / stat_pop (1% relative tolerance),
    and expected positive/negative test probabilities are accumulated from
    Base_Sensitivity / Base_Specificity relative to Sample_Threshold. The
    summed actual positive/negative counts are then compared to the expected
    totals with a multinomial (chi-squared) test.

    :param param_obj: dict of config parameters (uses Simulation_Duration, Base_Infectivity)
    :param campaign_obj: dict of EnvironmentalDiagnostic campaign parameters
    :param stdout_df: pandas DataFrame parsed from StdOut.txt, one row per time step
    :param report_name: path of the report file to write
    :param debug: when True, also print the success message to stdout
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        success = True
        sample_threshold = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity])
        base_specificity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity])
        ip_key_value = campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold,
                                           sample_threshold))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity,
                                           base_sensitivity))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity,
                                           base_specificity))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value,
                                           ip_key_value))
        # Precondition checks: this test variant expects threshold 0, specificity 1,
        # and no IP restriction. A non-empty IP key is a hard failure.
        if sample_threshold:
            outfile.write("WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        if base_specificity != 1:
            outfile.write("WARNING: the {0} is {1}, expected value is 1.\n".format(
                CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
        if ip_key_value:
            success = False
            outfile.write("BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = expected_negative_count = 0
        contagion_list = []
        contagion = 0
        for t in range(1, duration):
            # Select the stdout row for this time step.
            stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
            infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
            stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]
            envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
            # calculated environmental contagion for next time step
            contagion = base_infectivity * infected / stat_pop
            if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                success = False
                outfile.write("BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n"
                              .format(t, envi_sample, contagion))
            # Expected outcome probabilities depend on whether the sample exceeds the threshold.
            if contagion > sample_threshold:
                expected_test_positive = base_sensitivity
                expected_test_negative = 1.0 - base_sensitivity
            else:
                expected_test_positive = 1.0 - base_specificity
                expected_test_negative = base_specificity
            contagion_list.append(contagion)
            expected_positive_count += expected_test_positive
            expected_negative_count += expected_test_negative
        # Aggregate check: total positive/negative counts vs. accumulated expectations.
        stdout_sum = stdout_df.sum()
        result = sft.test_multinomial(
            [stdout_sum[Diagnostic_Support.Stdout.test_positive],
             stdout_sum[Diagnostic_Support.Stdout.test_negative]],
            proportions=[expected_positive_count, expected_negative_count],
            report_file=outfile, prob_flag=False,
        )
        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if result:
            outfile.write(message.format("GOOD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count, expected_negative_count))
        else:
            success = False
            outfile.write(message.format("BAD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count, expected_negative_count))
        # The first stdout sample is skipped because the contagion computed at step t
        # is compared against the sample logged for the following row.
        sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                      label1="Actual", label2="Expected",
                      title="Environmental_Contagion", xlabel="Day", ylabel="Environmental_Contagion",
                      category='Environmental_Contagion', overlap=True, alpha=0.5)
        # NOTE(review): a commented-out per-timestep comparison of test positive/negative/default
        # counts against expected values (5% tolerance) used to live here; the aggregate
        # multinomial test above covers the totals instead.
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(drug_start_timestep, inactivation_times, active_count, inactivations, drug_inactivation_rate,
                       report_name, debug=False):
    """Validate drug-driven TB inactivation timing and write a pass/fail report.

    Uses a Kolmogorov-Smirnov exponential test on individual inactivation
    times when the rate is small (< 0.1), otherwise a chi-squared
    (multinomial) test on inactivation counts per day versus
    rate * active_count. Fixes the "reuslt" typo in the failure message.

    :param drug_start_timestep: time step at which the drug intervention starts
    :param inactivation_times: individual inactivation times (days)
    :param active_count: per-timestep count of active infections
    :param inactivations: per-timestep inactivation counts
    :param drug_inactivation_rate: configured per-day inactivation rate
    :param report_name: path of the report file to write
    :param debug: when True, also print the success message to stdout
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        success = True
        # ks exponential test doesn't work very well with large rate, use chi squared test instead.
        # while rate is small ks test for exponential distribution is more sensitive to catch the difference
        if drug_inactivation_rate < 0.1:
            outfile.write("Testing inactivation times as draws from exponential distrib with rate {0}. "
                          "Dataset size = {1}.\n".format(drug_inactivation_rate, len(inactivation_times)))
            success = sft.test_exponential(inactivation_times, drug_inactivation_rate, outfile,
                                           integers=True, roundup=True, round_nearest=False)
            if not success:
                outfile.write("BAD: ks test for rate {} is False.\n".format(drug_inactivation_rate))
            # Reference exponential sample (numpy) used only for the comparison plots.
            size = len(inactivation_times)
            scale = 1.0 / drug_inactivation_rate
            dist_exponential_np = numpy.random.exponential(scale, size)
            dist_exponential_np = [math.ceil(x) for x in dist_exponential_np]
            sft.plot_data_sorted(inactivation_times, dist_exponential_np, label1="test times", label2="numpy data",
                                 title="inactivation_times_actual_vs_numpy", xlabel="data points",
                                 ylabel="Inactivation times", category="inactivation_times",
                                 show=True, line=True, overlap=True)
            sft.plot_cdf(inactivation_times, dist_exponential_np, label1="test times", label2="numpy data",
                         title="inactivation_times_cdf", xlabel="days", ylabel="probability",
                         category="inactivation_times_cdf", show=True)
            sft.plot_probability(inactivation_times, dist_exponential_np, label1="test times", label2="numpy data",
                                 title="inactivation_times_pdf", xlabel="days", ylabel="probability",
                                 category="inactivation_times_pdf", show=True)
        else:
            outfile.write("Testing inactivation count per day with rate {0}. \n".format(drug_inactivation_rate))
            expected_inactivation = []
            for t in range(len(inactivations)):
                if t < drug_start_timestep:
                    # No drug-driven inactivations may occur before the drug starts.
                    if inactivations[t] > 0:
                        success = False
                        outfile.write("BAD: expected no inactivations on drugs before day {0}, get {1} cases at "
                                      "timestep {2}.\n".format(drug_start_timestep, inactivations[t], t))
                elif active_count[t] > 0:
                    expected_inactivation.append(drug_inactivation_rate * active_count[t])
            # Align the actual and expected series to the same length, whichever is shorter.
            if len(inactivations) <= len(expected_inactivation) + drug_start_timestep:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:]
                expected_inactivation = expected_inactivation[:len(test_inactivation_dates)]
            else:
                test_inactivation_dates = inactivations[drug_start_timestep + 1:
                                                        drug_start_timestep + 1 + len(expected_inactivation)]
            sft.plot_data(test_inactivation_dates, expected_inactivation,
                          label1="actual inactivation", label2="expected inactivation",
                          title="inactivation per day", xlabel="date after drug start day",
                          ylabel="inactivation per day", category="inactivation_counts",
                          show=True, line=True, overlap=True, sort=False)
            chi_result = sft.test_multinomial(dist=test_inactivation_dates, proportions=expected_inactivation,
                                              report_file=outfile, prob_flag=False)
            if not chi_result:
                success = False
                outfile.write("BAD: Chi-squared test result is False.\n")  # typo fix: was "reuslt"
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, node_list, campaign_obj, migration_df, report_data_obj, stdout_filename,
                       report_name, debug):
    """Validate monthly new infections in a multi-node, multi-core migration run.

    Builds an analytic monthly expectation (annualized base infectivity with an
    exponentially decaying acquisition-immunity correction, scaled by the node
    count), compares the binned actual counts with a chi-squared (multinomial)
    test, and verifies that every core reports at every time step in stdout.

    :param param_obj: dict of config parameters (uses num cores, infectivity, immunity keys)
    :param node_list: list of simulation nodes (only its length is used here)
    :param campaign_obj: dict of campaign parameters (uses start day)
    :param migration_df: migration data passed through to tms.check_test_condition
    :param report_data_obj: parsed InsetChart data (uses the new-infections channel)
    :param stdout_filename: stdout file to parse for per-core reporting checks
    :param report_name: path of the report file to write
    :param debug: when True, also print a summary line to stdout
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]
        outfile.write("checking some test conditions:\n")
        outfile.write(" -- simulation duration: {} days\n".format(len(new_infection)))
        if len(new_infection) < start_day + 1 + sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + 1 + sft.DAYS_IN_YEAR))
        result = tms.check_test_condition(param_obj[KEY_NUM_CORES], node_list, migration_df, outfile)
        if not result:
            success = False
            # summary message is written to the report file in the check_test_condition function
        number_of_month = 1
        outfile.write(
            "calculate expected number of infections for a time period of {} month("
            "unit is 1/years):\n".format((number_of_month)))
        t_initial = 0
        expected = []
        # Convert per-day rates to per-year so the closed-form below works in years.
        decay_rate *= sft.DAYS_IN_YEAR
        base_infectivity *= sft.DAYS_IN_YEAR
        step = number_of_month / sft.MONTHS_IN_YEAR
        for t_final in np.arange(step, 1.01, step):
            # Closed-form integral of infectivity minus the immunity-decay correction
            # over [t_initial, t_final] (times in years), scaled by the node count.
            expected_new_infection = base_infectivity * (
                t_final - t_initial) - base_infectivity * (
                1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                -1 * decay_rate * t_initial) * (1.0 - math.exp(-1 * decay_rate * (t_final - t_initial)))
            expected_new_infection *= len(node_list)
            expected.append(expected_new_infection)
            t_initial = t_final
        # group new infections for every month:
        value_to_test = []
        outfile.write(
            "running chi-squared test for actual vs expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(sft.MONTHS_IN_YEAR // number_of_month, number_of_month, base_infectivity,
                    immunity_acquisition_factor, decay_rate))
        actual_new_infection = 0
        i = 0
        for t in range(start_day + 1, len(new_infection)):
            actual_new_infection += new_infection[t]
            i += 1
            if not i % (number_of_month * sft.DAYS_IN_MONTH):
                # a full month of days has been accumulated
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0
        sft.plot_data(
            value_to_test, dist2=expected,
            label1="actual_new_infections", label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".format(number_of_month),
            xlabel="month", ylabel="# of new infections",
            category='actual_vs_expected_new_infections', show=True, line=False)
        result = sft.test_multinomial(dist=value_to_test, proportions=expected,
                                      report_file=outfile, prob_flag=False)
        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        # Multi-core sanity check: every core id must appear in stdout at every time step.
        output_dict = parse_output_file(stdout_filename, debug)
        outfile.write("checking if all cores are reporting in every time step in stdout file:\n")
        core_list = [str(n) for n in (range(param_obj[KEY_NUM_CORES]))]
        for t, cores in output_dict.items():
            if core_list != sorted(cores):
                success = False
                outfile.write(
                    "BAD: at time step {0}, these cores reported to stdout.txt are: {1}, while "
                    "expected cores are: {2}.\n".format(t, cores, core_list))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, report_data_obj, report_name, debug):
    """Compare monthly new-infection counts against an analytic immunity-decay expectation.

    The expected count per month integrates the (annualized) base infectivity
    with an exponentially decaying acquisition-immunity correction over one
    year; the actual daily counts after start_day are binned by month and
    compared with a chi-squared (multinomial) test.

    :param param_obj: dict of config parameters (infectivity, immunity keys)
    :param campaign_obj: dict of campaign parameters (uses start day)
    :param report_data_obj: parsed InsetChart data (uses the new-infections channel)
    :param report_name: path of the report file to write
    :param debug: when True, also print a summary line to stdout
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        base_infectivity = param_obj[KEY_BASE_INFECTIVITY]
        start_day = campaign_obj[KEY_START_DAY]
        new_infection = report_data_obj[KEY_NEW_INFECTION]
        immunity_acquisition_factor = param_obj[KEY_IMMUNITY_ACQUISITION_FACTOR]
        decay_rate = param_obj[KEY_DECAY_RATE]
        # calculate expected number of infections for a time period of 1 month:
        # unit is 1/years
        number_of_month = 1
        t_initial = 0
        expected = []
        # Convert per-day rates to per-year so the closed-form below works in years.
        decay_rate *= sft.DAYS_IN_YEAR
        base_infectivity *= sft.DAYS_IN_YEAR
        step = number_of_month / sft.MONTHS_IN_YEAR
        for t_final in np.arange(step, 1.01, step):
            # Closed-form integral of infectivity minus the immunity-decay correction
            # over [t_initial, t_final] (times in years).
            expected_new_infection = base_infectivity * (
                t_final - t_initial) - base_infectivity * (
                1.0 - immunity_acquisition_factor) / decay_rate * math.exp(
                -1 * decay_rate * t_initial) * (1.0 - math.exp(-1 * decay_rate * (t_final - t_initial)))
            expected.append(expected_new_infection)
            t_initial = t_final
        # group new infections for every month:
        value_to_test = []
        if len(new_infection) < start_day + 1 + sft.DAYS_IN_YEAR:
            success = False
            outfile.write(
                "BAD: the simulation duration is too short, please make sure it's at least {} days.\n"
                .format(start_day + 1 + sft.DAYS_IN_YEAR))
        outfile.write(
            "running chi-squared test for expected new infections for {0} {1}-months time bins: \n"
            "base_infectivity = {2}, immunity_acquisition_factor = {3}, decay rate = {4}.(unit is 1/years)\n"
            .format(sft.MONTHS_IN_YEAR // number_of_month, number_of_month, base_infectivity,
                    immunity_acquisition_factor, decay_rate))
        actual_new_infection = 0
        i = 0
        for t in range(start_day + 1, len(new_infection)):
            actual_new_infection += new_infection[t]
            i += 1
            if not i % (number_of_month * sft.DAYS_IN_MONTH):
                # a full month of days has been accumulated
                value_to_test.append(actual_new_infection)
                actual_new_infection = 0
        sft.plot_data(
            value_to_test, dist2=expected,
            label1="actual_new_infections", label2="expected_new_infection",
            title="actual vs. expected new infection for every {} month".format(number_of_month),
            xlabel="month", ylabel="# of new infections",
            category='actual_vs_expected_new_infections', show=True, line=False)
        result = sft.test_multinomial(dist=value_to_test, proportions=expected,
                                      report_file=outfile, prob_flag=False)
        if not result:
            success = False
            outfile.write(
                "BAD: The Chi-squared test for number of new infections in every {} months failed.\n"
                .format(number_of_month))
        else:
            outfile.write(
                "GOOD: The Chi-squared test for number of new infections in every {} months passed.\n"
                .format(number_of_month))
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, property_obj, property_df, stdout_df, recorder_obj, report_name,
                       report_event_recorder, stdout_filename, debug):
    """Validate EnvironmentalDiagnostic restricted to an IP group, and write a report.

    Test 1 (per time step): the logged environmental sample must match
    base_infectivity * infected_ip_group / stat_pop_ip_group (1% relative
    tolerance); expected positive/negative probabilities are accumulated from
    Base_Sensitivity / Base_Specificity relative to Sample_Threshold, and no
    "default" test results are allowed.
    Test 2 (per time step): positive/negative counts in StdOut.txt must match
    the event counts in ReportEventRecorder.csv.
    Finally the summed positive/negative counts are compared against the
    accumulated expectations with a multinomial (chi-squared) test.

    Fix vs. original: the Test-2 message template repeated ``{5}`` so the
    final argument (``stdout_filename``) was never rendered; it now uses
    ``{6}`` so the message names the stdout file as intended.

    :param param_obj: dict of config parameters (uses Simulation_Duration, Base_Infectivity)
    :param campaign_obj: dict of EnvironmentalDiagnostic campaign parameters
    :param property_obj: individual-property configuration (not read directly here)
    :param property_df: property-report DataFrame with per-IP-group channels
    :param stdout_df: pandas DataFrame parsed from StdOut.txt, one row per time step
    :param recorder_obj: (event DataFrame or error message, parse-success flag)
    :param report_name: path of the report file to write
    :param report_event_recorder: name of the event-recorder report (for messages)
    :param stdout_filename: name of the stdout file (for messages)
    :param debug: when True, also print the success message to stdout
    :return: True when all checks pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        sample_threshold = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity])
        base_specificity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity])
        ip_key_value = campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold,
                                           sample_threshold))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity,
                                           base_sensitivity))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity,
                                           base_specificity))
        outfile.write("{0} = {1}\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value,
                                           ip_key_value))
        success = recorder_obj[1]
        if not success:
            # recorder_obj carries the parse error message when parsing failed.
            error_message = recorder_obj[0]
            outfile.write("Failed to parse report file: {0}, get exception: {1}.\n".format(
                report_event_recorder, error_message))
        else:
            # Throw warning messages for condition checks. Make sure all features are enabled.
            if not sample_threshold:
                outfile.write("WARNING: {0} should not be 0 in this test, got {1} from compaign file. Please fix the "
                              "test.\n".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold,
                                               sample_threshold))
            if base_specificity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
            if base_sensitivity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
            if not ip_key_value:
                outfile.write("WARNING: {0} should not be empty in this test, got '{1}' from compaign file. Please "
                              "fix the test.\n".format(
                                  CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
            duration = param_obj[ConfigKeys.Simulation_Duration]
            base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
            positive_list = []
            negative_list = []
            positive_event_list = []
            negative_event_list = []
            # get infected and stat_pop channels for the selected IP group from property report
            infected_ip_group_list = property_df[[
                c for c in property_df.columns if Diagnostic_Support.channels[0] in c]]
            stat_pop_ip_group_list = property_df[[
                c for c in property_df.columns if Diagnostic_Support.channels[-1] in c]]
            # group by time and event name, then count how many event in each time step and put the value into a new
            # column named: "Test result counts"
            event_df = recorder_obj[0]
            event_df = event_df.groupby([Diagnostic_Support.ReportColumn.time,
                                         Diagnostic_Support.ReportColumn.event]).size().reset_index()
            event_df.rename(columns={0: Diagnostic_Support.ReportColumn.counts}, inplace=True)
            contagion_list = []
            expected_positive_count = expected_negative_count = 0
            for t in range(1, duration):
                # Test 1: make sure we get the correct contagion sample and
                # number of positive and negative results in StdOut.txt
                stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
                infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
                stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]
                test_positive = stdout_t_df[Diagnostic_Support.Stdout.test_positive].iloc[0]
                test_negative = stdout_t_df[Diagnostic_Support.Stdout.test_negative].iloc[0]
                test_default = stdout_t_df[Diagnostic_Support.Stdout.test_default].iloc[0]
                envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
                ip = stdout_t_df[Diagnostic_Support.Stdout.ip_value].iloc[0]
                infected_ip_group = infected_ip_group_list.iloc[t - 1][0]
                stat_pop_ip_group = stat_pop_ip_group_list.iloc[t - 1][0]
                # The IP group must be a strict subset of the whole population, otherwise the
                # IP restriction is not actually being exercised.
                # NOTE(review): original condition requires BOTH totals to equal the group values
                # before flagging; `or` may have been intended — behavior kept as-is.
                if stat_pop == stat_pop_ip_group and infected == infected_ip_group:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the total stat_pop = {1} and total infect = {2}, we got "
                        "stat_pop_ip_group = {3} and infected_ip_group = {4} in group {5}, we expect to "
                        "see less stat_pop and infected individual in the IP group , this is not a valid test "
                        "for Environment_IP_Key_Value, please check the test condition.\n".format(
                            t, stat_pop, infected, stat_pop_ip_group, infected_ip_group, ip_key_value))
                if ip_key_value != ip:
                    success = False
                    outfile.write("BAD: at time step {0}, IP={1} from StdOut.txt, expected IP={2}.\n".format(
                        t, ip, ip_key_value))
                susceptible = stat_pop_ip_group - infected_ip_group
                message = "BAD: at time {0}, group {1} has infected individuals = {2} and susceptible individuals = {3}," \
                          " expected {4} individuals receive a {5} test result, got {6} from logging.\n"
                # calculated environmental contagion
                contagion = base_infectivity * infected_ip_group / stat_pop_ip_group
                contagion_list.append(contagion)
                if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the environmental sample for IP group {1} is {2}, expected value is {3}"
                        ".\n".format(t, ip_key_value, envi_sample, contagion))
                # positive = real positive or false positive
                # negative = false negative or real negative
                if contagion > sample_threshold:
                    expected_test_positive = base_sensitivity
                    expected_test_negative = 1.0 - base_sensitivity
                else:
                    expected_test_positive = 1.0 - base_specificity
                    expected_test_negative = base_specificity
                expected_positive_count += expected_test_positive
                expected_negative_count += expected_test_negative
                # no test default in this intervention
                expected_test_default = 0
                if test_default != expected_test_default:
                    success = False
                    outfile.write(message.format(t, ip_key_value, infected_ip_group, susceptible,
                                                 expected_test_default, "default", test_default))
                positive_list.append([test_positive, expected_test_positive])
                negative_list.append([test_negative, expected_test_negative])
                # End of Test 1 at this time step
                # Test 2: make sure events reported in ReportEventRecorder.csv and test results from StdOut.txt
                # are matched.
                # Format-string fix: final placeholder was a duplicated {5}, which dropped
                # stdout_filename from the rendered message; it is now {6}.
                message = "BAD: at time {0}, {1} records {2} {3} events, got {4} {5} results from {6}.\n"
                # get the positive event count from data frame
                positive_event = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == t) &
                    (event_df[Diagnostic_Support.ReportColumn.event] ==
                     Diagnostic_Support.ReportColumn.positive)][Diagnostic_Support.ReportColumn.counts].values
                if len(positive_event):
                    positive_event = positive_event[0]
                else:
                    positive_event = 0
                # StdOut.txt should match ReportEventRecorder.csv
                if test_positive != positive_event:
                    success = False
                    outfile.write(message.format(t, report_event_recorder, positive_event,
                                                 Diagnostic_Support.ReportColumn.positive, test_positive,
                                                 Diagnostic_Support.Stdout.test_positive, stdout_filename))
                # get the negative event count from data frame
                negative_event = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == t) &
                    (event_df[Diagnostic_Support.ReportColumn.event] ==
                     Diagnostic_Support.ReportColumn.negative)][Diagnostic_Support.ReportColumn.counts].values
                if len(negative_event):
                    negative_event = negative_event[0]
                else:
                    negative_event = 0
                # StdOut.txt should match ReportEventRecorder.csv
                if test_negative != negative_event:
                    success = False
                    outfile.write(message.format(t, report_event_recorder, negative_event,
                                                 Diagnostic_Support.ReportColumn.negative, test_negative,
                                                 Diagnostic_Support.Stdout.test_negative, stdout_filename))
                positive_event_list.append(positive_event)
                negative_event_list.append(negative_event)
                # End of Test 2 at this time step
            # Aggregate check: total positive/negative counts vs. accumulated expectations.
            stdout_sum = stdout_df.sum()
            result = sft.test_multinomial(
                [stdout_sum[Diagnostic_Support.Stdout.test_positive],
                 stdout_sum[Diagnostic_Support.Stdout.test_negative]],
                proportions=[expected_positive_count, expected_negative_count],
                report_file=outfile, prob_flag=False,
            )
            message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                      " are {3} and {4}.\n"
            if result:
                outfile.write(message.format("GOOD",
                                             stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                             stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                             expected_positive_count, expected_negative_count))
            else:
                success = False
                outfile.write(message.format("BAD",
                                             stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                             stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                             expected_positive_count, expected_negative_count))
            # (Old plain actual-vs-probability plots were removed; the scatter-with-fit-line
            # plots below supersede them.)
            sft.plot_scatter_fit_line(np.array(positive_list)[:, 0], dist2=np.array(positive_list)[:, 1],
                                      label1="Actual", label2="Probability of Positive",
                                      title="Test Positive\n Group {}".format(ip_key_value), xlabel="Day",
                                      ylabel="Positive count",
                                      category='Test_Positive_Probability_Scatter_Fit_Line')
            sft.plot_scatter_fit_line(np.array(negative_list)[:, 0], dist2=np.array(negative_list)[:, 1],
                                      label1="Actual", label2="Probability of Negative",
                                      title="Test Negative\n Group {}".format(ip_key_value), xlabel="Day",
                                      ylabel="Negative count",
                                      category='Test_Negative_Probability_Scatter_Fit_Line')
            sft.plot_data(np.array(positive_list)[:, 0], positive_event_list,
                          label1=stdout_filename, label2=report_event_recorder,
                          title="Test Positive\n Group {}".format(ip_key_value), xlabel="Day",
                          ylabel="Positive count",
                          category='Test_Positive_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            sft.plot_data(np.array(negative_list)[:, 0], negative_event_list,
                          label1=stdout_filename, label2=report_event_recorder,
                          title="Test Negative\n Group {}".format(ip_key_value), xlabel="Day",
                          ylabel="Negative count",
                          category='Test_Negative_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                          label1="Actual", label2="Expected",
                          title="Environmental_Contagion", xlabel="Day", ylabel="Environmental_Contagion",
                          category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Validate diagnostic test outcomes against the configured multinomial proportions.

    Runs a per-timestep Chi-squared test on the (positive, negative, default)
    counts from the report, then one aggregate ("BIG") test over the whole
    simulation, and writes a human-readable pass/fail log plus diagnostic plots.

    Fix: removed the unused locals ``point_fail`` and ``point_tolerance``
    (``point_fail`` was incremented but never read; ``failed_timestep`` already
    tracks the failure count).

    :param param_obj: config parameters; must contain KEY_CONFIG_NAME
    :param campaign_obj: campaign parameters; must contain KEY_BASE_SENSITIVITY
        and KEY_TREATMENT_FRACTION
    :param output_dict: parsed stdout data (unused here; kept for interface
        compatibility with the sibling report functions)
    :param report_dict: per-timestep dict carrying KEY_POSITIVE / KEY_NEGATIVE /
        KEY_DEFAULT counts
    :param report_name: path of the text report to write
    :param debug: when True, also print the summary line to stdout
    :return: True when all validations pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        # Expected outcome proportions: tested positive, tested negative,
        # and defaulted (never entered the treatment fraction at all).
        proportions = [sensitivity * treatment_fraction,
                       (1.0 - sensitivity) * treatment_fraction,
                       1.0 - treatment_fraction]
        positive = []
        negative = []
        default = []
        total = []
        failed_timestep = []
        if not len(report_dict):
            success = False
            outfile.write(sft.sft_no_test_data)
        for t in report_dict:
            value_to_test = [report_dict[t][KEY_POSITIVE],
                             report_dict[t][KEY_NEGATIVE],
                             report_dict[t][KEY_DEFAULT]]
            positive.append(report_dict[t][KEY_POSITIVE])
            negative.append(report_dict[t][KEY_NEGATIVE])
            default.append(report_dict[t][KEY_DEFAULT])
            total.append(sum(value_to_test))
            outfile.write("Run Chi-squared test at time step {}.\n".format(t))
            result = sft.test_multinomial(dist=value_to_test,
                                          proportions=proportions,
                                          report_file=outfile)
            if not result:
                failed_timestep.append(t)
                outfile.write(
                    "Warning: At timestep {0}, the Chi-squared test failed.\n".format(t))
        # Individual timesteps may fail by chance; only fail the run when more
        # than 5% of the checked timesteps fail.
        if len(failed_timestep) > math.ceil(0.05 * len(report_dict)):
            success = False
            outfile.write(
                "BAD: the Chi-squared test failed at timestep {0}.\n".format(
                    ', '.join(str(x) for x in failed_timestep)))
        else:
            outfile.write(
                "GOOD: the Chi-squared test failed {} times, less than 5% of the total timestep.\n"
                .format(len(failed_timestep)))
        outfile.write(
            "BIG TEST: Testing the total proportion across the simulation\n")
        total_result = sft.test_multinomial(
            dist=[sum(positive), sum(negative), sum(default)],
            proportions=proportions,
            report_file=outfile)
        if not total_result:
            success = False
            outfile.write("FAIL: the total chi-square test fails.\n")
        sft.plot_data(
            positive, dist2=total,
            label1="TBTestPositive", label2="Total tested",
            title="Test positive vs. total, positive proportion = {}".format(
                sensitivity * treatment_fraction),
            xlabel="time step", ylabel="# of individuals",
            category='Test_positive_vs_total', show=True, line=False)
        sft.plot_data(
            negative, dist2=total,
            label1="TBTestNegative", label2="Total tested",
            title="Test negative vs. total, negative proportion = {}".format(
                (1.0 - sensitivity) * treatment_fraction),
            xlabel="time step", ylabel="# of individuals",
            category='Test_negative_vs_total', show=True, line=False)
        sft.plot_data(
            default, dist2=total,
            label1="TBTestDefault", label2="Total tested",
            title="Test default vs. total, default proportion = {}".format(
                1.0 - treatment_fraction),
            xlabel="time step", ylabel="# of individuals",
            category='Test_default_vs_total', show=True, line=False)
        # TODO: write test to check if report matches debug logging. Pending on #2279. May not need this part.
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, output_dict, report_dict, report_name, debug):
    """Validate MDR diagnostic test outcomes against the expected multinomial proportions.

    Runs a per-timestep Chi-squared test on the (MDR positive, MDR negative,
    MDR default) counts, then one aggregate test over the whole simulation,
    and writes a pass/fail log plus diagnostic plots to ``report_name``.

    Fix: an empty ``report_dict`` previously raised ``ZeroDivisionError`` at
    ``fail_count / point_count``; it now reports "no test data" and fails the
    run, matching the sibling diagnostic report function.

    :param param_obj: config parameters; must contain KEY_CONFIG_NAME
    :param campaign_obj: campaign parameters; must contain KEY_BASE_SENSITIVITY,
        KEY_TREATMENT_FRACTION and KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS
    :param output_dict: parsed stdout data (unused here; kept for interface
        compatibility with the sibling report functions)
    :param report_dict: per-timestep dict carrying KEY_MDR_POSITIVE /
        KEY_MDR_NEGATIVE / KEY_MDR_DEFAULT counts
    :param report_name: path of the text report to write
    :param debug: when True, also print the summary line to stdout
    :return: True when all validations pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        config_name = param_obj[KEY_CONFIG_NAME]
        outfile.write("Config_name = {}\n".format(config_name))
        success = True
        sensitivity = campaign_obj[KEY_BASE_SENSITIVITY]
        treatment_fraction = campaign_obj[KEY_TREATMENT_FRACTION]
        treatment_fraction_negative_diagnosis = campaign_obj[
            KEY_TREATMENT_FRACTION_NEGATIVE_DIAGNOSIS]
        # Expected outcome proportions: true positive treated, false negative
        # treated, and everyone else (untreated regardless of test result).
        proportions = [
            sensitivity * treatment_fraction,
            (1.0 - sensitivity) * treatment_fraction_negative_diagnosis,
            (1.0 - sensitivity) * (1.0 - treatment_fraction_negative_diagnosis) +
            sensitivity * (1.0 - treatment_fraction)]
        total_proportion = sum(proportions)
        positive = []
        negative = []
        default = []
        total = []
        pass_count = 0
        fail_count = 0
        if not report_dict:
            # No data is an outright failure; this also guards the fail-rate
            # division below against point_count == 0.
            success = False
            outfile.write(sft.sft_no_test_data)
        for t in report_dict:
            value_to_test = [
                report_dict[t][KEY_MDR_POSITIVE],
                report_dict[t][KEY_MDR_NEGATIVE],
                report_dict[t][KEY_MDR_DEFAULT]]
            positive.append(value_to_test[0])
            negative.append(value_to_test[1])
            default.append(value_to_test[2])
            # Scale the observed sum back up to the full tested population.
            total.append(int(sum(value_to_test) / total_proportion))
            outfile.write(f"Timestep {t} Chi-squared test: {value_to_test[0]} positive, {value_to_test[1]} negative,"
                          f" {value_to_test[2]} default.\n")
            result = sft.test_multinomial(dist=value_to_test,
                                          proportions=proportions,
                                          report_file=outfile)
            if not result:
                fail_count += 1
                outfile.write(
                    "BAD: At timestep {0}, the Chi-squared test failed.\n".format(t))
            else:
                pass_count += 1
        point_count = pass_count + fail_count
        point_check_tolerance = 0.2
        # Individual timesteps may fail by chance; only fail the run when the
        # failure rate exceeds the tolerance.
        if point_count and fail_count / point_count > point_check_tolerance:
            success = False
            outfile.write(
                f"FAIL: more than {point_check_tolerance} of the points checked failed validation.\n")
        positives = sum(positive)
        negatives = sum(negative)
        defaults = sum(default)
        outfile.write(
            "BIG TEST: testing all of the diagnoses in the sim next!\n")
        sum_result = sft.test_multinomial(
            dist=[positives, negatives, defaults],
            proportions=proportions,
            report_file=outfile)
        if not sum_result:
            success = False
            outfile.write("FAIL: the sum chi-square test fails.\n")
        sft.plot_data(
            positive, dist2=total,
            label1="TBMDRTestPositive", label2="Total tested",
            title="MDR Test positive vs. total, positive proportion = {}".format(
                proportions[0]),
            xlabel="time step", ylabel="# of individuals",
            category='MDR_Test_positive_vs_total', show=True, line=False)
        sft.plot_data(
            negative, dist2=total,
            label1="TBMDRTestNegative", label2="Total tested",
            title="MDR Test negative vs. total, negative proportion = {}".format(
                proportions[1]),
            xlabel="time step", ylabel="# of individuals",
            category='MDR_Test_negative_vs_total', show=True, line=False)
        sft.plot_data(
            default, dist2=total,
            label1="TBMDRTestDefault", label2="Total tested",
            title="MDR Test default vs. total, default proportion = {}".format(
                proportions[2]),
            xlabel="time step", ylabel="# of individuals",
            category='MDR_Test_default_vs_total', show=True, line=False)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print("SUMMARY: Success={0}\n".format(success))
    return success
def create_report_file(param_obj, campaign_obj, stdout_df, report_name, debug):
    """Validate EnvironmentalDiagnostic behavior with Sample_Threshold == 0.

    For every timestep, recomputes the expected environmental contagion
    (``Base_Infectivity * infected / stat_pop``) and checks it against the
    sample logged in StdOut.txt (1% relative tolerance), accumulates the
    per-step expected positive/negative probabilities, then runs one
    multinomial test on the total positive/negative counts from StdOut.txt.

    Cleanup: removed the large block of dead, commented-out per-timestep
    validation and plotting code that trailed this function.

    :param param_obj: config parameters; must contain Simulation_Duration and
        Base_Infectivity
    :param campaign_obj: intervention parameters (Sample_Threshold,
        Base_Sensitivity, Base_Specificity, Environment_IP_Key_Value)
    :param stdout_df: parsed StdOut.txt as a DataFrame, one row per timestep
    :param report_name: path of the text report to write
    :param debug: when True, also print the summary message to stdout
    :return: True when all validations pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        success = True
        sample_threshold = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity])
        base_specificity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity])
        ip_key_value = campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        # Precondition checks: this scenario expects threshold 0, specificity 1
        # and no IP restriction; warn (or fail) when the campaign disagrees.
        if sample_threshold:
            outfile.write("WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        if base_specificity != 1:
            outfile.write("WARNING: the {0} is {1}, expected value is 1.\n".format(
                CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
        if ip_key_value:
            success = False
            outfile.write("BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = expected_negative_count = 0
        contagion_list = []
        contagion = 0
        for t in range(1, duration):
            stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
            infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
            stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]
            envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
            # calculated environmental contagion for next time step
            contagion = base_infectivity * infected / stat_pop
            # 1% relative tolerance against the logged sample
            if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                success = False
                outfile.write("BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n".format(
                    t, envi_sample, contagion))
            # Above threshold the diagnostic behaves per sensitivity; at or
            # below threshold, per specificity.
            if contagion > sample_threshold:
                expected_test_positive = base_sensitivity
                expected_test_negative = 1.0 - base_sensitivity
            else:
                expected_test_positive = 1.0 - base_specificity
                expected_test_negative = base_specificity
            contagion_list.append(contagion)
            expected_positive_count += expected_test_positive
            expected_negative_count += expected_test_negative
        stdout_sum = stdout_df.sum()
        result = sft.test_multinomial([stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                       stdout_sum[Diagnostic_Support.Stdout.test_negative]],
                                      proportions=[expected_positive_count, expected_negative_count],
                                      report_file=outfile, prob_flag=False)
        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if result:
            outfile.write(message.format("GOOD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count,
                                         expected_negative_count))
        else:
            success = False
            outfile.write(message.format("BAD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count,
                                         expected_negative_count))
        sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                      label1="Actual", label2="Expected",
                      title="Environmental_Contagion", xlabel="Day",
                      ylabel="Environmental_Contagion",
                      category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, intervention_obj, outbreak_obj, stdout_df, report_name, debug):
    """Validate EnvironmentalDiagnostic behavior when no outbreak occurs.

    With no outbreak the contagion sample should never exceed the threshold,
    so the diagnostic's output is driven by specificity/sensitivity alone.
    Recomputes the expected environmental contagion per timestep, compares it
    to the StdOut.txt sample (1% relative tolerance), then runs one
    multinomial test on the total positive/negative counts.

    :param param_obj: config parameters; must contain Simulation_Duration and
        Base_Infectivity
    :param intervention_obj: EnvironmentalDiagnostic parameters
    :param outbreak_obj: outbreak intervention parsed from campaign.json; must
        be empty/falsy for this test to be valid
    :param stdout_df: parsed StdOut.txt as a DataFrame, one row per timestep
    :param report_name: path of the text report to write
    :param debug: when True, also print the summary message to stdout
    :return: True when all validations pass, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        success = True
        sample_threshold = float(intervention_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold])
        base_sensitivity = float(intervention_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity])
        base_specificity = float(intervention_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity])
        ip_key_value = intervention_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        # make sure no outbreak in this test, so the sample should not be greater than threshold at all time
        if outbreak_obj:
            success = False
            outfile.write("BAD: Campaign.json should not have {0}, got {1} from the json file.\n".format(
                CampaignKeys.InterventionClassKeys.OutbreakIndividual, outbreak_obj))
        # Precondition checks: this scenario expects threshold 0, sensitivity 1
        # and no IP restriction; warn (or fail) when the campaign disagrees.
        if sample_threshold:
            outfile.write("WARNING: {0} should be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        if base_sensitivity != 1:
            outfile.write("WARNING: the {0} is {1}, expected value is 1.\n".format(
                CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
        if ip_key_value:
            success = False
            outfile.write("BAD: {0} should be empty in this test, got {1} from compaign file. Please fix the test.\n"
                          "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        duration = param_obj[ConfigKeys.Simulation_Duration]
        base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
        expected_positive_count = expected_negative_count = 0
        contagion_list = []
        contagion = 0
        for t in range(1, duration):
            # one row of StdOut.txt data per timestep
            stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
            infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
            stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]
            envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
            # calculated environmental contagion
            contagion = base_infectivity * infected / stat_pop
            # 1% relative tolerance against the logged sample
            if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                success = False
                outfile.write("BAD: at time step {0} the environmental sample is {1}, expected value is {2}.\n".format(
                    t, envi_sample, contagion))
            # Above threshold the diagnostic behaves per sensitivity; at or
            # below threshold, per specificity.
            if contagion > sample_threshold:
                expected_test_positive = base_sensitivity
                expected_test_negative = 1.0 - base_sensitivity
            else:
                expected_test_positive = 1.0 - base_specificity
                expected_test_negative = base_specificity
            contagion_list.append(contagion)
            expected_positive_count += expected_test_positive
            expected_negative_count += expected_test_negative
        stdout_sum = stdout_df.sum()
        result = sft.test_multinomial([stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                       stdout_sum[Diagnostic_Support.Stdout.test_negative]],
                                      proportions=[expected_positive_count, expected_negative_count],
                                      report_file=outfile, prob_flag=False, )
        message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                  " are {3} and {4}.\n"
        if result:
            outfile.write(message.format("GOOD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count,
                                         expected_negative_count))
        else:
            success = False
            outfile.write(message.format("BAD",
                                         stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                         stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                         expected_positive_count,
                                         expected_negative_count))
        sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                      label1="Actual", label2="Expected",
                      title="Environmental_Contagion", xlabel="Day",
                      ylabel="Environmental_Contagion",
                      category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success
def create_report_file(param_obj, campaign_obj, property_obj, property_df, stdout_df, recorder_obj, report_name,
                       report_event_recorder, stdout_filename, debug):
    """Validate EnvironmentalDiagnostic restricted to a single IP group.

    Cross-checks three data sources at every timestep:
      1. StdOut.txt: contagion sample, positive/negative/default counts and
         the IP value the diagnostic ran against.
      2. Property report: infected / stat_pop channels for the IP group.
      3. ReportEventRecorder.csv: positive/negative event counts.
    Then runs one multinomial test on the total positive/negative counts and
    emits diagnostic plots.

    Fix: the Test-2 mismatch message previously ended with "results from
    {5}", repeating the stdout column name and silently dropping the 7th
    format argument (``stdout_filename``); it now uses "{6}" so the source
    file name is reported. Also removed the dead commented-out plot blocks.

    :param param_obj: config parameters; must contain Simulation_Duration and
        Base_Infectivity
    :param campaign_obj: EnvironmentalDiagnostic parameters
    :param property_obj: individual-property configuration (unused here; kept
        for interface compatibility)
    :param property_df: property report as a DataFrame, one row per timestep
    :param stdout_df: parsed StdOut.txt as a DataFrame, one row per timestep
    :param recorder_obj: (parsed event DataFrame or error message, parse-ok flag)
    :param report_name: path of the text report to write
    :param report_event_recorder: name of the event recorder csv (for messages)
    :param stdout_filename: name of the stdout file (for messages)
    :param debug: when True, also print the summary message to stdout
    :return: True when every validation passes, False otherwise
    """
    with open(report_name, "w") as outfile:
        for name, param in param_obj.items():
            outfile.write("{0} = {1}\n".format(name, param))
        sample_threshold = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold])
        base_sensitivity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity])
        base_specificity = float(campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity])
        ip_key_value = campaign_obj[CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value]
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
        outfile.write("{0} = {1}\n".format(
            CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
        # recorder_obj[1] is the parse-ok flag; on failure recorder_obj[0]
        # carries the exception message instead of the DataFrame.
        success = recorder_obj[1]
        if not success:
            error_message = recorder_obj[0]
            outfile.write("Failed to parse report file: {0}, get exception: {1}.\n".format(
                report_event_recorder, error_message))
        else:
            # Throw warning messages for condition checks. Make sure all features are enabled.
            if not sample_threshold:
                outfile.write("WARNING: {0} should not be 0 in this test, got {1} from compaign file. Please fix the test.\n"
                              "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Sample_Threshold, sample_threshold))
            if base_specificity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    CampaignKeys.EnvironmentalDiagnosticKeys.Base_Specificity, base_specificity))
            if base_sensitivity == 1:
                outfile.write("WARNING: the {0} is {1}, expected value is less than 1.\n".format(
                    CampaignKeys.EnvironmentalDiagnosticKeys.Base_Sensitivity, base_sensitivity))
            if not ip_key_value:
                outfile.write(
                    "WARNING: {0} should not be empty in this test, got '{1}' from compaign file. Please fix the test.\n"
                    "".format(CampaignKeys.EnvironmentalDiagnosticKeys.Environment_IP_Key_Value, ip_key_value))
            duration = param_obj[ConfigKeys.Simulation_Duration]
            base_infectivity = param_obj[ConfigKeys.Base_Infectivity]
            positive_list = []
            negative_list = []
            positive_event_list = []
            negative_event_list = []
            # get infected and stat_pop channels for the selected IP group from property report
            infected_ip_group_list = property_df[[c for c in property_df.columns
                                                  if Diagnostic_Support.channels[0] in c]]
            stat_pop_ip_group_list = property_df[[c for c in property_df.columns
                                                  if Diagnostic_Support.channels[-1] in c]]
            # group by time and event name, then count how many event in each time step and put the value into a new
            # column named: "Test result counts"
            event_df = recorder_obj[0]
            event_df = event_df.groupby([Diagnostic_Support.ReportColumn.time,
                                         Diagnostic_Support.ReportColumn.event]).size().reset_index()
            event_df.rename(columns={0: Diagnostic_Support.ReportColumn.counts}, inplace=True)
            contagion_list = []
            expected_positive_count = expected_negative_count = 0
            for t in range(1, duration):
                # Test 1: make sure we get the correct contagion sample and
                # number of positive and negative results in StdOut.txt
                stdout_t_df = stdout_df[stdout_df[Diagnostic_Support.ConfigKeys.Simulation_Timestep] == t]
                infected = stdout_t_df[Diagnostic_Support.Stdout.infected].iloc[0]
                stat_pop = stdout_t_df[Diagnostic_Support.Stdout.stat_pop].iloc[0]
                test_positive = stdout_t_df[Diagnostic_Support.Stdout.test_positive].iloc[0]
                test_negative = stdout_t_df[Diagnostic_Support.Stdout.test_negative].iloc[0]
                test_default = stdout_t_df[Diagnostic_Support.Stdout.test_default].iloc[0]
                envi_sample = stdout_t_df[Diagnostic_Support.Stdout.sample].iloc[0]
                ip = stdout_t_df[Diagnostic_Support.Stdout.ip_value].iloc[0]
                # property report rows are 0-based while t starts at 1
                infected_ip_group = infected_ip_group_list.iloc[t - 1][0]
                stat_pop_ip_group = stat_pop_ip_group_list.iloc[t - 1][0]
                # The IP group must be a strict subset of the population,
                # otherwise the IP restriction is not actually being exercised.
                if stat_pop == stat_pop_ip_group and infected == infected_ip_group:
                    success = False
                    outfile.write("BAD: at time step {0} the total stat_pop = {1} and total infect = {2}, we got "
                                  "stat_pop_ip_group = {3} and infected_ip_group = {4} in group {5}, we expect to "
                                  "see less stat_pop and infected individual in the IP group , this is not a valid test "
                                  "for Environment_IP_Key_Value, please check the test condition.\n".format(
                                      t, stat_pop, infected, stat_pop_ip_group, infected_ip_group, ip_key_value))
                if ip_key_value != ip:
                    success = False
                    outfile.write("BAD: at time step {0}, IP={1} from StdOut.txt, expected IP={2}.\n".format(
                        t, ip, ip_key_value))
                susceptible = stat_pop_ip_group - infected_ip_group
                message = "BAD: at time {0}, group {1} has infected individuals = {2} and susceptible individuals = {3}," \
                          " expected {4} individuals receive a {5} test result, got {6} from logging.\n"
                # calculated environmental contagion
                contagion = base_infectivity * infected_ip_group / stat_pop_ip_group
                contagion_list.append(contagion)
                # 1% relative tolerance against the logged sample
                if math.fabs(contagion - envi_sample) > envi_sample * 1e-2:
                    success = False
                    outfile.write(
                        "BAD: at time step {0} the environmental sample for IP group {1} is {2}, expected value is {3}"
                        ".\n".format(t, ip_key_value, envi_sample, contagion))
                # positive = real positive or false positive
                # negative = false negative or real negative
                if contagion > sample_threshold:
                    expected_test_positive = base_sensitivity
                    expected_test_negative = 1.0 - base_sensitivity
                else:
                    expected_test_positive = 1.0 - base_specificity
                    expected_test_negative = base_specificity
                expected_positive_count += expected_test_positive
                expected_negative_count += expected_test_negative
                # no test default in this intervention
                expected_test_default = 0
                if test_default != expected_test_default:
                    success = False
                    outfile.write(message.format(
                        t, ip_key_value, infected_ip_group, susceptible, expected_test_default,
                        "default", test_default))
                positive_list.append([test_positive, expected_test_positive])
                negative_list.append([test_negative, expected_test_negative])
                # End of Test 1 at this time step

                # Test 2: make sure events reported in ReportEventRecorder.csv and test results from StdOut.txt are matched.
                # NOTE: the trailing placeholder is {6} (stdout_filename); it used to
                # repeat {5}, dropping the file name from the message.
                message = "BAD: at time {0}, {1} records {2} {3} events, got {4} {5} results from {6}.\n"
                # get the positive event count from data frame
                positive_event = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == t) &
                    (event_df[Diagnostic_Support.ReportColumn.event] == Diagnostic_Support.ReportColumn.positive)][
                    Diagnostic_Support.ReportColumn.counts].values
                if len(positive_event):
                    positive_event = positive_event[0]
                else:
                    positive_event = 0
                # StdOut.txt should match ReportEventRecorder.csv
                if test_positive != positive_event:
                    success = False
                    outfile.write(message.format(
                        t, report_event_recorder, positive_event, Diagnostic_Support.ReportColumn.positive,
                        test_positive, Diagnostic_Support.Stdout.test_positive, stdout_filename))
                # get the negative event count from data frame
                negative_event = event_df[
                    (event_df[Diagnostic_Support.ReportColumn.time] == t) &
                    (event_df[Diagnostic_Support.ReportColumn.event] == Diagnostic_Support.ReportColumn.negative)][
                    Diagnostic_Support.ReportColumn.counts].values
                if len(negative_event):
                    negative_event = negative_event[0]
                else:
                    negative_event = 0
                # StdOut.txt should match ReportEventRecorder.csv
                if test_negative != negative_event:
                    success = False
                    outfile.write(message.format(
                        t, report_event_recorder, negative_event, Diagnostic_Support.ReportColumn.negative,
                        test_negative, Diagnostic_Support.Stdout.test_negative, stdout_filename))
                positive_event_list.append(positive_event)
                negative_event_list.append(negative_event)
                # End of Test 2 at this time step
            stdout_sum = stdout_df.sum()
            result = sft.test_multinomial([stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                           stdout_sum[Diagnostic_Support.Stdout.test_negative]],
                                          proportions=[expected_positive_count, expected_negative_count],
                                          report_file=outfile, prob_flag=False)
            message = "{0}: the total test positive and negative counts from StdOut.txt are {1} and {2}, expected values" \
                      " are {3} and {4}.\n"
            if result:
                outfile.write(message.format("GOOD",
                                             stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                             stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                             expected_positive_count,
                                             expected_negative_count))
            else:
                success = False
                outfile.write(message.format("BAD",
                                             stdout_sum[Diagnostic_Support.Stdout.test_positive],
                                             stdout_sum[Diagnostic_Support.Stdout.test_negative],
                                             expected_positive_count,
                                             expected_negative_count))
            sft.plot_scatter_fit_line(np.array(positive_list)[:, 0], dist2=np.array(positive_list)[:, 1],
                                      label1="Actual", label2="Probability of Positive",
                                      title="Test Positive\n Group {}".format(ip_key_value),
                                      xlabel="Day", ylabel="Positive count",
                                      category='Test_Positive_Probability_Scatter_Fit_Line')
            sft.plot_scatter_fit_line(np.array(negative_list)[:, 0], dist2=np.array(negative_list)[:, 1],
                                      label1="Actual", label2="Probability of Negative",
                                      title="Test Negative\n Group {}".format(ip_key_value),
                                      xlabel="Day", ylabel="Negative count",
                                      category='Test_Negative_Probability_Scatter_Fit_Line')
            sft.plot_data(np.array(positive_list)[:, 0], positive_event_list,
                          label1=stdout_filename, label2=report_event_recorder,
                          title="Test Positive\n Group {}".format(ip_key_value),
                          xlabel="Day", ylabel="Positive count",
                          category='Test_Positive_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            sft.plot_data(np.array(negative_list)[:, 0], negative_event_list,
                          label1=stdout_filename, label2=report_event_recorder,
                          title="Test Negative\n Group {}".format(ip_key_value),
                          xlabel="Day", ylabel="Negative count",
                          category='Test_Negative_stdout_vs_event_recorder', overlap=True, alpha=0.5)
            sft.plot_data(stdout_df[Diagnostic_Support.Stdout.sample].tolist()[1:], contagion_list,
                          label1="Actual", label2="Expected",
                          title="Environmental_Contagion", xlabel="Day",
                          ylabel="Environmental_Contagion",
                          category='Environmental_Contagion', overlap=True, alpha=0.5)
        outfile.write(sft.format_success_msg(success))
    if debug:
        print(sft.format_success_msg(success))
    return success