Example #1
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, [matches[0]]):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = None
    output_dict = {}
    for line in filtered_lines:
        time_step = dtk_sft.get_val(matches[0], line)
        core = dtk_sft.get_val(matches[1], line)
        output_dict.setdefault(time_step, []).append(core)

    if debug:
        res_path = r'./DEBUG_core_per_time_step.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict
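The parser relies on module-level names (json, dtk_sft, and the matches token list) that are defined elsewhere in the test script. A minimal driver sketch, assuming hypothetical token values for matches (the real tokens come from the simulation log) and that dtk_sft is importable under that name:

import json

import dtk_sft  # DTK test-framework helpers; the exact import path may differ per suite

matches = ["Time: ", "core= "]  # assumed placeholder tokens, not taken from the source

if __name__ == "__main__":
    core_per_step = parse_output_file("test.txt", debug=True)
    for step, cores in sorted(core_per_step.items()):
        print("time step {}: cores {}".format(step, cores))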
Example #2
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    cd4_strata = data[2]
    mod_array = data[3]

    actual_data = []
    expected_data = []
    success = True
    epsilon = 0.000002
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            cd4_count = float(dtk_sft.get_val("CD4count=", line))
            cd4_mod_actual = float(dtk_sft.get_val("CD4mod=", line))
            cd4_mod_expected = tb_cd4_susceptibility_calc([mod_array, cd4_strata, cd4_count])
            actual_data.append(cd4_mod_actual)
            expected_data.append(cd4_mod_expected)
            if abs(cd4_mod_actual-cd4_mod_expected) > epsilon:
                success = False
                outfile.write("BAD: At Time: {} for Individual {} with CD4 count {} Expected susceptibility modifier "
                              "was {}, but actual was {}.\n".format(dtk_sft.get_val("time= ", line),
                                                                    dtk_sft.get_val("Individual ", line),
                                                                    cd4_count,
                                                                    cd4_mod_expected, cd4_mod_actual))

        outfile.write("Data points checked = {} .\n".format(len(lines)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data_sorted(actual_data, expected_data, label1="Actual", label2="Expected",
                                 title="Susceptibility Modifier", xlabel="Data Points", ylabel="Modifying Multiplier",
                                 category="tb_susceptibility_and_cd4", line=True, overlap=True)
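tb_cd4_susceptibility_calc is defined elsewhere in this test suite. A plausible stand-in, assuming the modifier is piecewise-linearly interpolated across the CD4 strata (an illustrative assumption, not the confirmed implementation):

import numpy as np

def tb_cd4_susceptibility_calc(args):
    # args = [mod_array, cd4_strata, cd4_count]; assumes cd4_strata is sorted
    # ascending, with the modifier clamped at the outermost strata
    mod_array, cd4_strata, cd4_count = args
    return float(np.interp(cd4_count, cd4_strata, mod_array))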
Example #3
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    latency_update_data = []
    original_latency_data = []
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "Incubation_timer calculated as" in line:
                incubation_timer = float(dtk_sft.get_val("as ", line))
                original_latency_data.append(incubation_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                latency_update_data.append(new_incubation_timer)

        # expecting the original distribution to NOT match the art-triggered update distribution
        if dtk_sft.test_exponential(original_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=True,
                                    roundup=True,
                                    round_nearest=False):
            outfile.write(
                "BAD: The original latency data matches the ART-triggered update distribution, "
                "but should not.\n"
            )
            success = False
        expected_update_data = np.random.exponential(
            1 / tb_cd4_activation_vector[2], len(latency_update_data))
        if not dtk_sft.test_exponential(latency_update_data,
                                        tb_cd4_activation_vector[2],
                                        outfile,
                                        integers=True,
                                        roundup=True,
                                        round_nearest=False):
            # the updated timers should follow this distribution, so a failed fit means the test fails
            outfile.write(
                "BAD: The updated latency data does not match the expected distribution.\n"
            )
            success = False
        outfile.write("Data points checked = {}.\n".format(
            len(latency_update_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data(sorted(latency_update_data),
                          sorted(expected_update_data),
                          label1="Actual",
                          label2="Expected",
                          title="Latency Duration recalculated for ART",
                          xlabel="Data Points",
                          ylabel="Days",
                          category="tb_activation_and_cd4_hiv_first_on_art",
                          line=True,
                          overlap=True)
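dtk_sft.test_exponential is part of the DTK test framework; conceptually it is a goodness-of-fit check of the samples against an exponential distribution with the given rate. A hedged sketch of the same idea using scipy (note that numpy and scipy parameterize the exponential by scale = 1/rate, which is why the expected data above is drawn with np.random.exponential(1 / tb_cd4_activation_vector[2], ...)):

from scipy import stats

def looks_exponential(samples, rate, significance=0.05):
    # Kolmogorov-Smirnov test against Expon(loc=0, scale=1/rate); returns True
    # when the samples are consistent with that distribution. A stand-in sketch,
    # not the framework's actual implementation.
    result = stats.kstest(samples, "expon", args=(0, 1.0 / rate))
    return result.pvalue > significance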
Example #4
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store stat populations, infected population, and MDR test result for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infected = 0
    statpop = 0
    simulation_timestep = 1
    positive = 0
    negative = 0
    default = 0
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            output_dict[time_step] = {
                KEY_STAT_POP: statpop,
                KEY_INFECTED: infected,
                KEY_POSITIVE: positive,
                KEY_NEGATIVE: negative,
                KEY_DEFAULT: default
            }
            infected = dtk_sft.get_val(matches[2], line)
            statpop = dtk_sft.get_val(matches[1], line)
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[3] in line:
            result = int(dtk_sft.get_val(matches[3], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[4] in line:
            default += 1
    res_path = r'./tb_test_result_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
Example #5
def parse_output_file(output_filename="test.txt",
                      simulation_timestep=1,
                      debug=False):
    """
    creates a dataframe of time step, infected,  infectiouness and stat populations
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :return: output_df:  data frame contains: 1, time step,
                                            2, # of infected population,
                                            3, infectiousness,
                                            4, statistical populations, at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[
        KEY_SIMULATION_TIMESTEP, KEY_INFECTED, KEY_INFECTIOUSNESS, KEY_STAT_POP
    ])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [
                time_step, infected, infectiousness, statpop
            ]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
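One design note on the loop above: assigning output_df.loc[index] row by row re-allocates the frame on every append, which gets slow on long logs. A sketch of the cheaper pattern, collecting plain rows and building the frame once (the KEY_* values below are placeholders for the module-level constants):

import pandas as pd

KEY_SIMULATION_TIMESTEP, KEY_INFECTED = "Time", "Infected"      # placeholder names
KEY_INFECTIOUSNESS, KEY_STAT_POP = "Infectiousness", "StatPop"  # placeholder names

rows = []
# inside the parsing loop, instead of output_df.loc[index] = [...]:
#     rows.append([time_step, infected, infectiousness, statpop])
output_df = pd.DataFrame(rows, columns=[KEY_SIMULATION_TIMESTEP, KEY_INFECTED,
                                        KEY_INFECTIOUSNESS, KEY_STAT_POP])
output_df.index.name = "index"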
Example #6
def parse_stdout_file(initial_timestep=0,
                      stdout_filename="test.txt",
                      debug=False):
    """
    :param initial_timestep:   first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug:           whether or not we write an additional file that's full of the matched lines
    :return:                array of lines of interest
    """
    exogenous = "EXOGENOUS infected"  # "EXOGENOUS infected"
    infectiousness = "UpdateInfectiousness"
    state_latent = "state= Latent"
    filtered_lines = []
    exogenous_infected_count = 0
    latent_count = 0
    initial_population = 5000  # placeholder until we get to the time: 1 and get it from there
    exogenous_infected_dict = {}
    exogenous_infected_list = []
    update_time = "Update(): Time: "
    time = initial_timestep
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                exogenous_infected_list.append(exogenous_infected_count)
                exogenous_infected_dict[time] = [
                    exogenous_infected_count, latent_count
                ]
                time += 1
                exogenous_infected_count = 0  # resetting for the next time step
                latent_count = 0
                filtered_lines.append(line)
            elif exogenous in line:
                ind_id = int(float(dtk_sft.get_val("Individual ", line)))
                if ind_id <= initial_population:  # ignoring imported people
                    exogenous_infected_count += 1
                filtered_lines.append(line)
            elif infectiousness in line and state_latent in line:
                ind_id = int(float(dtk_sft.get_val("Individual ", line)))
                fast_progressor = int(
                    float(dtk_sft.get_val("progressor=", line)))
                if not fast_progressor and ind_id <= initial_population:  # ignoring imported people
                    latent_count += 1
                filtered_lines.append(line)

    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    return [exogenous_infected_dict, exogenous_infected_list]
Example #7
def parse_stdout_file(curr_timestep=0,
                      stdout_filename="test.txt",
                      debug=False):
    """
    :param curr_timestep:   first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :param debug:           whether to write an additional file containing the matched lines
    :return:                array of lines of interest
    """
    expose = "Expose: Individual"
    filtered_lines = []
    update_time = "Update(): Time:"
    time = 0
    with open(stdout_filename) as logfile:
        for line in logfile:
            if update_time in line:
                time += 1
            if expose in line:
                ind = int(dtk_sft.get_val("Individual ", line))
                if ind <= 100:  # do not look at imported people
                    new_line = dtk_sft.add_time_stamp(time, line)
                    filtered_lines.append(new_line)

    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    return filtered_lines
def parse_output_file(output_filename="test.txt", simulation_timestep = 1, debug=False):
    """
    creates a dataframe of time step, infected,  infectiouness and stat populations
    :param output_filename: file to parse (test.txt)
    :param simulation_timestep: simulation time step, * days
    :return: output_df:  data frame contains: 1, time step,
                                            2, # of infected population,
                                            3, infectiousness,
                                            4, statistical populations, at each time step
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    infectiousness = 0
    infected = 0
    statpop = 0
    output_df = pd.DataFrame(columns=[KEY_SIMULATION_TIMESTEP, KEY_INFECTED, KEY_INFECTIOUSNESS, KEY_STAT_POP])
    output_df.index.name = "index"
    index = 0
    for line in filtered_lines:
        if matches[0] in line:
            infected = dtk_sft.get_val(matches[1], line)
            statpop = dtk_sft.get_val(matches[3], line)
            output_df.loc[index] = [time_step, infected, infectiousness, statpop]
            index += 1
            time_step += simulation_timestep
            infectiousness = 0
            continue
        if matches[2] in line:
            infectiousness = dtk_sft.get_val(matches[2], line)
            continue
    res_path = r'./infected_vs_infectiousness.csv'
    if not os.path.exists(os.path.dirname(res_path)):
        os.makedirs(os.path.dirname(res_path))
    output_df.to_csv(res_path)
    return output_df
Example #9
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    coinfection_mortality_rate_off_art = data[2]
    coinfection_mortality_rate_on_art = data[3]

    died_of_coinfection = "died of CoInfection"
    state_active_symptomatic = "infectionstatechange TBActivation "
    time_to_death_data = []
    active_infections_dictionary = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if died_of_coinfection in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    time_to_death_data.append(time_stamp - active_infections_dictionary[ind_id])
                else:
                    success = False
                    outfile.write("BAD: Individual {} died of coinfection without going active, at time {}."
                                  "\n".format(ind_id, time_stamp))
            elif state_active_symptomatic in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in active_infections_dictionary.keys():
                    outfile.write("Individual {} went active symptomatic while already being active symptomatic"
                                  "at time {}. \n".format(ind_id, start_time_stamp))
                else:
                    active_infections_dictionary[ind_id] = start_time_stamp
        # expected_data here only used for graphing purposes
        expected_data = list(map(int, np.random.exponential(1/coinfection_mortality_rate_off_art,
                                                            len(time_to_death_data))))
        if not dtk_sft.test_exponential(time_to_death_data, coinfection_mortality_rate_off_art, outfile,
                                        integers=True, roundup=False, round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(time_to_death_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data(sorted(time_to_death_data), sorted(expected_data), label1="Actual", label2="Expected",
                          title="Time from Smear Negative Off ART TBHIV to Death", xlabel="Data Points", ylabel="Days",
                          category="tbhiv_mortality_smear_negative_off_art", line=True, overlap=True)
Example #10
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        elif matches[1] in line:  # this individual is Cleared
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            output_dict.setdefault(individual_id, {})[KEY_CLEARED] = time_step
        elif matches[2] in line:  # this individual is Latent
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            output_dict.setdefault(individual_id, {})[KEY_LATENT] = time_step
        elif matches[3] in line:  # this individual is PreSymptomatic active
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            output_dict.setdefault(individual_id, {})[KEY_PRESYMPTOMATIC] = time_step
    res_path = r'./SEIR_Latent_Cure_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
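A sketch of how the resulting dictionary might be consumed downstream, for example to collect per-individual Latent-to-PreSymptomatic durations (KEY_LATENT and KEY_PRESYMPTOMATIC are the same module-level constants the parser uses):

output_dict = parse_output_file("test.txt")
latency_durations = []
for individual_id, events in output_dict.items():
    if KEY_LATENT in events and KEY_PRESYMPTOMATIC in events:
        latency_durations.append(events[KEY_PRESYMPTOMATIC] - events[KEY_LATENT])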
Example #11
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    simulation_timestep = 1
    output_dict = {}
    for line in filtered_lines:
        if matches[0] in line:
            time_step += simulation_timestep
        if matches[1] in line:
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            output_dict.setdefault(individual_id, {})[KEY_DEATH] = time_step
        if matches[2] in line:
            individual_id = dtk_sft.get_val(KEY_INDIVIDUAL, line)
            timer = float(dtk_sft.get_val(KEY_TIMER, line))
            output_dict.setdefault(individual_id, {})[KEY_SYMPTOMATIC] = [time_step, timer]
    res_path = r'./SEIR_Death_from_logging.json'
    with open(res_path, "w") as file:
        json.dump(output_dict, file, indent=4)
    return output_dict
def parse_stdout_file(output_filename="test.txt",
                      debug=False):
    """
    Reads stdout file and creates an individual ID indexed
    DataFrame of ages and mod_aquire
    :param output_filename: stdout filename (test.txt)
    :param debug: generate
    :return: dataframe of all individuals from a single timestep
    """
    matches = ["{} = ".format(DataframeKeys.MOD_ACQUIRE),
               "{} = ".format(DataframeKeys.AGE),
               "{} = ".format(DataframeKeys.ID),
               "Update(): Time: "]
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if sft.has_match(line, matches):
                if matches[-1] in line:
                    break
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    individuals_df = pd.DataFrame(columns=[DataframeKeys.AGE,
                                           DataframeKeys.MOD_ACQUIRE])
    individuals_df.index.name = 'index'
    for line in filtered_lines:
        age = float(sft.get_val(matches[1], line))
        acquire = float(sft.get_val(matches[0], line))
        ind_id = int(sft.get_val(matches[2], line))
        individuals_df.loc[ind_id] = [age, acquire]

    if debug:
        with open("DEBUG_individuals_dataframe.csv", "w") as outfile:
            outfile.write(individuals_df.to_csv())
    return individuals_df
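DataframeKeys is defined elsewhere in the test; a minimal stand-in consistent with how it is used above (the attribute names are fixed by the code, but the string values are assumed placeholders for the tokens the simulation log emits):

class DataframeKeys:
    MOD_ACQUIRE = "mod_acquire"  # placeholder token
    AGE = "age"                  # placeholder token
    ID = "id"                    # placeholder token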
Example #13
def parse_output_file(output_filename="test.txt",
                      simulation_timestep=1,
                      debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    positive = 0
    negative = 0
    default = 0
    output_df = pd.DataFrame(columns=[
        ReportColumn.negative, ReportColumn.default, ReportColumn.positive
    ])
    output_df.index.name = Config.simulation_timestep
    for line in filtered_lines:
        if matches[0] in line:
            output_df.loc[time_step] = pd.Series({
                ReportColumn.positive: positive,
                ReportColumn.negative: negative,
                ReportColumn.default: default
            })
            time_step += simulation_timestep
            positive = 0
            negative = 0
            default = 0
        if matches[1] in line:
            result = int(dtk_sft.get_val(matches[1], line))
            if result:
                positive += 1
            else:
                negative += 1
        if matches[2] in line:
            default += 1
    res_path = r'./DEBUG_tb_test_result_from_logging.csv'
    with open(res_path, "w") as file:
        output_df.to_csv(file)
    return output_df
Example #14
def parse_stdout_file(start_timestep, duration_of_interest, stdout_filename="test.txt", debug=False):
    """creates cum_death and death_times array

    :param start_timestep:   drug start time
    :param stdout_filename: file to parse (test.txt)
    :return:                cum_deaths, death_times
    """

    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    cum_deaths = []
    deaths = []
    death_times = []
    timestep = 0
    cum = death_daily = infected = 0
    infected_individuals = []
    for line in filtered_lines:
        if "Time:" in line:
            timestep += 1
            infected_individuals.append(infected)
            cum_deaths.append(cum)
            deaths.append(death_daily)
            death_daily = 0
            infected = int(dtk_sft.get_val("Infected: ", line))
        else:
            cum += 1
            death_daily += 1
            # cum_deaths[-1] = cum
            if timestep <= duration_of_interest + start_timestep:
                death_times.append(timestep - start_timestep)
    if debug:
        with open("cum_deaths.txt", "w") as file:
            file.write(str(cum_deaths))
        with open("death_times.txt", "w") as file:
            file.write(str(death_times))
    return cum_deaths, deaths, infected_individuals, death_times
Example #15
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates an object which contains the heterogeneity multiplier and infectiousness
    :param output_filename: file to parse (test.txt)
    :return:                output_obj:  heterogeneity multiplier and infectiousness for each infection
    """
    filtered_lines = []
    output_obj = {}
    for match in matches:
        output_obj[match] = []
    with open(output_filename) as logfile:
        for line in logfile:
            for match in matches:
                if match in line:
                    output_obj[match].append(float(sft.get_val(match, line)))
                    filtered_lines.append(line)
                    break

    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)
    return output_obj
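A short usage sketch pairing this parser with the plotting helper seen in the other examples, assuming matches holds two tokens (the values below are placeholders) and that this sft module exposes plot_data_sorted the way dtk_sft does elsewhere in this section:

matches = ["heterogeneity multiplier= ", "infectiousness= "]  # placeholder tokens
output_obj = parse_output_file("test.txt", debug=True)
sft.plot_data_sorted(output_obj[matches[0]], output_obj[matches[1]],
                     label1="Multiplier", label2="Infectiousness",
                     title="Heterogeneity Multiplier vs Infectiousness",
                     xlabel="Data Points", ylabel="Value",
                     category="heterogeneity_debug", line=True, overlap=True)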
Example #17
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    # The StartedArt distribution is currently based on 0.01, while StoppedArt is mostly based on 0.0000001, so we
    # expect much longer latency durations for the StoppedArt data. We use a big arbitrary number that is noticeably
    # bigger than anything the StartedArt distribution is likely to produce.
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    tb_on_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "Incubation_timer calculated as" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                infection_timer = float(dtk_sft.get_val("calculated as ", line))
                reconstitute = int(dtk_sft.get_val("reconstitute=", line))
                if reconstitute:  # ignore people who are not reconstituting.
                    tb_on_art_latency_data.append(infection_timer)
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        # we ignore this for this test, people are already on art when they get TB
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write("BAD: No art-related event found in the logs for this timer update for Individual {},"
                                  " at time {}.\n".format(ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data,
        # and we expect the stopped art latency durations to be long, as produced by our cd4_activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data, tb_cd4_activation_vector[2], integers=True,
                                    roundup=True, round_nearest=False):
            outfile.write("BAD: The StoppedArt latency data distribution matches the initial latency data"
                          " distribution, but shouldn't.\n")
            success = False
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(len(stopped_art_latency_data))
        if proportion_small > 0.006:
            outfile.write("BAD: More than 0.6% of our durations are suspiciously small ({}). "
                          "Please investigate.\n".format(proportion_small))
            success = False

        if not dtk_sft.test_exponential(tb_on_art_latency_data, tb_cd4_activation_vector[2], outfile, integers=False,
                                        roundup=False, round_nearest=False):
            # this is testing the internal timer which is float type
            # so 'integers=False'
            success = False
            outfile.write("BAD: Initial TB infection (with HIV and ART) latency doesn't match expected distribution.")
        outfile.write("Data points checked = {}."
                      "\n".format(len(tb_on_art_latency_data), 0))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        # for graphing purposes only
        expected_tb_on_art_latency_data = np.random.exponential(1/tb_cd4_activation_vector[2],
                                                                len(tb_on_art_latency_data))

        dtk_sft.plot_data(sorted(tb_on_art_latency_data), sorted(expected_tb_on_art_latency_data), label1="Actual",
                          label2="Expected",
                          title="HIV+ART then TB latency data",
                          xlabel="Data Points", ylabel="Days",
                          category="tb_activation_and_cd4_hiv_art_tb_offart", line=True, overlap=True)
Example #18
def parse_output_file(output_filename="test.txt", debug=False):
    """
    creates a dictionary to store filtered information for each time step
    :param output_filename: file to parse (test.txt)
    :return:                output_dict, message
    """
    filtered_lines = []
    with open(output_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("DEBUG_filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    time_step = 0
    core = 0
    output_dict = {}
    exception_message = None
    for line in filtered_lines:
        try:
            if matches[0] in line:
                # this may raise LookupError
                value = dtk_sft.get_val(matches[0], line)
                if debug:
                    print("time value I get is '{}'".format(value))
                # this may raise ValueError
                time_step = int(float(value))

                if matches[1] in line:
                    # this may raise ValueError or LookupError
                    core = int(dtk_sft.get_val(matches[1], line))
                else:
                    print(line)
                    raise Exception(
                        "at timestep = {0}, {1} and {2} are not in the same line.\n"
                        .format(time_step, matches[0], matches[1]))
                if debug:
                    print("core is {}".format(core))

                if time_step not in output_dict:
                    output_dict[time_step] = {core: [0, 0]}
                elif core not in output_dict[time_step]:
                    output_dict[time_step][core] = [0, 0]
            elif matches[2] in line:  # this individual died from TB Symptomatic active
                output_dict[time_step][core][0] += 1
            elif matches[3] in line:  # this individual died from HIV
                output_dict[time_step][core][1] += 1

        except Exception as ex:
            exception_message = "failed to parse {0}, got exception: {1}.".format(
                output_filename, ex)
            print(exception_message)
            return None, exception_message

    if debug:
        res_path = r'./DEBUG_stdout_parsed.json'
        with open(res_path, "w") as file:
            json.dump(output_dict, file, indent=4)
    return output_dict, exception_message
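Because this parser returns an (output_dict, exception_message) pair, with output_dict set to None on failure, callers are expected to check both values before proceeding. A minimal sketch:

output_dict, exception_message = parse_output_file("test.txt", debug=True)
if output_dict is None:
    # parsing failed; surface the reason in the report rather than crashing
    print("Parse failed: {}".format(exception_message))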
Example #19
def parse_stdout_file(drug_start_time, start_timestep, stdout_filename="test.txt", debug=False):
    """creates cum_inactivations, inactivation_times arrays

    :param drug_start_time: drug start time
    :param start_timestep:   first timestep from config
    :param stdout_filename: file to parse (test.txt)
    :return:                infected_mosquitoes_per_day dictionary, total_infections int
    """

    filtered_lines = []
    with open(stdout_filename) as logfile:
        for line in logfile:
            if dtk_sft.has_match(line, matches):
                filtered_lines.append(line)
    if debug:
        with open("filtered_lines.txt", "w") as outfile:
            outfile.writelines(filtered_lines)

    # initialize variables
    cum_inactivations = []
    inactivation_times = []
    timestep = 0
    cum = inactivation_daily = infected = active = 0
    infected_individuals = []
    inactivations = []
    active_count = []
    reactivation_times = {}
    for line in filtered_lines:
        if matches[2] in line:
            cum_inactivations.append(cum)
            infected_individuals.append(infected)
            inactivations.append(inactivation_daily)
            active_count.append(active)
            infected = int(dtk_sft.get_val("Infected: ", line))
            inactivation_daily = 0
            timestep += 1
        elif matches[0] in line:
            # deactivation: have to track individual
            individual = int(dtk_sft.get_val("TB drug deactivated my \(", line))
            # if timestep <= duration_of_interest + start_timestep + drug_start_time:
            inactivation_time = timestep - start_timestep - drug_start_time
            if individual in reactivation_times:
                inactivation_time = timestep - reactivation_times[individual]
                reactivation_times.pop(individual)
                # not including reactivations: some data points are lost when the simulation ends before the timers do
                # inactivation_times.append(inactivation_time)
            else:
                cum += 1
                active -= 1
                inactivation_daily += 1
                inactivation_times.append(inactivation_time)
        elif matches[1] in line: # "progressing from Latent to Active Presymptomatic while on TB Drugs"
            # activation: have to track individual
            individual = int(dtk_sft.get_val("Individual ", line))
            reactivation_times[individual] = timestep
        elif matches[3] in line: # move to active
            active += 1
        else:  # died from HIV
            individual = int(dtk_sft.get_val("individual ", line))
            if individual in reactivation_times:
                active -= 1
                reactivation_times.pop(individual)

    if debug:
        with open("Cum_Inactivations.txt", "w") as outfile:
            outfile.write(str(cum_inactivations))
        with open("Inactivations.txt", "w") as outfile:
            outfile.write(str(inactivation_times))
    print("there are {} individuals in the reactivation state at the end of the simulation.\n".format(
        len(reactivation_times)))
    return cum_inactivations, inactivation_times, active_count, inactivations
Example #20
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    cd4_strata = data[2]
    mod_array = data[3]
    base_inf = data[4]
    presymp_mult = data[5]
    smear_neg_mult = data[6]

    latent_data_points = 0
    smear_negative_data_points = 0
    smear_positive_data_points = 0
    presymptomatic_data_points = 0
    extrapulmonary_data_points = 0
    success = True
    epsilon = 0.000002
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            tot_inf_actual = float(
                dtk_sft.get_val("total_infectiousness= ", line))
            cd4_count = float(dtk_sft.get_val("CD4count= ", line))
            cd4_mod_actual = float(dtk_sft.get_val("CD4mod= ", line))
            cd4_mod_expected = tb_cd4_infectiousness_calc(
                [mod_array, cd4_strata, cd4_count])
            if "Latent" in line:
                latent_data_points += 1
                if tot_inf_actual != 0:
                    success = False
                    outfile.write(
                        "BAD, found Latent infection with total_infectiousness= {} at time= {}. "
                        "Expected 0.\n".format(
                            tot_inf_actual, dtk_sft.get_val("time= ", line)))
            elif "SmearNegative" in line:
                smear_negative_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * smear_neg_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write(
                        "BAD, found SmearNegative infection with total_infectiousness= {} at time= {}, "
                        "Expected {}.\n{}\n".format(
                            tot_inf_actual, dtk_sft.get_val("time= ", line),
                            tot_inf_expected, line))
            elif "SmearPositive" in line:
                smear_positive_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write(
                        "BAD, found SmearPositive infection with total_infectiousness= {} at time= {}, "
                        "Expected {}.\n{}\n".format(
                            tot_inf_actual, dtk_sft.get_val("time= ", line),
                            tot_inf_expected, line))
            elif "Presymptomatic" in line:
                presymptomatic_data_points += 1
                tot_inf_expected = cd4_mod_expected * base_inf * presymp_mult
                if abs(tot_inf_expected - tot_inf_actual) > epsilon:
                    success = False
                    outfile.write(
                        "BAD, found Presymptomatic infection with total_infectiousness= {} at time= {}, "
                        "Expected {}.\n{}\n".format(
                            tot_inf_actual, dtk_sft.get_val("time= ", line),
                            tot_inf_expected, line))
            elif "Extrapulmonary" in line:
                extrapulmonary_data_points += 1
                if tot_inf_actual != 0:
                    success = False
                    outfile.write(
                        "BAD, found Extrapulmonary infection with total_infectiousness= {} at time= {}. "
                        "Should be 0.\n".format(
                            tot_inf_actual, dtk_sft.get_val("time= ", line)))

        outfile.write(
            "Data points for each TBHIV infection state:\nLatent = {} \nPresymptomatic = {} "
            "\nSmear Negative = {} \nSmear Positive = {} \nExtrapulmonary = {} "
            "\n".format(latent_data_points, presymptomatic_data_points,
                        smear_negative_data_points, smear_positive_data_points,
                        extrapulmonary_data_points))
        outfile.write("SUMMARY: Success={0}\n".format(success))
Example #21
def create_report_file(data, debug=False):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    latency_data = {}
    duration_data = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                start_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id in latency_data.keys():
                    outfile.write(
                        "Individual {} incubation timer reset at time {}. Please check. "
                        "\n".format(ind_id, start_time_stamp))
                latency_data[ind_id] = start_time_stamp
            elif "TBActivationPresymptomatic" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                end_time_stamp = int(dtk_sft.get_val("time= ", line))
                if ind_id not in latency_data.keys():
                    outfile.write(
                        "Individual {} went presymptomatic without incubation timer update at time {}. "
                        "Please check. \n".format(ind_id, end_time_stamp))
                else:
                    duration = end_time_stamp - latency_data.get(ind_id)
                    duration_data[ind_id] = duration
        if debug:
            with open("DEBUG_duration_data.json", "w") as debug_outfile:
                json.dump(duration_data, debug_outfile, indent=4)
        durations = list(duration_data.values())

        if not dtk_sft.test_exponential(durations,
                                        tb_cd4_activation_vector[0],
                                        outfile,
                                        integers=True,
                                        roundup=True,
                                        round_nearest=False):
            success = False
        outfile.write("Data points checked = {}.\n".format(len(duration_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        # only used for graphing purposes
        expected_data = map(
            math.ceil,
            np.random.exponential(1 / tb_cd4_activation_vector[0],
                                  len(duration_data)))
        expected_durations = list(expected_data)
        dtk_sft.plot_data_sorted(
            durations,
            expected_durations,
            label1="Actual",
            label2="Expected",
            title="Recalculated Latency Duration TB then HIV(Sorted)",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_tb_first",
            line=True,
            overlap=True)
Example #22
def create_report_file(data):
    report_name = data[0]
    lines = data[1]
    tb_cd4_activation_vector = data[2]  # this test assumes the vector is constant

    # The StartedArt distribution is currently based on 0.01, while StoppedArt is mostly based on 0.0000001, so we
    # expect much longer latency durations for the StoppedArt data. We use a big arbitrary number that is noticeably
    # bigger than anything the StartedArt distribution is likely to produce.
    big_magic_number = 2000
    stopped_art_latency_data = []
    started_art_latency_data = []
    art_events_dict = {}
    success = True
    with open(report_name, "w") as outfile:
        if not lines:
            outfile.write("BAD: No relevant test data found.\n")
            success = False
        for line in lines:
            if "has event" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                art_status = line.split(" ")[9].strip(
                    ".")  # get_val only gets digits
                art_events_dict[ind_id] = art_status
            if "LifeCourseLatencyTimerUpdate" in line:
                ind_id = int(dtk_sft.get_val("Individual ", line))
                new_incubation_timer = float(dtk_sft.get_val("timer ", line))
                if ind_id in art_events_dict.keys():
                    if art_events_dict.get(ind_id) == "StartedART":
                        started_art_latency_data.append(new_incubation_timer)
                    else:
                        stopped_art_latency_data.append(new_incubation_timer)
                    art_events_dict.pop(ind_id)
                else:
                    success = False
                    outfile.write(
                        "BAD: No art-related event found in the logs for this timer update for Individual {},"
                        " at time {}.\n".format(
                            ind_id, int(dtk_sft.get_val("time= ", line))))
        # we want the stopped art latency data to NOT match the started art latency data,
        # and we expect the stopped art latency durations to be long, as produced by our cd4_activation_vector
        if dtk_sft.test_exponential(stopped_art_latency_data,
                                    tb_cd4_activation_vector[2],
                                    integers=False,
                                    roundup=False,
                                    round_nearest=False):
            outfile.write(
                "BAD: The StoppedArt latency data distribution matches the StartedArt latency data"
                " distribution, but shouldn't.\n")
            success = False
        expected_stopped_art_data = np.random.exponential(
            1 / tb_cd4_activation_vector[0], len(stopped_art_latency_data))
        small_duration_count = 0
        for duration in stopped_art_latency_data:
            if duration < big_magic_number:
                small_duration_count += 1
        proportion_small = small_duration_count / float(
            len(stopped_art_latency_data))
        if proportion_small > 0.01:
            outfile.write(
                "BAD: More than 1% of our durations are suspiciously small ({}). "
                "Please investigate.\n".format(proportion_small))
            success = False
        outfile.write("Data points checked = {}.\n".format(
            len(stopped_art_latency_data)))
        outfile.write("SUMMARY: Success={0}\n".format(success))

        dtk_sft.plot_data(
            sorted(stopped_art_latency_data),
            sorted(expected_stopped_art_data),
            label1="Actual",
            label2="Expected",
            title="StoppedART Latency data should have a similar shape/scale of duration but will not match",
            xlabel="Data Points",
            ylabel="Days",
            category="tb_activation_and_cd4_hiv_first_on_art_off_art",
            line=True,
            overlap=True)