Example No. 1
def save_transmissions_to_hdf5(
    hdf5_file_path: str,
    transmissions: List[Transmission],
    chunk_size: int = 50000,
):
    """
    Saves transmissions data to hdf5. The transmission type is inferred from the first
    element of the list.

    Parameters
    ----------
    attributes_to_save
        attributes to save from each transmission
    hdf5_file_path
        hdf5 path to save transmissions
    transmissions
        list of transmission objects
    chunk_size
        number of hdf5 chunks to use while saving
    """
    with h5py.File(hdf5_file_path, "a") as f:
        if "infections" not in f:
            f.create_group("infections")
        f["infections"].create_group("transmissions")
        transmissions_group = f["infections"]["transmissions"]
        n_transmissions = len(transmissions)
        transmissions_group.attrs["n_transmissions"] = n_transmissions
        transmission_type = transmissions[0].__class__.__name__
        transmissions_group.attrs["transmission_type"] = transmission_type
        n_chunks = int(np.ceil(n_transmissions / chunk_size))
        attributes_to_save = attributes_to_save_dict[transmission_type]
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_transmissions)
            attribute_dict = defaultdict(list)
            for index in range(idx1, idx2):
                transmission = transmissions[index]
                for attribute_name in attributes_to_save:
                    attribute = getattr(transmission, attribute_name)
                    if attribute is None:
                        attribute_dict[attribute_name].append(np.nan)
                    else:
                        attribute_dict[attribute_name].append(attribute)
            for attribute_name in attributes_to_save:
                data = np.array(attribute_dict[attribute_name], dtype=float)
                write_dataset(
                    group=transmissions_group,
                    dataset_name=attribute_name,
                    data=data,
                    index1=idx1,
                    index2=idx2,
                )
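
The write_dataset helper is not shown in these examples. Judging from its call sites (no indices for a one-shot write, index1/index2 when writing chunk by chunk), a minimal sketch could look like the following; this is an assumption about its behaviour, not the original implementation:

import h5py
import numpy as np

def write_dataset(group, dataset_name, data, index1=None, index2=None):
    # Assumed behaviour: create a resizable dataset on the first write,
    # then grow it and fill the [index1:index2) slice on later chunks.
    if dataset_name not in group:
        maxshape = (None,) + data.shape[1:]
        group.create_dataset(dataset_name, data=data, maxshape=maxshape)
    else:
        dataset = group[dataset_name]
        dataset.resize(index2, axis=0)
        dataset[index1:index2] = data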
Example No. 2
def save_checkpoint_to_hdf5(population: Population,
                            date: str,
                            hdf5_file_path: str,
                            chunk_size: int = 50000):
    """
    Saves a checkpoint at the given date by saving the infection information of the world.

    Parameters
    ----------
    population:
        world's population
    date:
        date of the checkpoint
    hdf5_file_path
        path where to save the hdf5 checkpoint
    chunk_size
        hdf5 chunk_size to write data
    """
    recovered_people_ids = [
        person.id for person in population if person.recovered
    ]
    dead_people_ids = [person.id for person in population if person.dead]
    susceptible_people_ids = [
        person.id for person in population if person.susceptible
    ]
    infected_people_ids = []
    infection_list = []
    for person in population.infected:
        infected_people_ids.append(person.id)
        infection_list.append(person.infection)
    with h5py.File(hdf5_file_path, "w") as f:
        f.create_group("time")
        f["time"].attrs["date"] = date
        f.create_group("people_data")
        for name, data in zip(
            ["infected_id", "dead_id", "recovered_id", "susceptible_id"],
            [
                infected_people_ids,
                dead_people_ids,
                recovered_people_ids,
                susceptible_people_ids,
            ],
        ):
            write_dataset(
                group=f["people_data"],
                dataset_name=name,
                data=np.array(data, dtype=int),
            )
    save_infections_to_hdf5(
        hdf5_file_path=hdf5_file_path,
        infections=infection_list,
        chunk_size=chunk_size,
    )
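
A hypothetical call, following the per-rank naming convention expected by combine_checkpoints_for_ranks below (world.people standing in for the Population instance is illustrative):

# Rank 0 of a parallel run saves its checkpoint for 2020-01-01.
save_checkpoint_to_hdf5(
    population=world.people,
    date="2020-01-01",
    hdf5_file_path="checkpoint_2020-01-01.0.hdf5",
)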
Example No. 3
def combine_checkpoints_for_ranks(hdf5_file_root: str):
    """
    After running a parallel simulation with checkpoints, the
    checkpoint data will be scattered accross, with each process
    saving a checkpoint_date.0.hdf5 file. This function can be used
    to unify all data in one single checkpoint, so that we can load it
    later with any arbitray number of cores.

    Parameters
    ----------
    hdf5_file_root
        the str root of the pasts like "checkpoint_2020-01-01". The checkpoint files
        will be expected to have names like "checkpoint_2020-01-01.{rank}.hdf5 where
        rank = 0, 1, 2, etc.
    """
    checkpoint_files = glob(hdf5_file_root + ".[0-9]*.hdf5")
    try:
        cp_date = hdf5_file_root.split("_")[-1]
    except IndexError:
        cp_date = hdf5_file_root
    logger.info(f"found {len(checkpoint_files)} {cp_date} checkpoint files")
    ret = load_checkpoint_from_hdf5(checkpoint_files[0])
    for file in checkpoint_files[1:]:
        ret2 = load_checkpoint_from_hdf5(file, load_date=False)
        for key, value in ret2.items():
            ret[key] = np.concatenate((ret[key], value))

    unified_checkpoint_path = hdf5_file_root + ".hdf5"
    with h5py.File(unified_checkpoint_path, "w") as f:
        f.create_group("time")
        f["time"].attrs["date"] = ret["date"]
        f.create_group("people_data")
        for name in [
                "infected_id", "dead_id", "recovered_id", "susceptible_id"
        ]:
            write_dataset(
                group=f["people_data"],
                dataset_name=name,
                data=np.array(ret[name], dtype=int),
            )
    save_infections_to_hdf5(
        hdf5_file_path=unified_checkpoint_path,
        infections=ret["infection_list"],
        chunk_size=1000000,
    )
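
For example, to merge the per-rank files written by the previous function (paths are illustrative):

# Combines checkpoint_2020-01-01.0.hdf5, checkpoint_2020-01-01.1.hdf5, ...
# into a single checkpoint_2020-01-01.hdf5.
combine_checkpoints_for_ranks("checkpoint_2020-01-01")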
Example No. 4
def save_symptoms_to_hdf5(
    hdf5_file_path: str,
    symptoms_list: List[Symptoms],
    chunk_size: int = 50000,
):
    """
    Saves symptoms data to hdf5. 

    Parameters
    ----------
    attributes_to_save
        attributes to save from each symptom
    hdf5_file_path
        hdf5 path to save symptoms
    symptoms
        list of symptom objects
    chunk_size
        number of hdf5 chunks to use while saving
    """
    with h5py.File(hdf5_file_path, "a") as f:
        if "infections" not in f:
            f.create_group("infections")
        f["infections"].create_group("symptoms")
        symptoms_group = f["infections"]["symptoms"]
        n_symptoms = len(symptoms_list)
        symptoms_group.attrs["n_symptoms"] = n_symptoms
        n_chunks = int(np.ceil(n_symptoms / chunk_size))
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_symptoms)
            attribute_dict = {}
            max_tag_list = []
            tag_list = []
            max_severity_list = []
            stage_list = []
            time_of_symptoms_onset_list = []
            for index in range(idx1, idx2):
                symptoms = symptoms_list[index]
                max_tag_list.append(symptoms.max_tag.value)
                tag_list.append(symptoms.tag.value)
                max_severity_list.append(symptoms.max_severity)
                stage_list.append(symptoms.stage)
                time_of_symptoms_onset_list.append(
                    symptoms.time_of_symptoms_onset)
            attribute_dict["max_tag"] = np.array(max_tag_list, dtype=np.int)
            attribute_dict["tag"] = np.array(tag_list, dtype=np.int)
            attribute_dict["max_severity"] = np.array(max_severity_list,
                                                      dtype=np.float)
            attribute_dict["stage"] = np.array(stage_list, dtype=np.int)
            attribute_dict["time_of_symptoms_onset"] = np.array(
                time_of_symptoms_onset_list, dtype=np.float)
            for attribute_name, attribute_value in attribute_dict.items():
                write_dataset(
                    group=symptoms_group,
                    dataset_name=attribute_name,
                    data=attribute_value,
                    index1=idx1,
                    index2=idx2,
                )
        trajectory_times_list = []
        trajectory_symptom_list = []
        trajectory_lengths = []
        for symptoms in symptoms_list:
            times = []
            symps = []
            for time, symp in symptoms.trajectory:
                times.append(time)
                symps.append(symp.value)
            trajectory_times_list.append(np.array(times, dtype=float))
            trajectory_symptom_list.append(np.array(symps, dtype=int))
            trajectory_lengths.append(len(times))
        if len(np.unique(trajectory_lengths)) == 1:
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_times",
                data=np.array(trajectory_times_list, dtype=float),
            )
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_symptoms",
                data=np.array(trajectory_symptom_list, dtype=int),
            )
        else:
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_times",
                data=np.array(trajectory_times_list, dtype=float_vlen_type),
            )
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_symptoms",
                data=np.array(trajectory_symptom_list, dtype=int_vlen_type),
            )
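
float_vlen_type and int_vlen_type are used above but not defined in the snippet. For the ragged-trajectory branch to work, they would have to be h5py variable-length dtypes, along these lines (an assumption based on the h5py API, not the original definitions):

import h5py
import numpy as np

# Assumed definitions: variable-length dtypes, so that trajectories of
# different lengths can be stored as a single ragged dataset.
float_vlen_type = h5py.vlen_dtype(np.float64)
int_vlen_type = h5py.vlen_dtype(np.int64)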
Example No. 5
def save_infections_to_hdf5(
    hdf5_file_path: str,
    infections: List[Infection],
    chunk_size: int = 50000,
):
    """
    Saves infections data to hdf5.

    Parameters
    ----------
    attributes_to_save
        attributes to save from each symptom
    hdf5_file_path
        hdf5 path to save symptoms
    symptoms
        list of symptom objects
    chunk_size
        number of hdf5 chunks to use while saving
    """
    with h5py.File(hdf5_file_path, "a") as f:
        f.create_group("infections")
        n_infections = len(infections)
        f["infections"].attrs["n_infections"] = n_infections
        if n_infections == 0:
            return
        symptoms_list = [infection.symptoms for infection in infections]
        transmission_list = [
            infection.transmission for infection in infections
        ]
        save_symptoms_to_hdf5(
            symptoms_list=symptoms_list,
            hdf5_file_path=hdf5_file_path,
            chunk_size=chunk_size,
        )
        save_transmissions_to_hdf5(
            transmissions=transmission_list,
            hdf5_file_path=hdf5_file_path,
            chunk_size=chunk_size,
        )
        attributes_to_save = ["start_time", "number_of_infected"]
        n_chunks = int(np.ceil(n_infections / chunk_size))
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_infections)
            attribute_dict = defaultdict(list)
            for index in range(idx1, idx2):
                infection = infections[index]
                for attribute_name in attributes_to_save:
                    attribute = getattr(infection, attribute_name)
                    if attribute is None:
                        attribute_dict[attribute_name].append(np.nan)
                    else:
                        attribute_dict[attribute_name].append(attribute)
            for attribute_name in attributes_to_save:
                data = np.array(attribute_dict[attribute_name], dtype=float)
                write_dataset(
                    group=f["infections"],
                    dataset_name=attribute_name,
                    data=data,
                    index1=idx1,
                    index2=idx2,
                )
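
Taken together, the functions above produce a checkpoint file with a fixed group layout. A quick way to inspect it, with the expected names reconstructed from the code (the path is illustrative):

import h5py

# Print every group and dataset in a saved checkpoint.
with h5py.File("checkpoint_2020-01-01.hdf5", "r") as f:
    f.visititems(lambda name, obj: print(name))
# Expected entries, based on the functions above:
#   infections                (attrs: n_infections)
#   infections/number_of_infected
#   infections/start_time
#   infections/symptoms       (attrs: n_symptoms)
#   infections/symptoms/...   (max_tag, tag, max_severity, stage,
#                              time_of_symptoms_onset, trajectory_*)
#   infections/transmissions  (attrs: n_transmissions, transmission_type)
#   people_data/infected_id, dead_id, recovered_id, susceptible_id
#   time                      (attrs: date)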