Exemplo n.º 1
0
def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """Convert a legacy version-2 HDF5 file into a tree sequence.

    Reads the coalescence records from ``root["trees"]``, plus the optional
    ``root["samples"]`` and ``root["mutations"]`` groups, builds a
    ``tskit.TableCollection`` from them and returns the resulting tree
    sequence.

    The provenance table of the result holds, in order: one record for the
    tree generation step, one for the mutation generation step (only when a
    ``mutations`` group is present), and finally the upgrade record. The two
    legacy records carry ``datetime.datetime.min`` as a placeholder
    timestamp.

    :param root: Mapping-like root group of the opened legacy file; must
        contain a ``"trees"`` group.
    :param remove_duplicate_positions: Passed through unchanged to
        ``_convert_hdf5_mutations``.
    :return: The value of ``tables.tree_sequence()`` for the converted
        tables.
    """
    trees_group = root["trees"]
    old_timestamp = datetime.datetime.min.isoformat()

    # Every coalescence record is expanded into two edges that share the
    # record's parent and interval; the flattened ``children`` dataset
    # supplies the child column. The edge columns must all have the same
    # length, so each record is expected to list exactly two children.
    cr_node = np.array(trees_group["node"], dtype=np.int32)
    parent = np.repeat(cr_node, 2)
    left = np.repeat(np.array(trees_group["left"], dtype=np.float64), 2)
    right = np.repeat(np.array(trees_group["right"], dtype=np.float64), 2)
    child = np.array(trees_group["children"], dtype=np.int32).flatten()

    # The largest right coordinate is used as the sequence length.
    tables = tskit.TableCollection(np.max(right))
    tables.edges.set_columns(left=left,
                             right=right,
                             parent=parent,
                             child=child)

    # Record the legacy provenance directly on the returned tables so that
    # it is not lost during the conversion.
    tables.provenances.add_row(
        timestamp=old_timestamp,
        record=_get_v2_provenance("generate_trees", trees_group.attrs),
    )

    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Node IDs below the smallest parent ID are flagged as samples.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    population = np.zeros(num_nodes, dtype=np.int32)
    time = np.zeros(num_nodes, dtype=np.float64)
    flags[:sample_size] = tskit.NODE_IS_SAMPLE
    # Internal nodes take their time and population from the records.
    time[cr_node] = np.array(trees_group["time"])
    population[cr_node] = np.array(trees_group["population"], dtype=np.int32)
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    tables.nodes.set_columns(flags=flags, population=population, time=time)
    _set_populations(tables)

    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(mutations_group, tables.sites,
                                tables.mutations, remove_duplicate_positions)
        tables.provenances.add_row(
            timestamp=old_timestamp,
            record=_get_v2_provenance("generate_mutations",
                                      mutations_group.attrs),
        )
    tables.provenances.add_row(_get_upgrade_provenance(root))
    tables.sort()
    return tables.tree_sequence()
Exemplo n.º 2
0
def provenance_timestamp_only_example():
    """Return a small simulated tree sequence with a timestamp-only provenance.

    A 10-sample simulation (``random_seed=1``) is dumped to tables and a
    provenance row with timestamp ``"12345"`` and an empty record is
    appended to those tables before they are converted back into a tree
    sequence. Provenance rows already present in the dumped tables are left
    in place.

    :return: The value of ``tables.tree_sequence()`` after the row is added.
    """
    ts = msprime.simulate(10, random_seed=1)
    tables = ts.dump_tables()
    # Add the row to the tables that are returned; a standalone
    # ProvenanceTable would be dropped when the function exits.
    tables.provenances.add_row(timestamp="12345", record="")
    return tables.tree_sequence()