Exemplos de unpack_bytes em Python, exemplos de tskit.unpack_bytes em Python

Exemplo n.º 1

0

Exibir arquivo

def _upgrade_old_tables(tables):
    """
    Upgrade, in place, a table collection written with an older file version.

    A warning announcing the conversion is always emitted. Mutation metadata
    of versions "0.1" and "0.2" is re-encoded, node and migration times of
    version "0.1" are shifted by the recorded generation, and a provenance
    record describing the upgrade is appended.

    :param tables: The table collection to modify in place.
    """
    # Looking up the provenance may itself warn; those warnings are muted.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        provenance = get_provenance(tables)
    old_version = provenance.file_version
    generation_shift = provenance.slim_generation

    message = "".join([
        "This is an version {} SLiM tree sequence.".format(old_version),
        " When you write this out, ",
        "it will be converted to version {}.".format(slim_file_version),
    ])
    warnings.warn(message)

    if old_version in ("0.1", "0.2"):
        # add empty nucleotide slots to metadata
        mutations = tables.mutations
        raw_entries = tskit.unpack_bytes(mutations.metadata,
                                         mutations.metadata_offset)
        upgraded_entries = list(
            map(_decode_mutation_pre_nucleotides, raw_entries))
        packed, packed_offset = tskit.pack_bytes(upgraded_entries)
        mutations.set_columns(
            site=mutations.site,
            node=mutations.node,
            parent=mutations.parent,
            derived_state=mutations.derived_state,
            derived_state_offset=mutations.derived_state_offset,
            metadata=packed,
            metadata_offset=packed_offset,
        )

    if old_version == "0.1":
        # shift times
        nodes = tables.nodes
        nodes.set_columns(
            flags=nodes.flags,
            time=nodes.time + generation_shift,
            population=nodes.population,
            individual=nodes.individual,
            metadata=nodes.metadata,
            metadata_offset=nodes.metadata_offset,
        )
        migrations = tables.migrations
        migrations.set_columns(
            left=migrations.left,
            right=migrations.right,
            node=migrations.node,
            source=migrations.source,
            dest=migrations.dest,
            time=migrations.time + generation_shift,
        )

    # Key order is kept as-is because it determines the serialised JSON.
    software = {"name": "pyslim", "version": pyslim_version}
    parameters = {
        "command": ["_upgrade_old_tables"],
        "old_file_version": old_version,
        "new_file_version": slim_file_version,
    }
    new_record = {
        "schema_version": "1.0.0",
        "software": software,
        "parameters": parameters,
        "environment": get_environment(),
    }
    tskit.validate_provenance(new_record)
    tables.provenances.add_row(json.dumps(new_record))

Exemplo n.º 2

0

Exibir arquivo

Arquivo: slim_metadata.py Projeto: pythseq/pyslim

def extract_population_metadata(tables):
    '''
    Lazily decode the metadata of each row of the population table.

    :param TableCollection tables: The tables, as produced by SLiM.
    :return: A generator yielding the result of ``decode_population`` for
        each unpacked metadata entry, in table order.
    '''
    populations = tables.populations
    raw_entries = tskit.unpack_bytes(populations.metadata,
                                     populations.metadata_offset)
    yield from map(decode_population, raw_entries)

Exemplo n.º 3

0

Exibir arquivo

    def test_annotate_nodes(self):
        """
        Node metadata must survive a decode/encode round trip, and writing
        the decoded metadata back must leave the tables unchanged.
        """
        for ts in self.get_slim_examples():
            tables = ts.tables
            new_tables = ts.tables
            raw_entries = tskit.unpack_bytes(tables.nodes.metadata,
                                             tables.nodes.metadata_offset)
            decoded_entries = []
            for raw in raw_entries:
                decoded = pyslim.decode_node(raw)
                # Re-encoding must reproduce the original bytes exactly.
                self.assertEqual(raw, pyslim.encode_node(decoded))
                decoded_entries.append(decoded)

            pyslim.annotate_node_metadata(new_tables, decoded_entries)
            self.assertEqual(tables, new_tables)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: util.py Projeto: benjeffery/tsdate

def nodes_time(tree_sequence, unconstrained=True):
    """
    Return an array of node times for ``tree_sequence``.

    :param tree_sequence: The tree sequence to read node times from.
    :param bool unconstrained: If True, the time of every non-sample node is
        replaced by the ``"mn"`` value stored in that node's JSON metadata.
        If False, the times in the node table are returned unchanged.
    :return: Array of node times, indexed by node id.
    :raises ValueError: If ``unconstrained`` is True and the metadata of a
        non-sample node is not valid JSON or has no ``"mn"`` key.
    """
    nodes_table = tree_sequence.tables.nodes
    nodes_age = nodes_table.time[:]
    if not unconstrained:
        return nodes_age

    # Build the sample lookup once; testing membership against the array
    # returned by samples() for every node is a linear scan per node.
    sample_ids = {int(u) for u in tree_sequence.samples()}
    unpacked = tskit.unpack_bytes(nodes_table.metadata,
                                  nodes_table.metadata_offset)
    for index, met in enumerate(unpacked):
        if index in sample_ids:
            continue
        try:
            nodes_age[index] = json.loads(met.decode())["mn"]
        except (KeyError, json.decoder.JSONDecodeError) as err:
            raise ValueError("Tree Sequence must be tsdated with the "
                             "Inside-Outside Method. Use unconstrained=False "
                             "if not.") from err
    return nodes_age

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test_legacy_metadata.py Projeto: petrelharp/pyslim

    def test_annotate_populations(self):
        """
        Population metadata must survive a decode/encode round trip through
        the legacy functions, each of which must emit a DeprecationWarning,
        and re-annotating must leave the tables unchanged.
        """
        for ts in self.get_slim_examples():
            tables = ts.tables
            new_tables = ts.tables
            raw_entries = tskit.unpack_bytes(
                tables.populations.metadata,
                tables.populations.metadata_offset)
            decoded_entries = []
            for raw in raw_entries:
                # Decode and encode are checked separately so that each one
                # is required to warn on its own.
                with self.assertWarns(DeprecationWarning):
                    decoded = pyslim.decode_population(raw)
                with self.assertWarns(DeprecationWarning):
                    reencoded = pyslim.encode_population(decoded)
                self.assertEqual(raw, reencoded)
                decoded_entries.append(decoded)

            with self.assertWarns(DeprecationWarning):
                pyslim.annotate_population_metadata(new_tables,
                                                    decoded_entries)
            self.assertTableCollectionsEqual(tables, new_tables)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: slim_metadata.py Projeto: attrna/pyslim

def extract_mutation_metadata(tables):
    '''
    Returns an iterator over lists of :class:`MutationMetadata` objects containing
    information about the mutations in the tables.

    .. warning::

        This method is deprecated, since metadata handling has been taken over
        by tskit. It will disappear at some point in the future.

    :param TableCollection tables: The tables, as produced by SLiM.
    :return: A generator yielding, for each row of the mutation table, a list
        with one :class:`MutationMetadata` per entry of that row's
        ``mutation_list`` metadata.
    '''
    # This is a generator, so the deprecation warning is only issued once
    # iteration starts.
    _deprecation_warning("extract_mutation_metadata")
    # The metadata is read from each row directly; the raw metadata column
    # was previously unpacked here as well but the result was never used.
    for mut in tables.mutations:
        yield [MutationMetadata.fromdict(mm) for mm in mut.metadata['mutation_list']]

Exemplo n.º 7

0

Exibir arquivo

Arquivo: util.py Projeto: tskit-dev/tsdate

def nodes_time_unconstrained(tree_sequence):
    """
    Return the unconstrained node times for every node in a tree sequence that has
    been dated using ``tsdate`` with the inside-outside algorithm (these times are
    stored in the node metadata). Will produce an error if the tree sequence does
    not contain this information.

    :param tree_sequence: The dated tree sequence.
    :return: Array of node times indexed by node id. Sample nodes keep the
        time from the node table; every other node gets the ``"mn"`` value
        from its JSON metadata.
    :raises ValueError: If the metadata of a non-sample node is not valid
        JSON or has no ``"mn"`` key.
    """
    nodes_table = tree_sequence.tables.nodes
    nodes_time = nodes_table.time.copy()
    # Build the sample lookup once; testing membership against the array
    # returned by samples() for every node is a linear scan per node.
    sample_ids = {int(u) for u in tree_sequence.samples()}
    unpacked = tskit.unpack_bytes(nodes_table.metadata,
                                  nodes_table.metadata_offset)
    for index, met in enumerate(unpacked):
        if index in sample_ids:
            continue
        try:
            nodes_time[index] = json.loads(met.decode())["mn"]
        except (KeyError, json.decoder.JSONDecodeError) as err:
            raise ValueError(
                "Tree Sequence must be tsdated with the Inside-Outside Method."
            ) from err
    return nodes_time

Exemplo n.º 8

0

Exibir arquivo

def posterior_mean_var(ts, timepoints, posterior, Ne, *, fixed_node_set=None):
    """
    Mean and variance of node age in scaled time. Fixed nodes will be given a mean
    of their exact time in the tree sequence, and zero variance (as long as they are
    identified by the ``fixed_node_set``).
    If ``fixed_node_set`` is None, we attempt to date all the non-sample nodes.
    Also assigns the estimated mean and variance of the age of each node, in unscaled
    time, as metadata in the tree sequence.

    :param ts: The input tree sequence.
    :param timepoints: Time grid; it is multiplied by ``2 * Ne`` before use.
    :param posterior: Object providing ``grid_data`` (one row of weights per
        non-fixed node) and ``nonfixed_nodes`` (the matching node ids).
    :param Ne: Scaling factor applied to ``timepoints``.
    :param fixed_node_set: Iterable of node ids treated as fixed; defaults to
        ``ts.samples()``.
    :return: Tuple ``(ts, mn_post, vr_post)``: a new tree sequence whose node
        metadata holds JSON ``{"mn": ..., "vr": ...}`` for every non-fixed
        node, plus the per-node mean and variance arrays.
    """
    # Filled with NaNs so that any node left unassigned is detectable.
    mn_post = np.full(ts.num_nodes, np.nan)
    vr_post = np.full(ts.num_nodes, np.nan)
    tables = ts.dump_tables()

    if fixed_node_set is None:
        fixed_node_set = ts.samples()
    # An explicit integer dtype keeps an empty set usable as an index array;
    # np.array([]) would default to float64 and fail when used for indexing.
    fixed_nodes = np.array(list(fixed_node_set), dtype=np.int64)
    mn_post[fixed_nodes] = tables.nodes.time[fixed_nodes]
    vr_post[fixed_nodes] = 0

    # `tables` is an unmodified dump of `ts` at this point, so read from it
    # instead of asking the tree sequence for its tables again.
    metadata_array = tskit.unpack_bytes(tables.nodes.metadata,
                                        tables.nodes.metadata_offset)
    timepoints = timepoints * 2 * Ne
    for row, node_id in zip(posterior.grid_data, posterior.nonfixed_nodes):
        row_total = np.sum(row)
        mn_post[node_id] = np.sum(row * timepoints) / row_total
        vr_post[node_id] = np.sum(
            ((mn_post[node_id] - timepoints) ** 2) * (row / row_total))
        metadata_array[node_id] = json.dumps({
            "mn": mn_post[node_id],
            "vr": vr_post[node_id]
        }).encode()
    md, md_offset = tskit.pack_bytes(metadata_array)
    tables.nodes.set_columns(
        flags=tables.nodes.flags,
        time=tables.nodes.time,
        population=tables.nodes.population,
        individual=tables.nodes.individual,
        metadata=md,
        metadata_offset=md_offset,
    )
    ts = tables.tree_sequence()
    return ts, mn_post, vr_post

Exemplo n.º 9

0

Exibir arquivo

Arquivo: analyze_data.py Projeto: benjeffery/unified_genealogy_paper

def get_mut_ages(ts,
                 unconstrained=True,
                 ignore_sample_muts=False,
                 geometric=True):
    """
    Estimate an age for each site from the oldest mutation at that site.

    The age of a mutation is taken between the time of the node it sits above
    and the time of that node's parent in the local tree.

    :param ts: The tree sequence to analyse.
    :param bool unconstrained: If True, the time of each non-sample node is
        replaced by the ``"mn"`` value in its JSON metadata.
    :param bool ignore_sample_muts: Accepted, but currently has no effect on
        the returned values (see the note in the body).
    :param bool geometric: If True use the geometric mean of the two node
        times, otherwise the arithmetic mean.
    :return: Tuple ``(mut_ages, mut_upper_bounds, oldest_mut_ids)``, each an
        array with one entry per site; ``oldest_mut_ids`` is cast to int.
    """
    mut_ages = np.zeros(ts.num_sites)
    mut_upper_bounds = np.zeros(ts.num_sites)
    node_ages = ts.tables.nodes.time
    oldest_mut_ids = np.zeros(ts.num_sites)
    if unconstrained:
        metadata = ts.tables.nodes.metadata[:]
        metadata_offset = ts.tables.nodes.metadata_offset[:]
        # Build the sample lookup once; testing membership against the array
        # returned by samples() for every node is a linear scan per node.
        sample_ids = {int(u) for u in ts.samples()}
        for index, met in enumerate(
                tskit.unpack_bytes(metadata, metadata_offset)):
            if index not in sample_ids:
                node_ages[index] = json.loads(met.decode())["mn"]
    if ignore_sample_muts:
        # NOTE(review): this mask (False for mutations above sample nodes at
        # sites that carry more than one mutation) is computed but never
        # applied below, so the flag does not change the result. Kept as-is
        # to preserve the intent; confirm how it was meant to be used.
        mutations_table = ts.tables.mutations
        unique_sites = np.unique(ts.tables.mutations.site, return_counts=True)
        unique_sites = unique_sites[0][unique_sites[1] > 1]
        no_samp_muts = ~np.logical_and(
            np.isin(mutations_table.site, unique_sites),
            np.isin(mutations_table.node, ts.samples()),
        )
    for tree in tqdm(ts.trees(),
                     total=ts.num_trees,
                     desc="Finding mutation ages"):
        for site in tree.sites():
            for mut in site.mutations:
                # NOTE(review): for a mutation above a root, tree.parent()
                # is presumably -1, which would index the last node here.
                parent_age = node_ages[tree.parent(mut.node)]
                if geometric:
                    age = np.sqrt(node_ages[mut.node] * parent_age)
                else:
                    age = (node_ages[mut.node] + parent_age) / 2
                # Keep only the oldest mutation seen so far at this site.
                if mut_ages[site.id] < age:
                    mut_upper_bounds[site.id] = parent_age
                    mut_ages[site.id] = age
                    oldest_mut_ids[site.id] = mut.id
    return mut_ages, mut_upper_bounds, oldest_mut_ids.astype(int)

Exemplo n.º 10

0

Exibir arquivo

Arquivo: tsutil.py Projeto: awohns/unified_genealogy_paper

def combine_chromosome_arms(args):
    """
    Splices two chromosome arms together to form a full chromosome

    Reads the tree sequences at ``args.p_arm`` and ``args.q_arm``, appends the
    long-arm nodes, edges, sites and mutations to the short-arm tables with
    node ids remapped, checks the result and writes it to ``args.output``.

    :param args: Object with ``p_arm``, ``q_arm`` and ``output`` attributes.
    """
    short_arm = tskit.load(args.p_arm)
    long_arm = tskit.load(args.q_arm)
    assert short_arm.num_samples == long_arm.num_samples
    # Remove material before first position and after last position
    short_arm = short_arm.keep_intervals(
        [[
            short_arm.tables.sites.position[0] - 1,
            short_arm.tables.sites.position[-1] + 1,
        ]],
        simplify=False,
    )
    long_arm = long_arm.keep_intervals(
        [[
            long_arm.tables.sites.position[0] - 1,
            long_arm.tables.sites.position[-1] + 1,
        ]],
        simplify=False,
    )
    # The combined result is built by extending the short-arm tables.
    short_tables = short_arm.dump_tables()
    long_tables = long_arm.dump_tables()
    assert np.array_equal(short_tables.individuals.metadata,
                          long_tables.individuals.metadata)
    short_tables.sequence_length = long_arm.get_sequence_length()
    short_metadata = short_tables.nodes.metadata
    short_metadata_offset = short_tables.nodes.metadata_offset
    short_metadata = tskit.unpack_bytes(short_metadata, short_metadata_offset)

    long_metadata = long_tables.nodes.metadata
    long_metadata_offset = long_tables.nodes.metadata_offset
    long_metadata = tskit.unpack_bytes(long_metadata, long_metadata_offset)
    # Drop the first num_samples metadata entries of the long arm.
    # NOTE(review): presumably assumes the samples are the first nodes - confirm.
    long_metadata = long_metadata[long_arm.num_samples:]
    combined_metadata = np.concatenate([short_metadata, long_metadata])
    metadata, metadata_offset = tskit.pack_bytes(combined_metadata)

    # Append every long-arm node that is not a sample.
    all_nodes_except_samples = ~np.isin(np.arange(long_arm.num_nodes),
                                        long_arm.samples())
    short_tables.nodes.append_columns(
        long_tables.nodes.flags[all_nodes_except_samples],
        long_tables.nodes.time[all_nodes_except_samples],
        long_tables.nodes.population[all_nodes_except_samples],
    )
    # Rewrite the node table so that it carries the combined metadata.
    short_tables.nodes.set_columns(
        flags=short_tables.nodes.flags,
        time=short_tables.nodes.time,
        population=short_tables.nodes.population,
        metadata=metadata,
        individual=short_tables.nodes.individual,
        metadata_offset=metadata_offset,
    )

    long_edges_parent = long_tables.edges.parent
    long_edges_child = long_tables.edges.child
    # Lookup from a long-arm sample id to the corresponding short-arm sample id.
    long_arm_sample_map = np.zeros(long_arm.num_nodes).astype(int)
    long_arm_sample_map[long_arm.samples()] = short_arm.samples()
    # Non-sample parents are shifted up by the number of short-arm nodes...
    long_edges_parent[~np.isin(long_edges_parent, long_arm.samples(
    ))] = long_edges_parent[~np.isin(long_edges_parent, long_arm.samples()
                                     )] + (short_arm.num_nodes)
    # ...and parents whose original id is above the last sample id are then
    # shifted back down by the number of samples.
    long_edges_parent[
        long_arm.tables.edges.parent > long_arm.samples()[-1]] = (
            long_edges_parent[
                long_arm.tables.edges.parent > long_arm.samples()[-1]] -
            long_arm.num_samples)
    # The same two shifts are applied to the child column.
    long_edges_child[~np.isin(long_edges_child, long_arm.samples(
    ))] = long_edges_child[~np.isin(long_edges_child, long_arm.samples())] + (
        short_arm.num_nodes)
    long_edges_child[long_tables.edges.child > long_arm.samples()[-1]] = (
        long_edges_child[long_tables.edges.child > long_arm.samples()[-1]] -
        long_arm.num_samples)
    # Children that are samples are mapped onto the short-arm sample ids.
    long_edges_child[np.isin(
        long_tables.edges.child, long_arm.samples())] = long_arm_sample_map[
            long_tables.edges.child[np.isin(long_tables.edges.child,
                                            long_arm.samples())]]
    short_tables.edges.append_columns(
        long_tables.edges.left,
        long_tables.edges.right,
        long_edges_parent,
        long_edges_child,
    )
    short_tables.sites.append_columns(
        long_tables.sites.position,
        long_tables.sites.ancestral_state,
        long_tables.sites.ancestral_state_offset,
    )
    # Mutation nodes get the same remapping as the edge child column.
    long_mutations_node = long_tables.mutations.node
    long_mutations_node[~np.isin(long_mutations_node, long_arm.samples(
    ))] = long_mutations_node[~np.isin(long_mutations_node, long_arm.samples()
                                       )] + (short_arm.num_nodes)
    long_mutations_node[
        long_tables.mutations.node > long_arm.samples()[-1]] = (
            long_mutations_node[
                long_tables.mutations.node > long_arm.samples()[-1]] -
            long_arm.num_samples)
    long_mutations_node[np.isin(long_tables.mutations.node,
                                long_arm.samples())] = long_arm_sample_map[
                                    long_tables.mutations.node[np.isin(
                                        long_tables.mutations.node,
                                        long_arm.samples())]]
    # Long-arm site ids are offset by the number of short-arm sites, matching
    # the order in which the sites were appended above.
    short_tables.mutations.append_columns(
        long_tables.mutations.site + short_arm.num_sites,
        long_mutations_node,
        long_tables.mutations.derived_state,
        long_tables.mutations.derived_state_offset,
    )

    short_tables.sort()
    combined = short_tables.tree_sequence()
    # Sanity checks: table sizes must add up, with samples counted only once.
    assert combined.num_nodes == (short_arm.num_nodes + long_arm.num_nodes -
                                  short_arm.num_samples)
    assert combined.num_sites == (short_arm.num_sites + long_arm.num_sites)
    assert combined.num_edges == (short_arm.num_edges + long_arm.num_edges)
    assert combined.num_mutations == (short_arm.num_mutations +
                                      long_arm.num_mutations)
    assert (combined.num_individuals == short_arm.num_individuals ==
            long_arm.num_individuals)
    assert np.array_equal(
        np.sort(combined.tables.sites.position),
        np.concatenate(
            [short_arm.tables.sites.position, long_arm.tables.sites.position]),
    )
    # The times of the nodes carrying mutations must be preserved.
    assert np.array_equal(
        np.sort(combined.tables.nodes.time[combined.tables.mutations.node]),
        np.sort(
            np.concatenate([
                short_arm.tables.nodes.time[short_arm.tables.mutations.node],
                long_arm.tables.nodes.time[long_arm.tables.mutations.node],
            ])),
    )
    assert np.array_equal(combined.tables.individuals.metadata,
                          long_tables.individuals.metadata)
    combined.dump(args.output)

Exemplo n.º 11

0

Exibir arquivo

    def test_dump_to_tskit(self):
        """
        The tskit dump of the population must match the native tables in
        table sizes, edge column sums, total branch length, and in the
        individual and mutation metadata.
        """
        import tskit
        dumped_ts = self.pop.dump_tables_to_tskit()

        # Table lengths agree.
        for table_name in ("nodes", "edges", "mutations"):
            self.assertEqual(len(getattr(dumped_ts.tables, table_name)),
                             len(getattr(self.pop.tables, table_name)))

        # Column sums of the edge table agree.
        eview = np.array(self.pop.tables.edges, copy=False)
        for column in ("parent", "child", "left", "right"):
            self.assertEqual(eview[column].sum(),
                             getattr(dumped_ts.tables.edges, column).sum())

        # Total time summed over all trees agrees.
        sample_nodes = list(range(2 * self.pop.N))
        tv = fwdpy11.TreeIterator(self.pop.tables, sample_nodes)
        tt_fwd = sum(t.total_time(self.pop.tables.nodes) for t in tv)
        tt_tskit = sum(t.get_total_branch_length()
                       for t in dumped_ts.trees())
        self.assertEqual(tt_fwd, tt_tskit)

        # Now, we make sure that the metadata can
        # be decoded
        # NOTE(review): eval() is applied to metadata produced by the dump
        # under test; ast.literal_eval may be a safer choice - confirm.
        individual_fields = ("g", "w", "e", "label", "parents", "sex",
                             "deme", "geography")
        md = tskit.unpack_bytes(dumped_ts.tables.individuals.metadata,
                                dumped_ts.tables.individuals.metadata_offset)
        for expected, encoded in zip(self.pop.diploid_metadata, md):
            d = eval(encoded)
            for field in individual_fields:
                self.assertEqual(getattr(expected, field), d[field])

        # Test that we can go backwards from node table to individuals
        samples = np.where(
            dumped_ts.tables.nodes.flags == tskit.NODE_IS_SAMPLE)[0]
        self.assertEqual(len(samples), 2 * self.pop.N)
        for node_id in samples[::2]:
            ind = node_id // 2
            d = eval(md[ind])
            fwdpy11_md = self.pop.diploid_metadata[ind]
            for field in individual_fields:
                self.assertEqual(getattr(fwdpy11_md, field), d[field])

        # Mutation metadata agrees with the population's mutation records.
        md = tskit.unpack_bytes(dumped_ts.tables.mutations.metadata,
                                dumped_ts.tables.mutations.metadata_offset)
        for record, site_id, encoded in zip(self.pop.tables.mutations,
                                            dumped_ts.tables.mutations.site,
                                            md):
            d = eval(encoded)
            self.assertEqual(record.key, d['key'])
            site = dumped_ts.tables.sites[site_id]
            m = self.pop.mutations[d['key']]
            self.assertEqual(site.position, m.pos)
            for field in ("s", "h"):
                self.assertEqual(d[field], getattr(m, field))
            for field in ("esizes", "heffects"):
                self.assertTrue(np.array_equal(np.array(d[field]),
                                               getattr(m, field)))
            for field in ("label", "neutral"):
                self.assertEqual(d[field], getattr(m, field))

        self.assertEqual(mcounts_comparison(self.pop, dumped_ts), True)

Exemplo n.º 12

0

Exibir arquivo

Arquivo: tmrcas.py Projeto: benjeffery/unified_genealogy_paper

def get_pairwise_tmrca_pops(
    ts_name,
    max_pop_nodes,
    hist_nbins=30,
    hist_min_gens=1000,
    num_processes=1,
    restrict_populations=None,
    return_raw_data=False,
):
    """
    Get the mean tMRCA and a histogram of tMRCA times for pairs of populations from a
    tree sequence.

    :param str ts_name: Path of the tree sequence file to load.
    :param int max_pop_nodes: The maximum number of sample nodes per pop to use. This
        number of samples (or lower, for small populations) will be taken at random from
        each population as a set of representative samples for which to construct
        pairwise statistics
    :param int hist_nbins: The number of bins used to save the histogram data. Bins will
        be spaced out evenly on a log scale.
    :param float hist_min_gens: A lower cutoff for the histogram bins, as there is
        usually very little in the lowest (logged) bins
    :param int num_processes: The number of CPUs to run in parallel on the calculation.
    :param list restrict_populations: A list of population IDs or names giving the
        populations among which to calculate pairwise distances. If ``None`` (default)
        then use all the populations defined in the tree sequence.
    :param bool return_raw_data: If ``True``, also return the full dataset of weights
        (which may be huge, as it is ~ num_unique_times * n_pops * n_pops / 2).

    :return: a TmrcaData object containing a dataframe of the mean values for each
        pair, a HistData object with the histogram data, and (if ``return_raw_data`` is
        ``True``) a potentially huge numpy array of weights of pairs X unique_times
    :rtype: TmrcaData
    :raises ValueError: If the metadata of a non-sample node is not valid JSON.
    """
    ts = tskit.load(ts_name)
    deleted_trees = [tree.index for tree in ts.trees() if tree.parent(0) == -1]
    node_ages = np.zeros_like(ts.tables.nodes.time[:])
    metadata = ts.tables.nodes.metadata[:]
    metadata_offset = ts.tables.nodes.metadata_offset[:]
    # Build the sample lookup once; testing membership against the array
    # returned by samples() for every node is a linear scan per node.
    sample_ids = {int(u) for u in ts.samples()}
    try:
        for index, met in enumerate(
                tskit.unpack_bytes(metadata, metadata_offset)):
            if index in sample_ids:
                continue
            try:
                # Get unconstrained node age if available
                node_ages[index] = json.loads(met.decode())["mn"]
            except json.decoder.JSONDecodeError as err:
                raise ValueError(
                    "Tree Sequence must be dated to use unconstrained=True"
                ) from err
        logging.info("Using tsdate unconstrained node times")
    except KeyError:
        # Some node has no "mn" entry: discard any partially filled values
        # and fall back to the times stored in the node table.
        logging.info("Using standard ts node times")
        node_ages[:] = ts.tables.nodes.time[:]
    unique_times, time_index = np.unique(node_ages, return_inverse=True)
    with np.errstate(divide='ignore'):
        log_unique_times = np.log(unique_times)

    # Make a random selection of up to max_pop_nodes samples from each
    # population (fixed seed, so the selection is reproducible)
    np.random.seed(123)
    pop_nodes = ts.tables.nodes.population[ts.samples()]
    nodes_for_pop = {}
    if restrict_populations is None:
        pops = [pop.id for pop in ts.populations()]
    else:
        # Convert any named populations to population ids
        name2id = {
            json.loads(pop.metadata)["name"]: pop.id
            for pop in ts.populations()
        }
        # Integer ids are accepted as-is; strings are either numeric ids or
        # population names.
        pops = [
            int(p) if isinstance(p, (int, np.integer)) or p.isdigit()
            else name2id[p]
            for p in restrict_populations
        ]
    for pop_id in pops:
        pop_metadata = json.loads(ts.population(pop_id).metadata)
        key = pop_metadata["name"]
        # Hack to distinguish SGDP from HGDP (all uppercase) pop names
        if 'region' in pop_metadata and not pop_metadata['region'].isupper():
            key += " (SGDP)"
        assert key not in nodes_for_pop  # Check for duplicate names
        nodes = np.where(pop_nodes == pop_id)[0]
        if len(nodes) > max_pop_nodes:
            nodes_for_pop[key] = np.random.choice(nodes,
                                                  max_pop_nodes,
                                                  replace=False)
        else:
            nodes_for_pop[key] = nodes

    # Make all combinations of populations
    pop_names = list(nodes_for_pop.keys())
    tmrca_df = pd.DataFrame(columns=pop_names, index=pop_names)
    combos = itertools.combinations_with_replacement(
        np.arange(0, len(pop_names)), 2)
    combo_map = {c: i for i, c in enumerate(combos)}
    func_params = zip(
        combo_map.keys(),
        itertools.repeat(time_index),
        itertools.repeat(list(nodes_for_pop.values())),
        itertools.repeat(ts_name),
        itertools.repeat(deleted_trees),
    )
    # The `np.float` alias was removed in NumPy 1.24; the builtin `float`
    # is the same float64 dtype.
    data = np.zeros((len(combo_map), len(unique_times)), dtype=float)
    with multiprocessing.Pool(processes=num_processes) as pool:
        for tmrca_weight, combo in tqdm(pool.imap_unordered(
                get_tmrca_weights, func_params),
                                        total=len(combo_map)):
            popA = pop_names[combo[0]]
            popB = pop_names[combo[1]]
            keep = (tmrca_weight != 0)  # Deal with log_unique_times[0] == -inf
            mean_log_age = np.sum(log_unique_times[keep] * tmrca_weight[keep])
            mean_log_age /= np.sum(tmrca_weight)  # Normalise
            tmrca_df.loc[popA, popB] = np.exp(mean_log_age)
            data[combo_map[combo], :] = tmrca_weight
    bins, hist_data = make_histogram_data(log_unique_times, data, hist_nbins,
                                          hist_min_gens)
    named_combos = [None] * len(combo_map)
    for combo, i in combo_map.items():
        named_combos[i] = (pop_names[combo[0]], pop_names[combo[1]])
    hist = HistData(bins, hist_data, np.array(named_combos))
    if return_raw_data is False:
        data = None
    return TmrcaData(means=tmrca_df,
                     histogram=hist,
                     raw_data=(log_unique_times, data))

Exemplo n.º 13

0

Exibir arquivo

def _set_populations(
        tables, pop_id=None, selfing_fraction=0.0, female_cloning_fraction=0.0,
        male_cloning_fraction=0.0, sex_ratio=0.5, bounds_x0=0.0, bounds_x1=0.0,
        bounds_y0=0.0, bounds_y1=0.0, bounds_z0=0.0, bounds_z1=0.0,
        migration_records=None):
    '''
    Adds to a TableCollection the information about populations required for SLiM
    to load a tree sequence. This will replace anything already in the Population
    table.

    The number of populations is one more than the largest population id in
    the node table. Each per-population argument may be given either as a
    single float, which is used for every population, or as a sequence with
    one entry per population.

    :param tables: The table collection to annotate.
    :param pop_id: One id per population; defaults to ``0, 1, ...``.
    :param migration_records: One list of ``PopulationMigrationMetadata`` per
        population; defaults to empty lists.
    :raises ValueError: If the metadata of an individual cannot be decoded.
    '''
    num_pops = max(tables.nodes.population) + 1
    for md in tskit.unpack_bytes(tables.individuals.metadata,
                                   tables.individuals.metadata_offset):
        try:
            ind_md = decode_individual(md)
        except Exception as err:
            # Deliberately not a bare ``except``, so that KeyboardInterrupt
            # and SystemExit are not turned into a ValueError.
            raise ValueError("Individuals do not have metadata: "
                    "need to run set_nodes_individuals() first?") from err
        assert(ind_md.population < num_pops)

    def per_population(value):
        # A single float applies to every population; anything else must
        # already hold one entry per population.
        if isinstance(value, float):
            value = [value for _ in range(num_pops)]
        assert(len(value) == num_pops)
        return value

    if pop_id is None:
        pop_id = list(range(num_pops))
    assert(len(pop_id) == num_pops)

    selfing_fraction = per_population(selfing_fraction)
    female_cloning_fraction = per_population(female_cloning_fraction)
    male_cloning_fraction = per_population(male_cloning_fraction)
    sex_ratio = per_population(sex_ratio)
    bounds_x0 = per_population(bounds_x0)
    bounds_x1 = per_population(bounds_x1)
    bounds_y0 = per_population(bounds_y0)
    bounds_y1 = per_population(bounds_y1)
    bounds_z0 = per_population(bounds_z0)
    bounds_z1 = per_population(bounds_z1)

    if migration_records is None:
        migration_records = [[] for _ in range(num_pops)]
    assert(len(migration_records) == num_pops)
    for mrl in migration_records:
        for mr in mrl:
            assert(type(mr) is PopulationMigrationMetadata)

    population_metadata = [PopulationMetadata(*x) for x in
                           zip(pop_id, selfing_fraction, female_cloning_fraction,
                               male_cloning_fraction, sex_ratio, bounds_x0,
                               bounds_x1, bounds_y0, bounds_y1, bounds_z0, bounds_z1,
                               migration_records)]
    annotate_population_metadata(tables, population_metadata)

Exemplo n.º 14

0

Exibir arquivo

    def __init__(self, ts, reference_sequence=None):
        """
        Wrap ``ts``, upgrading it first if its file version is not "0.4",
        and pre-compute per-individual locations, ages, times and populations.

        :param ts: The tree sequence to wrap.
        :param reference_sequence: Stored unchanged on the instance.
        :raises ValueError: If the nodes of some individual differ in
            population or in time.
        """
        provenance = get_provenance(ts)
        # Read before any upgrade; this value is what gets stored on self.
        slim_generation = provenance.slim_generation
        if provenance.file_version != "0.4":
            warnings.warn("This is an version {} SLiM tree sequence.".format(provenance.file_version) +
                          " When you write this out, " +
                          "it will be converted to version 0.4.")
            tables = ts.dump_tables()
            if provenance.file_version == "0.1" or provenance.file_version == "0.2":
                # add empty nucleotide slots to metadata
                mut_bytes = tskit.unpack_bytes(tables.mutations.metadata,
                                               tables.mutations.metadata_offset)
                mut_metadata = [_decode_mutation_pre_nucleotides(md)
                                for md in mut_bytes]
                annotate_mutation_metadata(tables, mut_metadata)
            if provenance.file_version == "0.1":
                # shift times
                node_times = tables.nodes.time + slim_generation
                tables.nodes.set_columns(
                        flags=tables.nodes.flags,
                        time=node_times,
                        population=tables.nodes.population,
                        individual=tables.nodes.individual,
                        metadata=tables.nodes.metadata,
                        metadata_offset=tables.nodes.metadata_offset)
                migration_times = tables.migrations.time + slim_generation
                tables.migrations.set_columns(
                        left=tables.migrations.left,
                        right=tables.migrations.right,
                        node=tables.migrations.node,
                        source=tables.migrations.source,
                        dest=tables.migrations.dest,
                        time=migration_times)
            upgrade_slim_provenance(tables)
            # Rebuild the tree sequence from the upgraded tables and check
            # that it now reports the current file version.
            ts = tables.tree_sequence()
            provenance = get_provenance(ts)
            assert(provenance.file_version == "0.4")
        super().__init__(ts._ll_tree_sequence)
        self.slim_generation = slim_generation
        self.reference_sequence = reference_sequence
        # pre-extract individual metadata
        self.individual_locations = ts.tables.individuals.location
        # Reshape the flat location column into rows of three values each.
        self.individual_locations.shape = (int(len(self.individual_locations)/3), 3)
        self.individual_ages = np.zeros(ts.num_individuals, dtype='int')
        # NOTE(review): self.slim_provenance is not set in this method;
        # presumably it is defined elsewhere on the class - confirm.
        if self.slim_provenance.model_type != "WF":
            self.individual_ages = np.fromiter(map(lambda ind: decode_individual(ind.metadata).age, ts.individuals()), dtype='int64')

        # Defaults for individuals that have no nodes: time 0, population -1.
        self.individual_times = np.zeros(ts.num_individuals)
        self.individual_populations = np.repeat(np.int32(-1), ts.num_individuals)
        if not np.all(unique_labels_by_group(ts.tables.nodes.individual,
                                              ts.tables.nodes.population)):
            raise ValueError("Individual has nodes from more than one population.")
        if not np.all(unique_labels_by_group(ts.tables.nodes.individual,
                                              ts.tables.nodes.time)):
            raise ValueError("Individual has nodes from more than one time.")
        has_indiv = (ts.tables.nodes.individual >= 0)
        which_indiv = ts.tables.nodes.individual[has_indiv]
        # if we did not do the sanity check above then an individual with nodes in more than one pop
        # would get the pop of their last node in the list
        self.individual_populations[which_indiv] = ts.tables.nodes.population[has_indiv]
        self.individual_times[which_indiv] = ts.tables.nodes.time[has_indiv]