def extract_ancestors(samples, ts):
    """
    Given the specified sample data file and final (unsimplified) tree sequence
    output by tsinfer, return the same tree sequence with the samples removed,
    which can then be used as an ancestors tree sequence.

    Returns a (tables, node_id_map) tuple: the edited TableCollection and the
    node ID mapping produced by simplify (old node ID -> new node ID).
    """
    # Restrict the tree sequence to the positions of the inference sites only.
    position = samples.sites_position[:][samples.sites_inference[:]]
    ts = subset_sites(ts, position)
    tables = ts.dump_tables()
    # The nodes that we want to keep are all those *except* what
    # has been marked as samples.
    # NOTE(review): this is an exact-equality test, so it assumes sample nodes
    # carry *only* the NODE_IS_SAMPLE flag (no other bits set) — confirm
    # against how tsinfer sets node flags.
    samples = np.where(tables.nodes.flags != tskit.NODE_IS_SAMPLE)[0].astype(np.int32)

    # Mark all nodes as samples, so that simplify() retains every non-sample
    # node when we pass them as the samples list below.
    tables.nodes.set_columns(
        flags=np.bitwise_or(tables.nodes.flags, tskit.NODE_IS_SAMPLE),
        time=tables.nodes.time,
        population=tables.nodes.population,
        individual=tables.nodes.individual,
        metadata=tables.nodes.metadata,
        metadata_offset=tables.nodes.metadata_offset)
    # Now simplify down the tables to get rid of all sample edges.
    node_id_map = tables.simplify(
        samples, filter_sites=False, filter_individuals=True,
        filter_populations=False)

    # We cannot have flags that are both samples and have other flags set,
    # so we need to unset all the sample flags for these.
    flags = np.zeros_like(tables.nodes.flags)
    # Nodes whose flags are exactly NODE_IS_SAMPLE keep that single flag.
    index = tables.nodes.flags == tskit.NODE_IS_SAMPLE
    flags[index] = tskit.NODE_IS_SAMPLE
    # All other nodes have the sample bit cleared but keep their other flags.
    index = tables.nodes.flags != tskit.NODE_IS_SAMPLE
    flags[index] = np.bitwise_and(tables.nodes.flags[index], ~tskit.NODE_IS_SAMPLE)
    tables.nodes.set_columns(
        flags=flags,
        time=tables.nodes.time,
        population=tables.nodes.population,
        individual=tables.nodes.individual,
        metadata=tables.nodes.metadata,
        metadata_offset=tables.nodes.metadata_offset)
    # Drop site metadata and set the ancestral_state to zeros.
    # Each state is the single character '0'; the offsets 0,1,2,... encode
    # one byte per site in the ragged-column format.
    tables.sites.set_columns(
        position=tables.sites.position,
        ancestral_state=np.zeros(len(tables.sites), dtype=np.int8) + ord('0'),
        ancestral_state_offset=np.arange(len(tables.sites) + 1, dtype=np.uint32))
    # Drop mutation metadata and set the derived_state to ones ('1' per mutation).
    tables.mutations.set_columns(
        site=tables.mutations.site,
        node=tables.mutations.node,
        derived_state=np.zeros(len(tables.mutations), dtype=np.int8) + ord('1'),
        derived_state_offset=np.arange(len(tables.mutations) + 1, dtype=np.uint32))
    # Record this operation in the provenance table.
    record = provenance.get_provenance_dict(command="extract_ancestors")
    tables.provenances.add_row(record=json.dumps(record))

    return tables, node_id_map
def snip_centromere(ts, left, right):
    """
    Cuts tree topology information out of the specified tree sequence in the
    specified region. The tree sequence will effectively be in two halves.
    There cannot be any sites within the removed region.

    Raises ValueError if the interval is not strictly inside the sequence
    or if any site falls within [left, right).
    """
    if not (0 < left < right < ts.sequence_length):
        raise ValueError("Invalid centromere coordinates")
    tables = ts.dump_tables()
    if len(tables.sites) > 0:
        position = tables.sites.position
        # Sites are stored sorted by position, so a binary search locates the
        # range of sites falling inside [left, right).
        left_index = np.searchsorted(position, left)
        right_index = np.searchsorted(position, right)
        if right_index != left_index:
            raise ValueError("Cannot have sites defined within the centromere")

    edges = tables.edges.copy()
    # Get all edges that do not intersect and add them in directly.
    index = np.logical_or(right <= edges.left, left >= edges.right)
    tables.edges.set_columns(
        left=edges.left[index], right=edges.right[index],
        parent=edges.parent[index], child=edges.child[index])
    # Get all edges that intersect and add two edges for each:
    # the part to the left of the centromere and the part to the right.
    index = np.logical_not(index)
    i_parent = edges.parent[index]
    i_child = edges.child[index]
    i_left = edges.left[index]
    i_right = edges.right[index]

    # Only insert valid edges (remove any entirely lost topology):
    # an edge contributes a left-hand piece only if it starts before `left`.
    index = i_left < left
    num_intersecting = np.sum(index)
    tables.edges.append_columns(
        left=i_left[index],
        right=np.full(num_intersecting, left, dtype=np.float64),
        parent=i_parent[index],
        child=i_child[index])

    # Only insert valid edges (remove any entirely lost topology):
    # an edge contributes a right-hand piece only if it ends after `right`.
    index = right < i_right
    num_intersecting = np.sum(index)
    tables.edges.append_columns(
        left=np.full(num_intersecting, right, dtype=np.float64),
        right=i_right[index],
        parent=i_parent[index],
        child=i_child[index])
    # append_columns leaves the edge table out of sorted order; restore the
    # canonical ordering required to build a tree sequence.
    tables.sort()
    # Record this operation in the provenance table.
    record = provenance.get_provenance_dict(
        command="snip_centromere", left=left, right=right)
    tables.provenances.add_row(record=json.dumps(record))
    return tables.tree_sequence()
def validate_encoding(self, params):
    """
    Check that the given parameters round-trip through the "parameters"
    section of the provenance dictionary, alongside the command name.
    """
    parameters = provenance.get_provenance_dict("test", **params)["parameters"]
    assert parameters["command"] == "test"
    del parameters["command"]
    assert parameters == params
def test_no_command(self):
    """Calling get_provenance_dict with no command must raise ValueError."""
    pytest.raises(ValueError, provenance.get_provenance_dict)
def validate_encoding(self, params):
    """
    Check that the given parameters round-trip through the "parameters"
    section of the provenance dictionary, alongside the command name.
    """
    parameters = provenance.get_provenance_dict("test", **params)["parameters"]
    # Popping the command both verifies it and leaves only the user params.
    self.assertEqual(parameters.pop("command"), "test")
    self.assertEqual(parameters, params)
def test_no_command(self):
    """Calling get_provenance_dict with no command must raise ValueError."""
    self.assertRaises(ValueError, provenance.get_provenance_dict)