def _parse_and_add_from_stream(self, stream, schema, exclude_trees=False, exclude_chars=False, **kwargs):
    """
    Reads data from the file-like object ``stream`` in ``schema`` format
    into this data set, and reports what was added.

    Returns a 3-tuple: the number of taxon namespaces, tree lists, and
    character matrices added by the read.
    """
    taxon_namespace = taxonmodel.process_kwargs_dict_for_taxon_namespace(kwargs, None)
    if (self.attached_taxon_namespace is not None
            and taxon_namespace is not None
            and self.attached_taxon_namespace is not taxon_namespace):
        raise ValueError("DataSet has attached TaxonNamespace that is not the same as ``taxon_namespace``")
    if taxon_namespace is None:
        # Fall back to the attached namespace (may itself be None).
        taxon_namespace = self.attached_taxon_namespace
    # 'label' is not meaningful here; pop it so it does not reach the reader.
    kwargs.pop("label", None)
    reader = dataio.get_reader(schema, **kwargs)
    counts_before = (
        len(self.taxon_namespaces),
        len(self.tree_lists),
        len(self.char_matrices),
    )
    reader.read_dataset(
        stream=stream,
        dataset=self,
        taxon_namespace=taxon_namespace,
        exclude_trees=exclude_trees,
        exclude_chars=exclude_chars,
        state_alphabet_factory=charstatemodel.StateAlphabet,
    )
    counts_after = (
        len(self.taxon_namespaces),
        len(self.tree_lists),
        len(self.char_matrices),
    )
    return tuple(after - before for before, after in zip(counts_before, counts_after))
def iterate_over_trees(self, src, format, taxa_block=None, encode_splits=False, rooted=None, finish_node_func=None):
    """
    Lazily yields trees parsed from ``src`` in ``format``, one at a time.

    If ``taxa_block`` is given, it is registered with this dataset (if not
    already present) and used for the read; otherwise trees are read in
    the context of this dataset. The reader's prior state is restored
    once iteration is exhausted.
    """
    from dendropy import dataio
    reader = dataio.get_reader(format)
    reader.include_characters = False
    overrides = {
        "encode_splits": encode_splits,
        "default_rooting": rooted,
        "finish_node_func": finish_node_func,
    }
    saved_state = cache_reader_state(reader, **overrides)
    if taxa_block is None:
        tree_source = reader.iterate_over_trees(src, dataset=self)
    else:
        if taxa_block not in self.taxa_blocks:
            self.taxa_blocks.append(taxa_block)
        tree_source = reader.iterate_over_trees(src, taxa_block=taxa_block)
    for tree in tree_source:
        yield tree
    restore_reader_state(reader, saved_state)
def _parse_and_create_from_stream(cls, stream, schema, **kwargs):
    """
    Constructs a new |DataSet| object and populates it with data from
    file-like object ``stream``.
    """
    skip_trees = kwargs.pop("exclude_trees", False)
    skip_chars = kwargs.pop("exclude_chars", False)
    taxon_namespace = taxonmodel.process_kwargs_dict_for_taxon_namespace(kwargs, None)
    dataset = DataSet(label=kwargs.pop("label", None))
    if taxon_namespace is not None:
        dataset.attached_taxon_namespace = taxon_namespace
    reader = dataio.get_reader(schema, **kwargs)
    reader.read_dataset(
        stream=stream,
        dataset=dataset,
        taxon_namespace=taxon_namespace,
        exclude_trees=skip_trees,
        exclude_chars=skip_chars,
        state_alphabet_factory=charstatemodel.StateAlphabet,
    )
    return dataset
def check_parse_with_ambiguities(self, data_filename, expected_filename):
    """
    Parses ``data_filename`` as NEXUS, maps multistate characters to
    symbols, and compares the resulting (single) character matrix against
    the expectations in ``expected_filename``.
    """
    nexus_reader = dataio.get_reader('nexus')
    dataset = nexus_reader.read(stream=pathmap.char_source_stream(data_filename))
    self.assertEqual(len(dataset.char_matrices), 1)
    char_matrix = dataset.char_matrices[0]
    self.map_multistate_to_symbols(char_matrix)
    expected_label_symbol_stream = pathmap.char_source_stream(expected_filename)
    self.assertEqualCharMatrixLabelSymbols(
        char_matrix,
        expected_label_symbol_stream=expected_label_symbol_stream,
    )
def testReferenceTreeFileNoTaxaBlockNoTranslateBlockSameTaxa(self):
    """Reading with a shared taxon set should yield an equal tree list with the same taxa."""
    expected_trees = datagen.reference_tree_list()
    reader = dataio.get_reader("nexus", taxon_set=expected_trees.taxon_set)
    source = pathmap.tree_source_stream("pythonidae.reference-trees.no-taxa-no-translate-block.nexus")
    dataset = reader.read(stream=source)
    self.assertEqual(len(dataset.tree_lists), 1)
    self.assertDistinctButEqualTreeList(
        expected_trees,
        dataset.tree_lists[0],
        distinct_taxa=False,
        equal_oids=None,
    )
def check_continuous_chars_against_expected(self, data_filename, expected_filename, datatype):
    """
    Parses ``data_filename`` as NEXUS and checks the resulting (single)
    continuous character matrix against the values in ``expected_filename``.
    """
    self.logger.info("Checking '%s' => %s" % (data_filename, datatype.__name__))
    dataset = dataio.get_reader('nexus').read(
            stream=pathmap.char_source_stream(data_filename))
    self.assertEqual(len(dataset.char_matrices), 1)
    expected_label_symbol_stream = pathmap.char_source_stream(expected_filename)
    self.assertEqualCharMatrixLabelContinuousValues(
            dataset.char_matrices[0],
            expected_label_symbol_stream=expected_label_symbol_stream)
def check_parse_with_ambiguities(self, data_filename, expected_filename):
    """
    Reads ``data_filename`` (NEXUS), converts multistate characters to
    symbols in its single character matrix, and asserts the matrix
    matches the content of ``expected_filename``.
    """
    dataset = dataio.get_reader('nexus').read(
            stream=pathmap.char_source_stream(data_filename))
    self.assertEqual(len(dataset.char_matrices), 1)
    matrix = dataset.char_matrices[0]
    self.map_multistate_to_symbols(matrix)
    self.assertEqualCharMatrixLabelSymbols(
            matrix,
            expected_label_symbol_stream=pathmap.char_source_stream(expected_filename))
def read(self, src, format):
    """
    Populates this dataset from `src`, given in `format`.

    `src` is a file descriptor object; `format` is one of the supported
    file format identifiers: 'NEXUS' (incl. 'NEWICK'), 'NEXML' etc.
    Returns this dataset.
    """
    from dendropy import dataio
    dataio.get_reader(format).read_dataset(src, self)
    return self
def testReferenceTreeFileNoTaxaBlockDistinctTaxa(self):
    """Reading without a shared taxon set should yield an equal tree list with distinct taxa."""
    expected_trees = datagen.reference_tree_list()
    source = pathmap.tree_source_stream("pythonidae.reference-trees.no-taxa-block.nexus")
    dataset = dataio.get_reader('nexus').read(stream=source)
    self.assertEqual(len(dataset.tree_lists), 1)
    self.assertDistinctButEqualTreeList(
            expected_trees,
            dataset.tree_lists[0],
            distinct_taxa=True,
            equal_oids=None)
def testReferenceTreeFileNoTaxaBlockNoTranslateBlockSameTaxa(self):
    """Trees read against the reference taxon set should compare equal without distinct taxa."""
    reference = datagen.reference_tree_list()
    nexus_reader = dataio.get_reader('nexus', taxon_set=reference.taxon_set)
    dataset = nexus_reader.read(
            stream=pathmap.tree_source_stream(
                "pythonidae.reference-trees.no-taxa-no-translate-block.nexus"))
    self.assertEqual(len(dataset.tree_lists), 1)
    self.assertDistinctButEqualTreeList(
            reference,
            dataset.tree_lists[0],
            distinct_taxa=False,
            equal_oids=None)
def testReferenceTreeFileDistinctTaxa(self):
    """The reference tree file should parse into one tree list equal to the reference, with distinct taxa."""
    reference = datagen.reference_tree_list()
    source = pathmap.tree_source_stream(datagen.reference_trees_filename(schema="nexus"))
    dataset = dataio.get_reader('nexus').read(stream=source)
    self.assertEqual(len(dataset.tree_lists), 1)
    self.assertDistinctButEqualTreeList(
            reference,
            dataset.tree_lists[0],
            distinct_taxa=True,
            equal_oids=None)
def testReferenceTreeFileDistinctTaxa(self):
    """Parsing the reference NEXUS tree file should produce a single, equal tree list (distinct taxa)."""
    expected_trees = datagen.reference_tree_list()
    nexus_reader = dataio.get_reader('nexus')
    dataset = nexus_reader.read(
            stream=pathmap.tree_source_stream(
                datagen.reference_trees_filename(schema="nexus")))
    self.assertEqual(len(dataset.tree_lists), 1)
    self.assertDistinctButEqualTreeList(
            expected_trees,
            dataset.tree_lists[0],
            distinct_taxa=True,
            equal_oids=None)
def check_continuous_chars_against_expected(self, data_filename, expected_filename, datatype):
    """
    Reads ``data_filename`` (NEXUS) and verifies its single continuous
    character matrix against the expected values in ``expected_filename``.
    """
    self.logger.info("Checking '%s' => %s" % (data_filename, datatype.__name__))
    nexus_reader = dataio.get_reader('nexus')
    dataset = nexus_reader.read(stream=pathmap.char_source_stream(data_filename))
    expected_stream = pathmap.char_source_stream(expected_filename)
    self.assertEqual(len(dataset.char_matrices), 1)
    self.assertEqualCharMatrixLabelContinuousValues(
            dataset.char_matrices[0],
            expected_label_symbol_stream=expected_stream)
def read(self, stream, schema, **kwargs):
    """
    Populates this `DataSet` object from a file-like object data source
    `stream`, formatted in `schema`. `schema` must be a recognized and
    supported phylogenetic data file schema. If reading is not
    implemented for the schema specified, then a `UnsupportedSchemaError`
    is raised.

    The following optional keyword arguments are also recognized:

        - `exclude_trees` if True skips over tree data
        - `exclude_chars` if True skips over character data
        - `encode_splits` specifies whether or not split bitmasks will be
          calculated and attached to the edges.
        - `finish_node_func` is a function that will be applied to each
          node after it has been constructed.

    The following keyword arguments are recognized when parsing NEXUS or
    NEWICK sources:

        - `taxon_set` TaxonSet object to use when reading data
        - `as_rooted=True` (or `as_unrooted=False`) interprets trees as rooted
        - `as_unrooted=True` (or `as_rooted=False`) interprets trees as unrooted
        - `default_as_rooted=True` (or `default_as_unrooted=False`) interprets
          all trees as rooted if rooting not given by `[&R]` or `[&U]` comments
        - `default_as_unrooted=True` (or `default_as_rooted=False`) interprets
          all trees as unrooted if rooting not given by `[&R]` or `[&U]` comments
        - `edge_len_type` specifies the type of the edge lengths (int or float)

    Additional keyword arguments may be handled by various readers
    specialized to handle specific data formats.
    """
    from dendropy.dataio import get_reader
    kwargs["dataset"] = self
    self.process_taxon_set_directives(**kwargs)
    if self.attached_taxon_set is not None:
        if "taxon_set" not in kwargs:
            kwargs["taxon_set"] = self.attached_taxon_set
        elif kwargs["taxon_set"] is not self.attached_taxon_set:
            raise TypeError("DataSet object is already attached to a TaxonSet, "
                    "but a different TaxonSet was passed using the 'taxon_set' "
                    "keyword argument")
    reader = get_reader(schema=schema, **kwargs)
    try:
        reader.read(stream)
    # 'except E as x' is valid from Python 2.6 and required in Python 3,
    # replacing the Python-2-only 'except E, x' form.
    except error.DataParseError as x:
        # Tag the error with the offending source before re-raising.
        x.decorate_with_name(stream=stream)
        raise
def read(self, stream, schema, **kwargs):
    """
    Populates this `DataSet` object from a file-like object data source
    `stream`, formatted in `schema`. `schema` must be a recognized and
    supported phylogenetic data file schema. If reading is not
    implemented for the schema specified, then a `UnsupportedSchemaError`
    is raised.

    The following optional keyword arguments are also recognized:

        - `exclude_trees` if True skips over tree data
        - `exclude_chars` if True skips over character data
        - `encode_splits` specifies whether or not split bitmasks will be
          calculated and attached to the edges.
        - `finish_node_func` is a function that will be applied to each
          node after it has been constructed.

    The following keyword arguments are recognized when parsing NEXUS or
    NEWICK sources:

        - `taxon_set` TaxonSet object to use when reading data
        - `as_rooted=True` (or `as_unrooted=False`) interprets trees as rooted
        - `as_unrooted=True` (or `as_rooted=False`) interprets trees as unrooted
        - `default_as_rooted=True` (or `default_as_unrooted=False`) interprets
          all trees as rooted if rooting not given by `[&R]` or `[&U]` comments
        - `default_as_unrooted=True` (or `default_as_rooted=False`) interprets
          all trees as unrooted if rooting not given by `[&R]` or `[&U]` comments
        - `edge_len_type` specifies the type of the edge lengths (int or float)

    Additional keyword arguments may be handled by various readers
    specialized to handle specific data formats.
    """
    from dendropy.dataio import get_reader
    kwargs["dataset"] = self
    self.process_taxon_set_directives(**kwargs)
    if self.attached_taxon_set is not None:
        if "taxon_set" not in kwargs:
            kwargs["taxon_set"] = self.attached_taxon_set
        elif kwargs["taxon_set"] is not self.attached_taxon_set:
            raise TypeError("DataSet object is already attached to a TaxonSet, "
                    "but a different TaxonSet was passed using the 'taxon_set' "
                    "keyword argument")
    reader = get_reader(schema=schema, **kwargs)
    try:
        reader.read(stream)
    # 'except E as x' is valid from Python 2.6 and required in Python 3,
    # replacing the Python-2-only 'except E, x' form.
    except error.DataParseError as x:
        # Tag the error with the offending source before re-raising.
        x.decorate_with_name(stream=stream)
        raise
def read_trees(self, src, format, encode_splits=False, rooted=None, finish_node_func=None):
    """
    Populates this dataset with trees from `src`, given in `format`.

    `src` is a file descriptor object; `format` is one of the supported
    file format identifiers: 'NEXUS' (incl. 'NEWICK'), 'NEXML' etc. A
    (plain) list of all trees read (including those from multiple
    TreesBlocks) is returned. In single-taxon-block data formats (e.g.,
    NEXUS, NEWICK), all trees will share the same existing TaxonBlock
    (which will be expanded to include new taxa in the trees, if any).
    """
    from dendropy import dataio
    reader = dataio.get_reader(format)
    reader.include_characters = False
    n_blocks_before = len(self.trees_blocks)
    # Reader-state caching only applies to NEXUS/NEWICK readers.
    is_nexus_or_newick = format.upper() in ("NEXUS", "NEWICK")
    if is_nexus_or_newick:
        cache = cache_reader_state(reader,
                encode_splits=encode_splits,
                default_rooting=rooted,
                finish_node_func=finish_node_func)
    reader.read_dataset(src, self)
    if is_nexus_or_newick:
        restore_reader_state(reader, cache)
    # Collect all trees from any blocks appended by this read.
    new_trees = []
    for trees_block in self.trees_blocks[n_blocks_before:]:
        new_trees.extend(trees_block)
    return new_trees
def testParseMesquiteMultiTaxa(self):
    """Verifies taxon references across a multi-taxa Mesquite NEXUS file."""
    nexus_reader = dataio.get_reader('nexus')
    source = pathmap.mixed_source_stream("multitaxa_mesquite.nex")
    dataset = nexus_reader.read(stream=source)
    self.check_full_dataset_taxon_references(dataset)
def testParseMesquiteMultiTaxa(self):
    """Checks that taxon references resolve correctly in a multi-taxa Mesquite file."""
    dataset = dataio.get_reader('nexus').read(
            stream=pathmap.mixed_source_stream("multitaxa_mesquite.nex"))
    self.check_full_dataset_taxon_references(dataset)