def test_wrapped_data(examples):
    """Parse ``example-wrapped-data.nex`` and check its data block.

    Verifies the gap symbol, the number of taxa and characters, and the
    number of gap / missing sites recorded for the last taxon.
    """
    reader = NexusReader.from_file(examples / 'example-wrapped-data.nex')
    data = reader.data
    gap_symbol = data.format['gap']

    assert gap_symbol == '-'
    assert len(data.taxa) == 3
    assert data.nchar == 471

    last_taxon_sites = data.matrix[data.taxa[-1]]
    assert sum(1 for site in last_taxon_sites if site == gap_symbol) == 5

    # The count of missing sites is only checked on Python 3.6 and later.
    if sys.version_info >= (3, 6):
        missing_symbol = data.format['missing']
        assert sum(1 for site in last_taxon_sites if site == missing_symbol) == 85
def get_nexus_reader(thing):
    """Return a ``NexusReader`` for *thing*.

    *thing* may be a NEXUS string, a ``pathlib.Path`` to a NEXUS file, a
    ``NexusWriter`` (its ``write()`` output is parsed) or an existing
    ``NexusReader``, which is returned unchanged. Anything else fails the
    final assertion.
    """
    if isinstance(thing, str):
        reader = NexusReader.from_string(thing)
    elif isinstance(thing, pathlib.Path):
        reader = NexusReader.from_file(thing)
    elif isinstance(thing, NexusWriter):
        reader = NexusReader.from_string(thing.write())
    else:
        assert isinstance(thing, NexusReader)
        reader = thing
    return reader
def make_indo_european_array():
    """Load the matrix of ``IELex_Bouckaert2012.nex`` as a float32 array.

    The NEXUS data matrix is turned into a DataFrame with one row per key of
    ``n.data.matrix``; every ``'?'`` entry is replaced by 0 before the
    conversion to a numeric array.

    Returns:
        A tuple ``(array, missing_data_matrix, samples, features, df)`` where
        ``array`` is the float32 matrix, ``missing_data_matrix`` is an
        all-ones float32 array of the same shape, ``samples`` and ``features``
        are the fixed values 103 and 6280, and ``df`` is the DataFrame the
        array was built from.
    """
    n = NexusReader.from_file('IELex_Bouckaert2012.nex')
    df = pd.DataFrame.from_dict(n.data.matrix, orient='index')
    df = df.replace('?', np.nan)
    # cheating at the moment by replacing '?' with 0
    df = df.replace(np.nan, 0)

    array = df.to_numpy().astype(np.float32)
    # All ones: no entry is flagged as missing, in line with the shortcut above.
    missing_data_matrix = np.ones(array.shape, dtype=np.float32)

    # NOTE(review): hard-coded; presumably these are the dimensions of the
    # matrix in the file above -- confirm before using another dataset.
    samples = 103
    features = 6280
    return array, missing_data_matrix, samples, features, df
def main(args):
    """Print the edges of a tree to stdout, one ``v1,v2`` pair per line.

    The Newick string is the first line read from ``args.newick`` when that
    is set; otherwise it is extracted from the first tree of the NEXUS file
    named by ``args.nexus.name``. If neither is set, an empty string is handed
    to ``tree_to_adjacency_list``.

    Raises:
        IndexError: if the first tree of the NEXUS file contains no ``=``.
    """
    newick = ''
    if args.newick:
        newick = args.newick.readline()
    elif args.nexus:
        nexus = NexusReader.from_file(args.nexus.name)
        # Take everything after the FIRST '=' only: the Newick part may itself
        # contain '=' (e.g. inside annotations such as [&rate=0.1]), and an
        # unlimited split would cut the tree off at the second '='.
        newick = nexus.trees.trees[0].split('=', 1)[1].strip()

    # print adjacency list to stdout
    for v1, v2 in tree_to_adjacency_list(newick):
        print('{},{}'.format(v1, v2))
def get_reader(args, many=False, required_blocks=None):
    """Build NexusReader object(s) for ``args.filename``.

    With ``many`` set, ``args.filename`` is iterated and a list of readers is
    returned; otherwise it is treated as a single entry and one reader is
    returned. An entry of ``None`` means the NEXUS content is read from stdin.

    Raises:
        ParserError: if a reader lacks (or has a falsy) attribute for any name
            listed in ``required_blocks``.
    """
    filenames = args.filename if many else [args.filename]

    # All inputs are parsed before any of them is validated.
    readers = [
        NexusReader.from_string(sys.stdin.read()) if name is None
        else NexusReader.from_file(name)
        for name in filenames
    ]

    for nex in readers:
        for block in required_blocks or ():
            if getattr(nex, block, None):
                continue
            message = 'Nexus file {0} has no {1} block'.format(nex.filename, block)
            raise ParserError(colored(message, 'red', attrs=['bold']))

    return readers if many else readers[0]
def cmd_makecldf(self, args):
    """Write the trees found below ``self.raw_dir`` to the CLDF dataset.

    Each sub-directory of ``self.raw_dir`` is processed in order of its stem.
    Its ``taxa.csv`` maps taxon labels to glottocodes; ``summary.trees``
    (first tree only) and ``posterior.trees`` (all trees), when present, are
    added to ``trees.csv`` with node names replaced by glottocodes where one
    is known. One ``treelabels.csv`` row is written per taxon, and one
    ``LanguageTable`` row per distinct glottocode.
    """
    def relabelled_newick(tree, labels):
        """Return the Newick string of *tree* with node names mapped via *labels*."""
        root = loads(tree.newick_string, strip_comments=True)[0]
        for node in root.walk():
            node.name = labels.get(node.name, node.name)
        return root.newick

    args.writer.cldf.add_component('LanguageTable')
    args.writer.cldf.add_table(
        'trees.csv',
        {
            "name": 'ID',
            "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id"
        },
        {
            "name": 'Name',
            "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#name"
        },
        {
            "name": "rooted",
            "datatype": "boolean"
        },
        "Newick",
        "Type",
        "dplace_ID",
        "source",
    )
    #
    # FIXME: need to store the original tree ID and add the source!
    #
    t = args.writer.cldf.add_table(
        'treelabels.csv',
        {
            "name": 'ID',
            "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id"
        },
        {
            "name": 'Name',
            "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#name"
        },
        {
            "name": "Language_ID",
            "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#languageReference"
        },
        {
            "name": "Tree_ID",
            "separator": " "
        },
    )
    t.add_foreign_key('Tree_ID', 'trees.csv', 'ID')

    # (file name, value for the "Type" column, use only the first tree?)
    tree_files = (
        ('summary.trees', 'summary', True),
        ('posterior.trees', 'sample', False),
    )

    gcs = set()
    treelabel_id = 0
    tree_id = 0
    for d in sorted(self.raw_dir.iterdir(), key=lambda p: p.stem):
        if not d.is_dir():
            continue
        print(d.stem)
        labels = {
            row['taxon']: row['glottocode']
            for row in reader(d.joinpath('taxa.csv'), dicts=True)
        }

        tree_ids = []
        for fname, tree_type, first_only in tree_files:
            path = d.joinpath(fname)
            if not path.exists():
                continue
            nx = NexusReader.from_file(path)
            nx.trees.detranslate()
            for tree in ([nx.trees[0]] if first_only else nx.trees):
                newick = relabelled_newick(tree, labels)
                tree_id += 1
                args.writer.objects['trees.csv'].append({
                    'ID': str(tree_id),
                    'Name': tree.name,
                    'rooted': tree.rooted,
                    'Newick': newick,
                    'Type': tree_type,
                    # The key has to match the "dplace_ID" column declared
                    # above; the former spelling "dplace_id" did not, so the
                    # value presumably never reached that column.
                    'dplace_ID': d.stem,
                })
                tree_ids.append(tree_id)

        # Every taxon gets a label row referencing all trees of this
        # directory; only non-empty glottocodes go into the language table.
        for name, gc in sorted(labels.items()):
            if gc:
                gcs.add(gc)
            treelabel_id += 1
            args.writer.objects['treelabels.csv'].append({
                'ID': str(treelabel_id),
                'Name': name,
                'Language_ID': gc,
                'Tree_ID': [str(i) for i in tree_ids],
            })

    for gc in sorted(gcs):
        lang = args.glottolog.api.cached_languoids.get(gc)
        if not lang:
            # Unknown glottocodes are still written, just without metadata.
            args.log.warning('invalid glottocode: {0}'.format(gc))
        args.writer.objects['LanguageTable'].append({
            'ID': gc,
            'Name': lang.name if lang else None,
            'Latitude': lang.latitude if lang else None,
            'Longitude': lang.longitude if lang else None,
            'ISO639P3code': lang.iso if lang else None,
            'Glottocode': gc,
        })
def _make(fname):
    """Return a ``NexusReader`` for the file *fname* inside ``examples``."""
    path = examples / fname
    return NexusReader.from_file(path)