示例#1
0
def test_wrapped_data(examples):
    """Check the data block parsed from ``example-wrapped-data.nex``.

    Verifies the gap symbol, the number of taxa and characters, and the
    number of gap (and, on Python >= 3.6, missing) sites of the last taxon.
    """
    reader = NexusReader.from_file(examples / 'example-wrapped-data.nex')
    gap_symbol = reader.data.format['gap']
    assert gap_symbol == '-'

    assert len(reader.data.taxa) == 3
    assert reader.data.nchar == 471

    last_taxon = reader.data.taxa[-1]
    sites = reader.data.matrix[last_taxon]
    gap_count = sum(1 for site in sites if site == gap_symbol)
    assert gap_count == 5

    # The missing-site count is only checked on Python 3.6 and later.
    if sys.version_info >= (3, 6):
        missing_symbol = reader.data.format['missing']
        missing_count = sum(1 for site in sites if site == missing_symbol)
        assert missing_count == 85
示例#2
0
def get_nexus_reader(thing):
    """Return a ``NexusReader`` for ``thing``.

    ``thing`` may be a string (parsed as NEXUS content), a ``pathlib.Path``
    (read from that file), a ``NexusWriter`` (its written output is parsed)
    or an existing ``NexusReader``, which is returned unchanged.  Any other
    type fails the final assertion.
    """
    if isinstance(thing, str):
        reader = NexusReader.from_string(thing)
    elif isinstance(thing, pathlib.Path):
        reader = NexusReader.from_file(thing)
    elif isinstance(thing, NexusWriter):
        serialized = thing.write()
        reader = NexusReader.from_string(serialized)
    else:
        assert isinstance(thing, NexusReader)
        reader = thing
    return reader
示例#3
0
def make_indo_european_array():
    """Load the data matrix of ``IELex_Bouckaert2012.nex`` as a float32 array.

    The NEXUS file is read from the current working directory.  Each key of
    the data matrix becomes one row of the data frame.

    Returns:
        A tuple ``(array, missing_data_matrix, samples, features, df)``:
        the float32 array built from the data frame, an all-ones float32
        array of the same shape, the hard-coded sample and feature counts,
        and the data frame itself (with '?' already replaced by 0).
    """
    n = NexusReader.from_file('IELex_Bouckaert2012.nex')
    df = pd.DataFrame.from_dict(n.data.matrix, orient='index')
    # Cheating at the moment: '?' is first turned into NaN and then every
    # NaN is replaced by 0, so missing values end up as 0 in the array.
    df = df.replace('?', np.nan)
    df = df.replace(np.nan, 0)
    array = df.to_numpy().astype(np.float32)
    # All ones: no cell is marked as missing (see the note above).
    missing_data_matrix = np.ones(array.shape, dtype=np.float32)
    # NOTE(review): these are fixed constants, not derived from array.shape;
    # presumably they match the dimensions of the bundled file -- confirm.
    samples = 103
    features = 6280
    return array, missing_data_matrix, samples, features, df
示例#4
0
def main(args):
  """Print the edges of a tree to stdout, one ``v1,v2`` pair per line.

  The Newick string is taken from the first line of ``args.newick`` when
  that is set; otherwise from the first tree of the NEXUS file named by
  ``args.nexus.name`` when ``args.nexus`` is set.  If neither is set, an
  empty string is passed to ``tree_to_adjacency_list``.

  Raises:
    IndexError: if the first NEXUS tree statement contains no '='.
  """
  newick = ''
  if args.newick:
    newick = args.newick.readline()
  elif args.nexus:
    nexus = NexusReader.from_file(args.nexus.name)
    # Keep everything after the FIRST '=' only.  Splitting on every '='
    # would cut the Newick string short as soon as it contains another '=',
    # e.g. inside an annotation such as "[&rate=0.1]".
    newick = nexus.trees.trees[0].split('=', 1)[1].strip()

  # print adjacency list to stdout
  for v1, v2 in tree_to_adjacency_list(newick):
    print('{},{}'.format(v1, v2))
示例#5
0
def get_reader(args, many=False, required_blocks=None):
    """Create NexusReader object(s) for the input named in ``args.filename``.

    :param args: object with a ``filename`` attribute; an iterable of file \
    names when ``many`` is true, otherwise a single file name.  A name of \
    ``None`` means the content is read from standard input.
    :param many: whether ``args.filename`` holds several names.
    :param required_blocks: optional iterable of attribute names that must \
    be present and truthy on every reader.
    :return: a list of readers when ``many`` is true, else a single reader.
    :raises ParserError: if a reader lacks one of the required blocks.
    """
    sources = args.filename if many else [args.filename]
    readers = [
        NexusReader.from_string(sys.stdin.read()) if source is None
        else NexusReader.from_file(source)
        for source in sources
    ]

    blocks = required_blocks if required_blocks else ()
    for nex in readers:
        for block in blocks:
            if getattr(nex, block, None):
                continue
            message = 'Nexus file {0} has no {1} block'.format(
                nex.filename, block)
            raise ParserError(colored(message, 'red', attrs=['bold']))

    if many:
        return readers
    return readers[0]
示例#6
0
 def cmd_makecldf(self, args):
     """Populate the CLDF dataset with tree, tree-label and language tables.

     Each sub-directory of ``self.raw_dir`` is processed in order of its
     stem.  Its ``taxa.csv`` supplies the taxon -> glottocode mapping.  The
     first tree of ``summary.trees`` (if that file exists) is stored with
     Type 'summary', and every tree of ``posterior.trees`` (if that file
     exists) with Type 'sample'.  Node names listed as taxa in ``taxa.csv``
     are replaced by the mapped glottocode value.  One ``treelabels.csv``
     row is written per taxon that has a non-empty glottocode, and finally
     one LanguageTable row per distinct glottocode, with metadata looked up
     in ``args.glottolog.api.cached_languoids``.

     :param args: command arguments; this method uses ``args.writer``,
         ``args.glottolog`` and ``args.log``.
     """
     args.writer.cldf.add_component('LanguageTable')
     args.writer.cldf.add_table(
         'trees.csv',
         {
             "name": 'ID',
             "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id"
         },
         {
             "name": 'Name',
             "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#name"
         },
         {
             "name": "rooted",
             "datatype": "boolean"
         },
         "Newick",
         "Type",
         "dplace_ID",
         "source",
     )
     #
     # FIXME: need to store the original tree ID and add the source!
     #
     t = args.writer.cldf.add_table(
         'treelabels.csv',
         {
             "name": 'ID',
             "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#id"
         },
         {
             "name": 'Name',
             "propertyUrl": "http://cldf.clld.org/v1.0/terms.rdf#name"
         },
         {
             "name": "Language_ID",
             "propertyUrl":
             "http://cldf.clld.org/v1.0/terms.rdf#languageReference"
         },
         {
             "name": "Tree_ID",
             "separator": " "
         },
     )
     t.add_foreign_key('Tree_ID', 'trees.csv', 'ID')

     def tree_row(tree_id, tree, tree_type, dplace_id, labels):
         # Build one trees.csv row; node names found in `labels` are
         # replaced by the mapped glottocode value.
         root = loads(tree.newick_string, strip_comments=True)[0]
         for node in root.walk():
             node.name = labels.get(node.name, node.name)
         return {
             'ID': str(tree_id),
             'Name': tree.name,
             'rooted': tree.rooted,
             'Newick': root.newick,
             'Type': tree_type,
             # The key must be spelled exactly like the "dplace_ID" column
             # declared for trees.csv above; the former "dplace_id" key
             # did not match any declared column.
             'dplace_ID': dplace_id,
         }

     # (file name, value for the "Type" column, use only the first tree?)
     tree_sources = (
         ('summary.trees', 'summary', True),
         ('posterior.trees', 'sample', False),
     )

     gcs = set()
     treelabel_id = 0
     tree_id = 0
     for d in sorted(self.raw_dir.iterdir(), key=lambda p: p.stem):
         if not d.is_dir():
             continue
         print(d.stem)
         labels = {
             l['taxon']: l['glottocode']
             for l in reader(d.joinpath('taxa.csv'), dicts=True)
         }
         tree_ids = []
         for fname, tree_type, first_only in tree_sources:
             path = d.joinpath(fname)
             if not path.exists():
                 continue
             nx = NexusReader.from_file(path)
             nx.trees.detranslate()
             trees = [nx.trees[0]] if first_only else nx.trees
             for tree in trees:
                 # Tree IDs are numbered consecutively across all
                 # directories and both tree files.
                 tree_id += 1
                 args.writer.objects['trees.csv'].append(
                     tree_row(tree_id, tree, tree_type, d.stem, labels))
                 tree_ids.append(tree_id)
         for name, gc in sorted(labels.items()):
             if not gc:
                 # Taxa without a glottocode get no tree label row.
                 continue
             gcs.add(gc)
             treelabel_id += 1
             args.writer.objects['treelabels.csv'].append({
                 'ID': str(treelabel_id),
                 'Name': name,
                 'Language_ID': gc,
                 # Every label refers to all trees of its directory.
                 'Tree_ID': [str(i) for i in tree_ids],
             })
     for gc in sorted(gcs):
         lang = args.glottolog.api.cached_languoids.get(gc)
         if not lang:
             args.log.warning('invalid glottocode: {0}'.format(gc))
         # Unknown glottocodes still get a row, with empty metadata.
         args.writer.objects['LanguageTable'].append({
             'ID': gc,
             'Name': lang.name if lang else None,
             'Latitude': lang.latitude if lang else None,
             'Longitude': lang.longitude if lang else None,
             'ISO639P3code': lang.iso if lang else None,
             'Glottocode': gc,
         })
示例#7
0
 def _make(fname):
     """Return a NexusReader for the file ``fname`` under ``examples``."""
     path = examples / fname
     return NexusReader.from_file(path)