示例#1
0
# -*- coding: utf-8 -*-
###################################################################################################
# Much of the following code is from JAR
from __future__ import print_function
from unidecode import unidecode
import csv
import io
import os
import time

from bs4 import BeautifulSoup as Soup
from peyotl import (assure_dir_exists, get_logger, download_large_file)

from taxalotl.resource_wrapper import TaxonomyWrapper

_LOG = get_logger(__name__)
DOMAIN = "http://www.theplantlist.org"
THROTTLE_BREAK = 10

_num_downloads_this_session = 0


def download_csv_for_family(fam_dir, fam_html_fp, url_pref):
    """Find the single CSV link in a family's saved HTML page.

    Reads the UTF-8 HTML file at `fam_html_fp` and looks for `<a>` elements
    whose `type` attribute is "text/csv". Exactly one such link is required.

    Raises:
        RuntimeError: if the page does not contain exactly one CSV link.

    NOTE(review): `fam_dir` and `url_pref` are presumably the destination
    directory and the URL prefix for the download - confirm against callers.
    """
    global _num_downloads_this_session
    # Plain 'r' rather than 'rU': the 'U' flag was removed in Python 3.11 and
    # io.open's text mode already performs universal-newline translation.
    # The `with` block makes sure the file handle is closed promptly.
    with io.open(fam_html_fp, 'r', encoding='utf-8') as fam_html_file:
        fam_html_content = fam_html_file.read()
    soup = Soup(fam_html_content, 'html.parser')
    csva = soup.find_all("a", attrs={"type": "text/csv"})
    if len(csva) != 1:
        raise RuntimeError(u"Not just 1 CSV type links in {} : {}".format(
            fam_html_fp, csva))
    csv_link = csva[0]
示例#2
0
#!/usr/bin/env python
from peyotl.utility.input_output import read_as_json, write_as_json
from peyotl import get_logger

import sys
import re

# Script-level logger, named after this tool.
_LOG = get_logger('evaluate-auto-mapping')

# Exactly three positional arguments are required: the input JSON mapping
# file, then the two output paths.
if len(sys.argv) != 4:
    sys.exit('expecting an input file path for the JSON mapping file and '
             '2 output file for the plausible and implausible unmapped')
inf, poutf, ioutf = sys.argv[1:]
_LOG.debug('Reading test cases from "{}"'.format(inf))
test_case_dict = read_as_json(inf)

# Result containers and counters, all starting empty / at zero.
possible, impossible = {}, {}
np = ni = 0
for study_id, otu_list in test_case_dict.items():
    p = []
    i = []
    for el in otu_list:
        matches = el[1]
        orig = el[0].lower()
        is_plausible = False
        for m in matches:
            if m.lower() in orig:
#!/usr/bin/env python
from peyotl.phylografter.nexson_workaround import workaround_phylografter_export_diffs, \
                                                             add_default_prop
from peyotl.phylesystem.git_actions import get_filepath_for_namespaced_id
from peyotl import get_logger
from subprocess import call
import codecs
import json
import sys
import os
import re
_LOG = get_logger(__name__)

def debug(m):
    """Log `m` at DEBUG level on the module-level logger `_LOG`."""
    _LOG.debug(m)
# Command-line handling: three positional arguments are required.
# Explicit checks are used instead of `assert`, because assertions are
# stripped when Python runs with -O and give no usable message to the user.
if len(sys.argv) < 4:
    sys.exit('expecting 3 arguments: the old phylesystem directory, the new '
             'phylesystem directory, and a scratch parent directory')
old_phylesystem = sys.argv[1]
old_phylesystem_study = os.path.abspath(os.path.join(old_phylesystem, 'study'))
new_phylesystem = sys.argv[2]
new_phylesystem_study = os.path.abspath(os.path.join(new_phylesystem, 'study'))
scratch_par = sys.argv[3]
for _required_dir in (old_phylesystem_study, new_phylesystem_study, scratch_par):
    if not os.path.isdir(_required_dir):
        sys.exit('"{d}" is not a directory'.format(d=_required_dir))

# The helper scripts are located relative to this script: two directory
# levels up, then scripts/nexson/.
script_name = os.path.abspath(sys.argv[0])
peyotl_dev_dir = os.path.split(script_name)[0]
peyotl_dir = os.path.split(peyotl_dev_dir)[0]
conversion_script = os.path.join(peyotl_dir, 'scripts', 'nexson', 'nexson_nexml.py')
if not os.path.isfile(conversion_script):
    sys.exit('conversion script "{f}" not found'.format(f=conversion_script))
validation_script = os.path.join(peyotl_dir, 'scripts', 'nexson', 'validate_ot_nexson.py')
示例#4
0
#!/usr/bin/env python
if __name__ == '__main__':
    from peyotl.nexson_syntax import write_as_json
    from peyotl.nexson_validation import NexsonError, \
                                         NexsonWarningCodes, \
                                         validate_nexson
    from peyotl import get_logger
    import argparse
    import codecs
    import json
    import sys
    import os
    # Name the logger after the script file itself (basename of argv[0]).
    SCRIPT_NAME = os.path.split(os.path.abspath(sys.argv[0]))[-1]
    _LOG = get_logger(SCRIPT_NAME)
    # Force UTF-8 output on both standard streams. A codecs StreamWriter
    # emits bytes, which Python 3 text streams reject with a TypeError, so
    # wrap the underlying binary `.buffer` when it exists. Python 2 file
    # objects have no `.buffer` and are wrapped directly, as before.
    sys.stdout = codecs.getwriter('utf-8')(getattr(sys.stdout, 'buffer', sys.stdout))
    sys.stderr = codecs.getwriter('utf-8')(getattr(sys.stderr, 'buffer', sys.stderr))

    parser = argparse.ArgumentParser(description='Validate a json file as Open Tree of Life NexSON')
    parser.add_argument('--verbose',
                        dest='verbose',
                        action='store_true',
                        default=False,
                        help='verbose output')
    parser.add_argument('--agent-only',
                        dest='add_agent_only',
                        action='store_true',
                        default=False,
                        help='If --embed and this argument are both used, only the agent info will be embedded in the annotation')
    # Supported output syntaxes, sorted so the help text lists them in a
    # stable order if more are added.
    out_syntax_choices = ["json",]
    out_syntax_choices.sort()
    s_help = 'Syntax of output. Valid choices are: "{c}"'.format(c='", "'.join(out_syntax_choices))
示例#5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Helper script for making sure that the configuration of the logger works. Called by test-logger.sh"""
from peyotl import get_logger

# Emit one message per severity level so the calling shell test can check
# how the logger is configured.
_LOG = get_logger()
_LOG.debug("a debug message")
# Message with a non-ASCII character (presumably to exercise the logger's
# encoding handling - confirm against test-logger.sh).
_LOG.info("an info with umlaut ü message")
_LOG.warning("a warning message")
_LOG.error("an error message")
_LOG.critical("a critical message")
# Raise and immediately catch an error so that `exception()` is called from
# inside an active `except` block.
try:
    raise RuntimeError("A testing runtime error")
except RuntimeError:
    _LOG.exception("expected exception")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Helper script for making sure that the configuration of the logger works. Called by test-logger.sh"""
from peyotl import get_logger
# Emit one message per severity level so the calling shell test can check
# how the logger is configured.
_LOG = get_logger()
_LOG.debug("a debug message")
# Message with a non-ASCII character (presumably to exercise the logger's
# encoding handling - confirm against test-logger.sh).
_LOG.info("an info with umlaut ü message")
_LOG.warning("a warning message")
_LOG.error("an error message")
_LOG.critical("a critical message")
# Raise and immediately catch an error so that `exception()` is called from
# inside an active `except` block.
try:
    raise RuntimeError("A testing runtime error")
except RuntimeError:
    _LOG.exception("expected exception")
#!/usr/bin/env python
from peyotl.nexson_syntax import iter_otu, write_as_json
from peyotl.api import APIWrapper
from peyotl.ott import OTT
from peyotl import get_logger
import sys

_LOG = get_logger('otu-label-comparison')
# The only argument is the path of the JSON mapping file to write.
if len(sys.argv) != 2:
    sys.exit('expecting an output file path for the JSON mapping file')
outfn = sys.argv[1]
# Read studies from the local phylesystem rather than a remote service.
a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
ott = OTT()
ott_id_to_names = ott.ott_id_to_names
# study id -> list of (original label, tuple of OTT names) pairs
orig2ott_name = {}

phylesys = a.phylesystem_api.phylesystem_obj
for sid, blob in phylesys.iter_study_objs():
    maps = []
    for otu_id, otu in iter_otu(blob):
        ott_id = otu.get('^ot:ottId')
        if ott_id is None:
            # OTU has not been mapped to an OTT id; nothing to compare.
            continue
        try:
            names = ott_id_to_names[ott_id]
        except KeyError:
            # Only a failed lookup is expected here. A bare `except:` would
            # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
            _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid))
            continue
        # Normalise a single name to a 1-tuple so every entry has one shape.
        if not isinstance(names, tuple):
            names = (names,)
        maps.append((otu['^ot:originalLabel'], names))
    if maps:
示例#8
0
#!/usr/bin/env python
from peyotl.nexson_syntax import iter_otu, write_as_json
from peyotl.api import APIWrapper
from peyotl.ott import OTT
from peyotl import get_logger
import sys
_LOG = get_logger('otu-label-comparison')
# The only argument is the path of the JSON mapping file to write.
if len(sys.argv) != 2:
    sys.exit('expecting an output file path for the JSON mapping file')
outfn = sys.argv[1]
# Read studies from the local phylesystem rather than a remote service.
a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
ott = OTT()
ott_id_to_names = ott.ott_id_to_names
# study id -> list of (original label, tuple of OTT names) pairs
orig2ott_name = {}

phylesys = a.phylesystem_api.phylesystem_obj
for sid, blob in phylesys.iter_study_objs():
    maps = []
    for otu_id, otu in iter_otu(blob):
        ott_id = otu.get('^ot:ottId')
        if ott_id is None:
            # OTU has not been mapped to an OTT id; nothing to compare.
            continue
        try:
            names = ott_id_to_names[ott_id]
        except KeyError:
            # Only a failed lookup is expected here. A bare `except:` would
            # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
            _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid))
            continue
        # Normalise a single name to a 1-tuple so every entry has one shape.
        if not isinstance(names, tuple):
            names = (names,)
        maps.append((otu['^ot:originalLabel'], names))
    # Studies with no mapped OTUs are left out of the result.
    if maps:
        orig2ott_name[sid] = maps