# -*- coding: utf-8 -*- ################################################################################################### # Much of following code is from by JAR from __future__ import print_function from unidecode import unidecode import csv import io import os import time from bs4 import BeautifulSoup as Soup from peyotl import (assure_dir_exists, get_logger, download_large_file) from taxalotl.resource_wrapper import TaxonomyWrapper _LOG = get_logger(__name__) DOMAIN = "http://www.theplantlist.org" THROTTLE_BREAK = 10 _num_downloads_this_session = 0 def download_csv_for_family(fam_dir, fam_html_fp, url_pref): global _num_downloads_this_session fam_html_content = io.open(fam_html_fp, 'rU', encoding='utf-8').read() soup = Soup(fam_html_content, 'html.parser') csva = soup.find_all("a", attrs={"type": "text/csv"}) if len(csva) != 1: raise RuntimeError(u"Not just 1 CSV type links in {} : {}".format( fam_html_fp, csva)) csv_link = csva[0]
#!/usr/bin/env python from peyotl.utility.input_output import read_as_json, write_as_json from peyotl import get_logger import sys import re _LOG = get_logger('evaluate-auto-mapping') if len(sys.argv) != 4: sys.exit('expecting an input file path for the JSON mapping file and ' '2 output file for the plausible and implausible unmapped') inf = sys.argv[1] poutf = sys.argv[2] ioutf = sys.argv[3] _LOG.debug('Reading test cases from "{}"'.format(inf)) test_case_dict = read_as_json(inf) possible = {} impossible = {} np = 0 ni = 0 for study_id, otu_list in test_case_dict.items(): p = [] i = [] for el in otu_list: matches = el[1] orig = el[0].lower() is_plausible = False for m in matches: if m.lower() in orig:
#!/usr/bin/env python
"""Set-up for a script that works on two phylesystem checkouts.

Command-line arguments (positional):
    1. path to the "old" phylesystem directory (must contain a ``study`` dir)
    2. path to the "new" phylesystem directory (must contain a ``study`` dir)
    3. path to an existing scratch directory

The helper scripts ``scripts/nexson/nexson_nexml.py`` and
``scripts/nexson/validate_ot_nexson.py`` are located relative to the
grandparent directory of this script.
"""
from peyotl.phylografter.nexson_workaround import workaround_phylografter_export_diffs, \
    add_default_prop
from peyotl.phylesystem.git_actions import get_filepath_for_namespaced_id
from peyotl import get_logger
from subprocess import call
import codecs
import json
import sys
import os
import re

_LOG = get_logger(__name__)


def debug(m):
    """Emit `m` at DEBUG level on this module's logger."""
    _LOG.debug(m)


# Validate the command line explicitly: `assert` is stripped when Python runs
# with -O, and a missing argument would otherwise surface as a bare IndexError.
# Extra trailing arguments are still tolerated, as they were originally.
if len(sys.argv) < 4:
    sys.exit('Expecting 3 arguments: the old phylesystem directory, the new '
             'phylesystem directory, and a scratch directory')

old_phylesystem = sys.argv[1]
old_phylesystem_study = os.path.abspath(os.path.join(old_phylesystem, 'study'))
new_phylesystem = sys.argv[2]
new_phylesystem_study = os.path.abspath(os.path.join(new_phylesystem, 'study'))
scratch_par = sys.argv[3]

for _required_dir in (old_phylesystem_study, new_phylesystem_study, scratch_par):
    if not os.path.isdir(_required_dir):
        sys.exit('"{}" is not a directory'.format(_required_dir))

# The helper scripts live under <peyotl_dir>/scripts/nexson, where <peyotl_dir>
# is the parent of the directory that holds this script.
script_name = os.path.abspath(sys.argv[0])
peyotl_dev_dir = os.path.split(script_name)[0]
peyotl_dir = os.path.split(peyotl_dev_dir)[0]
conversion_script = os.path.join(peyotl_dir, 'scripts', 'nexson', 'nexson_nexml.py')
if not os.path.isfile(conversion_script):
    sys.exit('Conversion script "{}" not found'.format(conversion_script))
validation_script = os.path.join(peyotl_dir, 'scripts', 'nexson', 'validate_ot_nexson.py')
#!/usr/bin/env python
if __name__ == '__main__':
    from peyotl.nexson_syntax import write_as_json
    from peyotl.nexson_validation import NexsonError, \
        NexsonWarningCodes, \
        validate_nexson
    from peyotl import get_logger
    import argparse
    import codecs
    import json
    import sys
    import os

    # The logger is named after the script file (directory part dropped).
    SCRIPT_NAME = os.path.basename(os.path.abspath(sys.argv[0]))
    _LOG = get_logger(SCRIPT_NAME)

    # Re-wrap the standard streams so that text written to them is UTF-8 encoded.
    utf8_writer = codecs.getwriter('utf-8')
    sys.stdout = utf8_writer(sys.stdout)
    sys.stderr = utf8_writer(sys.stderr)

    parser = argparse.ArgumentParser(
        description='Validate a json file as Open Tree of Life NexSON')
    parser.add_argument(
        '--verbose',
        dest='verbose',
        action='store_true',
        default=False,
        help='verbose output')
    parser.add_argument(
        '--agent-only',
        dest='add_agent_only',
        action='store_true',
        default=False,
        help='If --embed and this argument are both used, only the agent info will be embedded in the annotation')

    # Supported output syntaxes, kept sorted for the help text.
    out_syntax_choices = sorted(["json"])
    quoted_choices = '", "'.join(out_syntax_choices)
    s_help = 'Syntax of output. Valid choices are: "{c}"'.format(c=quoted_choices)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Helper script for making sure that the configuration of the logger works.

Called by test-logger.sh
"""
from peyotl import get_logger

_LOG = get_logger()

# One message per severity level, in increasing order of severity.
_LEVEL_MESSAGES = (
    ("debug", "a debug message"),
    ("info", "an info with umlaut ü message"),
    ("warning", "a warning message"),
    ("error", "an error message"),
    ("critical", "a critical message"),
)
for _level, _text in _LEVEL_MESSAGES:
    getattr(_LOG, _level)(_text)

# Finally, log from inside an exception handler via `exception`.
try:
    raise RuntimeError("A testing runtime error")
except RuntimeError:
    _LOG.exception("expected exception")
#!/usr/bin/env python from peyotl.nexson_syntax import iter_otu, write_as_json from peyotl.api import APIWrapper from peyotl.ott import OTT from peyotl import get_logger import sys _LOG = get_logger('otu-label-comparison') if len(sys.argv) != 2: sys.exit('expecting an output file path for the JSON mapping file') outfn = sys.argv[1] a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'}) ott = OTT() ott_id_to_names = ott.ott_id_to_names orig2ott_name = {} phylesys = a.phylesystem_api.phylesystem_obj for sid, blob in phylesys.iter_study_objs(): maps = [] for otu_id, otu in iter_otu(blob): ott_id = otu.get('^ot:ottId') if ott_id is not None: try: names = ott_id_to_names[ott_id] except: _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid)) else: if not isinstance(names, tuple): names = (names,) maps.append((otu['^ot:originalLabel'], names)) if maps:
#!/usr/bin/env python
"""Collect, per study, the original OTU labels alongside their OTT names.

Usage: one positional argument, the output file path for the JSON mapping
file (stored in `outfn`).

For every study yielded by the local phylesystem, each OTU that carries an
``^ot:ottId`` is looked up in ``OTT().ott_id_to_names``.  Successful lookups
are recorded as ``(original label, names tuple)`` pairs in
``orig2ott_name[study_id]``; studies with no such pairs are omitted.
"""
from peyotl.nexson_syntax import iter_otu, write_as_json
from peyotl.api import APIWrapper
from peyotl.ott import OTT
from peyotl import get_logger
import sys

_LOG = get_logger('otu-label-comparison')

if len(sys.argv) != 2:
    sys.exit('expecting an output file path for the JSON mapping file')
outfn = sys.argv[1]

a = APIWrapper(phylesystem_api_kwargs={'get_from': 'local'})
ott = OTT()
ott_id_to_names = ott.ott_id_to_names
orig2ott_name = {}
phylesys = a.phylesystem_api.phylesystem_obj

for sid, blob in phylesys.iter_study_objs():
    maps = []
    for otu_id, otu in iter_otu(blob):
        ott_id = otu.get('^ot:ottId')
        if ott_id is None:
            # OTU has not been mapped to OTT; nothing to compare.
            continue
        try:
            names = ott_id_to_names[ott_id]
        except (KeyError, TypeError):
            # Only a failed lookup is tolerated (unknown or unhashable id).
            # A bare `except:` here would also swallow KeyboardInterrupt and
            # SystemExit, making the long-running loop hard to interrupt.
            _LOG.debug('Apparently deprecated ott_id="{o}" in study="{s}"'.format(o=ott_id, s=sid))
            continue
        # Normalise a single name to a 1-tuple so every entry has the same shape.
        if not isinstance(names, tuple):
            names = (names,)
        maps.append((otu['^ot:originalLabel'], names))
    if maps:
        orig2ott_name[sid] = maps