Пример #1
0
def check_raw_str_repeatable(x, obj, k, vc):
    """Check that `x` is either a string or a list containing only strings.

    `obj`, `k` and `vc` are part of the signature but are not used in this body.

    Returns `__TRUE_VAL` when the check passes and
    `__FALSE_STR_REPEATABLE_EL` otherwise.
    """
    if is_str_type(x):
        return __TRUE_VAL
    # A list qualifies only if every element is a string (an empty list passes).
    if isinstance(x, list) and all(is_str_type(el) for el in x):
        return __TRUE_VAL
    return __FALSE_STR_REPEATABLE_EL
Пример #2
0
 def fetch_nexson(self, study_id, output_filepath=None, store_raw=False):
     '''Calls the export_gzipNexSON URL for `study_id` and unzips the response.

     A leading "pg_" on `study_id` is dropped before the URL is built.
     If `output_filepath` is None the parsed JSON is returned. Otherwise the
     content is written to `output_filepath` (verbatim when `store_raw` is
     true, re-serialized with `write_as_json` when it is false) and True is
     returned.
     Raises HTTP error (from `raise_for_status`), gzip module error, or
     RuntimeError (when the unzipped payload is not a string).
     '''
     prefix = 'pg_'
     if study_id.startswith(prefix):
         study_id = study_id[len(prefix):]
     uri = self.domain + '/study/export_gzipNexSON.json/' + study_id
     _LOG.debug('Downloading %s using "%s"\n', study_id, uri)
     resp = requests.get(uri, headers=GZIP_REQUEST_HEADERS, allow_redirects=True)
     resp.raise_for_status()
     # Errors raised while unzipping are intentionally left to propagate.
     zipped_stream = StringIO(resp.content)
     results = gzip.GzipFile(mode='rb', fileobj=zipped_stream).read()
     if not is_str_type(results):
         raise RuntimeError(
             'gzipped response from phylografter export_gzipNexSON.json, but not a string is:',
             results)
     if output_filepath is None:
         return anyjson.loads(results)
     if store_raw:
         write_to_filepath(results, output_filepath)
     else:
         write_as_json(anyjson.loads(results), output_filepath)
     return True
Пример #3
0
 def fetch_nexson(self, study_id, output_filepath=None, store_raw=False):
     '''Calls the export_gzipNexSON URL for `study_id` and unzips the response.

     A leading "pg_" on `study_id` is dropped before the URL is built.
     If `output_filepath` is None the parsed JSON is returned. Otherwise the
     content is written to `output_filepath` (verbatim when `store_raw` is
     true, re-serialized with `write_as_json` when it is false) and True is
     returned.
     Raises HTTP error (from `raise_for_status`), gzip module error, or
     RuntimeError (when the unzipped payload is not a string).
     '''
     prefix = 'pg_'
     if study_id.startswith(prefix):
         study_id = study_id[len(prefix):]
     uri = self.domain + '/study/export_gzipNexSON.json/' + study_id
     _LOG.debug('Downloading %s using "%s"\n', study_id, uri)
     resp = requests.get(uri, headers=GZIP_REQUEST_HEADERS, allow_redirects=True)
     resp.raise_for_status()
     # Errors raised while unzipping are intentionally left to propagate.
     zipped_stream = StringIO(resp.content)
     results = gzip.GzipFile(mode='rb', fileobj=zipped_stream).read()
     if not is_str_type(results):
         raise RuntimeError('gzipped response from phylografter export_gzipNexSON.json, but not a string is:', results)
     if output_filepath is None:
         return anyjson.loads(results)
     if store_raw:
         write_to_filepath(results, output_filepath)
     else:
         write_as_json(anyjson.loads(results), output_filepath)
     return True
Пример #4
0
def log_request_as_curl(curl_log, url, verb, headers, params, data):
    """Append a `curl` command line describing an HTTP request to a log file.

    curl_log: filepath of the log, opened in append mode with utf-8 encoding.
        If it is falsy nothing is written.
    url, verb: the target URL and the HTTP verb placed after `-X`.
    headers: optional dict rendered as `-H key:value` pairs, with each side
        passed through `escape_dq`.
    params: optional query parameters; they are url-encoded and appended to
        `url` only when there is no `data`.
    data: optional request body. A string is parsed with `anyjson.loads`; the
        result is re-serialized with `anyjson.dumps` and emitted inside single
        quotes after `--data`.
    Returns None.
    """
    if not curl_log:
        return
    with codecs.open(curl_log, 'a', encoding='utf-8') as curl_fo:
        if headers:
            hargs = ' '.join([
                '-H {}:{}'.format(escape_dq(k), escape_dq(v))
                for k, v in headers.items()
            ])
        else:
            hargs = ''
        if params and not data:
            # urlencode lives in urllib.parse on Python 3 and in urllib on
            # Python 2; `urllib.urlencode` raises AttributeError on Python 3.
            try:
                from urllib.parse import urlencode
            except ImportError:
                from urllib import urlencode
            url = url + '?' + urlencode(params)
        if data:
            if is_str_type(data):
                data = anyjson.loads(data)
            data_arg = " --data '" + anyjson.dumps(data) + "'"
        else:
            data_arg = ''
        curl_fo.write('curl -X {v} {h} {u}{d}\n'.format(v=verb,
                                                        u=url,
                                                        h=hargs,
                                                        d=data_arg))
Пример #5
0
def create_pruned_and_taxonomy_for_tip_ott_ids(tree_proxy, ott):
    '''Builds two trees from `tree_proxy` and returns them as a pair:
        the first is produced by `create_tree_from_id2par` from the
            child -> parent mapping gathered here; it is meant to be a pruned
            version of tree_proxy without any leaf that has no ott_id and
            without any internal node lacking a descendant with an ott_id
            (nodes of out-degree 1 are suppressed as part of the
            TreeWithPathsInEdges-style).
        the second is the OTT induced tree (`ott.induced_tree`) for the
            ott_ids found on the leaves.
    '''
    # Only tips are keyed by their OTT ID (mappings at internal nodes are
    #   ignored); internal nodes are keyed by their node ID.
    # OTT IDs are integers, and the nodeIDs are strings - so we should not get clashes.
    #TODO consider prefix scheme
    tip_ott_ids = []
    child2parent = {}
    for nd in tree_proxy:
        if not nd.is_leaf:
            nd_id = nd._id
            assert is_str_type(nd_id)
            # a node with no edge is recorded as having no parent
            child2parent[nd_id] = nd.parent._id if nd.edge is not None else None
            continue
        tip_id = nd.ott_id
        if tip_id is None:
            # leaves without an OTT ID are left out of the mapping
            continue
        tip_ott_ids.append(tip_id)
        assert isinstance(tip_id, int)
        child2parent[tip_id] = nd.parent._id
    pruned = create_tree_from_id2par(child2parent, tip_ott_ids)
    induced = ott.induced_tree(tip_ott_ids)
    return pruned, induced
Пример #6
0
 def _process_query_dict(self, query_dict, valid_keys, kwargs):
     """Folds `kwargs` into `query_dict` and reduces it to one (key, value) pair.

     Keyword names that are not in `valid_keys` get an 'ot:' prefix. Note that
     a `query_dict` passed in by the caller is updated in place.

     Returns None when there is nothing to search for (and `self.use_v1` is
     false); otherwise a (key, value) tuple in which a non-string value has
     been converted with `UNICODE` and an 'ot:studyPublication' value has
     been passed through `doi2url`.
     Raises ValueError for an empty query under `use_v1` or for a key that is
     not in `valid_keys`; NotImplementedError for more than one pair.
     """
     if query_dict is None:
         query_dict = {}
     for name, value in kwargs.items():
         key = name if name in valid_keys else 'ot:' + name
         query_dict[key] = value
     num_terms = len(query_dict)
     if num_terms > 1:
         raise NotImplementedError('Currently only searches for one property/value pair are supported')
     if num_terms == 0:
         if self.use_v1:
             raise ValueError('The property/value pairs for the query should be passed in as keyword arguments')
         return None
     term = next(iter(query_dict.keys()))
     if term not in valid_keys:
         template = '"{k}" is not a valid search term. Expecting it to be one of the following: {kl}'
         raise ValueError(template.format(k=term, kl=repr(valid_keys)))
     term_value = query_dict[term]
     if not is_str_type(term_value):
         term_value = UNICODE(term_value)
     if term == 'ot:studyPublication':
         term_value = doi2url(term_value)
     return (term, term_value)
Пример #7
0
def check_raw_str_list(x, obj, k, vc):
    """Check that `x` is a list whose elements are all strings.

    `obj`, `k` and `vc` are part of the signature but are not used in this body.

    Returns `__TRUE_VAL` when the check passes (an empty list passes) and
    `__FALSE_STR_LIST` otherwise.
    """
    if isinstance(x, list) and all(is_str_type(el) for el in x):
        return __TRUE_VAL
    return __FALSE_STR_LIST
Пример #8
0
def log_request_as_curl(curl_log, url, verb, headers, params, data):
    """Append a `curl` command line describing an HTTP request to a log file.

    curl_log: filepath of the log, opened in append mode with utf-8 encoding.
        If it is falsy nothing is written.
    url, verb: the target URL and the HTTP verb placed after `-X`.
    headers: optional dict rendered as `-H key:value` pairs, with each side
        passed through `escape_dq`.
    params: optional query parameters; they are url-encoded and appended to
        `url` only when there is no `data`.
    data: optional request body. A string is parsed with `anyjson.loads`; the
        result is re-serialized with `anyjson.dumps` and emitted inside single
        quotes after `--data`.
    Returns None.
    """
    if not curl_log:
        return
    with codecs.open(curl_log, 'a', encoding='utf-8') as curl_fo:
        if headers:
            hargs = ' '.join(['-H {}:{}'.format(escape_dq(k), escape_dq(v)) for k, v in headers.items()])
        else:
            hargs = ''
        if params and not data:
            # urlencode lives in urllib.parse on Python 3 and in urllib on
            # Python 2; `urllib.urlencode` raises AttributeError on Python 3.
            try:
                from urllib.parse import urlencode
            except ImportError:
                from urllib import urlencode
            url = url + '?' + urlencode(params)
        if data:
            if is_str_type(data):
                data = anyjson.loads(data)
            data_arg = " --data '" + anyjson.dumps(data) + "'"
        else:
            data_arg = ''
        curl_fo.write('curl -X {v} {h} {u}{d}\n'.format(v=verb,
                                                        u=url,
                                                        h=hargs,
                                                        d=data_arg))
Пример #9
0
def commit_and_try_merge2master(git_action,
                                file_content,
                                study_id,
                                auth_info,
                                parent_sha,
                                commit_msg='',
                                merged_sha=None):
    """Write `file_content` for `study_id` through `git_action`, then try to
    merge the resulting commit to master.

    The content is put in a temporary file (verbatim if it is a string,
    otherwise serialized with `write_as_json`) which is handed to
    `git_action.write_study_from_tmpfile`. The commit it reports is passed to
    `_do_merge2master_commit`. The lock taken with `acquire_lock_raise` is
    released and the temporary file is closed whether or not these steps
    succeed.

    Returns a dict with the keys "error" (always 0), "resource_id",
    "branch_name", "description", "sha" and "merge_needed".
    Raises GitWorkflowError if `write_study_from_tmpfile` fails; errors from
    the other steps propagate unchanged.
    """
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        f = "Could not acquire lock to write to study #{s}".format(s=study_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_study_from_tmpfile(
                    study_id, fc, parent_sha, auth_info, commit_msg)
            except Exception as e:
                _LOG.exception('write_study_from_tmpfile exception')
                # Exceptions have no `.message` attribute on Python 3 (and it
                # can be empty on Python 2), so fall back to str(e) instead of
                # letting the handler itself fail with AttributeError.
                details = getattr(e, 'message', None) or str(e)
                raise GitWorkflowError(
                    "Could not write to study #%s ! Details: \n%s" %
                    (study_id, details))
            written_fp = git_action.path_for_study(study_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug(
                'write of study {s} on parent {p} returned = {c}'.format(
                    s=study_id, p=parent_sha, c=str(commit_resp)))
            m_resp = _do_merge2master_commit(
                git_action,
                new_sha,
                branch_name,
                written_fp,
                merged_sha=merged_sha,
                prev_file_sha=commit_resp.get('prev_file_sha'))
            # the merge step may replace the sha and branch of the write
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": study_id,
        "branch_name": branch_name,
        "description": "Updated study #%s" % study_id,
        "sha": new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r
Пример #10
0
 def get_name(self, ott_id):
     """Returns the name stored for `ott_id` in `self.ott_id_to_names`.

     Returns None when there is no entry. When the stored entry is not a
     string, its first element is returned.
     """
     entry = self.ott_id_to_names.get(ott_id)
     if entry is None or is_str_type(entry):
         return entry
     return entry[0]
Пример #11
0
 def _process_query_dict(self, query_dict, valid_keys, kwargs):
     """Folds `kwargs` into `query_dict` and reduces it to one (key, value) pair.

     Keyword names that are not in `valid_keys` get an 'ot:' prefix. Note that
     a `query_dict` passed in by the caller is updated in place.

     Returns None when there is nothing to search for (and `self.use_v1` is
     false); otherwise a (key, value) tuple in which a non-string value has
     been converted with `UNICODE` and an 'ot:studyPublication' value has
     been passed through `doi2url`.
     Raises ValueError for an empty query under `use_v1` or for a key that is
     not in `valid_keys`; NotImplementedError for more than one pair.
     """
     if query_dict is None:
         query_dict = {}
     for name, value in kwargs.items():
         key = name if name in valid_keys else 'ot:' + name
         query_dict[key] = value
     num_terms = len(query_dict)
     if num_terms > 1:
         raise NotImplementedError(
             'Currently only searches for one property/value pair are supported'
         )
     if num_terms == 0:
         if self.use_v1:
             raise ValueError(
                 'The property/value pairs for the query should be passed in as keyword arguments'
             )
         return None
     term = next(iter(query_dict.keys()))
     if term not in valid_keys:
         template = '"{k}" is not a valid search term. Expecting it to be one of the following: {kl}'
         raise ValueError(template.format(k=term, kl=repr(valid_keys)))
     term_value = query_dict[term]
     if not is_str_type(term_value):
         term_value = UNICODE(term_value)
     if term == 'ot:studyPublication':
         term_value = doi2url(term_value)
     return (term, term_value)
Пример #12
0
 def get_name(self, ott_id):
     """Returns the name stored for `ott_id` in `self.ott_id_to_names`.

     Returns None when there is no entry. When the stored entry is not a
     string, its first element is returned.
     """
     entry = self.ott_id_to_names.get(ott_id)
     if entry is None or is_str_type(entry):
         return entry
     return entry[0]
Пример #13
0
def create_pruned_and_taxonomy_for_tip_ott_ids(tree_proxy, ott):
    '''Builds two trees from `tree_proxy` and returns them as a pair:
        the first is produced by `create_tree_from_id2par` from the
            child -> parent mapping gathered here; it is meant to be a pruned
            version of tree_proxy without any leaf that has no ott_id and
            without any internal node lacking a descendant with an ott_id
            (nodes of out-degree 1 are suppressed as part of the
            TreeWithPathsInEdges-style).
        the second is the OTT induced tree (`ott.induced_tree`) for the
            ott_ids found on the leaves.
    '''
    # Only tips are keyed by their OTT ID (mappings at internal nodes are
    #   ignored); internal nodes are keyed by their node ID.
    # OTT IDs are integers, and the nodeIDs are strings - so we should not get clashes.
    #TODO consider prefix scheme
    tip_ott_ids = []
    child2parent = {}
    for nd in tree_proxy:
        if not nd.is_leaf:
            nd_id = nd._id
            assert is_str_type(nd_id)
            # a node with no edge is recorded as having no parent
            child2parent[nd_id] = nd.parent._id if nd.edge is not None else None
            continue
        tip_id = nd.ott_id
        if tip_id is None:
            # leaves without an OTT ID are left out of the mapping
            continue
        tip_ott_ids.append(tip_id)
        assert isinstance(tip_id, int)
        child2parent[tip_id] = nd.parent._id
    pruned = create_tree_from_id2par(child2parent, tip_ott_ids)
    induced = ott.induced_tree(tip_ott_ids)
    return pruned, induced
Пример #14
0
 def clone_repo(par_dir, repo_local_name, remote):
     """Runs `git clone` of `remote` into `repo_local_name`, with `par_dir`
     as the working directory.

     Raises ValueError if `par_dir` is not an existing directory or if
     `remote` is not a string, and RuntimeError if something already exists
     at the destination path.
     """
     if not os.path.isdir(par_dir):
         raise ValueError(repr(par_dir) + ' is not a directory')
     if not is_str_type(remote):
         raise ValueError(repr(remote) + ' is not a remote string')
     clone_target = os.path.join(par_dir, repo_local_name)
     if os.path.exists(clone_target):
         raise RuntimeError('Filepath "{}" is in the way'.format(clone_target))
     git('clone', remote, repo_local_name, _cwd=par_dir)
Пример #15
0
def check_href(x, obj, k, vc):
    """Check that `_check_id` accepts `x` without raising and that `x` has a
    string-valued '@href' entry.

    Returns `__TRUE_VAL` when both hold. Returns `__FALSE_HREF` otherwise,
    including when probing `x` raises (e.g. `x` has no `get` method).
    """
    try:
        _check_id(x, obj, k, vc)
        if is_str_type(x.get('@href')):
            return __TRUE_VAL
    except Exception:
        # Any ordinary failure while probing `x` just means "not a valid
        # href". Unlike a bare `except:`, this does not swallow
        # KeyboardInterrupt or SystemExit.
        pass
    return __FALSE_HREF
Пример #16
0
def collection_to_included_trees(collection):
    """Returns the elements of a collection's `decisions` list whose
    'decision' value is 'INCLUDED'.

    `collection` may be the collection object itself or a string, in which
    case it is read with `read_as_json` (i.e. treated as a filepath).
    A collection without a 'decisions' key yields an empty list.
    """
    if is_str_type(collection):
        collection = read_as_json(collection)
    decisions = collection.get('decisions', [])
    return [dec for dec in decisions if dec['decision'] == 'INCLUDED']
Пример #17
0
def collection_to_included_trees(collection):
    """Returns the elements of a collection's `decisions` list whose
    'decision' value is 'INCLUDED'.

    `collection` may be the collection object itself or a string, in which
    case it is read with `read_as_json` (i.e. treated as a filepath).
    A collection without a 'decisions' key yields an empty list.
    """
    if is_str_type(collection):
        collection = read_as_json(collection)
    decisions = collection.get('decisions', [])
    return [dec for dec in decisions if dec['decision'] == 'INCLUDED']
Пример #18
0
    def write_study(self, study_id, file_content, branch, author):
        """Commit `file_content` as study `study_id` and return the new HEAD sha.

        Deprecated but needed until we merge api local-dep to master...

        `file_content` is written to a temporary file (verbatim if it is a
        string, otherwise serialized with `write_as_json`), which is copied
        to the study's path on a branch created or checked out from master
        for the user named by the part of `branch` before '_study_'. The
        file is then added and committed with `author`.

        A commit failure whose message contains "nothing to commit" is
        ignored; any other failure during the git steps is logged and
        re-raised as GitWorkflowError. The temporary file is always closed.
        """
        parent_sha = None
        gh_user = branch.split('_study_')[0]
        fc = tempfile.NamedTemporaryFile()
        try:
            # Inside the outer try so the temp file is closed even if
            # filling it fails.
            if is_str_type(file_content):
                fc.write(file_content)
            else:
                write_as_json(file_content, fc)
            fc.flush()
            try:
                study_filepath = self.path_for_study(study_id)
                study_dir = os.path.split(study_filepath)[0]
                if parent_sha is None:
                    self.checkout_master()
                    parent_sha = self.get_master_sha()
                branch = self.create_or_checkout_branch(gh_user,
                                                        study_id,
                                                        parent_sha,
                                                        force_branch_name=True)
                # create a study directory if this is a new study EJM- what if it isn't?
                if not os.path.isdir(study_dir):
                    os.makedirs(study_dir)
                shutil.copy(fc.name, study_filepath)
                git(self.gitdir, self.gitwd, "add", study_filepath)
                try:
                    git(self.gitdir,
                        self.gitwd,
                        "commit",
                        author=author,
                        message="Update Study #%s via OpenTree API" % study_id)
                except Exception as e:
                    # We can ignore this if no changes are new,
                    # otherwise raise a 400
                    # (`.message` is gone on Python 3, hence the str(e) fallback)
                    commit_err = getattr(e, 'message', None) or str(e)
                    if "nothing to commit" not in commit_err:  #@EJM is this dangerous?
                        _LOG.exception('"git commit" failed')
                        self.reset_hard()
                        raise
                new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
            except Exception as e:
                _LOG.exception('write_study exception')
                details = getattr(e, 'message', None) or str(e)
                raise GitWorkflowError(
                    "Could not write to study #%s ! Details: \n%s" %
                    (study_id, details))
        finally:
            fc.close()
        return new_sha
Пример #19
0
def escape_dq(s):
    """Wraps a string in double quotes, backslash-escaping any double quotes
    inside it.

    Non-string input is not quoted: a bool becomes 'true' or 'false', and
    anything else is returned unchanged.
    """
    if is_str_type(s):
        return '"{}"'.format(s.replace('"', '\\"'))
    if isinstance(s, bool):
        return 'true' if s else 'false'
    return s
Пример #20
0
def get_ot_study_info_from_nexml(src=None,
                                 nexml_content=None,
                                 encoding=u'utf8',
                                 nexson_syntax_version=DEFAULT_NEXSON_VERSION):
    '''Parses a NeXML document and converts it to NexSON with `Nexml2Nexson`
    (the honeybadgerfish convention, see to_honeybadgerfish_dict).

    If nexml_content is provided, it is parsed directly; it is interpreted
    as the contents of an NeXML file in utf-8 encoding.

    If nexml_content is None, then `src` supplies the content. `src` can be:
        * a file object (it is read and the text encoded as utf-8), or
        * a string, which is treated as a filepath (opened with `encoding`)
          unless it begins with http:// or https://, in which case it is
          fetched with peyotl.utility.download.

    When `nexson_syntax_version` is a "by id" honeybadgerfish version, the
    document is first converted to DIRECT_HONEY_BADGERFISH and then to
    BY_ID_HONEY_BADGERFISH with `convert_nexson_format`.

    Returns the converted dictionary; a top-level 'nex:nexml' key is renamed
    to 'nexml'.
    See https://github.com/OpenTreeOfLife/api.opentreeoflife.org/wiki/HoneyBadgerFish
    '''
    if _is_by_id_hbf(nexson_syntax_version):
        nsv = DIRECT_HONEY_BADGERFISH
    else:
        nsv = nexson_syntax_version
    if nexml_content is None:
        if not is_str_type(src):
            nexml_content = src.read().encode('utf-8')
        elif src.startswith(('http://', 'https://')):
            from peyotl.utility import download
            nexml_content = download(url=src, encoding=encoding)
        else:
            with codecs.open(src, 'r', encoding=encoding) as nexml_fo:
                nexml_content = nexml_fo.read().encode('utf-8')
    doc = xml.dom.minidom.parseString(nexml_content)
    doc_root = doc.documentElement

    conversion_cfg = ConversionConfig(output_format=nsv,
                                      input_format=NEXML_NEXSON_VERSION)
    o = Nexml2Nexson(conversion_cfg).convert(doc_root)
    if _is_by_id_hbf(nexson_syntax_version):
        o = convert_nexson_format(o,
                                  BY_ID_HONEY_BADGERFISH,
                                  current_format=nsv)
    if 'nex:nexml' in o:
        o['nexml'] = o.pop('nex:nexml')
    return o
Пример #21
0
def escape_dq(s):
    """Wraps a string in double quotes, backslash-escaping any double quotes
    inside it.

    Non-string input is not quoted: a bool becomes 'true' or 'false', and
    anything else is returned unchanged.
    """
    if is_str_type(s):
        return '"{}"'.format(s.replace('"', '\\"'))
    if isinstance(s, bool):
        return 'true' if s else 'false'
    return s
Пример #22
0
def testing_conv_key_unicode_literal(d):
    r = {}
    if not isinstance(d, dict):
        return d
    for k, v in d.items():
        if isinstance(v, dict):
            r[k] = testing_conv_key_unicode_literal(v)
        elif isinstance(v, list):
            r[k] = [testing_conv_key_unicode_literal(i) for i in v]
        elif is_str_type(v) and v == 'unicode':
            r[k] = 'str'
        else:
            r[k] = v
    return r
Пример #23
0
    def write_document(self, gh_user, doc_id, file_content, branch, author, commit_msg=None):
        """Commit `file_content` as document `doc_id` and return the new HEAD sha.

        Deprecated but needed until we merge api local-dep to master...

        `file_content` is written to a temporary file (verbatim if it is a
        string, otherwise serialized with `write_as_json`), which is copied
        to the document's path on a branch created or checked out from
        master for `gh_user`. The file is then added and committed with
        `author` and `commit_msg` (a default message naming `doc_id` is used
        when `commit_msg` is None). The incoming `branch` value is not read.

        A commit failure whose message contains "nothing to commit" is
        ignored; any other failure during the git steps is logged and
        re-raised as GitWorkflowError. The temporary file is always closed.
        """
        parent_sha = None
        fc = tempfile.NamedTemporaryFile()
        try:
            # N.B. we currently assume file_content is text/JSON, or should be serialized from a dict
            # Inside the outer try so the temp file is closed even if
            # filling it fails.
            if is_str_type(file_content):
                fc.write(file_content)
            else:
                write_as_json(file_content, fc)
            fc.flush()
            try:
                doc_filepath = self.path_for_doc(doc_id)
                doc_dir = os.path.split(doc_filepath)[0]
                if parent_sha is None:
                    self.checkout_master()
                    parent_sha = self.get_master_sha()
                branch = self.create_or_checkout_branch(gh_user, doc_id, parent_sha, force_branch_name=True)
                # create a document directory if this is a new doc EJM- what if it isn't?
                if not os.path.isdir(doc_dir):
                    os.makedirs(doc_dir)
                shutil.copy(fc.name, doc_filepath)
                git(self.gitdir, self.gitwd, "add", doc_filepath)
                if commit_msg is None:
                    commit_msg = "Update document '%s' via OpenTree API" % doc_id
                try:
                    git(self.gitdir,
                        self.gitwd,
                        "commit",
                        author=author,
                        message=commit_msg)
                except Exception as e:
                    # We can ignore this if no changes are new,
                    # otherwise raise a 400
                    # (`.message` is gone on Python 3, hence the str(e) fallback)
                    commit_err = getattr(e, 'message', None) or str(e)
                    if "nothing to commit" not in commit_err:  # @EJM is this dangerous?
                        _LOG.exception('"git commit" failed')
                        self.reset_hard()
                        raise
                new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
            except Exception as e:
                _LOG.exception('write_document exception')
                details = getattr(e, 'message', None) or str(e)
                raise GitWorkflowError("Could not write to document #%s ! Details: \n%s" % (doc_id, details))
        finally:
            fc.close()
        return new_sha
Пример #24
0
def get_ot_study_info_from_nexml(src=None,
                                 nexml_content=None,
                                 encoding=u'utf8',
                                 nexson_syntax_version=DEFAULT_NEXSON_VERSION):
    '''Parses a NeXML document and converts it to NexSON with `Nexml2Nexson`
    (the honeybadgerfish convention, see to_honeybadgerfish_dict).

    If nexml_content is provided, it is parsed directly; it is interpreted
    as the contents of an NeXML file in utf-8 encoding.

    If nexml_content is None, then `src` supplies the content. `src` can be:
        * a file object (it is read and the text encoded as utf-8), or
        * a string, which is treated as a filepath (opened with `encoding`)
          unless it begins with http:// or https://, in which case it is
          fetched with peyotl.utility.download.

    When `nexson_syntax_version` is a "by id" honeybadgerfish version, the
    document is first converted to DIRECT_HONEY_BADGERFISH and then to
    BY_ID_HONEY_BADGERFISH with `convert_nexson_format`.

    Returns the converted dictionary; a top-level 'nex:nexml' key is renamed
    to 'nexml'.
    See https://github.com/OpenTreeOfLife/api.opentreeoflife.org/wiki/HoneyBadgerFish
    '''
    if _is_by_id_hbf(nexson_syntax_version):
        nsv = DIRECT_HONEY_BADGERFISH
    else:
        nsv = nexson_syntax_version
    if nexml_content is None:
        if not is_str_type(src):
            nexml_content = src.read().encode('utf-8')
        elif src.startswith(('http://', 'https://')):
            from peyotl.utility import download
            nexml_content = download(url=src, encoding=encoding)
        else:
            with codecs.open(src, 'r', encoding=encoding) as nexml_fo:
                nexml_content = nexml_fo.read().encode('utf-8')
    doc = xml.dom.minidom.parseString(nexml_content)
    doc_root = doc.documentElement

    conversion_cfg = ConversionConfig(output_format=nsv, input_format=NEXML_NEXSON_VERSION)
    o = Nexml2Nexson(conversion_cfg).convert(doc_root)
    if _is_by_id_hbf(nexson_syntax_version):
        o = convert_nexson_format(o, BY_ID_HONEY_BADGERFISH, current_format=nsv)
    if 'nex:nexml' in o:
        o['nexml'] = o.pop('nex:nexml')
    return o
Пример #25
0
def _http_method_summary_str(url, verb, headers, params, data=None):
    """Builds the 'error in HTTP ... verb call' message for a request.

    `params` and `data` are shown as the text 'None' when they are None;
    otherwise they (and `headers`) are rendered with `_dict_summary`.
    String `data` is parsed with `anyjson.loads` before being summarized.
    """
    ps = 'None' if params is None else _dict_summary(params, 'params')
    hs = _dict_summary(headers, 'headers')
    if data is None:
        ds = 'None'
    else:
        payload = anyjson.loads(data) if is_str_type(data) else data
        ds = _dict_summary(payload, 'data')
    template = 'error in HTTP {v} verb call to {u} with {p}, {d} and {h}'
    return template.format(v=verb, u=url, p=ps, h=hs, d=ds)
Пример #26
0
def write_as_json(blob, dest, indent=0, sort_keys=True):
    """Writes `blob` as JSON, followed by a newline, to `dest`.

    If `dest` is a string it is treated as a filepath that is opened for
    writing with utf-8 encoding and closed afterwards. Otherwise `dest` is
    used as an already-open stream; it is flushed but left open.
    """
    dest_is_path = is_str_type(dest)
    stream = codecs.open(dest, mode='w', encoding='utf-8') if dest_is_path else dest
    try:
        json.dump(blob, stream, indent=indent, sort_keys=sort_keys)
        stream.write('\n')
    finally:
        stream.flush()
        # only close what this function opened
        if dest_is_path:
            stream.close()
Пример #27
0
def _http_method_summary_str(url, verb, headers, params, data=None):
    """Builds the 'error in HTTP ... verb call' message for a request.

    `params` and `data` are shown as the text 'None' when they are None;
    otherwise they (and `headers`) are rendered with `_dict_summary`.
    String `data` is parsed with `anyjson.loads` before being summarized.
    """
    ps = 'None' if params is None else _dict_summary(params, 'params')
    hs = _dict_summary(headers, 'headers')
    if data is None:
        ds = 'None'
    else:
        payload = anyjson.loads(data) if is_str_type(data) else data
        ds = _dict_summary(payload, 'data')
    template = 'error in HTTP {v} verb call to {u} with {p}, {d} and {h}'
    return template.format(v=verb, u=url, p=ps, h=hs, d=ds)
Пример #28
0
 def match_names(self, *valist, **kwargs):
     '''performs taxonomic name resolution.
     See https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs#match_names
     with the exception that "ids" in the API call is referred to by the name "id_list" in this function.
     The most commonly used kwargs are:
         - context_name=<name> (see contexts and infer_context methods)
         - do_approximate_matching=False (to speed up the search)
         - include_dubious=True see https://github.com/OpenTreeOfLife/reference-taxonomy/wiki/taxon-flags
         - include_deprecated=True to see deprecated taxa (see previous link to documentation about flags)
         - wrap_response=True to return a TNRSRespose object (rather than the "raw" response of the web-services).
     All positional and keyword arguments are forwarded unchanged to
     `self.taxomachine.TNRS`, whose result is returned.
     '''
     # NOTE(review): this used to test for a single non-string positional
     # argument, but both branches made exactly this call, so the test was
     # redundant. If a lone list was meant to be handled differently,
     # confirm against the TNRS signature.
     return self.taxomachine.TNRS(*valist, **kwargs)
Пример #29
0
def _warn_missing_setting(section, param, config_filename, warn_on_none_level=logging.WARN):
    """Log a warning that option `param` is missing from `section` of the config.

    config_filename: a filepath string or an iterable of filepath strings
        (each is quoted in the message); if falsy, no filename is mentioned.
    warn_on_none_level: if None nothing is logged. Otherwise it is used only
        to ask the utility logger whether that level is enabled; the message
        itself is emitted as a warning.
    Nothing is logged when `_get_util_logger()` returns None.
    """
    if warn_on_none_level is None:
        return
    _ulog = _get_util_logger()
    if _ulog is None or not _ulog.isEnabledFor(warn_on_none_level):
        return
    if not config_filename:
        f = ' '
    elif is_str_type(config_filename):
        f = ' "{}" '.format(config_filename)
    else:
        f = ' "{}" '.format('", "'.join(config_filename))
    # (the original message had a stray second quote after the option name)
    mf = 'Config file{f}does not contain option "{o}" in section "{s}"'
    msg = mf.format(f=f, o=param, s=section)
    # `warn` is a deprecated alias that newer Python versions no longer provide
    _ulog.warning(msg)
Пример #30
0
def _warn_missing_setting(section, param, config_filename, warn_on_none_level=logging.WARN):
    """Log a warning that option `param` is missing from `section` of the config.

    config_filename: a filepath string or an iterable of filepath strings
        (each is quoted in the message); if falsy, no filename is mentioned.
    warn_on_none_level: if None nothing is logged. Otherwise it is used only
        to ask the utility logger whether that level is enabled; the message
        itself is emitted as a warning.
    Nothing is logged when `_get_util_logger()` returns None.
    """
    if warn_on_none_level is None:
        return
    _ulog = _get_util_logger()
    if _ulog is None or not _ulog.isEnabledFor(warn_on_none_level):
        return
    if not config_filename:
        f = ' '
    elif is_str_type(config_filename):
        f = ' "{}" '.format(config_filename)
    else:
        f = ' "{}" '.format('", "'.join(config_filename))
    # (the original message had a stray second quote after the option name)
    mf = 'Config file{f}does not contain option "{o}" in section "{s}"'
    msg = mf.format(f=f, o=param, s=section)
    # `warn` is a deprecated alias that newer Python versions no longer provide
    _ulog.warning(msg)
Пример #31
0
 def __init__(self, tree, tree_id=None, otus=None, nexson_proxy=None):
     """Stores the tree object together with its id, otus and owning proxy.

     tree: mapping that must provide 'edgeBySourceId' and 'nodeById'.
     otus: either the otu mapping itself, or a string naming an otus group;
         in the latter case the group's 'otuById' mapping is looked up in
         `nexson_proxy._nexml_el['otusById']` and the group id is remembered
         in `_otus_group_id` (that attribute is only set in this case).
     """
     self._nexson_proxy = nexson_proxy
     self._nexson_tree = tree
     self._tree_id = tree_id
     self._edge_by_source_id = tree['edgeBySourceId']
     self._node_by_source_id = tree['nodeById']
     if not is_str_type(otus):
         self._otus = otus
     else:
         self._otus_group_id = otus
         self._otus = nexson_proxy._nexml_el['otusById'][otus]['otuById']
     # not part of nexson, filled on demand. will be dict of node_id -> (edge_id, edge) pair
     self._edge_by_target = None
     self._wr = None
     self._node_cache = {}
Пример #32
0
 def match_names(self, *valist, **kwargs):
     '''performs taxonomic name resolution.
     See https://github.com/OpenTreeOfLife/opentree/wiki/Open-Tree-of-Life-APIs#match_names
     with the exception that "ids" in the API call is referred to by the name "id_list" in this function.
     The most commonly used kwargs are:
         - context_name=<name> (see contexts and infer_context methods)
         - do_approximate_matching=False (to speed up the search)
         - include_dubious=True see https://github.com/OpenTreeOfLife/reference-taxonomy/wiki/taxon-flags
         - include_deprecated=True to see deprecated taxa (see previous link to documentation about flags)
         - wrap_response=True to return a TNRSRespose object (rather than the "raw" response of the web-services).
     All positional and keyword arguments are forwarded unchanged to
     `self.taxomachine.TNRS`, whose result is returned.
     '''
     # NOTE(review): this used to test for a single non-string positional
     # argument, but both branches made exactly this call, so the test was
     # redundant. If a lone list was meant to be handled differently,
     # confirm against the TNRS signature.
     return self.taxomachine.TNRS(*valist, **kwargs)
Пример #33
0
    def write_study(self, study_id, file_content, branch, author):
        """Commit `file_content` as study `study_id` and return the new HEAD sha.

        Deprecated but needed until we merge api local-dep to master...

        `file_content` is written to a temporary file (verbatim if it is a
        string, otherwise serialized with `write_as_json`), which is copied
        to the study's path on a branch created or checked out from master
        for the user named by the part of `branch` before '_study_'. The
        file is then added and committed with `author`.

        A commit failure whose message contains "nothing to commit" is
        ignored; any other failure during the git steps is logged and
        re-raised as GitWorkflowError. The temporary file is always closed.
        """
        parent_sha = None
        gh_user = branch.split('_study_')[0]
        fc = tempfile.NamedTemporaryFile()
        try:
            # Inside the outer try so the temp file is closed even if
            # filling it fails.
            if is_str_type(file_content):
                fc.write(file_content)
            else:
                write_as_json(file_content, fc)
            fc.flush()
            try:
                study_filepath = self.path_for_study(study_id)
                study_dir = os.path.split(study_filepath)[0]
                if parent_sha is None:
                    self.checkout_master()
                    parent_sha = self.get_master_sha()
                branch = self.create_or_checkout_branch(gh_user, study_id, parent_sha, force_branch_name=True)
                # create a study directory if this is a new study EJM- what if it isn't?
                if not os.path.isdir(study_dir):
                    os.makedirs(study_dir)
                shutil.copy(fc.name, study_filepath)
                git(self.gitdir, self.gitwd, "add", study_filepath)
                try:
                    git(self.gitdir,
                        self.gitwd,
                        "commit",
                        author=author,
                        message="Update Study #%s via OpenTree API" % study_id)
                except Exception as e:
                    # We can ignore this if no changes are new,
                    # otherwise raise a 400
                    # (`.message` is gone on Python 3, hence the str(e) fallback)
                    commit_err = getattr(e, 'message', None) or str(e)
                    if "nothing to commit" not in commit_err:  #@EJM is this dangerous?
                        _LOG.exception('"git commit" failed')
                        self.reset_hard()
                        raise
                new_sha = git(self.gitdir, self.gitwd, "rev-parse", "HEAD")
            except Exception as e:
                _LOG.exception('write_study exception')
                details = getattr(e, 'message', None) or str(e)
                raise GitWorkflowError("Could not write to study #%s ! Details: \n%s" % (study_id, details))
        finally:
            fc.close()
        return new_sha
Пример #34
0
def _warn_missing_setting(section, param, config_filename, warn_on_none_level=logging.WARN):
    if warn_on_none_level is None:
        return
    # noinspection PyProtectedMember
    from peyotl.utility.get_logger import warn_from_util_logger
    from peyotl.utility.str_util import is_str_type
    if config_filename:
        if not is_str_type(config_filename):
            f = ' "{}" '.format('", "'.join(config_filename))
        else:
            f = ' "{}" '.format(config_filename)
    else:
        f = ' '
    mf = 'Config file {f} does not contain option "{o}"" in section "{s}"'
    msg = mf.format(f=f, o=param, s=section)
    warn_from_util_logger(msg)
Пример #35
0
def write_as_json(blob, dest, indent=0, sort_keys=True):
    """Serialize `blob` as JSON, followed by a newline, to `dest`.

    If `dest` is a string it is treated as a filepath: the file is opened for
    writing with utf-8 encoding and closed before returning. Otherwise `dest`
    is used as an already-open stream; it is written to and flushed, but not
    closed, and its encoding is left alone.
    """
    owns_stream = is_str_type(dest)
    stream = codecs.open(dest, mode='w', encoding='utf-8') if owns_stream else dest
    try:
        json.dump(blob, stream, indent=indent, sort_keys=sort_keys)
        stream.write('\n')
    finally:
        # Always flush; only close a stream that this function opened.
        stream.flush()
        if owns_stream:
            stream.close()
Пример #36
0
 def _transform_literal_meta(self, lit_bf_meta):
     """Turn a literal meta element into an (attribute key, value) pair.

     The key is the element's '@property' prefixed with '^'. The value is
     the '$' content, passed through `_coerce_literal_val_to_primitive` when
     a '@datatype' is present, `self._coercing_literals` is true and the
     content is a string. If the element has any attribute that is not in
     `_SUPPRESSED_LITERAL`, a dict of those attributes is returned as the
     value instead (with the content stored under '$' when it is truthy).
     """
     datatype = lit_bf_meta.get('@datatype')
     value = lit_bf_meta.get('$')
     prop_name = lit_bf_meta['@property']
     retained = {name: lit_bf_meta[name]
                 for name in lit_bf_meta.keys()
                 if name not in _SUPPRESSED_LITERAL}
     # Coercion should not be needed for json->json
     if datatype and self._coercing_literals and is_str_type(value):
         value = _coerce_literal_val_to_primitive(datatype, value)
     key = '^' + prop_name
     if not retained:
         return key, value
     if value:
         retained['$'] = value
     _cull_redundant_about(retained)
     return key, retained
 def _transform_literal_meta(self, lit_bf_meta):
     """Map a literal meta element to a ('^' + property, value) tuple.

     Attributes of the element that are not listed in `_SUPPRESSED_LITERAL`
     are kept; if there are any, the returned value is a dict of them (plus
     the '$' content when truthy) after `_cull_redundant_about` has been
     applied. Otherwise the returned value is the bare '$' content. String
     content is coerced with `_coerce_literal_val_to_primitive` only when
     a '@datatype' is given and `self._coercing_literals` is true.
     """
     declared_type = lit_bf_meta.get('@datatype')
     payload = lit_bf_meta.get('$')
     out_key = lit_bf_meta['@property']
     extras = {}
     for attr in lit_bf_meta.keys():
         if attr in _SUPPRESSED_LITERAL:
             continue
         extras[attr] = lit_bf_meta[attr]
     # Coercion should not be needed for json->json
     needs_coercion = declared_type and self._coercing_literals and is_str_type(payload)
     if needs_coercion:
         payload = _coerce_literal_val_to_primitive(declared_type, payload)
     out_key = '^' + out_key
     if extras:
         if payload:
             extras['$'] = payload
         _cull_redundant_about(extras)
         result = extras
     else:
         result = payload
     return out_key, result
Пример #38
0
def _warn_missing_setting(section,
                          param,
                          config_filename,
                          warn_on_none_level=logging.WARN):
    if warn_on_none_level is None:
        return
    # noinspection PyProtectedMember
    from peyotl.utility.get_logger import warn_from_util_logger
    from peyotl.utility.str_util import is_str_type
    if config_filename:
        if not is_str_type(config_filename):
            f = ' "{}" '.format('", "'.join(config_filename))
        else:
            f = ' "{}" '.format(config_filename)
    else:
        f = ' '
    mf = 'Config file {f} does not contain option "{o}"" in section "{s}"'
    msg = mf.format(f=f, o=param, s=section)
    warn_from_util_logger(msg)
Пример #39
0
def generic_commit_and_try_merge2master_wf(git_action,
                                           file_content,
                                           doc_id,
                                           auth_info,
                                           parent_sha,
                                           commit_msg='',
                                           merged_sha=None,
                                           doctype_display_name="document"):
    """Commit `file_content` for `doc_id` through `git_action`, then try to merge to master.

    `file_content` is written to a temporary file: verbatim if it is a string,
    otherwise serialized with `write_as_json`. If `git_action.max_file_size`
    exists and is not None, a larger file is rejected before the lock is
    requested. The write (`git_action.write_doc_from_tmpfile`) and the merge
    attempt (`_do_merge2master_commit`) happen between `acquire_lock_raise`
    and `git_action.release_lock()`; the lock is released and the temporary
    file closed even on failure.

    Returns a dict with the keys "error" (always 0), "resource_id",
    "branch_name", "description", "sha" and "merge_needed".

    Raises GitWorkflowError if the file is too large or if the write fails.
    """
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    # N.B. we currently assume file_content is text/JSON, or should be serialized from a dict
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        # A git_action that cannot report a max_file_size imposes no limit.
        try:
            max_file_size = git_action.max_file_size
        except Exception:
            max_file_size = None
        if max_file_size is not None:
            file_size = os.stat(fc.name).st_size
            if file_size > max_file_size:
                m = 'Commit of {t} "{i}" had a file size ({a} bytes) which ' \
                    'exceeds the maximum size allowed ({b} bytes).'
                m = m.format(t=doctype_display_name,
                             i=doc_id,
                             a=file_size,
                             b=max_file_size)
                raise GitWorkflowError(m)
        f = "Could not acquire lock to write to %s #%s" % (
            doctype_display_name, doc_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_doc_from_tmpfile(
                    doc_id, fc, parent_sha, auth_info, commit_msg,
                    doctype_display_name)
            except Exception as e:
                _LOG.exception('write_doc_from_tmpfile exception')
                # Exceptions only carry `.message` on Python 2; fall back to
                # str(e) so the handler itself cannot raise AttributeError.
                details = getattr(e, 'message', None)
                if details is None:
                    details = str(e)
                raise GitWorkflowError(
                    "Could not write to %s #%s ! Details: \n%s" %
                    (doctype_display_name, doc_id, details))
            written_fp = git_action.path_for_doc(doc_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug('write of {t} {i} on parent {p} returned = {c}'.format(
                t=doctype_display_name,
                i=doc_id,
                p=parent_sha,
                c=str(commit_resp)))
            m_resp = _do_merge2master_commit(
                git_action,
                new_sha,
                branch_name,
                written_fp,
                merged_sha=merged_sha,
                prev_file_sha=commit_resp.get('prev_file_sha'))
            # The merge step may replace the sha and branch name of the commit
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": doc_id,
        "branch_name": branch_name,
        "description": "Updated %s #%s" % (doctype_display_name, doc_id),
        "sha": new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r
Пример #40
0
 def trigger_unindex(self, study_id):
     """POST the given study id(s) to the 'unindexNexsons' endpoint.

     `study_id` may be a single string id (it is wrapped in a list) or an
     already-assembled collection of ids, which is sent as-is under the
     'ids' key. Returns whatever `self.json_http_post` returns.
     """
     endpoint = '{p}/unindexNexsons'.format(p=self.indexing_prefix)
     id_list = [study_id] if is_str_type(study_id) else study_id
     return self.json_http_post(endpoint, data=anyjson.dumps({'ids': id_list}))
Пример #41
0
def commit_and_try_merge2master(git_action,
                                file_content,
                                study_id,
                                auth_info,
                                parent_sha,
                                commit_msg='',
                                merged_sha=None):
    """Commit `file_content` for `study_id` through `git_action`, then try to merge to master.

    `file_content` is written to a temporary file: verbatim if it is a string,
    otherwise serialized with `write_as_json`. If `git_action.max_file_size`
    exists and is not None, a larger file is rejected before the lock is
    requested. The write (`git_action.write_study_from_tmpfile`) and the merge
    attempt (`_do_merge2master_commit`) happen between `acquire_lock_raise`
    and `git_action.release_lock()`; the lock is released and the temporary
    file closed even on failure.

    Returns a dict with the keys "error" (always 0), "resource_id",
    "branch_name", "description", "sha" and "merge_needed".

    Raises GitWorkflowError if the file is too large or if the write fails.
    """
    merge_needed = False
    fc = tempfile.NamedTemporaryFile()
    try:
        if is_str_type(file_content):
            fc.write(file_content)
        else:
            write_as_json(file_content, fc)
        fc.flush()
        # A git_action that cannot report a max_file_size imposes no limit.
        try:
            max_file_size = git_action.max_file_size
        except Exception:
            max_file_size = None
        if max_file_size is not None:
            file_size = os.stat(fc.name).st_size
            if file_size > max_file_size:
                m = 'Commit of study "{s}" had a file size ({a} bytes) which exceeds the maximum size allowed ({b} bytes).'
                m = m.format(s=study_id, a=file_size, b=max_file_size)
                raise GitWorkflowError(m)
        f = "Could not acquire lock to write to study #{s}".format(s=study_id)
        acquire_lock_raise(git_action, fail_msg=f)
        try:
            try:
                commit_resp = git_action.write_study_from_tmpfile(study_id, fc, parent_sha, auth_info, commit_msg)
            except Exception as e:
                _LOG.exception('write_study_from_tmpfile exception')
                # Exceptions only carry `.message` on Python 2; fall back to
                # str(e) so the handler itself cannot raise AttributeError.
                details = getattr(e, 'message', None)
                if details is None:
                    details = str(e)
                raise GitWorkflowError("Could not write to study #%s ! Details: \n%s" % (study_id, details))
            written_fp = git_action.path_for_study(study_id)
            branch_name = commit_resp['branch']
            new_sha = commit_resp['commit_sha']
            _LOG.debug('write of study {s} on parent {p} returned = {c}'.format(s=study_id,
                                                                                p=parent_sha,
                                                                                c=str(commit_resp)))
            m_resp = _do_merge2master_commit(git_action,
                                             new_sha,
                                             branch_name,
                                             written_fp,
                                             merged_sha=merged_sha,
                                             prev_file_sha=commit_resp.get('prev_file_sha'))
            # The merge step may replace the sha and branch name of the commit
            new_sha, branch_name, merge_needed = m_resp
        finally:
            git_action.release_lock()
    finally:
        fc.close()
    # What other useful information should be returned on a successful write?
    r = {
        "error": 0,
        "resource_id": study_id,
        "branch_name": branch_name,
        "description": "Updated study #%s" % study_id,
        "sha":  new_sha,
        "merge_needed": merge_needed,
    }
    _LOG.debug('returning {r}'.format(r=str(r)))
    return r
Пример #42
0
def check_raw_str(x, obj, k, vc):
    """Return `__TRUE_VAL` if `x` is a string, otherwise `__FALSE_STR`.

    `obj`, `k` and `vc` are accepted but not used by this check.
    """
    if is_str_type(x):
        return __TRUE_VAL
    return __FALSE_STR
Пример #43
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert the NexSON object `src` to the content/format described by this schema.

        src_schema: if given, its `format_code` and `version` describe `src`;
            otherwise `src` is treated as NexSON with no stated version.
        serialize: for NexSON output, a true value requests JSON text (written
            to `output_dest` if that is given, else returned as a string) and
            a false value returns the Python object. The other formats are
            always serialized; an explicit false value raises ValueError.
        output_dest: a filepath or a writable stream. A filepath opened here
            for NeXML/NEXUS/Newick output is closed before returning.

        Returns None when the requested content was not found, or when NexSON
        or NeXML output was written to `output_dest`. NEXUS/Newick output is
        returned as a string even when it was also written to `output_dest`.

        Raises NotImplementedError if `self.can_convert_from()` is false or
        the source format is not NexSON.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)

                else:
                    # Map each extracted tree id to its tree object
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        i, t = ito_tup[0], ito_tup[1]
                        d[i] = t
            elif self.content == 'meta':
                # Return value is ignored; `d` (i.e. `src`) is what gets returned below
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try content_id as an otus group id first, then as an otu id
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                else:
                    f, wrapper = get_utf_8_string_io_writer()
                    write_as_json(d, wrapper)
                    flush_utf_8_writer(wrapper)
                    return f.getvalue()
            else:
                return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # A filepath is opened here; remember that so it can be closed again
        # (previously the handle was leaked and never explicitly flushed).
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            elif self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    if isinstance(self.content_id, (list, tuple)):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
        finally:
            if opened_here:
                output_dest.close()
        assert False
Пример #44
0
 def trigger_unindex(self, study_id):
     """Ask the indexing service to un-index one or more studies.

     A single string `study_id` is wrapped in a one-element list; any other
     value is passed through unchanged. The ids are JSON-encoded under the
     'ids' key and POSTed to '<indexing_prefix>/unindexNexsons' via
     `self.json_http_post`, whose result is returned.
     """
     target = '{p}/unindexNexsons'.format(p=self.indexing_prefix)
     if is_str_type(study_id):
         ids = [study_id]
     else:
         ids = study_id
     body = {'ids': ids}
     return self.json_http_post(target, data=anyjson.dumps(body))
Пример #45
0
    def _post_key_check_validate_tree(self,
                                      tree_nex_id,
                                      tree_obj,
                                      vc,
                                      otus_group_id=None):
        """Validate the node/edge structure of one tree object.

        The checks run in a fixed order and the first failure ends validation:

          1. 'nodeById' and 'edgeBySourceId' must be non-empty dicts and
             '^ot:rootNodeId' must be a string.
          2. Every edge source must be in 'nodeById', every node must be
             reachable (be an edge source or target), every edge needs a
             '@target', no node may be the target of two edges, and edge ids
             must not repeat.
          3. Leaves (edge targets that are not edge sources), internal nodes
             (edge sources) and edges are passed to the respective
             `_validate_*_list` methods.
          4. Exactly one internal node may have a truthy '@root', and it must
             be the node named by '^ot:rootNodeId'.
          5. No two leaves may share an '@otu' value.

        Structural problems are reported through `self._error_event` and the
        result of `errorReturn(...)` is returned (its value is defined
        elsewhere). A failing `_validate_*_list` call yields False. If every
        check passes, the result of `self._validate_otu_key_if_present` is
        returned.

        If `otus_group_id` is None it is read from the '@otus' attribute of
        the second element of the last entry in `vc.anc_list`.
        """
        # pylint: disable=R0914
        node_by_id = tree_obj.get('nodeById')
        edge_by_source = tree_obj.get('edgeBySourceId')
        root_node_id = tree_obj.get('^ot:rootNodeId')
        # --- required top-level content ---
        if (not node_by_id) or (not isinstance(node_by_id, dict)):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=[
                                  'nodeById',
                              ])
            return errorReturn('no "nodeById" in tree')
        if (not edge_by_source) or (not isinstance(edge_by_source, dict)):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=[
                                  'edgeBySourceId',
                              ])
            return errorReturn('no "edgeBySourceId" in tree')
        if not is_str_type(root_node_id):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=[
                                  '^ot:rootNodeId',
                              ])
            return errorReturn('no "^ot:rootNodeId" in tree')
        edge_dict = {}  # edge id -> edge
        edge_by_target = {}  # target node id -> edge
        internal_nodes = []
        if otus_group_id is None:
            tree_group = vc.anc_list[-1][1]
            otus_group_id = tree_group.get('@otus')

        # --- single pass over the edges, collecting every kind of problem ---
        bad_node_ref = []
        repeated_edge_id = []
        missing_target = []
        repeated_target = []
        reachable_nodes = set()
        for par_node_id, edge_by_id in edge_by_source.items():
            if par_node_id not in node_by_id:
                bad_node_ref.append(par_node_id)
            else:
                reachable_nodes.add(par_node_id)
                for edge_id, edge in edge_by_id.items():
                    if edge_id in edge_dict:
                        repeated_edge_id.append(edge_id)
                    else:
                        edge_dict[edge_id] = edge
                        # An edge that does not support .get() is treated as
                        # having no target
                        try:
                            t = edge.get('@target')
                        except:
                            t = None
                        if t is None:
                            missing_target.append(edge_id)
                        elif t in edge_by_target:
                            repeated_target.append(t)
                        else:
                            edge_by_target[t] = edge
                            reachable_nodes.add(t)
        # --- report the collected problems; the first non-empty category wins ---
        node_set = set(node_by_id.keys())
        if node_set != reachable_nodes:
            unreachable = list(node_set - reachable_nodes)
            unreachable.sort()
            not_in_node_by_id = list(reachable_nodes - node_set)
            not_in_node_by_id.sort()
            if unreachable:
                self._error_event(_NEXEL.TREE,
                                  obj=tree_obj,
                                  err_type=gen_UnreachableNodeWarning,
                                  anc=vc.anc_list,
                                  obj_nex_id=tree_nex_id,
                                  key_list=unreachable)
                return errorReturn('unreachable node in tree tree')
            if not_in_node_by_id:
                self._error_event(_NEXEL.TREE,
                                  obj=tree_obj,
                                  err_type=gen_ReferencedIDNotFoundWarning,
                                  anc=vc.anc_list,
                                  obj_nex_id=tree_nex_id,
                                  key_list=not_in_node_by_id)
                return errorReturn(
                    'referenced node id not in "nodeById" in tree')
        if bad_node_ref:
            bad_node_ref.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_ReferencedIDNotFoundWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=bad_node_ref)
            return errorReturn(
                'referenced parent node not in "nodeById" in tree')
        if missing_target:
            missing_target.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_ReferencedIDNotFoundWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=missing_target)
            return errorReturn('no "@target" in edge')
        if repeated_target:
            repeated_target.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_NodeWithMultipleParents,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=repeated_target)
            return errorReturn(
                'same node used as "@target" for different edges')
        if repeated_edge_id:
            repeated_edge_id.sort()
            self._error_event(_NEXEL.EDGE,
                              obj=tree_obj,
                              err_type=gen_RepeatedIDWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=repeated_edge_id)
            return errorReturn('edge "@id" repeated')
        # --- leaves: nodes that are an edge target but never an edge source ---
        node_set = set(edge_by_target.keys())
        internal_node_set = set(edge_by_source.keys())
        leaf_set = node_set - internal_node_set
        leaves = [(i, node_by_id[i]) for i in leaf_set]
        vc.push_context(_NEXEL.LEAF_NODE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_leaf_list(leaves, vc):
                return False
        finally:
            vc.pop_context()
        # --- root: exactly one internal node flagged '@root', matching root_node_id ---
        internal_nodes = [(i, node_by_id[i]) for i in internal_node_set]
        with_at_root_prop = {}
        for nid, n_obj in internal_nodes:
            if n_obj.get('@root'):
                with_at_root_prop[nid] = n_obj
        if len(with_at_root_prop) > 1:
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MultipleRootsWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=list(with_at_root_prop.keys()))
            return errorReturn('multiple "@root" nodes')
        if len(with_at_root_prop) == 0:
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_NoRootWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id)
            return errorReturn('No node labelled as "@root"')
        if root_node_id not in with_at_root_prop:
            # Reported with the multiple-roots warning type, listing both the
            # flagged node and the declared root id
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MultipleRootsWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=list(with_at_root_prop.keys()) +
                              [root_node_id])
            return errorReturn('root node not labelled as root')
        # --- per-element validation of internal nodes and edges ---
        vc.push_context(_NEXEL.INTERNAL_NODE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_internal_node_list(internal_nodes, vc):
                return False
        finally:
            vc.pop_context()
        edges = [i for i in edge_dict.items()]
        vc.push_context(_NEXEL.EDGE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_edge_list(edges, vc):
                return False
        finally:
            vc.pop_context()
        # --- each OTU may be referenced by at most one leaf ---
        otuid2leaf = {}
        for nd_id, nd in leaves:
            otuid = nd['@otu']
            if otuid in otuid2leaf:
                # The context is pushed only for the duration of the error report
                vc.push_context(_NEXEL.LEAF_NODE, (tree_obj, tree_nex_id))
                try:
                    self._error_event(_NEXEL.LEAF_NODE,
                                      obj=nd,
                                      err_type=gen_RepeatedOTUWarning,
                                      anc=vc.anc_list,
                                      obj_nex_id=nd_id,
                                      key_list=[otuid])
                    return errorReturn('Repeated "@otu" id')
                finally:
                    vc.pop_context()
            otuid2leaf[otuid] = nd_id
        self._detect_multilabelled_tree(otus_group_id=otus_group_id,
                                        tree_id=tree_nex_id,
                                        otuid2leaf=otuid2leaf)
        og = self._otu_group_by_id[otus_group_id]['otuById']
        return self._validate_otu_key_if_present(node_by_id.items(), og, vc)
Пример #46
0
    def convert(self, src, serialize=None, output_dest=None, src_schema=None):
        """Convert the NexSON object `src` to the content/format described by this schema.

        src_schema: if given, its `format_code` and `version` describe `src`;
            otherwise `src` is treated as NexSON with no stated version.
        serialize: for NexSON output, a true value requests JSON text (written
            to `output_dest` if that is given, else returned as a string) and
            a false value returns the Python object. The other formats are
            always serialized; an explicit false value raises ValueError.
        output_dest: a filepath or a writable stream. A filepath opened here
            for NeXML/NEXUS/Newick output is closed before returning.

        Returns None when the requested content was not found, or when NexSON
        or NeXML output was written to `output_dest`. NEXUS/Newick output is
        returned as a string even when it was also written to `output_dest`.

        Raises NotImplementedError if `self.can_convert_from()` is false or
        the source format is not NexSON.
        """
        if src_schema is None:
            src_format = PhyloSchema.NEXSON
            current_format = None
        else:
            src_format = src_schema.format_code
            current_format = src_schema.version
        if not self.can_convert_from():
            m = 'Conversion of {c} to {d} is not supported'.format(c=self.content, d=self.description)
            raise NotImplementedError(m)
        if src_format != PhyloSchema.NEXSON:
            raise NotImplementedError('Only conversion from NexSON is currently supported')
        if self.format_code == PhyloSchema.NEXSON:
            d = src
            if self.content == 'study':
                d = convert_nexson_format(src,
                                          out_nexson_format=self.version,
                                          current_format=current_format,
                                          remove_old_structs=True,
                                          pristine_if_invalid=False,
                                          sort_arbitrary=False)
            elif self.content in ('tree', 'subtree'):
                if self.content == 'tree' and self.cull_nonmatching:
                    d = cull_nonmatching_trees(d, self.content_id, current_format)
                    d = convert_nexson_format(d,
                                              out_nexson_format=self.version,
                                              current_format=current_format,
                                              remove_old_structs=True,
                                              pristine_if_invalid=False,
                                              sort_arbitrary=False)

                else:
                    # Map each extracted tree id to its tree object
                    i_t_o_list = extract_tree_nexson(d, self.content_id, current_format)
                    d = {}
                    for ito_tup in i_t_o_list:
                        i, t = ito_tup[0], ito_tup[1]
                        d[i] = t
            elif self.content == 'meta':
                # Return value is ignored; `d` (i.e. `src`) is what gets returned below
                strip_to_meta_only(d, current_format)
            elif self.content == 'otus':
                d = extract_otus_nexson(d, self.content_id, current_format)
            elif self.content == 'otu':
                d = extract_otu_nexson(d, self.content_id, current_format)
            elif self.content == 'otumap':
                if self.content_id is None:
                    r = extract_otu_nexson(d, None, current_format)
                else:
                    # Try content_id as an otus group id first, then as an otu id
                    p = extract_otus_nexson(d, self.content_id, current_format)
                    if p is None:
                        r = extract_otu_nexson(d, self.content_id, current_format)
                    else:
                        r = {}
                        for v in p.values():
                            r.update(v.get('otuById', {}))
                if not r:
                    return None
                d = _otu_dict_to_otumap(r)
            elif self.content == 'treelist':
                i_t_o_list = extract_tree_nexson(d,
                                                 self.content_id,
                                                 current_format)
                d = [i[0] for i in i_t_o_list]
            if d is None:
                return None
            if serialize:
                if output_dest:
                    write_as_json(d, output_dest)
                    return None
                else:
                    f, wrapper = get_utf_8_string_io_writer()
                    write_as_json(d, wrapper)
                    flush_utf_8_writer(wrapper)
                    return f.getvalue()
            else:
                return d
        # Non-NexSON types go here...
        if (serialize is not None) and (not serialize):
            raise ValueError('Conversion without serialization is only supported for the NexSON format')
        # A filepath is opened here; remember that so it can be closed again
        # (previously the handle was leaked and never explicitly flushed).
        opened_here = False
        if output_dest and is_str_type(output_dest):
            output_dest = codecs.open(output_dest, 'w', encoding='utf-8')
            opened_here = True
        try:
            if self.format_code == PhyloSchema.NEXML:
                if output_dest:
                    write_obj_as_nexml(src, output_dest, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
                    return
                return convert_to_nexml(src, addindent=' ', newl='\n', otu_label=self.otu_label_prop)
            elif self.format_code in [PhyloSchema.NEXUS, PhyloSchema.NEWICK]:
                if self.content in ('tree', 'subtree'):
                    if isinstance(self.content_id, (list, tuple)):
                        ci, subtree_id = self.content_id
                    else:
                        ci, subtree_id = self.content_id, None
                else:
                    ci, subtree_id = None, None
                response = extract_tree(src, ci, self, subtree_id=subtree_id)
                # these formats are always serialized...
                if output_dest:
                    output_dest.write(response)
                    output_dest.write('\n')
                return response
        finally:
            if opened_here:
                output_dest.close()
        assert False
Пример #47
0
    def __init__(self, schema=None, **kwargs):
        '''Configure the requested output from `schema` and keyword arguments.

        The output format is taken from the first of these that is supplied:
            'schema',
            'type_ext', then
            'output_nexml2json' (implicitly NexSON), then
            'format_str'

        Other keyword arguments read here:
            'content' (defaults to 'study'), 'content_id', 'bracket_ingroup',
            'cull_nonmatching' (a bool, or a "true"/"false"/"1"/"0" string),
            'version' and 'repo_nexml2json' (NexSON output only),
            'otu_label' and then 'tip_label' (non-NexSON output only;
                defaults to 'ot:originallabel').

        Raises ValueError if an argument is missing, is not recognized, or is
        inconsistent with the requested content.
        '''
        self.content = kwargs.get('content', 'study')
        self.bracket_ingroup = bool(kwargs.get('bracket_ingroup', False))
        self.content_id = kwargs.get('content_id')
        self.cull_nonmatching = kwargs.get('cull_nonmatching')
        err_msg = 'expected cull_nonmatching to be "true" or "false" or the boolean versions of those values. found {}'
        #pylint: disable=E1103
        if is_str_type(self.cull_nonmatching):
            # normalize the string spellings of a boolean to a real bool
            lowered = self.cull_nonmatching.lower()
            if lowered in ('true', '1'):
                self.cull_nonmatching = True
            elif lowered in ('false', '0'):
                self.cull_nonmatching = False
            else:
                raise ValueError(err_msg.format(kwargs.get('cull_nonmatching')))
        elif self.cull_nonmatching is not None and not isinstance(self.cull_nonmatching, bool):
            raise ValueError(err_msg.format('a non-boolean and non-string value'))

        # the kind of content determines what shape of content_id is legal
        if self.content not in PhyloSchema._content_types:
            raise ValueError('"content" must be one of: "{}"'.format('", "'.join(PhyloSchema._content_types)))
        if self.content in PhyloSchema._no_content_id_types:
            if self.content_id is not None:
                raise ValueError('No content_id expected for "{}" content'.format(self.content))
        elif self.content in PhyloSchema._str_content_id_types:
            if not (self.content_id is None or is_str_type(self.content_id)):
                raise ValueError('content_id for "{}" content must be a string (if provided)'.format(self.content))
        else:
            # every remaining content type needs a 2-element list or tuple
            if not isinstance(self.content_id, (list, tuple)) or len(self.content_id) != 2:
                raise ValueError('Expecting 2 content_ids for the "subtree" content')

        # determine the format string from the highest priority source
        if schema is not None:
            self.format_str = schema.lower()
        elif kwargs.get('type_ext') is not None:
            ext = kwargs['type_ext'].lower()
            try:
                self.format_str = PhyloSchema._extension2format[ext]
            except KeyError:
                raise ValueError('file extension "{}" not recognized'.format(kwargs['type_ext']))
        elif kwargs.get('output_nexml2json') is not None:
            self.format_str = 'nexson'
            self.version = kwargs['output_nexml2json']
        else:
            self.format_str = kwargs.get('format_str')
        if self.format_str is None:
            raise ValueError('Expecting "format_str", "schema", or "type_ext" argument')
        try:
            self.format_code = PhyloSchema._format_list.index(self.format_str)
        except ValueError:
            raise ValueError('format "{}" not recognized'.format(self.format_str))

        if self.format_code == PhyloSchema.NEXSON:
            msg = 'Expecting version of NexSON to be specified using ' \
                  '"output_nexml2json" argument (or via some other mechanism)'
            try:
                if not hasattr(self, 'version'):
                    if 'output_nexml2json' in kwargs:
                        self.version = kwargs['output_nexml2json']
                    else:
                        self.version = kwargs['version']
                if self.version == 'native':
                    self.version = kwargs['repo_nexml2json']
                supported = _is_supported_nexson_vers(self.version)
            except Exception:
                # a missing key (or a version value the checker cannot
                # handle) means that no usable version was specified
                raise ValueError(msg)
            # raised outside of the try block so that this more specific
            # message is not replaced by the generic one above
            if not supported:
                raise ValueError('The "{}" version of NexSON is not supported'.format(self.version))
        else:
            if self.content in ['meta']:
                raise ValueError('The "{}" content can only be returned in NexSON'.format(self.content))
            # 'otu_label' takes precedence over 'tip_label'; an explicit None
            # for either is treated the same as the argument being omitted
            otu_label = kwargs.get('otu_label')
            if otu_label is None:
                otu_label = kwargs.get('tip_label')
            if otu_label is None:
                otu_label = 'ot:originallabel'
            self.otu_label = otu_label.lower()
            if self.otu_label not in PhyloSchema._otu_label_list:
                # accept the label without its "ot:" namespace prefix
                with_ns = 'ot:{}'.format(self.otu_label)
                if with_ns in PhyloSchema._otu_label_list:
                    self.otu_label = with_ns
                else:
                    m = '"otu_label" or "tip_label" must be one of "{}"'
                    m = m.format('", "'.join(PhyloSchema._otu_label_list))
                    raise ValueError(m)
            self.otu_label_prop = PhyloSchema._otu_label2prop[self.otu_label]
Пример #48
0
    def _post_key_check_validate_tree(self,
                                      tree_nex_id,
                                      tree_obj,
                                      vc,
                                      otus_group_id=None):
        '''Validates the node/edge structure of a single tree object.

        The checks are run in the order below. The first failure is reported
        via `self._error_event` and ends the validation:
            1. 'nodeById' and 'edgeBySourceId' are non-empty dicts and
               '^ot:rootNodeId' is a string
            2. the set of nodes collected from the edges matches the keys of
               'nodeById'
            3. every source node of an edge is in 'nodeById', every edge has a
               '@target', no node is the '@target' of 2 edges, and no edge id
               is repeated
            4. the leaves pass `self._validate_leaf_list`
            5. exactly 1 internal node has a true '@root' property, and it is
               the node named by '^ot:rootNodeId'
            6. the internal nodes pass `self._validate_internal_node_list` and
               the edges pass `self._validate_edge_list`
            7. no 2 leaves refer to the same '@otu'

        `tree_nex_id` is the id of the tree (passed on as `obj_nex_id` in
            error events).
        `tree_obj` is the dict-like tree being checked.
        `vc` is the validation context; its `anc_list`, `push_context` and
            `pop_context` members are used here.
        `otus_group_id` is the id of the otus group for the tree. If None, it
            is read from the '@otus' property of `vc.anc_list[-1][1]`.

        Returns the result of `errorReturn` for a structural error, False if
        one of the leaf/internal node/edge list validations fails, and
        otherwise the result of `self._validate_otu_key_if_present`.
        '''
        #pylint: disable=R0914
        node_by_id = tree_obj.get('nodeById')
        edge_by_source = tree_obj.get('edgeBySourceId')
        root_node_id = tree_obj.get('^ot:rootNodeId')
        if (not node_by_id) or (not isinstance(node_by_id, dict)):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=['nodeById',])
            return errorReturn('no "nodeById" in tree')
        if (not edge_by_source) or (not isinstance(edge_by_source, dict)):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=['edgeBySourceId',])
            return errorReturn('no "edgeBySourceId" in tree')
        if not is_str_type(root_node_id):
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MissingCrucialContentWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=['^ot:rootNodeId',])
            return errorReturn('no "^ot:rootNodeId" in tree')
        edge_dict = {}
        edge_by_target = {}
        if otus_group_id is None:
            tree_group = vc.anc_list[-1][1]
            otus_group_id = tree_group.get('@otus')

        # One pass over the edges to index them by id and by target, and to
        #   collect every problem. Nothing is reported until the pass is done.
        bad_node_ref = []
        repeated_edge_id = []
        missing_target = []
        repeated_target = []
        reachable_nodes = set()
        for par_node_id, edge_by_id in edge_by_source.items():
            if par_node_id not in node_by_id:
                bad_node_ref.append(par_node_id)
            else:
                reachable_nodes.add(par_node_id)
                for edge_id, edge in edge_by_id.items():
                    if edge_id in edge_dict:
                        repeated_edge_id.append(edge_id)
                    else:
                        edge_dict[edge_id] = edge
                        # an edge that is not dict-like is treated the same as
                        #   an edge that lacks a '@target'
                        try:
                            t = edge.get('@target')
                        except Exception:
                            t = None
                        if t is None:
                            missing_target.append(edge_id)
                        elif t in edge_by_target:
                            repeated_target.append(t)
                        else:
                            edge_by_target[t] = edge
                            reachable_nodes.add(t)
        node_set = set(node_by_id.keys())
        if node_set != reachable_nodes:
            unreachable = sorted(node_set - reachable_nodes)
            not_in_node_by_id = sorted(reachable_nodes - node_set)
            if unreachable:
                self._error_event(_NEXEL.TREE,
                                  obj=tree_obj,
                                  err_type=gen_UnreachableNodeWarning,
                                  anc=vc.anc_list,
                                  obj_nex_id=tree_nex_id,
                                  key_list=unreachable)
                return errorReturn('unreachable node in tree')
            if not_in_node_by_id:
                self._error_event(_NEXEL.TREE,
                                  obj=tree_obj,
                                  err_type=gen_ReferencedIDNotFoundWarning,
                                  anc=vc.anc_list,
                                  obj_nex_id=tree_nex_id,
                                  key_list=not_in_node_by_id)
                return errorReturn('referenced node id not in "nodeById" in tree')
        if bad_node_ref:
            bad_node_ref.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_ReferencedIDNotFoundWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=bad_node_ref)
            return errorReturn('referenced parent node not in "nodeById" in tree')
        if missing_target:
            missing_target.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_ReferencedIDNotFoundWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=missing_target)
            return errorReturn('no "@target" in edge')
        if repeated_target:
            repeated_target.sort()
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_NodeWithMultipleParents,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=repeated_target)
            return errorReturn('same node used as "@target" for different edges')
        if repeated_edge_id:
            repeated_edge_id.sort()
            self._error_event(_NEXEL.EDGE,
                              obj=tree_obj,
                              err_type=gen_RepeatedIDWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              key_list=repeated_edge_id)
            return errorReturn('edge "@id" repeated')
        # a leaf is the target of an edge, but not the source of any edge
        node_set = set(edge_by_target.keys())
        internal_node_set = set(edge_by_source.keys())
        leaf_set = node_set - internal_node_set
        leaves = [(i, node_by_id[i]) for i in leaf_set]
        vc.push_context(_NEXEL.LEAF_NODE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_leaf_list(leaves, vc):
                return False
        finally:
            vc.pop_context()
        internal_nodes = [(i, node_by_id[i]) for i in internal_node_set]
        # only internal nodes are checked for the '@root' property
        with_at_root_prop = {}
        for nid, n_obj in internal_nodes:
            if n_obj.get('@root'):
                with_at_root_prop[nid] = n_obj
        if len(with_at_root_prop) > 1:
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MultipleRootsWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=list(with_at_root_prop.keys()))
            return errorReturn('multiple "@root" nodes')
        if len(with_at_root_prop) == 0:
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_NoRootWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id)
            return errorReturn('No node labelled as "@root"')
        if root_node_id not in with_at_root_prop:
            # the node flagged with '@root' and the node named by
            #   '^ot:rootNodeId' disagree, so both are reported
            self._error_event(_NEXEL.TREE,
                              obj=tree_obj,
                              err_type=gen_MultipleRootsWarning,
                              anc=vc.anc_list,
                              obj_nex_id=tree_nex_id,
                              node_id_list=list(with_at_root_prop.keys()) + [root_node_id])
            return errorReturn('root node not labelled as root')
        vc.push_context(_NEXEL.INTERNAL_NODE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_internal_node_list(internal_nodes, vc):
                return False
        finally:
            vc.pop_context()
        edges = list(edge_dict.items())
        vc.push_context(_NEXEL.EDGE, (tree_obj, tree_nex_id))
        try:
            if not self._validate_edge_list(edges, vc):
                return False
        finally:
            vc.pop_context()
        # each otu may be referred to by at most 1 leaf of the tree
        otuid2leaf = {}
        for nd_id, nd in leaves:
            otuid = nd['@otu']
            if otuid in otuid2leaf:
                vc.push_context(_NEXEL.LEAF_NODE, (tree_obj, tree_nex_id))
                try:
                    self._error_event(_NEXEL.LEAF_NODE,
                                      obj=nd,
                                      err_type=gen_RepeatedOTUWarning,
                                      anc=vc.anc_list,
                                      obj_nex_id=nd_id,
                                      key_list=[otuid])
                    return errorReturn('Repeated "@otu" id')
                finally:
                    vc.pop_context()
            otuid2leaf[otuid] = nd_id
        self._detect_multilabelled_tree(otus_group_id=otus_group_id,
                                        tree_id=tree_nex_id,
                                        otuid2leaf=otuid2leaf)
        og = self._otu_group_by_id[otus_group_id]['otuById']
        return self._validate_otu_key_if_present(node_by_id.items(), og, vc)
Пример #49
0
    def __init__(self, schema=None, **kwargs):
        '''Configure the requested output from `schema` and keyword arguments.

        The output format is taken from the first of these that is supplied:
            'schema',
            'type_ext', then
            'output_nexml2json' (implicitly NexSON), then
            'format_str'
            If exporting to a non-nexson format, `otu_label` (and then
                `tip_label`) are checked to determine how to label the tips.
                'ot:originallabel', 'ot:ottid', and 'ot:otttaxonname'
                are supported values; 'ot:originallabel' is used if neither
                argument is supplied.

        Other keyword arguments read here:
            'content' (defaults to 'study'), 'content_id', 'bracket_ingroup',
            'cull_nonmatching' (a bool, or a "true"/"false"/"1"/"0" string),
            'version' and 'repo_nexml2json' (NexSON output only).

        Raises ValueError if an argument is missing, is not recognized, or is
        inconsistent with the requested content.
        '''
        self.content = kwargs.get('content', 'study')
        self.bracket_ingroup = bool(kwargs.get('bracket_ingroup', False))
        self.content_id = kwargs.get('content_id')
        self.cull_nonmatching = kwargs.get('cull_nonmatching')
        err_msg = 'expected cull_nonmatching to be "true" or "false" or the boolean versions of those values. found {}'
        #pylint: disable=E1103
        if is_str_type(self.cull_nonmatching):
            # normalize the string spellings of a boolean to a real bool
            lowered = self.cull_nonmatching.lower()
            if lowered in ('true', '1'):
                self.cull_nonmatching = True
            elif lowered in ('false', '0'):
                self.cull_nonmatching = False
            else:
                raise ValueError(err_msg.format(kwargs.get('cull_nonmatching')))
        elif self.cull_nonmatching is not None and not isinstance(self.cull_nonmatching, bool):
            raise ValueError(err_msg.format('a non-boolean and non-string value'))

        # the kind of content determines what shape of content_id is legal
        if self.content not in PhyloSchema._content_types:
            raise ValueError('"content" must be one of: "{}"'.format('", "'.join(PhyloSchema._content_types)))
        if self.content in PhyloSchema._no_content_id_types:
            if self.content_id is not None:
                raise ValueError('No content_id expected for "{}" content'.format(self.content))
        elif self.content in PhyloSchema._str_content_id_types:
            if not (self.content_id is None or is_str_type(self.content_id)):
                raise ValueError('content_id for "{}" content must be a string (if provided)'.format(self.content))
        else:
            # every remaining content type needs a 2-element list or tuple
            if not isinstance(self.content_id, (list, tuple)) or len(self.content_id) != 2:
                raise ValueError('Expecting 2 content_ids for the "subtree" content')

        # determine the format string from the highest priority source
        if schema is not None:
            self.format_str = schema.lower()
        elif kwargs.get('type_ext') is not None:
            ext = kwargs['type_ext'].lower()
            try:
                self.format_str = PhyloSchema._extension2format[ext]
            except KeyError:
                raise ValueError('file extension "{}" not recognized'.format(kwargs['type_ext']))
        elif kwargs.get('output_nexml2json') is not None:
            self.format_str = 'nexson'
            self.version = kwargs['output_nexml2json']
        else:
            self.format_str = kwargs.get('format_str')
        if self.format_str is None:
            raise ValueError('Expecting "format_str", "schema", or "type_ext" argument')
        try:
            self.format_code = PhyloSchema._format_list.index(self.format_str)
        except ValueError:
            raise ValueError('format "{}" not recognized'.format(self.format_str))

        if self.format_code == PhyloSchema.NEXSON:
            msg = 'Expecting version of NexSON to be specified using ' \
                  '"output_nexml2json" argument (or via some other mechanism)'
            try:
                if not hasattr(self, 'version'):
                    if 'output_nexml2json' in kwargs:
                        self.version = kwargs['output_nexml2json']
                    else:
                        self.version = kwargs['version']
                if self.version == 'native':
                    self.version = kwargs['repo_nexml2json']
                supported = _is_supported_nexson_vers(self.version)
            except Exception:
                # a missing key (or a version value the checker cannot
                # handle) means that no usable version was specified
                raise ValueError(msg)
            # raised outside of the try block so that this more specific
            # message is not replaced by the generic one above
            if not supported:
                raise ValueError('The "{}" version of NexSON is not supported'.format(self.version))
        else:
            if self.content in ['meta']:
                raise ValueError('The "{}" content can only be returned in NexSON'.format(self.content))
            # 'otu_label' takes precedence over 'tip_label'; an explicit None
            # for either is treated the same as the argument being omitted
            otu_label = kwargs.get('otu_label')
            if otu_label is None:
                otu_label = kwargs.get('tip_label')
            if otu_label is None:
                otu_label = 'ot:originallabel'
            self.otu_label = otu_label.lower()
            if self.otu_label not in PhyloSchema._otu_label_list:
                # accept the label without its "ot:" namespace prefix
                with_ns = 'ot:{}'.format(self.otu_label)
                if with_ns in PhyloSchema._otu_label_list:
                    self.otu_label = with_ns
                else:
                    m = '"otu_label" or "tip_label" must be one of "{}"'
                    m = m.format('", "'.join(PhyloSchema._otu_label_list))
                    raise ValueError(m)
            self.otu_label_prop = PhyloSchema._otu_label2prop[self.otu_label]