Exemplo n.º 1
0
    def _children_query(self, ids, has_gene=True, include_self=False, raw=False):
        """Look up, for each given id, the documents whose ``lineage`` matches it.

        Args:
            ids: a single id (str or int) or a sequence of ids.
            has_gene: when true, the query string additionally requires
                ``has_gene:true``.
            include_self: when true, each queried id is part of its own result
                list; when false, a hit whose ``_id`` equals the queried id is
                dropped.
            raw: when true and a single id is queried, return the unmodified
                search response.  It has no effect on multi-id queries.

        Returns:
            A dict mapping each queried id (exactly as passed in) to a sorted
            list of integer ``_id`` values capped at ``self.max_taxid_count``.
            ``{}`` is returned when ``ids`` is an empty sequence, is neither a
            str/int nor a sequence, or when the msearch response does not
            contain exactly one entry per queried id.
        """
        single = is_str(ids) or isinstance(ids, int)
        if single or (is_seq(ids) and len(ids) == 1):
            _ids = ids if single else ids[0]
            if has_gene:
                _qstring = "lineage:{} AND has_gene:true".format(_ids)
            else:
                _qstring = "lineage:{}".format(_ids)
            res = self.options.es_client.search(
                body={"query": {"query_string": {"query": _qstring}}},
                index=self.options.index, doc_type=self.options.doc_type,
                fields='_id', size=self.max_taxid_count)

            if raw:
                return res

            # The queried id may be an int or a str while hit ids go through
            # int() below (presumably strings), so compare both sides as str.
            # Comparing them directly never matched for int ids, and appended
            # a str to a list of ints for str ids.
            self_id = str(_ids)
            taxid_li = [int(hit['_id']) for hit in res['hits']['hits']
                        if include_self or str(hit['_id']) != self_id]
            if include_self and int(_ids) not in taxid_li:
                taxid_li.append(int(_ids))
            return {_ids: sorted(taxid_li)[:self.max_taxid_count]}
        elif is_seq(ids):
            if len(ids) == 0:
                # Nothing to ask for; avoid sending an empty msearch body.
                return {}

            # One header line ("{}") plus one query line per id.
            if has_gene:
                template = '{{}}\n{{"size": {}, "_source": ["_id"], "query": {{"query_string":{{"query": "lineage:{} AND has_gene:true"}}}}}}'
            else:
                template = '{{}}\n{{"size": {}, "_source": ["_id"], "query":{{"query_string":{{"query":"lineage:{}"}}}}}}'
            qs = '\n'.join(template.format(self.max_taxid_count, taxid) for taxid in ids)
            res = self.options.es_client.msearch(body=qs, index=self.options.index, doc_type=self.options.doc_type)
            if 'responses' not in res or len(res['responses']) != len(ids):
                return {}

            # Responses are paired with ids by position.  Results are gathered
            # as sets of ints so repeated ids and the self id are de-duplicated
            # regardless of whether the caller passed ints or strings.
            collected = {}
            for (taxid, response) in zip(ids, res['responses']):
                self_id = str(taxid)
                bucket = collected.setdefault(taxid, set())
                bucket.update(int(hit['_id']) for hit in response['hits']['hits']
                              if include_self or str(hit['_id']) != self_id)
                if include_self:
                    bucket.add(int(taxid))
            return {taxid: sorted(children)[:self.max_taxid_count]
                    for (taxid, children) in collected.items()}
        else:
            return {}
Exemplo n.º 2
0
        def _recursion_helper(doc, path, parent_type):
            """Rebuild ``doc`` recursively with aliased keys.

            ``path`` is the ``field_sep``-joined key path of ``doc`` and
            ``parent_type`` is the type of the container that directly holds
            it.  Sequences come back as lists, dicts as ``OrderedDict`` (when
            ``sort`` is set) or ``dict``, and anything else unchanged.  A dict
            or leaf whose path is listed in ``self.options.always_list`` is
            wrapped in a one-element list unless its parent is already a
            list/tuple.
            """
            if is_seq(doc):
                container = type(doc)
                return [_recursion_helper(item, path, container) for item in doc]

            if isinstance(doc, dict):
                # Attach source metadata in place before walking the keys.
                if data_src and path in self.data_sources:
                    doc['@sources'] = self.data_sources[path]['@sources']
                keys = sorted(doc) if sort else doc.keys()
                pairs = []
                for key in keys:
                    child_path = field_sep.join([path, key]) if path else key
                    pairs.append(
                        (self._alias_output_keys(child_path, key),
                         _recursion_helper(doc[key], child_path, type(doc))))
                result = OrderedDict(pairs) if sort else dict(pairs)
            else:
                result = doc

            force_list = (parent_type not in (list, tuple)
                          and self.options.always_list
                          and path in self.options.always_list)
            return [result] if force_list else result
Exemplo n.º 3
0
 def build_id_query(self, bid, scopes=None):
     """Build a query body that matches ``bid`` against the given scopes.

     Args:
         bid: the value to look up; it is rendered with ``"{}".format``.
         scopes: a single field name (str) or a sequence of field names.
             A falsy value falls back to ``'_id'``.

     Returns:
         A dict with a ``"query"`` entry (``match`` for a single field,
         ``multi_match`` for several) merged with ``self._query_options``.

     Raises:
         ValueError: if ``scopes`` is neither a string nor a sequence.

     Note:
         Any ``"query"`` key is removed from ``self._query_options`` in place
         so it cannot overwrite the query built here.
     """
     _default_scopes = '_id'
     scopes = scopes or _default_scopes
     if is_str(scopes):
         _query = {
             "match": {
                 scopes: {
                     "query": "{}".format(bid),
                     "operator": "and"
                 }
             }
         }
     elif is_seq(scopes):
         _query = {
             "multi_match": {
                 "query": "{}".format(bid),
                 "fields": scopes,
                 "operator": "and"
             }
         }
     else:
         # The original used a "%s" placeholder with str.format, so the type
         # was never interpolated into the message.
         raise ValueError('"scopes" cannot be "{}" type'.format(type(scopes)))
     _q = {"query": _query}
     self._query_options.pop("query", None)    # keep "query" from being overwritten by self._query_options
     _q.update(self._query_options)
     return _q
Exemplo n.º 4
0
def depth_first_recursive_traversal(doc, path=()):
    """Yield ``(path, value)`` pairs for everything nested in ``doc``, depth first.

    Args:
        doc: the object to walk.  Dicts and sequences (as decided by
            ``is_seq``) are descended into; anything else yields nothing.
        path: iterable of keys leading to ``doc``; used as the prefix of every
            yielded path.  Defaults to an empty (immutable) tuple.

    Yields:
        ``(path, value)`` tuples where ``path`` is a tuple of dict keys.
        Each dict value is yielded under its key's path and then descended
        into before its siblings.  Sequence elements are yielded under the
        path of the sequence itself, since no index is added to the path.
    """
    base = tuple(path)
    if isinstance(doc, dict):
        for (key, value) in doc.items():
            child_path = base + (key,)
            yield (child_path, value)
            yield from depth_first_recursive_traversal(value, child_path)
    elif is_seq(doc):
        for item in doc:
            yield (base, item)
            yield from depth_first_recursive_traversal(item, base)
Exemplo n.º 5
0
 def _recursion_helper(_doc, _ret, out):
     """Collect every leaf of ``_doc`` into ``_ret``, keyed by its joined path.

     ``out`` is the output key accumulated so far; dict keys are appended to
     it with ``outfield_sep``.  Sequence elements share the key of the
     sequence.  Each leaf is appended to the list stored at ``_ret[out]``.
     """
     if isinstance(_doc, dict):
         for field, child in _doc.items():
             child_out = outfield_sep.join([out, field]) if out else field
             _recursion_helper(child, _ret, child_out)
         return

     if is_seq(_doc):
         for element in _doc:
             _recursion_helper(element, _ret, out)
         return

     # Neither a dict nor a sequence: this is a leaf value.
     bucket = _ret.setdefault(out, [])
     bucket.append(_doc)
Exemplo n.º 6
0
 def _helper(doc, _list, val):
     if isinstance(doc, dict):
         if len(_list) > 1:
             if _list[0] not in doc:
                 doc[_list[0]] = {}
             _helper(doc[_list[0]], _list[1:], val)
         else:
             if _list[0] not in doc:
                 doc[_list[0]] = val
     elif is_seq(doc):
         for o in doc:
             _helper(o, _list, val)
Exemplo n.º 7
0
def flatten_doc_2(doc, outfield_sep='.', sort=True):
    """Flatten ``doc`` into a single-level mapping of path -> leaf value(s).

    Every value yielded by ``depth_first_traversal`` that is neither a dict
    nor a sequence is collected under its path.  The path is joined with
    ``outfield_sep`` when that is truthy, otherwise the path object itself is
    the key.  A key with exactly one value maps to that value; otherwise it
    maps to the list of values.

    Returns an ``OrderedDict`` sorted by key when both ``sort`` and
    ``outfield_sep`` are truthy, and a plain ``dict`` otherwise.
    """
    collected = {}
    for _path, _val in depth_first_traversal(doc):
        if isinstance(_val, dict) or is_seq(_val):
            continue  # containers are not leaves
        flat_key = outfield_sep.join(_path) if outfield_sep else _path
        collected.setdefault(flat_key, []).append(_val)

    # Unwrap single-element lists.
    flattened = {
        key: (values[0] if len(values) == 1 else values)
        for (key, values) in collected.items()
    }
    if sort and outfield_sep:
        return OrderedDict(sorted(flattened.items(), key=lambda item: item[0]))
    return flattened
Exemplo n.º 8
0
 def _alias_input_args(self, args):
     alias_dict = dict([(_arg, _setting['alias'])
                        for (_arg, _setting) in self.kwarg_settings.items()
                        if 'alias' in _setting])
     for (target, src) in alias_dict.items():
         if is_str(src) and src in args:
             args.setdefault(target, args[src])
         elif is_seq(src):
             for param in src:
                 if param in args:
                     args.setdefault(target, args[param])
                     break
     return args
Exemplo n.º 9
0
def breadth_first_recursive_traversal(doc, path=()):
    """Yield ``(path, value)`` pairs for everything nested in ``doc``, breadth first.

    The previous recursive version yielded a node's children and then fully
    descended into each child in turn, so deeper descendants of the first
    child came out before the children of its siblings.  A FIFO queue is used
    here so that every node of one depth is yielded before any deeper node.

    Args:
        doc: the object to walk.  Dicts and sequences (as decided by
            ``is_seq``) are descended into; anything else yields nothing.
        path: iterable of keys leading to ``doc``; used as the prefix of every
            yielded path.  Defaults to an empty (immutable) tuple.

    Yields:
        ``(path, value)`` tuples where ``path`` is a tuple of dict keys.
        Sequence elements are yielded under the path of the sequence itself,
        one level below it.
    """
    from collections import deque

    pending = deque([(tuple(path), doc)])
    while pending:
        node_path, node = pending.popleft()
        if isinstance(node, dict):
            children = [(node_path + (key,), value) for (key, value) in node.items()]
        elif is_seq(node):
            children = [(node_path, item) for item in node]
        else:
            continue
        # Emit this node's children now; their own children wait in the queue
        # until every node discovered earlier has been expanded.
        yield from children
        pending.extend(children)
Exemplo n.º 10
0
 def _cleaned_scopes(self, scopes):
     """return a cleaned scopes parameter.
         should be either a string or a list of scope fields.
     """
     if scopes:
         if is_str(scopes):
             scopes = [x.strip() for x in scopes.split(",")]
         if is_seq(scopes):
             scopes = [x for x in scopes if x]
             if len(scopes) == 1:
                 scopes = scopes[0]
         else:
             scopes = None
     else:
         scopes = None
     return scopes
Exemplo n.º 11
0
def _generic_traversal(doc, structure):
    _struct = structure()

    # push first level
    for (k, v) in doc.items():
        _struct.push((tuple([k]), v))

    while not _struct.isempty():
        _next = _struct.pop()
        yield _next
        if isinstance(_next[1], dict):
            # push this level
            for (k, v) in _next[1].items():
                _struct.push((tuple(list(_next[0]) + [k]), v))
        elif is_seq(_next[1]):
            # push all elements in a list/tuple
            for o in _next[1]:
                _struct.push((_next[0], o))
Exemplo n.º 12
0
 def _recursion_helper(d, ret, path, out):
     """Flatten ``d`` into ``ret`` using aliased output keys.

     ``path`` is the ``context_sep``-joined path of original keys and ``out``
     the ``outfield_sep``-joined path of aliased keys built so far.  Sequence
     elements share the path and output key of the sequence.  The first leaf
     seen for an output key is stored as is; further leaves for the same key
     turn the entry into a list and are appended to it.
     """
     if isinstance(d, dict):
         for key in d:
             child_path = context_sep.join([path, key]) if path else key
             alias = self._alias_output_keys(child_path, key)
             child_out = outfield_sep.join([out, alias]) if out else alias
             _recursion_helper(d[key], ret, child_path, child_out)
         return

     if is_seq(d):
         for element in d:
             _recursion_helper(element, ret, path, out)
         return

     # Leaf value.
     if out not in ret:
         ret[out] = d
     elif isinstance(ret[out], list):
         ret[out].append(d)
     else:
         ret[out] = [ret[out], d]