def _children_query(self, ids, has_gene=True, include_self=False, raw=False):
    """Look up the ids of documents whose ``lineage`` field matches the given id(s).

    Args:
        ids: a single id (str or int) or a sequence of ids.  A single id, or
            a one-element sequence, is answered with one ``search`` request;
            longer sequences are answered with one ``msearch`` request.
        has_gene: when true, ``AND has_gene:true`` is appended to every query.
        include_self: when true, each queried id is part of its own result;
            otherwise a hit whose ``_id`` equals the queried id is dropped.
        raw: when true, the single-id path returns the unprocessed ``search``
            response.  The multi-id path does not consult this flag.

    Returns:
        A dict mapping each queried id (exactly as passed in) to a sorted
        list of int ids, truncated to ``self.max_taxid_count`` entries.
        ``{}`` is returned for an empty sequence, for an unsupported ``ids``
        type, and when the ``msearch`` reply has no ``responses`` entry or a
        different number of responses than ids.

    Raises:
        ValueError: if a hit ``_id``, or (with ``include_self``) a queried
            id, cannot be converted with ``int()``.
    """
    import json

    limit = self.max_taxid_count

    def _query_string(taxid):
        base = "lineage:{}".format(taxid)
        return (base + " AND has_gene:true") if has_gene else base

    def _children(taxid, hits):
        # Hit ids come back as strings while the queried id may be an int
        # (or the reverse), so compare their string forms; comparing the raw
        # values never matched for int input.
        queried = str(taxid)
        found = {
            int(hit['_id'])
            for hit in hits
            if include_self or str(hit['_id']) != queried
        }
        if include_self:
            # Always add the int form so the list stays homogeneous and
            # sortable, and the set keeps the id from appearing twice.
            found.add(int(taxid))
        return sorted(found)[:limit]

    is_scalar = is_str(ids) or isinstance(ids, int)
    if is_scalar or (is_seq(ids) and len(ids) == 1):
        taxid = ids if is_scalar else ids[0]
        res = self.options.es_client.search(
            body={"query": {"query_string": {"query": _query_string(taxid)}}},
            index=self.options.index,
            doc_type=self.options.doc_type,
            fields='_id',
            size=limit,
        )
        if raw:
            return res
        return {taxid: _children(taxid, res['hits']['hits'])}

    if not is_seq(ids):
        return {}
    if len(ids) == 0:
        # Nothing to ask for; avoid sending an empty multi-search body.
        return {}

    # One empty header line plus one JSON query line per id.  json.dumps
    # keeps the request well-formed even if an id contains quotes.
    lines = []
    for taxid in ids:
        lines.append("{}")
        lines.append(json.dumps({
            "size": limit,
            "_source": ["_id"],
            "query": {"query_string": {"query": _query_string(taxid)}},
        }))
    res = self.options.es_client.msearch(
        body='\n'.join(lines),
        index=self.options.index,
        doc_type=self.options.doc_type,
    )
    if 'responses' not in res or len(res['responses']) != len(ids):
        return {}

    # Responses are paired with ids by position; a repeated id pools its hits.
    hits_by_taxid = {}
    for taxid, response in zip(ids, res['responses']):
        hits_by_taxid.setdefault(taxid, []).extend(response['hits']['hits'])
    return {taxid: _children(taxid, hits) for taxid, hits in hits_by_taxid.items()}
def _recursion_helper(doc, path, parent_type):
    """Rebuild ``doc`` recursively, renaming dict keys and wrapping selected paths in lists.

    ``self``, ``data_src``, ``sort`` and ``field_sep`` are not defined here;
    they are read from the enclosing scope.

    doc: the value to transform (sequence, dict or leaf).
    path: ``field_sep``-joined key path of ``doc``; falsy at the top level.
    parent_type: ``type()`` of the container ``doc`` was found in.

    Sequences become lists of transformed elements.  Dicts get every key
    passed through ``self._alias_output_keys`` and come back as an
    ``OrderedDict`` with sorted keys when ``sort`` is set, else a ``dict``.
    A dict or leaf whose path is in ``self.options.always_list`` and whose
    parent is not a list/tuple is returned wrapped in a one-element list.
    """
    def _must_wrap():
        # Same short-circuit order as a plain chained ``and`` expression.
        return (parent_type not in (list, tuple)
                and self.options.always_list
                and path in self.options.always_list)

    if is_seq(doc):
        return [_recursion_helper(item, path, type(doc)) for item in doc]

    if not isinstance(doc, dict):
        # Leaf value.
        return [doc] if _must_wrap() else doc

    if data_src and path in self.data_sources:
        # Note: this writes into the input document itself.
        doc['@sources'] = self.data_sources[path]['@sources']

    keys = sorted(doc) if sort else doc.keys()
    pairs = []
    for key in keys:
        child_path = field_sep.join([path, key]) if path else key
        pairs.append((
            self._alias_output_keys(child_path, key),
            _recursion_helper(doc[key], child_path, type(doc)),
        ))

    wrap = _must_wrap()
    rebuilt = OrderedDict(pairs) if sort else dict(pairs)
    return [rebuilt] if wrap else rebuilt
def build_id_query(self, bid, scopes=None):
    """Build the query body used to look up the single id ``bid``.

    Args:
        bid: the id to look for; it is rendered with ``"{}".format``.
        scopes: a field name (str) or a sequence of field names to match
            against.  Falsy values fall back to ``'_id'``.

    Returns:
        A dict with a ``"query"`` entry (``match`` for one field,
        ``multi_match`` for several, both with operator ``and``) updated
        with the entries of ``self._query_options``.

    Raises:
        ValueError: if ``scopes`` is neither a string nor a sequence.

    Side effect: any ``"query"`` key is removed from ``self._query_options``.
    """
    scopes = scopes or '_id'
    term = "{}".format(bid)

    if is_str(scopes):
        _query = {"match": {scopes: {"query": term, "operator": "and"}}}
    elif is_seq(scopes):
        _query = {"multi_match": {"query": term, "fields": scopes, "operator": "and"}}
    else:
        # The placeholder must be "{}" for str.format to fill in the type;
        # the previous "%s" was emitted literally.
        raise ValueError('"scopes" cannot be "{}" type'.format(type(scopes)))

    _q = {"query": _query}
    # Drop "query" first so the update below cannot overwrite the one just built.
    self._query_options.pop("query", None)
    _q.update(self._query_options)
    return _q
def depth_first_recursive_traversal(doc, path=()):
    """Yield ``(path, value)`` for every value nested in ``doc``, depth first.

    Args:
        doc: the structure to walk.  Dicts and sequences (per ``is_seq``)
            are descended into; anything else yields nothing.
        path: keys leading to ``doc``; used as the prefix of every yielded
            path.  Any iterable of keys is accepted.

    Yields:
        ``(path, value)`` tuples where ``path`` is a tuple of dict keys.
        Each value is yielded before its own descendants.  Sequence
        elements do not add a path component: they are yielded under the
        path of the sequence itself.
    """
    if isinstance(doc, dict):
        prefix = tuple(path)
        for key, value in doc.items():
            child_path = prefix + (key,)
            yield (child_path, value)
            yield from depth_first_recursive_traversal(value, child_path)
    elif is_seq(doc):
        # The path is the same for every element, so build it once.
        prefix = tuple(path)
        for element in doc:
            yield (prefix, element)
            yield from depth_first_recursive_traversal(element, prefix)
def _recursion_helper(_doc, _ret, out): if isinstance(_doc, dict): for key in _doc: new_key = key if not out else outfield_sep.join([out, key]) _recursion_helper(_doc[key], _ret, new_key) elif is_seq(_doc): for _obj in _doc: _recursion_helper(_obj, _ret, out) else: # this is a leaf _ret.setdefault(out, []).append(_doc)
def _helper(doc, _list, val): if isinstance(doc, dict): if len(_list) > 1: if _list[0] not in doc: doc[_list[0]] = {} _helper(doc[_list[0]], _list[1:], val) else: if _list[0] not in doc: doc[_list[0]] = val elif is_seq(doc): for o in doc: _helper(o, _list, val)
def flatten_doc_2(doc, outfield_sep='.', sort=True):
    """Flatten ``doc`` into a single-level mapping of key path to leaf value(s).

    doc: the document, walked with ``depth_first_traversal``.
    outfield_sep: string used to join the path components into one key.
        When falsy, the path itself (as yielded by the traversal) is the key.
    sort: when true and ``outfield_sep`` is truthy, the result is an
        ``OrderedDict`` sorted by key; otherwise a plain ``dict``.

    Values that are dicts or sequences are skipped; only leaves are kept.
    A key that collected exactly one leaf maps to that leaf, otherwise to
    the list of collected leaves.
    """
    collected = {}
    for keys, value in depth_first_traversal(doc):
        if isinstance(value, dict) or is_seq(value):
            continue
        flat_key = outfield_sep.join(keys) if outfield_sep else keys
        collected.setdefault(flat_key, []).append(value)

    # Unwrap single-element lists.
    pairs = [
        (key, leaves[0]) if len(leaves) == 1 else (key, leaves)
        for key, leaves in collected.items()
    ]

    if sort and outfield_sep:
        pairs.sort(key=lambda pair: pair[0])
        return OrderedDict(pairs)
    return dict(pairs)
def _alias_input_args(self, args): alias_dict = dict([(_arg, _setting['alias']) for (_arg, _setting) in self.kwarg_settings.items() if 'alias' in _setting]) for (target, src) in alias_dict.items(): if is_str(src) and src in args: args.setdefault(target, args[src]) elif is_seq(src): for param in src: if param in args: args.setdefault(target, args[param]) break return args
def breadth_first_recursive_traversal(doc, path=()):
    """Yield ``(path, value)`` for every value nested in ``doc``, level by level.

    All values at depth 1 are yielded first, then all values at depth 2,
    and so on.  Within a level, values keep the order of their parents and,
    inside one parent, the parent's own iteration order.

    Args:
        doc: the structure to walk.  Dicts and sequences (per ``is_seq``)
            are descended into; anything else yields nothing.
        path: keys leading to ``doc``; used as the prefix of every yielded
            path.  Any iterable of keys is accepted.

    Yields:
        ``(path, value)`` tuples where ``path`` is a tuple of dict keys.
        Sequence elements do not add a path component: they are yielded
        under the path of the sequence itself.
    """
    def _walk(level):
        # Gather the children of the whole level before yielding any of
        # them.  Recursing into each child right after yielding its
        # siblings drains one subtree completely before the next sibling's
        # children are reached, which is not breadth-first.
        next_level = []
        for node_path, node in level:
            if isinstance(node, dict):
                next_level.extend((node_path + (key,), value) for key, value in node.items())
            elif is_seq(node):
                next_level.extend((node_path, element) for element in node)
        if next_level:
            yield from next_level
            yield from _walk(next_level)

    yield from _walk([(tuple(path), doc)])
def _cleaned_scopes(self, scopes):
    """Return a cleaned ``scopes`` parameter.

    Args:
        scopes: a comma-separated string of scope fields, or a sequence of
            scope fields.

    Returns:
        A single field when exactly one non-empty field remains, a list
        when several remain, and ``None`` when ``scopes`` is falsy, is of
        an unsupported type, or contains no non-empty field (for example
        ``","`` or ``[""]``).

    Only fields obtained by splitting a string are stripped of surrounding
    whitespace; the elements of a sequence are used as given.
    """
    if not scopes:
        return None

    if is_str(scopes):
        scopes = [field.strip() for field in scopes.split(",")]
    if not is_seq(scopes):
        return None

    fields = [field for field in scopes if field]
    if not fields:
        # Everything was filtered out: report "no scopes" the same way as
        # the other empty cases instead of handing back an empty list.
        return None
    return fields[0] if len(fields) == 1 else fields
def _generic_traversal(doc, structure): _struct = structure() # push first level for (k, v) in doc.items(): _struct.push((tuple([k]), v)) while not _struct.isempty(): _next = _struct.pop() yield _next if isinstance(_next[1], dict): # push this level for (k, v) in _next[1].items(): _struct.push((tuple(list(_next[0]) + [k]), v)) elif is_seq(_next[1]): # push all elements in a list/tuple for o in _next[1]: _struct.push((_next[0], o))
def _recursion_helper(d, ret, path, out): if isinstance(d, dict): for key in d: new_path_key = key if not path else context_sep.join( [path, key]) new_out_key = self._alias_output_keys( new_path_key, key) if not out else outfield_sep.join( [out, self._alias_output_keys(new_path_key, key)]) _recursion_helper(d[key], ret, new_path_key, new_out_key) elif is_seq(d): for obj in d: _recursion_helper(obj, ret, path, out) else: if out in ret: if isinstance(ret[out], list): ret[out].append(d) else: ret[out] = [ret[out], d] else: ret[out] = d