def _pyobj(doc):
    """Convert an ES document into Python objects, in place.

    For every ``(path, node)`` pair yielded by ``traverse(doc)``, the direct
    members of dict and list nodes are replaced by ``_eval(member)``.

    Returns:
        The same ``doc`` object that was passed in (mutated).
    """
    for _, node in traverse(doc):
        if isinstance(node, dict):
            # Snapshot the items first: values are reassigned while looping.
            for key, value in tuple(node.items()):
                node[key] = _eval(value)
        elif isinstance(node, list):
            # Slice assignment keeps the identity of the list object.
            node[:] = [_eval(item) for item in node]
    return doc
def _update_one(self, doc, update, *args, **kwargs): if args or kwargs: raise NotImplementedError() if not len(update) == 1: raise ValueError("Invalid operator.") if next(iter(update)) not in ("$set", "$unset", "$push", "$addToSet", "$pull"): raise NotImplementedError(next(iter(update))) # https://docs.mongodb.com/manual/reference/operator/update/set/ # https://docs.mongodb.com/manual/reference/operator/update/unset/ # https://docs.mongodb.com/manual/reference/operator/update/push/ # https://docs.mongodb.com/manual/reference/operator/update/addToSet/ # https://docs.mongodb.com/manual/reference/operator/update/pull/ if "$set" in update: _update = json.loads(to_json(update["$set"])) _update = parse_dot_fields(_update) doc = update_dict_recur(doc, _update) elif "$unset" in update: for dotk, v in traverse(doc): if dotk in update["$unset"]: v["__REMOVE__"] = True doc = merge({}, doc) elif "$push" in update: for key, val in update["$push"].items(): if "." in key: # not all mongo operators are fully implemented raise NotImplementedError("nested key in $push: %s" % key) doc.setdefault(key, []).append(val) elif "$addToSet" in update: for key, val in update["$addToSet"].items(): if "." in key: # not all mongo operators are fully implemented raise NotImplementedError("nested key in $addToSet: %s" % key) field = doc.setdefault(key, []) if val not in field: field.append(val) else: # "$pull" in update: for key, val in update["$pull"].items(): if "." in key: # not all mongo operators are fully implemented raise NotImplementedError("nested key in $pull: %s" % key) if not isinstance(val, (str, int)): raise NotImplementedError( "value or condition in $pull: %s" % val) if isinstance(doc.get(key), list): doc[key][:] = [x for x in doc[key] if x != val] self._write_one(doc)
def find(self, filter=None, projection=None, *args, **kwargs):
    """Return the stored documents whose fields equal every filter value.

    Each document from ``self._read()`` is flattened into a dotted-path
    mapping (both ``traverse`` modes combined) and compared against
    ``filter`` by plain equality. Filter values that are dicts containing
    ``$exists`` are not evaluated: they are logged and skipped. Matching
    documents are passed through ``_pyobj`` before being returned.

    ``projection`` is not applied; a non-empty one is only logged.

    Raises:
        NotImplementedError: extra positional or keyword arguments given.
    """
    if args or kwargs:
        raise NotImplementedError()

    logger = logging.getLogger(__name__)

    def satisfies(flat):
        # True when no supported filter condition contradicts ``flat``.
        for field, expected in (filter or {}).items():
            if isinstance(expected, dict) and "$exists" in expected:
                logger.error("Ignored filter: {'%s': %s}", field, expected)
                continue
            if flat.get(field) != expected:
                return False
        return True

    matches = []
    for doc in self._read().values():
        flat = dict(traverse(doc))  # dotdict
        flat.update(dict(traverse(doc, True)))
        if satisfies(flat):
            matches.append(_pyobj(doc))

    if projection:  # used by BuildManager.build_info
        logger.error("Ignored projection: %s", projection)
    return matches
def _doc_to_flattened(self):
    """
    Render ``self["doc"]`` as an ADF bullet list of flattened fields.

    One bullet is produced per dotted path: the path (marked ``Strong``)
    followed by its values joined with ", ", like this:

        • name: Wellderly Blood Genetics
        • description: Wellderly Blood Genetics
        • author.name: someone@example.org
        • author.orcid: https://orcid.org/0000-0001-9779-1512

    Entries are skipped when the path is empty, the value's string form is
    empty, the path starts with "includedInDataCatalog" or "_", or the
    path contains "@". A bullet whose joined text has no space and parses
    with a URL scheme is additionally turned into a link.
    """
    # Group the stripped string form of each kept value by its path.
    values_by_path = defaultdict(list)
    for path, val in traverse(self["doc"], True):
        if not path or not str(val):
            continue
        if path.startswith("includedInDataCatalog"):
            continue
        if "@" in path or path.startswith("_"):
            continue
        values_by_path[path].append(str(val).strip())

    bullets = ADF().bullet_list()
    for path, values in values_by_path.items():
        paragraph = ADF().paragraph()
        paragraph.text(path + ': ')
        # Mark the label that was just appended.
        paragraph.content[-1].add_mark(Strong())
        joined = ', '.join(values)
        paragraph.text(joined)
        if ' ' not in joined and urlparse(joined).scheme:
            paragraph.link(joined)
        bullets.add_item(paragraph)
    return bullets
def value_in_result(value, result: Union[dict, list], key: str,
                    case_insensitive: bool = False) -> bool:
    """
    Check if value is in result at specific key

    Elasticsearch does not care if a field has one or more values (arrays),
    so you may get a search with multiple values in one field. You were
    expecting a result of type T but now you have a List[T] which is bad.
    In testing, usually any one element in the list eq. to the value you're
    looking for, you don't really care which. This helper function checks
    if the value is at a key, regardless of the details of nesting, so you
    can just do this:
        assert self.value_in_result(value, result, 'where.it.should.be')

    Caveats:
        case_insensitive only calls .lower() and does not care about locale/
        unicode/anything

    Args:
        value: value to look for
        result: dict or list of input, most likely from the APIs
        key: dot delimited key notation
        case_insensitive: for str comparisons, invoke .lower() first

    Returns:
        boolean indicating whether the value is found at the key

    Raises:
        TypeError: when case_insensitive set to true on unsupported types;
            the underlying failure is attached as ``__cause__``
    """
    def _lowered(item):
        # Any failure of .lower() is reported as TypeError, chained to the
        # original error so the real reason is not lost.
        try:
            return item.lower()
        except Exception as err:
            raise TypeError("failed to invoke method .lower()") from err

    if case_insensitive:
        value = _lowered(value)

    # Every leaf found at ``key`` is collected (and lowered) before the
    # membership test, so an unsupported leaf type always raises.
    res_at_key = []
    for k, v in traverse(result, leaf_node=True):
        if k == key:
            res_at_key.append(_lowered(v) if case_insensitive else v)
    return value in res_at_key
def _build_graph_query(self, graph_query):
    """
    Translate a GraphQuery object into an ES Query object.

    The query's dict form is walked leaf by leaf; each value becomes a
    query term and the path it was found at becomes the matching scope.
    A list value contributes one term per element, all sharing its path.
    """
    assert isinstance(graph_query, GraphQuery)

    terms, scopes = [], []
    for path, value in traverse(graph_query.to_dict(), True):
        values = value if isinstance(value, list) else [value]
        terms.extend(values)
        # Keep scopes aligned one-to-one with terms.
        scopes.extend([path] * len(values))

    # query proxy object does not support OR operator, thus using _proxied
    return self._build_match_query(terms, scopes, dotdict()).query._proxied
def _select_indexer(self, build_name=None): """ Find the indexer class required to index build_name. """ rules = self._config.get("indexer_select") if not rules or not build_name: self.logger.debug(self.DEFAULT_INDEXER) return self.DEFAULT_INDEXER # the presence of a path in the build doc # can determine the indexer class to use. path = None doc = self._srcbuild.find_one({"_id": build_name}) for path_in_doc, _ in traverse(doc or dict(), True): if path_in_doc in rules: if not path: path = path_in_doc else: _ERR = "Multiple indexers matched." raise RuntimeError(_ERR) kls = get_class_from_classpath(rules[path]) self.logger.info(kls) return kls