def couch_rev_check_recs(docs):
    """
    Insert revisions for all records into structure using CouchDB bulk
    interface. Uses key ranges to narrow bulk query to the source being
    ingested.

    Performance improved version of couch_rev_check_recs_old, but it uses
    another input format.

    Input:
        docs -- {doc["_id"]: doc, ...}. Every document whose id appears in
                the _all_docs response gets its "_rev" set in place.

    Returns None. Nothing is requested when docs is empty; a non-2xx
    response is logged and docs is left unchanged.
    """
    if not docs:
        return

    doc_ids = sorted(docs)
    # startkey/endkey are sent as JSON strings. json.dumps supplies the
    # surrounding quotes and escapes any quote or backslash inside the id,
    # which wrapping the raw id in '"' characters by hand did not.
    start = quote_plus(json.dumps(doc_ids[0]))
    end = quote_plus(json.dumps(doc_ids[-1]))
    # NOTE(review): the range bounds come from Python's string ordering of
    # the ids; confirm this agrees with the database's key collation.
    uri = join(COUCH_DATABASE, '_all_docs')
    uri += '?startkey=%s&endkey=%s' % (start, end)

    response, content = H.request(uri, 'GET', headers=COUCH_AUTH_HEADER)
    if not str(response.status).startswith('2'):
        logger.warn('Unable to retrieve document revisions via bulk interface: ' + repr(response))
        logger.warn('Request: ' + uri)
        return

    for row in json.loads(content)["rows"]:
        doc_id = row["id"]
        # The key range may cover ids that are not part of this batch
        if doc_id in docs:
            docs[doc_id]["_rev"] = row["value"]["rev"]
def couch_rev_check_recs_old(docs, src):
    """
    Insert revisions for all records into structure using CouchDB bulk
    interface.

    Deprecated: has performance issue. The whole _all_docs listing is
    requested on every call, so ingesting a large collection in small
    batches downloads the full listing once per batch. The replacement is
    couch_rev_check_recs.

    docs -- iterable of documents; each one whose "_id" is listed by CouchDB
            gets its "_rev" set in place.
    src  -- source identifier handed to COUCH_ID_BUILDER to build the key
            range that appears in the failure log message.
    """
    uri = join(COUCH_DATABASE, '_all_docs')
    range_start = quote(COUCH_ID_BUILDER(src, ''))
    range_end = quote(COUCH_ID_BUILDER(src, 'Z' * 100))  # FIXME. Is this correct?
    uri += '?startkey=%s&endkey=%s' % (range_start, range_end)

    # NOTE(review): the request below goes to the bare _all_docs URL, not to
    # the ranged `uri` built above; `uri` only shows up in the log message.
    resp, cont = H.request(join(COUCH_DATABASE, '_all_docs'), 'GET', headers=COUCH_AUTH_HEADER)
    if not str(resp.status).startswith('2'):
        logger.warn('Unable to retrieve document revisions via bulk interface: ' + repr(resp))
        logger.warn('Request old: ' + uri)
        return

    # dict() over a generator rather than a dict comprehension, which the
    # original avoided as "2.7 specific"
    revs = dict((row["id"], row["value"]["rev"]) for row in json.loads(cont)["rows"])
    for doc in docs:
        doc_id = doc['_id']
        if doc_id in revs:
            doc['_rev'] = revs[doc_id]
def geocode_spatial(self, spatial):
    """
    Accepts a dictionary and attempts to return a set of coordinates that
    match the place (described by the original author as
    [latitude, longitude]).

    Each candidate produced by Address(spatial).get_candidates() is tried in
    order. Results are cached per candidate in
    DplaBingGeocoder.resultCache. A candidate is accepted only when it has
    exactly one result or its results are closely grouped, and, when a
    country is known, the coordinates are not reported to be outside it.

    Returns a 2-tuple taken from the first result's first geocode point, or
    None when no API key is set or no candidate is accepted.
    """
    if not self.api_key:
        logger.warn("No API key set for Bing " +
                    "(use bing_api_key configuration key)")
        return None

    address = Address(spatial)
    for candidate in address.get_candidates():
        # Fetch from the service only on a cache miss
        if candidate not in DplaBingGeocoder.resultCache:
            fetched = self._fetch_results(candidate)
            DplaBingGeocoder.resultCache[candidate] = list(fetched)
        matches = DplaBingGeocoder.resultCache[candidate]

        # Require a single match, or closely grouped matches, to avoid bad
        # geocoding results
        match_count = len(matches)
        grouped = self._are_closely_grouped_results(matches)
        if not (match_count == 1 or grouped):
            continue

        top = matches[0]
        point = top["geocodePoints"][0]["coordinates"]
        coordinates = (point[0], point[1])

        # With a known country, sanity-check the coordinates against the
        # country's bounding box; an unknown bbox (None) counts as valid
        in_country = True
        if address.country and "countryRegion" in top["address"]:
            bbox_result = self._is_in_country(coordinates, address.country)
            if bbox_result is not None:
                in_country = bbox_result

        if not in_country:
            name_part = "Geocode result [%s] " % top["name"]
            country_part = "[%s], ignoring" % address.country
            logger.debug(name_part + "not in the correct country " + country_part)
            continue

        label = spatial["name"] if "name" in spatial else spatial
        logger.debug("Geocode result: %s => %s (%s)" %
                     (label, top["name"], top["point"]["coordinates"],))
        return coordinates

    return None
def geocode_spatial(self, spatial):
    """
    Attempt to geocode the place described by the spatial dictionary.

    Each candidate produced by Address(spatial).get_candidates() is tried in
    order. Results are cached per candidate in
    DplaBingGeocoder.resultCache. A candidate is accepted only when it has
    exactly one result or its results are closely grouped, and, when a
    country is known, the coordinate is not reported to be outside it.

    Returns a 2-tuple taken from the first result's first geocode point, or
    None when no API key is set or no candidate is accepted.
    """
    if not self.api_key:
        logger.warn("No API key set for Bing (use bing_api_key configuration key)")
        return None

    address = Address(spatial)
    for candidate in address.get_candidates():
        # See if this address candidate exists in our cache
        if candidate not in DplaBingGeocoder.resultCache:
            results = self._fetch_results(candidate)
            DplaBingGeocoder.resultCache[candidate] = list(results)
        cached = DplaBingGeocoder.resultCache[candidate]

        # Require that a single match, or closely grouped matches be
        # returned to avoid bad geocoding results
        if not (len(cached) == 1 or self._are_closely_grouped_results(cached)):
            continue

        result = cached[0]
        point = result["geocodePoints"][0]["coordinates"]
        coordinate = (point[0], point[1])

        # If we have a specified country, perform a sanity check that the
        # returned coordinate is within the country's bounding box. If we
        # can't get a country's bbox (None), assume the result is good.
        valid_result = True
        if address.country and "countryRegion" in result["address"]:
            bbox_result = self._is_in_country(coordinate, address.country)
            if bbox_result is not None:
                valid_result = bbox_result

        if not valid_result:
            # Wrong country: silently move on to the next candidate
            continue

        place = spatial["name"] if "name" in spatial else spatial
        logger.info("geocode: Result: %s => %s (%s)" %
                    (place, result["name"], result["point"]["coordinates"],))
        return coordinate

    return None
def register_service(self, ident, path, handler, doc=None, query_template=None):
    """
    Register handler as the service mounted at path.

    ident          -- identifier passed through to Service
    path           -- mount point; must not contain a '/'
    handler        -- the service implementation
    doc            -- documentation text; defaults to the handler's
                      docstring (or "" when it has none)
    query_template -- passed through to Service unchanged

    Raises ValueError when path contains a '/'.
    An existing registration for the same path is replaced (with a warning).
    """
    if "/" in path:
        raise ValueError("Registered path %r may not contain a '/'" % (path,))
    if doc is None:
        doc = inspect.getdoc(handler) or ""

    # The registry is keyed by path, so a replacement has to be detected by
    # looking up the path (the ident is never used as a key).
    if path in self._registered_services:
        logger.warn("Replacing mount point %r (%r)" % (path, ident))
    else:
        logger.debug("Created new mount point %r (%r)" % (path, ident))

    self._registered_services[path] = Service(handler, path, ident, doc, query_template)
def shred(body, ctype, action="shred", prop=None, delim=';', keepdup=None):
    """
    Service that accepts a JSON document and "shreds" or "unshreds" the value
    of the field(s) named by the "prop" parameter

    "prop" can include multiple property names, delimited by a comma (the
    delim property is used only for the fields to be shredded/unshredded).
    This requires that the fields share a common delimiter however.

    The "shred" action splits a value on delim, keeping together pieces
    whose parentheses would otherwise be unbalanced, strips whitespace,
    drops empty pieces and (unless keepdup is true) duplicate pieces.
    Values that do not contain delim are left untouched. The "unshred"
    action joins a list value with delim.

    When no property is named (prop is None or empty) the document is
    returned unchanged. Returns the document serialised as JSON, or an
    error message with a 500 response code when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except Exception as e:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON\n" + str(e)

    def mismatch_parens(s):
        return s.count("(") != s.count(")")

    # prop defaults to None; with nothing named there is nothing to do
    props = prop.split(',') if prop else []
    for p in props:
        if not exists(data, p):
            continue
        v = getprop(data, p)

        if action == "shred":
            if isinstance(v, list):
                try:
                    v = delim.join(v)
                except Exception as e:
                    logger.warn("Can't join list %s on delim for %s, %s" %
                                (v, data.get("_id"), e))
                    # v is still a list here; it cannot be split, so leave
                    # the property as it is
                    continue
            if delim not in v:
                continue

            shredded = [""]
            for s in re.split(re.escape(delim), v):
                # A piece with unbalanced parens was cut inside a
                # parenthesised group: glue the next piece back on
                if mismatch_parens(shredded[-1]):
                    shredded[-1] += "%s%s" % (delim, s)
                else:
                    shredded.append(s)
            shredded = [i.strip() for i in shredded if i.strip()]

            if not keepdup:
                # Order-preserving de-duplication
                seen = set()
                unique = []
                for s in shredded:
                    if s not in seen:
                        seen.add(s)
                        unique.append(s)
                shredded = unique
            setprop(data, p, shredded)
        elif action == "unshred":
            if isinstance(v, list):
                setprop(data, p, delim.join(v))

    return json.dumps(data)
def add_handler(self, method, handler):
    """
    Install handler in self.method_table under the given method.

    Logs a warning when an entry for method is being replaced, otherwise an
    info message. When self.wsgi_wrapper is set, the wrapped handler is
    stored instead of the bare one.
    """
    if method not in self.method_table:
        logger.info("Created %r method handler for %r" % (method, self.path))
    else:
        logger.warn("Replacing %r method handler for %r" % (method, self.path))

    # If an outer WSGI wrapper was specified, wrap it around the handler
    installed = self.wsgi_wrapper(handler) if self.wsgi_wrapper else handler
    self.method_table[method] = installed
def map_rights(self):
    """
    Map the provider's "rights" value.

    A value that starts with "http" is passed through urlparse and stored
    as "rights" in self.mapped_data. Anything else (including a value that
    cannot be inspected, which is logged) is handed to
    self.update_source_resource. Does nothing when the provider data has no
    "rights" property.
    """
    if not exists(self.provider_data, "rights"):
        return

    rights = self.provider_data.get("rights")
    rights_uri = ""
    try:
        if rights.startswith("http"):
            rights_uri = urlparse(rights).geturl()
    except Exception as err:
        logger.warn("Unable to parse rights URI: %s\n%s" % (rights, err))

    if not rights_uri:
        self.update_source_resource({"rights": rights})
    else:
        self.mapped_data.update({"rights": rights_uri})
def cdl_identify_object(body, ctype):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail.

    The first absolute URL found in "object" is used, unless
    "originalRecord/doc/isShownBy" also holds an absolute URL, which then
    takes precedence. For content.cdlib.org URLs the object id is aligned
    with the last path segment of "isShownAt"; a "hi-res" URL is recorded
    as "hasView" and replaced by its "thumbnail" counterpart. When no URL is
    found, "object" is removed.

    Returns the document serialised as JSON, or an error message with a 500
    response code when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    url = None
    # Scanned in order; a hit in a later property overrides an earlier one
    for source in ("object", "originalRecord/doc/isShownBy"):
        if not exists(data, source):
            continue
        handle = getprop(data, source)
        candidates = [handle] if isinstance(handle, basestring) else handle
        for h in candidates:
            if is_absolute(h):
                url = h
                break

    if url:
        if 'content.cdlib.org' in url:
            base_url, obj_id, object_type = url.rsplit("/", 2)
            is_shown_at_id = getprop(data, "isShownAt").rsplit("/", 1)[-1]
            if obj_id != is_shown_at_id:
                logger.warn(
                    "Object url for %s has ARK value (%s) that does not match isShownAt (%s)"
                    % (data.get("_id"), obj_id, is_shown_at_id))
                obj_id = is_shown_at_id
            url = "/".join([base_url, obj_id, object_type])
            if object_type == "hi-res":
                setprop(data, "hasView", {"@id": url})
                url = url.replace('hi-res', 'thumbnail')
        setprop(data, "object", url)
    else:
        logger.warn("No url found for object in id %s" % data.get("_id"))
        delprop(data, "object", True)

    return json.dumps(data)
def register_service(self, ident, path, handler, doc=None, query_template=None):
    """
    Mount handler as a service at path.

    ident          -- identifier handed on to Service
    path           -- mount point; a '/' in it is rejected
    handler        -- the service implementation
    doc            -- documentation text; when None, the handler's docstring
                      is used (or "" when it has none)
    query_template -- handed on to Service unchanged

    Raises ValueError when path contains a '/'.
    Registering a path a second time replaces the earlier service and logs
    a warning.
    """
    if "/" in path:
        raise ValueError("Registered path %r may not contain a '/'" % (path, ))
    if doc is None:
        doc = inspect.getdoc(handler) or ""

    # Entries are stored under their path, so that is the key to test when
    # deciding whether this registration replaces an earlier one.
    already_mounted = path in self._registered_services
    if already_mounted:
        logger.warn("Replacing mount point %r (%r)" % (path, ident))
    else:
        logger.debug("Created new mount point %r (%r)" % (path, ident))

    serv = Service(handler, path, ident, doc, query_template)
    self._registered_services[path] = serv
def cdl_identify_object(body, ctype):
    """
    Responsible for: adding a field to a document with the URL where we
    should expect to the find the thumbnail.

    Candidate URLs are read from "object" and then from
    "originalRecord/doc/isShownBy"; the first absolute URL of the last
    property that has one is used. For content.cdlib.org URLs the object id
    is replaced by the last path segment of "isShownAt" when they differ,
    and a "hi-res" URL is stored as "hasView" while "object" receives the
    matching "thumbnail" URL. Without any URL, "object" is deleted.

    Returns the document serialised as JSON, or an error message with a 500
    response code when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    def first_absolute(handle):
        # A bare string is treated as a one-element list of candidates
        for h in ([handle] if isinstance(handle, basestring) else handle):
            if is_absolute(h):
                return h
        return None

    url = None
    for source in ("object", "originalRecord/doc/isShownBy"):
        if exists(data, source):
            found = first_absolute(getprop(data, source))
            if found is not None:
                # A later property overrides an earlier hit
                url = found

    if not url:
        logger.warn("No url found for object in id %s" % data.get("_id"))
        delprop(data, "object", True)
        return json.dumps(data)

    if 'content.cdlib.org' in url:
        base_url, obj_id, object_type = url.rsplit("/", 2)
        is_shown_at_id = getprop(data, "isShownAt").rsplit("/", 1)[-1]
        if obj_id != is_shown_at_id:
            logger.warn("Object url for %s has ARK value (%s) that does not match isShownAt (%s)"
                        % (data.get("_id"), obj_id, is_shown_at_id))
            obj_id = is_shown_at_id
        url = "/".join([base_url, obj_id, object_type])
        if object_type == "hi-res":
            setprop(data, "hasView", {"@id": url})
            url = url.replace('hi-res', 'thumbnail')

    setprop(data, "object", url)
    return json.dumps(data)
def pipe(content, ctype, enrichments, wsgi_header):
    """
    Send content through each enrichment URI in turn and return the result.

    content is serialised to JSON and POSTed to the first URI; each 2xx
    response body becomes the request body for the next URI. A non-2xx
    response is logged and that step is skipped, keeping the previous body.
    Relative URIs are resolved against the current request's scheme and
    host. Empty entries in enrichments are ignored.

    Returns the body produced by the last successful step (the JSON
    serialisation of content when there was none).
    """
    body = json.dumps(content)
    for uri in enrichments:
        if not uri:
            continue  # in case there's no pipeline

        if not is_absolute(uri):
            environ = request.environ
            scheme = environ['wsgi.url_scheme']
            if environ.get('HTTP_HOST'):
                host = environ['HTTP_HOST']
            else:
                host = environ['SERVER_NAME']
            uri = scheme + '://' + host + uri

        headers = copy_headers_to_dict(request.environ, exclude=[wsgi_header])
        headers['content-type'] = ctype
        logger.debug("Calling url: %s " % uri)
        resp, cont = H.request(uri, 'POST', body=body, headers=headers)

        if str(resp.status).startswith('2'):
            body = cont
        else:
            logger.warn("Error in enrichment pipeline at %s: %s" % (uri, repr(resp)))

    return body
def geocode_spatial(self, spatial):
    """
    Attempt to geocode the place described by the spatial dictionary.

    Candidates from Address(spatial).get_candidates() are tried in order,
    with results cached per candidate in DplaBingGeocoder.resultCache. A
    candidate is used only when it yields exactly one result or closely
    grouped results, and, when a country is known, the coordinate is not
    reported to be outside it.

    Returns a 2-tuple taken from the first result's first geocode point, or
    None when no API key is set or no candidate is accepted.
    """
    if not self.api_key:
        logger.warn("No API key set for Bing (use bing_api_key configuration key)")
        return None

    address = Address(spatial)
    for candidate in address.get_candidates():
        # See if this address candidate exists in our cache
        if candidate not in DplaBingGeocoder.resultCache:
            results = self._fetch_results(candidate)
            DplaBingGeocoder.resultCache[candidate] = list(results)
        cached = DplaBingGeocoder.resultCache[candidate]

        # Require that a single match, or closely grouped matches be
        # returned to avoid bad geocoding results
        if not (len(cached) == 1 or self._are_closely_grouped_results(cached)):
            continue

        result = cached[0]
        point = result["geocodePoints"][0]["coordinates"]
        coordinate = (point[0], point[1])

        # If we have a specified country, perform a sanity check that the
        # returned coordinate is within the country's bounding box. If we
        # can't get a country's bbox (None), assume the result is good.
        valid_result = True
        if address.country and "countryRegion" in result["address"]:
            bbox_result = self._is_in_country(coordinate, address.country)
            if bbox_result is not None:
                valid_result = bbox_result

        if not valid_result:
            logger.debug("Geocode result [%s] not in the correct country [%s], ignoring" %
                         (result["name"], address.country,))
            continue

        place = spatial["name"] if "name" in spatial else spatial
        logger.info("Geocode result: %s => %s (%s)" %
                    (place, result["name"], result["point"]["coordinates"],))
        return coordinate

    return None
def pipe(content, ctype, enrichments, wsgi_header):
    """
    Run content through the enrichment pipeline and return the final body.

    The JSON form of content is POSTed to every non-empty URI in
    enrichments, one after another; the body of each 2xx response replaces
    the working body. Failing steps are logged and skipped. URIs that are
    not absolute are prefixed with the current request's scheme and host
    (HTTP_HOST when present, otherwise SERVER_NAME).
    """
    body = json.dumps(content)

    for uri in enrichments:
        if uri:
            if not is_absolute(uri):
                prefix = request.environ['wsgi.url_scheme'] + '://'
                prefix += (request.environ['HTTP_HOST']
                           if request.environ.get('HTTP_HOST')
                           else request.environ['SERVER_NAME'])
                uri = prefix + uri

            headers = copy_headers_to_dict(request.environ, exclude=[wsgi_header])
            headers['content-type'] = ctype
            logger.debug("Calling url: %s " % uri)

            resp, cont = H.request(uri, 'POST', body=body, headers=headers)
            succeeded = str(resp.status).startswith('2')
            if not succeeded:
                logger.warn("Error in enrichment pipeline at %s: %s" % (uri, repr(resp)))
            else:
                # Feed this step's output into the next step
                body = cont
        # an empty uri means there's no pipeline step to run

    return body
def shred(body, ctype, action="shred", prop=None, delim=';', keepdup=None):
    """
    Service that accepts a JSON document and "shreds" or "unshreds" the value
    of the field(s) named by the "prop" parameter

    "prop" can include multiple property names, delimited by a comma (the
    delim property is used only for the fields to be shredded/unshredded).
    This requires that the fields share a common delimiter however.

    The 'shred' action splits values by delimeter. It handles some complex
    edge cases beyond what split() expects. For example:
      ["a,b,c", "d,e,f"] -> ["a","b","c","d","e","f"]
      'a,b(,c)' -> ['a', 'b(,c)']
    Duplicate values are removed unless keepdup evaluates true.

    The 'unshred' action joins a list of values with delim.

    When no property is named (prop is None or empty) the document is
    returned unchanged. Returns the document serialised as JSON, or an
    error message with a 500 response code when body is not valid JSON.

    See: https://issues.dp.la/issues/2940
         https://issues.dp.la/issues/4251
         https://issues.dp.la/issues/4266
         https://issues.dp.la/issues/4578
         https://issues.dp.la/issues/4600
    """
    try:
        data = json.loads(body)
    except Exception as e:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON\n" + str(e)

    def index_for_first_open_paren(values):
        """
        Accepts a list of values.
        Returns the index of the first value with more opening than closing
        parens, or None.
        """
        for index, value in enumerate(values):
            if value.count("(") > value.count(")"):
                return index
        return None

    def index_for_matching_close_paren(values):
        """
        Accepts a list of values.
        Returns the index recorded for the last value with more closing than
        opening parens seen before a later value with surplus opening
        parens (or before the end of the list), or None.
        """
        index = None
        for v in values:
            if index is not None and v.count("(") > v.count(")"):
                return index
            elif v.count(")") > v.count("("):
                # values.index gives the position of the first equal value
                index = values.index(v)
        return index

    def rejoin_partials(values, delim):
        """
        Accepts a list of values which have been split by delim.  Searches for
        values that have been separated

        For example, this value:
          'my (somewhat contrived; value) with a delimeter enclosed in parens'
        would be split into:
          ['my (somewhat contrived', 'value) with a delimeter enclosed in parens']

        This method rejoins it.
        """
        while True:
            start = index_for_first_open_paren(values)
            if start is None:
                return values
            # Look for the closing fragment only from the opening fragment
            # onwards. Searching the whole list could return a position
            # before `start`, which made the former recursive version grow
            # the list forever (e.g. for "a); (b").
            offset = index_for_matching_close_paren(values[start:])
            if offset is None:
                return values
            end = start + offset
            # offset >= 1, so every pass shortens the list
            values = (values[:start]
                      + [delim.join(values[start:end + 1])]
                      + values[end + 1:])

    # prop defaults to None; with nothing named there is nothing to do
    props = prop.split(',') if prop else []
    for p in props:
        if not exists(data, p):
            continue
        v = getprop(data, p)

        if action == "shred":
            if isinstance(v, list):
                # Drop empty/None entries before joining
                v = [item for item in v if item]
                try:
                    v = delim.join(v)
                    v = v.replace("%s%s" % (delim, delim), delim)
                except Exception as e:
                    logger.warn("Can't join list %s on delim for %s, %s" %
                                (v, data.get("_id"), e))
                    # v is still a list here; it cannot be split, so leave
                    # the property as it is
                    continue
            if delim not in v:
                continue

            shredded = [""] + re.split(re.escape(delim), v)
            shredded = rejoin_partials(shredded, delim)
            shredded = [i.strip() for i in shredded if i.strip()]

            if not keepdup:
                # Order-preserving de-duplication
                seen = set()
                unique = []
                for s in shredded:
                    if s not in seen:
                        seen.add(s)
                        unique.append(s)
                shredded = unique
            setprop(data, p, shredded)
        elif action == "unshred":
            if isinstance(v, list):
                setprop(data, p, delim.join(v))

    return json.dumps(data)
def copyprop(body, ctype, prop=None, to_prop=None, create=False, key=None,
             remove=None, no_replace=None, no_overwrite=None):
    """Copies value in one prop to another prop.

    Keyword arguments:
    body -- the content to load
    ctype -- the type of content
    prop -- the prop to copy from (default None)
    to_prop -- the prop to copy into (default None)
    create -- creates to_prop if True (default False)
    key -- the key to use if to_prop is a dict (default None)
    remove -- removes prop if True (default False)
    no_replace -- creates list of to_prop string and appends prop if True
    no_overwrite -- leaves the document untouched if to_prop already exists

    Returns the document serialised as JSON, or an error message with a 500
    response code when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header('content-type', 'text/plain')
        return "Unable to parse body as JSON"

    if exists(data, to_prop) and no_overwrite:
        return json.dumps(data)

    if exists(data, prop) and create and not exists(data, to_prop):
        # Seed the target with the container type the copy below expects
        setprop(data, to_prop, {} if key else "")

    if exists(data, prop) and exists(data, to_prop):
        val = getprop(data, prop)
        to_element = getprop(data, to_prop)

        if isinstance(to_element, basestring):
            if no_replace:
                el = [to_element] if to_element else []
                el.append(val)
                # Flatten
                val = [e for s in el
                       for e in (s if not isinstance(s, basestring) else [s])]
            setprop(data, to_prop, val)
        elif key:
            # If key is set, assume to_element is dict or list of dicts
            targets = to_element if isinstance(to_element, list) else [to_element]
            for target in targets:
                if exists(target, key) or create:
                    setprop(target, key, val)
                else:
                    msg = "Key %s does not exist in %s" % (key, to_prop)
                    logger.debug(msg)
        elif isinstance(to_element, list):
            # Handle case where to_element is a list
            if isinstance(val, list):
                to_element = to_element + val
            else:
                to_element.append(val)
            setprop(data, to_prop, to_element)
        else:
            # to_prop is dictionary but no key was passed.
            msg = "%s is a dictionary but no key was passed" % to_prop
            logger.warn(msg)
            setprop(data, to_prop, val)

        if remove:
            delprop(data, prop)

    return json.dumps(data)
def geocode_spatial(self, spatial):
    """
    Accepts a dictionary and attempts to return a set of coordinates that
    match the place (described by the original author as
    [latitude, longitude]).

    Candidates from Address(spatial).get_candidates() are tried in order,
    with results cached per candidate in DplaBingGeocoder.resultCache.
    Returns a 2-tuple taken from the first result's first geocode point for
    the first acceptable candidate, or None when no API key is set or no
    candidate is accepted.
    """
    if not self.api_key:
        logger.warn("No API key set for Bing " +
                    "(use bing_api_key configuration key)")
        return None

    address = Address(spatial)
    for candidate in address.get_candidates():
        # Populate the cache on a miss
        if candidate not in DplaBingGeocoder.resultCache:
            DplaBingGeocoder.resultCache[candidate] = list(
                self._fetch_results(candidate))
        hits = DplaBingGeocoder.resultCache[candidate]

        # Only a lone hit, or hits that are closely grouped, are trusted
        hit_count = len(hits)
        clustered = self._are_closely_grouped_results(hits)
        if hit_count != 1 and not clustered:
            continue

        best = hits[0]
        first_point = best["geocodePoints"][0]["coordinates"]
        coordinates = (first_point[0], first_point[1])

        # A known country lets us reject coordinates outside its bounding
        # box; when no bounding box is available (None) the hit stands
        acceptable = True
        if address.country and "countryRegion" in best["address"]:
            bbox_result = self._is_in_country(coordinates, address.country)
            if bbox_result is not None:
                acceptable = bbox_result

        if acceptable:
            if "name" in spatial:
                described = spatial["name"]
            else:
                described = spatial
            logger.debug("Geocode result: %s => %s (%s)" % (
                described,
                best["name"],
                best["point"]["coordinates"],
            ))
            return coordinates

        head = "Geocode result [%s] " % best["name"]
        tail = "[%s], ignoring" % address.country
        logger.debug(head + "not in the correct country " + tail)

    return None
def shred(body, ctype, action="shred", prop=None, delim=";", keepdup=None):
    """
    Service that accepts a JSON document and "shreds" or "unshreds" the value
    of the field(s) named by the "prop" parameter

    "prop" can include multiple property names, delimited by a comma (the
    delim property is used only for the fields to be shredded/unshredded).
    This requires that the fields share a common delimiter however.

    The 'shred' action splits values by delimeter. It handles some complex
    edge cases beyond what split() expects. For example:
      ["a,b,c", "d,e,f"] -> ["a","b","c","d","e","f"]
      'a,b(,c)' -> ['a', 'b(,c)']
    Duplicate values are removed unless keepdup evaluates true.

    The 'unshred' action joins a list of values with delim.

    When no property is named (prop is None or empty) the document is
    returned unchanged. Returns the document serialised as JSON, or an
    error message with a 500 response code when body is not valid JSON.

    See: https://issues.dp.la/issues/2940
         https://issues.dp.la/issues/4251
         https://issues.dp.la/issues/4266
         https://issues.dp.la/issues/4578
         https://issues.dp.la/issues/4600
    """
    try:
        data = json.loads(body)
    except Exception as e:
        response.code = 500
        response.add_header("content-type", "text/plain")
        return "Unable to parse body as JSON\n" + str(e)

    def index_for_first_open_paren(values):
        """
        Accepts a list of values.
        Returns the index of the first value with more opening than closing
        parens, or None.
        """
        for index, value in enumerate(values):
            if value.count("(") > value.count(")"):
                return index
        return None

    def index_for_matching_close_paren(values):
        """
        Accepts a list of values.
        Returns the index recorded for the last value with more closing than
        opening parens seen before a later value with surplus opening
        parens (or before the end of the list), or None.
        """
        index = None
        for v in values:
            if index is not None and v.count("(") > v.count(")"):
                return index
            elif v.count(")") > v.count("("):
                # values.index gives the position of the first equal value
                index = values.index(v)
        return index

    def rejoin_partials(values, delim):
        """
        Accepts a list of values which have been split by delim.  Searches for
        values that have been separated

        For example, this value:
          'my (somewhat contrived; value) with a delimeter enclosed in parens'
        would be split into:
          ['my (somewhat contrived', 'value) with a delimeter enclosed in parens']

        This method rejoins it.
        """
        while True:
            start = index_for_first_open_paren(values)
            if start is None:
                return values
            # Search for the closing fragment from the opening fragment
            # onwards only. A whole-list search could report a position
            # before `start`; the former recursive version then inserted
            # empty strings without end (e.g. for "a); (b").
            offset = index_for_matching_close_paren(values[start:])
            if offset is None:
                return values
            end = start + offset
            # offset >= 1, so every pass shortens the list
            values = (
                values[:start]
                + [delim.join(values[start : end + 1])]
                + values[end + 1 :]
            )

    # prop defaults to None; with nothing named there is nothing to do
    props = prop.split(",") if prop else []
    for p in props:
        if not exists(data, p):
            continue
        v = getprop(data, p)

        if action == "shred":
            if isinstance(v, list):
                try:
                    v = delim.join(v)
                    v = v.replace("%s%s" % (delim, delim), delim)
                except Exception as e:
                    logger.warn("Can't join list %s on delim for %s, %s" % (v, data.get("_id"), e))
                    # v is still a list here; it cannot be split, so leave
                    # the property as it is
                    continue
            if delim not in v:
                continue

            shredded = [""] + re.split(re.escape(delim), v)
            shredded = rejoin_partials(shredded, delim)
            shredded = [i.strip() for i in shredded if i.strip()]

            if not keepdup:
                # Order-preserving de-duplication
                seen = set()
                unique = []
                for s in shredded:
                    if s not in seen:
                        seen.add(s)
                        unique.append(s)
                shredded = unique
            setprop(data, p, shredded)
        elif action == "unshred":
            if isinstance(v, list):
                setprop(data, p, delim.join(v))

    return json.dumps(data)
def copyprop(
    body, ctype, prop=None, to_prop=None, create=False, key=None, remove=None, no_replace=None, no_overwrite=None
):
    """Copies value in one prop to another prop.

    Keyword arguments:
    body -- the content to load
    ctype -- the type of content
    prop -- the prop to copy from (default None)
    to_prop -- the prop to copy into (default None)
    create -- creates to_prop if True (default False)
    key -- the key to use if to_prop is a dict (default None)
    remove -- removes prop if True (default False)
    no_replace -- creates list of to_prop string and appends prop if True
    no_overwrite -- leaves the document untouched if to_prop already exists

    Returns the document serialised as JSON, or an error message with a 500
    response code when body is not valid JSON.
    """
    try:
        data = json.loads(body)
    except Exception:
        response.code = 500
        response.add_header("content-type", "text/plain")
        return "Unable to parse body as JSON"

    if exists(data, to_prop) and no_overwrite:
        return json.dumps(data)

    if exists(data, prop) and create and not exists(data, to_prop):
        # Seed the target with the container type the copy below expects
        setprop(data, to_prop, {} if key else "")

    if exists(data, prop) and exists(data, to_prop):
        val = getprop(data, prop)
        to_element = getprop(data, to_prop)

        if isinstance(to_element, basestring):
            if no_replace:
                el = [to_element] if to_element else []
                el.append(val)
                # Flatten
                val = [e for s in el for e in (s if not isinstance(s, basestring) else [s])]
            setprop(data, to_prop, val)
        elif key:
            # If key is set, assume to_element is dict or list of dicts
            targets = to_element if isinstance(to_element, list) else [to_element]
            for target in targets:
                if exists(target, key) or create:
                    setprop(target, key, val)
                else:
                    logger.error("Key %s does not exist in %s" % (key, to_prop))
        elif isinstance(to_element, list):
            # Handle case where to_element is a list
            if isinstance(val, list):
                to_element = to_element + val
            else:
                to_element.append(val)
            setprop(data, to_prop, to_element)
        else:
            # to_prop is dictionary but no key was passed.
            logger.warn("%s is a dict but no key was passed" % to_prop)
            setprop(data, to_prop, val)

        if remove:
            delprop(data, prop)

    return json.dumps(data)