def post(self, request, resourceid=None, slug=None, graphid=None):
    """Create resource instances from the JSON-LD document in the request body.

    Args:
        request: the incoming request; ``request.user`` is checked with
            ``user_can_edit_resources`` and ``request.body`` is deserialized.
        resourceid: accepted for signature compatibility; not read here.
        slug: optional graph slug; when given it is resolved to a graph pk
            that replaces ``graphid``.
        graphid: optional graph id handed to the reader.

    Returns:
        JSONResponse with status 201 and the re-fetched resources on success,
        400 with ``{"error": [...]}`` when the reader reports errors, 403 when
        the user may not edit resources, 500 when anything raises.
    """
    try:
        indent = int(request.POST.get("indent", None))
    except Exception:
        # Missing or non-numeric indent: fall back to no indentation.
        indent = None

    try:
        if not user_can_edit_resources(user=request.user):
            return JSONResponse(status=403)

        payload = JSONDeserializer().deserialize(request.body)
        jsonld_reader = JsonLdReader()
        if slug is not None:
            graphid = models.GraphModel.objects.get(slug=slug).pk
        jsonld_reader.read_resource(payload, graphid=graphid)

        if jsonld_reader.errors:
            messages = [problem.message for problem in jsonld_reader.errors.values()]
            return JSONResponse({"error": messages}, indent=indent, status=400)

        created = []
        for resource in jsonld_reader.resources:
            # Each resource is saved in its own transaction.
            with transaction.atomic():
                resource.save(request=request)
            fetched = self.get(request, resource.resourceinstanceid)
            created.append(JSONDeserializer().deserialize(fetched.content))
        return JSONResponse(created, indent=indent, status=201)
    except Exception as e:
        if settings.DEBUG is True:
            # Dump the traceback to stdout only when running in debug mode.
            for trace_line in traceback.format_exception(*sys.exc_info()):
                print(trace_line)
        return JSONResponse({"error": "resource data could not be saved: %s" % e}, status=500, reason=e)
def post(self, request, resourceid=None):
    """Create resource instances from the JSON-LD document in the request body.

    Args:
        request: the incoming request; ``request.user`` is checked with
            ``user_can_edit_resources`` and ``request.body`` is deserialized.
        resourceid: accepted for signature compatibility; not read here.

    Returns:
        JSONResponse with the re-fetched resources on success, status 400
        (echoing the submitted data, with the error messages as ``reason``)
        when the reader reports errors, 403 when the user may not edit
        resources, 500 when anything raises.
    """
    try:
        indent = int(request.POST.get('indent', None))
    except Exception:
        # Missing or non-numeric indent: fall back to no indentation.
        indent = None

    try:
        if not user_can_edit_resources(user=request.user):
            return JSONResponse(status=403)

        data = JSONDeserializer().deserialize(request.body)
        reader = JsonLdReader()
        reader.read_resource(data)

        if reader.errors:
            # .values() exists on both Python 2 and 3 dicts; .itervalues() is
            # Python 2 only and would turn every validation failure into a 500.
            response = [value.message for value in reader.errors.values()]
            return JSONResponse(data, indent=indent, status=400, reason=response)

        response = []
        for resource in reader.resources:
            # Each resource is saved in its own transaction.
            with transaction.atomic():
                resource.save(request=request)
            response.append(JSONDeserializer().deserialize(
                self.get(request, resource.resourceinstanceid).content))
        return JSONResponse(response, indent=indent)
    except Exception as e:
        return JSONResponse(status=500, reason=e)
def test_find_leaf_branch(self):
    """
    Given a list of leaf nodes, find the appropriate node from the given jsonld
    """
    # NOTE(review): the "@id" entry below looks redaction-damaged: its value and
    # the following "@type" key are fused into one string ("http://*****:*****@type"),
    # so the literal does not parse as shown. Restore it from the original fixture.
    jsonld_graph = {
        "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
            "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Will",
        }
    }
    graphtree = self.unique_graph.get_tree()
    # Pick the child of the root whose node is named "Name". There is no break,
    # so the last match wins; if nothing matches, `node` is never assigned here.
    for child in graphtree["children"]:
        if child["node"].name == "Name":
            node = child
    reader = JsonLdReader()
    # Search only the children of the "Name" node for the branch matching the
    # P1_is_identified_by value of the json-ld.
    branch = reader.findBranch(
        node["children"],
        "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
        jsonld_graph[
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
    )
    # The matched branch must be the node with this primary key.
    self.assertEqual(str(branch["node"].pk), "839b0e4c-95e6-11e8-aada-14109fd34195")
def put(self, request, resourceid):
    """Read a JSON-LD document for ``resourceid`` and return the current resource.

    Args:
        request: the incoming request; ``request.user`` is checked with
            ``user_can_edit_resources`` and ``request.body`` is deserialized.
        resourceid: id of the resource handed to ``self.get``.

    Returns:
        JSONResponse with status 403 when the user may not edit resources,
        otherwise a JSONResponse wrapping the result of ``self.get``.
    """
    if not user_can_edit_resources(user=request.user):
        # A permission failure is a client error, not a server fault; the
        # sibling handlers in this file answer 403 for the same condition.
        return JSONResponse(status=403)

    data = JSONDeserializer().deserialize(request.body)
    reader = JsonLdReader()
    # NOTE(review): the parsed resources are never saved in this handler —
    # read_resource() is only exercised for parsing. Confirm this is intended.
    reader.read_resource(data)
    return JSONResponse(self.get(request, resourceid))
def put(self, request, resourceid, slug=None, graphid=None):
    """Replace a resource: delete the existing instance, then recreate it from JSON-LD.

    The delete and the re-import run inside one ``transaction.atomic()`` block.
    Every error response marks that transaction for rollback, so a failed
    import no longer commits the deletion of the existing resource.

    Args:
        request: the incoming request; ``request.body`` holds the JSON-LD.
        resourceid: id of the resource to replace.
        slug: optional graph slug; when given it is resolved to a graph pk
            that replaces ``graphid``.
        graphid: optional graph id handed to the reader.

    Returns:
        JSONResponse with status 201 and the re-fetched resources on success,
        400 with ``{"error": [...]}`` when the reader reports errors, 403 when
        the user may not edit this resource, 404 when a
        ``ResourceInstance.DoesNotExist`` is raised during the import, 500 on
        any other exception.
    """
    try:
        # NOTE(review): Django's HttpRequest does not define a ``PUT``
        # attribute; unless something else adds one, this lookup raises and
        # indent always ends up None — confirm where indent should come from.
        indent = int(request.PUT.get("indent", None))
    except Exception:
        indent = None

    if not user_can_edit_resources(user=request.user, resourceid=resourceid):
        return JSONResponse(status=403)

    with transaction.atomic():
        try:
            # DELETE
            resource_instance = Resource.objects.get(pk=resourceid)
            resource_instance.delete()
        except models.ResourceInstance.DoesNotExist:
            # Nothing to replace; the PUT simply creates the resource.
            pass

        try:
            # POST
            data = JSONDeserializer().deserialize(request.body)
            reader = JsonLdReader()
            if slug is not None:
                graphid = models.GraphModel.objects.get(slug=slug).pk
            reader.read_resource(data, resourceid=resourceid, graphid=graphid)

            if reader.errors:
                # Returning from inside atomic() commits by default; roll back
                # explicitly so the delete above is undone.
                transaction.set_rollback(True)
                response = [value.message for value in reader.errors.values()]
                return JSONResponse({"error": response}, indent=indent, status=400)

            response = []
            for resource in reader.resources:
                with transaction.atomic():
                    resource.save(request=request)
                response.append(JSONDeserializer().deserialize(
                    self.get(request, resource.resourceinstanceid).content))
            return JSONResponse(response, indent=indent, status=201)
        except models.ResourceInstance.DoesNotExist:
            transaction.set_rollback(True)
            return JSONResponse(status=404)
        except Exception as e:
            transaction.set_rollback(True)
            return JSONResponse(
                {"error": "resource data could not be saved"},
                status=500,
                reason=e)
def put(self, request, resourceid):
    """Update a resource in place from JSON-LD, deleting tiles absent from the document.

    The document is read with ``use_ids=True``. For each parsed resource the
    tiles that exist on the stored resource but not in the document are
    deleted, then the resource is saved, both in one transaction.

    Args:
        request: the incoming request; ``request.body`` holds the JSON-LD.
        resourceid: id from the URI; must equal ``str(resource.pk)`` of every
            resource parsed from the document.

    Returns:
        JSONResponse with the re-fetched resources on success, status 400
        (echoing the submitted data, with the error messages as ``reason``)
        when the reader reports errors, 403 when the user may not edit
        resources, 500 when anything raises (including an id mismatch).
    """
    try:
        indent = int(request.POST.get('indent', None))
    except Exception:
        # Missing or non-numeric indent: fall back to no indentation.
        indent = None

    try:
        if not user_can_edit_resources(user=request.user):
            return JSONResponse(status=403)

        data = JSONDeserializer().deserialize(request.body)
        reader = JsonLdReader()
        reader.read_resource(data, use_ids=True)

        if reader.errors:
            # .values() exists on both Python 2 and 3 dicts; .itervalues() is
            # Python 2 only and would turn every validation failure into a 500.
            response = [value.message for value in reader.errors.values()]
            return JSONResponse(data, indent=indent, status=400, reason=response)

        response = []
        for resource in reader.resources:
            if resourceid != str(resource.pk):
                raise Exception(
                    'Resource id in the URI does not match the resource @id supplied in the document'
                )
            old_resource = Resource.objects.get(pk=resource.pk)
            old_resource.load_tiles()
            old_tile_ids = {str(tile.pk) for tile in old_resource.tiles}
            new_tile_ids = {str(tile.pk) for tile in resource.get_flattened_tiles()}
            # Tiles stored on the old resource but missing from the document.
            tileids_to_delete = old_tile_ids - new_tile_ids
            tiles_to_delete = models.TileModel.objects.filter(
                pk__in=tileids_to_delete)
            with transaction.atomic():
                # Delete and save together so a failed save keeps the old tiles.
                tiles_to_delete.delete()
                resource.save(request=request)
            response.append(JSONDeserializer().deserialize(
                self.get(request, resource.resourceinstanceid).content))
        return JSONResponse(response, indent=indent)
    except Exception as e:
        return JSONResponse(status=500, reason=e)
def test_find_branch_from_jsonld_2(self):
    """
    The same test as above except that we now add an additional node to the supplied json
    which now will match a branch in the graph
    The graph is partially unique (the children of the root are not unique)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The top-level P1_is_identified_by key indexed below is not visible as a
    # top-level key here. Restore the literal from the original fixture.
    ambiguous_jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Will",
            },
            "http://www.cidoc-crm.org/cidoc-crm/P1i_identifies": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Smith",
            },
        }],
    }
    graphtree = self.ambiguous_graph.get_tree()
    reader = JsonLdReader()
    # Match the P1_is_identified_by value against the children of the graph root.
    branch = reader.findBranch(
        graphtree["children"],
        "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
        ambiguous_jsonld_graph[
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
    )
    # The matched branch must be the node with this primary key.
    self.assertEqual(str(branch["node"].pk), "3f40c4c0-9693-11e8-8a0f-14109fd34195")
def test_find_unique_branch_from_jsonld(self):
    """
    Test that we can find the correct branch in the graph that matches the supplied json-ld
    The graph is partially unique (the children of the root are not unique)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the "[" opened after the first "@type" is never closed as shown.
    # The top-level P1_is_identified_by key indexed below is not visible as a
    # top-level key here. Restore the literal from the original fixture.
    jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Will",
            },
            "http://www.cidoc-crm.org/cidoc-crm/P1i_identifies": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Smith",
            },
        },
    }
    graphtree = self.unique_graph.get_tree()
    reader = JsonLdReader()
    # Match the P1_is_identified_by value against the children of the graph root.
    branch = reader.findBranch(
        graphtree["children"],
        "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
        jsonld_graph[
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
    )
    # The matched branch must be the node with this primary key.
    self.assertEqual(str(branch["node"].pk), "3e1e65dc-95e6-11e8-9de9-14109fd34195")
def put(self, request, resourceid):
    """Replace a resource: delete the existing instance, then recreate it from JSON-LD.

    The delete and the re-import run inside one ``transaction.atomic()`` block.
    The 400 and 404 responses mark that transaction for rollback, so a
    rejected document no longer commits the deletion of the existing resource.
    Any other exception propagates out of the atomic block, which rolls back.

    Args:
        request: the incoming request; ``request.body`` holds the JSON-LD.
        resourceid: id of the resource to replace.

    Returns:
        JSONResponse with status 201 and the re-fetched resources on success,
        400 (echoing the submitted data, with the error messages as
        ``reason``) when the reader reports errors, 403 when the user may not
        edit resources, 404 when a ``ResourceInstance.DoesNotExist`` is raised
        during the import.
    """
    try:
        # NOTE(review): Django's HttpRequest does not define a ``PUT``
        # attribute; unless something else adds one, this lookup raises and
        # indent always ends up None — confirm where indent should come from.
        indent = int(request.PUT.get('indent', None))
    except Exception:
        indent = None

    if not user_can_edit_resources(user=request.user):
        # A permission failure is a client error, not a server fault; the
        # sibling handlers in this file answer 403 for the same condition.
        return JSONResponse(status=403)

    with transaction.atomic():
        try:
            # DELETE
            resource_instance = Resource.objects.get(pk=resourceid)
            resource_instance.delete()
        except models.ResourceInstance.DoesNotExist:
            # Nothing to replace; the PUT simply creates the resource.
            pass

        try:
            # POST
            data = JSONDeserializer().deserialize(request.body)
            reader = JsonLdReader()
            reader.read_resource(data, resourceid=resourceid)

            if reader.errors:
                # Returning from inside atomic() commits by default; roll back
                # explicitly so the delete above is undone.
                transaction.set_rollback(True)
                # .values() exists on both Python 2 and 3 dicts; .itervalues()
                # is Python 2 only.
                response = [value.message for value in reader.errors.values()]
                return JSONResponse(data, indent=indent, status=400, reason=response)

            response = []
            for resource in reader.resources:
                with transaction.atomic():
                    resource.save(request=request)
                response.append(JSONDeserializer().deserialize(
                    self.get(request, resource.resourceinstanceid).content))
            return JSONResponse(response, indent=indent, status=201)
        except models.ResourceInstance.DoesNotExist:
            transaction.set_rollback(True)
            return JSONResponse(status=404)
def test_cant_find_branch_from_ambiguous_jsonld(self):
    """
    Test that we raise the appropriate error when we can't find the correct branch in the graph
    given that the supplied json-ld could match more than one branch
    The graph is partially unique (the children of the root are not unique)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The top-level P1_is_identified_by key indexed below is not visible as a
    # top-level key here. Restore the literal from the original fixture.
    ambiguous_jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "Will - Ambiguous",
            },
        }],
    }
    graphtree = self.ambiguous_graph.get_tree()
    reader = JsonLdReader()
    # findBranch is expected to raise rather than return; `cm` and `branch`
    # are not inspected afterwards.
    with self.assertRaises(reader.AmbiguousGraphException) as cm:
        branch = reader.findBranch(
            graphtree["children"],
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
            ambiguous_jsonld_graph[
                "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
        )
def test_find_other_unique_branch_from_jsonld(self):
    """
    Test that we can find the correct branch in the graph that matches the supplied json-ld
    The graph is partially unique (the children of the root are not unique)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The top-level P1_is_identified_by key indexed below is not visible as a
    # top-level key here. Restore the literal from the original fixture.
    jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
                "@id": "http://*****:*****@type": "http://www.ics.forth.gr/isl/CRMdig/D21_Person_Name",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "The Shadow",
            },
        }],
    }
    graphtree = self.unique_graph.get_tree()
    reader = JsonLdReader()
    # Match the P1_is_identified_by value against the children of the graph root.
    branch = reader.findBranch(
        graphtree["children"],
        "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
        jsonld_graph[
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
    )
    # The matched branch must be the node with this primary key.
    self.assertEqual(str(branch["node"].pk), "91679e1e-95e6-11e8-a166-14109fd34195")
def test_cant_find_branch_from_jsonld(self):
    """
    Test that we raise the appropriate error when we can't find the correct branch in the graph
    that matches the supplied json-ld
    The graph is partially unique (the children of the root are not unique)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The top-level P1_is_identified_by key indexed below is not visible as a
    # top-level key here. Restore the literal from the original fixture.
    incorrect_jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E82_Actor_Appellation",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E41_Appellation",
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by": {
                # Deliberately bogus type so that no branch of the graph matches.
                "@id": "http://*****:*****@type": "---THIS TYPE IS INCORRECT AND SHOULN'T MATCH---",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "The Shadow",
            },
        }],
    }
    graphtree = self.unique_graph.get_tree()
    reader = JsonLdReader()
    # findBranch is expected to raise rather than return; `cm` and `branch`
    # are not inspected afterwards.
    with self.assertRaises(reader.DataDoesNotMatchGraphException) as cm:
        branch = reader.findBranch(
            graphtree["children"],
            "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by",
            incorrect_jsonld_graph[
                "http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by"],
        )
def test_cant_find_branch_from_complex_ambigious_jsonld(self):
    """
    The same test as above except that we now supply a jsonld structure that matches
    more than one branch in the graph (it's ambiguous)
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The P41i_was_classified_by key indexed below does not appear anywhere in
    # the literal as shown. Restore the literal from the original fixture.
    complex_jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E12_Production",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E17_Type_Assignment",
            "http://www.cidoc-crm.org/cidoc-crm/P42_assigned": [
                {
                    "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E55_Type",
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "174e9486-0663-4c9d-ab78-c7e441720c26",
                },
                {
                    "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E55_Type",
                    "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "None",
                },
            ],
            "http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span",
                "http://www.cidoc-crm.org/cidoc-crm/P78_is_identified_by": [
                    {
                        "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E49_Time_Appellation",
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "2018-08-06",
                    },
                    {
                        "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E49_Time_Appellation",
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "2018-09-20",
                    },
                ],
            },
        }],
    }
    graphtree = self.phase_type_assignment_graph.get_tree()
    reader = JsonLdReader()
    # findBranch is expected to raise rather than return; `cm` and `branch`
    # are not inspected afterwards.
    with self.assertRaises(reader.AmbiguousGraphException) as cm:
        branch = reader.findBranch(
            graphtree["children"],
            "http://www.cidoc-crm.org/cidoc-crm/P41i_was_classified_by",
            complex_jsonld_graph[
                "http://www.cidoc-crm.org/cidoc-crm/P41i_was_classified_by"],
        )
def test_find_branch_from_complex_jsonld(self):
    """
    Given a more complicated json structure find the branch in the graph
    """
    # NOTE(review): this literal looks redaction-damaged. Every
    # "http://*****:*****@..." string fuses an "@id" value with the next key,
    # and the brackets are unbalanced as shown (one more "}" than "{").
    # The P41i_was_classified_by key indexed below does not appear anywhere in
    # the literal as shown. Restore the literal from the original fixture.
    complex_jsonld_graph = {
        "@id": "http://*****:*****@type": [
            "http://www.cidoc-crm.org/cidoc-crm/E12_Production",
            "http://*****:*****@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E17_Type_Assignment",
            "http://www.cidoc-crm.org/cidoc-crm/P42_assigned": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E55_Type",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "None",
            },
            "http://www.cidoc-crm.org/cidoc-crm/P2_has_type": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E55_Type",
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "51cbfba6-34ee-4fbd-8b6e-10ef73fd4083",
            },
            "http://www.cidoc-crm.org/cidoc-crm/P4_has_time-span": {
                "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E52_Time-Span",
                "http://www.cidoc-crm.org/cidoc-crm/P78_is_identified_by": [
                    {
                        "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E49_Time_Appellation",
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "2018-08-05",
                    },
                    {
                        "@id": "http://*****:*****@type": "http://www.cidoc-crm.org/cidoc-crm/E49_Time_Appellation",
                        "http://www.w3.org/1999/02/22-rdf-syntax-ns#value": "2018-08-06",
                    },
                ],
            },
        }],
    }
    graphtree = self.phase_type_assignment_graph.get_tree()
    reader = JsonLdReader()
    # Match the P41i_was_classified_by value against the children of the graph root.
    branch = reader.findBranch(
        graphtree["children"],
        "http://www.cidoc-crm.org/cidoc-crm/P41i_was_classified_by",
        complex_jsonld_graph[
            "http://www.cidoc-crm.org/cidoc-crm/P41i_was_classified_by"],
    )
    # The matched branch must be the node with this primary key.
    self.assertEqual(str(branch["node"].pk), "049fc0c9-fa36-11e6-9e3e-026d961c88e6")
def load_resources(self, options):
    """Import JSON-LD resource files from disk, model by model.

    Files are read from ``{source}/{model}/{block}/{file}``. Entries whose
    name starts with ``_`` or ``.`` are ignored at every level.

    Args:
        options: mapping read for these keys:
            source: root directory to scan.
            model: a single model directory name, or falsy to scan all.
            block: a single block directory name, a ``"{slice},{max-slices}"``
                pair selecting every max-slices-th block starting at slice
                (1-based), or falsy for all blocks.
            suffix: required file-name ending.
            max: per-model cap on loaded resources; values <= 0 disable it.
                Reaching the cap stops the whole load, not just that model.
            skip: number of leading candidate files to count but not import.
            toobig: size in bytes above which a file is skipped; falsy disables.
            quiet: suppress per-file progress output.
            fast, force, strip_search: passed through to the import helpers.
            ignore_errors: keep going after a failed import instead of raising.

    Raises:
        Exception: whatever an import raises, unless ``ignore_errors`` is set.
    """
    self.reader = JsonLdReader()
    self.jss = JSONSerializer()
    source = options["source"]
    quiet = options["quiet"]

    if options["model"]:
        models = [options["model"]]
    else:
        models = sorted(os.listdir(source))
        models = [m for m in models if m[0] not in ["_", "."]]
    print(f"Found possible models: {models}")

    # This is boilerplate for any use of get_documents_to_index()
    # Need to add issearchable for strip_search option
    # Only calculate it once per load
    self.datatype_factory = DataTypeFactory()
    dt_instance_hash = {}
    self.node_info = {
        str(nodeid): {
            "datatype": dt_instance_hash.setdefault(datatype, self.datatype_factory.get_instance(datatype)),
            "issearchable": srch,
        }
        for nodeid, datatype, srch in archesmodels.Node.objects.values_list("nodeid", "datatype", "issearchable")
    }
    self.node_datatypes = {str(nodeid): datatype for nodeid, datatype in archesmodels.Node.objects.values_list("nodeid", "datatype")}

    start = time.time()
    seen = 0
    loaded = 0

    for m in models:
        print(f"Loading {m}")
        graphid = graph_uuid_map.get(m, None)
        if not graphid:
            # Check slug
            try:
                graphid = archesmodels.GraphModel.objects.get(slug=m).pk
            except Exception:
                print(f"Couldn't find a model definition for {m}; skipping")
                continue

        # We have a good model, so build the pre-processed tree once
        self.reader.graphtree = self.reader.process_graph(graphid)

        block = options["block"]
        if block and "," not in block:
            blocks = [block]
        else:
            blocks = sorted(os.listdir(f"{source}/{m}"))
            blocks = [b for b in blocks if b[0] not in ["_", "."]]
            # Test truthiness first: `"," in None` raises TypeError when the
            # block option is unset.
            if block and "," in block:
                # {slice},{max-slices}
                cslice, mslice = block.split(",")
                blocks = blocks[int(cslice) - 1 :: int(mslice)]

        loaded_model = 0
        try:
            for b in blocks:
                files = sorted(os.listdir(f"{source}/{m}/{b}"))
                for f in files:
                    if not f.endswith(options["suffix"]):
                        continue
                    elif f.startswith(".") or f.startswith("_"):
                        continue

                    if options["max"] > 0 and loaded_model >= options["max"]:
                        # Unwinds both file loops; handled below.
                        raise StopIteration()

                    seen += 1
                    if seen <= options["skip"]:
                        # Do it this way to keep the counts correct
                        continue

                    fn = f"{source}/{m}/{b}/{f}"
                    # Check file size of record
                    if not quiet:
                        print(f"About to import {fn}")
                    if options["toobig"]:
                        sz = os.path.getsize(fn)
                        if sz > options["toobig"]:
                            if not quiet:
                                print(f" ... Skipping due to size: {sz} > {options['toobig']}")
                            continue

                    uu = f.replace(f".{options['suffix']}", "")
                    with open(fn) as fh:
                        data = fh.read()

                    # FIXME Timezone / DateTime Workaround
                    # FIXME The following line should be removed when #5669 / #6346 are closed
                    data = data.replace("T00:00:00Z", "")
                    jsdata = json.loads(data)
                    jsdata = fix_js_data(data, jsdata, m)

                    if jsdata:
                        if len(uu) != 36 or uu[8] != "-":
                            # extract uuid from data if filename is not a UUID
                            # (done only once jsdata is known to be usable)
                            uu = jsdata["id"][-36:]
                        try:
                            if options["fast"]:
                                count = self.fast_import_resource(
                                    uu,
                                    graphid,
                                    jsdata,
                                    n=options["fast"],
                                    reload=options["force"],
                                    quiet=options["quiet"],
                                    strip_search=options["strip_search"],
                                )
                            else:
                                count = self.import_resource(uu, graphid, jsdata, reload=options["force"], quiet=options["quiet"])
                            loaded += count
                            loaded_model += count
                        except Exception as e:
                            print(f"*** Failed to load {fn}:\n {e}\n")
                            if not options["ignore_errors"]:
                                raise
                    else:
                        print(" ... skipped due to bad data :(")

                    if not seen % 100:
                        print(f" ... seen {seen} / loaded {loaded} in {time.time()-start}")
        except StopIteration:
            # Per-model maximum reached: stop loading altogether.
            break

    if options["fast"] and self.resources:
        # Flush whatever the fast importer still has buffered.
        self.save_resources()
        self.index_resources(options["strip_search"])
        self.resources = []
    print(f"Total Time: seen {seen} / loaded {loaded} in {time.time()-start} seconds")