def updateObjectMetadata(self, objectMetaData, toWorkflowId):
    """Point an object's metadata at the config and rules of a workflow.

    Resolves the JSON config file for ``toWorkflowId`` under
    ``harvest/workflows``, reads the indexer rules script name from it,
    and records on ``objectMetaData`` the file names ("jsonConfigPid",
    "rulesPid") and the ids of the matching storage objects
    ("jsonConfigOid", "rulesOid"). It then sets "workflowTransitioned"
    to "true" and stamps "date_transitioned" with the current UTC time.

    objectMetaData -- object exposing ``setProperty(key, value)``
    toWorkflowId   -- handed to the lookup of the workflow's config file
    """

    def harvestObjectFor(harvestFile):
        # checkHarvestFile may hand back None; in that case fetch the
        # object through the oid generated from the file itself.
        harvestObject = StorageUtils.checkHarvestFile(self.storage,
                                                      harvestFile)
        if harvestObject is None:
            oid = StorageUtils.generateOid(harvestFile)
            harvestObject = StorageUtils.getDigitalObject(self.storage, oid)
        return harvestObject

    # Only the config file name is used here; the package type is ignored.
    _packageType, jsonConfigFile = self.__getPackageTypeAndJsonConfigFile(
        toWorkflowId)
    workflowsDir = FascinatorHome.getPathFile("harvest/workflows")

    configFile = File(workflowsDir, jsonConfigFile)
    configObject = harvestObjectFor(configFile)
    objectMetaData.setProperty("jsonConfigPid", jsonConfigFile)
    objectMetaData.setProperty("jsonConfigOid", configObject.getId())

    # The rules script name is read from the workflow's own config.
    configJson = JsonSimple(configFile)
    rulesFileName = configJson.getString(None, "indexer", "script", "rules")
    rulesObject = harvestObjectFor(File(workflowsDir, rulesFileName))
    objectMetaData.setProperty("rulesPid", rulesFileName)
    objectMetaData.setProperty("rulesOid", rulesObject.getId())

    objectMetaData.setProperty("workflowTransitioned", "true")
    # The trailing "Z" designates UTC, so format the UTC time rather
    # than the server's local time.
    objectMetaData.setProperty(
        "date_transitioned",
        time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
def updateObjectMetadata(self, objectMetaData, toWorkflowId):
    """Point an object's metadata at the config and rules of a workflow.

    Resolves the JSON config file for ``toWorkflowId`` under
    ``harvest/workflows``, reads the indexer rules script name from it,
    and records on ``objectMetaData`` the file names ("jsonConfigPid",
    "rulesPid") and the ids of the matching storage objects
    ("jsonConfigOid", "rulesOid"). It then sets "workflowTransitioned"
    to "true" and stamps "date_transitioned" with the current UTC time.

    objectMetaData -- object exposing ``setProperty(key, value)``
    toWorkflowId   -- handed to the lookup of the workflow's config file
    """

    def harvestObjectFor(harvestFile):
        # checkHarvestFile may hand back None; in that case fetch the
        # object through the oid generated from the file itself.
        harvestObject = StorageUtils.checkHarvestFile(self.storage,
                                                      harvestFile)
        if harvestObject is None:
            oid = StorageUtils.generateOid(harvestFile)
            harvestObject = StorageUtils.getDigitalObject(self.storage, oid)
        return harvestObject

    # Only the config file name is used here; the package type is ignored.
    _packageType, jsonConfigFile = self.__getPackageTypeAndJsonConfigFile(
        toWorkflowId)
    workflowsDir = FascinatorHome.getPathFile("harvest/workflows")

    configFile = File(workflowsDir, jsonConfigFile)
    configObject = harvestObjectFor(configFile)
    objectMetaData.setProperty("jsonConfigPid", jsonConfigFile)
    objectMetaData.setProperty("jsonConfigOid", configObject.getId())

    # The rules script name is read from the workflow's own config.
    configJson = JsonSimple(configFile)
    rulesFileName = configJson.getString(None, "indexer", "script", "rules")
    rulesObject = harvestObjectFor(File(workflowsDir, rulesFileName))
    objectMetaData.setProperty("rulesPid", rulesFileName)
    objectMetaData.setProperty("rulesOid", rulesObject.getId())

    objectMetaData.setProperty("workflowTransitioned", "true")
    # The trailing "Z" designates UTC, so format the UTC time rather
    # than the server's local time.
    objectMetaData.setProperty(
        "date_transitioned",
        time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()))
def __activate__(self, context):
    """Push curation details of every published record to its relations.

    For each record returned by ``findPublishedRecords()`` the record's
    curated pid and ``dc.identifier`` are resolved (via the workflow
    config stored alongside the record), then every relationship without
    a "broker" is either reported to the external system that owns it or
    updated locally. Progress is written to the plain-text response.

    The session username is set to "admin" while the script runs and is
    always removed again, even when processing fails.

    context -- scripting context providing "log", "systemConfig",
               "sessionState", "response", "request" and "Services"
    """
    self.systemConfig = context["systemConfig"]
    self.sessionState = context["sessionState"]
    self.response = context["response"]
    self.request = context["request"]
    self.indexer = context["Services"].getIndexer()
    self.storage = context["Services"].getStorage()
    self.log = context["log"]

    self.sessionState.set("username", "admin")
    try:
        self.writer = self.response.getPrintWriter(
            "text/plain; charset=UTF-8")
        try:
            localSystem = self.systemConfig.getString(None, "system")
            for publishedRecord in self.findPublishedRecords():
                storageId = publishedRecord.getString(None, "storage_id")
                digitalObject = StorageUtils.getDigitalObject(
                    self.storage, storageId)
                tfPackage = self.getTfPackage(digitalObject)
                metadata = digitalObject.getMetadata()

                # Load the workflow config the record was harvested with.
                configObject = StorageUtils.getDigitalObject(
                    self.storage, metadata.getProperty("jsonConfigOid"))
                payload = configObject.getPayload(
                    metadata.getProperty("jsonConfigPid"))
                inStream = payload.open()
                try:
                    jsonConfig = JsonSimple(inStream)
                finally:
                    payload.close()

                requiredIdentifiers = jsonConfig.getArray(
                    "curation", "requiredIdentifiers")
                if requiredIdentifiers is None:
                    # Without a configured identifier there is no pid to
                    # propagate; skip instead of reusing values left over
                    # from a previous record.
                    self.log.info(
                        "no required identifiers configured for record "
                        + str(storageId))
                    continue

                pidName = self.systemConfig.getString(
                    None, "curation", "identifier-pids",
                    requiredIdentifiers[0])
                pid = metadata.getProperty(pidName)
                identifier = tfPackage.getString(
                    pid, "metadata", "dc.identifier")

                relationships = tfPackage.getArray("relationships")
                if relationships is None:
                    continue
                for relationship in relationships:
                    self.writer.println(relationship)
                    if relationship.get("broker") is not None:
                        continue
                    system = relationship.get("system")
                    if system is not None and system != localSystem:
                        self.writer.println("notifyExternalRelationship")
                        self.notifyExternalRelationship(
                            relationship, pid, system, identifier)
                    else:
                        self.updateRelationships(
                            relationship, pid, identifier)
        finally:
            self.writer.close()
    finally:
        self.sessionState.remove("username")
def __createFromSelected(self): self.vc("log").debug("Creating package from selected...") packageType, jsonConfigFile = self.__getPackageTypeAndJsonConfigFile() #self.vc("log").debug("packageType = '{}'", packageType) #self.vc("log").debug("jsonConfigFile = '{}'", jsonConfigFile) # if modifying existing manifest, we already have an identifier, # otherwise create a new one manifestId = self.__getActiveManifestId() if manifestId is None: manifestHash = "%s.tfpackage" % uuid.uuid4() else: manifestHash = self.__getActiveManifestPid() # store the manifest file for harvesting packageDir = FascinatorHome.getPathFile("packages") packageDir.mkdirs() manifestFile = File(packageDir, manifestHash) outStream = FileOutputStream(manifestFile) outWriter = OutputStreamWriter(outStream, "UTF-8") manifest = self.__getActiveManifest() oldType = manifest.getType() if oldType is None: manifest.setType(packageType) else: manifest.setType(oldType) #self.vc("log").debug("Manifest: {}", manifest) outWriter.write(manifest.toString(True)) outWriter.close() try: if manifestId is None: # harvest the package as an object username = self.vc("sessionState").get("username") if username is None: username = "******" # necessary? 
harvester = None # set up config files, and make sure they are both deployed workflowsDir = FascinatorHome.getPathFile("harvest/workflows") configFile = self.__getFile(workflowsDir, jsonConfigFile) rulesFile = self.__getFile(workflowsDir, "packaging-rules.py") # run the harvest client with our packaging workflow config harvester = HarvestClient(configFile, manifestFile, username) harvester.start() manifestId = harvester.getUploadOid() harvester.shutdown() else: # update existing object object = StorageUtils.getDigitalObject(Services.getStorage(), manifestId) manifestStream = FileUtils.openInputStream(manifestFile) StorageUtils.createOrUpdatePayload(object, manifestHash, manifestStream) manifestStream.close() object.close() except Exception, ex: error = "Packager workflow failed: %s" % str(ex) self.vc("log").error(error, ex) if harvester is not None: harvester.shutdown() return '{ "status": "failed" }'
def updateRelationships(self, relationship, pid, identifier):
    """Mark the matching relationship on a related record as curated.

    The related record is located through the "identifier" of
    ``relationship``. Within that record's ``metadata.json`` every
    relationship whose "identifier" equals ``identifier`` gets
    "isCurated" set and "curatedPid" set to ``pid``; when none matches,
    a new relationship entry is appended. The payload is then written
    back to storage.

    relationship -- map-like object read with ``get("identifier")`` and
                    ``get("relationship")``
    pid          -- value stored as "curatedPid"
    identifier   -- identifier to look for among the related record's
                    relationships
    """
    oid = self.findOidByIdentifier(relationship.get("identifier"))
    self.writer.println(oid)
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)

    metadataJsonPayload = digitalObject.getPayload("metadata.json")
    metadataJsonInstream = metadataJsonPayload.open()
    try:
        metadataJson = JsonSimple(metadataJsonInstream)
    finally:
        # close the payload even when the JSON cannot be parsed
        metadataJsonPayload.close()

    relationships = metadataJson.getArray("relationships")
    if relationships is None:
        relationships = JSONArray()
        metadataJson.getJsonObject().put("relationships", relationships)

    found = False
    for existing in relationships:
        if existing.get("identifier") == identifier:
            existing.put("isCurated", True)
            existing.put("curatedPid", pid)
            found = True

    if not found:
        newRelationship = JsonObject()
        newRelationship.put("isCurated", True)
        newRelationship.put("curatedPid", pid)
        newRelationship.put("relationship", relationship.get("relationship"))
        newRelationship.put("identifier", identifier)
        relationships.add(newRelationship)

    # Encode explicitly as UTF-8 rather than the platform default so
    # non-ASCII metadata is stored the same way on every host.
    jsonBytes = String(metadataJson.toString(True)).getBytes("UTF-8")
    StorageUtils.createOrUpdatePayload(
        digitalObject, "metadata.json", ByteArrayInputStream(jsonBytes))
def getTFPackagePid(self, oid):
    """Return the first payload id of object ``oid`` ending in "tfpackage".

    Payload ids are examined in the order ``getPayloadIdList()`` yields
    them; None is returned when no id has that suffix.
    """
    payloadIds = StorageUtils.getDigitalObject(
        self.storage, oid).getPayloadIdList()
    for candidate in payloadIds:
        if not String(candidate).endsWith("tfpackage"):
            continue
        return candidate
    return None
def updateLocalRecordRelations(self, jobItems):
    """Mark relationships of local ("redbox") job items as curated.

    First builds a map from each job item's "oid" to the "identifier" of
    its first required identifier. Then, for every job item whose type
    is configured with target system "redbox", the package payload is
    loaded and each relationship pointing at another system is resolved
    by asking that system (HTTP GET) for the oid behind the
    relationship's identifier. On a 200 response the relationship gets
    "curatedPid" (looked up in the map) and "isCurated", and the
    relationship loop stops. The payload is written back afterwards.

    jobItems -- iterable of map-like job items with "oid", "type" and
                "required_identifiers"
    """
    oidIdentifierMap = HashMap()
    for jobItem in jobItems:
        oidIdentifierMap.put(
            jobItem.get("oid"),
            jobItem.get("required_identifiers")[0].get("identifier"))

    for jobItem in jobItems:
        itemType = jobItem.get("type")
        targetSystem = self.systemConfig.getString(
            None, "curation", "supported-types", itemType)
        if targetSystem != "redbox":
            continue

        oid = jobItem.get("oid")
        digitalObject = StorageUtils.getDigitalObject(
            self.services.getStorage(), oid)
        tfPackagePid = self.getPackageData(digitalObject)
        metadataJsonPayload = digitalObject.getPayload(tfPackagePid)
        metadataJsonInstream = metadataJsonPayload.open()
        try:
            metadataJson = JsonSimple(metadataJsonInstream)
        finally:
            metadataJsonPayload.close()

        relationships = metadataJson.getArray("relationships")
        if relationships is not None:
            for relationship in relationships:
                system = relationship.get("system")
                # Only relationships owned by another, named system need
                # a remote lookup.
                if system is None or system == "redbox":
                    continue
                url = self.systemConfig.getString(
                    "can't find it", "curation", "external-system-urls",
                    "get-oid-for-identifier", system)
                requestUrl = (url + "&identifier="
                              + relationship.get("identifier"))
                client = BasicHttpClient(requestUrl)
                get = GetMethod(requestUrl)
                curated = False
                try:
                    client.executeMethod(get)
                    if get.getStatusCode() == 200:
                        response = JsonSimple(get.getResponseBodyAsString())
                        relationship.put(
                            "curatedPid",
                            oidIdentifierMap.get(
                                response.getString(None, "oid")))
                        relationship.put("isCurated", True)
                        curated = True
                finally:
                    # hand the connection back whether or not the call
                    # succeeded
                    get.releaseConnection()
                # Now update the relationship on Mint's side
                # NOTE(review): processing stops after the first
                # relationship that was curated — confirm this is the
                # intended scope of the break.
                if curated:
                    break

        # Encode explicitly as UTF-8 rather than the platform default.
        jsonBytes = String(metadataJson.toString(True)).getBytes("UTF-8")
        StorageUtils.createOrUpdatePayload(
            digitalObject, tfPackagePid, ByteArrayInputStream(jsonBytes))
def __activate__(self, context):
    """Record on one object that a related record has been curated.

    The target object is taken from the "oid" request parameter or,
    when that is absent, looked up from the "identifier" parameter. In
    its package every relationship whose "identifier" equals the
    "sourceIdentifier" parameter gets "isCurated" set and "curatedPid"
    set to the "curatedPid" parameter; when none matches, a new
    relationship of type "relationship" is appended. The updated JSON is
    logged, written to the response and stored in every payload whose
    id ends in ".tfpackage".

    The session username is set to "admin" while the script runs and is
    always removed again.

    context -- scripting context providing "log", "response", "request",
               "systemConfig", "Services" and "sessionState"
    """
    self.log = context["log"]
    self.response = context["response"]
    self.request = context["request"]
    self.systemConfig = context["systemConfig"]
    self.storage = context["Services"].getStorage()
    self.indexer = context["Services"].getIndexer()
    self.sessionState = context["sessionState"]

    self.sessionState.set("username", "admin")
    try:
        out = self.response.getPrintWriter("text/plain; charset=UTF-8")
        try:
            # NOTE(review): both beans are looked up but not used in this
            # method; kept in case the lookup itself is relied upon.
            relationshipMapper = ApplicationContextProvider.getApplicationContext().getBean("relationshipMapper")
            externalCurationMessageBuilder = ApplicationContextProvider.getApplicationContext().getBean("externalCurationMessageBuilder")

            oid = self.request.getParameter("oid")
            if oid is None:
                identifier = self.request.getParameter("identifier")
                oid = self.findOidByIdentifier(identifier)
            relationshipType = self.request.getParameter("relationship")
            curatedPid = self.request.getParameter("curatedPid")
            sourceId = self.request.getParameter("sourceIdentifier")

            digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
            metadataJson = self.getTfPackage(digitalObject)
            relationships = metadataJson.getArray("relationships")

            found = False
            for relationship in relationships:
                if relationship.get("identifier") == sourceId:
                    relationship.put("isCurated", True)
                    relationship.put("curatedPid", curatedPid)
                    found = True
            if not found:
                relationship = JsonObject()
                relationship.put("isCurated", True)
                relationship.put("curatedPid", curatedPid)
                relationship.put("relationship", relationshipType)
                relationship.put("identifier", sourceId)
                relationships.add(relationship)

            prettyJson = metadataJson.toString(True)
            self.log.info(prettyJson)
            out.println(prettyJson)

            # Encode explicitly as UTF-8 rather than the platform default.
            jsonBytes = String(prettyJson).getBytes("UTF-8")
            for pid in digitalObject.getPayloadIdList():
                if pid.endswith(".tfpackage"):
                    # A stream can be consumed only once, so every
                    # payload gets its own.
                    StorageUtils.createOrUpdatePayload(
                        digitalObject, pid, ByteArrayInputStream(jsonBytes))
        finally:
            out.close()
    finally:
        self.sessionState.remove("username")
def __activate__(self, context):
    """Push curation details of every published record to its relations.

    For each record returned by ``findPublishedRecords()`` the record's
    curated pid and ``dc.identifier`` are resolved (via the workflow
    config stored alongside the record), then every relationship without
    a "broker" is either reported to the external system that owns it or
    updated locally.

    The session username is set to "admin" while the script runs and is
    always removed again, even when processing fails.

    context -- scripting context providing "log", "systemConfig",
               "sessionState", "response", "request" and "Services"
    """
    self.systemConfig = context["systemConfig"]
    self.sessionState = context["sessionState"]
    self.response = context["response"]
    self.request = context["request"]
    self.indexer = context["Services"].getIndexer()
    self.storage = context["Services"].getStorage()
    self.log = context["log"]

    self.sessionState.set("username", "admin")
    try:
        self.writer = self.response.getPrintWriter(
            "text/plain; charset=UTF-8")
        try:
            localSystem = self.systemConfig.getString(None, "system")
            for publishedRecord in self.findPublishedRecords():
                self.log.info("processing record "
                              + publishedRecord.getString(None, "storage_id"))
                digitalObject = StorageUtils.getDigitalObject(
                    self.storage,
                    publishedRecord.getString(None, "storage_id"))
                tfPackage = self.getTfPackage(digitalObject)
                metadata = digitalObject.getMetadata()

                # Load the workflow config the record was harvested with.
                configObject = StorageUtils.getDigitalObject(
                    self.storage, metadata.getProperty("jsonConfigOid"))
                payload = configObject.getPayload(
                    metadata.getProperty("jsonConfigPid"))
                inStream = payload.open()
                try:
                    jsonConfig = JsonSimple(inStream)
                finally:
                    payload.close()

                requiredIdentifiers = jsonConfig.getArray(
                    "curation", "requiredIdentifiers")
                if requiredIdentifiers is None:
                    # Without a configured identifier there is no pid to
                    # propagate; skip instead of reusing values left over
                    # from a previous record.
                    self.log.info("no required identifiers configured, "
                                  "skipping record")
                    continue

                pidName = self.systemConfig.getString(
                    None, "curation", "identifier-pids",
                    requiredIdentifiers[0])
                pid = metadata.getProperty(pidName)
                identifier = tfPackage.getString(
                    pid, "metadata", "dc.identifier")

                relationships = tfPackage.getArray("relationships")
                if relationships is None:
                    continue
                for relationship in relationships:
                    if relationship.get("broker") is not None:
                        continue
                    system = relationship.get("system")
                    if system is not None and system != localSystem:
                        self.notifyExternalRelationship(
                            relationship, pid, system, identifier)
                    else:
                        self.updateRelationships(
                            relationship, pid, identifier)
        finally:
            self.writer.close()
    finally:
        self.sessionState.remove("username")
def getPayloadJsonByExtension(self, oid, payloadExtension):
    """Parse the payload of object ``oid`` selected by extension as JSON.

    The payload id is resolved with ``findPidForExtenstion`` from the
    object and ``payloadExtension``.

    Returns a JsonSimple built from the payload's content.
    """
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
    pid = self.findPidForExtenstion(digitalObject, payloadExtension)
    payload = digitalObject.getPayload(pid)
    tfPackageInputStream = payload.open()
    try:
        return JsonSimple(tfPackageInputStream)
    finally:
        # release the payload stream once the JSON has been read
        payload.close()
def getPayloadJson(self, oid, payloadName):
    """Parse the payload ``payloadName`` of object ``oid`` as JSON.

    Returns a JsonSimple built from the payload's content.
    """
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
    payload = digitalObject.getPayload(payloadName)
    workflowMetaInputStream = payload.open()
    try:
        return JsonSimple(workflowMetaInputStream)
    finally:
        # release the payload stream once the JSON has been read
        payload.close()
def getObjectMeta(self, oid):
    """Return the metadata of the storage object identified by ``oid``."""
    return StorageUtils.getDigitalObject(self.storage, oid).getMetadata()
def getTfPackage(self, oid, pid):
    """Parse payload ``pid`` of object ``oid`` as JSON.

    Returns a JsonSimple built from the payload's content.
    """
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
    payload = digitalObject.getPayload(pid)
    tfPackageInputStream = payload.open()
    try:
        return JsonSimple(tfPackageInputStream)
    finally:
        # release the payload stream once the JSON has been read
        payload.close()
def getWorkflowMeta(self, oid):
    """Parse the "workflow.metadata" payload of object ``oid`` as JSON.

    Returns a JsonSimple built from the payload's content.
    """
    digitalObject = StorageUtils.getDigitalObject(self.storage, oid)
    payload = digitalObject.getPayload("workflow.metadata")
    workflowMetaInputStream = payload.open()
    try:
        return JsonSimple(workflowMetaInputStream)
    finally:
        # release the payload stream once the JSON has been read
        payload.close()
def __createFromSelected(self): self.vc("log").debug("Creating package from selected...") packageType, jsonConfigFile = self.__getPackageTypeAndJsonConfigFile() #self.vc("log").debug("packageType = '{}'", packageType) #self.vc("log").debug("jsonConfigFile = '{}'", jsonConfigFile) # if modifying existing manifest, we already have an identifier, # otherwise create a new one manifestId = self.__getActiveManifestId() if manifestId is None: manifestHash = "%s.tfpackage" % uuid.uuid4() else: manifestHash = self.__getActiveManifestPid() # store the manifest file for harvesting packageDir = FascinatorHome.getPathFile("packages") packageDir.mkdirs() manifestFile = File(packageDir, manifestHash) outStream = FileOutputStream(manifestFile) outWriter = OutputStreamWriter(outStream, "UTF-8") manifest = self.__getActiveManifest() oldType = manifest.getType() if oldType is None: manifest.setType(packageType) else: manifest.setType(oldType) self.vc("log").debug("Manifest: %s" % manifest) outWriter.write(manifest.toString(True)) outWriter.close() try: if manifestId is None: # harvest the package as an object username = self.vc("sessionState").get("username") if username is None: username = "******" # necessary? 
harvester = None # set up config files, and make sure they are both deployed workflowsDir = FascinatorHome.getPathFile("harvest/workflows") configFile = self.__getFile(workflowsDir, jsonConfigFile) rulesFile = self.__getFile(workflowsDir, "packaging-rules.py") # run the harvest client with our packaging workflow config harvester = HarvestClient(configFile, manifestFile, username) harvester.start() manifestId = harvester.getUploadOid() harvester.shutdown() else: # update existing object object = StorageUtils.getDigitalObject(Services.getStorage(), manifestId) manifestStream = FileUtils.openInputStream(manifestFile) StorageUtils.createOrUpdatePayload(object, manifestHash, manifestStream) manifestStream.close() object.close() except Exception, ex: error = "Packager workflow failed: %s" % str(ex) self.vc("log").error(error, ex) if harvester is not None: harvester.shutdown() return '{ "status": "failed" }'