def getTikaVersion():
    """Return the Apache Tika version string (e.g. "1.3"), or "" on error."""
    versionCommand = "%s -V" % (tikaPath)
    rc, out, err = executeOrRun("command", versionCommand, printing=False)
    if rc != 0:
        print >>sys.stderr, "Error: ", out, err, rc
        return ""
    # Tika prints e.g. "Apache Tika 1.3"; strip the product name prefix.
    return out.strip().replace("Apache Tika ", "")
def verifyBag(bag):
    # Run the BagIt verification suite against `bag`.  Each command's
    # (exitCode, stdOut, stdErr) tuple is appended to the module-level
    # verificationCommandsOutputs list, and the global exitCode is
    # incremented once per failed check.
    global exitCode
    verificationCommands = []
    verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"%s\"" % (bag)) #Verifies the validity of a bag.
    verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"%s\"" % (bag)) #Verifies the completeness of a bag.
    verificationCommands.append("/usr/share/bagit/bin/bag verifypayloadmanifests \"%s\"" % (bag)) #Verifies the checksums in all payload manifests.
    bagInfoPath = os.path.join(bag, "bag-info.txt")
    # Only check the payload oxum when bag-info.txt actually declares one.
    if os.path.isfile(bagInfoPath):
        for line in open(bagInfoPath,'r'):
            if line.startswith("Payload-Oxum"):
                verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"%s\"" % (bag)) #Generates Payload-Oxum and checks against Payload-Oxum in bag-info.txt.
                break
    # Only verify tag manifests when at least one tagmanifest-*.txt exists.
    for item in os.listdir(bag):
        if item.startswith("tagmanifest-") and item.endswith(".txt"):
            verificationCommands.append("/usr/share/bagit/bin/bag verifytagmanifests \"%s\"" % (bag)) #Verifies the checksums in all tag manifests.
            break
    for command in verificationCommands:
        ret = executeOrRun("command", command, printing=printSubProcessOutput)
        verificationCommandsOutputs.append(ret)
        # NOTE(review): `exit` shadows the builtin exit() inside this loop.
        exit, stdOut, stdErr = ret
        if exit != 0:
            print >>sys.stderr, "Failed test: ", command
            print >>sys.stderr, stdErr
            print >>sys.stderr, stdOut
            print >>sys.stderr
            exitCode += 1
        else:
            print "Passed test: ", command
def extract(target, destinationDirectory): command = """/usr/bin/7z x -bd -o"%s" "%s" """ % (destinationDirectory, target) exitC, stdOut, stdErr = executeOrRun("command", command, printing=False) if exitC != 0: print stdOut print >>sys.stderr, "Failed extraction: ", command, "\r\n", stdErr exit(exitC)
def checkForPreconfiguredXML(self):
    # Look for a processing-configuration XML file in the unit's directory and,
    # if it pre-selects a chain for this choice point, return that chain's pk.
    # Returns None when no file / no matching choice exists, or when a
    # configured delay timer was started instead (the timer later calls
    # self.proceedWithChoice with the resolved chain pk).
    ret = None
    xmlFilePath = os.path.join( \
        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            # Make the XML readable/writable; runs as a shell command via sudo.
            command = "sudo chmod 774 \"" + xmlFilePath + "\""
            if isinstance(command, unicode):
                command = command.encode("utf-8")
            exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.find("preconfiguredChoices"):
                #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description:
                    desiredChoice = preconfiguredChoice.find("goToChain").text
                    # NOTE(review): desiredChoice is interpolated straight into
                    # the SQL string — SQL injection risk if the XML is untrusted.
                    sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = %s;""" % (desiredChoice, self.jobChainLink.pk.__str__())
                    c, sqlLock = databaseInterface.querySQL(sql)
                    row = c.fetchone()
                    while row != None:
                        ret = row[0]
                        row = c.fetchone()
                    sqlLock.release()
                    try:
                        #<delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        # NOTE(review): attribute read is "unitCtime" while the
                        # example above says unitAtime — confirm which is intended.
                        unitAtimeXML = delayXML.get("unitCtime")
                        if unitAtimeXML != None and unitAtimeXML.lower() != "no":
                            delaySeconds=int(delayXML.text)
                            # Delay is measured from the unit's mtime, not from now.
                            unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                            nowTime=time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            print "time to go:", timeToGo
                            #print "that will be: ", (nowTime + timeToGo)
                            self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime())
                            t = threading.Timer(timeToGo, self.proceedWithChoice, args=[ret], kwargs={"delayTimerStart":True})
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            return None
                    except Exception as inst:
                        print >>sys.stderr, "Error parsing xml:"
                        print >>sys.stderr, type(inst)
                        print >>sys.stderr, inst.args
        except Exception as inst:
            print >>sys.stderr, "Error parsing xml:"
            print >>sys.stderr, type(inst)
            print >>sys.stderr, inst.args
    return ret
def getMediaInfoVersion():
    """Return the MediaInfo version (the text after the leading "v"), or ""."""
    rc, out, err = executeOrRun("command", "%s --version" % (MediaInfoPath),
                                printing=False)
    # NOTE: mediainfo --version historically exits with 255 on success,
    # hence the unusual success check.
    if rc != 255:
        print >>sys.stderr, "Error: ", out, err, rc
        return ""
    return out[out.find("v") + 1:].strip()
def getFidoVersion():
    """Return the FIDO version number, or "" on error.

    Parses output of the form "FIDO v1.x ..." — second whitespace token,
    with its leading "v" dropped.
    """
    rc, out, err = executeOrRun("command", "%s -v" % (FidoPath), printing=False)
    if rc != 0:
        print >>sys.stderr, "Error: ", out, err, rc
        return ""
    return out.split(" ")[1][1:]
def getTikaID(itemdirectoryPath):
    """Identify a file's format with Apache Tika (-d, detect).

    Returns Tika's detection output, or "" when no id was found (exit 1)
    or on any other error.
    """
    # Quote the path so file names containing spaces or shell
    # metacharacters reach Tika as a single argument (previously the bare
    # path was concatenated unquoted and broke on such names).
    command = 'java -jar ./tika-app-1.3.jar -x -d "%s"' % (itemdirectoryPath)
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode == 1: #no id found
        return ""
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    return stdOut
def getTikaID(itemdirectoryPath):
    """Identify a file's format with Apache Tika (-d, detect).

    Exits the script with status 0 when Tika finds no id (exit 1);
    returns "" on other errors, otherwise the stripped detection output.
    """
    # Quote the path so file names containing spaces or shell
    # metacharacters reach Tika as a single argument (previously the path
    # was interpolated unquoted and broke on such names).
    command = '%s -x -d "%s"' % (tikaPath, itemdirectoryPath)
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode == 1: #no id found
        print >>sys.stderr, "Tika found no format id"
        exit(0)
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    return stdOut.strip()
def onceExtracted(command): extractedFiles = [] print "TODO - Metadata regarding removal of extracted archive" if removeOnceExtracted: packageFileUUID = sys.argv[6].__str__() sipDirectory = sys.argv[2].__str__() os.remove(replacementDic["%inputFile%"]) currentLocation = replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1) fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation) print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"] for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")): path, directories, files = w for p in files: p = os.path.join(path, p) #print "path: ", p if os.path.isfile(p): extractedFiles.append(p) for ef in extractedFiles: fileUUID = uuid.uuid4().__str__() #print "File Extracted:", ef if True: #Add the file to the SIP #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments> sipDirectory = sys.argv[2].__str__() transferUUID = sys.argv[3].__str__() date = sys.argv[4].__str__() taskUUID = sys.argv[5].__str__() packageFileUUID = sys.argv[6].__str__() filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1) print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail) updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4.__str__()) run = sys.argv[0].__str__() + \ " \"" + transcoder.escapeForCommand(ef) + "\"" if True: #Add the file to the SIP run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \ " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \ " \"" + fileUUID + "\"" 
exitCode, stdOut, stdError = executeOrRun("command", run) print stdOut print >>sys.stderr, stdError if exitCode != 0 and command.exitCode == 0: command.exitCode = exitCode global extractedCount date = sys.argv[4].__str__().split(".", 1)[0] extractedCount = extractedCount + 1 replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
def renameAsSudo(source, destination):
    """Move/rename `source` to `destination` via `sudo mv`.

    Used for directories the archivematica user may not have rights to
    move.  Exits the process with mv's exit code on failure.
    """
    command = "sudo mv \"" + source + "\" \"" + destination + "\""
    if isinstance(command, unicode):
        command = command.encode("utf-8")
    rc, out, err = executeOrRun("command", command, "", printing=False)
    if not rc:
        return
    print >>sys.stderr, "exitCode:", rc
    print >>sys.stderr, out
    print >>sys.stderr, err
    exit(rc)
def executeCommand(gearman_worker, gearman_job):
    # Gearman worker entry point: unpickle the job payload, expand
    # replacement strings into the configured module command, run it, and
    # return a pickled {"exitCode", "stdOut", "stdError"} dict.
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]#.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)
        sInput = ""
        clientID = gearman_worker.worker_client_id
        #if True:
        #    print clientID, execute, data
        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)
        # Reject tasks this client is not configured to run.
        if execute not in supportedModules:
            output = ["Error!", "Error! - Tried to run and unsupported command." ]
            exitCode = -1
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
        command = supportedModules[execute]
        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace ( key, replacementDic[key] )
            arguments = arguments.replace ( key, replacementDic[key] )
        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)
        #execute command
        command += " " + arguments
        # Serialize console output across worker threads.
        printOutputLock.acquire()
        print >>sys.stderr, "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >>sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Config Error!", ose.__str__() ]
        exitCode = 1
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
def runBag(arguments): command = "/usr/share/bagit/bin/bag %s" % (arguments) exitCode, stdOut, stdError = executeOrRun("command", command, printing=False) if exitCode != 0: print >>sys.stderr, "" print >>sys.stderr, "Error with command: ", command print >>sys.stderr, "Standard OUT:" print >>sys.stderr, stdOut print >>sys.stderr, "Standard Error:" print >>sys.stderr, stdError exit(exitCode) else: print stdOut print >>sys.stderr, stdError
def execute(self, skipOnSuccess=False):
    # Run this transcoder command: expand the global replacementDic into the
    # command string (and outputLocation), execute it, then run the optional
    # verification and event-detail sub-commands.  Calls the module-level
    # onSuccess hook on success unless skipOnSuccess is set.  Returns the
    # final exit code.
    #print self.__str__()
    #Do a dictionary replacement.
    #Replace replacement strings
    global replacementDic
    #for each key replace all instances of the key in the command string
    for key, value in replacementDic.iteritems():
        key = toStrFromUnicode(key)
        replacementDic[key] = toStrFromUnicode(value)
        #self.outputLocation = toStrFromUnicode(self.outputLocation)
        #self.command = self.command.replace ( key, quote(replacementDic[key]) )
        # Values are shell-escaped for the command, but not for outputLocation.
        self.command = self.command.replace( key, escapeForCommand(replacementDic[key]) )
        if self.outputLocation:
            self.outputLocation = self.outputLocation.replace( key, replacementDic[key] )
    print "Running: "
    print self.__str__()
    self.exitCode, self.stdOut, self.stdError = executeOrRun(self.type, self.command)
    # Verification result replaces the exit code; event detail does not.
    if (not self.exitCode) and self.verificationCommand:
        print
        self.exitCode = self.verificationCommand.execute(skipOnSuccess=True)
    if (not self.exitCode) and self.eventDetailCommand:
        self.eventDetailCommand.execute(skipOnSuccess=True)
    #If unsuccesful
    if self.exitCode:
        print >>sys.stderr, "Failed:"
        #print >>sys.stderr, self.__str__()
        print self.stdOut
        print >>sys.stderr, self.stdError
        # Retry path is disabled (the `False and` short-circuits it).
        if False and self.failedCount < 1: #retry count
            self.failedCount= self.failedCount + 1
            time.sleep(2)
            print >>sys.stderr, "retrying, ", self.failedCount
            return self.execute(skipOnSuccess)
    else:
        global onSuccess
        #uncommenting these floods the buffers with ffmpeg
        #print self.stdOut
        #print self.stdError
        if (not skipOnSuccess) and onSuccess:
            onSuccess(self)
    return self.exitCode
def getFidoID(itemdirectoryPath): command = 'python ./fido/fido/fido.py "%s"' % (itemdirectoryPath) exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False) if exitCode != 0: print >> sys.stderr, "Error: ", stdOut, stdErr, exitCode return "" if not stdOut: return "" try: ret = stdOut.split(",")[2] except: print stdErr print stdOut raise return ret
def verifyBag(bag): global exitCode verificationCommands = [ "/usr/share/bagit/bin/bag verifyvalid \"" + bag + "\"", "/usr/share/bagit/bin/bag checkpayloadoxum \"" + bag + "\"", "/usr/share/bagit/bin/bag verifycomplete \"" + bag + "\"", "/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"", "/usr/share/bagit/bin/bag verifytagmanifests \"" + bag + "\"" ] for command in verificationCommands: ret = executeOrRun("command", command, printing=printSubProcessOutput) verificationCommandsOutputs.append(ret) exit, stdOut, stdErr = ret if exit != 0: print >>sys.stderr, "Failed test: ", command print >>sys.stderr, stdErr print >>sys.stderr exitCode += 1 else: print "Passed test: ", command
def execute(self, skipOnSuccess=False):
    # Run this transcoder command: expand self.replacementDic into the
    # command string (and outputLocation), execute it, then run the optional
    # verification and event-detail sub-commands.  Progress is mirrored into
    # self.opts["prependStdOut"] when opts are present.  Calls self.onSuccess
    # on success unless skipOnSuccess is set.  Returns the final exit code.
    #for each key replace all instances of the key in the command string
    for key, value in self.replacementDic.iteritems():
        key = toStrFromUnicode(key)
        self.replacementDic[key] = toStrFromUnicode(value)
        #self.outputLocation = toStrFromUnicode(self.outputLocation)
        #self.command = self.command.replace ( key, quote(replacementDic[key]) )
        # Values are shell-escaped for the command, but not for outputLocation.
        self.command = self.command.replace( key, escapeForCommand(self.replacementDic[key]) )
        if self.outputLocation:
            self.outputLocation = self.outputLocation.replace( key, self.replacementDic[key] )
    print "Running: "
    selfstr = self.__str__()
    print selfstr
    if self.opts:
        self.opts["prependStdOut"] += "\r\nRunning: \r\n%s" % (selfstr)
    self.exitCode, self.stdOut, self.stdError = executeOrRun(self.type, self.command)
    # Verification result replaces the exit code; event detail does not.
    if (not self.exitCode) and self.verificationCommand:
        print
        if self.opts:
            self.opts["prependStdOut"] += "\r\n"
        self.exitCode = self.verificationCommand.execute(skipOnSuccess=True)
    if (not self.exitCode) and self.eventDetailCommand:
        self.eventDetailCommand.execute(skipOnSuccess=True)
    #If unsuccesful
    if self.exitCode:
        print >>sys.stderr, "Failed:"
        #print >>sys.stderr, self.__str__()
        print self.stdOut
        print >>sys.stderr, self.stdError
        # Retry path is disabled (the `False and` short-circuits it).
        if False and self.failedCount < 1: #retry count
            self.failedCount= self.failedCount + 1
            time.sleep(2)
            print >>sys.stderr, "retrying, ", self.failedCount
            return self.execute(skipOnSuccess)
    else:
        if (not skipOnSuccess) and self.onSuccess:
            self.onSuccess(self, self.opts, self.replacementDic)
    return self.exitCode
def getMediaInfoID(itemdirectoryPath): command = "mediainfo \"%s\"" % (itemdirectoryPath) exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False) if exitCode != 0: print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode return "" if not stdOut: return "" try: mediaInfoDic={} for line in stdOut.split("\n"): header = "General" if not line or line.isspace(): break #can be removed to grep more info continue index = line.find(":") if index == -1: header = line.strip() continue key = "%s-%s" % (header, line[:index].strip()) value = line[index+1:].strip() mediaInfoDic[key] = value #print mediaInfoDic if mediaInfoDic.has_key('General-Format'): format = mediaInfoDic['General-Format'] else: return "" formatVersion = None if mediaInfoDic.has_key('General-Format version'): formatVersion = mediaInfoDic['General-Format version'] ret = json.dumps([('Format', format,), ('Format version', formatVersion,)]) except Exception as inst: print type(inst) # the exception instance print inst.args print stdErr print stdOut return ret
from archivematicaFunctions import escapeForCommand

# Summary line clamdscan prints for a clean file.
clamscanResultShouldBe="Infected files: 0"

if __name__ == '__main__':
    # argv: fileUUID, target path, date, taskUUID
    fileUUID = sys.argv[1]
    target = sys.argv[2]
    date = sys.argv[3]
    taskUUID = sys.argv[4]
    # Scan the file by piping it to clamdscan on stdin.
    command = 'clamdscan - <"' + escapeForCommand(target) + '"'
    print >>sys.stderr, command
    commandVersion = "clamdscan -V"
    eventOutcome = "Pass"
    clamscanOutput = executeOrRun("bashScript", command, printing=False)
    clamscanVersionOutput = executeOrRun("command", commandVersion, printing=False)
    if clamscanOutput[0] or clamscanVersionOutput[0]:
        # A failing version check is fatal; a failing scan marks the event Fail.
        if clamscanVersionOutput[0]:
            print >>sys.stderr, clamscanVersionOutput
            exit(2)
        else:
            eventOutcome = "Fail"
    # Even a zero exit code fails unless the clean-file summary is present.
    if eventOutcome == "Fail" or clamscanOutput[1].find(clamscanResultShouldBe) == -1:
        eventOutcome = "Fail"
        print >>sys.stderr, fileUUID, " - ", os.path.basename(target)
        print >>sys.stderr, clamscanOutput
    # "ClamAV x.y.z/defs/defs-date" -> engine version, defs version, defs date.
    version, virusDefs, virusDefsDate = clamscanVersionOutput[1].split("/")
def executeCommand(gearman_worker, gearman_job):
    # Gearman worker entry point: unpickle the job payload, refuse tasks that
    # already started, expand replacement strings into the configured module
    # command, run it, and return a pickled
    # {"exitCode", "stdOut", "stdError"} dict.
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]#.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)
        sInput = ""
        clientID = gearman_worker.worker_client_id
        # Guard against duplicate delivery: bail out if this task has a
        # recorded start time already.
        sql = """SELECT Tasks.taskUUID FROM Tasks WHERE taskUUID='%s' AND startTime != 0;""" % (gearman_job.unique.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started! Unable to determine if it completed successfully."""
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)
        # Reject tasks this client is not configured to run.
        if execute not in supportedModules:
            output = ["Error!", "Error! - Tried to run and unsupported command."
            ]
            exitCode = -1
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
        command = supportedModules[execute]
        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace ( key, replacementDic[key] )
            arguments = arguments.replace ( key, replacementDic[key] )
        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)
        #execute command
        command += " " + arguments
        # Serialize console output across worker threads.
        printOutputLock.acquire()
        print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >>sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Archivematica Client Error!", ose.__str__() ]
        exitCode = 1
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
# Summary line clamdscan prints for a clean file.
clamscanResultShouldBe = "Infected files: 0"

if __name__ == '__main__':
    # argv: fileUUID, target path, date, taskUUID
    fileUUID = sys.argv[1]
    target = sys.argv[2]
    date = sys.argv[3]
    taskUUID = sys.argv[4]
    # Scan by piping the file to clamdscan on stdin; "$" is additionally
    # escaped so the shell does not expand it inside the double quotes.
    command = 'clamdscan - <"' + escapeForCommand(target).replace("$", "\\$") + '"'
    print >> sys.stderr, command
    commandVersion = "clamdscan -V"
    eventOutcome = "Pass"
    clamscanOutput = executeOrRun("bashScript", command, printing=False)
    clamscanVersionOutput = executeOrRun("command", commandVersion, printing=False)
    if clamscanOutput[0] or clamscanVersionOutput[0]:
        # A failing version check is fatal; a failing scan marks the event Fail.
        if clamscanVersionOutput[0]:
            print >> sys.stderr, clamscanVersionOutput
            exit(2)
        else:
            eventOutcome = "Fail"
    # Even a zero exit code fails unless the clean-file summary is present.
    if eventOutcome == "Fail" or clamscanOutput[1].find(
            clamscanResultShouldBe) == -1:
        eventOutcome = "Fail"
        print >> sys.stderr, fileUUID, " - ", os.path.basename(target)
def checkForPreconfiguredXML(self):
    # Look for a processing-configuration XML file in the unit's directory
    # and, if it pre-selects a chain for this link (matched on
    # jobChainLink.pk), return that chain id.  Returns None when no choice
    # applies or when a configured delay timer was started instead (the
    # timer later calls self.proceedWithChoice with the chosen chain).
    desiredChoice = None
    xmlFilePath = os.path.join( \
        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
    )
    xmlFilePath = unicodeToStr(xmlFilePath)
    if os.path.isfile(xmlFilePath):
        # For a list of items with pks:
        # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
        try:
            # Make the XML readable/writable; runs as a shell command via sudo.
            command = "sudo chmod 774 \"" + xmlFilePath + "\""
            if isinstance(command, unicode):
                command = command.encode("utf-8")
            exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
            tree = etree.parse(xmlFilePath)
            root = tree.getroot()
            for preconfiguredChoice in root.findall(".//preconfiguredChoice"):
                if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.pk:
                    desiredChoice = preconfiguredChoice.find("goToChain").text
                    try:
                        #<delay unitAtime="yes">30</delay>
                        delayXML = preconfiguredChoice.find("delay")
                        # NOTE(review): attribute read is "unitCtime" while the
                        # example above says unitAtime — confirm which is intended.
                        if delayXML is not None:
                            unitAtimeXML = delayXML.get("unitCtime")
                        else:
                            unitAtimeXML = None
                        if unitAtimeXML is not None and unitAtimeXML.lower() != "no":
                            delaySeconds = int(delayXML.text)
                            # Delay is measured from the unit's mtime, not from now.
                            unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                            nowTime = time.time()
                            timeDifference = nowTime - unitTime
                            timeToGo = delaySeconds - timeDifference
                            LOGGER.info('Time to go: %s', timeToGo)
                            self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime())
                            t = threading.Timer(timeToGo, self.proceedWithChoice, args=[desiredChoice, None], kwargs={"delayTimerStart": True})
                            t.daemon = True
                            self.delayTimer = t
                            t.start()
                            return None
                    except Exception:
                        LOGGER.info('Error parsing XML', exc_info=True)
        except Exception:
            LOGGER.warning('Error parsing xml at %s for pre-configured choice', xmlFilePath, exc_info=True)
    LOGGER.info('Using preconfigured choice %s for %s', desiredChoice, self.jobChainLink.pk)
    return desiredChoice
def main(file_path, file_uuid, sip_uuid):
    """Characterize a file via FPR characterization rules.

    Runs every applicable characterization command for the file's
    identified format (or the FPR defaults when none apply), saving
    XML (fmt/101) command output to FPCommandOutput.  Returns 0 on
    success (or when already characterized), -1 if any command failed.
    """
    setup_dicts(mcpclient_settings)

    failed = False

    # Check to see whether the file has already been characterized; don't try
    # to characterize it a second time if so.
    if FPCommandOutput.objects.filter(file_id=file_uuid).count() > 0:
        return 0

    try:
        format = FormatVersion.active.get(fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        rules = format = None

    if format:
        rules = FPRule.active.filter(format=format.uuid, purpose='characterization')

    # Characterization always occurs - if nothing is specified, get one or more
    # defaults specified in the FPR.
    if not rules:
        rules = FPRule.active.filter(purpose='default_characterization')

    for rule in rules:
        # Shell-style commands get their placeholders substituted inline;
        # script-type commands receive the values as GNU-style options.
        if rule.command.script_type == 'bashScript' or rule.command.script_type == 'command':
            args = []
            command_to_execute = replace_string_values(rule.command.command, file_=file_uuid, sip=sip_uuid, type_='file')
        else:
            rd = ReplacementDict.frommodel(file_=file_uuid, sip=sip_uuid, type_='file')
            args = rd.to_gnu_options()
            command_to_execute = rule.command.command

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type, command_to_execute, arguments=args)

        if exitstatus != 0:
            print('Command {} failed with exit status {}; stderr:'.format(rule.command.description, exitstatus), stderr, file=sys.stderr)
            failed = True
            continue
        # fmt/101 is XML - we want to collect and package any XML output, while
        # allowing other commands to execute without actually collecting their
        # output in the event that they are writing their output to disk.
        # FPCommandOutput can have multiple rows for a given file,
        # distinguished by the rule that produced it.
        if rule.command.output_format and rule.command.output_format.pronom_id == 'fmt/101':
            try:
                etree.fromstring(stdout)
                insertIntoFPCommandOutput(file_uuid, stdout, rule.uuid)
                print('Saved XML output for command "{}" ({})'.format(rule.command.description, rule.command.uuid))
            except etree.XMLSyntaxError:
                failed = True
                print('XML output for command "{}" ({}) was not valid XML; not saving to database'.format(rule.command.description, rule.command.uuid), file=sys.stderr)
        else:
            print('Tool output for command "{}" ({}) is not XML; not saving to database'.format(rule.command.description, rule.command.uuid), file=sys.stderr)

    if failed:
        return -1
    else:
        return 0
os.mkdir(AIPsStoreWithQuads, mode) #mode isn't working on the mkdir os.chmod(AIPsStoreWithQuads, mode) storeLocation = os.path.join(AIPsStoreWithQuads, os.path.basename(os.path.abspath(AIP))) #Store the AIP shutil.move(AIP, storeLocation) #Extract the AIP extractDirectory = "/tmp/" + SIPUUID + "/" os.makedirs(extractDirectory) # command = "7z x -bd -o\"" + extractDirectory + "\" \"" + storeLocation + "\"" ret = executeOrRun("command", command, printing=printSubProcessOutput) exitCode, stdOut, stdErr = ret if exitCode != 0: print >> sys.stderr, "Error extracting" quit(1) bag = extractDirectory + SIPNAME + "-" + SIPUUID + "/" verificationCommands = [] verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"" + bag + "\"") verificationCommands.append( "/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"")
def main(job, command_uuid, file_path, file_uuid, disable_reidentify):
    """Run the selected FPR identification command on a file and record the
    resulting format in the database.

    Returns 0 on success or skip, 255 on any identification failure.
    """
    job.print_output("IDCommand UUID:", command_uuid)
    job.print_output("File: ({}) {}".format(file_uuid, file_path))

    # The literal string "None" means identification was explicitly skipped.
    if command_uuid == "None":
        job.print_output("Skipping file format identification")
        return 0
    try:
        command = IDCommand.active.get(uuid=command_uuid)
    except IDCommand.DoesNotExist:
        job.write_error("IDCommand with UUID {} does not exist.\n".format(command_uuid))
        return 255

    file_ = File.objects.get(uuid=file_uuid)

    # If reidentification is disabled and a format identification event exists for this file, exit
    if disable_reidentify and file_.event_set.filter(event_type='format identification').exists():
        job.print_output('This file has already been identified, and re-identification is disabled. Skipping.')
        return 0

    # Save the selected ID command for use in a later chain
    save_idtool(file_, command_uuid)

    exitcode, output, _ = executeOrRun(command.script_type, command.script, arguments=[file_path], printing=False, capture_output=True)
    output = output.strip()

    if exitcode != 0:
        job.print_error('Error: IDCommand with UUID {} exited non-zero.'.format(command_uuid))
        return 255

    job.print_output('Command output:', output)
    # PUIDs are the same regardless of tool, so PUID-producing tools don't have "rules" per se - we just
    # go straight to the FormatVersion table to see if there's a matching PUID
    try:
        if command.config == 'PUID':
            version = FormatVersion.active.get(pronom_id=output)
        else:
            rule = IDRule.active.get(command_output=output, command=command)
            version = rule.format
    except IDRule.DoesNotExist:
        job.print_error('Error: No FPR identification rule for tool output "{}" found'.format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255
    except IDRule.MultipleObjectsReturned:
        job.print_error('Error: Multiple FPR identification rules for tool output "{}" found'.format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255
    except FormatVersion.DoesNotExist:
        job.print_error('Error: No FPR format record found for PUID {}'.format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255

    (ffv, created) = FileFormatVersion.objects.get_or_create(file_uuid=file_, defaults={'format_version': version})

    if not created:  # Update the version if it wasn't created new
        ffv.format_version = version
        ffv.save()

    job.print_output("{} identified as a {}".format(file_path, version.description))

    write_identification_event(file_uuid, command, format=version.pronom_id)
    write_file_id(file_uuid=file_uuid, format=version, output=output)

    return 0
if __name__ == '__main__':
    logger = get_script_logger("archivematica.mcp.client.clamscan")

    # argv: fileUUID, target path, date
    fileUUID = sys.argv[1]
    target = sys.argv[2]
    date = sys.argv[3]

    # Check if scan event already exists for this file - if so abort early
    count = Event.objects.filter(file_uuid_id=fileUUID, event_type='virus check').count()
    if count >= 1:
        print 'Virus scan already performed, not running scan again'
        sys.exit(0)

    # Scan by streaming the file to clamdscan's stdin (no shell involved).
    command = ['clamdscan', '-']
    print 'Clamscan command:', ' '.join(command), '<', target
    with open(target) as file_:
        scan_rc, scan_stdout, scan_stderr = executeOrRun("command", command, printing=False, stdIn=file_)

    commandVersion = "clamdscan -V"
    print 'Clamscan version command:', commandVersion
    version_rc, version_stdout, version_stderr = executeOrRun("command", commandVersion, printing=False)

    eventOutcome = "Pass"
    if scan_rc or version_rc:  # Either command returned non-0 RC
        # A failing version check is fatal; a failing scan marks the event Fail.
        if version_rc:
            print >>sys.stderr, 'Error determining version, aborting'
            print >>sys.stderr, 'Version RC:', version_rc
            print >>sys.stderr, 'Version Standard output:', version_stdout
            print >>sys.stderr, 'Version Standard error:', version_stderr
            sys.exit(2)
        else:
            eventOutcome = "Fail"
def executeCommand(gearman_worker, gearman_job):
    # Gearman worker entry point: unpickle the job payload, refuse tasks that
    # already started, expand replacement strings into the configured module
    # command, run it, and return a pickled
    # {"exitCode", "stdOut", "stdError"} dict.
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]  #.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)
        sInput = ""
        clientID = gearman_worker.worker_client_id
        # Guard against duplicate delivery: bail out if this task has a
        # recorded start time already.
        sql = """SELECT Tasks.taskUUID FROM Tasks WHERE taskUUID='%s' AND startTime != 0;""" % (
            gearman_job.unique.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started! Unable to determine if it completed successfully."""
            return cPickle.dumps({
                "exitCode": exitCode,
                "stdOut": stdOut,
                "stdError": stdError
            })
        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)
        # Reject tasks this client is not configured to run.
        if execute not in supportedModules:
            output = [
                "Error!", "Error! - Tried to run and unsupported command."
            ]
            exitCode = -1
            return cPickle.dumps({
                "exitCode": exitCode,
                "stdOut": output[0],
                "stdError": output[1]
            })
        command = supportedModules[execute]
        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace(key, replacementDic[key])
            arguments = arguments.replace(key, replacementDic[key])
        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)
        #execute command
        command += " " + arguments
        # Serialize console output across worker threads.
        printOutputLock.acquire()
        print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__(
        ) + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": stdOut,
            "stdError": stdError
        })
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >> sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Archivematica Client Error!", ose.__str__()]
        exitCode = 1
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": output[0],
            "stdError": output[1]
        })
def main(transfer_uuid, sip_directory, date, task_uuid, delete=False):
    """Extract the contents of package files (zip, 7z, ...) in a transfer.

    For every active (not removed) file in the transfer, look up its
    identified format, find an FPR "extract" rule for that format and run
    its command.  Newly extracted files are assigned UUIDs and recorded in
    the database so Archivematica tracks them.

    :param transfer_uuid: UUID of the transfer whose files are examined.
    :param sip_directory: absolute path substituted for %transferDirectory%.
    :param date: timestamp used to build the extraction output directory.
    :param task_uuid: UUID of the running task, recorded with new files.
    :param delete: when True, remove the original package after extraction.
    :returns: 0 if anything was extracted (a new identification pass must
        run on the extracted files), -1 otherwise.
    """
    files = File.objects.filter(transfer=transfer_uuid,
                                removedtime__isnull=True)
    if not files:
        print('No files found for transfer: ', transfer_uuid)

    # We track whether or not anything was extracted because that controls what
    # the next microservice chain link will be.
    # If something was extracted, then a new identification step has to be
    # kicked off on those files; otherwise, we can go ahead with the transfer.
    extracted = False
    for file_ in files:
        try:
            format_id = FileFormatVersion.objects.get(file_uuid=file_.uuid)
        # Can't do anything if the file wasn't identified in the previous
        # step.  (Was a bare "except:", which also swallowed unrelated
        # errors; narrowed to the lookup failures it was meant to catch.)
        except (FileFormatVersion.DoesNotExist,
                FileFormatVersion.MultipleObjectsReturned):
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - file format not identified', file=sys.stderr)
            continue
        if format_id.format_version is None:
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - file format not identified', file=sys.stderr)
            continue
        # Extraction commands are defined in the FPR just like normalization
        # commands
        try:
            command = FPCommand.active.get(
                fprule__format=format_id.format_version,
                fprule__purpose='extract')
        except FPCommand.DoesNotExist:
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - No rule found to extract', file=sys.stderr)
            continue
        # Check if file has already been extracted
        if already_extracted(file_):
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - extraction already happened.', file=sys.stderr)
            continue

        file_path = file_.currentlocation.replace('%transferDirectory%',
                                                  sip_directory)

        if command.script_type in ('command', 'bashScript'):
            args = []
            command_to_execute = command.command.replace('%inputFile%',
                                                         file_path)
            command_to_execute = command_to_execute.replace(
                '%outputDirectory%', output_directory(file_path, date))
        else:
            command_to_execute = command.command
            args = [file_path, output_directory(file_path, date)]

        exitstatus, stdout, stderr = executeOrRun(command.script_type,
                                                  command_to_execute,
                                                  arguments=args,
                                                  printing=True)
        if exitstatus != 0:
            # Dang, looks like the extraction failed
            print('Command', command.description, 'failed!', file=sys.stderr)
        else:
            extracted = True
            print('Extracted contents from', os.path.basename(file_path))
            # Assign UUIDs and insert them into the database, so the newly-
            # extracted files are properly tracked by Archivematica
            for extracted_file in tree(output_directory(file_path, date)):
                assign_uuid(extracted_file, file_.uuid, transfer_uuid, date,
                            task_uuid, sip_directory, file_.currentlocation)
        # We may want to remove the original package file after extracting
        # its contents
        if delete:
            delete_and_record_package_file(file_path, file_.uuid,
                                           file_.currentlocation)

    return 0 if extracted else -1
def executeCommand(gearman_worker, gearman_job): try: execute = gearman_job.task print "executing:", execute, "{", gearman_job.unique, "}" data = cPickle.loads(gearman_job.data) utcDate = databaseFunctions.getUTCDate() arguments = data["arguments"]#.encode("utf-8") if isinstance(arguments, unicode): arguments = arguments.encode("utf-8") sInput = "" clientID = gearman_worker.worker_client_id task = Task.objects.get(taskuuid=gearman_job.unique) if task.starttime is not None: exitCode = -1 stdOut = "" stdError = """Detected this task has already started! Unable to determine if it completed successfully.""" return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError}) else: task.client = clientID task.starttime = utcDate task.save() if execute not in supportedModules: output = ["Error!", "Error! - Tried to run and unsupported command." ] exitCode = -1 return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]}) command = supportedModules[execute] replacementDic["%date%"] = utcDate.isoformat() replacementDic["%jobCreatedDate%"] = data["createdDate"] # Replace replacement strings for key in replacementDic.iterkeys(): command = command.replace ( key, replacementDic[key] ) arguments = arguments.replace ( key, replacementDic[key] ) key = "%taskUUID%" value = gearman_job.unique.__str__() arguments = arguments.replace(key, value) # Add useful environment vars for client scripts lib_paths = ['/usr/share/archivematica/dashboard/', '/usr/lib/archivematica/archivematicaCommon'] env_updates = { 'PYTHONPATH': os.pathsep.join(lib_paths), 'DJANGO_SETTINGS_MODULE': config.get('MCPClient', 'django_settings_module') } # Execute command command += " " + arguments printOutputLock.acquire() print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>" printOutputLock.release() exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False, env_updates=env_updates) return 
cPickle.dumps({"exitCode": exitCode, "stdOut": stdOut, "stdError": stdError}) except OSError as ose: traceback.print_exc(file=sys.stdout) printOutputLock.acquire() print >>sys.stderr, "Execution failed:", ose printOutputLock.release() output = ["Archivematica Client Error!", traceback.format_exc()] exitCode = 1 return cPickle.dumps({"exitCode": exitCode, "stdOut": output[0], "stdError": output[1]}) except Exception as e: traceback.print_exc(file=sys.stdout) printOutputLock.acquire() print "Unexpected error:", e printOutputLock.release() output = ["", traceback.format_exc()] return cPickle.dumps({"exitCode": -1, "stdOut": output[0], "stdError": output[1]})
if not os.path.isdir(AIPsStoreWithQuads): os.mkdir(AIPsStoreWithQuads, mode) #mode isn't working on the mkdir os.chmod(AIPsStoreWithQuads, mode) storeLocation=os.path.join(AIPsStoreWithQuads, os.path.basename(os.path.abspath(AIP))) #Store the AIP shutil.move(AIP, storeLocation) #Extract the AIP extractDirectory = "/tmp/" + SIPUUID + "/" os.makedirs(extractDirectory) # command = "7z x -bd -o\"" + extractDirectory + "\" \"" + storeLocation + "\"" ret = executeOrRun("command", command, printing=printSubProcessOutput) exitCode, stdOut, stdErr = ret if exitCode != 0: print >>sys.stderr, "Error extracting" quit(1) bag = extractDirectory + SIPNAME + "-" + SIPUUID + "/" verificationCommands = [] verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"") verificationCommands.append("/usr/share/bagit/bin/bag verifytagmanifests \"" + bag + "\"") exitCode = 0 for command in verificationCommands: ret = executeOrRun("command", command, printing=printSubProcessOutput)
def verify_aip(job):
    """Check that the AIP is a valid, complete bag.

    Extracts the AIP first when it is a compressed file, then runs the
    bagit verification suite over it and finally re-verifies the payload
    checksums.  Checksum verification lives here because the redundant
    verifyPREMISChecksums_v0.0 ("Verify checksums generated on ingest")
    micro-service was removed — calculating checksums there and again via
    bagit here duplicated the work.

    job.args[1] -- UUID of the SIP, which will become the UUID of the AIP
    job.args[2] -- full absolute path to the AIP on the local filesystem

    Returns 0 when every check passes, 1 otherwise.
    """
    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z
    scratch = mcpclient_settings.TEMP_DIRECTORY

    uncompressed = os.path.isdir(aip_path)
    if uncompressed:
        bag = aip_path
    else:
        try:
            extract_dir = os.path.join(scratch, sip_uuid)
            bag = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path),
                        file=sys.stderr)
            return 1

    failed = False
    for check in (
        '/usr/share/bagit/bin/bag verifyvalid "{}"'.format(bag),
        '/usr/share/bagit/bin/bag checkpayloadoxum "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifycomplete "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifypayloadmanifests "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifytagmanifests "{}"'.format(bag),
    ):
        job.pyprint("Running test: ", check)
        exit_code, stdout, stderr = executeOrRun(
            "command", check, printing=True, capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)
        if exit_code != 0:
            job.pyprint("Failed test: ", check, file=sys.stderr)
            failed = True

    # Checksums are only worth re-verifying when the bag itself is sound.
    if failed:
        job.pyprint('Not verifying checksums because other tests have already'
                    ' failed.')
    else:
        try:
            verify_checksums(job, bag, sip_uuid)
        except VerifyChecksumsError:
            failed = True

    # Clean up the extraction scratch directory (compressed AIPs only).
    if not uncompressed:
        try:
            shutil.rmtree(extract_dir)
        except OSError as err:
            job.pyprint(
                'Failed to remove temporary directory at {extract_dir} which'
                ' contains the AIP extracted for verification.'
                ' Error:\n{err}'.format(extract_dir=extract_dir, err=err),
                file=sys.stderr)

    return 1 if failed else 0
def _execute_rule_command(self, rule):
    """Execute the FPR command of FPR rule ``rule`` against the file passed
    in to this client script.  The output of that command determines what
    we print to stdout and stderr, and the nature of the validation event
    that we save to the db.  We also copy the MediaConch policy file to
    the logs/ directory of the AIP if it has not already been copied
    there.

    :returns: 'passed' or 'failed'.
    """
    result = 'passed'
    command_to_execute, args = self._get_command_to_execute(rule)
    self.job.pyprint('Running', rule.command.description)
    exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                              command_to_execute,
                                              arguments=args,
                                              printing=False,
                                              capture_output=True)
    # The command is expected to emit a JSON object on stdout; a parse
    # failure is logged and re-raised for the caller to handle.
    try:
        output = json.loads(stdout)
    except ValueError:
        logger.exception(
            'Unable to load an object from the malformed JSON: \n%s',
            stderr)
        raise
    if self.file_type in ('preservation', 'original'):
        self._save_to_logs_dir(output)
    if exitstatus == 0:
        self.job.pyprint('Command {} completed with output {}'.format(
            rule.command.description, stdout))
    else:
        self.job.print_error(
            'Command {} failed with exit status {}; stderr:'.format(
                rule.command.description, exitstatus), stderr)
        return 'failed'
    event_detail = ('program="{tool.description}";'
                    ' version="{tool.version}"'.format(
                        tool=rule.command.tool))
    # A zero exit status only means the tool ran; the actual policy
    # outcome is carried in the JSON payload.
    if output.get('eventOutcomeInformation') != 'pass':
        self.job.print_error(
            'Command {descr} returned a non-pass outcome '
            'for the policy check;\n\noutcome: '
            '{outcome}\n\ndetails: {details}.'.format(
                descr=rule.command.description,
                outcome=output.get('eventOutcomeInformation'),
                details=output.get('eventOutcomeDetailNote'),
            ))
        result = 'failed'
    self.job.pyprint('Creating policy checking event for {} ({})'.format(
        self.file_path, self.file_uuid))
    # Manually-normalized access derivatives have no file UUID so we can't
    # create a validation event for them. TODO/QUESTION: should we use the
    # UUID that was assigned to the manually normalized derivative during
    # transfer, i.e., the one that we retrieve in
    # ``_get_manually_normalized_access_derivative_file_uuid`` above?
    if not self.is_manually_normalized_access_derivative:
        databaseFunctions.insertIntoEvents(
            fileUUID=self.file_uuid,
            eventType='validation',  # From PREMIS controlled vocab.
            eventDetail=event_detail,
            eventOutcome=output.get('eventOutcomeInformation'),
            eventOutcomeDetailNote=output.get('eventOutcomeDetailNote'),
        )
    return result
exit(2) f = open(filePath, 'r') line = f.readline() while not line.startswith("Depends:"): line = f.readline() for part in line.split(","): part = part.strip() if part.find("${shlibs:Depends}") != -1 or \ part.find("${misc:Depends}") != -1: continue if part.startswith("archivematica"): continue if part in excludePackages: continue print sys.argv[1] print "Attempting Install/Update of: ", part command = "sudo apt-get install -y " + part exitCode, stdOut, stdError = executeOrRun("command", command, printing=False) if exitCode: print "exitCode:", exitCode print stdOut print >>sys.stderr, stdError #else: #print "OK"
def playAudioFile(filePath): command = "cvlc --play-and-exit \"" + filePath + "\"" exitCode, stdOut, stdError = executeOrRun("command", command, printing=False) if exitCode != 0: print stdOut print >>sys.stderr, stdError
def _execute_rule_command(self, rule):
    """Run the command against the file and return either 'passed' or
    'failed'.  If the command errors or determines that the file is
    invalid, return 'failed'.  Non-errors will result in the creation of
    an Event model in the db.  Preservation derivative validation will
    result in the stdout from the command being saved to disk within the
    unit (i.e., SIP).
    """
    result = "passed"
    # bashScript/command rules take the file via string substitution;
    # other script types receive the path as an argument.
    if rule.command.script_type in ("bashScript", "command"):
        command_to_execute = replace_string_values(
            rule.command.command,
            file_=self.file_uuid,
            sip=self.sip_uuid,
            type_="file",
        )
        args = []
    else:
        command_to_execute = rule.command.command
        args = [self.file_path]
    self.job.print_output("Running", rule.command.description)
    exitstatus, stdout, stderr = executeOrRun(
        type=rule.command.script_type,
        text=command_to_execute,
        printing=False,
        arguments=args,
    )
    if exitstatus != 0:
        self.job.print_error(
            "Command {description} failed with exit status {status};"
            " stderr:".format(description=rule.command.description,
                              status=exitstatus))
        return "failed"
    # Parse output and generate an Event
    # TODO: Evaluating a python string from a user-definable script seems
    # insecure practice; should be JSON.
    output = ast.literal_eval(stdout)
    event_detail = ('program="{tool.description}";'
                    ' version="{tool.version}"'.format(
                        tool=rule.command.tool))
    # If the FPR command has not errored but the actual validation
    # determined that the file is not valid, then we want to both create a
    # validation event in the db and set ``failed`` to ``True`` because we
    # want the micro-service in the dashboard GUI to indicate "Failed".
    # NOTE: this requires that the stdout of all validation FPR commands be
    # a dict (preferably a JSON object) with an ``eventOutcomeInformation``
    # boolean attribute.
    if output.get("eventOutcomeInformation") == "pass":
        self.job.print_output('Command "{}" was successful'.format(
            rule.command.description))
    elif output.get("eventOutcomeInformation") == "partial pass":
        self.job.print_output(
            'Command "{}" was partially successful'.format(
                rule.command.description))
    else:
        self.job.pyprint(
            "Command {cmd_description} indicated failure with this"
            " output:\n\n{output}".format(
                cmd_description=rule.command.description,
                output=pformat(stdout)),
            file=sys.stderr,
        )
        result = "failed"
    if self.file_type == "preservation":
        self._save_stdout_to_logs_dir(output)
    self.job.print_output(
        "Creating {purpose} event for {file_path} ({file_uuid})".format(
            purpose=self.purpose,
            file_path=self.file_path,
            file_uuid=self.file_uuid))
    databaseFunctions.insertIntoEvents(
        fileUUID=self.file_uuid,
        eventType="validation",  # From PREMIS controlled vocab.
        eventDetail=event_detail,
        eventOutcome=output.get("eventOutcomeInformation"),
        eventOutcomeDetailNote=output.get("eventOutcomeDetailNote"),
    )
    return result
def main(job, task_uuid, file_uuid): setup_dicts(mcpclient_settings) succeeded = True file_ = File.objects.get(uuid=file_uuid) # Normally we don't transcribe derivatives (access copies, preservation copies); # however, some useful transcription tools can't handle some formats that # are common as the primary copies. For example, tesseract can't handle JPEG2000. # If there are no rules for the primary format passed in, try to look at each # derivative until a transcribable derivative is found. # # Skip derivatives to avoid double-scanning them; only look at them as a fallback. if file_.filegrpuse != "original": job.print_error( '{} is not an original; not transcribing'.format(file_uuid)) return 0 rules = fetch_rules_for(file_) if not rules: file_, rules = fetch_rules_for_derivatives(file_) if not rules: job.print_error( 'No rules found for file {} and its derivatives; not transcribing'. format(file_uuid)) return 0 else: if file_.filegrpuse == "original": noun = "original" else: noun = file_.filegrpuse + " derivative" job.print_error('Transcribing {} {}'.format(noun, file_.uuid)) rd = ReplacementDict.frommodel(file_=file_, type_='file') for rule in rules: script = rule.command.command if rule.command.script_type in ('bashScript', 'command'): script, = rd.replace(script) args = [] else: args = rd.to_gnu_options exitstatus, stdout, stderr = executeOrRun(rule.command.script_type, script, arguments=args, capture_output=True) job.write_output(stdout) job.write_error(stderr) if exitstatus != 0: succeeded = False output_path = rd.replace(rule.command.output_location)[0] relative_path = output_path.replace(rd['%SIPDirectory%'], '%SIPDirectory%') event = insert_transcription_event(exitstatus, file_uuid, rule, relative_path) if os.path.isfile(output_path): insert_file_into_database(task_uuid, file_uuid, rd['%SIPUUID%'], event, rule, output_path, relative_path) return 0 if succeeded else 1
def main(job, enabled, file_path, file_uuid, disable_reidentify):
    """Identify the format of one file with the default FPR IDCommand.

    :param job: job wrapper used for output/error reporting.
    :param enabled: the string "True" to run identification; any other
        value skips it.
    :param file_path: path of the file to identify.
    :param file_uuid: UUID of the File row for the file.
    :param disable_reidentify: when truthy, skip files that already have
        a "format identification" event.
    :returns: 0 on success or skip, 255 on any identification failure.
    """
    # Idiom fix: direct comparison instead of a redundant ternary.
    enabled = enabled == "True"
    if not enabled:
        job.print_output("Skipping file format identification")
        return 0

    command = _default_idcommand()
    if command is None:
        job.write_error("Unable to determine IDCommand.\n")
        return 255

    command_uuid = command.uuid
    job.print_output("IDCommand:", command.description)
    job.print_output("IDCommand UUID:", command.uuid)
    job.print_output("IDTool:", command.tool.description)
    job.print_output("IDTool UUID:", command.tool.uuid)
    job.print_output("File: ({}) {}".format(file_uuid, file_path))

    file_ = File.objects.get(uuid=file_uuid)

    # If reidentification is disabled and a format identification event
    # exists for this file, exit
    if (
        disable_reidentify
        and file_.event_set.filter(event_type="format identification").exists()
    ):
        job.print_output(
            "This file has already been identified, and re-identification is disabled. Skipping."
        )
        return 0

    # Save whether identification was enabled by the user for use in a later
    # chain.
    _save_id_preference(file_, enabled)

    exitcode, output, err = executeOrRun(
        command.script_type,
        command.script,
        arguments=[file_path],
        printing=False,
        capture_output=True,
    )
    output = output.strip()

    if exitcode != 0:
        job.print_error(
            "Error: IDCommand with UUID {} exited non-zero.".format(command_uuid)
        )
        job.print_error("Error: {}".format(err))
        return 255

    job.print_output("Command output:", output)
    # PUIDs are the same regardless of tool, so PUID-producing tools don't have "rules" per se - we just
    # go straight to the FormatVersion table to see if there's a matching PUID
    try:
        if command.config == "PUID":
            version = FormatVersion.active.get(pronom_id=output)
        else:
            rule = IDRule.active.get(command_output=output, command=command)
            version = rule.format
    except IDRule.DoesNotExist:
        job.print_error(
            'Error: No FPR identification rule for tool output "{}" found'.format(
                output
            )
        )
        write_identification_event(file_uuid, command, success=False)
        return 255
    except IDRule.MultipleObjectsReturned:
        job.print_error(
            'Error: Multiple FPR identification rules for tool output "{}" found'.format(
                output
            )
        )
        write_identification_event(file_uuid, command, success=False)
        return 255
    except FormatVersion.DoesNotExist:
        job.print_error("Error: No FPR format record found for PUID {}".format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255

    (ffv, created) = FileFormatVersion.objects.get_or_create(
        file_uuid=file_, defaults={"format_version": version}
    )
    if not created:  # Update the version if it wasn't created new
        ffv.format_version = version
        ffv.save()

    job.print_output("{} identified as a {}".format(file_path, version.description))

    write_identification_event(file_uuid, command, format=version.pronom_id)
    write_file_id(file_uuid=file_uuid, format=version, output=output)

    return 0
followup = f.readline() for part in line.split(","): # The word is split in order to try to install the latest version of # packages expressed in the syntax: foo (>= bar) # TODO apt-get install doesn't appear to support the full version # syntax control files support, but this should possibly try to # install the exact version specified? part = part.strip().split(' ')[0] if part.find("${shlibs:Depends}") != -1 or \ part.find("${misc:Depends}") != -1: continue if part.startswith(("archivematica", "Depends:")): continue if part in excludePackages: continue print(sys.argv[1]) print("Attempting Install/Update of: ", part) command = "sudo apt-get install -y " + part exitCode, stdOut, stdError = executeOrRun("command", command, printing=False) if exitCode: print("exitCode:", exitCode) print(stdOut) print(stdError, file=sys.stderr) #else: #print "OK"
def compress_aip(compression, compression_level, sip_directory, sip_name, sip_uuid):
    """
    Compresses AIP according to compression algorithm and level.
    compression = AIP compression algorithm, format: <program>-<algorithm>, eg. 7z-lzma, pbzip2-
    compression_level = AIP compression level, integer between 1 and 9 inclusive
    sip_directory = Absolute path to the directory where the SIP is
    sip_name = User-provided name of the SIP
    sip_uuid = SIP UUID

    Example inputs:
    compressAIP.py 7z-lzma 5 /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/compressionAIPDecisions/ep-d87d5845-bd07-4200-b1a4-928e0cb6e1e4/ ep d87d5845-bd07-4200-b1a4-928e0cb6e1e4

    Returns the compressor's exit code (0 on success), or -1 on bad input.
    """
    try:
        program, compression_algorithm = compression.split('-')
    except ValueError:
        msg = "Invalid program-compression algorithm: {}".format(compression)
        print(msg, file=sys.stderr)
        return -1

    archive_path = '{name}-{uuid}'.format(name=sip_name, uuid=sip_uuid)
    uncompressed_location = sip_directory + archive_path

    # Even though no actual compression is taking place,
    # the location still needs to be set in the unit to ensure that the
    # %AIPFilename% variable is set appropriately.
    # Setting it to an empty string ensures the common
    # "%SIPDirectory%%AIPFilename%" pattern still points at the right thing.
    if program == 'None':
        update_unit(sip_uuid, uncompressed_location)
        return 0

    print("Compressing {} with {}, algorithm {}, level {}".format(
        uncompressed_location, program, compression_algorithm,
        compression_level))

    if program == '7z':
        compressed_location = uncompressed_location + ".7z"
        command = '/usr/bin/7z a -bd -t7z -y -m0={algorithm} -mx={level} -mta=on -mtc=on -mtm=on -mmt=on "{compressed_location}" "{uncompressed_location}"'.format(
            algorithm=compression_algorithm,
            level=compression_level,
            uncompressed_location=uncompressed_location,
            compressed_location=compressed_location)
        # The backslash keeps the semicolon literal for the shell so the
        # echoed string contains "...;" separators.
        tool_info_command = (
            'echo program="7z"\; '
            'algorithm="{}"\; '
            'version="`7z | grep Version`"'.format(compression_algorithm))
    elif program == 'pbzip2':
        compressed_location = uncompressed_location + ".tar.bz2"
        command = '/bin/tar -c --directory "{sip_directory}" "{archive_path}" | /usr/bin/pbzip2 --compress -{level} > "{compressed_location}"'.format(
            sip_directory=sip_directory,
            archive_path=archive_path,
            level=compression_level,
            compressed_location=compressed_location)
        tool_info_command = (
            'echo program="pbzip2"\; '
            'algorithm="{}"\; '
            'version="$((pbzip2 -V) 2>&1)"'.format(compression_algorithm))
    else:
        msg = "Program {} not recognized, exiting script prematurely.".format(
            program)
        print(msg, file=sys.stderr)
        return -1

    print('Executing command:', command)
    exit_code, std_out, std_err = executeOrRun("bashScript", command,
                                               printing=True,
                                               capture_output=False)

    # Add new AIP File
    # NOTE(review): the File row and the compression event below are
    # recorded even when exit_code != 0 — confirm this is intended.
    file_uuid = sip_uuid
    databaseFunctions.insertIntoFiles(
        fileUUID=file_uuid,
        filePath=compressed_location.replace(sip_directory,
                                             '%SIPDirectory%', 1),
        sipUUID=sip_uuid,
        use='aip',
    )

    # Add compression event
    print('Tool info command:', tool_info_command)
    _, tool_info, _ = executeOrRun("bashScript", tool_info_command,
                                   printing=True)
    tool_output = 'Standard Output="{}"; Standard Error="{}"'.format(
        std_out, std_err)
    databaseFunctions.insertIntoEvents(
        eventType='compression',
        eventDetail=tool_info,
        eventOutcomeDetailNote=tool_output,
        fileUUID=file_uuid,
    )

    update_unit(sip_uuid, compressed_location)

    return exit_code