Example No. 1
def getTikaVersion():
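    """Return the Apache Tika version by running "<tikaPath> -V"; returns "" on error."""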
    command = "%s -V" % (tikaPath) 
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    return stdOut.strip().replace("Apache Tika ", "")
def verifyBag(bag):
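    """Verify ``bag`` with the BagIt command line tool.

    Runs validity, completeness and payload-manifest checks, adds the
    Payload-Oxum and tag-manifest checks when the bag provides them, and
    increments the global exitCode for every failed test.
    """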
    global exitCode
    verificationCommands = []
    verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"%s\"" % (bag)) #Verifies the validity of a bag.
    verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"%s\"" % (bag)) #Verifies the completeness of a bag.
    verificationCommands.append("/usr/share/bagit/bin/bag verifypayloadmanifests \"%s\"" % (bag)) #Verifies the checksums in all payload manifests.
    
    bagInfoPath = os.path.join(bag, "bag-info.txt")
    if os.path.isfile(bagInfoPath):
        for line in open(bagInfoPath,'r'):
            if line.startswith("Payload-Oxum"):
                verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"%s\"" % (bag)) #Generates Payload-Oxum and checks against Payload-Oxum in bag-info.txt.
                break
    
    for item in os.listdir(bag):
        if item.startswith("tagmanifest-") and item.endswith(".txt"):        
            verificationCommands.append("/usr/share/bagit/bin/bag verifytagmanifests \"%s\"" % (bag)) #Verifies the checksums in all tag manifests.
            break

    for command in verificationCommands:
        ret = executeOrRun("command", command, printing=printSubProcessOutput)
        verificationCommandsOutputs.append(ret)
        exit, stdOut, stdErr = ret
        if exit != 0:
            print >>sys.stderr, "Failed test: ", command
            print >>sys.stderr, stdErr
            print >>sys.stderr, stdOut
            print >>sys.stderr
            exitCode += 1
        else:
            print "Passed test: ", command
def extract(target, destinationDirectory):
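    """Extract ``target`` into ``destinationDirectory`` with 7-Zip, exiting with 7z's return code on failure."""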
    command = """/usr/bin/7z x -bd -o"%s" "%s" """ % (destinationDirectory, target)
    exitC, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitC != 0:
        print stdOut
        print >>sys.stderr, "Failed extraction: ", command, "\r\n", stdErr
        exit(exitC)
    def checkForPreconfiguredXML(self):
        ret = None
        xmlFilePath = os.path.join( \
                                        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
                                        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
                                    )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                command = "sudo chmod 774 \"" + xmlFilePath + "\""
                if isinstance(command, unicode):
                    command = command.encode("utf-8")
                exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.find("preconfiguredChoices"):
                    #if int(preconfiguredChoice.find("appliesTo").text) == self.jobChainLink.pk:
                    if preconfiguredChoice.find("appliesTo").text == self.jobChainLink.description:
                        desiredChoice = preconfiguredChoice.find("goToChain").text
                        sql = """SELECT MicroServiceChains.pk FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk WHERE MicroServiceChains.description = '%s' AND MicroServiceChainChoice.choiceAvailableAtLink = %s;""" % (desiredChoice, self.jobChainLink.pk.__str__())
                        c, sqlLock = databaseInterface.querySQL(sql)
                        row = c.fetchone()
                        while row != None:
                            ret = row[0]
                            row = c.fetchone()
                        sqlLock.release()
                        try:
                            #<delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            unitAtimeXML = delayXML.get("unitCtime")
                            if unitAtimeXML != None and unitAtimeXML.lower() != "no":
                                delaySeconds=int(delayXML.text)
                                unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                               archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                                nowTime=time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                print "time to go:", timeToGo
                                #print "that will be: ", (nowTime + timeToGo)
                                self.jobChainLink.setExitMessage("Waiting till: " + datetime.datetime.fromtimestamp((nowTime + timeToGo)).ctime())

                                t = threading.Timer(timeToGo, self.proceedWithChoice, args=[ret], kwargs={"delayTimerStart":True})
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                return None

                        except Exception as inst:
                            print >>sys.stderr, "Error parsing xml:"
                            print >>sys.stderr, type(inst)
                            print >>sys.stderr, inst.args

            except Exception as inst:
                print >>sys.stderr, "Error parsing xml:"
                print >>sys.stderr, type(inst)
                print >>sys.stderr, inst.args
        return ret
def getMediaInfoVersion():
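    """Return the MediaInfo version parsed from "--version" output; note the CLI is expected here to exit with code 255."""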
    command = "%s --version" % (MediaInfoPath) 
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode != 255:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    ret = stdOut[stdOut.find("v") + 1:].strip()
    return ret
Example No. 6
def getFidoVersion():
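    """Return the FIDO version by running "<FidoPath> -v"; returns "" on error."""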
    command = "%s -v" % (FidoPath) 
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    ret = stdOut.split(" ")[1][1:]
    return ret
Example No. 7
def getTikaID(itemdirectoryPath):
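    """Identify the file at ``itemdirectoryPath`` with Tika and return the raw output; returns "" when nothing is identified or on error."""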
    command = "java -jar ./tika-app-1.3.jar -x -d " + itemdirectoryPath 
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode == 1: #no id found
        return ""
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    return stdOut
Example No. 8
def getTikaID(itemdirectoryPath):
    command = "%s -x -d %s" % (tikaPath, itemdirectoryPath) 
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)
    if exitCode == 1: #no id found
        print >>sys.stderr, "Tika found no format id"
        exit(0)
    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""
    return stdOut.strip()
Example No. 9
def onceExtracted(command):
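    """Post-extraction hook for unpacked packages.

    Optionally removes the original package file (recording the removal),
    registers every file found under %outputDirectory% with the transfer,
    and re-runs this script on each extracted file so nested packages are
    unpacked as well.
    """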
    extractedFiles = []
    print "TODO - Metadata regarding removal of extracted archive"
    if removeOnceExtracted:
        packageFileUUID = sys.argv[6].__str__()
        sipDirectory = sys.argv[2].__str__()
        os.remove(replacementDic["%inputFile%"])
        currentLocation =  replacementDic["%inputFile%"].replace(sipDirectory, "%transferDirectory%", 1)
        fileWasRemoved(packageFileUUID, eventOutcomeDetailNote = "removed from: " + currentLocation)

    print "OUTPUT DIRECTORY: ", replacementDic["%outputDirectory%"]
    for w in os.walk(replacementDic["%outputDirectory%"].replace("*", "asterisk*")):
        path, directories, files = w
        for p in files:
            p = os.path.join(path, p)
            #print "path: ", p
            if os.path.isfile(p):
                extractedFiles.append(p)
    for ef in extractedFiles:
        fileUUID = uuid.uuid4().__str__()
        #print "File Extracted:", ef
        if True: #Add the file to the SIP
            #<arguments>"%relativeLocation%" "%SIPObjectsDirectory%" "%SIPLogsDirectory%" "%date%" "%taskUUID%" "%fileUUID%"</arguments>
            sipDirectory = sys.argv[2].__str__()
            transferUUID = sys.argv[3].__str__()
            date = sys.argv[4].__str__()
            taskUUID = sys.argv[5].__str__()
            packageFileUUID = sys.argv[6].__str__()

            filePathRelativeToSIP = ef.replace(sipDirectory,"%transferDirectory%", 1)
            print "File Extracted:: {" + fileUUID + "} ", filePathRelativeToSIP
            eventDetail="Unpacked from: {" + packageFileUUID + "}" + filePathRelativeToSIP
            addFileToTransfer(filePathRelativeToSIP, fileUUID, transferUUID, taskUUID, date, sourceType="unpacking", eventDetail=eventDetail)
            updateSizeAndChecksum(fileUUID, ef, date, uuid.uuid4().__str__())


        run = sys.argv[0].__str__() + \
        " \"" + transcoder.escapeForCommand(ef) + "\""
        if True: #Add the file to the SIP
            run = run + " \"" + transcoder.escapeForCommand(sys.argv[2].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[3].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[4].__str__()) + "\"" + \
            " \"" + transcoder.escapeForCommand(sys.argv[5].__str__()) + "\"" + \
            " \"" + fileUUID + "\""

        exitCode, stdOut, stdError = executeOrRun("command", run)
        print stdOut
        print >>sys.stderr, stdError
        if exitCode != 0 and command.exitCode == 0:
            command.exitCode = exitCode

    global extractedCount
    date = sys.argv[4].__str__().split(".", 1)[0]
    extractedCount = extractedCount + 1
    replacementDic["%outputDirectory%"] = transcoder.fileFullName + '-' + extractedCount.__str__() + '-' + date
Example No. 10
def renameAsSudo(source, destination):
    """Used to move/rename Directories that the archivematica user may or may not have writes to move"""
    command = "sudo mv \"" + source + "\"   \"" + destination + "\""
    if isinstance(command, unicode):
        command = command.encode("utf-8")
    exitCode, stdOut, stdError = executeOrRun("command", command, "", printing=False)
    if exitCode:
        print >>sys.stderr, "exitCode:", exitCode
        print >>sys.stderr, stdOut
        print >>sys.stderr, stdError
        exit(exitCode)
Example No. 11
def executeCommand(gearman_worker, gearman_job):
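    """Gearman worker handler: look up the module command for the task, apply
    the replacement values to the command and arguments, run it with
    executeOrRun, and return the pickled exit code, stdout and stderr."""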
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]#.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)

        sInput = ""
        clientID = gearman_worker.worker_client_id

        #if True:
        #    print clientID, execute, data
        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)

        if execute not in supportedModules:
            output = ["Error!", "Error! - Tried to run an unsupported command." ]
            exitCode = -1
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
        command = supportedModules[execute]


        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace ( key, replacementDic[key] )
            arguments = arguments.replace ( key, replacementDic[key] )

        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)

        #execute command

        command += " " + arguments
        printOutputLock.acquire()
        print >>sys.stderr, "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >>sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Config Error!", ose.__str__() ]
        exitCode = 1
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
def runBag(arguments):
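    """Run the BagIt command line tool with ``arguments``, exiting with its return code on failure."""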
    command = "/usr/share/bagit/bin/bag %s" % (arguments) 
    exitCode, stdOut, stdError = executeOrRun("command", command, printing=False)
    if exitCode != 0:
        print >>sys.stderr, ""
        print >>sys.stderr, "Error with command: ", command
        print >>sys.stderr, "Standard OUT:"
        print >>sys.stderr, stdOut
        print >>sys.stderr, "Standard Error:"
        print >>sys.stderr, stdError
        exit(exitCode)
    else:
        print stdOut
        print >>sys.stderr, stdError
    def execute(self, skipOnSuccess=False):

        #print self.__str__()

        #Do a dictionary replacement.
        #Replace replacement strings
        global replacementDic

        #for each key replace all instances of the key in the command string
        for key, value in replacementDic.iteritems():
            key = toStrFromUnicode(key)
            replacementDic[key] = toStrFromUnicode(value)
            #self.outputLocation = toStrFromUnicode(self.outputLocation)
            #self.command = self.command.replace ( key, quote(replacementDic[key]) )
            self.command = self.command.replace( key, escapeForCommand(replacementDic[key]) )
            if self.outputLocation:
                self.outputLocation = self.outputLocation.replace( key, replacementDic[key] )
        print "Running: "
        print self.__str__()

        self.exitCode, self.stdOut, self.stdError = executeOrRun(self.type, self.command)


        if (not self.exitCode) and self.verificationCommand:
            print
            self.exitCode = self.verificationCommand.execute(skipOnSuccess=True)

        if (not self.exitCode) and self.eventDetailCommand:
            self.eventDetailCommand.execute(skipOnSuccess=True)

        #If unsuccessful
        if self.exitCode:
            print >>sys.stderr, "Failed:"
            #print >>sys.stderr, self.__str__()
            print self.stdOut
            print >>sys.stderr, self.stdError
            if False and self.failedCount < 1: #retry count
                self.failedCount= self.failedCount + 1
                time.sleep(2)
                print >>sys.stderr, "retrying, ", self.failedCount
                return self.execute(skipOnSuccess)
        else:
            global onSuccess
            #uncommenting these floods the buffers with ffmpeg
            #print self.stdOut
            #print self.stdError
            if (not skipOnSuccess) and onSuccess:
                onSuccess(self)
        return self.exitCode
Example No. 14
def getFidoID(itemdirectoryPath):
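    """Run FIDO on ``itemdirectoryPath`` and return the third comma-separated field of its output (the format id); returns "" on error or empty output."""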
    command = 'python ./fido/fido/fido.py "%s"' % (itemdirectoryPath)
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)

    if exitCode != 0:
        print >> sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""

    if not stdOut:
        return ""
    try:
        ret = stdOut.split(",")[2]
    except:
        print stdErr
        print stdOut
        raise
    return ret
def verifyBag(bag):
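    """Run the standard BagIt verification commands on ``bag``, incrementing the global exitCode for every failed test."""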
    global exitCode
    verificationCommands = [
        "/usr/share/bagit/bin/bag verifyvalid \"" + bag + "\"", 
        "/usr/share/bagit/bin/bag checkpayloadoxum \"" + bag + "\"", 
        "/usr/share/bagit/bin/bag verifycomplete \"" + bag + "\"", 
        "/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"", 
        "/usr/share/bagit/bin/bag verifytagmanifests \"" + bag + "\"" ]
    for command in verificationCommands:
        ret = executeOrRun("command", command, printing=printSubProcessOutput)
        verificationCommandsOutputs.append(ret)
        exit, stdOut, stdErr = ret
        if exit != 0:
            print >>sys.stderr, "Failed test: ", command
            print >>sys.stderr, stdErr
            print >>sys.stderr
            exitCode += 1
        else:
            print "Passed test: ", command
    def execute(self, skipOnSuccess=False):
        #for each key replace all instances of the key in the command string
        for key, value in self.replacementDic.iteritems():
            key = toStrFromUnicode(key)
            self.replacementDic[key] = toStrFromUnicode(value)
            #self.outputLocation = toStrFromUnicode(self.outputLocation)
            #self.command = self.command.replace ( key, quote(replacementDic[key]) )
            self.command = self.command.replace( key, escapeForCommand(self.replacementDic[key]) )
            if self.outputLocation:
                self.outputLocation = self.outputLocation.replace( key, self.replacementDic[key] )
        print "Running: "
        selfstr = self.__str__()
        print selfstr
        if self.opts:
            self.opts["prependStdOut"] += "\r\nRunning: \r\n%s" % (selfstr)

        self.exitCode, self.stdOut, self.stdError = executeOrRun(self.type, self.command)


        if (not self.exitCode) and self.verificationCommand:
            print
            if self.opts:
                self.opts["prependStdOut"] += "\r\n"
            self.exitCode = self.verificationCommand.execute(skipOnSuccess=True)

        if (not self.exitCode) and self.eventDetailCommand:
            self.eventDetailCommand.execute(skipOnSuccess=True)

        #If unsuccessful
        if self.exitCode:
            print >>sys.stderr, "Failed:"
            #print >>sys.stderr, self.__str__()
            print self.stdOut
            print >>sys.stderr, self.stdError
            if False and self.failedCount < 1: #retry count
                self.failedCount= self.failedCount + 1
                time.sleep(2)
                print >>sys.stderr, "retrying, ", self.failedCount
                return self.execute(skipOnSuccess)
        else:
            if (not skipOnSuccess) and self.onSuccess:
                self.onSuccess(self, self.opts, self.replacementDic)
        return self.exitCode
Example No. 17
def getMediaInfoID(itemdirectoryPath):
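    """Run mediainfo on ``itemdirectoryPath`` and return a JSON list of the General-section Format and Format version; returns "" when no Format is reported."""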
    command = "mediainfo \"%s\"" % (itemdirectoryPath)
    exitCode, stdOut, stdErr = executeOrRun("command", command, printing=False)

    if exitCode != 0:
        print >>sys.stderr, "Error: ", stdOut, stdErr, exitCode
        return ""

    if not stdOut:
        return ""
    ret = ""
    try:
        mediaInfoDic={}
        for line in stdOut.split("\n"):
            header = "General"
            if not line or line.isspace():
                break #can be removed to grep more info
                continue
            index = line.find(":") 
            if index == -1:
                header = line.strip()
                continue
            key = "%s-%s" % (header, line[:index].strip())
            value = line[index+1:].strip()
            mediaInfoDic[key] = value       
        #print mediaInfoDic
        
        if mediaInfoDic.has_key('General-Format'):
            format = mediaInfoDic['General-Format']
        else:
            return ""
        formatVersion = None
        if mediaInfoDic.has_key('General-Format version'):
            formatVersion = mediaInfoDic['General-Format version']
        ret = json.dumps([('Format', format,), ('Format version', formatVersion,)])
    except Exception as inst:
        print type(inst)     # the exception instance
        print inst.args
        print stdErr
        print stdOut
    return ret
Example No. 18
from archivematicaFunctions import escapeForCommand

clamscanResultShouldBe="Infected files: 0"

if __name__ == '__main__':
    fileUUID = sys.argv[1]
    target =  sys.argv[2]
    date = sys.argv[3]
    taskUUID = sys.argv[4]

    command = 'clamdscan  - <"' + escapeForCommand(target) + '"'
    print >>sys.stderr, command
    commandVersion = "clamdscan -V"
    eventOutcome = "Pass"

    clamscanOutput = executeOrRun("bashScript", command, printing=False)
    clamscanVersionOutput = executeOrRun("command", commandVersion, printing=False)

    if clamscanOutput[0] or clamscanVersionOutput[0]:
        if clamscanVersionOutput[0]:
            print >>sys.stderr, clamscanVersionOutput
            exit(2)
        else:
            eventOutcome = "Fail"

    if eventOutcome == "Fail" or clamscanOutput[1].find(clamscanResultShouldBe) == -1:
        eventOutcome = "Fail"
        print >>sys.stderr, fileUUID, " - ", os.path.basename(target)
        print >>sys.stderr, clamscanOutput

    version, virusDefs, virusDefsDate = clamscanVersionOutput[1].split("/")
Example No. 19
def executeCommand(gearman_worker, gearman_job):
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]#.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)

        sInput = ""
        clientID = gearman_worker.worker_client_id


        sql = """SELECT Tasks.taskUUID FROM Tasks WHERE taskUUID='%s' AND startTime != 0;""" % (gearman_job.unique.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started!
Unable to determine if it completed successfully."""
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
        
        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)

        if execute not in supportedModules:
            output = ["Error!", "Error! - Tried to run an unsupported command." ]
            exitCode = -1
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
        command = supportedModules[execute]


        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace ( key, replacementDic[key] )
            arguments = arguments.replace ( key, replacementDic[key] )

        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)

        #execute command

        command += " " + arguments
        printOutputLock.acquire()
        print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False)
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >>sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Archivematica Client Error!", ose.__str__() ]
        exitCode = 1
        return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
Example No. 20
clamscanResultShouldBe = "Infected files: 0"

if __name__ == '__main__':
    fileUUID = sys.argv[1]
    target = sys.argv[2]
    date = sys.argv[3]
    taskUUID = sys.argv[4]

    command = 'clamdscan  - <"' + escapeForCommand(target).replace("$",
                                                                   "\\$") + '"'
    print >> sys.stderr, command
    commandVersion = "clamdscan -V"
    eventOutcome = "Pass"

    clamscanOutput = executeOrRun("bashScript", command, printing=False)
    clamscanVersionOutput = executeOrRun("command",
                                         commandVersion,
                                         printing=False)

    if clamscanOutput[0] or clamscanVersionOutput[0]:
        if clamscanVersionOutput[0]:
            print >> sys.stderr, clamscanVersionOutput
            exit(2)
        else:
            eventOutcome = "Fail"

    if eventOutcome == "Fail" or clamscanOutput[1].find(
            clamscanResultShouldBe) == -1:
        eventOutcome = "Fail"
        print >> sys.stderr, fileUUID, " - ", os.path.basename(target)
Example No. 21
    def checkForPreconfiguredXML(self):
        desiredChoice = None
        xmlFilePath = os.path.join( \
                                        self.unit.currentPath.replace("%sharedPath%", archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1), \
                                        archivematicaMCP.config.get('MCPServer', "processingXMLFile") \
                                    )
        xmlFilePath = unicodeToStr(xmlFilePath)
        if os.path.isfile(xmlFilePath):
            # For a list of items with pks:
            # SELECT TasksConfigs.description, choiceAvailableAtLink, ' ' AS 'SPACE', MicroServiceChains.description, chainAvailable FROM MicroServiceChainChoice Join MicroServiceChains on MicroServiceChainChoice.chainAvailable = MicroServiceChains.pk Join MicroServiceChainLinks on MicroServiceChainLinks.pk = MicroServiceChainChoice.choiceAvailableAtLink Join TasksConfigs on TasksConfigs.pk = MicroServiceChainLinks.currentTask ORDER BY choiceAvailableAtLink desc;
            try:
                command = "sudo chmod 774 \"" + xmlFilePath + "\""
                if isinstance(command, unicode):
                    command = command.encode("utf-8")
                exitCode, stdOut, stdError = executeOrRun("command",
                                                          command,
                                                          "",
                                                          printing=False)
                tree = etree.parse(xmlFilePath)
                root = tree.getroot()
                for preconfiguredChoice in root.findall(
                        ".//preconfiguredChoice"):
                    if preconfiguredChoice.find(
                            "appliesTo").text == self.jobChainLink.pk:
                        desiredChoice = preconfiguredChoice.find(
                            "goToChain").text
                        try:
                            #<delay unitAtime="yes">30</delay>
                            delayXML = preconfiguredChoice.find("delay")
                            if delayXML is not None:
                                unitAtimeXML = delayXML.get("unitCtime")
                            else:
                                unitAtimeXML = None
                            if unitAtimeXML is not None and unitAtimeXML.lower(
                            ) != "no":
                                delaySeconds = int(delayXML.text)
                                unitTime = os.path.getmtime(self.unit.currentPath.replace("%sharedPath%", \
                                               archivematicaMCP.config.get('MCPServer', "sharedDirectory"), 1))
                                nowTime = time.time()
                                timeDifference = nowTime - unitTime
                                timeToGo = delaySeconds - timeDifference
                                LOGGER.info('Time to go: %s', timeToGo)
                                self.jobChainLink.setExitMessage(
                                    "Waiting till: " +
                                    datetime.datetime.fromtimestamp(
                                        (nowTime + timeToGo)).ctime())

                                t = threading.Timer(
                                    timeToGo,
                                    self.proceedWithChoice,
                                    args=[desiredChoice, None],
                                    kwargs={"delayTimerStart": True})
                                t.daemon = True
                                self.delayTimer = t
                                t.start()
                                return None

                        except Exception:
                            LOGGER.info('Error parsing XML', exc_info=True)
            except Exception:
                LOGGER.warning(
                    'Error parsing xml at %s for pre-configured choice',
                    xmlFilePath,
                    exc_info=True)
        LOGGER.info('Using preconfigured choice %s for %s', desiredChoice,
                    self.jobChainLink.pk)
        return desiredChoice
Example No. 22
def main(file_path, file_uuid, sip_uuid):
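    """Characterize the file with the FPR characterization rules for its
    format (falling back to the default_characterization rules), skipping
    files that already have FPCommandOutput rows and saving XML (fmt/101)
    command output to the database. Returns 0 on success, -1 if any command
    failed."""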
    setup_dicts(mcpclient_settings)

    failed = False

    # Check to see whether the file has already been characterized; don't try
    # to characterize it a second time if so.
    if FPCommandOutput.objects.filter(file_id=file_uuid).count() > 0:
        return 0

    try:
        format = FormatVersion.active.get(
            fileformatversion__file_uuid=file_uuid)
    except FormatVersion.DoesNotExist:
        rules = format = None

    if format:
        rules = FPRule.active.filter(format=format.uuid,
                                     purpose='characterization')

    # Characterization always occurs - if nothing is specified, get one or more
    # defaults specified in the FPR.
    if not rules:
        rules = FPRule.active.filter(purpose='default_characterization')

    for rule in rules:
        if rule.command.script_type == 'bashScript' or rule.command.script_type == 'command':
            args = []
            command_to_execute = replace_string_values(rule.command.command,
                                                       file_=file_uuid,
                                                       sip=sip_uuid,
                                                       type_='file')
        else:
            rd = ReplacementDict.frommodel(file_=file_uuid,
                                           sip=sip_uuid,
                                           type_='file')
            args = rd.to_gnu_options()
            command_to_execute = rule.command.command

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                                  command_to_execute,
                                                  arguments=args)
        if exitstatus != 0:
            print('Command {} failed with exit status {}; stderr:'.format(
                rule.command.description, exitstatus),
                  stderr,
                  file=sys.stderr)
            failed = True
            continue
        # fmt/101 is XML - we want to collect and package any XML output, while
        # allowing other commands to execute without actually collecting their
        # output in the event that they are writing their output to disk.
        # FPCommandOutput can have multiple rows for a given file,
        # distinguished by the rule that produced it.
        if rule.command.output_format and rule.command.output_format.pronom_id == 'fmt/101':
            try:
                etree.fromstring(stdout)
                insertIntoFPCommandOutput(file_uuid, stdout, rule.uuid)
                print('Saved XML output for command "{}" ({})'.format(
                    rule.command.description, rule.command.uuid))
            except etree.XMLSyntaxError:
                failed = True
                print(
                    'XML output for command "{}" ({}) was not valid XML; not saving to database'
                    .format(rule.command.description, rule.command.uuid),
                    file=sys.stderr)
        else:
            print(
                'Tool output for command "{}" ({}) is not XML; not saving to database'
                .format(rule.command.description, rule.command.uuid),
                file=sys.stderr)

    if failed:
        return -1
    else:
        return 0
Example No. 23
        os.mkdir(AIPsStoreWithQuads, mode)
        #mode isn't working on the mkdir
        os.chmod(AIPsStoreWithQuads, mode)

storeLocation = os.path.join(AIPsStoreWithQuads,
                             os.path.basename(os.path.abspath(AIP)))

#Store the AIP
shutil.move(AIP, storeLocation)

#Extract the AIP
extractDirectory = "/tmp/" + SIPUUID + "/"
os.makedirs(extractDirectory)
#
command = "7z x -bd -o\"" + extractDirectory + "\" \"" + storeLocation + "\""
ret = executeOrRun("command", command, printing=printSubProcessOutput)
exitCode, stdOut, stdErr = ret
if exitCode != 0:
    print >> sys.stderr, "Error extracting"
    quit(1)

bag = extractDirectory + SIPNAME + "-" + SIPUUID + "/"
verificationCommands = []
verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"" + bag +
                            "\"")
verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"" +
                            bag + "\"")
verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"" +
                            bag + "\"")
verificationCommands.append(
    "/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"")
Example No. 24
def main(job, command_uuid, file_path, file_uuid, disable_reidentify):
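    """Run the selected IDCommand against the file, map its output to an FPR
    FormatVersion (directly for PUID output, via an IDRule otherwise), and
    record the FileFormatVersion, file ID and identification event. Returns
    0 on success or skip, 255 on error."""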
    job.print_output("IDCommand UUID:", command_uuid)
    job.print_output("File: ({}) {}".format(file_uuid, file_path))
    if command_uuid == "None":
        job.print_output("Skipping file format identification")
        return 0
    try:
        command = IDCommand.active.get(uuid=command_uuid)
    except IDCommand.DoesNotExist:
        job.write_error(
            "IDCommand with UUID {} does not exist.\n".format(command_uuid))
        return 255

    file_ = File.objects.get(uuid=file_uuid)

    # If reidentification is disabled and a format identification event exists for this file, exit
    if disable_reidentify and file_.event_set.filter(
            event_type='format identification').exists():
        job.print_output(
            'This file has already been identified, and re-identification is disabled. Skipping.'
        )
        return 0

    # Save the selected ID command for use in a later chain
    save_idtool(file_, command_uuid)

    exitcode, output, _ = executeOrRun(command.script_type,
                                       command.script,
                                       arguments=[file_path],
                                       printing=False,
                                       capture_output=True)
    output = output.strip()

    if exitcode != 0:
        job.print_error(
            'Error: IDCommand with UUID {} exited non-zero.'.format(
                command_uuid))
        return 255

    job.print_output('Command output:', output)
    # PUIDs are the same regardless of tool, so PUID-producing tools don't have "rules" per se - we just
    # go straight to the FormatVersion table to see if there's a matching PUID
    try:
        if command.config == 'PUID':
            version = FormatVersion.active.get(pronom_id=output)
        else:
            rule = IDRule.active.get(command_output=output, command=command)
            version = rule.format
    except IDRule.DoesNotExist:
        job.print_error(
            'Error: No FPR identification rule for tool output "{}" found'.
            format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255
    except IDRule.MultipleObjectsReturned:
        job.print_error(
            'Error: Multiple FPR identification rules for tool output "{}" found'
            .format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255
    except FormatVersion.DoesNotExist:
        job.print_error(
            'Error: No FPR format record found for PUID {}'.format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255

    (ffv, created) = FileFormatVersion.objects.get_or_create(
        file_uuid=file_, defaults={'format_version': version})
    if not created:  # Update the version if it wasn't created new
        ffv.format_version = version
        ffv.save()
    job.print_output("{} identified as a {}".format(file_path,
                                                    version.description))

    write_identification_event(file_uuid, command, format=version.pronom_id)
    write_file_id(file_uuid=file_uuid, format=version, output=output)

    return 0
Example No. 25
if __name__ == '__main__':
    logger = get_script_logger("archivematica.mcp.client.clamscan")
    fileUUID = sys.argv[1]
    target = sys.argv[2]
    date = sys.argv[3]

    # Check if scan event already exists for this file - if so abort early
    count = Event.objects.filter(file_uuid_id=fileUUID, event_type='virus check').count()
    if count >= 1:
        print 'Virus scan already performed, not running scan again'
        sys.exit(0)

    command = ['clamdscan', '-']
    print 'Clamscan command:', ' '.join(command), '<', target
    with open(target) as file_:
        scan_rc, scan_stdout, scan_stderr = executeOrRun("command", command, printing=False, stdIn=file_)
    commandVersion = "clamdscan -V"
    print 'Clamscan version command:', commandVersion
    version_rc, version_stdout, version_stderr = executeOrRun("command", commandVersion, printing=False)

    eventOutcome = "Pass"
    if scan_rc or version_rc:  # Either command returned non-0 RC
        if version_rc:
            print >>sys.stderr, 'Error determining version, aborting'
            print >>sys.stderr, 'Version RC:', version_rc
            print >>sys.stderr, 'Version Standard output:', version_stdout
            print >>sys.stderr, 'Version Standard error:', version_stderr
            sys.exit(2)
        else:
            eventOutcome = "Fail"
Example No. 26
def executeCommand(gearman_worker, gearman_job):
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseInterface.getUTCDate()
        arguments = data["arguments"]  #.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")
        #if isinstance(arguments, str):
        #    arguments = unicode(arguments)

        sInput = ""
        clientID = gearman_worker.worker_client_id

        sql = """SELECT Tasks.taskUUID FROM Tasks WHERE taskUUID='%s' AND startTime != 0;""" % (
            gearman_job.unique.__str__())
        rows = databaseInterface.queryAllSQL(sql)
        if len(rows):
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started!
Unable to determine if it completed successfully."""
            return cPickle.dumps({
                "exitCode": exitCode,
                "stdOut": stdOut,
                "stdError": stdError
            })

        logTaskAssignedSQL(gearman_job.unique.__str__(), clientID, utcDate)

        if execute not in supportedModules:
            output = [
                "Error!", "Error! - Tried to run an unsupported command."
            ]
            exitCode = -1
            return cPickle.dumps({
                "exitCode": exitCode,
                "stdOut": output[0],
                "stdError": output[1]
            })
        command = supportedModules[execute]

        replacementDic["%date%"] = utcDate
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        #Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace(key, replacementDic[key])
            arguments = arguments.replace(key, replacementDic[key])

        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)

        #execute command

        command += " " + arguments
        printOutputLock.acquire()
        print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__(
        ) + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command",
                                                  command,
                                                  sInput,
                                                  printing=False)
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": stdOut,
            "stdError": stdError
        })
    #catch OS errors
    except OSError, ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >> sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Archivematica Client Error!", ose.__str__()]
        exitCode = 1
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": output[0],
            "stdError": output[1]
        })
Example No. 27
def main(transfer_uuid, sip_directory, date, task_uuid, delete=False):
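    """Extract the contents of every package file in the transfer that has an
    FPR 'extract' rule, registering the extracted files and optionally
    deleting the original package. Returns 0 if anything was extracted,
    otherwise -1."""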
    files = File.objects.filter(transfer=transfer_uuid,
                                removedtime__isnull=True)
    if not files:
        print('No files found for transfer: ', transfer_uuid)

    # We track whether or not anything was extracted because that controls what
    # the next microservice chain link will be.
    # If something was extracted, then a new identification step has to be
    # kicked off on those files; otherwise, we can go ahead with the transfer.
    extracted = False

    for file_ in files:
        try:
            format_id = FileFormatVersion.objects.get(file_uuid=file_.uuid)
        # Can't do anything if the file wasn't identified in the previous step
        except:
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - file format not identified',
                  file=sys.stderr)
            continue
        if format_id.format_version == None:
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - file format not identified',
                  file=sys.stderr)
            continue
        # Extraction commands are defined in the FPR just like normalization
        # commands
        try:
            command = FPCommand.active.get(
                fprule__format=format_id.format_version,
                fprule__purpose='extract')
        except FPCommand.DoesNotExist:
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - No rule found to extract',
                  file=sys.stderr)
            continue

        # Check if file has already been extracted
        if already_extracted(file_):
            print('Not extracting contents from',
                  os.path.basename(file_.currentlocation),
                  ' - extraction already happened.',
                  file=sys.stderr)
            continue

        file_path = file_.currentlocation.replace('%transferDirectory%',
                                                  sip_directory)

        if command.script_type == 'command' or command.script_type == 'bashScript':
            args = []
            command_to_execute = command.command.replace(
                '%inputFile%', file_path)
            command_to_execute = command_to_execute.replace(
                '%outputDirectory%', output_directory(file_path, date))
        else:
            command_to_execute = command.command
            args = [file_path, output_directory(file_path, date)]

        exitstatus, stdout, stderr = executeOrRun(command.script_type,
                                                  command_to_execute,
                                                  arguments=args,
                                                  printing=True)

        if not exitstatus == 0:
            # Dang, looks like the extraction failed
            print('Command', command.description, 'failed!', file=sys.stderr)
        else:
            extracted = True
            print('Extracted contents from', os.path.basename(file_path))

            # Assign UUIDs and insert them into the database, so the newly-
            # extracted files are properly tracked by Archivematica
            for extracted_file in tree(output_directory(file_path, date)):
                assign_uuid(extracted_file, file_.uuid, transfer_uuid, date,
                            task_uuid, sip_directory, file_.currentlocation)
            # We may want to remove the original package file after extracting its contents
            if delete:
                delete_and_record_package_file(file_path, file_.uuid,
                                               file_.currentlocation)

    if extracted == True:
        return 0
    else:
        return -1
Example No. 28
def executeCommand(gearman_worker, gearman_job):
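    """Gearman worker handler: refuse tasks that have already started, apply
    the replacement values to the configured command, run it with extra
    PYTHONPATH/DJANGO_SETTINGS_MODULE environment variables, and return the
    pickled exit code, stdout and stderr."""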
    try:
        execute = gearman_job.task
        print "executing:", execute, "{", gearman_job.unique, "}"
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseFunctions.getUTCDate()
        arguments = data["arguments"]#.encode("utf-8")
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")

        sInput = ""
        clientID = gearman_worker.worker_client_id

        task = Task.objects.get(taskuuid=gearman_job.unique)
        if task.starttime is not None:
            exitCode = -1
            stdOut = ""
            stdError = """Detected this task has already started!
Unable to determine if it completed successfully."""
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": stdOut, "stdError": stdError})
        else:
            task.client = clientID
            task.starttime = utcDate
            task.save()

        if execute not in supportedModules:
            output = ["Error!", "Error! - Tried to run an unsupported command." ]
            exitCode = -1
            return cPickle.dumps({"exitCode" : exitCode, "stdOut": output[0], "stdError": output[1]})
        command = supportedModules[execute]

        replacementDic["%date%"] = utcDate.isoformat()
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        # Replace replacement strings
        for key in replacementDic.iterkeys():
            command = command.replace ( key, replacementDic[key] )
            arguments = arguments.replace ( key, replacementDic[key] )

        key = "%taskUUID%"
        value = gearman_job.unique.__str__()
        arguments = arguments.replace(key, value)

        # Add useful environment vars for client scripts
        lib_paths = ['/usr/share/archivematica/dashboard/', '/usr/lib/archivematica/archivematicaCommon']
        env_updates = {
            'PYTHONPATH': os.pathsep.join(lib_paths),
            'DJANGO_SETTINGS_MODULE': config.get('MCPClient', 'django_settings_module')
        }

        # Execute command
        command += " " + arguments
        printOutputLock.acquire()
        print "<processingCommand>{" + gearman_job.unique + "}" + command.__str__() + "</processingCommand>"
        printOutputLock.release()
        exitCode, stdOut, stdError = executeOrRun("command", command, sInput, printing=False, env_updates=env_updates)
        return cPickle.dumps({"exitCode": exitCode, "stdOut": stdOut, "stdError": stdError})
    except OSError as ose:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print >>sys.stderr, "Execution failed:", ose
        printOutputLock.release()
        output = ["Archivematica Client Error!", traceback.format_exc()]
        exitCode = 1
        return cPickle.dumps({"exitCode": exitCode, "stdOut": output[0], "stdError": output[1]})
    except Exception as e:
        traceback.print_exc(file=sys.stdout)
        printOutputLock.acquire()
        print "Unexpected error:", e
        printOutputLock.release()
        output = ["", traceback.format_exc()]
        return cPickle.dumps({"exitCode": -1, "stdOut": output[0], "stdError": output[1]})
Example No. 29
    if not os.path.isdir(AIPsStoreWithQuads):
        os.mkdir(AIPsStoreWithQuads, mode)
        #mode isn't working on the mkdir
        os.chmod(AIPsStoreWithQuads, mode)

storeLocation=os.path.join(AIPsStoreWithQuads, os.path.basename(os.path.abspath(AIP)))

#Store the AIP
shutil.move(AIP, storeLocation)

#Extract the AIP
extractDirectory = "/tmp/" + SIPUUID + "/"
os.makedirs(extractDirectory)
#
command = "7z x -bd -o\"" + extractDirectory + "\" \"" + storeLocation + "\""
ret = executeOrRun("command", command, printing=printSubProcessOutput)
exitCode, stdOut, stdErr = ret
if exitCode != 0:
    print >>sys.stderr, "Error extracting"
    quit(1)

bag = extractDirectory + SIPNAME + "-" + SIPUUID + "/"
verificationCommands = []
verificationCommands.append("/usr/share/bagit/bin/bag verifyvalid \"" + bag + "\"")
verificationCommands.append("/usr/share/bagit/bin/bag checkpayloadoxum \"" + bag + "\"")
verificationCommands.append("/usr/share/bagit/bin/bag verifycomplete \"" + bag + "\"")
verificationCommands.append("/usr/share/bagit/bin/bag verifypayloadmanifests \"" + bag + "\"")
verificationCommands.append("/usr/share/bagit/bin/bag verifytagmanifests \"" + bag + "\"")
exitCode = 0
for command in verificationCommands:
    ret = executeOrRun("command", command, printing=printSubProcessOutput)
Example No. 30
def verify_aip(job):
    """Verify the AIP was bagged correctly by extracting it and running
    verification on its contents. This is also where we verify the checksums
    now that the verifyPREMISChecksums_v0.0 ("Verify checksums generated on
    ingest") micro-service has been removed. It was removed because verifying
    checksums by calculating them in that MS and then having bagit calculate
    them here was redundant.

    job.args[1] = UUID
      UUID of the SIP, which will become the UUID of the AIP
    job.args[2] = current location
      Full absolute path to the AIP's current location on the local filesystem
    """

    sip_uuid = job.args[1]  # %sip_uuid%
    aip_path = job.args[2]  # SIPDirectory%%sip_name%-%sip_uuid%.7z

    temp_dir = mcpclient_settings.TEMP_DIRECTORY

    is_uncompressed_aip = os.path.isdir(aip_path)

    if is_uncompressed_aip:
        bag = aip_path
    else:
        try:
            extract_dir = os.path.join(temp_dir, sip_uuid)
            bag = extract_aip(job, aip_path, extract_dir)
        except Exception as err:
            job.print_error(repr(err))
            job.pyprint('Error extracting AIP at "{}"'.format(aip_path),
                        file=sys.stderr)
            return 1

    verification_commands = [
        '/usr/share/bagit/bin/bag verifyvalid "{}"'.format(bag),
        '/usr/share/bagit/bin/bag checkpayloadoxum "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifycomplete "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifypayloadmanifests "{}"'.format(bag),
        '/usr/share/bagit/bin/bag verifytagmanifests "{}"'.format(bag),
    ]
    return_code = 0
    for command in verification_commands:
        job.pyprint("Running test: ", command)
        exit_code, stdout, stderr = executeOrRun("command",
                                                 command,
                                                 printing=True,
                                                 capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)
        if exit_code != 0:
            job.pyprint("Failed test: ", command, file=sys.stderr)
            return_code = 1

    if return_code == 0:
        try:
            verify_checksums(job, bag, sip_uuid)
        except VerifyChecksumsError:
            return_code = 1
    else:
        job.pyprint('Not verifying checksums because other tests have already'
                    ' failed.')

    # cleanup
    if not is_uncompressed_aip:
        try:
            shutil.rmtree(extract_dir)
        except OSError as err:
            job.pyprint(
                'Failed to remove temporary directory at {extract_dir} which'
                ' contains the AIP extracted for verification.'
                ' Error:\n{err}'.format(extract_dir=extract_dir, err=err),
                file=sys.stderr)

    return return_code
Example No. 31
 def _execute_rule_command(self, rule):
     """Execute the FPR command of FPR rule ``rule`` against the file passed
     in to this client script. The output of that command determines what we
     print to stdout and stderr, and the nature of the validation event that
     we save to the db. We also copy the MediaConch policy file to the logs/
     directory of the AIP if it has not already been copied there.
     """
     result = 'passed'
     command_to_execute, args = self._get_command_to_execute(rule)
     self.job.pyprint('Running', rule.command.description)
     exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                               command_to_execute,
                                               arguments=args,
                                               printing=False,
                                               capture_output=True)
     try:
         output = json.loads(stdout)
     except ValueError:
         logger.exception(
             'Unable to load an object from the malformed JSON: \n%s',
             stderr)
         raise
     if self.file_type in ('preservation', 'original'):
         self._save_to_logs_dir(output)
     if exitstatus == 0:
         self.job.pyprint('Command {} completed with output {}'.format(
             rule.command.description, stdout))
     else:
         self.job.print_error(
             'Command {} failed with exit status {}; stderr:'.format(
                 rule.command.description, exitstatus), stderr)
         return 'failed'
     event_detail = ('program="{tool.description}";'
                     ' version="{tool.version}"'.format(
                         tool=rule.command.tool))
     if output.get('eventOutcomeInformation') != 'pass':
         self.job.print_error(
             'Command {descr} returned a non-pass outcome '
             'for the policy check;\n\noutcome: '
             '{outcome}\n\ndetails: {details}.'.format(
                 descr=rule.command.description,
                 outcome=output.get('eventOutcomeInformation'),
                 details=output.get('eventOutcomeDetailNote'),
             ))
         result = 'failed'
     self.job.pyprint('Creating policy checking event for {} ({})'.format(
         self.file_path, self.file_uuid))
     # Manually-normalized access derivatives have no file UUID so we can't
     # create a validation event for them. TODO/QUESTION: should we use the
     # UUID that was assigned to the manually normalized derivative during
     # transfer, i.e., the one that we retrieve in
     # ``_get_manually_normalized_access_derivative_file_uuid`` above?
     if not self.is_manually_normalized_access_derivative:
         databaseFunctions.insertIntoEvents(
             fileUUID=self.file_uuid,
             eventType='validation',  # From PREMIS controlled vocab.
             eventDetail=event_detail,
             eventOutcome=output.get('eventOutcomeInformation'),
             eventOutcomeDetailNote=output.get('eventOutcomeDetailNote'),
         )
     return result
    exit(2)
f = open(filePath, 'r')

line = f.readline()
while not line.startswith("Depends:"):
    line = f.readline()



for part in line.split(","):
    part = part.strip()
    if part.find("${shlibs:Depends}") != -1 or \
        part.find("${misc:Depends}") != -1:
        continue
    if part.startswith("archivematica"):
        continue

    if part in excludePackages:
        continue

    print sys.argv[1]
    print "Attempting Install/Update of: ", part
    command = "sudo apt-get install -y " + part
    exitCode, stdOut, stdError = executeOrRun("command", command, printing=False)
    if exitCode:
        print "exitCode:", exitCode
        print stdOut
        print >>sys.stderr, stdError
    #else:
        #print "OK"
def playAudioFile(filePath):
    command = "cvlc --play-and-exit \"" + filePath + "\""
    exitCode, stdOut, stdError = executeOrRun("command", command, printing=False)
    if exitCode != 0:
        print stdOut
        print >>sys.stderr, stdError
Exemplo n.º 34
0
 def _execute_rule_command(self, rule):
     """Run the command against the file and return either 'passed' or
     'failed'. If the command errors or determines that the file is invalid,
     return 'failed'. Non-errors will result in the creation of an Event
     model in the db. Preservation derivative validation will result in the
     stdout from the command being saved to disk within the unit (i.e., SIP).
     """
     result = "passed"
     if rule.command.script_type in ("bashScript", "command"):
         command_to_execute = replace_string_values(
             rule.command.command,
             file_=self.file_uuid,
             sip=self.sip_uuid,
             type_="file",
         )
         args = []
     else:
         command_to_execute = rule.command.command
         args = [self.file_path]
     self.job.print_output("Running", rule.command.description)
     exitstatus, stdout, stderr = executeOrRun(
         type=rule.command.script_type,
         text=command_to_execute,
         printing=False,
         arguments=args,
     )
     if exitstatus != 0:
         self.job.print_error(
             "Command {description} failed with exit status {status};"
             " stderr:".format(description=rule.command.description,
                               status=exitstatus),
             stderr)
         return "failed"
     # Parse output and generate an Event
     # TODO: Evaluating a python string from a user-definable script seems
     # insecure practice; should be JSON.
     output = ast.literal_eval(stdout)
     event_detail = ('program="{tool.description}";'
                     ' version="{tool.version}"'.format(
                         tool=rule.command.tool))
     # If the FPR command has not errored but the actual validation
     # determined that the file is not valid, then we want to both create a
     # validation event in the db and set ``failed`` to ``True`` because we
     # want the micro-service in the dashboard GUI to indicate "Failed".
     # NOTE: this requires that the stdout of all validation FPR commands be
     # a dict (preferably a JSON object) with an ``eventOutcomeInformation``
     # boolean attribute.
     if output.get("eventOutcomeInformation") == "pass":
         self.job.print_output('Command "{}" was successful'.format(
             rule.command.description))
     elif output.get("eventOutcomeInformation") == "partial pass":
         self.job.print_output(
             'Command "{}" was partially successful'.format(
                 rule.command.description))
     else:
         self.job.pyprint(
             "Command {cmd_description} indicated failure with this"
             " output:\n\n{output}".format(
                 cmd_description=rule.command.description,
                 output=pformat(stdout)),
             file=sys.stderr,
         )
         result = "failed"
     if self.file_type == "preservation":
         self._save_stdout_to_logs_dir(output)
     self.job.print_output(
         "Creating {purpose} event for {file_path} ({file_uuid})".format(
             purpose=self.purpose,
             file_path=self.file_path,
             file_uuid=self.file_uuid))
     databaseFunctions.insertIntoEvents(
         fileUUID=self.file_uuid,
         eventType="validation",  # From PREMIS controlled vocab.
         eventDetail=event_detail,
         eventOutcome=output.get("eventOutcomeInformation"),
         eventOutcomeDetailNote=output.get("eventOutcomeDetailNote"),
     )
     return result
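
The TODO above notes that parsing the command's stdout with ast.literal_eval is fragile and should really be JSON. A small sketch contrasting the two parse paths; the sample strings are invented for illustration.

import ast
import json

python_literal = "{'eventOutcomeInformation': 'pass', 'eventOutcomeDetailNote': 'Well-Formed and valid'}"
json_literal = '{"eventOutcomeInformation": "pass", "eventOutcomeDetailNote": "Well-Formed and valid"}'

# ast.literal_eval only evaluates Python literals (no calls or names), but it
# still ties the command's output format to Python syntax.
print(ast.literal_eval(python_literal)["eventOutcomeInformation"])  # pass

# The JSON equivalent suggested by the TODO parses with the stdlib json module.
print(json.loads(json_literal)["eventOutcomeInformation"])  # pass
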
Exemplo n.º 35
0
def main(job, task_uuid, file_uuid):
    setup_dicts(mcpclient_settings)

    succeeded = True

    file_ = File.objects.get(uuid=file_uuid)

    # Normally we don't transcribe derivatives (access copies, preservation copies);
    # however, some useful transcription tools can't handle some formats that
    # are common as the primary copies. For example, tesseract can't handle JPEG2000.
    # If there are no rules for the primary format passed in, try to look at each
    # derivative until a transcribable derivative is found.
    #
    # Skip derivatives to avoid double-scanning them; only look at them as a fallback.
    if file_.filegrpuse != "original":
        job.print_error(
            '{} is not an original; not transcribing'.format(file_uuid))
        return 0

    rules = fetch_rules_for(file_)
    if not rules:
        file_, rules = fetch_rules_for_derivatives(file_)

    if not rules:
        job.print_error(
            'No rules found for file {} and its derivatives; not transcribing'.
            format(file_uuid))
        return 0
    else:
        if file_.filegrpuse == "original":
            noun = "original"
        else:
            noun = file_.filegrpuse + " derivative"
        job.print_error('Transcribing {} {}'.format(noun, file_.uuid))

    rd = ReplacementDict.frommodel(file_=file_, type_='file')

    for rule in rules:
        script = rule.command.command
        if rule.command.script_type in ('bashScript', 'command'):
            script, = rd.replace(script)
            args = []
        else:
            args = rd.to_gnu_options

        exitstatus, stdout, stderr = executeOrRun(rule.command.script_type,
                                                  script,
                                                  arguments=args,
                                                  capture_output=True)
        job.write_output(stdout)
        job.write_error(stderr)
        if exitstatus != 0:
            succeeded = False

        output_path = rd.replace(rule.command.output_location)[0]
        relative_path = output_path.replace(rd['%SIPDirectory%'],
                                            '%SIPDirectory%')
        event = insert_transcription_event(exitstatus, file_uuid, rule,
                                           relative_path)

        if os.path.isfile(output_path):
            insert_file_into_database(task_uuid, file_uuid, rd['%SIPUUID%'],
                                      event, rule, output_path, relative_path)

    return 0 if succeeded else 1
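
A quick illustration of the path rewriting done just before the database inserts in the loop above: the absolute output location produced by the transcription command is stored with the %SIPDirectory% placeholder in place of the on-disk prefix. The paths below are invented for the example.

sip_directory = '/var/archivematica/sharedDirectory/currentlyProcessing/mysip-1234/'
output_path = sip_directory + 'objects/ocr/page-001.txt'

# Same substitution as output_path.replace(rd['%SIPDirectory%'], '%SIPDirectory%')
relative_path = output_path.replace(sip_directory, '%SIPDirectory%')
print(relative_path)  # %SIPDirectory%objects/ocr/page-001.txt
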
Exemplo n.º 36
0
def main(job, enabled, file_path, file_uuid, disable_reidentify):
    enabled = True if enabled == "True" else False
    if not enabled:
        job.print_output("Skipping file format identification")
        return 0

    command = _default_idcommand()
    if command is None:
        job.write_error("Unable to determine IDCommand.\n")
        return 255

    command_uuid = command.uuid
    job.print_output("IDCommand:", command.description)
    job.print_output("IDCommand UUID:", command.uuid)
    job.print_output("IDTool:", command.tool.description)
    job.print_output("IDTool UUID:", command.tool.uuid)
    job.print_output("File: ({}) {}".format(file_uuid, file_path))

    file_ = File.objects.get(uuid=file_uuid)

    # If reidentification is disabled and a format identification event exists for this file, exit
    if (
        disable_reidentify
        and file_.event_set.filter(event_type="format identification").exists()
    ):
        job.print_output(
            "This file has already been identified, and re-identification is disabled. Skipping."
        )
        return 0

    # Save whether identification was enabled by the user for use in a later
    # chain.
    _save_id_preference(file_, enabled)

    exitcode, output, err = executeOrRun(
        command.script_type,
        command.script,
        arguments=[file_path],
        printing=False,
        capture_output=True,
    )
    output = output.strip()

    if exitcode != 0:
        job.print_error(
            "Error: IDCommand with UUID {} exited non-zero.".format(command_uuid)
        )
        job.print_error("Error: {}".format(err))
        return 255

    job.print_output("Command output:", output)
    # PUIDs are the same regardless of tool, so PUID-producing tools don't have "rules" per se - we just
    # go straight to the FormatVersion table to see if there's a matching PUID
    try:
        if command.config == "PUID":
            version = FormatVersion.active.get(pronom_id=output)
        else:
            rule = IDRule.active.get(command_output=output, command=command)
            version = rule.format
    except IDRule.DoesNotExist:
        job.print_error(
            'Error: No FPR identification rule for tool output "{}" found'.format(
                output
            )
        )
        write_identification_event(file_uuid, command, success=False)
        return 255
    except IDRule.MultipleObjectsReturned:
        job.print_error(
            'Error: Multiple FPR identification rules for tool output "{}" found'.format(
                output
            )
        )
        write_identification_event(file_uuid, command, success=False)
        return 255
    except FormatVersion.DoesNotExist:
        job.print_error("Error: No FPR format record found for PUID {}".format(output))
        write_identification_event(file_uuid, command, success=False)
        return 255

    (ffv, created) = FileFormatVersion.objects.get_or_create(
        file_uuid=file_, defaults={"format_version": version}
    )
    if not created:  # Update the version if it wasn't created new
        ffv.format_version = version
        ffv.save()
    job.print_output("{} identified as a {}".format(file_path, version.description))

    write_identification_event(file_uuid, command, format=version.pronom_id)
    write_file_id(file_uuid=file_uuid, format=version, output=output)

    return 0
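
The try/except block above branches on how the IDCommand is configured: a PUID-producing command maps its output straight to a FormatVersion, while any other command goes through an FPR identification rule keyed on the literal tool output. Below is a toy stand-in for those two lookups, with illustrative values rather than real FPR data, to show the branching.

puid_to_format = {'fmt/43': 'JPEG 1.01'}                  # stand-in for the FormatVersion table
rule_output_to_format = {'JPEG image data': 'JPEG 1.01'}  # stand-in for the IDRule table

def identify(config, output):
    if config == 'PUID':
        # Mirrors FormatVersion.active.get(pronom_id=output)
        return puid_to_format[output]
    # Mirrors IDRule.active.get(command_output=output, command=command)
    return rule_output_to_format[output]

print(identify('PUID', 'fmt/43'))            # JPEG 1.01
print(identify('other', 'JPEG image data'))  # JPEG 1.01
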
Exemplo n.º 37
0
    followup = f.readline()

for part in line.split(","):
    # The word is split in order to try to install the latest version of
    # packages expressed in the syntax: foo (>= bar)
    # TODO apt-get install doesn't appear to support the full version
    # syntax control files support, but this should possibly try to
    # install the exact version specified?
    part = part.strip().split(' ')[0]
    if part.find("${shlibs:Depends}") != -1 or \
        part.find("${misc:Depends}") != -1:
        continue
    if part.startswith(("archivematica", "Depends:")):
        continue

    if part in excludePackages:
        continue

    print(sys.argv[1])
    print("Attempting Install/Update of: ", part)
    command = "sudo apt-get install -y " + part
    exitCode, stdOut, stdError = executeOrRun("command",
                                              command,
                                              printing=False)
    if exitCode:
        print("exitCode:", exitCode)
        print(stdOut)
        print(stdError, file=sys.stderr)
    #else:
    #print "OK"
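
A small illustration of the Depends-line handling described in the comment above: each comma-separated entry is stripped and split on the first space, so a versioned dependency such as "foo (>= 1.2)" becomes a plain package name before being handed to apt-get. The sample line and exclude list below are invented.

line = "Depends: ${shlibs:Depends}, python-lxml (>= 2.3), archivematica-common, unar"
excludePackages = ["unar"]

for part in line.split(","):
    part = part.strip().split(' ')[0]
    if "${shlibs:Depends}" in part or "${misc:Depends}" in part:
        continue
    if part.startswith(("archivematica", "Depends:")):
        continue
    if part in excludePackages:
        continue
    print("Would install:", part)  # Would install: python-lxml
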
Exemplo n.º 38
0
def compress_aip(compression, compression_level, sip_directory, sip_name,
                 sip_uuid):
    """ Compresses AIP according to compression algorithm and level.
    compression = AIP compression algorithm, format: <program>-<algorithm>, e.g. 7z-lzma (pbzip2 takes no algorithm, so it is written as pbzip2-)
    compression_level = AIP compression level, integer between 1 and 9 inclusive
    sip_directory = Absolute path to the directory where the SIP is
    sip_name = User-provided name of the SIP
    sip_uuid = SIP UUID

    Example inputs:
    compressAIP.py
        7z-lzma
        5
        /var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/compressionAIPDecisions/ep-d87d5845-bd07-4200-b1a4-928e0cb6e1e4/
        ep
        d87d5845-bd07-4200-b1a4-928e0cb6e1e4
    """
    try:
        program, compression_algorithm = compression.split('-')
    except ValueError:
        msg = "Invalid program-compression algorithm: {}".format(compression)
        print(msg, file=sys.stderr)
        return -1

    archive_path = '{name}-{uuid}'.format(name=sip_name, uuid=sip_uuid)
    uncompressed_location = sip_directory + archive_path

    # Even though no actual compression is taking place,
    # the location still needs to be set in the unit to ensure that the
    # %AIPFilename% variable is set appropriately.
    # Setting it to an empty string ensures the common
    # "%SIPDirectory%%AIPFilename%" pattern still points at the right thing.
    if program == 'None':
        update_unit(sip_uuid, uncompressed_location)
        return 0

    print("Compressing {} with {}, algorithm {}, level {}".format(
        uncompressed_location, program, compression_algorithm,
        compression_level))

    if program == '7z':
        compressed_location = uncompressed_location + ".7z"
        command = '/usr/bin/7z a -bd -t7z -y -m0={algorithm} -mx={level} -mta=on -mtc=on -mtm=on -mmt=on "{compressed_location}" "{uncompressed_location}"'.format(
            algorithm=compression_algorithm,
            level=compression_level,
            uncompressed_location=uncompressed_location,
            compressed_location=compressed_location)
        tool_info_command = (
            'echo program="7z"\; '
            'algorithm="{}"\; '
            'version="`7z | grep Version`"'.format(compression_algorithm))
    elif program == 'pbzip2':
        compressed_location = uncompressed_location + ".tar.bz2"
        command = '/bin/tar -c --directory "{sip_directory}" "{archive_path}" | /usr/bin/pbzip2 --compress -{level} > "{compressed_location}"'.format(
            sip_directory=sip_directory,
            archive_path=archive_path,
            level=compression_level,
            compressed_location=compressed_location)
        tool_info_command = (
            'echo program="pbzip2"\; '
            'algorithm="{}"\; '
            'version="$((pbzip2 -V) 2>&1)"'.format(compression_algorithm))

    else:
        msg = "Program {} not recognized, exiting script prematurely.".format(
            program)
        print(msg, file=sys.stderr)
        return -1

    print('Executing command:', command)
    exit_code, std_out, std_err = executeOrRun("bashScript",
                                               command,
                                               printing=True,
                                               capture_output=False)

    # Add new AIP File
    file_uuid = sip_uuid
    databaseFunctions.insertIntoFiles(
        fileUUID=file_uuid,
        filePath=compressed_location.replace(sip_directory, '%SIPDirectory%',
                                             1),
        sipUUID=sip_uuid,
        use='aip',
    )

    # Add compression event
    print('Tool info command:', tool_info_command)
    _, tool_info, _ = executeOrRun("bashScript",
                                   tool_info_command,
                                   printing=True)
    tool_output = 'Standard Output="{}"; Standard Error="{}"'.format(
        std_out, std_err)
    databaseFunctions.insertIntoEvents(
        eventType='compression',
        eventDetail=tool_info,
        eventOutcomeDetailNote=tool_output,
        fileUUID=file_uuid,
    )

    update_unit(sip_uuid, compressed_location)

    return exit_code
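
A minimal sketch of how the compression argument is decomposed and how the 7z invocation is assembled from it, using the example inputs from the docstring; this only builds the command string and does not execute anything.

compression = '7z-lzma'
compression_level = 5
sip_directory = '/var/archivematica/sharedDirectory/watchedDirectories/workFlowDecisions/compressionAIPDecisions/ep-d87d5845-bd07-4200-b1a4-928e0cb6e1e4/'
sip_name = 'ep'
sip_uuid = 'd87d5845-bd07-4200-b1a4-928e0cb6e1e4'

program, compression_algorithm = compression.split('-')   # ('7z', 'lzma')
archive_path = '{name}-{uuid}'.format(name=sip_name, uuid=sip_uuid)
uncompressed_location = sip_directory + archive_path
compressed_location = uncompressed_location + '.7z'

command = (
    '/usr/bin/7z a -bd -t7z -y -m0={algorithm} -mx={level} -mta=on -mtc=on '
    '-mtm=on -mmt=on "{compressed}" "{uncompressed}"'.format(
        algorithm=compression_algorithm,
        level=compression_level,
        compressed=compressed_location,
        uncompressed=uncompressed_location))
print(command)
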