示例#1
0
    def _log_task(self, taskManager, commandReplacementDic, taskUUID,
                  arguments):
        """
        Insert a row into the Tasks table describing the given task.

        :param MCPServer.linkTaskManager taskManager: A linkTaskManager subclass instance; its ``jobChainLink.UUID`` and ``execute`` attributes are stored on the new row.
        :param ReplacementDict commandReplacementDic: A ReplacementDict or dict instance. ``%relativeLocation%`` must be present (its basename becomes the stored file name); ``%fileUUID%`` is optional and falls back to an empty string.
        :param str taskUUID: The UUID to be used for this Task in the database.
        :param str arguments: The arguments to be passed to the command when it is executed, as a string. Can contain replacement variables; see ReplacementDict for supported values.
        """
        job_uuid = taskManager.jobChainLink.UUID
        file_uuid = (commandReplacementDic["%fileUUID%"]
                     if "%fileUUID%" in commandReplacementDic else "")
        execution = taskManager.execute

        # Only the last path component of the location is recorded.
        absolute_location = os.path.abspath(
            commandReplacementDic["%relativeLocation%"])
        file_name = os.path.basename(absolute_location)

        task_fields = {
            "taskuuid": taskUUID,
            "job_id": job_uuid,
            "fileuuid": file_uuid,
            "filename": file_name,
            "execution": execution,
            "arguments": arguments,
        }
        Task.objects.create(createdtime=getUTCDate(), **task_fields)
示例#2
0
def debugMonitor():
    """Log the current UTC date and the live thread count once an hour.

    This function never returns: it loops forever, emitting two debug
    log records and then sleeping for 3600 seconds.
    """
    while True:
        logger.debug('Debug monitor: datetime: %s', getUTCDate())
        # active_count() is the supported spelling; activeCount() is a
        # deprecated alias of the same function.
        logger.debug('Debug monitor: thread count: %s',
                     threading.active_count())
        time.sleep(3600)
        def write_task_results_callback():
            """Store each job's outcome on its Task row and fill ``results``.

            All updates happen inside a single ``transaction.atomic()``
            block. ``jobs`` and ``results`` are taken from the enclosing
            scope.
            """
            with transaction.atomic():
                for finished in jobs:
                    logger.info("\n\n*** Completed job: %s", finished.dump())

                    task_fields = {
                        "exitcode": finished.get_exit_code(),
                        "endtime": getUTCDate(),
                    }
                    # Output is persisted when capture is enabled in the
                    # settings, or whenever the job exited with a positive
                    # code.
                    capture_output = (
                        django_settings.CAPTURE_CLIENT_SCRIPT_OUTPUT
                        or task_fields["exitcode"] > 0)
                    if capture_output:
                        task_fields["stdout"] = finished.get_stdout()
                        task_fields["stderror"] = finished.get_stderr()
                    Task.objects.filter(taskuuid=finished.UUID).update(
                        **task_fields)

                    results[finished.UUID] = {
                        "exitCode": finished.get_exit_code()
                    }

                    if not finished.caller_wants_output:
                        continue

                    # Send back stdout/stderr so it can be written to files.
                    # Most cases don't require this (logging to the database
                    # is enough), but the ones that do are coordinated
                    # through the MCP Server so that multiple MCP Client
                    # instances don't try to write the same file at the same
                    # time.
                    results[finished.UUID]["stdout"] = finished.get_stdout()
                    results[finished.UUID]["stderror"] = finished.get_stderr()
示例#4
0
def debugMonitor():
    """Log the UTC date, thread count and job-chain counter once an hour.

    This function never returns: it loops forever, emitting three debug
    log records (current UTC date, number of live threads, and the value
    of the module-level ``countOfCreateUnitAndJobChainThreaded`` counter)
    and then sleeping for 3600 seconds.
    """
    global countOfCreateUnitAndJobChainThreaded
    while True:
        logger.debug('Debug monitor: datetime: %s', getUTCDate())
        # active_count() is the supported spelling; activeCount() is a
        # deprecated alias of the same function.
        logger.debug('Debug monitor: thread count: %s',
                     threading.active_count())
        logger.debug('Debug monitor: created job chain threaded: %s',
                     countOfCreateUnitAndJobChainThreaded)
        time.sleep(3600)
示例#5
0
def debugMonitor():
    """Periodically log the status of MCP.

    Once an hour, forever, this logs at debug level: the current UTC
    date, the number of live threads, the module-level
    ``countOfCreateUnitAndJobChainThreaded`` counter, and whether
    ``databaseInterface.sqlLock`` is currently held. This function never
    returns.
    """
    global countOfCreateUnitAndJobChainThreaded
    while True:
        # Non-blocking probe: if the lock can be taken it was free, so it
        # is handed straight back; otherwise somebody else is holding it.
        if databaseInterface.sqlLock.acquire(False):
            databaseInterface.sqlLock.release()
            dblockstatus = "SQL Lock: Unlocked"
        else:
            dblockstatus = "SQL Lock: Locked"
        logger.debug('Debug monitor: datetime: %s',
                     databaseFunctions.getUTCDate())
        # active_count() is the supported spelling; activeCount() is a
        # deprecated alias of the same function.
        logger.debug('Debug monitor: thread count: %s',
                     threading.active_count())
        logger.debug('Debug monitor: created job chain threaded: %s',
                     countOfCreateUnitAndJobChainThreaded)
        logger.debug('Debug monitor: DB lock status: %s', dblockstatus)
        time.sleep(3600)
def handle_batch_task(gearman_job, supported_modules):
    """Build a Job for every task in a Gearman batch and run the batch.

    The pickled payload in ``gearman_job.data`` is expected to hold a
    ``"tasks"`` mapping of task UUID to a dict with the keys
    ``"arguments"``, ``"createdDate"``, ``"uuid"`` and ``"wants_output"``.
    Replacement variables in each task's arguments are substituted, the
    start time of all matching Task rows is set, and the client script
    module is then invoked with the whole list of jobs.

    :param gearman_job: Gearman job; its ``task`` and ``data`` attributes are read.
    :param supported_modules: Mapping from Gearman task name to the name of a module inside the ``clientScripts`` package.
    :returns: The list of ``Job`` instances that were handed to the module.
    """
    module_name = supported_modules.get(gearman_job.task)
    # NOTE(security): unpickling runs arbitrary code if the payload is not
    # trusted. Presumably the payload only ever comes from the MCP Server
    # - confirm.
    gearman_data = cPickle.loads(gearman_job.data)

    utc_date = getUTCDate()
    # Same value for every task in the batch, so format it only once.
    iso_date = utc_date.isoformat()
    jobs = []
    for task_uuid, task_data in gearman_data["tasks"].items():
        arguments = task_data["arguments"]
        if isinstance(arguments, six.text_type):
            arguments = arguments.encode("utf-8")

        # Shared replacements first, then the per-task ones. Wrapping
        # items() in list() keeps the concatenation valid when items()
        # returns a view rather than a list.
        replacements = list(replacement_dict.items()) + [
            ("%date%", iso_date),
            ("%taskUUID%", task_uuid),
            ("%jobCreatedDate%", task_data["createdDate"]),
        ]

        for var, val in replacements:
            arguments = arguments.replace(var, val)

        job = Job(
            gearman_job.task,
            task_data["uuid"],
            _parse_command_line(arguments),
            caller_wants_output=task_data["wants_output"],
        )
        jobs.append(job)

    # Set their start times.  If we collide with the MCP Server inserting new
    # Tasks (which can happen under heavy concurrent load), retry as needed.
    def set_start_times():
        Task.objects.filter(taskuuid__in=[item.UUID for item in jobs]).update(
            starttime=utc_date)

    retryOnFailure("Set task start times", set_start_times)

    module = importlib.import_module("clientScripts." + module_name)

    # Our module can indicate that it should be run concurrently...
    if hasattr(module, "concurrent_instances"):
        fork_runner.call(
            "clientScripts." + module_name,
            jobs,
            task_count=module.concurrent_instances(),
        )
    else:
        module.call(jobs)

    return jobs
示例#7
0
    def __init__(self,
                 jobChain,
                 jobChainLinkPK,
                 unit,
                 passVar=None,
                 subJobOf=""):
        """Set up this chain link, record the job and create its tasks.

        If ``jobChainLinkPK`` is ``None``, or it is a string for which no
        ``MicroServiceChainLink`` row exists, the constructor returns early
        and the instance is left only partially initialised.

        :param jobChain: The job chain this link belongs to; stored as ``self.jobChain``.
        :param jobChainLinkPK: Either the id of a ``MicroServiceChainLink`` (as a string) or a ``MicroServiceChainLink`` instance.
        :param unit: The unit being processed; its ``UUID`` is logged and ``reload()`` is called on it.
        :param passVar: Stored as ``self.passVar``.
        :param subJobOf: Stored as ``self.subJobOf``.
        """
        # Identity check: None is a singleton, so ``is`` is the correct test.
        if jobChainLinkPK is None:
            return
        self.UUID = str(uuid.uuid4())
        self.jobChain = jobChain
        self.unit = unit
        self.passVar = passVar
        self.createdDate = getUTCDate()
        self.subJobOf = subJobOf

        # Depending on the path that led to this, jobChainLinkPK may
        # either be a UUID or a MicroServiceChainLink instance
        if isinstance(jobChainLinkPK, basestring):
            try:
                link = MicroServiceChainLink.objects.get(
                    id=str(jobChainLinkPK))
            # This will sometimes return no values
            except MicroServiceChainLink.DoesNotExist:
                return
        else:
            link = jobChainLinkPK

        self.pk = link.id

        self.currentTask = link.currenttask_id
        self.defaultNextChainLink = link.defaultnextchainlink_id
        taskType = link.currenttask.tasktype_id
        taskTypePKReference = link.currenttask.tasktypepkreference
        self.description = link.currenttask.description
        self.reloadFileList = link.reloadfilelist
        self.defaultExitMessage = link.defaultexitmessage
        self.microserviceGroup = link.microservicegroup

        LOGGER.info('Running %s (unit %s)', self.description, self.unit.UUID)
        self.unit.reload()

        logJobCreatedSQL(self)

        # When createTasks() yields nothing, move on to the next link
        # right away.
        if self.createTasks(taskType, taskTypePKReference) is None:
            self.getNextChainLinkPK(None)
示例#8
0
def createMetsHdr(sip_uuid):
    """Build and return the METS ``metsHdr`` element for a SIP.

    The header carries a ``CREATEDATE`` attribute with the current UTC
    time, a software ``agent`` whose name is the dashboard UUID, and, if
    ``getAccessionNumberFromTransfer(sip_uuid)`` returns a truthy value,
    an ``altRecordID`` element holding that accession number.

    :param sip_uuid: Passed to ``getAccessionNumberFromTransfer``.
    :returns: The newly created ``metsHdr`` element.
    """
    mets_hdr = etree.Element(
        ns.metsBNS + "metsHdr",
        CREATEDATE=getUTCDate().strftime("%Y-%m-%dT%H:%M:%S"))

    creator = etree.SubElement(mets_hdr,
                               ns.metsBNS + "agent",
                               ROLE="CREATOR",
                               TYPE="OTHER",
                               OTHERTYPE="SOFTWARE")
    creator_name = etree.SubElement(creator, ns.metsBNS + "name")
    creator_name.text = get_dashboard_uuid()
    creator_note = etree.SubElement(creator, ns.metsBNS + "note")
    creator_note.text = "Archivematica dashboard UUID"

    accession = getAccessionNumberFromTransfer(sip_uuid)
    if not accession:
        return mets_hdr

    record_id = etree.SubElement(mets_hdr,
                                 ns.metsBNS + "altRecordID",
                                 TYPE="Accession number")
    record_id.text = accession
    return mets_hdr
def write_identification_event(file_uuid, command, format=None, success=True):
    """Record a "format identification" event for a file.

    :param file_uuid: UUID of the file the event is attached to.
    :param command: Identification command; ``command.tool.description`` and ``command.tool.version`` go into the event detail.
    :param format: Stored as the outcome detail note; any falsy value is replaced by ``'No Matching Format'``.
    :param success: Selects the outcome text: ``"Positive"`` when truthy, ``"Not identified"`` otherwise.
    """
    detail = 'program="{}"; version="{}"'.format(
        command.tool.description, command.tool.version)
    outcome = "Positive" if success else "Not identified"
    outcome_note = format or 'No Matching Format'
    timestamp = getUTCDate()

    insertIntoEvents(
        fileUUID=file_uuid,
        eventIdentifierUUID=str(uuid.uuid4()),
        eventType="format identification",
        eventDateTime=timestamp,
        eventDetail=detail,
        eventOutcome=outcome,
        eventOutcomeDetailNote=outcome_note,
    )
示例#10
0
    def __init__(self, jobChain, link, workflow, unit, passVar=None):
        """Set up this chain link, create its job and start its task manager.

        If ``link`` is ``None`` the constructor returns immediately and no
        attributes are set on the instance.

        :param jobChain: The job chain this link belongs to; stored as ``self.jobChain``.
        :param link: Workflow link; its ``id`` and its "group" and "description" labels (language "en") are read.
        :param workflow: Stored as ``self.workflow``.
        :param unit: The unit being processed; its ``UUID`` is logged and ``reload()`` is called on it.
        :param passVar: Stored as ``self.passVar``.
        """
        if link is None:
            return

        self.UUID = str(uuid.uuid4())
        self.jobChain = jobChain
        self.workflow = workflow
        self.unit = unit
        self.passVar = passVar
        self.pk = link.id
        self.link = link
        self.created_at = getUTCDate()
        self.group = link.get_label("group", "en")
        self.description = link.get_label("description", "en")

        LOGGER.info("Running %s (unit %s)", self.description, self.unit.UUID)
        self.unit.reload()

        self._create_job()
        self._run_task_manager()
示例#11
0
def executeCommand(gearman_worker, gearman_job):
    """Run the command registered for a Gearman job and return its result.

    The Task row whose UUID equals ``gearman_job.unique`` is looked up. If
    it already has a start time the command is NOT run and an error
    result is returned; otherwise the row is stamped with this worker's
    client id and the current UTC time. Replacement variables are then
    substituted into the command and its arguments, and the command is
    executed.

    Every exception is caught and converted into an error result, so this
    function itself does not raise.

    :param gearman_worker: Gearman worker; its ``worker_client_id`` is stored on the Task.
    :param gearman_job: Gearman job; its ``task``, ``unique`` and ``data`` attributes are read.
    :returns: A pickled dict with the keys ``"exitCode"``, ``"stdOut"`` and ``"stdError"``.
    """
    try:
        execute = gearman_job.task
        logger.info('Executing %s (%s)', execute, gearman_job.unique)
        # NOTE(security): unpickling runs arbitrary code if the payload is
        # not trusted. Presumably the payload only ever comes from the MCP
        # Server - confirm.
        data = cPickle.loads(gearman_job.data)
        utcDate = databaseFunctions.getUTCDate()
        arguments = data["arguments"]
        if isinstance(arguments, unicode):
            arguments = arguments.encode("utf-8")

        sInput = ""
        clientID = gearman_worker.worker_client_id

        task = Task.objects.get(taskuuid=gearman_job.unique)
        if task.starttime is not None:
            # A start time is already recorded, so refuse to run the
            # command a second time.
            return cPickle.dumps({
                "exitCode": -1,
                "stdOut": "",
                "stdError": ("Detected this task has already started!\n"
                             "Unable to determine if it completed "
                             "successfully.")
            })

        task.client = clientID
        task.starttime = utcDate
        task.save()

        if execute not in supportedModules:
            return cPickle.dumps({
                "exitCode": -1,
                "stdOut": "Error!",
                "stdError": "Error! - Tried to run and unsupported command."
            })
        command = supportedModules[execute]

        # NOTE(review): these two writes mutate the module-level
        # replacementDic, so the values persist between jobs. Kept as-is
        # because other code may rely on it - confirm.
        replacementDic["%date%"] = utcDate.isoformat()
        replacementDic["%jobCreatedDate%"] = data["createdDate"]
        # Replace replacement strings
        for key, value in replacementDic.items():
            command = command.replace(key, value)
            arguments = arguments.replace(key, value)

        arguments = arguments.replace("%taskUUID%", str(gearman_job.unique))

        # Add useful environment vars for client scripts
        lib_paths = [
            '/usr/share/archivematica/dashboard/',
            '/usr/lib/archivematica/archivematicaCommon'
        ]
        env_updates = {
            'PYTHONPATH':
            os.pathsep.join(lib_paths),
            'DJANGO_SETTINGS_MODULE':
            config.get('MCPClient', 'django_settings_module')
        }

        # Execute command
        command += " " + arguments
        logger.info('<processingCommand>{%s}%s</processingCommand>',
                    gearman_job.unique, command)
        exitCode, stdOut, stdError = executeOrRun("command",
                                                  command,
                                                  sInput,
                                                  printing=False,
                                                  env_updates=env_updates)
        return cPickle.dumps({
            "exitCode": exitCode,
            "stdOut": stdOut,
            "stdError": stdError
        })
    except OSError:
        logger.exception('Execution failed')
        return cPickle.dumps({
            "exitCode": 1,
            "stdOut": "Archivematica Client Error!",
            "stdError": traceback.format_exc()
        })
    except Exception:
        # Last-resort boundary: report any other failure back to the caller
        # as an error result instead of letting it escape the worker.
        logger.exception('Unexpected error')
        return cPickle.dumps({
            "exitCode": -1,
            "stdOut": "",
            "stdError": traceback.format_exc()
        })
        # Marks every task listed in gearman_data["tasks"] as failed.
        # `gearman_data` and `reason` are not defined in this fragment;
        # presumably they come from an enclosing function that is not
        # visible here - confirm.
        def fail_all_tasks_callback():
            # For each task UUID, store the failure reason as stderr, an
            # exit code of 1 and the current UTC time as the end time.
            for task_uuid in gearman_data["tasks"]:
                Task.objects.filter(taskuuid=task_uuid).update(
                    stderror=str(reason), exitcode=1, endtime=getUTCDate())

            # NOTE(review): as indented, this call sits INSIDE the callback
            # body, so the callback would hand itself to retryOnFailure each
            # time it runs. It looks like it was meant to be at the same
            # level as the `def` above - confirm against the enclosing
            # function.
            retryOnFailure("Fail all tasks", fail_all_tasks_callback)