def process(self, file_name, to_delete=False, test_mode=False, get_log=False, dump_workflow=False):
    is_fatal = False
    is_OK = True
    request_id = None
    dump_str = None
    # fallback log wrapper in case parsing fails before the real token is built
    tmpLog = LogWrapper(self.log, '')
    try:
        with open(file_name) as f:
            ops = json.load(f)
            user_name = clean_user_id(ops["userName"])
            base_platform = ops['data'].get('base_platform')
            for task_type in ops['data']['taskParams']:
                ops['data']['taskParams'][task_type]['userName'] = user_name
                if base_platform:
                    ops['data']['taskParams'][task_type]['basePlatform'] = base_platform
            log_token = '< id="{}" test={} outDS={} >'.format(user_name, test_mode, ops['data']['outDS'])
            tmpLog = LogWrapper(self.log, log_token)
            tmpLog.info('start {}'.format(file_name))
            sandbox_url = os.path.join(ops['data']['sourceURL'], 'cache', ops['data']['sandbox'])
            # I/O with the child process goes through temporary JSON files
            ops_file = tempfile.NamedTemporaryFile(delete=False, mode='w')
            json.dump(ops, ops_file)
            ops_file.close()
            # execute the main logic in another process to avoid the chdir mess
            tmp_stat, tmp_out = commands_get_status_output("python {} {} '{}' {} {} '{}' {}".format(
                __file__, sandbox_url, log_token, dump_workflow, ops_file.name,
                user_name, test_mode))
            if tmp_stat:
                is_OK = False
                tmpLog.error('main execution failed with {}:{}'.format(tmp_stat, tmp_out))
            else:
                # the last line of the child's stdout is the path of its result file
                result_file_name = tmp_out.split('\n')[-1]
                with open(result_file_name) as tmp_out_file:
                    is_OK, is_fatal, request_id, dump_str = json.load(tmp_out_file)
                try:
                    os.remove(result_file_name)
                except Exception:
                    pass
            if not get_log:
                if is_OK:
                    tmpLog.info('is_OK={} request_id={}'.format(is_OK, request_id))
                else:
                    tmpLog.info('is_OK={} is_fatal={} request_id={}'.format(is_OK, is_fatal, request_id))
            if to_delete or (not test_mode and (is_OK or is_fatal)):
                dump_str = tmpLog.dumpToString() + (dump_str or '')
                tmpLog.debug('delete {}'.format(file_name))
                try:
                    os.remove(file_name)
                except Exception:
                    pass
                # send notification
                if not test_mode and self.taskBuffer is not None:
                    toAdder = self.taskBuffer.getEmailAddr(user_name)
                    if toAdder is None or toAdder.startswith('notsend'):
                        tmpLog.debug('skip to send notification since suppressed')
                    else:
                        # message
                        if is_OK:
                            mailSubject = "PANDA Notification for Workflow {}".format(ops['data']['outDS'])
                            mailBody = "Hello,\n\nWorkflow:{} has been accepted with RequestID:{}\n\n".\
                                format(ops['data']['outDS'], request_id)
                        else:
                            mailSubject = "PANDA WARNING for Workflow={}".format(ops['data']['outDS'])
                            mailBody = "Hello,\n\nWorkflow {} was not accepted\n\n".format(ops['data']['outDS'])
                            mailBody += "Reason : {}\n".format(dump_str)
                        # send
                        tmpSM = MailUtils().send(toAdder, mailSubject, mailBody)
                        tmpLog.debug('sent message with {}'.format(tmpSM))
    except Exception as e:
        is_OK = False
        tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))
    if get_log:
        ret_val = {'status': is_OK}
        if is_OK:
            ret_val['log'] = dump_str
        else:
            if dump_str is None:
                ret_val['log'] = tmpLog.dumpToString()
            else:
                ret_val['log'] = dump_str
        return ret_val
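
# Note: process() above and core_exec() below communicate through temporary
# JSON files rather than pipes. The contract, roughly (illustrative sketch,
# not executed code):
#
#   parent: dumps ops to a NamedTemporaryFile and spawns
#             python thisfile.py <sandbox_url> '<log_token>' <dump_workflow> \
#                                <ops_file> '<user_name>' <test_mode>
#   child:  writes [is_OK, is_fatal, request_id, dump_str] to another
#           temporary JSON file and prints its path as the last stdout line
#   parent: json.load()s that file and then removes it
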
class EventPicker:
    # constructor
    def __init__(self, taskBuffer, siteMapper, evpFileName, ignoreError):
        self.taskBuffer = taskBuffer
        self.siteMapper = siteMapper
        self.ignoreError = ignoreError
        self.evpFileName = evpFileName
        self.token = datetime.datetime.utcnow().isoformat(' ')
        # logger
        self.logger = LogWrapper(_logger, self.token)
        self.pd2p = DynDataDistributer.DynDataDistributer([],
                                                          self.taskBuffer,
                                                          self.siteMapper,
                                                          token=' ',
                                                          logger=self.logger)
        self.userDatasetName = ''
        self.creationTime = ''
        self.params = ''
        self.lockedBy = ''
        self.evpFile = None
        self.userTaskName = ''
        self.userDN = ''
        # message buffer
        self.msgBuffer = []
        self.lineLimit = 100
        # JEDI
        self.jediTaskID = None
        self.prodSourceLabel = None
        self.job_label = None

    # main
    def run(self):
        try:
            self.putLog('start %s' % self.evpFileName)
            # lock evp file
            self.evpFile = open(self.evpFileName)
            try:
                fcntl.flock(self.evpFile.fileno(),
                            fcntl.LOCK_EX | fcntl.LOCK_NB)
            except Exception:
                # cannot lock; another process is handling this file
                self.putLog("cannot lock %s" % self.evpFileName)
                self.evpFile.close()
                return True
            # options
            runEvtList = []
            eventPickDataType = ''
            eventPickStreamName = ''
            eventPickDS = []
            eventPickAmiTag = ''
            eventPickNumSites = 1
            inputFileList = []
            tagDsList = []
            tagQuery = ''
            tagStreamRef = ''
            skipDaTRI = False
            runEvtGuidMap = {}
            ei_api = ''
            # read evp file
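            # an evp file holds one key=value pair per line, e.g. (illustrative
            # values only):
            #   userName=/DC=ch/DC=cern/OU=Users/CN=jdoe
            #   runEvent=358031,1234567
            #   runEvent=358031,7654321
            #   eventPickDataType=AOD
            #   eventPickStreamName=physics_Main
            #   eventPickDS=data18_13TeV.%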
            for tmpLine in self.evpFile:
                tmpMatch = re.search('^([^=]+)=(.+)$', tmpLine)
                # check format
                if tmpMatch is None:
                    continue
                tmpItems = tmpMatch.groups()
                if tmpItems[0] == 'runEvent':
                    # get run and event number
                    tmpRunEvt = tmpItems[1].split(',')
                    if len(tmpRunEvt) == 2:
                        runEvtList.append(tmpRunEvt)
                elif tmpItems[0] == 'eventPickDataType':
                    # data type
                    eventPickDataType = tmpItems[1]
                elif tmpItems[0] == 'eventPickStreamName':
                    # stream name
                    eventPickStreamName = tmpItems[1]
                elif tmpItems[0] == 'eventPickDS':
                    # dataset pattern
                    eventPickDS = tmpItems[1].split(',')
                elif tmpItems[0] == 'eventPickAmiTag':
                    # AMI tag
                    eventPickAmiTag = tmpItems[1]
                elif tmpItems[0] == 'eventPickNumSites':
                    # the number of sites where datasets are distributed
                    try:
                        eventPickNumSites = int(tmpItems[1])
                    except Exception:
                        pass
                elif tmpItems[0] == 'userName':
                    # user name
                    self.userDN = tmpItems[1]
                    self.putLog("user=%s" % self.userDN)
                elif tmpItems[0] == 'userTaskName':
                    # user task name
                    self.userTaskName = tmpItems[1]
                elif tmpItems[0] == 'userDatasetName':
                    # user dataset name
                    self.userDatasetName = tmpItems[1]
                elif tmpItems[0] == 'lockedBy':
                    # client name
                    self.lockedBy = tmpItems[1]
                elif tmpItems[0] == 'creationTime':
                    # creation time
                    self.creationTime = tmpItems[1]
                elif tmpItems[0] == 'params':
                    # parameters
                    self.params = tmpItems[1]
                elif tmpItems[0] == 'ei_api':
                    # ei api parameter for MC
                    ei_api = tmpItems[1]
                elif tmpItems[0] == 'inputFileList':
                    # input file list
                    inputFileList = tmpItems[1].split(',')
                    try:
                        inputFileList.remove('')
                    except Exception:
                        pass
                elif tmpItems[0] == 'tagDS':
                    # TAG dataset
                    tagDsList = tmpItems[1].split(',')
                elif tmpItems[0] == 'tagQuery':
                    # query for TAG
                    tagQuery = tmpItems[1]
                elif tmpItems[0] == 'tagStreamRef':
                    # StreamRef for TAG
                    tagStreamRef = tmpItems[1]
                    if not tagStreamRef.endswith('_ref'):
                        tagStreamRef += '_ref'
                elif tmpItems[0] == 'runEvtGuidMap':
                    # GUIDs; the value is a dict literal, so parse it safely
                    try:
                        runEvtGuidMap = ast.literal_eval(tmpItems[1])
                    except Exception:
                        pass
            # extract task name
            if self.userTaskName == '' and self.params != '':
                try:
                    tmpMatch = re.search('--outDS(=| ) *([^ ]+)', self.params)
                    if tmpMatch is not None:
                        self.userTaskName = tmpMatch.group(2)
                        if not self.userTaskName.endswith('/'):
                            self.userTaskName += '/'
                except Exception:
                    pass
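            # the extraction above turns e.g. params='--outDS user.jdoe.pick001'
            # into userTaskName='user.jdoe.pick001/' (illustrative value)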
            # suppress DaTRI
            if self.params != '':
                if '--eventPickSkipDaTRI' in self.params:
                    skipDaTRI = True
            # get compact user name
            compactDN = self.taskBuffer.cleanUserID(self.userDN)
            # get jediTaskID
            self.jediTaskID = self.taskBuffer.getTaskIDwithTaskNameJEDI(
                compactDN, self.userTaskName)
            # get prodSourceLabel
            self.prodSourceLabel, self.job_label = self.taskBuffer.getProdSourceLabelwithTaskID(
                self.jediTaskID)
            # convert run/event list to dataset/file list
            tmpRet, locationMap, allFiles = self.pd2p.convertEvtRunToDatasets(
                runEvtList, eventPickDataType, eventPickStreamName,
                eventPickDS, eventPickAmiTag, self.userDN, runEvtGuidMap,
                ei_api)
            if not tmpRet:
                if 'isFatal' in locationMap and locationMap['isFatal'] is True:
                    self.ignoreError = False
                self.endWithError(
                    'Failed to convert the run/event list to a dataset/file list'
                )
                return False
            # use only files in the list
            if inputFileList != []:
                tmpAllFiles = []
                for tmpFile in allFiles:
                    if tmpFile['lfn'] in inputFileList:
                        tmpAllFiles.append(tmpFile)
                allFiles = tmpAllFiles
            # remove redundant CN from DN
            tmpDN = self.userDN
            tmpDN = re.sub('/CN=limited proxy', '', tmpDN)
            tmpDN = re.sub('(/CN=proxy)+$', '', tmpDN)
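            # e.g. '/DC=ch/DC=cern/OU=Users/CN=jdoe/CN=proxy/CN=limited proxy'
            # becomes '/DC=ch/DC=cern/OU=Users/CN=jdoe'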
            # make dataset container
            tmpRet = self.pd2p.registerDatasetContainerWithDatasets(
                self.userDatasetName,
                allFiles,
                locationMap,
                nSites=eventPickNumSites,
                owner=tmpDN)
            if not tmpRet:
                self.endWithError('Failed to make a dataset container %s' %
                                  self.userDatasetName)
                return False
            # skip DaTRI
            if skipDaTRI:
                # successfully terminated
                self.putLog("skip DaTRI")
                # update task
                self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID)
            else:
                # get candidates
                tmpRet, candidateMaps = self.pd2p.getCandidates(
                    self.userDatasetName,
                    self.prodSourceLabel,
                    self.job_label,
                    checkUsedFile=False,
                    useHidden=True)
                if not tmpRet:
                    self.endWithError(
                        'Failed to find candidate for destination')
                    return False
                # collect all candidates
                allCandidates = []
                for tmpDS in candidateMaps:
                    tmpDsVal = candidateMaps[tmpDS]
                    for tmpCloud in tmpDsVal:
                        tmpCloudVal = tmpDsVal[tmpCloud]
                        for tmpSiteName in tmpCloudVal[0]:
                            if tmpSiteName not in allCandidates:
                                allCandidates.append(tmpSiteName)
                if allCandidates == []:
                    self.endWithError('No candidate for destination')
                    return False
                # get list of dataset (container) names
                if eventPickNumSites > 1:
                    # decompose container to transfer datasets separately
                    tmpRet, tmpOut = self.pd2p.getListDatasetReplicasInContainer(
                        self.userDatasetName)
                    if not tmpRet:
                        self.endWithError('Failed to get replicas in %s' %
                                          self.userDatasetName)
                        return False
                    userDatasetNameList = list(tmpOut)
                else:
                    # transfer container at once
                    userDatasetNameList = [self.userDatasetName]
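                # e.g. with eventPickNumSites=2 and a container holding datasets
                # A and B, the loop below sends A and B to different sites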
                # loop over all datasets
                sitesUsed = []
                for tmpUserDatasetName in userDatasetNameList:
                    # get size of dataset container
                    tmpRet, totalInputSize = rucioAPI.getDatasetSize(
                        tmpUserDatasetName)
                    if not tmpRet:
                        self.endWithError(
                            'Failed to get the size of {0} with {1}'.format(
                                tmpUserDatasetName, totalInputSize))
                        return False
                    # run brokerage
                    tmpJob = JobSpec()
                    tmpJob.AtlasRelease = ''
                    self.putLog("run brokerage for %s" % tmpDS)
                    pandaserver.brokerage.broker.schedule(
                        [tmpJob],
                        self.taskBuffer,
                        self.siteMapper,
                        True,
                        allCandidates,
                        True,
                        datasetSize=totalInputSize)
                    if tmpJob.computingSite.startswith('ERROR'):
                        self.endWithError('brokerage failed with %s' %
                                          tmpJob.computingSite)
                        return False
                    self.putLog("site -> %s" % tmpJob.computingSite)
                    # send transfer request
                    try:
                        # resolve the owner's nickname once per iteration while
                        # keeping tmpDN intact for the next dataset in the loop
                        parsedDN = rucioAPI.parse_dn(tmpDN)
                        tmpStatus, userInfo = rucioAPI.finger(parsedDN)
                        if not tmpStatus:
                            raise RuntimeError(
                                'user info not found for {0} with {1}'.format(
                                    parsedDN, userInfo))
                        userNickname = userInfo['nickname']
                        tmpSiteSpec = self.siteMapper.getSite(
                            tmpJob.computingSite)
                        scope_input, scope_output = select_scope(
                            tmpSiteSpec, JobUtils.ANALY_PS, JobUtils.ANALY_PS)
                        tmpDQ2ID = tmpSiteSpec.ddm_input[scope_input]
                        tmpMsg = "%s ds=%s site=%s id=%s" % (
                            'registerDatasetLocation for DaTRI ',
                            tmpUserDatasetName, tmpDQ2ID, userNickname)
                        self.putLog(tmpMsg)
                        rucioAPI.registerDatasetLocation(
                            tmpUserDatasetName, [tmpDQ2ID],
                            lifetime=14,
                            owner=userNickname,
                            activity="User Subscriptions")
                        self.putLog('OK')
                    except Exception:
                        errType, errValue = sys.exc_info()[:2]
                        tmpStr = 'Failed to send transfer request : %s %s' % (
                            errType, errValue)
                        tmpStr = tmpStr.strip()
                        tmpStr += traceback.format_exc()
                        self.endWithError(tmpStr)
                        return False
                    # list of sites already used
                    sitesUsed.append(tmpJob.computingSite)
                    self.putLog("used %s sites" % len(sitesUsed))
                    # set candidates
                    if len(sitesUsed) >= eventPickNumSites:
                        # reset candidates to limit the number of sites
                        allCandidates = sitesUsed
                        sitesUsed = []
                    else:
                        # remove site
                        allCandidates.remove(tmpJob.computingSite)
                # send email notification for success
                tmpMsg = 'A transfer request was successfully sent to Rucio.\n'
                tmpMsg += 'Your task will get started once transfer is completed.'
                self.sendEmail(True, tmpMsg)
            try:
                # unlock and delete evp file
                fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN)
                self.evpFile.close()
                os.remove(self.evpFileName)
            except Exception:
                pass
            # successfully terminated
            self.putLog("end %s" % self.evpFileName)
            return True
        except Exception:
            errType, errValue = sys.exc_info()[:2]
            self.endWithError('Got exception %s:%s %s' %
                              (errType, errValue, traceback.format_exc()))
            return False

    # end with error
    def endWithError(self, message):
        self.putLog(message, 'error')
        # unlock evp file
        try:
            fcntl.flock(self.evpFile.fileno(), fcntl.LOCK_UN)
            self.evpFile.close()
            if not self.ignoreError:
                # remove evp file
                os.remove(self.evpFileName)
                # send email notification
                self.sendEmail(False, message)
        except Exception:
            pass
        # upload log
        if self.jediTaskID is not None:
            outLog = self.uploadLog()
            self.taskBuffer.updateTaskErrorDialogJEDI(
                self.jediTaskID, 'event picking failed. ' + outLog)
            # update task
            if not self.ignoreError:
                self.taskBuffer.updateTaskModTimeJEDI(self.jediTaskID,
                                                      'tobroken')
            self.putLog(outLog)
        self.putLog('end %s' % self.evpFileName)

    # put log
    def putLog(self, msg, msg_type='debug'):
        if msg_type == 'error':
            self.logger.error(msg)
        else:
            self.logger.debug(msg)

    # send email notification
    def sendEmail(self, isSucceeded, message):
        # mail address
        toAdder = Notifier(self.taskBuffer, None, []).getEmail(self.userDN)
        if toAdder == '':
            self.putLog('cannot find email address for %s' % self.userDN,
                        'error')
            return
        # subject
        mailSubject = "PANDA notification for Event-Picking Request"
        # message
        mailBody = "Hello,\n\nHere is your request status for event picking\n\n"
        if isSucceeded:
            mailBody += "Status  : Passed to Rucio\n"
        else:
            mailBody += "Status  : Failed\n"
        mailBody += "Created : %s\n" % self.creationTime
        mailBody += "Ended   : %s\n" % datetime.datetime.utcnow().strftime(
            '%Y-%m-%d %H:%M:%S')
        mailBody += "Dataset : %s\n" % self.userDatasetName
        mailBody += "\n"
        mailBody += "Parameters : %s %s\n" % (self.lockedBy, self.params)
        mailBody += "\n"
        mailBody += "%s\n" % message
        # send
        MailUtils().send(toAdder, mailSubject, mailBody)

    # upload log
    def uploadLog(self):
        if self.jediTaskID is None:
            return 'cannot find jediTaskID'
        strMsg = self.logger.dumpToString()
        s, o = Client.uploadLog(strMsg, self.jediTaskID)
        if s != 0:
            return "failed to upload log with {0}.".format(s)
        if o.startswith('http'):
            return '<a href="{0}">log</a>'.format(o)
        return o
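
# A minimal usage sketch (hypothetical wiring; taskBuffer and siteMapper come
# from the PanDA server runtime, and the evp file follows the key=value format
# parsed in run() above):
#
#   picker = EventPicker(taskBuffer, siteMapper, '/tmp/evp/pick001.evp',
#                        ignoreError=True)
#   if not picker.run():
#       _logger.error('event picking failed for %s' % picker.evpFileName)
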
# sub-process entry: download the sandbox, parse the workflow description,
# and submit it; the result is handed back to process() through a JSON file
def core_exec(sandbox_url, log_token, dump_workflow, ops_file, user_name, test_mode):
    tmpLog = LogWrapper(_logger, log_token)
    is_OK = True
    is_fatal = False
    request_id = None
    # boolean flags arrive as strings on the command line
    dump_workflow = (dump_workflow == 'True')
    test_mode = (test_mode == 'True')
    try:
        with open(ops_file) as f:
            ops = json.load(f)
        try:
            os.remove(ops_file)
        except Exception:
            pass
        # go to temp dir
        cur_dir = os.getcwd()
        with tempfile.TemporaryDirectory() as tmp_dirname:
            os.chdir(tmp_dirname)
            # download sandbox
            tmpLog.info('downloading sandbox from {}'.format(sandbox_url))
            with requests.get(sandbox_url, allow_redirects=True, verify=False, stream=True) as r:
                if r.status_code == 404:
                    tmpLog.error("sandbox not found")
                    is_fatal = True
                    is_OK = False
                elif r.status_code != 200:
                    tmpLog.error("bad HTTP response {}".format(r.status_code))
                    is_OK = False
                # extract sandbox
                if is_OK:
                    # stream the sandbox to disk, then unpack it
                    with open(ops['data']['sandbox'], 'wb') as fs:
                        for chunk in r.raw.stream(1024, decode_content=False):
                            if chunk:
                                fs.write(chunk)
                    tmp_stat, tmp_out = commands_get_status_output(
                        'tar xvfz {}'.format(ops['data']['sandbox']))
                    if tmp_stat != 0:
                        tmpLog.error(tmp_out)
                        dump_str = 'failed to extract {}'.format(ops['data']['sandbox'])
                        tmpLog.error(dump_str)
                        is_fatal = True
                        is_OK = False
                # parse workflow files
                if is_OK:
                    tmpLog.info('parse workflow')
                    if ops['data']['language'] == 'cwl':
                        nodes, root_in = pcwl_utils.parse_workflow_file(ops['data']['workflowSpecFile'],
                                                                        tmpLog)
                        with open(ops['data']['workflowInputFile']) as workflow_input:
                            data = yaml.safe_load(workflow_input)
                        s_id, t_nodes, nodes = pcwl_utils.resolve_nodes(nodes, root_in, data, 0, set(),
                                                                        ops['data']['outDS'], tmpLog)
                        workflow_utils.set_workflow_outputs(nodes)
                        id_node_map = workflow_utils.get_node_id_map(nodes)
                        for node in nodes:
                            node.resolve_params(ops['data']['taskParams'], id_node_map)
                        dump_str = "the description was internally converted as follows\n" \
                                   + workflow_utils.dump_nodes(nodes)
                        tmpLog.info(dump_str)
                        for node in nodes:
                            s_check, o_check = node.verify()
                            if not s_check:
                                tmp_str = 'Verification failure in ID:{} {}'.format(node.id, o_check)
                                tmpLog.error(tmp_str)
                                dump_str += tmp_str
                                dump_str += '\n'
                                is_fatal = True
                                is_OK = False
                    else:
                        dump_str = "{} is not supported to describe the workflow".format(
                            ops['data']['language'])
                        tmpLog.error(dump_str)
                        is_fatal = True
                        is_OK = False
                    # convert to workflow
                    if is_OK:
                        workflow_to_submit, dump_str_list = workflow_utils.convert_nodes_to_workflow(nodes)
                        try:
                            if workflow_to_submit:
                                if not test_mode:
                                    tmpLog.info('submit workflow')
                                    wm = ClientManager(host=get_rest_host())
                                    request_id = wm.submit(workflow_to_submit, username=user_name)
                            else:
                                dump_str = 'workflow is empty'
                                tmpLog.error(dump_str)
                                is_fatal = True
                                is_OK = False
                        except Exception as e:
                            dump_str = 'failed to submit the workflow with {}'.format(str(e))
                            tmpLog.error('{} {}'.format(dump_str, traceback.format_exc()))
                            is_OK = False
                        if dump_workflow:
                            tmpLog.debug('\n' + ''.join(dump_str_list))
        os.chdir(cur_dir)
    except Exception as e:
        is_OK = False
        is_fatal = True
        tmpLog.error("failed to run with {} {}".format(str(e), traceback.format_exc()))

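    # hand the result back to the parent: dump it to a JSON file and print the
    # file's path as the last line of stdout, where process() picks it up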
    with tempfile.NamedTemporaryFile(delete=False, mode='w') as tmp_json:
        json.dump([is_OK, is_fatal, request_id, tmpLog.dumpToString()], tmp_json)
        print(tmp_json.name)
    sys.exit(0)
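
# minimal entry point matching the command line built in process() above
# (a sketch; the upstream module may wire its arguments differently)
if __name__ == '__main__':
    # argv: sandbox_url, log_token, dump_workflow, ops_file, user_name, test_mode
    core_exec(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4],
              sys.argv[5], sys.argv[6])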