Пример #1
0
    def reportDash(self,jobReport):
        '''
        Build the dashboard report dictionary and send it to the DashBoard.

        The report contains:
          * every key/value returned by self.n_of_events(jobReport);
          * 'MonitorID' and 'MonitorJobID';
          * one 'io_<protocol>_<action>' entry per protocol/action found in
            the storage report returned by self.storageStat(jobReport),
            encoded as '<size>_<time>';
          * the three throughput figures of the throughput report.

        jobReport is only passed through to self.n_of_events and
        self.storageStat.  Nothing is returned: the dictionary is sent with
        apmonSend and printed when self.debug is set.
        '''
        event_report = self.n_of_events(jobReport)
        storage_report, throughput_report = self.storageStat(jobReport)

        dashboard_report = {}
        dashboard_report.update(event_report)

        # extract information to be sent to DashBoard
        # per protocol and per action; dashboard key is io_<protocol>_<action>
        dashboard_report['MonitorID'] = self.MonitorID
        dashboard_report['MonitorJobID'] = self.MonitorJobID
        for protocol, actions in storage_report.items():
            for action, stats in actions.items():
                # Best effort: a missing or malformed field is reported as
                # 'NULL'.  "except Exception" (instead of a bare except) keeps
                # KeyboardInterrupt/SystemExit from being swallowed.
                try:
                    size = float(stats[2])
                except Exception:
                    size = 'NULL'
                try:
                    # NOTE(review): the division by 1000 looks like a
                    # ms -> s conversion; confirm against storageStat.
                    elapsed = float(stats[3]) / 1000
                except Exception:
                    elapsed = 'NULL'
                dashboard_report['io_' + protocol + '_' + action] = str(size) + '_' + str(elapsed)

        if self.debug:
            # This dump happens before the throughput keys are added below.
            for key in sorted(dashboard_report):
                print('%s = %s' % (key, dashboard_report[key]))

        # IO throughput information
        dashboard_report['io_read_throughput'] = throughput_report['readThr']
        dashboard_report['io_write_throughput'] = throughput_report['writeThr']
        dashboard_report['io_netAvg_throughput'] = throughput_report['avgNetThr']

        # send to DashBoard
        apmonSend(self.MonitorID, self.MonitorJobID, dashboard_report)
        apmonFree()

        if self.debug == 1:
            print(dashboard_report)

        return
Пример #2
0
 def free(self):
     """Call the module-level apmonFree().

     NOTE(review): apmonFree is defined outside this snippet; presumably it
     releases the ApMon monitoring instance used by apmonSend -- confirm
     against its definition.
     """
     apmonFree()
Пример #3
0
# NOTE(review): os.environ.get() returns None when 'Dashboard_syncid' is not
# set; syncid.replace() below would then raise AttributeError.
syncid = os.environ.get('Dashboard_syncid')
# Replace the 'MetaID' placeholder by the value of Dashboard_Id.
# str() turns a missing variable into the literal string 'None'.
_jobid = str(os.environ.get('Dashboard_Id'))
# monitorid is defined earlier in the script, outside this excerpt.
monitorid = monitorid.replace('MetaID', _jobid)
syncid = syncid.replace('MetaID', _jobid)

# Start Dashboard Report: announce the executable, the CE, the grid job id
# and the worker-node host name (executable, ce and taskid come from earlier
# in the script).
hostname = str(socket.gethostname())
parameters = {
    'ExeStart': executable,
    'SyncCE': str(ce),
    'SyncGridJobId': syncid,
    'WNHostName': hostname
}
apmonSend(taskid, monitorid, parameters)
apmonFree()

###############
# Execute job
###############

# Add PWD to the PATH environment variable.
# myenv is the os.environ object itself, not a copy, so the PATH change also
# applies to this process.  If PWD is unset, ':None' is appended.
myenv = os.environ
myenv['PATH'] += ':{0}'.format(os.environ.get('PWD'))

# Process times taken just before the job starts; presumably compared with a
# later os.times() call to measure the job -- the consumer is not visible here.
t0 = os.times()
# shell=True: the executable string is interpreted by the shell.
# stdout and stderr are captured through pipes.
p = subprocess.Popen(executable,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE,
                     shell=True,
                     env=myenv)
Пример #4
0
            elif first == default and line[21:24] == "1st":
                first = int(datetime.strptime(line[-29:-9], "%d-%b-%Y %X").strftime('%s'))

    return (finit, fopen, first)

# Exactly two command-line arguments are expected (configuration file and
# pickled job description); any other count raises ValueError on unpacking.
(config, data) = sys.argv[1:]
# NOTE(review): pickle.load executes arbitrary code from the file; only safe
# when the data file comes from a trusted source.
# stageout and server are unpacked here but not used in this excerpt.
with open(data, 'rb') as f:
    (args, files, lumis, stageout, server, taskid, monitorid, syncid, want_summary) = pickle.load(f)

# Announce the start of the executable to the dashboard.
# The CE name is hard-coded; the host name falls back to '' when HOSTNAME is
# not set in the environment.
apmonSend(taskid, monitorid, {
            'ExeStart': 'cmsRun',
            'SyncCE': 'ndcms.crc.nd.edu',
            'SyncGridJobId': syncid,
            'WNHostName': os.environ.get('HOSTNAME', '')
            })
apmonFree()

# Work on a copy of the configuration so the original stays untouched.
# str.replace substitutes every occurrence of ".py" in the path, not only the
# file extension.
configfile = config.replace(".py", "_mod.py")
shutil.copy2(config, configfile)

# env is the os.environ object itself, so the proxy setting also applies to
# this process.
env = os.environ
env['X509_USER_PROXY'] = 'proxy'

# edit_process_source is defined outside this excerpt; it receives the copied
# configuration together with the file list, lumis and summary flag.
edit_process_source(configfile, files, lumis, want_summary)

# Alternative invocation kept by the original author (plain python instead of cmsRun):
# exit_code = subprocess.call('python "{0}" {1}'.format(configfile, ' '.join(map(repr, args))), shell=True, env=env)
# Run cmsRun through the shell: the job report goes to report.xml, stdout and
# stderr go to cmssw.log.  Each argument is passed as its repr().
exit_code = subprocess.call('cmsRun -j report.xml "{0}" {1} > cmssw.log 2>&1'.format(configfile, ' '.join(map(repr, args))), shell=True, env=env)

# Announce the end of the executable to the dashboard.
apmonSend(taskid, monitorid, {'ExeEnd': 'cmsRun'})

try:
Пример #5
0
 def free(self):
     """Call the module-level apmonFree().

     NOTE(review): apmonFree is defined outside this snippet; presumably it
     releases the ApMon monitoring instance used by apmonSend -- confirm
     against its definition.
     """
     apmonFree()
Пример #6
0
def dashboard_postexecute(proc_ad, db):
    """Report the final state of one job to the CMS Dashboard.

    Called in postsubmit for each job.  Nothing is reported (and None is
    returned) when the ad has no DESIRED_Sites, is not a vanilla-universe job,
    has no ClusterId, or when its cluster is unknown to the history DB.

    Existence of ad attributes is not guaranteed by collect_history, so every
    attribute lookup is guarded.

    :param proc_ad: job ad; must support ``in`` and ``[]`` with KeyError on a
        missing attribute.
    :param db: history DB handle; ``db.query(sql, *params)`` must return a
        sequence whose first element is the task id.
    """
    if 'DESIRED_Sites' not in proc_ad:
        return

    try:
        if proc_ad['JobUniverse'] != 5:  # 5: vanilla universe
            return
        cluster_id = proc_ad['ClusterId']
    except KeyError:
        return

    try:
        taskid = _taskids[cluster_id]
    except KeyError:
        result = db.query(
            'SELECT `task_id` FROM `cms_tasks` WHERE `instance` = %s AND `cluster_id` = %s',
            HistoryDB.CONDOR_INSTANCE, cluster_id)

        # None marks a cluster that is not known to the history DB for some
        # reason; it is cached too, so the DB is queried only once per cluster.
        taskid = result[0] if len(result) != 0 else None

        _taskids[cluster_id] = taskid

    LOG.debug(
        'Postexecute for CMS Global Pool: taskid for cluster id %d is %s',
        cluster_id, taskid)

    if taskid is None:
        return

    report_remote_host(proc_ad, taskid)

    try:
        cmssite = proc_ad['MATCH_GLIDEIN_CMSSite']
    except KeyError:
        cmssite = 'Unknown'

    # JobStatus 4 is "Completed" in HTCondor.  A missing JobStatus is treated
    # as "not completed" instead of raising KeyError.
    try:
        completed = proc_ad['JobStatus'] == 4
    except KeyError:
        completed = False

    # ProcId is only needed here for the log message.
    try:
        proc_id = proc_ad['ProcId']
    except KeyError:
        proc_id = None

    if not completed or cmssite == 'Unknown':
        # JobStatus != 4 -> the job did not complete
        # CMSSite unknown -> didn't run on a CMS resource
        # In both cases, report as Aborted
        status = 'Aborted'
    else:
        status = 'Done'

    LOG.debug('Reporting cluster id %d process id %s as %s',
              cluster_id, proc_id, status)
    report_task_status(proc_ad, taskid, status)

    report_exit_code(proc_ad, taskid)

    apmonFree()
Пример #7
0
def dashboard_postsubmit(proc_ads):
    """Report a submitted cluster to the CMS Dashboard.

    Called in postsubmit.  Only the cluster is reported here; individual job
    reports are sent by history_update.py.

    Nothing happens when the list is empty, when the first ad has no
    DESIRED_Sites (the cluster will not match any CMS resource) or when the
    first ad is not a vanilla-universe job.

    :param proc_ads: list of job ads of the submission; each ad gets
        'Dashboard_TaskId' and 'Dashboard_Id' assigned.
    """
    if len(proc_ads) == 0:
        return

    head_ad = proc_ads[0]

    if 'DESIRED_Sites' not in head_ad:
        # This cluster will not match any CMS resource
        return

    try:
        universe = head_ad['JobUniverse']
    except KeyError:
        return
    if universe != 5:  # 5: vanilla universe
        return

    LOG.debug('Postsubmit for CMS Global Pool')

    history = HistoryDB()
    select_sql = 'SELECT `task_id` FROM `cms_tasks` WHERE `instance` = %s AND `cluster_id` = %s'
    insert_sql = 'INSERT INTO `cms_tasks` (`instance`, `cluster_id`, `task_id`) VALUES (%s, %s, %s)'

    for ad in proc_ads:
        cid = ad['ClusterId']

        try:
            task = _taskids[cid]
        except KeyError:
            rows = history.query(select_sql, HistoryDB.CONDOR_INSTANCE, cid)

            if len(rows) != 0:
                task = rows[0]

                LOG.debug('Found task id %s for cluster id %d in DB' %
                          (task, cid))
            else:
                # First time encountering this cluster id -> report to dashboard and record in DB
                task = report_master_submission(ad)

                LOG.debug('New task id %s for cluster id %d' %
                          (task, cid))

                history.query(insert_sql, HistoryDB.CONDOR_INSTANCE, cid, task)

            _taskids[cid] = task
        else:
            LOG.debug('Using task id %s for cluster id %d' %
                      (task, cid))

        # In CMSConnect a dashboard task may span several clusters (Dashboard_Id
        # being the full serial ID of the jobs); here clusters and tasks are
        # kept in 1:1 correspondence to keep things simple.
        ad['Dashboard_TaskId'] = task
        ad['Dashboard_Id'] = ad['ProcId']

        report_task_submission(ad, task)

        LOG.debug('Reported task submission to Dashboard')

    # Second pass, after every submission has been reported.
    for ad in proc_ads:
        report_task_status(ad, _taskids[ad['ClusterId']], 'Pending')

    apmonFree()

    LOG.debug('Reported all tasks as Pending')
Пример #8
0
    def popularityInfos(self, jobReport):
        """
        Build the data-popularity report of the job and send it to the DashBoard.

        The expected inputs are read from the text file self.inputInfos: lines
        containing 'inputBlocks', 'inputFiles' or 'parentFiles', followed by
        '=' and the value (file lists are ';'-separated).  They are compared
        with jobReport.inputFiles:

          * a file present in the job report is encoded as
            '<name>::1::<FileType>::<Local|Remote>::<counter>;' and its first
            run is encoded in the lumi string as '<run>::<ranges>::<counter>;';
          * an expected file missing from the job report is encoded with
            status 0 when the first job error mentions it, and with status 2
            otherwise.

        File names are reported relative to the common base of the expected
        files ('Basename' / 'BasenameParent').

        jobReport must expose 'inputFiles' (dict-like entries with 'PFN',
        'LFN', 'FileType' and 'Runs') and 'errors' (entries with a
        'Description' string).  Nothing is returned; the report is sent with
        apmonSend and printed.
        """
        def strip_base(name, base):
            # Part of name after the first occurrence of base.  The name is
            # returned unchanged when base is empty or absent (str.split
            # would raise ValueError / IndexError in those cases).
            if not base:
                return name
            _, found, tail = name.partition(base)
            return tail if found else name

        def common_base(paths):
            # Common prefix of several paths, or the directory (with trailing
            # '/') of a single one; '' for an empty list.
            if len(paths) > 1:
                return os.path.commonprefix(paths)
            if len(paths) == 1:
                return "%s/" % os.path.dirname(paths[0])
            return ''

        report_dict = {}
        inputList = []
        inputParentList = []
        report_dict['inputBlocks'] = ''
        if os.path.exists(self.inputInfos):
            # "with" closes the file even if a malformed line raises.
            with open(self.inputInfos, 'r') as infoFile:
                for line in infoFile:
                    if "inputBlocks" in line:
                        report_dict['inputBlocks'] = line.split("=")[1].strip()
                    if "inputFiles" in line:
                        inputList = line.split("=")[1].strip().split(";")
                    if "parentFiles" in line:
                        inputParentList = line.split("=")[1].strip().split(";")

        # An empty value splits into [''], which means "no files".
        if inputList == ['']:
            inputList = []
        if inputParentList == ['']:
            inputParentList = []

        basename = common_base(inputList)
        basenameParent = common_base(inputParentList)

        # Files actually read by the job, keyed by their name relative to the
        # base: name -> (FileType, access, Runs)
        readFile = {}
        readFileParent = {}
        for inputFile in jobReport.inputFiles:
            lfn = inputFile['LFN']
            fileAccess = 'Local'
            if inputFile.get("PFN").find('xrootd') >= 0:
                fileAccess = 'Remote'
            fileAttr = (inputFile.get("FileType"), fileAccess, inputFile.get("Runs"))
            if basename in lfn:
                readFile[strip_base(lfn, basename)] = fileAttr
            else:
                # Not below the input base: treated as a parent file.
                readFileParent[strip_base(lfn, basenameParent)] = fileAttr

        # Expected files; the ones found in the job report are removed below,
        # so what remains are the files that were not read.
        cleanedinputList = [strip_base(name, basename) for name in inputList]
        cleanedParentList = [strip_base(name, basenameParent) for name in inputParentList]

        def encode_read(readFiles, expected):
            # Encode the files read by the job and drop them from expected.
            # Returns (file entries, lumi entries) as lists of strings.
            fileEntries = []
            lumiEntries = []
            count = 1
            for name, attr in readFiles.items():
                # A file that was read but not expected is still reported.
                if name in expected:
                    expected.remove(name)
                fileEntries.append('%s::%d::%s::%s::%d;' % (name, 1, attr[0], attr[1], count))
                # Only the first run of the file is reported.
                run, lumis = list(attr[2].items())[0]
                lumiEntries.append('%s::%s::%d;' % (run, self.makeRanges(lumis), count))
                count += 1
            return fileEntries, lumiEntries

        inputEntries, lumiEntries = encode_read(readFile, cleanedinputList)
        parentEntries, lumiParentEntries = encode_read(readFileParent, cleanedParentList)

        unreadFormat = '%s::%d::%s::%s::%s;'

        for name in cleanedinputList:
            if len(jobReport.errors) and jobReport.errors[0]["Description"].find(name) >= 0:
                fileAccess = 'Local'
                if jobReport.errors[0]["Description"].find('xrootd') >= 0:
                    fileAccess = 'Remote'
                inputEntries.append(unreadFormat % (name, 0, 'Unknown', fileAccess, 'Unknown'))
            else:
                inputEntries.append(unreadFormat % (name, 2, 'Unknown', 'Unknown', 'Unknown'))

        # Unread parent files belong to the parent string in every branch.
        for name in cleanedParentList:
            if len(jobReport.errors) and jobReport.errors[0]["Description"].find(name) >= 0:
                parentEntries.append(unreadFormat % (name, 0, 'Unknown', 'Local', 'Unknown'))
            else:
                parentEntries.append(unreadFormat % (name, 2, 'Unknown', 'Unknown', 'Unknown'))

        report_dict['inputFiles'] = ''.join(inputEntries)
        report_dict['parentFiles'] = ''.join(parentEntries)
        report_dict['lumisRange'] = ''.join(lumiEntries)
        report_dict['lumisParentRange'] = ''.join(lumiParentEntries)
        report_dict['Basename'] = basename
        report_dict['BasenameParent'] = basenameParent

        # send to DashBoard
        apmonSend(self.MonitorID, self.MonitorJobID, report_dict)
        apmonFree()

        # Printed unconditionally (the self.debug guard is disabled).
        print("Popularity start")
        for k, v in report_dict.items():
            print("%s : %s" % (k, v))
        print("Popularity stop")
        return