def format(self, result):
    """
    _format_

    Return the first column of the first row produced by
    DBFormatter.format, or False when no rows came back.
    """
    rows = DBFormatter.format(self, result)
    return rows[0][0] if len(rows) > 0 else False
def testBFormatting(self):
    """
    Test various formats

    Runs the select stored on the current thread once per formatter method
    and checks the output of format, formatOne, formatDict and formatOneDict.
    """
    myThread = threading.currentThread()
    formatter = DBFormatter(myThread.logger, myThread.dbi)
    myThread.transaction.begin()

    def runSelect():
        # Every formatter call is fed a freshly executed select.
        return myThread.transaction.processData(myThread.select)

    rows = formatter.format(runSelect())
    self.assertEqual(rows, [['value1a', 'value2a'],
                            ['value1b', 'value2b'],
                            ['value1c', 'value2d']])

    firstRow = formatter.formatOne(runSelect())
    print('test1 ' + str(firstRow))
    self.assertEqual(firstRow, ['value1a', 'value2a'])

    dictRows = formatter.formatDict(runSelect())
    self.assertEqual(dictRows, [{'bind2': 'value2a', 'bind1': 'value1a'},
                                {'bind2': 'value2b', 'bind1': 'value1b'},
                                {'bind2': 'value2d', 'bind1': 'value1c'}])

    firstDict = formatter.formatOneDict(runSelect())
    self.assertEqual(firstDict, {'bind2': 'value2a', 'bind1': 'value1a'})
def testBFormatting(self):
    """
    Test various formats

    The expected values are spelled out first; each formatter method is then
    checked against a newly executed select.
    """
    expectedRows = [["value1a", "value2a"], ["value1b", "value2b"], ["value1c", "value2d"]]
    expectedOne = ["value1a", "value2a"]
    expectedDicts = [
        {"bind2": "value2a", "bind1": "value1a"},
        {"bind2": "value2b", "bind1": "value1b"},
        {"bind2": "value2d", "bind1": "value1c"},
    ]
    expectedOneDict = {"bind2": "value2a", "bind1": "value1a"}

    myThread = threading.currentThread()
    dbformatter = DBFormatter(myThread.logger, myThread.dbi)
    myThread.transaction.begin()

    # Plain list-of-lists output
    queryResult = myThread.transaction.processData(myThread.select)
    self.assertEqual(dbformatter.format(queryResult), expectedRows)

    # Single row output
    queryResult = myThread.transaction.processData(myThread.select)
    single = dbformatter.formatOne(queryResult)
    print("test1 " + str(single))
    self.assertEqual(single, expectedOne)

    # List-of-dicts output
    queryResult = myThread.transaction.processData(myThread.select)
    self.assertEqual(dbformatter.formatDict(queryResult), expectedDicts)

    # Single dict output
    queryResult = myThread.transaction.processData(myThread.select)
    self.assertEqual(dbformatter.formatOneDict(queryResult), expectedOneDict)
def testBFormatting(self):
    """
    Test various formats

    Checks the multi-row formatters against fixed fixtures and the single-row
    formatters against the first entry of those fixtures.
    """
    allRows = [['value1a', 'value2a'],
               ['value1b', 'value2b'],
               ['value1c', 'value2d']]
    allDicts = [{'bind2': 'value2a', 'bind1': 'value1a'},
                {'bind2': 'value2b', 'bind1': 'value1b'},
                {'bind2': 'value2d', 'bind1': 'value1c'}]

    myThread = threading.currentThread()
    dbformatter = DBFormatter(myThread.logger, myThread.dbi)
    transaction = myThread.transaction
    transaction.begin()

    output = dbformatter.format(transaction.processData(myThread.select))
    self.assertEqual(output, allRows)

    output = dbformatter.formatOne(transaction.processData(myThread.select))
    print('test1 ' + str(output))
    # formatOne is expected to give back the first row only
    self.assertEqual(output, allRows[0])

    output = dbformatter.formatDict(transaction.processData(myThread.select))
    self.assertEqual(output, allDicts)

    output = dbformatter.formatOneDict(transaction.processData(myThread.select))
    # formatOneDict is expected to give back the first row as a dict
    self.assertEqual(output, allDicts[0])
def format(self, result):
    """
    _format_

    Collect the first column of every row produced by DBFormatter.format
    into a flat list.
    """
    return [row[0] for row in DBFormatter.format(self, result)]
def format(self, results):
    """
    _format_

    Build a dictionary from the first formatted row: its first column is
    stored under "fwjr_path" and its second column under "taskName".
    An empty result raises IndexError.
    """
    firstRow = DBFormatter.format(self, results)[0]
    return {"fwjr_path": firstRow[0], "taskName": firstRow[1]}
def format(self, results):
    """
    _format_

    Build a list of tuples, one tuple per row produced by
    DBFormatter.format.
    """
    return [tuple(row) for row in DBFormatter.format(self, results)]
def format(self, result):
    """
    _format_

    Flatten the formatted rows into a list holding the first column of
    each row (stored as subscription IDs by the original implementation).
    """
    formattedRows = DBFormatter.format(self, result)
    return [entry[0] for entry in formattedRows]
def format(self, results):
    """
    _format_

    Return the couch document ID (first column of the first row), or None
    if the query returned no rows.
    """
    rows = DBFormatter.format(self, results)
    if len(rows) > 0:
        return rows[0][0]
    return None
def format(self, results):
    """
    _format_

    Reduce the query results to a flat list made of the first column of
    every row (LFNs, according to the original documentation).
    """
    rows = DBFormatter.format(self, results)
    firstColumn = [row[0] for row in rows]
    return firstColumn
def format(self, results):
    """
    _format_

    Turn every formatted row into a dictionary with the first column under
    "id" and the second column under "fwjr_path".
    """
    return [{"id": row[0], "fwjr_path": row[1]}
            for row in DBFormatter.format(self, results)]
def format(self, results):
    """
    _format_

    Return a list with the first column of each formatted row (a list of
    LFNs, according to the original documentation).
    """
    return [record[0] for record in DBFormatter.format(self, results)]
def format(self, results):
    """
    _format_

    Gather the job group ids (first column of each formatted row) into a
    single flat list.
    """
    formatted = DBFormatter.format(self, results)
    return [row[0] for row in formatted]
def format(self, result):
    """
    _format_

    Flatten the query result into a single list of subscription types,
    taken from the first column of each formatted row.
    """
    rows = DBFormatter.format(self, result)
    subscriptionTypes = [row[0] for row in rows]
    return subscriptionTypes
def format(self, results):
    """
    _format_

    Strip the query result down to one flat list of LFNs, i.e. the first
    column of every formatted row.
    """
    return [row[0] for row in DBFormatter.format(self, results)]
def format(self, result):
    """
    _format_

    Return one flat list containing the first column of every formatted
    row.
    """
    formattedRows = DBFormatter.format(self, result)
    return [item[0] for item in formattedRows]
def format(self, results):
    """
    _format_

    Flatten all formatted rows into one list of job group IDs. Every
    column of every row is included and converted with int().
    """
    rows = DBFormatter.format(self, results)
    return [int(value) for row in rows for value in row]
def format(self, results):
    """
    _format_

    Convert the rows returned by the query into a single list of open
    fileset names: the first column of each row, passed through str().
    """
    return [str(row[0]) for row in DBFormatter.format(self, results)]
def format(self, result):
    """
    Keep only the subscriptions whose task name (second column) splits
    into at most three pieces on '/', i.e. contains no more than two
    slashes, so that those tasks can be declared top level tasks.

    :param result: raw query result handed to DBFormatter.format
    :return: a list of subscriptions id (first column of the kept rows)
    """
    rows = DBFormatter.format(self, result)
    # A task name starts with '/', so the first piece of the split is empty:
    # "at most 3 pieces" is the same as "at most 2 slashes".
    return [row[0] for row in rows if row[1].count('/') <= 2]
def execute(self, jobID, conn=None, transaction=False):
    """
    _execute_

    Run inputSQL for the given job. When nothing comes back an empty list
    is returned. When the second column of the first row is 0 the
    parentSQL query is run instead and its result is formatted; otherwise
    the inputSQL result itself is passed to self.format.

    :param jobID: value bound to the "job" bind variable
    :param conn: forwarded to dbi.processData
    :param transaction: forwarded to dbi.processData
    """
    result = self.dbi.processData(self.inputSQL, {"job": jobID},
                                  conn=conn, transaction=transaction)
    inputRows = DBFormatter.format(self, result)
    if len(inputRows) == 0:
        return []

    # A 0 flag means the input to the job consisted of unmerged files, so
    # we need to query for the parents of the job's input.
    inputIsUnmerged = int(inputRows[0][1]) == 0
    if inputIsUnmerged:
        result = self.dbi.processData(self.parentSQL, {"job": jobID},
                                      conn=conn, transaction=transaction)

    # NOTE(review): on the non-parent path this formats `result` a second
    # time - presumably the result set can be read twice; confirm.
    return self.format(result)
def execute(self, jobID, conn=None, transaction=False):
    """
    _execute_

    Look up the input of a job with inputSQL and return it formatted with
    self.format. If the second column of the first input row is 0, the
    parents are looked up with parentSQL and returned instead. An empty
    input lookup yields an empty list.

    :param jobID: value bound to the "job" bind variable
    :param conn: forwarded to dbi.processData
    :param transaction: forwarded to dbi.processData
    """
    def runQuery(sql):
        # Both queries share the same bind and connection settings.
        return self.dbi.processData(sql, {"job": jobID},
                                    conn=conn, transaction=transaction)

    result = runQuery(self.inputSQL)
    firstPass = DBFormatter.format(self, result)
    if len(firstPass) == 0:
        return []

    if int(firstPass[0][1]) != 0:
        return self.format(result)

    # The input to the job consisted of unmerged files, so we'll need
    # to query for the parents of the job's input.
    return self.format(runQuery(self.parentSQL))
def select(self, query):
    """
    execute a query.

    Connects the session, runs the query and returns the rows formatted by
    DBFormatter.format, or None when the reported rowcount is not positive.
    """
    # db connect
    self.session.connect()

    # presumably a WMCore.Database.ResultSet - only its rowcount is used here
    results = self.session.processData(query)
    if not results.rowcount > 0:
        return None

    return DBFormatter(self.logger, self.session).format(results)
def select(self, query):
    """
    execute a query.

    The session is connected first; the query output is formatted through
    a DBFormatter only if its rowcount is greater than zero, otherwise
    None is returned.
    """
    # db connect
    self.session.connect()

    # presumably a WMCore.Database.ResultSet - only its rowcount is used here
    queryOutput = self.session.processData(query)
    hasRows = queryOutput.rowcount > 0
    return DBFormatter(self.logger, self.session).format(queryOutput) if hasRows else None
def testBFormatting(self):
    """
    Test various formats

    Fills the database through stuffDB, then checks what format, formatOne,
    formatDict and formatOneDict return for selectSQL.
    """
    expectedRows = [['value1a', 1, 'value2a'],
                    ['value1b', 2, 'value2b'],
                    ['value1c', 3, 'value2d']]
    expectedDicts = [{'column3': 'value2a', 'column2': 1, 'column1': 'value1a'},
                     {'column3': 'value2b', 'column2': 2, 'column1': 'value1b'},
                     {'column3': 'value2d', 'column2': 3, 'column1': 'value1c'}]

    # fill the database with some initial data
    self.stuffDB()

    myThread = threading.currentThread()
    dbformatter = DBFormatter(myThread.logger, myThread.dbi)

    def runSelect():
        # Each check works on a freshly executed query.
        return myThread.dbi.processData(self.selectSQL)

    self.assertEqual(dbformatter.format(runSelect()), expectedRows)

    oneRow = dbformatter.formatOne(runSelect())
    print('test1 ' + str(oneRow))
    self.assertEqual(oneRow, ['value1a', 1, 'value2a'])

    self.assertEqual(dbformatter.formatDict(runSelect()), expectedDicts)

    oneDict = dbformatter.formatOneDict(runSelect())
    self.assertEqual(oneDict, {'column3': 'value2a', 'column2': 1, 'column1': 'value1a'})
def main():
    """
    _main_

    For every job id given with -j (whitespace separated), look up the
    framework job report path and task name in the database and store the
    task name in the report, unless the report already has one.

    Exits through parser.error() when no job id is provided.

    :return: 0 once all job ids were processed
    """
    usage = "Usage: %prog -j jobId"
    parser = OptionParser(usage=usage)
    parser.add_option('-j', '--jobId', help='Wmbs jobId reported in the component log', dest='jobId')
    options, _ = parser.parse_args()
    if not options.jobId:
        # parser.error() prints the message and terminates the process, so the
        # usage example has to be part of the message itself.
        parser.error('You must provide at least one jobId\n'
                     'Example: python fixJobAccountant.py -j "1678 1679"')

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    for job in options.jobId.split():
        # NOTE(review): the job id from the command line is concatenated
        # straight into the SQL text; consider a bind variable once the
        # definition of getQuery can be adapted.
        myQuery = getQuery + str(job)
        output = myThread.transaction.processData(myQuery)
        result = formatter.format(output)
        if not result:
            # Unknown job id: nothing to update, do not crash on result[0].
            print("No report path/task name found for job id %s.\nSkipping .." % job)
            continue
        reportPath, taskName = result[0][0], result[0][1]

        jr = Report(reportPath)
        if jr.getTaskName():
            print("Job id %s already has a TaskName %s.\nSkipping .." % (job, jr.getTaskName()))
            continue
        jr.setTaskName(taskName)
        jr.save(reportPath)
        print("Updated TaskName for fwjr for jobId: %s" % job)

    print("Done!")
    return 0
def getWMBSInfo(config):
    """
    Print a status report built from a series of database queries: known
    and incomplete workflows, workqueue elements per status, workflows not
    fully injected, job counts per state, unfinished subscriptions, files
    available/acquired, open blocks and files missing in DBS / PhEDEx.

    :param config: passed on to fetchWorkflowsSpec, checkLocalWQStatus and
                   checkGlobalWQStatus
    :return: None, everything is printed
    """
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    def queryDicts(sql):
        # Run a query and get its rows back as dictionaries.
        return formatter.formatDict(myThread.dbi.processData(sql))

    def queryFlat(sql):
        # Run a query and flatten its rows with flattenList.
        return flattenList(formatter.format(myThread.dbi.processData(sql)))

    workflows = [wf['name'] for wf in queryDicts(knownWorkflows)]
    print("\n*** WORKFLOWS: found %d distinct workflows in this agent." % len(workflows))
    workflowsDict = fetchWorkflowsSpec(config, workflows)
    printWfStatus(workflows, workflowsDict)

    for st in ('Available', 'Negotiating', 'Acquired', 'Running'):
        print("\n*** WORKQUEUE: elements still marked as %s in LQ workqueue / workqueue_inbox." % st)
        checkLocalWQStatus(config, st)

    for st in ("Acquired", "Running"):
        print("\n*** WORKQUEUE: elements still marked as %s in GQ workqueue." % st)
        checkGlobalWQStatus(config, st)

    workflows = [wf['name'] for wf in queryDicts(incompleteWfs)]
    print("\n*** WORKFLOWS: there are %d distinct workflows not completed." % len(workflows))
    printWfStatus(workflows, workflowsDict)

    wfsNotInjected = queryFlat(workflowsNotInjected)
    print("\n*** WORKFLOWS: found %d workflows not fully injected." % len(wfsNotInjected))
    printWfStatus(wfsNotInjected, workflowsDict)

    jobsByState = queryDicts(jobCountByState)
    print("\n*** WMBS: amount of wmbs jobs in each status:\n%s" % jobsByState)
    # IF we have executing jobs in wmbs and nothing in condor, then investigate the wfs
    stateNames = [item['name'] for item in jobsByState]
    if 'executing' in stateNames:
        wfsJobCount = queryDicts(workflowsExecuting)
        print("\n*** WMBS: %d workflows with executing jobs in wmbs:" % len(wfsJobCount))
        workflows = [wf['name'] for wf in wfsJobCount]
        printWfStatus(workflows, workflowsDict)

    unfinishedSubs = [wf['wfname'] for wf in queryDicts(unfinishedSubscriptions)]
    print("\n*** SUBSCRIPTIONS: subscriptions not finished: %d" % len(unfinishedSubs))
    printWfStatus(unfinishedSubs, workflowsDict)

    filesAvailable = queryDicts(filesAvailWMBS)
    print("\n*** SUBSCRIPTIONS: found %d files available in WMBS (waiting for job creation):\n%s"
          % (len(filesAvailable), filesAvailable))

    filesAcquired = queryDicts(filesAcqWMBS)
    print("\n*** SUBSCRIPTIONS: found %d files acquired in WMBS (waiting for jobs to finish):\n%s"
          % (len(filesAcquired), filesAcquired))

    blocksopenDBS = queryDicts(blocksOpenDBS)
    print("\n*** DBS: found %d blocks open in DBS." % len(blocksopenDBS), end="")
    print(" Printing the first 20 blocks only:\n%s" % blocksopenDBS[:20])

    filesnotinDBS = queryFlat(filesNotInDBS)
    print("\n*** DBS: found %d files not uploaded to DBS.\n" % len(filesnotinDBS))
    getDsetAndWf(filesnotinDBS, workflowsDict)

    filesnotinPhedex = queryFlat(filesNotInPhedex)
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (recoverable)."
          % len(filesnotinPhedex))
    getDsetAndWf(filesnotinPhedex, workflowsDict)

    # NOTE(review): the message below says "valid block id" although the query
    # is named filesNotInPhedexNull - possibly a copy/paste slip; confirm.
    filesnotinPhedexNull = queryFlat(filesNotInPhedexNull)
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (unrecoverable)."
          % len(filesnotinPhedexNull))
    getDsetAndWf(filesnotinPhedexNull, workflowsDict)
def format(self, result):
    """
    _format_

    Return the first column of the first formatted row. A falsy (e.g.
    empty) formatted result yields False instead.
    """
    rows = DBFormatter.format(self, result)
    if not rows:
        return False
    return rows[0][0]
def getWMBSInfo(config):
    """
    Query the agent database and print a summary of workflows, workqueue
    elements, wmbs jobs, subscriptions, files and blocks that are still
    pending.

    :param config: passed on to fetchWorkflowsSpec, checkLocalWQStatus and
                   checkGlobalWQStatus
    :return: None, everything is printed
    """
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)

    # --- workflows known to this agent
    knownRows = formatter.formatDict(myThread.dbi.processData(knownWorkflows))
    workflows = [row['name'] for row in knownRows]
    print("\n*** WORKFLOWS: found %d distinct workflows in this agent." % len(workflows))
    workflowsDict = fetchWorkflowsSpec(config, workflows)
    printWfStatus(workflows, workflowsDict)

    # --- workqueue elements, local first and then global
    localStates = ('Available', 'Negotiating', 'Acquired', 'Running')
    for state in localStates:
        print("\n*** WORKQUEUE: elements still marked as %s in LQ workqueue / workqueue_inbox." % state)
        checkLocalWQStatus(config, state)
    globalStates = ("Acquired", "Running")
    for state in globalStates:
        print("\n*** WORKQUEUE: elements still marked as %s in GQ workqueue." % state)
        checkGlobalWQStatus(config, state)

    # --- workflows not completed / not fully injected
    incompleteRows = formatter.formatDict(myThread.dbi.processData(incompleteWfs))
    workflows = [row['name'] for row in incompleteRows]
    print("\n*** WORKFLOWS: there are %d distinct workflows not completed." % len(workflows))
    printWfStatus(workflows, workflowsDict)

    notInjected = flattenList(formatter.format(myThread.dbi.processData(workflowsNotInjected)))
    print("\n*** WORKFLOWS: found %d workflows not fully injected." % len(notInjected))
    printWfStatus(notInjected, workflowsDict)

    # --- wmbs jobs
    jobsByState = formatter.formatDict(myThread.dbi.processData(jobCountByState))
    print("\n*** WMBS: amount of wmbs jobs in each status:\n%s" % jobsByState)
    # IF we have executing jobs in wmbs and nothing in condor, then investigate the wfs
    if 'executing' in [entry['name'] for entry in jobsByState]:
        executingRows = formatter.formatDict(myThread.dbi.processData(workflowsExecuting))
        print("\n*** WMBS: %d workflows with executing jobs in wmbs:" % len(executingRows))
        workflows = [row['name'] for row in executingRows]
        printWfStatus(workflows, workflowsDict)

    # --- subscriptions and their files
    subRows = formatter.formatDict(myThread.dbi.processData(unfinishedSubscriptions))
    unfinishedSubs = [row['wfname'] for row in subRows]
    print("\n*** SUBSCRIPTIONS: subscriptions not finished: %d" % len(unfinishedSubs))
    printWfStatus(unfinishedSubs, workflowsDict)

    available = formatter.formatDict(myThread.dbi.processData(filesAvailWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files available in WMBS (waiting for job creation):\n%s"
          % (len(available), available))

    acquired = formatter.formatDict(myThread.dbi.processData(filesAcqWMBS))
    print("\n*** SUBSCRIPTIONS: found %d files acquired in WMBS (waiting for jobs to finish):\n%s"
          % (len(acquired), acquired))

    # --- DBS
    openBlocks = formatter.formatDict(myThread.dbi.processData(blocksOpenDBS))
    print("\n*** DBS: found %d blocks open in DBS." % len(openBlocks), end="")
    print(" Printing the first 20 blocks only:\n%s" % openBlocks[:20])

    missingInDBS = flattenList(formatter.format(myThread.dbi.processData(filesNotInDBS)))
    print("\n*** DBS: found %d files not uploaded to DBS.\n" % len(missingInDBS))
    getDsetAndWf(missingInDBS, workflowsDict)

    # --- PhEDEx
    missingInPhedex = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedex)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (recoverable)."
          % len(missingInPhedex))
    getDsetAndWf(missingInPhedex, workflowsDict)

    # NOTE(review): the message below says "valid block id" although the query
    # is named filesNotInPhedexNull - possibly a copy/paste slip; confirm.
    missingInPhedexNull = flattenList(formatter.format(myThread.dbi.processData(filesNotInPhedexNull)))
    print("\n*** PHEDEX: found %d files not injected in PhEDEx, with valid block id (unrecoverable)."
          % len(missingInPhedexNull))
    getDsetAndWf(missingInPhedexNull, workflowsDict)
def format(self, result):
    """
    _format_

    Return the first column of the first formatted row. There is no guard
    for an empty result: indexing it raises IndexError.
    """
    return DBFormatter.format(self, result)[0][0]
def main():
    """
    _main_

    Find job report pickle files that produce output LFNs which are either
    shared between several reports or already present in wmbs_file_details.

    Steps:
      1. collect the report paths mentioned in the last 1000 lines of the
         JobAccountant component log;
      2. load each existing report and map every output LFN to the reports
         containing it; LFNs seen in more than one report are dumped to
         dupPickles.json and, on confirmation, all but the first report of
         each such LFN are deleted;
      3. load every LFN from wmbs_file_details, intersect it with the LFNs
         of the reports and print an overview of the matching reports.

    Always terminates through sys.exit(0).
    """
    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    if 'manage' not in os.environ:
        os.environ['manage'] = '/data/srv/wmagent/current/config/wmagent/manage'

    ### Fetch the report pickle files from the component log
    command = ["tail", "-n1000", "install/wmagent/JobAccountant/ComponentLog"]
    # universal_newlines=True makes communicate() return text instead of bytes
    # on Python 3, which the substring test below relies on.
    p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    out, _ = p.communicate()
    # The pickle path is taken from the third whitespace separated field
    logFiles = [line.split()[2] for line in out.splitlines()
                if 'install/wmagent/JobCreator/JobCache' in line]
    msg = "Found %d pickle files to parse " % len(logFiles)

    ### Now unpickle each of these files and get their output files
    # also check whether any of them are duplicate
    lfn2PklDict = {}
    dupOutputPkl = {}  # string value with the dup LFN and keyed by the pickle file path
    jobReport = Report()
    for pklPath in logFiles:
        if not os.path.exists(pklPath):
            continue

        jobReport.load(pklPath)
        for e in jobReport.getAllFiles():
            lfn2PklDict.setdefault(e['lfn'], []).append(pklPath)

    # now check which files contain more than one pickle path (= created by diff jobs)
    dupFiles = []
    for lfn, pkls in lfn2PklDict.items():
        if len(pkls) > 1:
            dupFiles.append(lfn)
            for pkl in pkls:
                if pkl not in dupOutputPkl:
                    jobReport.load(pkl)
                    dupOutputPkl[pkl] = jobReport.__to_json__(None)
                    dupOutputPkl[pkl]['dup_lfns'] = []
                dupOutputPkl[pkl]['dup_lfns'].append(lfn)

    msg += "with a total of %d output files and %d duplicated" % (len(lfn2PklDict), len(dupFiles))
    msg += " files to process among them."
    msg += "\nDuplicate files are:\n%s" % dupFiles
    print(msg)

    if dupFiles:
        print("See dupPickles.json for further details ...")
        with open('dupPickles.json', 'w') as fo:
            json.dump(dupOutputPkl, fo, indent=2)

        # raw_input only exists on Python 2; input is its Python 3 equivalent
        try:
            ask = raw_input  # noqa: F821
        except NameError:
            ask = input
        var = ask("Can we automatically delete those pickle files? Y/N\n")
        if var == "Y":
            # then delete all job report files but the first one - NOT ideal
            for fname in dupFiles:
                for pklFile in lfn2PklDict[fname][1:]:
                    if os.path.isfile(pklFile):
                        print("Deleting %s ..." % pklFile)
                        os.remove(pklFile)
                    else:
                        print("    File has probably been already deleted %s ..." % pklFile)
            print("  Done!")

    ### Time to load all - this is BAD - LFNs from WMBS database
    print("\nNow loading all LFNs from wmbs_file_details ...")
    connectToDB()
    myThread = threading.currentThread()
    formatter = DBFormatter(logging, myThread.dbi)
    output = myThread.transaction.processData("SELECT lfn FROM wmbs_file_details")
    lfnsDB = [item[0] for item in formatter.format(output)]
    print("Retrieved %d lfns from wmbs_file_details" % len(lfnsDB))

    ### Compare what are the duplicates
    dupFiles = list(set(lfn2PklDict) & set(lfnsDB))
    print("\nFound %d duplicate files." % len(dupFiles))
    if len(dupFiles) == 0:
        sys.exit(0)

    ### Print some basic data about these reports
    print("Their overview is: ")
    dbDupPkl = []
    for fname in dupFiles:
        for pklPath in lfn2PklDict[fname]:
            jobReport.load(pklPath)
            jobInfo = {'lfn': fname,
                       'pklPath': pklPath,
                       'exitCode': jobReport.getExitCode(),
                       'taskSuccess': jobReport.taskSuccessful(),
                       'EOSLogURL': jobReport.getLogURL(),
                       'HostName': jobReport.getWorkerNodeInfo()['HostName'],
                       'Site': jobReport.getSiteName(),
                       'task': jobReport.getTaskName()}
            dbDupPkl.append(jobInfo)

    print(pformat(dbDupPkl))
    print("")
    print("Remove them, restart the component and be happy!\n")
    sys.exit(0)
class MySQLBase(object):
    """
    Base class for simple DAO-like objects: it keeps a logger, a database
    interface and a DBFormatter, and offers small conversion and formatting
    helpers on top of them.

    The default execute() reads the statement from self.sql, which this
    class does not define itself.
    """

    def __init__(self, logger, dbinterface):
        """
        :param logger: logger stored on the instance and given to DBFormatter
        :param dbinterface: database interface used to run statements
        """
        self.logger = logger
        self.dbi = dbinterface
        self.wmformatter = DBFormatter(self.logger, self.dbi)

    def truefalse(self, value):
        """
        Convert a value to a boolean.

        The strings 'False', 'FALSE', 'n', 'NO' and 'No' are treated as
        false; any other value is evaluated with bool().
        """
        # NOTE(review): other spellings such as 'false', 'no' or 'N' are
        # non-empty strings and therefore evaluate to True - confirm intended.
        if value in ('False', 'FALSE', 'n', 'NO', 'No'):
            value = 0
        return bool(value)

    def convertdatetime(self, t):
        """
        Convert a datetime into integer seconds since the epoch, using
        time.mktime on its time tuple (i.e. interpreted as local time).
        """
        return int(time.mktime(t.timetuple()))

    def timestamp(self):
        """
        generate a timestamp
        """
        return self.convertdatetime(datetime.datetime.now())

    def format(self, result, dictionary=False):
        """
        Some standard formatting

        :param result: query result handed to the DBFormatter
        :param dictionary: when true use formatDict, otherwise format
        """
        if dictionary:
            return self.wmformatter.formatDict(result)
        return self.wmformatter.format(result)

    def formatOne(self, result, dictionary=False):
        """
        single value format

        :param result: query result handed to the DBFormatter
        :param dictionary: when true use formatOneDict, otherwise formatOne
        """
        if dictionary:
            return self.wmformatter.formatOneDict(result)
        return self.wmformatter.formatOne(result)

    def getBinds(self):
        """
        Return the appropriately formatted binds for the sql
        """
        return {}

    def execute(self, conn=None, transaction=False):
        """
        A simple select with no binds/arguments is the default

        Runs self.sql with the binds from getBinds() and returns the result
        processed by self.format().
        """
        result = self.dbi.processData(self.sql, self.getBinds(),
                                      conn=conn, transaction=transaction)
        return self.format(result)

    def restoreCaps(self, dicts, *newkeys):
        """
        Utility function to restore capitalization in dictionary keys lost to
        conform with Oracle - use of this should be phased out

        Every dictionary in dicts is modified in place: for each name in
        newkeys the value stored under its lower-case form is moved to the
        name itself. A missing lower-case key raises KeyError.

        :return: the same dicts object that was passed in
        """
        for row in dicts:
            for key in newkeys:
                row[key] = row.pop(key.lower())
        return dicts