def display_list(lis, **kwargs):
    # First pass: collect the job ids that must be resolved through the
    # accounting data.
    id_dict = {}
    for arg in lis:
        if type(arg) is Report or type(arg) is job_stats.Job:
            pass
        elif str(arg).isdigit(): # XXX
            id_dict[str(arg)] = None
        else:
            raise ValueError("cannot convert arg `%s' to Job or Report" % str(arg))
    if id_dict:
        sge_acct.fill(id_dict)
    # Second pass: turn every argument into a Report and display it.
    for arg in lis:
        report = None
        if type(arg) is Report:
            report = arg
        elif type(arg) is job_stats.Job:
            report = Report(arg)
        else:
            acct = id_dict[str(arg)]
            if acct:
                job = job_stats.from_acct(acct, os.getenv('TACC_STATS_HOME', '/scratch/projects/tacc_stats'))
                report = Report(job)
            else:
                job_stats.error("no accounting data found for job `%s'\n", arg)
        if report:
            report.display(**kwargs)
            # Only print the header for the first report.
            kwargs['print_header'] = False
def display_list(lis, **kwargs):
    # Collect the job ids that must be resolved through the accounting data.
    id_dict = {}
    for arg in lis:
        if type(arg) is Report or type(arg) is job_stats.Job:
            pass
        elif str(arg).isdigit(): # XXX
            id_dict[str(arg)] = None
        else:
            raise ValueError("cannot convert arg `%s' to Job or Report" % str(arg))
    if id_dict:
        sge_acct.fill(id_dict)
    for arg in lis:
        report = None
        if type(arg) is Report:
            report = arg
        elif type(arg) is job_stats.Job:
            report = Report(arg)
        else:
            acct = id_dict[str(arg)]
            if acct:
                job = job_stats.from_acct(acct)
                report = Report(job)
            else:
                job_stats.error("no accounting data found for job `%s'\n", arg)
        if report:
            report.display(**kwargs)
            # Only print the header for the first report.
            kwargs['print_header'] = False
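# A minimal usage sketch for display_list. The job ids below are hypothetical,
# and the print_header keyword is assumed to be accepted by Report.display(),
# as the kwargs handling above implies. Digit strings are resolved through
# sge_acct.fill(); Report and job_stats.Job instances pass straight through.
def _demo_display_list():
    display_list(["2255593", "2255610"], print_header=True)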
def createsummary(options, totalprocs, procid):
    procidstr = "%s of %s " % (procid, totalprocs) if totalprocs is not None else ""
    logging.info("Processor " + procidstr + "starting")

    referencetime = int(time.time()) - (7 * 24 * 3600)

    config = account.getconfig(options['config'])
    dbconf = config['accountdatabase']
    outdb = output.factory(config['outputdatabase'])

    ratecalc = RateCalculator(procid)
    timewindows = dict()

    for resourcename, settings in config['resources'].iteritems():

        if 'enabled' in settings:
            if settings['enabled'] == False:
                continue

        if options['resource'] not in (None, resourcename, str(settings['resource_id'])):
            continue

        processtimes = { "mintime": 2**64, "maxtime": 0 }

        dbreader = account.DbAcct(settings['resource_id'], dbconf, PROCESS_VERSION,
                                  totalprocs, procid, options['localjobid'])

        bacct = batch_acct.factory(settings['batch_system'], settings['acct_path'],
                                   settings['host_name_ext'])

        if settings['lariat_path'] != "":
            lariat = summarize.LariatManager(settings['lariat_path'])
        else:
            lariat = None

        dbwriter = account.DbLogger(dbconf["dbname"], dbconf["tablename"],
                                    dbconf["defaultsfile"])

        for acct in dbreader.reader():
            logging.debug("%s local_job_id = %s", resourcename, acct['id'])
            job = job_stats.from_acct(acct, settings['tacc_stats_home'],
                                      settings['host_list_dir'], bacct)
            summary, timeseries = summarize.summarize(job, lariat)

            insertOk = outdb.insert(resourcename, summary, timeseries)

            if summary['complete'] == False and summary["acct"]['end_time'] > referencetime:
                # Do not mark incomplete jobs as done unless they are older than
                # the reference time (which defaults to 7 days ago).
                dbwriter.logprocessed(acct, settings['resource_id'], ERROR_INCOMPLETE)
                continue

            if insertOk:
                dbwriter.logprocessed(acct, settings['resource_id'], PROCESS_VERSION)
                processtimes['mintime'] = min(processtimes['mintime'], summary["acct"]['end_time'])
                processtimes['maxtime'] = max(processtimes['maxtime'], summary["acct"]['end_time'])
                ratecalc.increment()
            else:
                # Mark with a negative process version to indicate that the job
                # has been processed but no summary was output.
                dbwriter.logprocessed(acct, settings['resource_id'], 0 - PROCESS_VERSION)

        if processtimes['maxtime'] != 0:
            timewindows[resourcename] = processtimes

    logging.info("Processor " + procidstr + "exiting. Processed %s", ratecalc.count)

    if ratecalc.count == 0:
        # No need to generate a report if no documents were processed.
        return

    proc = {
        "host": socket.getfqdn(),
        "instance": procid,
        "totalinstances": totalprocs,
        "start_time": ratecalc.starttime,
        "end_time": time.time(),
        "rate": ratecalc.rate,
        "records": ratecalc.count
    }

    report = { "proc": proc, "resources": timewindows }

    outdb.logreport(report)
def createsummary(options, totalprocs, procid):
    procidstr = "%s of %s " % (procid, totalprocs) if totalprocs is not None else ""
    logging.info("Processor " + procidstr + "starting")

    referencetime = int(time.time()) - (7 * 24 * 3600)

    config = account.getconfig(options['config'])
    dbconf = config['accountdatabase']
    outdb = output.factory(config['outputdatabase'])

    ratecalc = RateCalculator(procid)
    timewindows = dict()

    for resourcename, settings in config['resources'].iteritems():

        if 'enabled' in settings:
            if settings['enabled'] == False:
                continue

        if options['resource'] not in (None, resourcename, str(settings['resource_id'])):
            continue

        processtimes = { "mintime": 2**64, "maxtime": 0 }

        if options['start'] is None:
            dbreader = account.DbAcct(settings['resource_id'], dbconf, PROCESS_VERSION,
                                      totalprocs, procid, options['localjobid'])
        else:
            # Choose a process version that doesn't exist so that all jobs are selected.
            selectedProcVersion = PROCESS_VERSION + 1
            if options['ignoreprocessed']:
                selectedProcVersion = PROCESS_VERSION
            dbreader = account.DbAcct(settings['resource_id'], dbconf, selectedProcVersion,
                                      totalprocs, procid, None)

        bacct = batch_acct.factory(settings['batch_system'], settings['acct_path'],
                                   settings['host_name_ext'])

        if settings['lariat_path'] != "":
            lariat = summarize.LariatManager(settings['lariat_path'])
        else:
            lariat = None

        dbwriter = account.DbLogger(dbconf["dbname"], dbconf["tablename"],
                                    dbconf["defaultsfile"])

        for acct in dbreader.reader(options['start'], options['end']):
            logging.debug("%s local_job_id = %s", resourcename, acct['id'])
            job = job_stats.from_acct(acct, settings['tacc_stats_home'],
                                      settings['host_list_dir'], bacct)
            summary, timeseries = summarize.summarize(job, lariat)

            insertOk = outdb.insert(resourcename, summary, timeseries)

            if summary['complete'] == False and summary["acct"]['end_time'] > referencetime:
                # Do not mark incomplete jobs as done unless they are older than
                # the reference time (which defaults to 7 days ago).
                dbwriter.logprocessed(acct, settings['resource_id'], ERROR_INCOMPLETE)
                continue

            if insertOk:
                dbwriter.logprocessed(acct, settings['resource_id'], PROCESS_VERSION)
                processtimes['mintime'] = min(processtimes['mintime'], summary["acct"]['end_time'])
                processtimes['maxtime'] = max(processtimes['maxtime'], summary["acct"]['end_time'])
                ratecalc.increment()
            else:
                # Mark with a negative process version to indicate that the job
                # has been processed but no summary was output.
                dbwriter.logprocessed(acct, settings['resource_id'], 0 - PROCESS_VERSION)

        if processtimes['maxtime'] != 0:
            timewindows[resourcename] = processtimes

    logging.info("Processor " + procidstr + "exiting. Processed %s", ratecalc.count)

    if ratecalc.count == 0:
        # No need to generate a report if no documents were processed.
        return

    proc = {
        "host": socket.getfqdn(),
        "instance": procid,
        "totalinstances": totalprocs,
        "start_time": ratecalc.starttime,
        "end_time": time.time(),
        "rate": ratecalc.rate,
        "records": ratecalc.count
    }

    report = { "proc": proc, "resources": timewindows }

    outdb.logreport(report)
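# A sketch of the configuration shape that createsummary() expects, inferred
# from the keys it reads above. All values here are hypothetical examples;
# the real content comes from whatever file account.getconfig() parses.
_EXAMPLE_CONFIG = {
    "accountdatabase": {              # used by account.DbAcct / account.DbLogger
        "dbname": "accounts",
        "tablename": "process",
        "defaultsfile": "~/.my.cnf",
    },
    "outputdatabase": {},             # passed to output.factory()
    "resources": {
        "stampede": {                 # hypothetical resource name
            "enabled": True,
            "resource_id": 1,
            "batch_system": "SGE",    # passed to batch_acct.factory()
            "acct_path": "/path/to/accounting",
            "host_name_ext": "stampede.tacc.utexas.edu",
            "lariat_path": "",        # empty string disables LariatManager
            "tacc_stats_home": "/scratch/projects/tacc_stats",
            "host_list_dir": "/path/to/hostfile_logs",
        },
    },
}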
        return []
    try:
        with open(path) as file:
            return [host for line in file for host in line.split()]
    except IOError:
        return []

def short_host_name(host):
    return host.split('.')[0]

if len(sys.argv) != 4:
    USAGE("DIR START_DATE END_DATE")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])

seek = 800 << 20 # XXX

# Run through all jobs that ended after start and before end + 3 days.
for acct in sge_acct.reader(open(acct_path), start_time=start, end_time=end, seek=seek):
    if acct['end_time'] == 0:
        continue
    job = job_stats.from_acct(acct)
    pickle_path = os.path.join(pickle_dir, job.id)
    with open(pickle_path, 'w') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)
def short_host_name(host):
    return host.split('.')[0]

if len(sys.argv) != 5:
    USAGE("DIR START_DATE END_DATE BATCH_SYSTEM")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])

if 'seek' not in locals():
    seek = 800 << 20 # XXX

# Run through all jobs that ended after start and before end + 3 days.
a = batch_acct.factory(sys.argv[4], acct_path)
for acct in a.reader(start_time=start, end_time=end, seek=seek):
    if acct['end_time'] == 0:
        continue
    job = job_stats.from_acct(acct, tacc_stats_home, host_list_dir, a)
    pickle_path = os.path.join(pickle_dir, job.id)
    with open(pickle_path, 'w') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)
    try:
        with open(path) as file:
            return [host for line in file for host in line.split()]
    except IOError:
        return []

def short_host_name(host):
    return host.split('.')[0]

if len(sys.argv) != 4:
    USAGE("DIR START_DATE END_DATE")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])

# Get the correct scheduler module based on an environment variable.
job_scheduler = os.getenv('TACC_STATS_JOB_SCHEDULER')
if not job_scheduler:
    FATAL('Environment variable TACC_STATS_JOB_SCHEDULER not set')
scheduler_acct = eval(job_scheduler + '_acct')

# Run through all jobs.
for acct in scheduler_acct.reader(acct_path, start_time=start, end_time=end):
    if acct['end_time'] == 0:
        continue
    job = job_stats.from_acct(acct)
    pickle_path = os.path.join(pickle_dir, str(job.id))
    with open(pickle_path, 'w') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)
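# A minimal sketch of reading one of the pickle files written above. The
# function name is hypothetical; pickle.load() is the standard-library
# inverse of the pickle.dump() call in the loop.
def _load_pickled_job(pickle_path):
    # 'rb' is safe for any pickle protocol, including text-mode pickles
    # written with mode 'w' above.
    with open(pickle_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)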