Пример #1
0
def display_list(lis, **kwargs):
    """Display a report for every entry in *lis*.

    Each entry may be a Report, a job_stats.Job, or a job id (anything
    whose str() is all digits).  Ids are resolved against the SGE
    accounting data in a single batch before any report is shown.

    Extra keyword arguments are forwarded to Report.display(); after
    the first report, 'print_header' is forced to False so the header
    is printed only once.

    Raises ValueError for an argument that is none of the above.
    """
    # First pass: collect the job ids that need an accounting lookup.
    id_dict = {}
    for arg in lis:
        # isinstance() instead of `type(x) is T`: also accepts subclasses.
        if isinstance(arg, (Report, job_stats.Job)):
            pass
        elif str(arg).isdigit():  # XXX
            id_dict[str(arg)] = None
        else:
            raise ValueError("cannot convert arg `%s' to Job or Report" % str(arg))
    if id_dict:
        # Resolve all ids in one call rather than one query per id.
        sge_acct.fill(id_dict)
    # Second pass: build and display a report per argument.
    for arg in lis:
        report = None
        if isinstance(arg, Report):
            report = arg
        elif isinstance(arg, job_stats.Job):
            report = Report(arg)
        else:
            acct = id_dict[str(arg)]
            if acct:
                job = job_stats.from_acct(acct, os.getenv('TACC_STATS_HOME', '/scratch/projects/tacc_stats'))
                report = Report(job)
            else:
                job_stats.error("no accounting data found for job `%s'\n", arg)
        if report:
            report.display(**kwargs)
            # Only the first report prints the column header.
            kwargs['print_header'] = False
Пример #2
0
def display_list(lis, **kwargs):
    """Show a report for each element of lis (a Report, Job, or job id).

    Job ids are resolved through sge_acct in one batch; keyword
    arguments are passed to Report.display(), with 'print_header'
    cleared after the first report so the header appears once.
    """
    # Gather plain job ids so their accounting records can be fetched in bulk.
    pending_ids = {}
    for item in lis:
        if type(item) is Report or type(item) is job_stats.Job:
            continue
        if not str(item).isdigit():  # XXX
            raise ValueError("cannot convert arg `%s' to Job or Report" %
                             str(item))
        pending_ids[str(item)] = None
    if pending_ids:
        sge_acct.fill(pending_ids)
    for item in lis:
        current = None
        if type(item) is Report:
            current = item
        elif type(item) is job_stats.Job:
            current = Report(item)
        else:
            record = pending_ids[str(item)]
            if record:
                current = Report(job_stats.from_acct(record))
            else:
                job_stats.error("no accounting data found for job `%s'\n", item)
        if current:
            current.display(**kwargs)
            # Suppress the header on every report after the first one.
            kwargs['print_header'] = False
Пример #3
0
def createsummary(options, totalprocs, procid):
    """Summarize unprocessed accounting records for every enabled resource.

    For each enabled resource in the configuration this reads accounting
    records, builds a job summary plus timeseries, inserts them into the
    output database and logs each record as processed (negative process
    version on insert failure, ERROR_INCOMPLETE for recent incomplete
    jobs).  Finally a per-run processing report is written, unless no
    records were processed at all.

    Arguments:
        options    -- dict with at least 'config', 'resource' and
                      'localjobid' entries
        totalprocs -- total number of parallel workers, or None
        procid     -- index of this worker within totalprocs
    """
    procidstr = "%s of %s " % (procid, totalprocs) if totalprocs is not None else ""

    # Lazy %-style args: the message is only formatted if actually logged.
    logging.info("Processor %sstarting", procidstr)

    # Incomplete jobs older than this may still be marked as done.
    referencetime = int(time.time()) - (7 * 24 * 3600)

    config = account.getconfig(options['config'])
    dbconf = config['accountdatabase']

    outdb = output.factory(config['outputdatabase'])

    ratecalc = RateCalculator(procid)
    timewindows = dict()

    for resourcename, settings in config['resources'].iteritems():

        if 'enabled' in settings:
            if settings['enabled'] == False:
                continue

        # Honour the optional resource filter (by name or numeric id).
        if options['resource'] not in (None, resourcename, str(settings['resource_id'])):
            continue

        # Range of end times of successfully summarized jobs for this resource.
        processtimes = {"mintime": 2**64, "maxtime": 0}

        dbreader = account.DbAcct(settings['resource_id'], dbconf, PROCESS_VERSION, totalprocs, procid, options['localjobid'])

        bacct = batch_acct.factory(settings['batch_system'], settings['acct_path'], settings['host_name_ext'])

        if settings['lariat_path'] != "":
            lariat = summarize.LariatManager(settings['lariat_path'])
        else:
            lariat = None

        dbwriter = account.DbLogger(dbconf["dbname"], dbconf["tablename"], dbconf["defaultsfile"])

        for acct in dbreader.reader():
            logging.debug("%s local_job_id = %s", resourcename, acct['id'])
            job = job_stats.from_acct(acct, settings['tacc_stats_home'], settings['host_list_dir'], bacct)
            summary, timeseries = summarize.summarize(job, lariat)

            insertOk = outdb.insert(resourcename, summary, timeseries)

            if summary['complete'] == False and summary["acct"]['end_time'] > referencetime:
                # Do not mark incomplete jobs as done unless they are older
                # than the reference time (which defaults to 7 days ago).
                dbwriter.logprocessed(acct, settings['resource_id'], ERROR_INCOMPLETE)
                continue

            if insertOk:
                dbwriter.logprocessed(acct, settings['resource_id'], PROCESS_VERSION)
                processtimes['mintime'] = min(processtimes['mintime'], summary["acct"]['end_time'])
                processtimes['maxtime'] = max(processtimes['maxtime'], summary["acct"]['end_time'])
                ratecalc.increment()
            else:
                # Negative process version marks a record that was processed
                # but produced no summary output.
                dbwriter.logprocessed(acct, settings['resource_id'], 0 - PROCESS_VERSION)

        if processtimes['maxtime'] != 0:
            timewindows[resourcename] = processtimes

    logging.info("Processor %sexiting. Processed %s", procidstr, ratecalc.count)

    if ratecalc.count == 0:
        # No need to generate a report if no docs were processed.
        return

    proc = {"host": socket.getfqdn(),
            "instance": procid,
            "totalinstances": totalprocs,
            "start_time": ratecalc.starttime,
            "end_time": time.time(),
            "rate": ratecalc.rate,
            "records": ratecalc.count
            }

    report = {"proc": proc, "resources": timewindows}

    outdb.logreport(report)
Пример #4
0
def createsummary(options, totalprocs, procid):
    """Summarize accounting records for every enabled resource.

    Like the batch variant, but a user-supplied time window
    (options['start'] / options['end']) may override the normal
    "unprocessed records only" selection: with a start time the reader
    selects by time range, optionally skipping already-processed records
    when options['ignoreprocessed'] is set.

    Summaries are inserted into the output database; each record is
    logged as processed (negative process version on insert failure,
    ERROR_INCOMPLETE for recent incomplete jobs).  A per-run report is
    written unless nothing was processed.

    Arguments:
        options    -- dict with 'config', 'resource', 'localjobid',
                      'start', 'end' and 'ignoreprocessed' entries
        totalprocs -- total number of parallel workers, or None
        procid     -- index of this worker within totalprocs
    """
    procidstr = "%s of %s " % (procid, totalprocs) if totalprocs is not None else ""

    # Lazy %-style args: the message is only formatted if actually logged.
    logging.info("Processor %sstarting", procidstr)

    # Incomplete jobs older than this may still be marked as done.
    referencetime = int(time.time()) - (7 * 24 * 3600)

    config = account.getconfig(options['config'])
    dbconf = config['accountdatabase']

    outdb = output.factory(config['outputdatabase'])

    ratecalc = RateCalculator(procid)
    timewindows = dict()

    for resourcename, settings in config['resources'].iteritems():

        if 'enabled' in settings:
            if settings['enabled'] == False:
                continue

        # Honour the optional resource filter (by name or numeric id).
        if options['resource'] not in (None, resourcename, str(settings['resource_id'])):
            continue

        # Range of end times of successfully summarized jobs for this resource.
        processtimes = {"mintime": 2**64, "maxtime": 0}

        if options['start'] is None:
            dbreader = account.DbAcct(settings['resource_id'], dbconf, PROCESS_VERSION, totalprocs, procid, options['localjobid'])
        else:
            # Choose a process version that doesn't exist so that all jobs
            # inside the requested time range are selected.
            selectedProcVersion = PROCESS_VERSION + 1
            if options['ignoreprocessed']:
                selectedProcVersion = PROCESS_VERSION

            dbreader = account.DbAcct(settings['resource_id'], dbconf, selectedProcVersion, totalprocs, procid, None)

        bacct = batch_acct.factory(settings['batch_system'], settings['acct_path'], settings['host_name_ext'])

        if settings['lariat_path'] != "":
            lariat = summarize.LariatManager(settings['lariat_path'])
        else:
            lariat = None

        dbwriter = account.DbLogger(dbconf["dbname"], dbconf["tablename"], dbconf["defaultsfile"])

        for acct in dbreader.reader(options['start'], options['end']):
            logging.debug("%s local_job_id = %s", resourcename, acct['id'])
            job = job_stats.from_acct(acct, settings['tacc_stats_home'], settings['host_list_dir'], bacct)
            summary, timeseries = summarize.summarize(job, lariat)

            insertOk = outdb.insert(resourcename, summary, timeseries)

            if summary['complete'] == False and summary["acct"]['end_time'] > referencetime:
                # Do not mark incomplete jobs as done unless they are older
                # than the reference time (which defaults to 7 days ago).
                dbwriter.logprocessed(acct, settings['resource_id'], ERROR_INCOMPLETE)
                continue

            if insertOk:
                dbwriter.logprocessed(acct, settings['resource_id'], PROCESS_VERSION)
                processtimes['mintime'] = min(processtimes['mintime'], summary["acct"]['end_time'])
                processtimes['maxtime'] = max(processtimes['maxtime'], summary["acct"]['end_time'])
                ratecalc.increment()
            else:
                # Negative process version marks a record that was processed
                # but produced no summary output.
                dbwriter.logprocessed(acct, settings['resource_id'], 0 - PROCESS_VERSION)

        if processtimes['maxtime'] != 0:
            timewindows[resourcename] = processtimes

    logging.info("Processor %sexiting. Processed %s", procidstr, ratecalc.count)

    if ratecalc.count == 0:
        # No need to generate a report if no docs were processed.
        return

    proc = {"host": socket.getfqdn(),
            "instance": procid,
            "totalinstances": totalprocs,
            "start_time": ratecalc.starttime,
            "end_time": time.time(),
            "rate": ratecalc.rate,
            "records": ratecalc.count
            }

    report = {"proc": proc, "resources": timewindows}

    outdb.logreport(report)
Пример #5
0
        return []
    try:
        with open(path) as file:
            return [host for line in file for host in line.split()]
    except IOError as (err, str):
        return []

def short_host_name(name):
    """Return the unqualified host name ("c123.host.edu" -> "c123").

    The parameter was renamed from `str`, which shadowed the builtin.
    """
    return name.split('.')[0]

# Script entry point: pickle the stats of every job in the date range.
if len(sys.argv) != 4:
    USAGE("DIR START_DATE END_DATE")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])
seek = 800 << 20  # XXX initial byte offset into the accounting file

# Run though all jobs that ended after start and before end + 3 days.

for acct in sge_acct.reader(open(acct_path),
                            start_time=start,
                            end_time=end,
                            seek=seek):
    if acct['end_time'] == 0:
        # Jobs still running have no end time; skip them.
        continue
    job = job_stats.from_acct(acct)
    # str(job.id): keeps os.path.join happy for non-string ids.
    pickle_path = os.path.join(pickle_dir, str(job.id))
    # Binary mode for pickle data; `with` closes the handle the original
    # version leaked.
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)
Пример #6
0
    return str.split('.')[0]

# Script entry point: pickle the stats of every job in the date range.
if len(sys.argv) != 5:
    # Usage now lists the batch-system argument (argv[4]) that the
    # original message omitted even though it was required.
    USAGE("DIR START_DATE END_DATE BATCH_SYSTEM")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])

# Default accounting-file seek offset unless one was already defined.
if 'seek' not in locals():
    seek = 800 << 20  # XXX

# Run though all jobs that ended after start and before end + 3 days.

a = batch_acct.factory(sys.argv[4], acct_path)

for acct in a.reader(start_time=start, end_time=end, seek=seek):
    if acct['end_time'] == 0:
        # Jobs still running have no end time; skip them.
        continue
    job = job_stats.from_acct(acct, tacc_stats_home, host_list_dir, a)
    # str(job.id): keeps os.path.join happy for non-string ids.
    pickle_path = os.path.join(pickle_dir, str(job.id))
    # Binary mode for pickle data; `with` closes the handle the original
    # version leaked.
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)
Пример #7
0
    try:
        with open(path) as file:
            return [host for line in file for host in line.split()]
    except IOError as (err, str):
        return []

def short_host_name(name):
    """Return the unqualified host name ("c123.host.edu" -> "c123").

    The parameter was renamed from `str`, which shadowed the builtin.
    """
    return name.split('.')[0]

# Script entry point: pickle the stats of every job in the date range.
if len(sys.argv) != 4:
    USAGE("DIR START_DATE END_DATE")

pickle_dir = sys.argv[1]
start = getdate(sys.argv[2])
end = getdate(sys.argv[3])

# Pick the scheduler-specific accounting module based on the environment.
job_scheduler = os.getenv('TACC_STATS_JOB_SCHEDULER')
if not job_scheduler:
    FATAL('Environment variable TACC_STATS_JOB_SCHEDULER not set')
# SECURITY: the original ran eval() on the environment value; a plain
# module-global lookup resolves the same '<scheduler>_acct' name without
# executing arbitrary code from the environment.
try:
    scheduler_acct = globals()[job_scheduler + '_acct']
except KeyError:
    FATAL("no accounting module for scheduler `%s'" % job_scheduler)

# Run through all jobs
for acct in scheduler_acct.reader(acct_path, start_time=start, end_time=end):
    if acct['end_time'] == 0:
        # Jobs still running have no end time; skip them.
        continue
    job = job_stats.from_acct(acct)
    pickle_path = os.path.join(pickle_dir, str(job.id))
    # Binary mode for pickle data; `with` closes the handle the original
    # version leaked.
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(job, pickle_file, pickle_prot)