def run(self, date): print("Processing for ", date) if not os.path.exists( os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt")): print("No accounting file for ", date) return acct = self.acct_reader( os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt")) try: os.makedirs( os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"))) except: pass val_file = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"), "validated") val_jids = [] if os.path.exists(val_file): with open(val_file, 'r') as fd: val_jids = fd.read().splitlines() acct_jids = [x['id'] for x in acct if "+" not in x['id']] ntot = len(acct_jids) print(len(acct_jids), 'Job records in accounting file') run_jids = sorted(list(set(acct_jids) - set(val_jids))) run_jids += [self.jobids] print(len(run_jids), 'Jobs to process') ntod = len(run_jids) acct = [job for job in acct if job['id'] in run_jids] if not self.jobids: acct = [ job for job in acct if job['nodes'] * (job['end_time'] - job['start_time']) < 1728000 ] ctr = 0 with open(val_file, "a") as fd: for result in self.pool.imap(self.partial_pickle, acct): #for result in map(self.partial_pickle, acct): if result[1]: fd.write("%s\n" % result[0]) fd.flush() ctr += 1.0 progress(ctr + (ntot - ntod), ntot, date.strftime("%Y-%m-%d")) print("Completed ", date)
def run(self, date): print("Processing for ", date) if not os.path.exists(os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt")): print("No accounting file for ", date) return acct = self.acct_reader(os.path.join(self.acct_path, date.strftime("%Y-%m-%d") + ".txt")) try: os.makedirs(os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"))) except: pass val_file = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"), "validated") val_jids = [] if os.path.exists(val_file): with open(val_file, 'r') as fd: val_jids = fd.read().splitlines() acct_jids = [x['id'] for x in acct if "+" not in x['id']] ntot = len(acct_jids) print(len(acct_jids),'Job records in accounting file') run_jids = sorted(list(set(acct_jids) - set(val_jids))) run_jids += [self.jobids] print(len(run_jids),'Jobs to process') ntod = len(run_jids) acct = [job for job in acct if job['id'] in run_jids] if not self.jobids: acct = [job for job in acct if job['nodes']*(job['end_time']-job['start_time']) < 1728000] ctr = 0 with open(val_file, "a") as fd: for result in self.pool.imap(self.partial_pickle, acct): #for result in map(self.partial_pickle, acct): if result[1]: fd.write("%s\n" % result[0]) fd.flush() ctr += 1.0 progress(ctr + (ntot - ntod), ntot, date.strftime("%Y-%m-%d")) print("Completed ", date)
def run(self):
    for date in self.daterange(self.start, self.end):
        acct_file = os.path.join(self.acct_path,
                                 date.strftime("%Y-%m-%d") + ".txt")
        if not os.path.exists(acct_file):
            continue
        acct = self.acct_reader(acct_file)

        try:
            os.makedirs(os.path.join(self.pickles_dir,
                                     date.strftime("%Y-%m-%d")))
        except OSError:
            pass  # directory already exists

        # Map of previously processed job IDs to their validation status.
        vfile = os.path.join(self.pickles_dir, date.strftime("%Y-%m-%d"),
                             "validated")
        val_stat = {}
        if os.path.exists(vfile):
            with open(vfile, 'r') as fdv:
                for line in sorted(set(fdv.readlines())):
                    jobid, stat = line.split()
                    val_stat[jobid] = stat

        ntot = len(acct)
        print(len(acct), 'Job records in accounting file')
        # Keep jobs that failed validation or were never attempted.
        acct = [x for x in acct if val_stat.get(x['id']) in ("False", None)]
        print(len(acct), 'Jobs to process')
        ntod = len(acct)

        ctr = 0
        with open(vfile, "a+") as fdv:
            for result in self.pool.imap(self.partial_pickle, acct):
                fdv.write("%s %s\n" % result)
                fdv.flush()
                ctr += 1.0
                progress(ctr + (ntot - ntod), ntot,
                         date.strftime("%Y-%m-%d"))
def update_acct(date, rerun=False):
    ftr = [3600, 60, 1]  # factors to convert H:M:S fields to seconds
    tz = pytz.timezone('US/Central')
    ctr = 0
    with open(os.path.join(cfg.acct_path,
                           date.strftime("%Y-%m-%d") + '.txt'),
              encoding="latin1") as fd:
        # First pass just counts records for the progress bar.
        nrecords = sum(1 for record in csv.DictReader(fd))
        fd.seek(0)
        for job in csv.DictReader(fd, delimiter='|'):
            if '+' in job['JobID']:
                # Heterogeneous job: fold the offset into the base ID.
                jid, rid = job['JobID'].split('+')
                job['JobID'] = int(jid) + int(rid)
            if rerun:
                pass
            elif Job.objects.filter(id=job['JobID']).exists():
                ctr += 1
                continue

            json = {}
            json['id'] = job['JobID']
            json['project'] = job['Account']
            json['start_time'] = tz.localize(parse(job['Start']))
            json['end_time'] = tz.localize(parse(job['End']))
            json['start_epoch'] = calendar.timegm(
                json['start_time'].utctimetuple())
            json['end_epoch'] = calendar.timegm(
                json['end_time'].utctimetuple())
            json['run_time'] = json['end_epoch'] - json['start_epoch']

            try:
                # Timelimit is "D-HH:MM:SS" or "HH:MM:SS"; store minutes.
                if '-' in job['Timelimit']:
                    days, time = job['Timelimit'].split('-')
                else:
                    time = job['Timelimit']
                    days = 0
                json['requested_time'] = (
                    int(days) * 86400 +
                    sum(a * b for a, b in
                        zip(ftr, [int(i) for i in time.split(":")]))) / 60
            except (KeyError, ValueError):
                pass  # Timelimit may be missing or non-numeric (UNLIMITED)

            json['queue_time'] = int(parse(job['Submit']).strftime('%s'))
            try:
                json['queue'] = job['Partition']
                json['name'] = job['JobName'][0:128]
                json['status'] = job['State'].split()[0]
                json['nodes'] = int(job['NNodes'])
                json['cores'] = int(job['ReqCPUS'])
                json['wayness'] = json['cores'] / json['nodes']
                json['date'] = json['end_time'].date()
                json['user'] = job['User']
            except (KeyError, ValueError):
                print(job)
                continue
            if "user" in json:
                try:
                    json['uid'] = int(pwd.getpwnam(json['user']).pw_uid)
                except KeyError:
                    pass  # user unknown to the password database

            host_list = hostlist.expand_hostlist(job['NodeList'])
            del job['NodeList']
            Job.objects.filter(id=json['id']).delete()
            obj, created = Job.objects.update_or_create(**json)

            ### If xalt is available add data to the DB
            xd = None
            try:
                xd = run.objects.using('xalt').filter(job_id=json['id'])[0]
            except Exception:
                pass  # no XALT record, or XALT DB unavailable
            if xd:
                obj.exe = xd.exec_path.split('/')[-1][0:128]
                obj.exec_path = xd.exec_path
                obj.cwd = xd.cwd[0:128]
                obj.threads = xd.num_threads
                obj.save()
                for join in join_run_object.objects.using('xalt').filter(
                        run_id=xd.run_id):
                    object_path = lib.objects.using('xalt').get(
                        obj_id=join.obj_id).object_path
                    module_name = lib.objects.using('xalt').get(
                        obj_id=join.obj_id).module_name
                    if not module_name:
                        module_name = 'none'
                    library = Libraries(object_path=object_path,
                                        module_name=module_name)
                    library.save()
                    library.jobs.add(obj)

            ### Build host table
            for host_name in host_list:
                h = Host(name=host_name)
                h.save()
                h.jobs.add(obj)
            ctr += 1
            progress(ctr, nrecords, date)

    # Mark jobs whose pickles were validated by the pickler.
    with open(os.path.join(cfg.pickles_dir, date.strftime("%Y-%m-%d"),
                           "validated")) as fd:
        for line in fd.readlines():
            Job.objects.filter(id=int(line)).update(validated=True)
def update(date, rerun=False):
    tz = pytz.timezone('US/Central')
    pickle_dir = os.path.join(cfg.pickles_dir, date)
    ctr = 0
    for root, directory, pickle_files in os.walk(pickle_dir):
        num_files = len(pickle_files)
        print("Number of pickle files in", root, '=', num_files)
        for pickle_file in sorted(pickle_files):
            ctr += 1
            try:
                if rerun:
                    pass
                elif Job.objects.filter(id=pickle_file).exists():
                    continue
            except ValueError:
                # File name is not a numeric job ID.
                print(pickle_file, "doesn't look like a pickled job")
                continue
            pickle_path = os.path.join(root, str(pickle_file))
            try:
                with open(pickle_path, 'rb') as f:
                    data = pickle.load(f)
                json = data.acct
                hosts = list(data.hosts.keys())
            except EOFError:
                print(pickle_file, "is empty")
                continue
            if 'yesno' in json:
                del json['yesno']

            utc_start = datetime.utcfromtimestamp(
                json['start_time']).replace(tzinfo=pytz.utc)
            utc_end = datetime.utcfromtimestamp(
                json['end_time']).replace(tzinfo=pytz.utc)
            json['run_time'] = json['end_time'] - json['start_time']

            # Requested time may arrive under either key; store seconds.
            if 'unknown' in json:
                json['requested_time'] = json['unknown'] * 60
                del json['unknown']
            elif 'requested_time' in json:
                json['requested_time'] = json['requested_time'] * 60
            else:
                json['requested_time'] = 0

            json['start_epoch'] = json['start_time']
            json['end_epoch'] = json['end_time']
            json['start_time'] = utc_start.astimezone(tz)
            json['end_time'] = utc_end.astimezone(tz)
            json['date'] = json['end_time'].date()
            json['name'] = json['name'][0:128]
            json['wayness'] = json['cores'] / json['nodes']
            if 'state' in json:
                json['status'] = json['state']
                del json['state']
            json['status'] = json['status'].split()[0]

            try:
                if 'user' in json:
                    json['uid'] = int(pwd.getpwnam(json['user']).pw_uid)
                elif 'uid' in json:
                    json['user'] = pwd.getpwuid(int(json['uid']))[0]
            except KeyError:
                json['user'] = '******'

            ### If xalt is available add data to the DB
            xd = None
            try:
                xd = run.objects.using('xalt').filter(job_id=json['id'])[0]
                json['user'] = xd.user
                json['exe'] = xd.exec_path.split('/')[-1][0:128]
                json['exec_path'] = xd.exec_path
                json['cwd'] = xd.cwd[0:128]
                json['threads'] = xd.num_threads
            except Exception:
                xd = False

            if 'host_list' in json:
                del json['host_list']
            Job.objects.filter(id=json['id']).delete()
            obj, created = Job.objects.update_or_create(**json)

            for host_name in hosts:
                h = Host(name=host_name)
                h.save()
                h.jobs.add(obj)

            if xd:
                for join in join_run_object.objects.using('xalt').filter(
                        run_id=xd.run_id):
                    try:
                        object_path = lib.objects.using('xalt').get(
                            obj_id=join.obj_id).object_path
                        module_name = lib.objects.using('xalt').get(
                            obj_id=join.obj_id).module_name
                        if not module_name:
                            module_name = 'none'
                        library = Libraries(object_path=object_path,
                                            module_name=module_name)
                        library.save()
                        library.jobs.add(obj)
                    except Exception:
                        pass
            progress(ctr, num_files, date)
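# --- Usage sketch (an assumption, not part of the original module) ---
# update_acct() expects a datetime-like object (it calls date.strftime),
# while update() expects the "%Y-%m-%d" string that names the pickle
# directory. Both assume a configured Django environment providing the
# Job/Host models. A minimal driver might look like:
if __name__ == '__main__':
    from datetime import timedelta

    day = datetime.now() - timedelta(days=1)  # e.g. yesterday's records
    update_acct(day, rerun=False)             # ingest Slurm accounting rows
    update(day.strftime("%Y-%m-%d"))          # attach pickled per-host data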