def _download_file(download_dir, url):
    """Download ``url`` into ``download_dir`` unless a copy is already there.

    The local file name is the last ``/``-separated component of ``url``.
    The data is first written to ``<name>.part`` and renamed once the
    transfer has finished, so an interrupted download is never mistaken for
    a complete file on the next call.

    Args:
        download_dir: Directory that is searched for an existing copy and
            that receives the downloaded file.
        url: Address of the file to fetch.

    Returns:
        A path to the local file. When a copy was already present this is
        the value reported by ``file_exist`` (if it is a string), otherwise
        ``download_dir`` joined with the file name.
    """
    # Local import: the file-level import block is not visible from here.
    import os

    filename = url.split("/")[-1]
    target = os.path.join(download_dir, filename)

    existing = file_exist(download_dir, filename)
    if existing:
        sys.stderr.write(f"[-] Already downloaded: {url} (at {filename}).\n")
        # _download_and_extract uses file_exist's result as a path, so hand it
        # back when it is one; fall back to the expected location otherwise.
        return existing if isinstance(existing, str) else target

    sys.stderr.write(f"[*] Downloading from {url} to {filename}.\n")
    # Save where the existence check above looks, not in the current
    # working directory.
    os.makedirs(download_dir, exist_ok=True)
    partial = target + ".part"
    with TqdmUpTo(unit='B', unit_scale=True, miniters=1, desc=filename) as t:
        urllib.request.urlretrieve(url, filename=partial,
                                   reporthook=t.update_to)
    os.replace(partial, target)
    return target
def _download_and_extract(download_dir, url, src_filename, trg_filename):
    """Fetch a gzip-compressed tar archive and locate two files inside it.

    Nothing is downloaded when both ``src_filename`` and ``trg_filename``
    are already found under ``download_dir``.

    Args:
        download_dir: Directory the archive is extracted into and in which
            the two files are looked up.
        url: Address of the ``.tar.gz`` archive.
        src_filename: Name of the first file expected after extraction.
        trg_filename: Name of the second file expected after extraction.

    Returns:
        Tuple ``(src_path, trg_path)`` as reported by ``file_exist``.

    Raises:
        OSError: If either file is still missing after extraction.
    """
    src_path = file_exist(download_dir, src_filename)
    trg_path = file_exist(download_dir, trg_filename)
    if src_path and trg_path:
        sys.stderr.write(f"[-] Already downloaded and extracted {url}.\n")
        return src_path, trg_path

    compressed_file = _download_file(download_dir, url)
    sys.stderr.write(f"[*] Extracting {compressed_file}.\n")
    with tarfile.open(compressed_file, "r:gz") as corpus_tar:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: refuse members that would
            # land outside download_dir (absolute paths, "..", unsafe links).
            corpus_tar.extractall(download_dir, filter="data")
        else:
            # Older interpreters have no extraction filters; keep the
            # previous unfiltered behaviour there.
            corpus_tar.extractall(download_dir)

    src_path = file_exist(download_dir, src_filename)
    trg_path = file_exist(download_dir, trg_filename)
    if src_path and trg_path:
        return src_path, trg_path
    raise OSError(
        f"[!] Download/extraction failed for url {url} to path {download_dir}")
def makeyaml(outdir, uid):
    """Create ``<outdir>/output_<uid>.yaml`` marking a job as being sent.

    The file holds a single ``processing`` mapping with ``status`` set to
    ``'sending'`` and a ``timestamp`` taken from ``ut.gettimestamp()``.

    Args:
        outdir: Directory for the yaml file; created when missing.
        uid: Identifier inserted into the file name.

    Returns:
        ``False`` when the yaml file already exists (nothing is written),
        ``True`` after a new file has been written.
    """
    if not ut.dir_exist(outdir):
        # Create the directory directly instead of through a shell string:
        # handles nested paths and names with spaces or shell metacharacters.
        try:
            os.makedirs(outdir)
        except OSError as exc:
            # Another process may have created it in the meantime. Any other
            # failure is only reported here; open() below then fails the same
            # way it did when the shell ``mkdir`` failed.
            if not os.path.isdir(outdir):
                print(exc)
    if outdir[-1] != '/':
        outdir += '/'
    outfile = '%soutput_%s.yaml' % (outdir, uid)
    print('%s %s' % ('========================================== ', outfile))
    if ut.file_exist(outfile):
        return False
    data = {
        'processing': {
            'status': 'sending',
            'timestamp': ut.gettimestamp(),
        }
    }
    with open(outfile, 'w') as outyaml:
        yaml.dump(data, outyaml, default_flow_style=False)
    return True
def run(self):
    """Write one summary row per merged process plus a final ``total`` row.

    Every directory under ``self.indir`` that contains a ``merge.yaml`` is
    read. Its event count, file counts, size (divided by 1e9) and per-user
    counts are added to the running totals on ``self`` and written to
    ``self.OutFile`` as a ``,,``-separated line. File counts are wrapped in
    ``<h2><mark>`` when more ``output*`` entries exist under ``self.eosdir``
    than the merge file accounts for.

    Directories whose ``merge.yaml`` cannot be parsed or is empty are
    skipped instead of aborting the whole report.
    """
    # Per-user grand totals, seeded with every known user.
    usermaptot = dict.fromkeys(us.users, 0)

    ldir = [x[0] for x in os.walk(self.indir)]
    for dirpath in ldir:
        mergefile = dirpath + '/merge.yaml'
        if not ut.file_exist(mergefile):
            continue
        process = dirpath.replace(self.basedir, '')
        print('%s %s' % ('-------------- process ', process))

        tmpf = None
        with open(mergefile, 'r') as stream:
            try:
                # NOTE(review): yaml.load without an explicit Loader can
                # build arbitrary Python objects; consider yaml.safe_load if
                # these files never carry python-specific tags.
                tmpf = yaml.load(stream)
            except yaml.YAMLError as exc:
                print(exc)
        if tmpf is None:
            # Parse error (already printed) or empty file: nothing to report.
            print('skipping unreadable merge file %s' % mergefile)
            continue

        merged = tmpf['merge']
        events_tot = merged['nevents']
        size_tot = merged['size'] / 1000000000.
        bad_tot = merged['nbad']
        files_tot = merged['ndone']

        nfileseos = 0
        if files_tot + bad_tot != 0:
            if ut.dir_exist('%s%s' % (self.eosdir, process)):
                nfileseos = len(
                    glob.glob('%s%s/output*' % (self.eosdir, process)))

        print('nevents : %i' % events_tot)
        print('nfiles on eos/checked good/checked bad : %i/%i/%i' % (
            nfileseos, files_tot, bad_tot))

        # Highlight rows where eos holds more files than were checked.
        marked_b = ''
        marked_e = ''
        if nfileseos > files_tot + bad_tot:
            marked_b = '<h2><mark>'
            marked_e = '</mark></h2>'

        self.ntot_events += int(events_tot)
        self.ntot_files += int(files_tot)
        self.tot_size += float(size_tot)
        self.ntot_bad += int(bad_tot)
        self.ntot_eos += int(nfileseos)

        usermap = dict.fromkeys(us.users, 0)
        for key, value in sorted(merged['users'].items()):
            usermap[key] = value
            # A user absent from us.users raises KeyError here, as before.
            usermaptot[key] = usermaptot[key] + value

        toaddus = ''.join(',,%i' % usermap[key] for key in sorted(usermap))

        cmd = '%s,,%s,,%s%i%s,,%s%i%s,,%i,,%.2f%s\n' % (
            process, self.comma_me(str(events_tot)),
            marked_b, files_tot, marked_e,
            marked_b, nfileseos, marked_e,
            bad_tot, size_tot, toaddus)
        self.OutFile.write(cmd)

    toaddustot = ''.join(
        ',,%i' % usermaptot[key] for key in sorted(usermaptot))
    cmd = '%s,,%s,,%s,,%s,,%s,,%.2f%s\n' % (
        'total', self.comma_me(str(self.ntot_events)),
        self.comma_me(str(self.ntot_files)),
        self.comma_me(str(self.ntot_eos)),
        self.comma_me(str(self.ntot_bad)),
        self.tot_size, toaddustot)
    self.OutFile.write(cmd)
def check(self):
    """Compare file counts on eos with the merged yaml bookkeeping.

    For every directory under ``self.indireos`` the entries whose name
    contains ``'output'`` are counted and compared with ``ndone + nbad``
    from ``<self.indirafs>/<proc>/merge.yaml``:

    * no merge file, or fewer files recorded than present: touch the
      ``check`` marker for the process;
    * more files recorded than present: remove the process'
      ``output*.yaml`` and ``merge.yaml``;
    * equal: remove the ``check`` marker if it exists.

    The counts are finally dumped to ``<self.indirafs>/files.yaml``.
    Exits with status 3 when ``ut.testeos`` reports a problem. A merge file
    that cannot be parsed or is empty is skipped rather than crashing.
    """
    ldir = [x[0] for x in os.walk(self.indireos)]
    if not ut.testeos(eostest, eostest_size):
        print('eos seems to have problems, should check, will exit')
        sys.exit(3)

    dic = {}
    for l in ldir:
        proc = l.replace(self.indireos, '')
        if self.process != '' and self.process != proc:
            continue

        # Skip anything that has an "ana" path component.
        if "ana" in l.split('/'):
            print('is anana')
            print(l)
            continue

        # NOTE(review): this compares the walked path itself, not ``proc``;
        # it only matches if os.walk yields these bare names. Presumably
        # ``proc`` (or the basename) was intended - TODO confirm.
        if l in ('BADPYTHIA', 'lhe', "__restored_files__", "backup"):
            continue

        print('%s %s' % ('--------------------- eos process ', l))
        nfileseos = 0
        eosdir = '%s/%s' % (self.indireos, proc)
        if os.path.isdir(eosdir):
            nfileseos = len([x for x in os.listdir(eosdir) if 'output' in x])
        if nfileseos == 0:
            continue

        checkfile = '%s/%s/check' % (self.indirafs, proc)
        mergefile = self.indirafs + '/' + proc + '/merge.yaml'
        print('%s %s' % ('mergefile ', mergefile))
        if not ut.file_exist(mergefile):
            # Nothing merged yet: make sure the directory exists and flag
            # the process for checking.
            if not ut.dir_exist('%s/%s' % (self.indirafs, proc)):
                os.system('mkdir -p %s/%s' % (self.indirafs, proc))
            self.touch(checkfile)
            continue

        if not os.path.isdir(self.indirafs):
            os.system('mkdir %s' % self.indirafs)

        tmpf = None
        with open(mergefile, 'r') as stream:
            try:
                # NOTE(review): yaml.load without an explicit Loader can
                # build arbitrary Python objects; consider yaml.safe_load.
                tmpf = yaml.load(stream)
            except yaml.YAMLError as exc:
                print(exc)
        if tmpf is None:
            # Parse error (already printed) or empty file.
            print('skipping unreadable merge file %s' % mergefile)
            continue

        bad_tot = tmpf['merge']['nbad']
        files_tot = tmpf['merge']['ndone']
        ntot_files = bad_tot + files_tot
        print('%s %s %s %s' % (
            "tot files ", ntot_files, " files eos ", nfileseos))
        dic[proc] = {'neos': nfileseos, 'nmerged': ntot_files}
        print(checkfile)

        if ntot_files < nfileseos:
            self.touch(checkfile)
        elif ntot_files > nfileseos:
            print('%s %s %s %s' % (
                ' ntot_files>nfileseos ', ntot_files, nfileseos, proc))
            os.system('rm %s/%s/output*.yaml' % (self.indirafs, proc))
            os.system('rm %s/%s/merge.yaml' % (self.indirafs, proc))
        else:
            if ut.file_exist(checkfile):
                os.system('rm %s/%s/check' % (self.indirafs, proc))

    outfile = self.indirafs + '/files.yaml'
    with open(outfile, 'w') as outyaml:
        yaml.dump(dic, outyaml, default_flow_style=False)
def merge(self, force):
    """Merge the per-job ``output_*.yaml`` files of each process directory.

    For every directory under ``self.indir`` (optionally restricted to
    ``self.process``) the job files are summarised into ``merge.yaml``:
    total size and events, the list of good output files with their event
    counts, the list of bad output files, the output directory and a
    per-user count of good jobs. Jobs whose status is ``'sending'`` are
    ignored.

    A directory without job files has any stale ``merge.yaml`` removed.
    Unless ``force`` is true, directories without a ``check`` marker file
    are left alone.

    Args:
        force: Merge even when no ``check`` marker is present.
    """
    print('%s %s %s' % (self.indir, ' ==== ', self.process))
    ldir = [x[0] for x in os.walk(self.indir)]
    process = ''
    for l in ldir:
        process = l.replace(self.indir, "")
        if self.process != '' and self.process != process:
            continue

        outfile = l + '/merge.yaml'
        totsize = 0
        totevents = 0
        outfiles = []
        outfilesbad = []
        users = {}
        outdir = None
        ndone = 0
        nbad = 0

        yaml_files = glob.glob("%s/output_*.yaml" % (l))
        if not yaml_files:
            if os.path.isfile("%s/merge.yaml" % (l)):
                os.system("rm %s/merge.yaml" % (l))
            continue

        # continue if process has been checked
        print('%s/check' % (l))
        if not ut.file_exist('%s/check' % (l)) and not force:
            continue

        print('merging %i files in directory %s' % (len(yaml_files), l))
        for f in yaml_files:
            if not os.path.isfile(f):
                print('file does not exists... %s' % f)
                continue

            with open(f, 'r') as stream:
                try:
                    # NOTE(review): yaml.load without an explicit Loader can
                    # build arbitrary Python objects; consider safe_load.
                    tmpf = yaml.load(stream)
                except yaml.YAMLError as exc:
                    print(exc)
                    continue
            if tmpf is None:
                # An empty file loads as None (e.g. a job file that has been
                # opened for writing but not filled yet); skip it instead of
                # failing on the subscript below.
                print('skipping empty yaml file %s' % f)
                continue

            info = tmpf['processing']
            if info['status'] == 'sending':
                continue

            # Split at the last '/' only, so a basename that also occurs
            # earlier in the path cannot corrupt the directory part.
            head, sep, name = info['out'].rpartition('/')

            if info['status'] == 'BAD':
                nbad += 1
                outfilesbad.append(name)
                outdir = head + sep
                process = info['process']
                continue

            totsize += info['size']
            totevents += info['nevents']
            process = info['process']
            outfiles.append([name, info['nevents']])
            outdir = head + sep
            users[info['user']] = users.get(info['user'], 0) + 1
            ndone += 1

        dic = {
            'merge': {
                'process': process,
                'nevents': totevents,
                'outfiles': outfiles,
                'outdir': outdir,
                'size': totsize,
                'ndone': ndone,
                'nbad': nbad,
                'outfilesbad': outfilesbad,
                'users': users
            }
        }
        try:
            with open(outfile, 'w') as outyaml:
                yaml.dump(dic, outyaml, default_flow_style=False)
        except IOError as exc:
            # One retry after a pause; a second failure propagates.
            print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
            print('%s %s' % ("outfile ", outfile))
            import time
            time.sleep(10)
            with open(outfile, 'w') as outyaml:
                yaml.dump(dic, outyaml, default_flow_style=False)
def check(self, force, statfile):
    """Check the root files of each process and record one yaml per job.

    Walks ``self.indir``; for each process directory that carries a
    ``check`` marker under ``self.yamldir`` (or for all of them when
    ``force`` is true) every ``output_*.root`` file is passed to
    ``self.checkFile_root`` and the outcome is written to
    ``output_<jobid>.yaml`` in the directory returned by
    ``self.makeyamldir``. When at least one file was examined, a summary
    line is appended to ``statfile`` and printed.

    Args:
        force: Ignore the ``check`` marker and re-check files that already
            have a yaml larger than 100 (as reported by ``ut.getsize``).
        statfile: Path of the HTML statistics file that is appended to.
    """
    walked = [entry[0] for entry in os.walk(self.indir)]
    if not ut.testeos(eostest, eostest_size):
        print('eos seems to have problems, should check, will exit')
        sys.exit(3)

    for process in walked:
        uid = process.replace(self.indir, "")
        if uid == "":
            continue
        if self.process != '' and uid != self.process:
            continue

        marker = '%s/%s/check' % (self.yamldir, uid)
        print(marker)
        if not (ut.file_exist('%s/%s/check' % (self.yamldir, uid)) or force):
            continue

        # Directories with an "ana" path component are not checked.
        if "ana" in process.split('/'):
            print('is anana')
            print(process)
            continue

        root_files = glob.glob("%s/output_*.root" % (process))
        if not root_files:
            continue

        print('%s %s' % ('--------------------- ', uid))
        print('%s %s' % ('number of files ', len(root_files)))
        print('%s %s' % ('process from the input directory ', uid))

        outdir = self.makeyamldir(self.yamldir + uid)
        hasbeenchecked = False
        nevents_tot = 0
        njobsdone_tot = 0
        njobsbad_tot = 0

        for f in root_files:
            self.count = 0
            if not os.path.isfile(f):
                print('file does not exists... %s' % f)
                continue

            jobid = f.split('_')[-1].replace(self.fext, '')
            userid = ut.find_owner(f)
            outfile = '%soutput_%s.yaml' % (outdir, jobid)
            # Already recorded with a non-trivial yaml: nothing to redo.
            if ut.file_exist(outfile) and ut.getsize(outfile) > 100 \
                    and not force:
                continue

            hasbeenchecked = True
            print('%s %s' % ('-----------checking root file ', f))

            if '.root' not in self.fext:
                print('not correct file extension %s' % self.fext)
                continue

            nevts, is_good = self.checkFile_root(f, treename)
            if is_good:
                status = 'DONE'
                nevents_tot += nevts
                njobsdone_tot += 1
            else:
                status = 'BAD'
                njobsbad_tot += 1

            record = {
                'processing': {
                    'process': uid,
                    'jobid': jobid,
                    'nevents': nevts,
                    'status': status,
                    'out': f,
                    'size': os.path.getsize(f),
                    'user': userid
                }
            }
            print('%s %s' % ('-----------writing yaml file ', outfile))
            with open(outfile, 'w') as outyaml:
                yaml.dump(record, outyaml, default_flow_style=False)

        if not hasbeenchecked:
            continue

        cmdp = '<pre>date=%s <span class="espace"/> time=%s <span class="espace"/> njobs=%i <span class="espace"/> nevents=%i <span class="espace"/> njobbad=%i <span class="espace"/> process=%s </pre>\n' % (
            ut.getdate_str(), ut.gettime_str(), njobsdone_tot, nevents_tot,
            njobsbad_tot, uid)
        stat_exist = ut.file_exist(statfile)
        with open(statfile, "a") as myfile:
            if not stat_exist:
                # A new statistics file gets its style header first.
                myfile.write(
                    '<link href="/afs/cern.ch/user/h/helsens/www/style/txtstyle.css" rel="stylesheet" type="text/css" />\n'
                )
                myfile.write(
                    '<style type="text/css"> /*<![CDATA[*/ .espace{ margin-left:3em } .espace2{ margin-top:9em } /*]]>*/ </style>\n'
                )
            myfile.write(cmdp)
        print('date=%s time=%s njobs=%i nevents=%i njobbad=%i process=%s' % (
            ut.getdate_str(), ut.gettime_str(), njobsdone_tot, nevents_tot,
            njobsbad_tot, uid))
import sys

import utils
import rule

if __name__ == '__main__':
    # Check if a sudoku file was given; without one there is nothing to do.
    if utils.file_exist("sudoku.txt"):
        sudoku = utils.read_sudoku("sudoku.txt")
        print("Sudoku grid found:")
        print("=========")
        print(utils.read_file("sudoku.txt"))
        print("=========")
    else:
        sys.exit("Error no sudoku.txt file found in the same directory")

    # "-r" as the first command-line argument forces the basic rules to be
    # regenerated. Tested explicitly instead of wrapping sys.argv[1] in a
    # bare ``except``, which would also swallow unrelated errors.
    recalculateBasicRules = len(sys.argv) > 1 and sys.argv[1] == "-r"

    # Check if the basic rules were generated.
    if not utils.file_exist("basic_rules.txt") or recalculateBasicRules:
        print(
            "basic rules for sudoku calculated and wrote inside basic_rules.txt"
        )
        rule.write_basic_rules()
    else:
        print("basic rules for sudoku read from existing basic_rules.txt")