def _download_file(download_dir, url):
    """Download ``url`` into ``download_dir`` unless it is already there.

    The local file name is the last ``/``-separated component of ``url``.
    The file is stored inside ``download_dir`` (the same place the
    existence check looks at), so a later call finds the cached copy and
    the returned path can be opened regardless of the current working
    directory.

    Args:
        download_dir: Directory that holds (or will hold) the download.
        url: Address of the file to fetch.

    Returns:
        Path of the local file. For a cached file this is the value
        returned by ``file_exist`` when that value is a string, otherwise
        ``download_dir`` joined with the file name.
    """
    import os  # local import: the file-level import block is not visible here

    filename = url.split("/")[-1]
    target = os.path.join(download_dir, filename)

    cached = file_exist(download_dir, filename)
    if cached:
        # Callers in this file treat file_exist's result as a path; fall
        # back to the joined path in case it is only a truthy flag.
        local_path = cached if isinstance(cached, str) else target
        sys.stderr.write(
            f"[-] Already downloaded: {url} (at {local_path}).\n")
        return local_path

    if download_dir:
        # urlretrieve does not create missing parent directories.
        os.makedirs(download_dir, exist_ok=True)

    sys.stderr.write(f"[*] Downloading from {url} to {target}.\n")
    with TqdmUpTo(unit='B', unit_scale=True, miniters=1,
                  desc=filename) as t:
        urllib.request.urlretrieve(url,
                                   filename=target,
                                   reporthook=t.update_to)
    return target
def _download_and_extract(download_dir, url, src_filename, trg_filename):
    """Make sure the source/target files from the archive at ``url`` exist.

    If both files are already found under ``download_dir`` nothing is
    downloaded. Otherwise the gzip-compressed tar archive is fetched with
    ``_download_file`` and unpacked into ``download_dir``.

    Args:
        download_dir: Directory to extract into and to look for the files.
        url: Address of the ``.tar.gz`` archive.
        src_filename: Name of the source-side file expected after extraction.
        trg_filename: Name of the target-side file expected after extraction.

    Returns:
        Tuple ``(src, trg)`` of the values ``file_exist`` returned for the
        two files.

    Raises:
        OSError: If either file is still missing after extraction.
    """

    def _locate_pair():
        # Look up both files, source first, exactly as the callers expect.
        found_src = file_exist(download_dir, src_filename)
        found_trg = file_exist(download_dir, trg_filename)
        return found_src, found_trg

    located_src, located_trg = _locate_pair()
    if located_src and located_trg:
        sys.stderr.write(f"[-] Already downloaded and extracted {url}.\n")
        return located_src, located_trg

    compressed_file = _download_file(download_dir, url)

    sys.stderr.write(f"[*] Extracting {compressed_file}.\n")
    with tarfile.open(compressed_file, "r:gz") as archive:
        archive.extractall(download_dir)

    located_src, located_trg = _locate_pair()
    if not (located_src and located_trg):
        raise OSError(
            f"[!] Download/extraction failed for url {url} to path {download_dir}")

    return located_src, located_trg
Пример #3
0
def makeyaml(outdir, uid):
    """Create the placeholder ``output_<uid>.yaml`` file for a job.

    The file records a ``processing`` entry with status ``'sending'`` and
    the timestamp returned by ``ut.gettimestamp()``.

    Args:
        outdir: Directory for the yaml file; created when missing.
        uid: Identifier used in the file name.

    Returns:
        ``True`` when the file was written, ``False`` when a file with
        that name already existed (it is left untouched).
    """
    if not ut.dir_exist(outdir):
        # os.makedirs instead of shelling out to ``mkdir``: it creates
        # nested directories and is not affected by spaces or shell
        # metacharacters in the path.
        try:
            os.makedirs(outdir)
        except OSError:
            # Tolerate the directory having appeared in the meantime;
            # anything else is a real failure.
            if not os.path.isdir(outdir):
                raise

    if not outdir.endswith('/'):
        outdir += '/'
    outfile = '%soutput_%s.yaml' % (outdir, uid)
    print('%s %s' % ('==========================================  ', outfile))

    if ut.file_exist(outfile):
        return False

    data = {
        'processing': {
            'status': 'sending',
            'timestamp': ut.gettimestamp(),
        }
    }

    with open(outfile, 'w') as outyaml:
        yaml.dump(data, outyaml, default_flow_style=False)

    return True
Пример #4
0
    def run(self):
        """Write one summary row per merged process, then a 'total' row.

        Walks ``self.indir``; every directory that holds a ``merge.yaml``
        contributes one ``,,``-separated row to ``self.OutFile`` with the
        process name, event count, done/EOS/bad file counts, size (bytes
        divided by 1e9) and one column per user of ``us.users``. The
        running totals ``self.ntot_events``, ``self.ntot_files``,
        ``self.tot_size``, ``self.ntot_bad`` and ``self.ntot_eos`` are
        updated along the way. Merge files that cannot be parsed, or that
        are empty, are reported and skipped.
        """
        # Per-user totals over all processes, one slot per known user.
        usermaptot = dict.fromkeys(us.users, 0)

        for dirpath, _subdirs, _files in os.walk(self.indir):
            mergefile = dirpath + '/merge.yaml'
            if not ut.file_exist(mergefile):
                continue
            process = dirpath.replace(self.basedir, '')
            print('%s %s' % ('--------------  process  ', process))

            tmpf = None
            with open(mergefile, 'r') as stream:
                try:
                    # NOTE(review): yaml.load without an explicit Loader can
                    # build arbitrary Python objects; consider safe_load if
                    # these files can come from an untrusted source.
                    tmpf = yaml.load(stream)
                except yaml.YAMLError as exc:
                    print(exc)
            if tmpf is None:
                # Parse error or empty file: there is nothing to report,
                # and indexing None below would raise TypeError.
                print('skipping unreadable merge file %s' % mergefile)
                continue

            merged = tmpf['merge']
            events_tot = merged['nevents']
            size_tot = merged['size'] / 1000000000.
            bad_tot = merged['nbad']
            files_tot = merged['ndone']

            # Only look at EOS when the merge file lists at least one job.
            nfileseos = 0
            eos_process_dir = '%s%s' % (self.eosdir, process)
            if files_tot + bad_tot != 0 and ut.dir_exist(eos_process_dir):
                nfileseos = len(glob.glob('%s/output*' % eos_process_dir))

            print('nevents               : %i' % events_tot)
            print('nfiles on eos/checked good/checked bad : %i/%i/%i' % (
                nfileseos, files_tot, bad_tot))

            # Highlight rows where EOS holds more files than were checked.
            if nfileseos > files_tot + bad_tot:
                marked_b = '<h2><mark>'
                marked_e = '</mark></h2>'
            else:
                marked_b = ''
                marked_e = ''

            self.ntot_events += int(events_tot)
            self.ntot_files += int(files_tot)
            self.tot_size += float(size_tot)
            self.ntot_bad += int(bad_tot)
            self.ntot_eos += int(nfileseos)

            usermap = dict.fromkeys(us.users, 0)
            for key, value in merged['users'].items():
                usermap[key] = value
                # A user that is not listed in us.users raises KeyError here.
                usermaptot[key] = usermaptot[key] + value

            toaddus = ''.join(
                ',,%i' % value for _key, value in sorted(usermap.items()))

            cmd = '%s,,%s,,%s%i%s,,%s%i%s,,%i,,%.2f%s\n' % (
                process, self.comma_me(str(events_tot)),
                marked_b, files_tot, marked_e,
                marked_b, nfileseos, marked_e,
                bad_tot, size_tot, toaddus)
            self.OutFile.write(cmd)

        toaddustot = ''.join(
            ',,%i' % value for _key, value in sorted(usermaptot.items()))

        cmd = '%s,,%s,,%s,,%s,,%s,,%.2f%s\n' % (
            'total',
            self.comma_me(str(self.ntot_events)),
            self.comma_me(str(self.ntot_files)),
            self.comma_me(str(self.ntot_eos)),
            self.comma_me(str(self.ntot_bad)),
            self.tot_size, toaddustot)
        self.OutFile.write(cmd)
Пример #5
0
    def check(self):
        """Compare the file count on EOS with the merged bookkeeping.

        For every directory under ``self.indireos`` (optionally restricted
        to ``self.process``) the entries whose name contains ``'output'``
        are counted and compared with ``ndone + nbad`` from the matching
        ``merge.yaml`` under ``self.indirafs``:

        * no ``merge.yaml`` yet, or fewer merged files than on EOS: a
          ``check`` marker file is touched;
        * more merged files than on EOS: the ``output*.yaml`` files and
          ``merge.yaml`` of that process are removed;
        * equal counts: an existing ``check`` marker is removed.

        A ``files.yaml`` with the counts per process is written to
        ``self.indirafs`` at the end. Exits with status 3 when
        ``ut.testeos`` reports a problem.
        """
        # Test EOS first so no directory scan is done when we exit anyway.
        if not ut.testeos(eostest, eostest_size):
            print('eos seems to have problems, should check, will exit')
            sys.exit(3)

        ldir = [x[0] for x in os.walk(self.indireos)]

        dic = {}
        for l in ldir:
            proc = l.replace(self.indireos, '')
            if self.process != '' and self.process != proc:
                continue

            # Skip anything that has an "ana" path component.
            if 'ana' in l.split('/'):
                print('is anana')
                print(l)
                continue

            # NOTE(review): l is a path yielded by os.walk(self.indireos), so
            # this only matches when the walked path itself equals one of
            # these bare names; probably meant to test a path component.
            # Left unchanged -- confirm the intent before altering it.
            if l in ('BADPYTHIA', 'lhe', '__restored_files__', 'backup'):
                continue

            print('%s %s' % ('--------------------- eos process ', l))
            eos_proc_dir = '%s/%s' % (self.indireos, proc)
            nfileseos = 0
            if os.path.isdir(eos_proc_dir):
                nfileseos = sum(
                    1 for x in os.listdir(eos_proc_dir) if 'output' in x)

            if nfileseos == 0:
                continue

            afs_proc_dir = '%s/%s' % (self.indirafs, proc)
            checkfile = '%s/check' % afs_proc_dir
            mergefile = self.indirafs + '/' + proc + '/merge.yaml'
            print('%s %s' % ('mergefile  ', mergefile))
            if not ut.file_exist(mergefile):
                # Never merged: flag the process so it gets merged.
                if not ut.dir_exist(afs_proc_dir):
                    os.system('mkdir -p %s' % afs_proc_dir)
                self.touch(checkfile)
                continue

            if not os.path.isdir(self.indirafs):
                os.system('mkdir %s' % self.indirafs)

            tmpf = None
            with open(mergefile, 'r') as stream:
                try:
                    # NOTE(review): yaml.load without an explicit Loader can
                    # build arbitrary Python objects; consider safe_load if
                    # these files can come from an untrusted source.
                    tmpf = yaml.load(stream)
                except yaml.YAMLError as exc:
                    print(exc)
            if tmpf is None:
                # Parse error or empty file; indexing None below would
                # raise TypeError, so report and move on.
                print('skipping unreadable merge file %s' % mergefile)
                continue

            bad_tot = tmpf['merge']['nbad']
            files_tot = tmpf['merge']['ndone']

            ntot_files = bad_tot + files_tot
            print('%s %s %s %s' % ("tot files  ", ntot_files,
                                   "  files eos  ", nfileseos))
            dic[proc] = {'neos': nfileseos, 'nmerged': ntot_files}
            print(checkfile)
            if ntot_files < nfileseos:
                self.touch(checkfile)
            elif ntot_files > nfileseos:
                # Bookkeeping lists more files than exist: drop it.
                print('%s %s %s %s' % (' ntot_files>nfileseos ', ntot_files,
                                       nfileseos, proc))
                os.system('rm %s/output*.yaml' % afs_proc_dir)
                os.system('rm %s/merge.yaml' % afs_proc_dir)
            elif ut.file_exist(checkfile):
                os.system('rm %s' % checkfile)

        outfile = self.indirafs + '/files.yaml'
        with open(outfile, 'w') as outyaml:
            yaml.dump(dic, outyaml, default_flow_style=False)
Пример #6
0
    def merge(self, force):
        """Aggregate the per-job ``output_*.yaml`` files into ``merge.yaml``.

        Walks ``self.indir`` (optionally restricted to ``self.process``).
        A directory without any ``output_*.yaml`` gets its ``merge.yaml``
        removed. Otherwise, when a ``check`` file is present or ``force``
        is true, all job files are read and a ``merge.yaml`` is written
        with the totals (events, size, done/bad counts), the list of good
        and bad output files, the output directory and a per-user count
        of good jobs. Jobs with status ``'sending'`` are ignored; job
        files that cannot be parsed or are empty are reported and skipped.

        Args:
            force: Merge even when no ``check`` marker file exists.
        """
        print('%s %s %s' % (self.indir, '  ====  ', self.process))
        ldir = [x[0] for x in os.walk(self.indir)]
        for l in ldir:
            process = l.replace(self.indir, "")
            if self.process != '' and self.process != process:
                continue

            outfile = l + '/merge.yaml'
            all_files = glob.glob("%s/output_*.yaml" % (l))
            if not all_files:
                # No job files left: drop a stale summary.
                if os.path.isfile("%s/merge.yaml" % (l)):
                    os.system("rm %s/merge.yaml" % (l))
                continue

            # Only directories flagged with a 'check' file are merged,
            # unless the caller forces it.
            print('%s/check' % (l))
            if not ut.file_exist('%s/check' % (l)) and not force:
                continue

            totsize = 0
            totevents = 0
            outfiles = []
            outfilesbad = []
            users = {}
            outdir = None
            ndone = 0
            nbad = 0

            print('merging %i files in directory %s' % (len(all_files), l))
            for f in all_files:
                if not os.path.isfile(f):
                    print('file does not exists... %s' % f)
                    continue

                with open(f, 'r') as stream:
                    try:
                        # NOTE(review): yaml.load without an explicit Loader
                        # can build arbitrary Python objects; consider
                        # safe_load if these files can be untrusted.
                        tmpf = yaml.load(stream)
                    except yaml.YAMLError as exc:
                        print(exc)
                        continue
                if tmpf is None:
                    # An empty file loads as None; indexing it would raise
                    # TypeError and abort the whole merge.
                    print('skipping empty yaml file %s' % f)
                    continue

                info = tmpf['processing']
                status = info['status']
                if status == 'sending':
                    continue

                out = info['out']
                basename = out.split('/')[-1]
                # Strip only the trailing file name; str.replace would also
                # remove the same text anywhere else in the path.
                outdir = out[:len(out) - len(basename)]
                process = info['process']

                if status == 'BAD':
                    nbad += 1
                    outfilesbad.append(basename)
                    continue

                totsize += info['size']
                totevents += info['nevents']
                outfiles.append([basename, info['nevents']])
                users[info['user']] = users.get(info['user'], 0) + 1
                ndone += 1

            dic = {
                'merge': {
                    'process': process,
                    'nevents': totevents,
                    'outfiles': outfiles,
                    'outdir': outdir,
                    'size': totsize,
                    'ndone': ndone,
                    'nbad': nbad,
                    'outfilesbad': outfilesbad,
                    'users': users
                }
            }
            try:
                with open(outfile, 'w') as outyaml:
                    yaml.dump(dic, outyaml, default_flow_style=False)
            except IOError as exc:
                # One retry after a pause; a second failure propagates.
                print("I/O error({0}): {1}".format(exc.errno, exc.strerror))
                print('%s %s' % ("outfile ", outfile))
                import time
                time.sleep(10)
                with open(outfile, 'w') as outyaml:
                    yaml.dump(dic, outyaml, default_flow_style=False)
Пример #7
0
    def check(self, force, statfile):
        """Validate the ``output_*.root`` files of each process directory.

        Walks ``self.indir`` (optionally restricted to ``self.process``)
        and, for directories whose ``check`` marker exists under
        ``self.yamldir`` (or always when ``force`` is true), runs
        ``self.checkFile_root`` on every root file that has no sufficiently
        large yaml record yet. One yaml record per job is written with the
        process, job id, event count, ``'DONE'``/``'BAD'`` status, path,
        size and owner. When at least one file was looked at, a summary
        line is appended to ``statfile`` (an HTML header is written first
        if the file did not exist) and echoed on stdout. Exits with status
        3 when ``ut.testeos`` reports a problem.

        Args:
            force: Re-check files and directories even when they would
                otherwise be skipped.
            statfile: Path of the HTML statistics file to append to.
        """
        walked_dirs = [entry[0] for entry in os.walk(self.indir)]

        if not ut.testeos(eostest, eostest_size):
            print('eos seems to have problems, should check, will exit')
            sys.exit(3)

        for process in walked_dirs:
            uid = process.replace(self.indir, "")
            if uid == "":
                continue
            if self.process != '' and uid != self.process:
                continue

            marker = '%s/%s/check' % (self.yamldir, uid)
            print(marker)
            if not ut.file_exist(marker) and not force:
                continue

            # Directories with an "ana" path component are not checked.
            if "ana" in process.split('/'):
                print('is anana')
                print(process)
                continue

            root_files = glob.glob("%s/output_*.root" % (process))
            if not root_files:
                continue

            print('%s %s' % ('--------------------- ', uid))
            print('%s %s' % ('number of files  ', len(root_files)))
            print('%s %s' % ('process from the input directory ', uid))

            outdir = self.makeyamldir(self.yamldir + uid)
            hasbeenchecked = False
            nevents_tot = 0
            njobsdone_tot = 0
            njobsbad_tot = 0
            for root_path in root_files:

                self.count = 0
                if not os.path.isfile(root_path):
                    print('file does not exists... %s' % root_path)
                    continue

                # The job id is what follows the last '_' minus the extension.
                jobid = root_path.split('_')[-1].replace(self.fext, '')
                userid = ut.find_owner(root_path)

                outfile = '%soutput_%s.yaml' % (outdir, jobid)
                # A record larger than 100 bytes counts as already done.
                if ut.file_exist(
                        outfile) and ut.getsize(outfile) > 100 and not force:
                    continue
                hasbeenchecked = True
                print('%s %s' % ('-----------checking root file ', root_path))

                if '.root' not in self.fext:
                    print('not correct file extension %s' % self.fext)
                    continue

                nevts, is_good = self.checkFile_root(root_path, treename)
                if is_good:
                    status = 'DONE'
                    nevents_tot += nevts
                    njobsdone_tot += 1
                else:
                    status = 'BAD'
                    njobsbad_tot += 1

                record = {
                    'processing': {
                        'process': uid,
                        'jobid': jobid,
                        'nevents': nevts,
                        'status': status,
                        'out': root_path,
                        'size': os.path.getsize(root_path),
                        'user': userid
                    }
                }
                print('%s %s' % ('-----------writing yaml file ', outfile))
                with open(outfile, 'w') as outyaml:
                    yaml.dump(record, outyaml, default_flow_style=False)

            if not hasbeenchecked:
                continue

            cmdp = '<pre>date=%s <span class="espace"/> time=%s <span class="espace"/> njobs=%i <span class="espace"/> nevents=%i <span class="espace"/> njobbad=%i <span class="espace"/> process=%s </pre>\n' % (
                ut.getdate_str(), ut.gettime_str(), njobsdone_tot,
                nevents_tot, njobsbad_tot, uid)
            stat_exist = ut.file_exist(statfile)
            with open(statfile, "a") as myfile:
                if not stat_exist:
                    # New file: emit the stylesheet header once.
                    myfile.write(
                        '<link href="/afs/cern.ch/user/h/helsens/www/style/txtstyle.css" rel="stylesheet" type="text/css" />\n'
                    )
                    myfile.write(
                        '<style type="text/css"> /*<![CDATA[*/ .espace{ margin-left:3em } .espace2{ margin-top:9em } /*]]>*/ </style>\n'
                    )

                myfile.write(cmdp)

            print('date=%s  time=%s  njobs=%i  nevents=%i  njobbad=%i  process=%s' % (
                ut.getdate_str(), ut.gettime_str(), njobsdone_tot,
                nevents_tot, njobsbad_tot, uid))
Пример #8
0
import utils
import rule
import sys

if __name__ == '__main__':

    # A sudoku.txt file in the working directory is mandatory.
    if not utils.file_exist("sudoku.txt"):
        sys.exit("Error no sudoku.txt file found in the same directory")

    sudoku = utils.read_sudoku("sudoku.txt")
    print("Sudoku grid found:")
    print("=========")
    print(utils.read_file("sudoku.txt"))
    print("=========")

    # "-r" as the first command-line argument forces the basic rules to be
    # regenerated. An explicit length check replaces the former bare
    # ``except:``, which would also have hidden unrelated errors.
    recalculateBasicRules = len(sys.argv) > 1 and sys.argv[1] == "-r"

    # Generate the basic rules when they are missing or a rebuild was asked.
    if (not utils.file_exist("basic_rules.txt") or recalculateBasicRules):
        rule.write_basic_rules()
        # Report only once the file has actually been written.
        print(
            "basic rules for sudoku calculated and wrote inside basic_rules.txt"
        )
    else:
        print("basic rules for sudoku read from existing basic_rules.txt")