def update(self):

    bookdir = self.stage.bookdir

    # Test whether output directory exists.

    if larbatch_posix.exists(bookdir):

        # Output directory exists.

        self.exists = True
        self.nfile = 0
        self.nev = 0
        self.nana = 0
        self.nerror = 0
        self.nmiss = 0

        # Count good files and events.

        eventsfile = os.path.join(bookdir, 'events.list')
        if larbatch_posix.exists(eventsfile):
            lines = larbatch_posix.readlines(eventsfile)
            for line in lines:
                words = line.split()
                if len(words) >= 2:
                    self.nfile = self.nfile + 1
                    self.nev = self.nev + int(words[1])

        # Count good analysis root files.

        filesana = os.path.join(bookdir, 'filesana.list')
        if larbatch_posix.exists(filesana):
            lines = larbatch_posix.readlines(filesana)
            for line in lines:
                self.nana = self.nana + 1

        # Count errors.

        badfile = os.path.join(bookdir, 'bad.list')
        if larbatch_posix.exists(badfile):
            lines = larbatch_posix.readlines(badfile)
            for line in lines:
                if line.strip():
                    self.nerror += 1

        # Count missing files.

        missingfile = os.path.join(bookdir, 'missing_files.list')
        if larbatch_posix.exists(missingfile):
            lines = larbatch_posix.readlines(missingfile)
            for line in lines:
                if line.strip():
                    self.nmiss += 1

    else:

        # Output directory does not exist.

        self.exists = False
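# The bad.list and missing_files.list counts above share one pattern:
# count the nonblank lines of a plain-text bookkeeping file.  A minimal
# standalone sketch of that pattern using plain open(); the method above
# uses larbatch_posix instead so that the same logic works transparently
# for dCache paths.

import os

def count_nonblank_lines(path):
    # Return the number of nonblank lines in a text file, or 0 if the
    # file does not exist.
    if not os.path.exists(path):
        return 0
    n = 0
    with open(path) as f:
        for line in f:
            if line.strip():
                n += 1
    return n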
def get_fcl(self, fclname):
    fcl_list = []
    for name in fclname:
        fcl = ''
        for fcldir in self.fclpath:
            fcl = os.path.join(fcldir, name)
            if larbatch_posix.exists(fcl):
                break
        if fcl == '' or not larbatch_posix.exists(fcl):
            raise IOError('Could not find fcl file %s.' % name)
        fcl_list.append(fcl)
    return fcl_list
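# A standalone sketch of the same search-path resolution performed by
# get_fcl, usable outside the class.  Directory and file names in any
# usage are up to the caller; the IOError convention mirrors the method
# above.

import os

def find_fcl(fclpath, name):
    # Return the first existing fcldir/name combination, else raise.
    for fcldir in fclpath:
        fcl = os.path.join(fcldir, name)
        if os.path.exists(fcl):
            return fcl
    raise IOError('Could not find fcl file %s.' % name)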
def addLayerTwo(path, recreate=True):

    # Don't do anything if this file is not located in dCache (/pnfs/...)
    # or has nonzero size.

    if larbatch_posix.exists(path) and path[0:6] == '/pnfs/' and larbatch_posix.stat(path).st_size == 0:

        if recreate:
            print('Adding layer two for path %s.' % path)
        else:
            print('Deleting empty file %s.' % path)

        # At this point we have a zero-size file in dCache, the kind of
        # file that may be missing layer two.
        # Delete the file and recreate it using ifdh.

        larbatch_posix.remove(path)
        if not recreate:
            return
        test_proxy()

        # Make sure environment variables X509_USER_CERT and X509_USER_KEY
        # are not defined (they confuse ifdh).

        save_vars = {}
        for var in ('X509_USER_CERT', 'X509_USER_KEY'):
            if var in os.environ:
                save_vars[var] = os.environ[var]
                del os.environ[var]

        # Do ifdh cp.

        command = ['ifdh', 'cp', '/dev/null', path]
        jobinfo = subprocess.Popen(command, stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        q = queue.Queue()
        thread = threading.Thread(target=wait_for_subprocess, args=[jobinfo, q])
        thread.start()
        thread.join(timeout=60)
        if thread.is_alive():
            print('Terminating subprocess.')
            jobinfo.terminate()
            thread.join()
        rc = q.get()
        jobout = convert_str(q.get())
        joberr = convert_str(q.get())
        if rc != 0:
            for var in list(save_vars.keys()):
                os.environ[var] = save_vars[var]
            raise IFDHError(command, rc, jobout, joberr)

        # Restore environment variables.

        for var in list(save_vars.keys()):
            os.environ[var] = save_vars[var]
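# addLayerTwo hands the subprocess to wait_for_subprocess, which is
# defined elsewhere in larbatch.  A minimal sketch consistent with how
# the queue is drained above (return code first, then stdout, then
# stderr); the real helper may accept additional arguments.

def wait_for_subprocess(jobinfo, q):
    # Wait for the subprocess to finish, then post its results on the
    # queue in the order the caller reads them.
    jobout, joberr = jobinfo.communicate()
    rc = jobinfo.poll()
    q.put(rc)
    q.put(jobout)
    q.put(joberr)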
def main():

    # Parse arguments.

    parser = argparse.ArgumentParser(prog='generate_simple_weighted_template.py')
    parser.add_argument('file')
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--sbnd', action='store_true',
                       help='Generate metrics for SBND')
    group.add_argument('--icarus', action='store_true',
                       help='Generate metrics for ICARUS')
    args = parser.parse_args()

    if args.sbnd:
        print("Generate metrics for SBND")
    elif args.icarus:
        print("Generate metrics for ICARUS")

    if not larbatch_posix.exists(args.file):
        print('Input file %s does not exist.' % args.file)
        return 1

    print('\nOpening %s' % args.file)
    rootfile = TFile.Open(args.file)
    if not rootfile.IsOpen() or rootfile.IsZombie():
        print('Failed to open %s' % args.file)
        return 1

    if args.sbnd:
        fcl_params = fhicl.make_pset('flashmatch_sbnd.fcl')
        pset = dotDict(fcl_params['sbnd_simple_flashmatch'])
        dir = rootfile.Get(args.file + ":/fmatch")
        nuslice_tree = dir.Get("nuslicetree")
    elif args.icarus:
        fcl_params = fhicl.make_pset('flashmatch_icarus.fcl')
        # TODO: add option to use cryo 0 and cryo 1
        pset = dotDict(fcl_params['icarus_simple_flashmatch_0'])
        dir = rootfile.Get(args.file + ":/fmatchCryo0")
        nuslice_tree = dir.Get("nuslicetree")

    generator(nuslice_tree, rootfile, pset)
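# dotDict is used above but not defined in this section.  A plausible
# minimal sketch, assuming it only adds attribute-style access to a
# plain dict so that pset.SomeKey works as well as pset['SomeKey']:

class dotDict(dict):
    def __getattr__(self, name):
        # Fall back to dictionary lookup for unknown attributes.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)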
def main(argv):

    # Parse arguments.

    input_files = []
    level = 1
    nfilemax = 0
    all = 0

    args = argv[1:]
    while len(args) > 0:
        if args[0] == '-h' or args[0] == '--help':

            # Help.

            help()
            return 0

        elif args[0] == '--level' and len(args) > 1:

            # Analyze level.

            level = int(args[1])
            del args[0:2]

        elif args[0] == '--nfile' and len(args) > 1:

            # Number of files.

            nfilemax = int(args[1])
            del args[0:2]

        elif args[0] == '--all':

            # All files flag.

            all = 1
            del args[0]

        elif args[0][0] == '-':

            # Unknown option.

            print('Unknown option %s' % args[0])
            return 1

        elif args[0][0] == '@':

            # Read in file list to input files.

            filelistname = args[0][1:]
            if larbatch_posix.exists(filelistname):
                for filename in larbatch_posix.readlines(filelistname):
                    input_files.append(filename.strip())
            else:
                print('File list %s does not exist.' % filelistname)
                return 1
            del args[0]

        else:

            # Add single file to input files.

            input_files.append(args[0])
            del args[0]

    # Loop over input files.

    gtrees = {}
    gbranches = {}
    nfile = 0
    for input_file in input_files:
        if nfilemax > 0 and nfile >= nfilemax:
            break
        nfile = nfile + 1

        if not larbatch_posix.exists(input_file):
            print('Input file %s does not exist.' % input_file)
            return 1

        print('\nOpening %s' % input_file)
        root = ROOT.TFile.Open(input_file)
        if not root.IsOpen() or root.IsZombie():
            print('Failed to open %s' % input_file)
            return 1

        # Analyze this file.

        analyze(root, level, gtrees, gbranches, all)

    print('\n%d files analyzed.' % nfile)

    # Print summary of trees.

    print('\nTrees from all files:\n')
    for key in sorted(gtrees.keys()):
        nentry = gtrees[key]
        print('%s has %d total entries.' % (key, nentry))

    # Print summary of branches.

    allname = 'All branches'
    ntot = 0
    nzip = 0
    if level > 0:
        print('\nBranches of Events tree from all files:\n')
        print('   Total bytes  Zipped bytes   Comp.  Branch name')
        print('   -----------  ------------   -----  -----------')
        for key in sorted(gbranches.keys()):
            if key != allname:
                ntot = gbranches[key][0]
                nzip = gbranches[key][1]
                if nzip != 0:
                    comp = float(ntot) / float(nzip)
                else:
                    comp = 0.
                print('%14d%14d%8.2f  %s' % (ntot, nzip, comp, key))
        if allname in gbranches:
            ntot = gbranches[allname][0]
            nzip = gbranches[allname][1]
            if nzip != 0:
                comp = float(ntot) / float(nzip)
            else:
                comp = 0.
            print('%14d%14d%8.2f  %s' % (ntot, nzip, comp, allname))

    # Print average event size.

    if 'Events' in gtrees:
        nev = gtrees['Events']
        if nev != 0:
            nevtot = 1.e-6 * float(ntot) / float(nev)
            nevzip = 1.e-6 * float(nzip) / float(nev)
        else:
            nevtot = 0.
            nevzip = 0.
        print()
        print('%10d events.' % nev)
        if level > 0:
            print('%7.2f Mb average size per event.' % nevtot)
            print('%7.2f Mb average zipped size per event.' % nevzip)

    # Done.

    return 0
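# analyze() is called above but defined elsewhere in rootstat.py.  A
# hedged sketch of the contract main() relies on: accumulate per-tree
# entry counts into gtrees and per-branch (total bytes, zipped bytes)
# pairs into gbranches, including the synthetic 'All branches' entry.
# The real implementation is more involved (recursion into
# subdirectories, per-file printout, handling of the all flag).

def analyze(root, level, gtrees, gbranches, all):
    for key in root.GetListOfKeys():
        obj = key.ReadObj()
        if obj.InheritsFrom('TTree'):
            name = obj.GetName()
            gtrees[name] = gtrees.get(name, 0) + int(obj.GetEntries())
            if level > 0 and name == 'Events':
                ntot_all = 0
                nzip_all = 0
                for branch in obj.GetListOfBranches():
                    ntot = int(branch.GetTotBytes('*'))
                    nzip = int(branch.GetZipBytes('*'))
                    ntot_all += ntot
                    nzip_all += nzip
                    bname = branch.GetName()
                    t, z = gbranches.get(bname, (0, 0))
                    gbranches[bname] = (t + ntot, z + nzip)
                t, z = gbranches.get('All branches', (0, 0))
                gbranches['All branches'] = (t + ntot_all, z + nzip_all)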
def __init__(self, project_element, default_first_input_list, default_input_lists):

    # Assign default values.

    self.name = ''                      # Project name.
    self.num_events = 0                 # Total events (all jobs).
    self.num_jobs = 1                   # Number of jobs.
    self.max_files_per_job = 0          # Max number of files per job.
    self.os = ''                        # Batch OS.
    self.resource = 'DEDICATED,OPPORTUNISTIC'  # Jobsub resources.
    self.role = ''                      # Role (normally Analysis or Production).
    self.lines = ''                     # Arbitrary condor commands.
    self.server = '-'                   # Jobsub server.
    self.site = ''                      # Site.
    self.blacklist = ''                 # Blacklist.
    self.cpu = 0                        # Number of cpus.
    self.disk = ''                      # Disk space (string value+unit).
    self.memory = 0                     # Amount of memory (integer MB).
    self.merge = 'hadd -T'              # Histogram merging program.
    self.release_tag = ''               # Larsoft release tag.
    self.release_qual = 'debug'         # Larsoft release qualifier.
    self.version = ''                   # Project version.
    self.local_release_dir = ''         # Larsoft local release directory.
    self.local_release_tar = ''         # Larsoft local release tarball.
    self.file_type = ''                 # Sam file type.
    self.run_type = ''                  # Sam run type.
    self.run_number = 0                 # Sam run number.
    self.script = 'condor_lar.sh'       # Batch script.
    self.validate_on_worker = 0         # Run post-job validation on the worker node.
    self.copy_to_fts = 0                # Copy the file to a dropbox scanned by fts.
                                        # Note that a copy is still sent to <outdir>.
    self.start_script = 'condor_start_project.sh'  # Sam start project script.
    self.stop_script = 'condor_stop_project.sh'    # Sam stop project script.
    self.force_dag = 0                  # Force dag for sam input jobs.
    self.fclpath = []                   # Fcl search path.
    self.stages = []                    # List of stages (StageDef objects).
    self.parameters = {}                # Dictionary of metadata parameters.

    # Extract values from xml.

    # Project name (attribute).

    if 'name' in dict(project_element.attributes):
        self.name = str(project_element.attributes['name'].firstChild.data)
    if self.name == '':
        raise XMLError('Project name not specified.')

    # Total events (subelement).

    num_events_elements = project_element.getElementsByTagName('numevents')
    for num_events_element in num_events_elements:
        if num_events_element.parentNode == project_element:
            self.num_events = int(num_events_element.firstChild.data)
    if self.num_events == 0:
        raise XMLError('Number of events not specified.')

    # Number of jobs (subelement).

    num_jobs_elements = project_element.getElementsByTagName('numjobs')
    for num_jobs_element in num_jobs_elements:
        if num_jobs_element.parentNode == project_element:
            self.num_jobs = int(num_jobs_element.firstChild.data)

    # Max number of files per job (subelement).

    max_files_per_job_elements = project_element.getElementsByTagName('maxfilesperjob')
    for max_files_per_job_element in max_files_per_job_elements:
        if max_files_per_job_element.parentNode == project_element:
            self.max_files_per_job = int(max_files_per_job_element.firstChild.data)

    # OS (subelement).

    os_elements = project_element.getElementsByTagName('os')
    for os_element in os_elements:
        if os_element.parentNode == project_element:
            self.os = str(os_element.firstChild.data)
            self.os = ''.join(self.os.split())

    # Resource (subelement).

    resource_elements = project_element.getElementsByTagName('resource')
    for resource_element in resource_elements:
        if resource_element.parentNode == project_element:
            self.resource = str(resource_element.firstChild.data)
            self.resource = ''.join(self.resource.split())

    # Role (subelement).
    role_elements = project_element.getElementsByTagName('role')
    for role_element in role_elements:
        if role_element.parentNode == project_element:
            self.role = str(role_element.firstChild.data)

    # Lines (subelement).

    lines_elements = project_element.getElementsByTagName('lines')
    for lines_element in lines_elements:
        if lines_element.parentNode == project_element:
            self.lines = str(lines_element.firstChild.data)

    # Server (subelement).

    server_elements = project_element.getElementsByTagName('server')
    for server_element in server_elements:
        if server_element.parentNode == project_element:
            self.server = str(server_element.firstChild.data)

    # Site (subelement).

    site_elements = project_element.getElementsByTagName('site')
    for site_element in site_elements:
        if site_element.parentNode == project_element:
            self.site = str(site_element.firstChild.data)
            self.site = ''.join(self.site.split())

    # Blacklist (subelement).

    blacklist_elements = project_element.getElementsByTagName('blacklist')
    for blacklist_element in blacklist_elements:
        if blacklist_element.parentNode == project_element:
            self.blacklist = str(blacklist_element.firstChild.data)
            self.blacklist = ''.join(self.blacklist.split())

    # Cpu (subelement).

    cpu_elements = project_element.getElementsByTagName('cpu')
    for cpu_element in cpu_elements:
        if cpu_element.parentNode == project_element:
            self.cpu = int(cpu_element.firstChild.data)

    # Disk (subelement).

    disk_elements = project_element.getElementsByTagName('disk')
    for disk_element in disk_elements:
        if disk_element.parentNode == project_element:
            self.disk = str(disk_element.firstChild.data)
            self.disk = ''.join(self.disk.split())

    # Memory (subelement).

    memory_elements = project_element.getElementsByTagName('memory')
    for memory_element in memory_elements:
        if memory_element.parentNode == project_element:
            self.memory = int(memory_element.firstChild.data)

    # Merge (subelement).

    merge_elements = project_element.getElementsByTagName('merge')
    for merge_element in merge_elements:
        if merge_element.parentNode == project_element:
            if merge_element.firstChild:
                self.merge = str(merge_element.firstChild.data)
            else:
                self.merge = ''

    # Larsoft (subelement).

    larsoft_elements = project_element.getElementsByTagName('larsoft')
    if larsoft_elements:

        # Release tag (subelement).

        tag_elements = larsoft_elements[0].getElementsByTagName('tag')
        if tag_elements and tag_elements[0].firstChild != None:
            self.release_tag = str(tag_elements[0].firstChild.data)

        # Release qualifier (subelement).

        qual_elements = larsoft_elements[0].getElementsByTagName('qual')
        if qual_elements:
            self.release_qual = str(qual_elements[0].firstChild.data)

        # Local release directory or tarball (subelement).

        local_elements = larsoft_elements[0].getElementsByTagName('local')
        if local_elements:
            local = str(local_elements[0].firstChild.data)
            if larbatch_posix.isdir(local):
                self.local_release_dir = local
            else:
                self.local_release_tar = local

    # Version (subelement).

    version_elements = project_element.getElementsByTagName('version')
    if version_elements:
        self.version = str(version_elements[0].firstChild.data)
    else:
        self.version = self.release_tag

    # Make sure local test release directory/tarball exists, if specified.
    # Existence of non-null local_release_dir has already been tested.

    if self.local_release_tar != '' and not larbatch_posix.exists(self.local_release_tar):
        raise IOError('Local release directory/tarball %s does not exist.'
                      % self.local_release_tar)

    # Sam file type (subelement).
    file_type_elements = project_element.getElementsByTagName('filetype')
    if file_type_elements:
        self.file_type = str(file_type_elements[0].firstChild.data)

    # Sam run type (subelement).

    run_type_elements = project_element.getElementsByTagName('runtype')
    if run_type_elements:
        self.run_type = str(run_type_elements[0].firstChild.data)

    # Sam run number (subelement).

    run_number_elements = project_element.getElementsByTagName('runnumber')
    if run_number_elements:
        self.run_number = int(run_number_elements[0].firstChild.data)

    # Batch script (subelement).

    script_elements = project_element.getElementsByTagName('script')
    for script_element in script_elements:
        if script_element.parentNode == project_element:
            self.script = str(script_element.firstChild.data)

    # Make sure batch script exists, and convert into a full path.

    script_path = ''
    try:
        jobinfo = subprocess.Popen(['which', self.script],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        jobout, joberr = jobinfo.communicate()
        jobout = convert_str(jobout)
        joberr = convert_str(joberr)
        rc = jobinfo.poll()
        script_path = jobout.splitlines()[0].strip()
    except:
        pass
    if script_path == '' or not larbatch_posix.access(script_path, os.X_OK):
        raise IOError('Script %s not found.' % self.script)
    self.script = script_path

    # Worker validation flag (subelement).

    worker_validations = project_element.getElementsByTagName('check')
    for worker_validation in worker_validations:
        if worker_validation.parentNode == project_element:
            self.validate_on_worker = int(worker_validation.firstChild.data)

    # Copy-to-fts flag (subelement).

    worker_copys = project_element.getElementsByTagName('copy')
    for worker_copy in worker_copys:
        if worker_copy.parentNode == project_element:
            self.copy_to_fts = int(worker_copy.firstChild.data)

    # Start project batch script (subelement).

    start_script_elements = project_element.getElementsByTagName('startscript')
    for start_script_element in start_script_elements:
        if start_script_element.parentNode == project_element:
            self.start_script = str(start_script_element.firstChild.data)
            self.force_dag = 1

    # Make sure start project batch script exists, and convert into a full path.

    script_path = ''
    try:
        jobinfo = subprocess.Popen(['which', self.start_script],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        jobout, joberr = jobinfo.communicate()
        jobout = convert_str(jobout)
        joberr = convert_str(joberr)
        rc = jobinfo.poll()
        script_path = jobout.splitlines()[0].strip()
    except:
        pass
    self.start_script = script_path

    # Stop project batch script (subelement).

    stop_script_elements = project_element.getElementsByTagName('stopscript')
    for stop_script_element in stop_script_elements:
        if stop_script_element.parentNode == project_element:
            self.stop_script = str(stop_script_element.firstChild.data)
            self.force_dag = 1

    # Make sure stop project batch script exists, and convert into a full path.

    script_path = ''
    try:
        jobinfo = subprocess.Popen(['which', self.stop_script],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        jobout, joberr = jobinfo.communicate()
        jobout = convert_str(jobout)
        joberr = convert_str(joberr)
        rc = jobinfo.poll()
        script_path = jobout.splitlines()[0].strip()
    except:
        pass
    self.stop_script = script_path

    # Fcl search path (repeatable subelement).

    fclpath_elements = project_element.getElementsByTagName('fcldir')
    for fclpath_element in fclpath_elements:
        self.fclpath.append(str(fclpath_element.firstChild.data))

    # Add $FHICL_FILE_PATH.

    if 'FHICL_FILE_PATH' in os.environ:
        for fcldir in os.environ['FHICL_FILE_PATH'].split(':'):
            if larbatch_posix.exists(fcldir):
                self.fclpath.append(fcldir)

    # Make sure all directories of fcl search path exist.
    for fcldir in self.fclpath:
        if not larbatch_posix.exists(fcldir):
            raise IOError('Fcl search directory %s does not exist.' % fcldir)

    # Project stages (repeatable subelement).

    stage_elements = project_element.getElementsByTagName('stage')
    default_previous_stage = ''
    default_input_lists[default_previous_stage] = default_first_input_list
    for stage_element in stage_elements:

        # Get base stage, if any.

        base_stage = None
        if 'base' in dict(stage_element.attributes):
            base_name = str(stage_element.attributes['base'].firstChild.data)
            if base_name != '':
                for stage in self.stages:
                    if stage.name == base_name:
                        base_stage = stage
                        break
                if base_stage == None:
                    raise LookupError('Base stage %s not found.' % base_name)

        self.stages.append(
            StageDef(stage_element, base_stage, default_input_lists,
                     default_previous_stage, self.num_jobs, self.num_events,
                     self.max_files_per_job, self.merge, self.cpu, self.disk,
                     self.memory, self.validate_on_worker, self.copy_to_fts,
                     self.script, self.start_script, self.stop_script,
                     self.site, self.blacklist))
        default_previous_stage = self.stages[-1].name
        default_input_lists[default_previous_stage] = os.path.join(
            self.stages[-1].bookdir, 'files.list')

    # Dictionary of metadata parameters.

    param_elements = project_element.getElementsByTagName('parameter')
    for param_element in param_elements:
        name = str(param_element.attributes['name'].firstChild.data)
        value = str(param_element.firstChild.data)
        self.parameters[name] = value

    # Done.

    return
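# A minimal example of the project XML this constructor parses.  The
# element values are illustrative, and the stage subelements are only
# sketched (they are handled by StageDef, which is defined elsewhere).

import xml.dom.minidom

example_xml = '''<project name="myproject">
  <numevents>1000</numevents>
  <numjobs>10</numjobs>
  <larsoft>
    <tag>v09_00_00</tag>
    <qual>e20:prof</qual>
  </larsoft>
  <filetype>mc</filetype>
  <runtype>physics</runtype>
  <stage name="gen">
    <fcl>prodsingle.fcl</fcl>
  </stage>
</project>'''

doc = xml.dom.minidom.parseString(example_xml)
project_element = doc.getElementsByTagName('project')[0]
# ProjectDef(project_element, default_first_input_list, default_input_lists)
# (instantiation left commented out: it requires batch scripts findable
# via 'which' and a full larbatch environment).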
def safeexist(path):
    return larbatch_posix.exists(path)