def test__init__(df):
    assert df.namePattern == 'np', 'namePattern not initialised as np'
    assert df.lfn == 'lfn', 'lfn not initialised as lfn'
    assert df.localDir == 'ld', 'localDir not initialised as ld'

    d1 = DiracFile()
    assert d1.namePattern == '', 'namePattern not default initialised as empty'
    assert d1.lfn == '', 'lfn not default initialised as empty'
    assert d1.localDir is None, 'localDir not default initialised as None'
    assert d1.locations == [], 'locations not initialised as empty list'

    d2 = DiracFile(namePattern='np', lfn='lfn', localDir='ld')
    assert d2.namePattern == 'np', 'namePattern not keyword initialised as np, initialised as: %s\n%s' % (d2.namePattern, str(d2))
    assert d2.lfn == 'lfn', 'lfn not keyword initialised as lfn, initialised as: %s\n%s' % (d2.lfn, str(d2))
    assert d2.localDir == 'ld', 'localDir not keyword initialised as ld, initialised as: %s\n%s' % (d2.localDir, str(d2))
def uploadLocalFile(job, namePattern, localDir, should_del=True):
    """
    Upload a locally available file to the grid as a DiracFile. Randomly chooses an SE.

    Args:
        namePattern (str): name of the file
        localDir (str): localDir of the file
        should_del (bool): should we delete the local file?
    Returns:
        DiracFile: a DiracFile of the uploaded LFN on the grid
    """
    new_df = DiracFile(namePattern, localDir=localDir)
    trySEs = getConfig('DIRAC')['allDiracSE']
    random.shuffle(trySEs)
    new_lfn = os.path.join(getInputFileDir(job), namePattern)
    returnable = None
    for SE in trySEs:
        # Check that the SE is writable
        if execute('checkSEStatus("%s", "%s")' % (SE, 'Write')):
            try:
                returnable = new_df.put(force=True, uploadSE=SE, lfn=new_lfn)[0]
                break
            except GangaDiracError as err:
                raise GangaException("Upload of input file as LFN %s to SE %s failed" % (new_lfn, SE))
    if not returnable:
        raise GangaException("Failed to upload input file to any SE")
    if should_del:
        os.unlink(os.path.join(localDir, namePattern))
    return returnable
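# Hedged usage sketch (not from the source): how the SE-retry variant of uploadLocalFile above
# might be driven from a Ganga session. The job object `j`, the file name and the local path are
# assumptions chosen for illustration only.
def _example_upload_input(j):
    # Write a small local file, push it to the grid next to the job's other input,
    # and keep the local copy around afterwards.
    with open('/tmp/myInput.txt', 'w') as fobj:
        fobj.write('some job input\n')
    grid_file = uploadLocalFile(j, 'myInput.txt', '/tmp', should_del=False)
    # grid_file is a DiracFile registered on the first writable SE tried
    return grid_file.lfn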
def test_getDataFile(self):
    from GangaLHCb.Lib.LHCbDataset.LHCbDatasetUtils import getDataFile
    lfn = DiracFile('a')
    pfn = LocalFile('a')
    assert getDataFile(lfn) == lfn
    assert getDataFile(pfn) == pfn
    assert getDataFile('lfn:a') == strToDataFile('lfn:a')
def replicate(self, destSE=''):
    '''Replicate all LFNs to destSE. For a list of valid SEs, type ds.replicate().'''
    if not destSE:
        from GangaDirac.Lib.Files.DiracFile import DiracFile
        DiracFile().replicate('')
        return
    if not self.hasLFNs():
        raise GangaException('Cannot replicate dataset w/ no LFNs.')

    retry_files = []

    for f in self.files:
        if not isDiracFile(f):
            continue
        try:
            result = f.replicate(destSE=destSE)
        except Exception as err:
            msg = 'Replication error for file %s (will retry in a bit).' % f.lfn
            logger.warning(msg)
            logger.warning("Error: %s" % str(err))
            retry_files.append(f)

    for f in retry_files:
        try:
            result = f.replicate(destSE=destSE)
        except Exception as err:
            msg = '2nd replication attempt failed for file %s. (will not retry)' % f.lfn
            logger.warning(msg)
            logger.warning(str(err))
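# Hedged usage sketch (not from the source): replicating a dataset's LFNs to a second storage
# element from the Ganga prompt. The SE name is a placeholder; `ds` is assumed to be an
# LHCbDataset holding DiracFile entries.
def _example_replicate(ds):
    ds.replicate('CERN-USER')   # copy every DiracFile in the dataset to the chosen SE
    ds.replicate()              # with no argument, queries DiracFile for the list of valid SEs instead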
def getOutputDataLFNs(self):
    """Get a list of outputdata that has been uploaded by Dirac. Excludes
    the outputsandbox if it is there."""
    lfns = super(Dirac, self).getOutputDataLFNs()
    ds = LHCbDataset()
    for f in lfns:
        ds.files.append(DiracFile(lfn=f))
    return GPIProxyObjectFactory(ds)
def upload(self, lfn, diracSE, guid=None):
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    diracFile = DiracFile(namePattern=self.name, lfn=lfn)
    diracFile.put(force=True)
    return diracFile
def _create_subjob(self, job, dataset):
    logger.debug("_create_subjob")
    datatmp = []
    if isinstance(dataset, LHCbDataset):
        for i in dataset:
            if isinstance(i, DiracFile):
                datatmp.append(i)
            else:
                logger.error("Unknown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
                from Ganga.Core.exceptions import GangaException
                raise GangaException("Unknown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
    elif isinstance(dataset, list):
        from Ganga.GPIDev.Base.Proxy import isType
        for i in dataset:
            if type(i) is str:
                datatmp.append(DiracFile(lfn=i))
            elif isType(i, DiracFile):
                datatmp.append(i)
            else:
                x = GangaException("Unknown (unexpected) file object: %s" % i)
                raise x
    else:
        logger.error("Unknown dataset type, cannot perform split here")
        from Ganga.Core.exceptions import GangaException
        raise GangaException("Unknown dataset type, cannot perform split here")

    logger.debug("Creating new Job in Splitter")
    j = Job()
    j.copyFrom(stripProxy(job))
    j.splitter = None
    j.merger = None
    j.inputsandbox = []  # master added automatically
    j.inputfiles = []
    j.inputdata = LHCbDataset(files=datatmp[:],
                              persistency=self.persistency,
                              depth=self.depth)
    j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice

    return j
def df():
    load_config_files()

    from GangaDirac.Lib.Files.DiracFile import DiracFile
    f = DiracFile('np', 'ld', 'lfn')
    f.locations = ['location']
    f.guid = 'guid'
    yield f

    clear_config()
def getOutputData(self, outputDir=None, names=None, force=False):
    """Retrieve data stored on SE to outputDir (default=job output workspace).
    If names=None, then all outputdata is downloaded otherwise names should
    be a list of files to download. If force is True then download performed
    even if data already exists."""
    downloaded_files = super(Dirac, self).getOutputData(outputDir, names, force)
    ds = LHCbDataset()
    for f in downloaded_files:
        ds.files.append(DiracFile(lfn=f))
    return GPIProxyObjectFactory(ds)
def createChainUnit(self, parent_units, use_copy_output=True):
    """Create an output unit given this output data"""

    # we need a parent job that has completed to get the output files
    incl_pat_list = []
    excl_pat_list = []
    for parent in parent_units:
        if len(parent.active_job_ids) == 0 or parent.status != "completed":
            return None

        for inds in self.inputdata:
            from Ganga.GPIDev.Lib.Tasks.TaskChainInput import TaskChainInput
            if isType(inds, TaskChainInput) and inds.input_trf_id == parent._getParent().getID():
                incl_pat_list += inds.include_file_mask
                excl_pat_list += inds.exclude_file_mask

    # go over the output files and copy the appropriate ones over as input files
    flist = []
    import re
    for parent in parent_units:
        job = getJobByID(parent.active_job_ids[0])
        if job.subjobs:
            job_list = job.subjobs
        else:
            job_list = [job]

        for sj in job_list:
            for f in sj.outputfiles:

                # match any dirac files that are allowed in the file mask
                if isType(f, DiracFile):
                    if len(incl_pat_list) > 0:
                        for pat in incl_pat_list:
                            if re.search(pat, f.lfn):
                                flist.append("LFN:" + f.lfn)
                    else:
                        flist.append("LFN:" + f.lfn)

                    if len(excl_pat_list) > 0:
                        for pat in excl_pat_list:
                            if re.search(pat, f.lfn) and "LFN:" + f.lfn in flist:
                                flist.remove("LFN:" + f.lfn)

    # just do one unit that uses all data
    unit = LHCbUnit()
    unit.name = "Unit %d" % len(self.units)
    unit.inputdata = LHCbDataset(files=[DiracFile(lfn=f) for f in flist])

    return unit
def strToDataFile(name, allowNone=True):
    if len(name) >= 4 and name[:4].upper() == 'LFN:':
        return DiracFile(lfn=name[4:])
    elif len(name) >= 4 and name[:4].upper() == 'PFN:':
        logger.warning("PFN is slightly ambiguous, constructing LocalFile")
        return LocalFile(name[4:])
    else:
        if allowNone:
            return None
        else:
            raise GangaException("Cannot construct file object: %s" % str(name))
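# Hedged illustration (not from the source) of how strToDataFile above resolves prefixes.
# The LFN/PFN values are placeholders.
def _example_strToDataFile():
    grid_file = strToDataFile('LFN:/some/grid/file.dst')    # -> DiracFile(lfn='/some/grid/file.dst')
    local_file = strToDataFile('PFN:/tmp/local_file.dst')   # -> LocalFile('/tmp/local_file.dst'), with a warning
    nothing = strToDataFile('plain_name.dst')               # -> None, since allowNone defaults to True
    return grid_file, local_file, nothing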
def _create_subjob(self, job, dataset):
    logger.debug("_create_subjob")
    datatmp = []

    logger.debug("dataset size: %s" % str(len(dataset)))
    #logger.debug("dataset: %s" % str(dataset))

    from GangaLHCb.Lib.LHCbDataset.LHCbDataset import LHCbDataset
    if isinstance(dataset, LHCbDataset):
        for i in dataset:
            if isType(i, DiracFile):
                datatmp.append(i)
            else:
                logger.error("Unknown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
                from Ganga.Core.exceptions import GangaException
                raise GangaException("Unknown file-type %s, cannot perform split with file %s" % (type(i), str(i)))
    elif isinstance(dataset, (list, GangaList)):
        for this_file in dataset:
            if type(this_file) is str:
                datatmp.append(allComponentFilters['gangafiles'](this_file, None))
            elif isType(this_file, IGangaFile):
                datatmp.append(this_file)
            else:
                logger.error("Unexpected type: %s" % str(type(this_file)))
                logger.error("Wanted object to inherit from type: %s" % str(IGangaFile))
                from Ganga.Core.exceptions import GangaException
                x = GangaException("Unknown (unexpected) file object: %s" % this_file)
                raise x
    elif type(dataset) is str:
        datatmp.append(DiracFile(lfn=dataset))
    else:
        logger.error("Unknown dataset type, cannot perform split here")
        from Ganga.Core.exceptions import GangaException
        logger.error("Dataset found: " + str(dataset))
        raise GangaException("Unknown dataset type, cannot perform split here")

    logger.debug("Creating new Job in Splitter")
    j = Job()
    logger.debug("Copying From Job")
    j.copyFrom(stripProxy(job), ['splitter', 'subjobs', 'inputdata', 'inputsandbox', 'inputfiles'])
    logger.debug("Unsetting Splitter")
    j.splitter = None
    #logger.debug("Unsetting Merger")
    #j.merger = None
    #j.inputsandbox = []  # master added automatically
    #j.inputfiles = []
    logger.debug("Setting InputData")
    j.inputdata = LHCbDataset(files=datatmp[:],
                              persistency=self.persistency,
                              depth=self.depth)
    #j.inputdata.XMLCatalogueSlice = self.XMLCatalogueSlice
    logger.debug("Returning new subjob")
    return j
def getDiracFiles():
    import os
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    filename = DiracFile.diracLFNBase().replace('/', '-') + '.lfns'
    logger.info('Creating list, this can take a while if you have a large number of SE files, please wait...')
    execute('dirac-dms-user-lfns &> /dev/null', shell=True, timeout=None)
    g = GangaList()
    with open(filename[1:], 'r') as lfnlist:
        lfnlist.seek(0)
        g.extend((DiracFile(lfn='%s' % lfn.strip()) for lfn in lfnlist.readlines()))
    return addProxy(g)
def generateDiracScripts(app):
    """
    Construct the DIRAC scripts, which must be unique to each job in order to have a unique checksum.
    This generates a unique file, uploads it to DIRAC and then stores the resulting DiracFile in app.jobScriptArchive
    Args:
        app (GaudiExec): This expects a GaudiExec app to be passed so that the constructed script archive can be associated with it
    """
    generateJobScripts(app, appendJobScripts=True)

    job = app.getJobObject()

    new_df = uploadLocalFile(job, app.jobScriptArchive.namePattern, app.jobScriptArchive.localDir)

    app.jobScriptArchive = new_df

    app.is_prepared.addAssociatedFile(DiracFile(lfn=new_df.lfn))
def uploadLocalFile(job, namePattern, localDir, should_del=True):
    """
    Upload a locally available file to the grid as a DiracFile

    Args:
        namePattern (str): name of the file
        localDir (str): localDir of the file
        should_del (bool): should we delete the local file?
    Returns:
        DiracFile: a DiracFile of the uploaded LFN on the grid
    """
    new_df = DiracFile(namePattern, localDir=localDir)
    random_SE = random.choice(getConfig('DIRAC')['allDiracSE'])
    new_lfn = os.path.join(getInputFileDir(job), namePattern)
    returnable = new_df.put(force=True, uploadSE=random_SE, lfn=new_lfn)[0]

    if should_del:
        os.unlink(os.path.join(localDir, namePattern))

    return returnable
def getDataset(self):
    '''Gets the dataset from the bookkeeping for current dict.'''
    if not self.dict:
        return None
    cmd = 'bkQueryDict(%s)' % self.dict
    result = get_result(cmd, 'BK query error.', 'BK query error.')
    files = []
    value = result['Value']
    if 'LFNs' in value:
        files = value['LFNs']
    if not type(files) is list:
        if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
            files = files['LFNs'].keys()
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    this_list = [DiracFile(lfn=f) for f in files]
    from GangaLHCb.Lib.LHCbDataset import LHCbDataset
    ds = LHCbDataset(files=this_list, fromRef=True)
    return addProxy(ds)
def generateDiracInput(app):
    """
    Construct a DIRAC input which does not need to be unique to each job but is required to have a unique checksum.
    This generates a unique file, uploads it to DIRAC and then stores the LFN in app.uploadedInput
    Args:
        app (GaudiExec): This expects a GaudiExec app to be passed so that the constructed input tarball can be associated with it
    """
    input_files, input_folders = collectPreparedFiles(app)

    job = app.getJobObject()

    if input_folders:
        raise ApplicationConfigurationError('Prepared folders not supported yet, please fix this in future')
    else:
        prep_dir = app.getSharedPath()
        addTimestampFile(prep_dir)
        prep_file = _pseudo_session_id + '.tgz'
        tmp_dir = tempfile.gettempdir()
        compressed_file = os.path.join(tmp_dir, 'diracInputFiles_' + os.path.basename(prep_file))

        if not job.master:
            rjobs = job.subjobs
        else:
            rjobs = [job]

        with tarfile.open(compressed_file, "w:gz") as tar_file:
            for name in input_files:
                # FIXME Add support for subfiles here once it's working across multiple IGangaFile objects in a consistent way
                # Not hacking this in for now just in-case we end up with a mess as a result
                tar_file.add(name, arcname=os.path.basename(name))

    new_df = uploadLocalFile(job, os.path.basename(compressed_file), tmp_dir)

    app.uploadedInput = new_df
    app.is_prepared.addAssociatedFile(DiracFile(lfn=new_df.lfn))
def test_isLFN(self):
    from GangaLHCb.Lib.LHCbDataset.LHCbDatasetUtils import isLFN
    assert isLFN(DiracFile('test')), 'should be true'
    assert not isLFN(PhysicalFile('test')), 'should be false'
def OfflineGangaDiracSplitter(_inputs, filesPerJob, maxFiles, ignoremissing):
    """
    Generator that yields datasets for dirac split jobs
    """
    if maxFiles is not None and maxFiles > 0:
        inputs = _inputs[:maxFiles]
    else:
        inputs = _inputs

    # First FIND ALL LFN REPLICAS AND SE<->SITE MAPPINGS AND STORE THIS IN MEMORY
    # THIS IS DONE IN PARALLEL TO AVOID OVERLOADING DIRAC WITH THOUSANDS OF
    # REQUESTS AT ONCE ON ONE CONNECTION

    wanted_common_site = configDirac['OfflineSplitterMaxCommonSites']
    iterative_limit = configDirac['OfflineSplitterLimit']
    good_fraction = configDirac['OfflineSplitterFraction']
    uniqueSE = configDirac['OfflineSplitterUniqueSE']

    split_files = []

    if inputs is None:
        raise SplittingError("Cannot Split Job as the inputdata appears to be None!")

    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError("Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata")

    file_replicas = {}

    logger.info("Requesting LFN replica info")

    allLFNData = {}

    # Perform a lookup of where LFNs are all stored
    allLFNs, LFNdict = lookUpLFNReplicas(inputs, allLFNData)

    for _lfn in allLFNData:
        if allLFNData[_lfn] is None:
            logger.error("Error in Getting LFN Replica information, aborting split")
            raise SplittingError("Error in Getting LFN Replica information, aborting split")

    bad_lfns = []

    # Sort this information and store it in the relevant Ganga objects
    errors = sortLFNreplicas(bad_lfns, allLFNs, LFNdict, ignoremissing, allLFNData, inputs)

    if len(bad_lfns) != 0:
        if ignoremissing is False:
            logger.error("Errors found getting LFNs:\n%s" % str(errors))
            raise SplittingError("Error trying to split dataset with invalid LFN and ignoremissing = False")

    # This finds all replicas for all LFNs...
    # This will probably struggle for LFNs which don't exist
    # Bad LFNs should have been removed by this point however
    all_lfns = [LFNdict[this_lfn].locations for this_lfn in LFNdict if this_lfn not in bad_lfns]

    logger.info("Got replicas")

    for this_input in inputs:
        if this_input.lfn not in bad_lfns:
            file_replicas[this_input.lfn] = this_input.locations

    logger.info("found all replicas")

    logger.info("Calculating site<->SE Mapping")

    site_to_SE_mapping = {}
    SE_to_site_mapping = {}

    # Now lets generate a dictionary of some chosen site vs LFN to use in
    # constructing subsets
    site_dict, allSubSets, allChosenSets = calculateSiteSEMapping(file_replicas, wanted_common_site, uniqueSE,
                                                                  site_to_SE_mapping, SE_to_site_mapping)

    logger.debug("Found all SE in use")

    # BELOW IS WHERE THE ACTUAL SPLITTING IS DONE

    logger.info("Calculating best data subsets")

    iterations = 0
    # Loop over all LFNs
    while len(site_dict.keys()) > 0:

        # LFN left to be used
        # NB: Can't modify this list and iterate over it directly in python
        LFN_instances = site_dict.keys()
        # Already used LFN
        chosen_lfns = set()

        for iterating_LFN in LFN_instances:

            # If this has previously been selected lets ignore it and move on
            if iterating_LFN in chosen_lfns:
                continue

            # Use this seed to try and construct a subset
            req_sitez = allChosenSets[iterating_LFN]
            _this_subset = []

            #logger.debug("find common LFN for: " + str(allChosenSets[iterating_LFN]))

            # Construct subset
            # Starting with i, populate subset with LFNs which have an
            # overlap of at least 2 SE
            for this_LFN in LFN_instances:
                if this_LFN in chosen_lfns:
                    continue
                if req_sitez.issubset(site_dict[this_LFN]):
                    if len(_this_subset) >= filesPerJob:
                        break
                    _this_subset.append(this_LFN)

            limit = int(math.floor(float(filesPerJob) * good_fraction))

            #logger.debug("Size limit: %s" % str(limit))

            # If subset is too small throw it away
            if len(_this_subset) < limit:
                #logger.debug("%s < %s" % (str(len(_this_subset)), str(limit)))
                allChosenSets[iterating_LFN] = generate_site_selection(site_dict[iterating_LFN], wanted_common_site,
                                                                       uniqueSE, site_to_SE_mapping, SE_to_site_mapping)
                continue
            else:
                logger.debug("found common LFN for: " + str(allChosenSets[iterating_LFN]))
                logger.debug("%s > %s" % (str(len(_this_subset)), str(limit)))
                # else Dataset was large enough to be considered useful
                logger.debug("Generating Dataset of size: %s" % str(len(_this_subset)))
                ## Construct DiracFile here as we want to keep the above combination
                allSubSets.append([DiracFile(lfn=str(this_LFN)) for this_LFN in _this_subset])

                for lfn in _this_subset:
                    site_dict.pop(lfn)
                    allChosenSets.pop(lfn)
                    chosen_lfns.add(lfn)

        # Lets keep track of how many times we've tried this
        iterations = iterations + 1

        # Can take a while so lets not let threads become un-locked
        import Ganga.Runtime.Repository_runtime
        Ganga.Runtime.Repository_runtime.updateLocksNow()

        # If on final run, will exit loop after this so lets try and cleanup
        if iterations >= iterative_limit:

            if good_fraction < 0.5:
                good_fraction = good_fraction * 0.75
                iterations = 0
            elif wanted_common_site > 1:
                logger.debug("Reducing Common Site Size")
                wanted_common_site = wanted_common_site - 1
                iterations = 0
                good_fraction = 0.75
            else:
                good_fraction = good_fraction * 0.75

            logger.debug("good_fraction: %s" % str(good_fraction))

    split_files = allSubSets

    avg = float()
    for this_set in allSubSets:
        avg += float(len(this_set))
    avg /= float(len(allSubSets))
    logger.info("Average Subset size is: %s" % (str(avg)))

    # FINISHED SPLITTING CHECK!!!

    check_count = 0
    for i in split_files:
        check_count = check_count + len(i)

    if check_count != len(inputs) - len(bad_lfns):
        logger.error("SERIOUS SPLITTING ERROR!!!!!")
        raise SplitterError("Files Missing after Splitting!")
    else:
        logger.info("File count checked! Ready to Submit")

    # RETURN THE RESULT

    logger.info("Created %s subsets" % str(len(split_files)))

    #logger.info("Split Files: %s" % str(split_files))

    for dataset in split_files:
        yield dataset
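# Hedged sketch (not from the source): how a backend splitter might consume the generator above,
# turning each yielded list of DiracFile objects into the inputdata of one subjob. The dataset
# construction mirrors _setup_subjob_dataset further down; the parameter values are illustrative.
def _example_consume_offline_splitter(inputs, filesPerJob=25, maxFiles=None, ignoremissing=False):
    subjob_datasets = []
    for subset in OfflineGangaDiracSplitter(inputs, filesPerJob, maxFiles, ignoremissing):
        # each subset is a list of DiracFile objects sharing a common set of replica sites
        subjob_datasets.append(LHCbDataset(files=subset))
    return subjob_datasets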
def df():
    f = DiracFile('np', 'ld', 'lfn')
    f.locations = ['location']
    f.guid = 'guid'
    return f
def _setup_subjob_dataset(self, dataset):
    return LHCbDataset(files=[DiracFile(lfn=f) for f in dataset])
def DiracSplitter(inputs, filesPerJob, maxFiles, ignoremissing):
    """
    Generator that yields datasets for dirac split jobs
    """
    #logger.debug("DiracSplitter")
    #logger.debug("inputs: %s" % str(inputs))
    split_files = []
    i = inputs.__class__()

    if len(inputs.getLFNs()) != len(inputs.files):
        raise SplittingError("Error trying to split dataset using DIRAC backend with non-DiracFile in the inputdata")

    all_files = igroup(inputs.files[:maxFiles], getConfig('DIRAC')['splitFilesChunks'], leftovers=True)

    #logger.debug("Looping over all_files")
    #logger.debug("%s" % str(all_files))

    for files in all_files:
        i.files = files
        LFNsToSplit = i.getLFNs()
        if len(LFNsToSplit) > 1:
            result = execute('splitInputData(%s, %d)' % (i.getLFNs(), filesPerJob))

            if not result_ok(result):
                logger.error('DIRAC:: Error splitting files: %s' % str(result))
                raise SplittingError('Error splitting files.')

            split_files += result.get('Value', [])
        else:
            split_files += [LFNsToSplit]

    if len(split_files) == 0:
        raise SplittingError('An unknown error occurred.')

    # FIXME
    # check that all files were available on the grid
    big_list = []
    for l in split_files:
        big_list.extend(l)
    diff = set(inputs.getFileNames()[:maxFiles]).difference(big_list)
    if len(diff) > 0:
        for f in diff:
            logger.warning('Ignored file: %s' % f)
        if not ignoremissing:
            raise SplittingError('Some files not found!')

    ### logger.debug("Split Files: %s" % str(split_files))

    for _dataset in split_files:
        dataset = []
        for _lfn in _dataset:
            dataset.append(DiracFile(lfn=_lfn))
        yield dataset
def performSplitting(site_dict, filesPerJob, allChosenSets, wanted_common_site, uniqueSE, site_to_SE_mapping, SE_to_site_mapping):
    """
    This is the main method which loops through the LFNs and creates subsets which are returned as a list of lists of LFNs

    Args:
        site_dict (dict): This is a dict with LFNs as keys and sites for each LFN as value
        filesPerJob (int): Max files per job as defined by the splitter
        allChosenSets (dict): A dict with LFNs as keys and a sub-set of sites where each LFN is replicated
        wanted_common_site (int): Number of sites which we want to have in common for each LFN
        uniqueSE (bool): Should we check to make sure sites don't share an SE
        site_to_SE_mapping (dict): Dict which has sites as keys and SE as values
        SE_to_site_mapping (dict): Dict which has sites as values and SE as keys

    Returns:
        allSubSets (list): A list of subsets, each subset being a list of LFNs
    """
    good_fraction = configDirac['OfflineSplitterFraction']
    iterative_limit = configDirac['OfflineSplitterLimit']

    allSubSets = []

    iterations = 0
    # Loop over all LFNs
    while len(site_dict.keys()) > 0:

        # LFN left to be used
        # NB: Can't modify this list and iterate over it directly in python
        LFN_instances = site_dict.keys()
        # Already used LFN
        chosen_lfns = set()

        for iterating_LFN in LFN_instances:

            # If this has previously been selected lets ignore it and move on
            if iterating_LFN in chosen_lfns:
                continue

            # Use this seed to try and construct a subset
            req_sitez = allChosenSets[iterating_LFN]
            _this_subset = []

            #logger.debug("find common LFN for: " + str(allChosenSets[iterating_LFN]))

            # Construct subset
            # Starting with i, populate subset with LFNs which have an
            # overlap of at least 2 SE
            for this_LFN in LFN_instances:
                if this_LFN in chosen_lfns:
                    continue
                if req_sitez.issubset(site_dict[this_LFN]):
                    if len(_this_subset) >= filesPerJob:
                        break
                    _this_subset.append(this_LFN)

            limit = int(math.floor(float(filesPerJob) * good_fraction))

            #logger.debug("Size limit: %s" % str(limit))

            # If subset is too small throw it away
            if len(_this_subset) < limit:
                #logger.debug("%s < %s" % (str(len(_this_subset)), str(limit)))
                allChosenSets[iterating_LFN] = generate_site_selection(site_dict[iterating_LFN], wanted_common_site,
                                                                       uniqueSE, site_to_SE_mapping, SE_to_site_mapping)
                continue
            else:
                logger.debug("found common LFN for: " + str(allChosenSets[iterating_LFN]))
                logger.debug("%s > %s" % (str(len(_this_subset)), str(limit)))
                # else Dataset was large enough to be considered useful
                logger.debug("Generating Dataset of size: %s" % str(len(_this_subset)))
                ## Construct DiracFile here as we want to keep the above combination
                allSubSets.append([DiracFile(lfn=str(this_LFN)) for this_LFN in _this_subset])

                for lfn in _this_subset:
                    site_dict.pop(lfn)
                    allChosenSets.pop(lfn)
                    chosen_lfns.add(lfn)

        # Lets keep track of how many times we've tried this
        iterations = iterations + 1

        # Can take a while so lets not let threads become un-locked
        import Ganga.Runtime.Repository_runtime
        Ganga.Runtime.Repository_runtime.updateLocksNow()

        # If on final run, will exit loop after this so lets try and cleanup
        if iterations >= iterative_limit:

            if good_fraction < 0.5:
                good_fraction = good_fraction * 0.75
                iterations = 0
            elif wanted_common_site > 1:
                logger.debug("Reducing Common Site Size")
                wanted_common_site = wanted_common_site - 1
                iterations = 0
                good_fraction = 0.75
            else:
                good_fraction = good_fraction * 0.75

            logger.debug("good_fraction: %s" % str(good_fraction))

    return allSubSets
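# Simplified, self-contained illustration (not Ganga code) of the subset rule used in
# performSplitting above: a seed LFN's chosen sites must be a subset of a candidate LFN's sites,
# and a subset is only kept once it reaches floor(filesPerJob * good_fraction) entries.
# All names and site strings here are toy values for illustration.
def _example_subset_rule():
    import math
    filesPerJob, good_fraction = 3, 0.75
    site_dict = {
        'lfn_a': {'LCG.CERN.ch', 'LCG.CNAF.it'},
        'lfn_b': {'LCG.CERN.ch', 'LCG.CNAF.it', 'LCG.RAL.uk'},
        'lfn_c': {'LCG.RAL.uk'},
    }
    req_sitez = {'LCG.CERN.ch', 'LCG.CNAF.it'}   # sites chosen for the seed LFN
    subset = [lfn for lfn, sites in site_dict.items() if req_sitez.issubset(sites)]
    limit = int(math.floor(filesPerJob * good_fraction))   # = 2
    return subset if len(subset) >= limit else []           # ['lfn_a', 'lfn_b'] is kept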
def getDataset(self):
    '''Gets the dataset from the bookkeeping for current path, etc.'''
    if not self.path:
        return None
    if self.type not in ['Path', 'RunsByDate', 'Run', 'Production']:
        raise GangaException('Type="%s" is not valid.' % self.type)
    if self.type != 'RunsByDate':
        if self.startDate:
            msg = 'startDate not supported for type="%s".' % self.type
            raise GangaException(msg)
        if self.endDate:
            msg = 'endDate not supported for type="%s".' % self.type
            raise GangaException(msg)
        if self.selection:
            msg = 'selection not supported for type="%s".' % self.type
            raise GangaException(msg)

    cmd = "getDataset('%s','%s','%s','%s','%s','%s')" % (self.path, self.dqflag, self.type,
                                                         self.startDate, self.endDate, self.selection)

    from Ganga.GPIDev.Lib.GangaList.GangaList import GangaList
    knownLists = [tuple, list, GangaList]
    if isType(self.dqflag, knownLists):
        cmd = "getDataset('%s',%s,'%s','%s','%s','%s')" % (self.path, self.dqflag, self.type,
                                                           self.startDate, self.endDate, self.selection)

    result = get_result(cmd, 'BK query error.', credential_requirements=self.credential_requirements)

    logger.debug("Finished Running Command")

    files = []
    value = result
    if 'LFNs' in value:
        files = value['LFNs']
    if not type(files) is list:  # i.e. a dict of LFN:Metadata
        # if 'LFNs' in files:  # i.e. a dict of LFN:Metadata
        files = files.keys()

    logger.debug("Creating DiracFile objects")

    ## Doesn't work, not clear why
    from GangaDirac.Lib.Files.DiracFile import DiracFile
    #new_files = []
    #def _createDiracLFN(this_file):
    #    return DiracFile(lfn=this_file)
    #GangaObject.__createNewList(new_files, files, _createDiracLFN)

    logger.debug("Creating new list")
    new_files = [DiracFile(lfn=f) for f in files]

    #new_files = [DiracFile(lfn=_file) for _file in files]
    #for f in files:
    #    new_files.append(DiracFile(lfn=f))
    #ds.extend([DiracFile(lfn=f)])

    logger.info("Constructing LHCbDataset")

    from GangaLHCb.Lib.LHCbDataset import LHCbDataset
    logger.debug("Imported LHCbDataset")
    ds = LHCbDataset(files=new_files, fromRef=True)

    logger.debug("Returning Dataset")

    return addProxy(ds)
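# Hedged usage sketch (not from the source): driving getDataset above from a Ganga session via a
# BKQuery object. The bookkeeping path is a placeholder, not a real dataset.
def _example_bk_query():
    bk = BKQuery(path='/some/bookkeeping/path.DST', dqflag='OK', type='Path')
    ds = bk.getDataset()   # returns a (proxied) LHCbDataset of DiracFile objects, or None if no path set
    return ds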