def __init__(self, config, datasource_name):
    """Locate the source task's output directory and build the job selector."""
    InfoScanner.__init__(self, config, datasource_name)
    self._ext_work_dn = config.get_dn('source directory')
    self._ext_output_dir = os.path.join(self._ext_work_dn, 'output')
    # fail fast when the task has no output directory to scan
    if not os.path.isdir(self._ext_output_dir):
        raise DatasetError('Unable to find task output directory %s' % repr(self._ext_output_dir))
    selector_expr = config.get('source job selector', '')
    self._selector = JobSelector.create(selector_expr)
def __init__(self, config):
    """Read parent-dataset matching options and prepare the lfn lookup cache."""
    InfoScanner.__init__(self, config)
    self._parentKeys = config.getList('parent keys', [])
    self._looseMatch = config.getInt('parent match level', 1)
    self._source = config.get('parent source', '')
    self._merge = config.getBool('merge parents', False)
    # maps parent lfn fragments to parent dataset information
    self._lfnMap = {}
def __init__(self, config):
    """Load the external task configuration and collect its successful jobs."""
    InfoScanner.__init__(self, config)
    ext_config_fn = config.getPath('source config', onChange=triggerDataResync)
    ext_config = create_config(ext_config_fn,
        useDefaultFiles=True).changeView(setSections=['global'])
    self._extWorkDir = ext_config.getWorkPath()
    # silence the root logger while the external workflow is instantiated
    logging.getLogger().disabled = True
    self._extWorkflow = ext_config.getPlugin('workflow', 'Workflow:global',
        cls='Workflow', pargs=('task',))
    logging.getLogger().disabled = False
    self._extTask = self._extWorkflow.task
    selector = config.get('source job selector', '', onChange=triggerDataResync)
    # only consider jobs that finished successfully
    ext_job_db = ext_config.getPlugin('job database', 'TextFileJobDB', cls='JobDB',
        pkwargs={'jobSelector': lambda jobNum, jobObj: jobObj.state == Job.SUCCESS},
        onChange=None)
    self._selected = sorted(ext_job_db.getJobs(JobSelector.create(selector, task=self._extTask)))
def __init__(self, config):
    """Configure CMSSW config-info merging (by config file name or config hash)."""
    InfoScanner.__init__(self, config)
    self._importParents = config.getBool('include parent infos', False)
    # merge by hash when requested, otherwise by config file name
    self._mergeKey = 'CMSSW_CONFIG_FILE'
    if config.getBool('merge config infos', True):
        self._mergeKey = 'CMSSW_CONFIG_HASH'
    self._cfgStore = {}
    self._gtStore = {}
def __init__(self, config):
    """Collect the list of task variables to ignore during metadata extraction."""
    InfoScanner.__init__(self, config)
    # default: the per-job seeds plus internal bookkeeping variables
    ignoreDef = lmap(lambda x: 'SEED_%d' % x, irange(10)) + [
        'FILE_NAMES', 'SB_INPUT_FILES', 'SE_INPUT_FILES', 'SE_INPUT_PATH',
        'SE_INPUT_PATTERN', 'SB_OUTPUT_FILES', 'SE_OUTPUT_FILES', 'SE_OUTPUT_PATH',
        'SE_OUTPUT_PATTERN', 'SE_MINFILESIZE', 'DOBREAK', 'MY_RUNTIME', 'GC_RUNTIME',
        'MY_JOBID', 'GC_JOB_ID', 'GC_VERSION', 'GC_DEPFILES', 'SUBST_FILES', 'SEEDS',
        'SCRATCH_LL', 'SCRATCH_UL', 'LANDINGZONE_LL', 'LANDINGZONE_UL']
    self._ignoreVars = config.getList('ignore task vars', ignoreDef)
def __init__(self, config, datasource_name):
    """Collect the list of task variables to ignore during metadata extraction."""
    InfoScanner.__init__(self, config, datasource_name)
    # default: the per-job seeds plus internal bookkeeping variables
    ignore_list_default = lmap(lambda x: 'SEED_%d' % x, irange(10)) + [
        'DOBREAK', 'FILE_NAMES', 'GC_DEPFILES', 'GC_JOBID', 'GC_JOBNUM',
        'GC_JOB_ID', 'GC_PARAM', 'GC_RUNTIME', 'GC_VERSION', 'JOB_RANDOM',
        'JOBID', 'LANDINGZONE_LL', 'LANDINGZONE_UL', 'MY_JOB', 'MY_JOBID',
        'MY_RUNTIME', 'SB_INPUT_FILES', 'SB_OUTPUT_FILES', 'SCRATCH_LL',
        'SCRATCH_UL', 'SEEDS', 'SE_INPUT_FILES', 'SE_INPUT_PATH',
        'SE_INPUT_PATTERN', 'SE_MINFILESIZE', 'SE_OUTPUT_FILES',
        'SE_OUTPUT_PATH', 'SE_OUTPUT_PATTERN', 'SUBST_FILES']
    self._ignore_vars = config.get_list('ignore task vars', ignore_list_default)
def __init__(self, config, datasource_name):
    """Read options describing how to determine the number of entries per file."""
    InfoScanner.__init__(self, config, datasource_name)
    # 'events ...' option names are kept as deprecated aliases of 'entries ...'
    self._entries_cmd = config.get(['events command', 'entries command'], '')
    self._entries_key = config.get(['events key', 'entries key'], '')
    self._entries_key_scale = config.get_float(
        ['events per key value', 'entries per key value'], 1.)
    self._entries_default = config.get_int(
        ['events default', 'entries default'], -1)
def __init__(self, config, datasource_name):
    """Locate the source task's output directory and build the job selector."""
    InfoScanner.__init__(self, config, datasource_name)
    self._ext_work_dn = config.get_dn('source directory')
    self._ext_output_dir = os.path.join(self._ext_work_dn, 'output')
    # fail fast when the task has no output directory to scan
    if not os.path.isdir(self._ext_output_dir):
        raise DatasetError('Unable to find task output directory %s' % repr(self._ext_output_dir))
    selector_expr = config.get('source job selector', '')
    self._selector = JobSelector.create(selector_expr)
def __init__(self, config, datasource_name):
    """Configure CMSSW config-info merging and metadata extraction patterns."""
    InfoScanner.__init__(self, config, datasource_name)
    self._import_parents = config.get_bool('include parent infos', False)
    # merge by hash when requested, otherwise by config file name
    self._merge_key = 'CMSSW_CONFIG_FILE'
    if config.get_bool('merge config infos', True):
        self._merge_key = 'CMSSW_CONFIG_HASH'
    self._stored_config = {}
    self._stored_globaltag = {}
    # extract annotation / data tier strings from CMSSW config dumps
    self._regex_annotation = re.compile(r'.*annotation.*=.*cms.untracked.string.*\((.*)\)')
    self._regex_datatier = re.compile(r'.*dataTier.*=.*cms.untracked.string.*\((.*)\)')
def __init__(self, config, datasource_name):
    """Normalize the source path into a '<protocol>://<path>' form."""
    InfoScanner.__init__(self, config, datasource_name)
    self._path = config.get('source directory', '.')
    self._timeout = config.get_int('source timeout', 120)
    self._trim = config.get_bool('source trim local', True)
    self._recurse = config.get_bool('source recurse', False)
    # plain paths are treated as local files
    if '://' not in self._path:
        self._path = 'file://' + self._path
    (prot, path) = self._path.split('://')
    self._path = prot + '://' + clean_path(path)
def __init__(self, config):
    """Configure CMSSW config-info merging and metadata extraction patterns."""
    InfoScanner.__init__(self, config)
    self._importParents = config.getBool('include parent infos', False,
        onChange=triggerResync(['datasets', 'parameters']))
    # merge by hash when requested, otherwise by config file name
    self._mergeKey = 'CMSSW_CONFIG_FILE'
    if config.getBool('merge config infos', True,
            onChange=triggerResync(['datasets', 'parameters'])):
        self._mergeKey = 'CMSSW_CONFIG_HASH'
    self._cfgStore = {}
    self._gtStore = {}
    # extract annotation / data tier strings from CMSSW config dumps
    self._regexAnnotation = re.compile(r'.*annotation.*=.*cms.untracked.string.*\((.*)\)')
    self._regexDataTier = re.compile(r'.*dataTier.*=.*cms.untracked.string.*\((.*)\)')
def __init__(self, config, datasource_name):
    """Read parent-dataset matching options and prime the parent lfn map."""
    InfoScanner.__init__(self, config, datasource_name)
    self._parent_source = config.get('parent source', '')
    self._parent_keys = config.get_list('parent keys', [])
    self._parent_match_level = config.get_int('parent match level', 1)
    self._parent_merge = config.get_bool('merge parents', False)
    # cached "parent lfn parts" (plfnp) to "parent dataset name" (pdn) maps,
    # stored per parent_dataset_expr
    self._plfnp2pdn_cache = {}
    self._empty_config = create_config(use_default_files=False, load_old_config=False)
    self._read_plfnp_map(config, self._parent_source)  # read from configured parent source
def __init__(self, config):
    """Locate the source task's output directory and build the job selector."""
    InfoScanner.__init__(self, config)
    self._extWorkDir = config.getPath('source directory', onChange=triggerDataResync)
    self._extOutputDir = os.path.join(self._extWorkDir, 'output')
    # fail fast when the task has no output directory to scan
    if not os.path.isdir(self._extOutputDir):
        raise DatasetError('Unable to find task output directory %s' % repr(self._extOutputDir))
    selector_expr = config.get('source job selector', '', onChange=triggerDataResync)
    self._selector = JobSelector.create(selector_expr)
def __init__(self, config):
    """Load the external task module and select its successful jobs."""
    InfoScanner.__init__(self, config)
    # temporarily lower the verbosity while the external config is loaded
    newVerbosity = utils.verbosity(utils.verbosity() - 3)
    extConfigFN = config.getPath('source config')
    extConfig = createConfig(extConfigFN).changeView(setSections=['global'])
    self._extWorkDir = extConfig.getWorkPath()
    self._extTask = extConfig.getPlugin(['task', 'module'], cls='TaskModule')
    selector = config.get('source job selector', '')
    # only consider jobs that finished successfully
    extJobDB = JobDB(extConfig,
        jobSelector=lambda jobNum, jobObj: jobObj.state == Job.SUCCESS)
    self._selected = sorted(extJobDB.getJobs(JobSelector.create(selector, task=self._extTask)))
    # restore the previous verbosity level
    utils.verbosity(newVerbosity + 3)
def __init__(self, config):
    """Collect the list of task variables to ignore during metadata extraction."""
    InfoScanner.__init__(self, config)
    # default: the per-job seeds plus internal bookkeeping variables
    ignoreDef = lmap(lambda x: 'SEED_%d' % x, irange(10)) + [
        'FILE_NAMES', 'SB_INPUT_FILES', 'SE_INPUT_FILES', 'SE_INPUT_PATH',
        'SE_INPUT_PATTERN', 'SB_OUTPUT_FILES', 'SE_OUTPUT_FILES', 'SE_OUTPUT_PATH',
        'SE_OUTPUT_PATTERN', 'SE_MINFILESIZE', 'DOBREAK', 'MY_RUNTIME', 'GC_RUNTIME',
        'MY_JOBID', 'GC_JOB_ID', 'GC_VERSION', 'GC_DEPFILES', 'SUBST_FILES', 'SEEDS',
        'SCRATCH_LL', 'SCRATCH_UL', 'LANDINGZONE_LL', 'LANDINGZONE_UL']
    self._ignoreVars = config.getList('ignore task vars', ignoreDef)
def __init__(self, config):
    """Load the external task configuration and collect its successful jobs."""
    InfoScanner.__init__(self, config)
    ext_config_fn = config.getPath('source config')
    ext_config = createConfig(ext_config_fn).changeView(setSections=['global'])
    self._extWorkDir = ext_config.getWorkPath()
    # silence the user logger while the external workflow is instantiated
    logging.getLogger('user').disabled = True
    self._extWorkflow = ext_config.getPlugin('workflow', 'Workflow:global',
        cls='Workflow', pargs=('task',))
    logging.getLogger('user').disabled = False
    self._extTask = self._extWorkflow.task
    selector = config.get('source job selector', '')
    # only consider jobs that finished successfully
    ext_job_db = JobDB(ext_config,
        jobSelector=lambda jobNum, jobObj: jobObj.state == Job.SUCCESS)
    self._selected = sorted(ext_job_db.getJobs(JobSelector.create(selector, task=self._extTask)))
def __init__(self, config):
    """Read the source path; reject recursion for URLs and clean local paths."""
    InfoScanner.__init__(self, config)
    self._path = config.get('source directory', '.', onChange=triggerDataResync)
    self._recurse = config.getBool('source recurse', False, onChange=triggerDataResync)
    is_url = '://' in self._path
    if is_url and self._recurse:
        # recursive listing only works on the local filesystem
        raise DatasetError('Recursion is not supported for URL: %s' % repr(self._path))
    elif not is_url:
        self._path = utils.cleanPath(self._path)
def __init__(self, config, datasource_name):
    """Read parent-dataset matching options and prime the parent lfn map."""
    InfoScanner.__init__(self, config, datasource_name)
    self._parent_source = config.get('parent source', '')
    self._parent_keys = config.get_list('parent keys', [])
    self._parent_match_level = config.get_int('parent match level', 1)
    self._parent_merge = config.get_bool('merge parents', False)
    # cached "parent lfn parts" (plfnp) to "parent dataset name" (pdn) maps,
    # stored per parent_dataset_expr
    self._plfnp2pdn_cache = {}
    self._empty_config = create_config(use_default_files=False, load_old_config=False)
    self._read_plfnp_map(config, self._parent_source)  # read from configured parent source
def __init__(self, config, datasource_name):
    """Collect the list of task variables to ignore during metadata extraction."""
    InfoScanner.__init__(self, config, datasource_name)
    # default: the per-job seeds plus internal bookkeeping variables
    ignore_list_default = lmap(lambda x: 'SEED_%d' % x, irange(10)) + [
        'DOBREAK', 'FILE_NAMES', 'GC_DEPFILES', 'GC_JOBID', 'GC_JOBNUM',
        'GC_JOB_ID', 'GC_PARAM', 'GC_RUNTIME', 'GC_VERSION', 'JOB_RANDOM',
        'JOBID', 'LANDINGZONE_LL', 'LANDINGZONE_UL', 'MY_JOB', 'MY_JOBID',
        'MY_RUNTIME', 'SB_INPUT_FILES', 'SB_OUTPUT_FILES', 'SCRATCH_LL',
        'SCRATCH_UL', 'SEEDS', 'SE_INPUT_FILES', 'SE_INPUT_PATH',
        'SE_INPUT_PATTERN', 'SE_MINFILESIZE', 'SE_OUTPUT_FILES',
        'SE_OUTPUT_PATH', 'SE_OUTPUT_PATTERN', 'SUBST_FILES']
    self._ignore_vars = config.get_list('ignore task vars', ignore_list_default)
def __init__(self, config):
    """Determine the scale factor between stored key values and event counts."""
    InfoScanner.__init__(self, config)
    self._eventsCmd = config.get('events command', '')
    self._eventsKey = config.get('events key', '')
    ev_per_kv = parseStr(config.get('events per key value', ''), float, 1)
    kv_per_ev = parseStr(config.get('key value per events', ''), float, -1)
    if self._eventsKey:
        # exactly one of the two options has to be set to a positive value
        if ev_per_kv * kv_per_ev >= 0:  # one is zero or both are negative/positive
            raise ConfigError('Invalid value for "events per key value" or "key value per events"!')
        elif ev_per_kv > 0:
            self._eventsKeyScale = ev_per_kv
        else:
            self._eventsKeyScale = 1.0 / kv_per_ev
    self._eventsDefault = config.getInt('events default', -1)
def __init__(self, config, datasource_name):
    """Set up delimeter based file selection and metadata extraction."""
    InfoScanner.__init__(self, config, datasource_name)
    # delimeter based selection - needs exactly '<delim>:<expr>' to be active
    match_delim_str = config.get('delimeter match', '')
    self._match_delim = match_delim_str.split(':')
    self._match_inactive = len(self._match_delim) != 2
    # delimeter based metadata setup
    self._setup_arg_list = []
    self._guard_ds = self._setup('DELIMETER_DS',
        config.get('delimeter dataset key', ''),
        config.get('delimeter dataset modifier', ''))
    self._guard_b = self._setup('DELIMETER_B',
        config.get('delimeter block key', ''),
        config.get('delimeter block modifier', ''))
def __init__(self, config, datasource_name):
    """Load the external task configuration and collect its successful jobs."""
    InfoScanner.__init__(self, config, datasource_name)
    ext_config_fn = config.get_fn('source config')
    ext_config_raw = create_config(ext_config_fn, load_only_old_config=True)
    ext_config = ext_config_raw.change_view(set_sections=['global'])
    self._ext_work_dn = ext_config.get_work_path()
    # silence the root logger while the external workflow is instantiated
    logging.getLogger().disabled = True
    ext_workflow = ext_config.get_plugin('workflow', 'Workflow:global',
        cls='Workflow', pkwargs={'backend': 'NullWMS'})
    logging.getLogger().disabled = False
    self._ext_task = ext_workflow.task
    job_selector = JobSelector.create(config.get('source job selector', ''),
        task=self._ext_task)
    # only consider jobs that finished successfully
    self._selected = sorted(ext_workflow.job_manager.job_db.get_job_list(
        AndJobSelector(ClassSelector(JobClass.SUCCESS), job_selector)))
def __init__(self, config, datasource_name):
    """Set up delimeter based file selection and metadata extraction."""
    InfoScanner.__init__(self, config, datasource_name)
    # delimeter based selection - needs exactly '<delim>:<expr>' to be active
    match_delim_str = config.get('delimeter match', '')
    self._match_delim = match_delim_str.split(':')
    self._match_inactive = len(self._match_delim) != 2
    # delimeter based metadata setup
    self._setup_arg_list = []
    self._guard_ds = self._setup('DELIMETER_DS',
        config.get('delimeter dataset key', ''),
        config.get('delimeter dataset modifier', ''))
    self._guard_b = self._setup('DELIMETER_B',
        config.get('delimeter block key', ''),
        config.get('delimeter block modifier', ''))
def __init__(self, config):
    """Read options describing how to determine the number of events per file."""
    InfoScanner.__init__(self, config)
    self._eventsCmd = config.get('events command', '', onChange=triggerDataResync)
    self._eventsKey = config.get('events key', '', onChange=triggerDataResync)
    self._eventsKeyScale = config.getFloat('events per key value', 1.,
        onChange=triggerDataResync)
    self._eventsDefault = config.getInt('events default', -1,
        onChange=triggerDataResync)
def __init__(self, config):
    """Determine the scale factor between stored key values and event counts."""
    InfoScanner.__init__(self, config)
    self._eventsCmd = config.get('events command', '')
    self._eventsKey = config.get('events key', '')
    ev_per_kv = parseStr(config.get('events per key value', ''), float, 1)
    kv_per_ev = parseStr(config.get('key value per events', ''), float, -1)
    if self._eventsKey:
        # exactly one of the two options has to be set to a positive value
        if ev_per_kv * kv_per_ev >= 0:  # one is zero or both are negative/positive
            raise ConfigError('Invalid value for "events per key value" or "key value per events"!')
        elif ev_per_kv > 0:
            self._eventsKeyScale = ev_per_kv
        else:
            self._eventsKeyScale = 1.0 / kv_per_ev
    self._eventsDefault = config.getInt('events default', -1)
def __init__(self, config):
    """Load the external task module and select its successful jobs."""
    InfoScanner.__init__(self, config)
    # temporarily lower the verbosity while the external config is loaded
    newVerbosity = utils.verbosity(utils.verbosity() - 3)
    extConfigFN = config.getPath('source config')
    extConfig = createConfig(extConfigFN).changeView(setSections=['global'])
    self._extWorkDir = extConfig.getWorkPath()
    self._extTask = extConfig.getPlugin(['task', 'module'], cls='TaskModule')
    selector = config.get('source job selector', '')
    # only consider jobs that finished successfully
    extJobDB = JobDB(extConfig,
        jobSelector=lambda jobNum, jobObj: jobObj.state == Job.SUCCESS)
    self._selected = sorted(extJobDB.getJobs(JobSelector.create(selector, task=self._extTask)))
    # restore the previous verbosity level
    utils.verbosity(newVerbosity + 3)
def __init__(self, config):
    """Read parent-dataset matching options and prime the parent lfn map."""
    InfoScanner.__init__(self, config)
    self._source = config.get('parent source', '', onChange=triggerDataResync)
    self._parentKeys = config.getList('parent keys', [], onChange=triggerDataResync)
    self._looseMatch = config.getInt('parent match level', 1, onChange=triggerDataResync)
    self._merge = config.getBool('merge parents', False, onChange=triggerDataResync)
    # maps parent lfn fragments to parent dataset information
    self._lfnMapCache = {}
    self._empty_config = create_config()
    self._readParents(config, self._source)  # read from configured parent source
def __init__(self, config):
    """Configure CMSSW config-info merging and metadata extraction patterns."""
    InfoScanner.__init__(self, config)
    self._importParents = config.getBool('include parent infos', False,
        onChange=triggerResync(['datasets', 'parameters']))
    # merge by hash when requested, otherwise by config file name
    self._mergeKey = 'CMSSW_CONFIG_FILE'
    if config.getBool('merge config infos', True,
            onChange=triggerResync(['datasets', 'parameters'])):
        self._mergeKey = 'CMSSW_CONFIG_HASH'
    self._cfgStore = {}
    self._gtStore = {}
    # extract annotation / data tier strings from CMSSW config dumps
    self._regexAnnotation = re.compile(r'.*annotation.*=.*cms.untracked.string.*\((.*)\)')
    self._regexDataTier = re.compile(r'.*dataTier.*=.*cms.untracked.string.*\((.*)\)')
def __init__(self, config, datasetExpr, datasetNick, sList):
    """Configure dataset/block naming and instantiate the info scanner chain."""
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    (self._ds_select, self._ds_name, self._ds_keys_user,
        self._ds_keys_guard) = self._setup(config, 'dataset')
    (self._b_select, self._b_name, self._b_keys_user,
        self._b_keys_guard) = self._setup(config, 'block')
    # the NullScanner terminates the scanner chain
    scanList = config.getList('scanner', sList) + ['NullScanner']
    self._scanner = lmap(lambda cls: InfoScanner.createInstance(cls, config), scanList)
def __init__(self, config, datasetExpr, datasetNick=None, datasetID=0):
    """Read name/hash/guard settings for datasets and blocks and build scanners."""
    DataProvider.__init__(self, config, '', datasetNick, datasetID)
    def DSB(cFun, n, *args, **kargs):
        # query an option pair - once at dataset and once at block level
        return (cFun('dataset %s' % n, *args, **kargs),
            cFun('block %s' % n, *args, **kargs))
    (self.nameDS, self.nameB) = DSB(config.get, 'name pattern', '')
    (self.kUserDS, self.kUserB) = DSB(config.getList, 'hash keys', [])
    (self.kGuardDS, self.kGuardB) = DSB(config.getList, 'guard override', [])
    self.kSelectDS = config.getList('dataset key select', [])
    # the NullScanner terminates the scanner chain
    scanList = config.getList('scanner', datasetExpr) + ['NullScanner']
    self.scanner = lmap(lambda cls: InfoScanner.createInstance(cls, config), scanList)
def __init__(self, config):
    """Load the external task configuration and collect its successful jobs."""
    InfoScanner.__init__(self, config)
    ext_config_fn = config.getPath('source config')
    ext_config = createConfig(ext_config_fn).changeView(setSections=['global'])
    self._extWorkDir = ext_config.getWorkPath()
    # silence the user logger while the external workflow is instantiated
    logging.getLogger('user').disabled = True
    self._extWorkflow = ext_config.getPlugin('workflow', 'Workflow:global',
        cls='Workflow', pargs=('task',))
    logging.getLogger('user').disabled = False
    self._extTask = self._extWorkflow.task
    selector = config.get('source job selector', '')
    # only consider jobs that finished successfully
    ext_job_db = JobDB(ext_config,
        jobSelector=lambda jobNum, jobObj: jobObj.state == Job.SUCCESS)
    self._selected = sorted(ext_job_db.getJobs(JobSelector.create(selector, task=self._extTask)))
def __init__(self, config, datasource_name):
    """Load the external task configuration and collect its successful jobs."""
    InfoScanner.__init__(self, config, datasource_name)
    ext_config_fn = config.get_fn('source config')
    ext_config_raw = create_config(ext_config_fn, load_only_old_config=True)
    ext_config = ext_config_raw.change_view(set_sections=['global'])
    self._ext_work_dn = ext_config.get_work_path()
    # silence the root logger while the external workflow is instantiated
    logging.getLogger().disabled = True
    ext_workflow = ext_config.get_plugin('workflow', 'Workflow:global',
        cls='Workflow', pkwargs={'backend': 'NullWMS'})
    logging.getLogger().disabled = False
    self._ext_task = ext_workflow.task
    job_selector = JobSelector.create(config.get('source job selector', ''),
        task=self._ext_task)
    # only consider jobs that finished successfully
    self._selected = sorted(ext_workflow.job_manager.job_db.get_job_list(
        AndJobSelector(ClassSelector(JobClass.SUCCESS), job_selector)))
def __init__(self, config):
    """Set up delimeter based file selection and metadata extraction."""
    InfoScanner.__init__(self, config)
    matchDelim = config.get('delimeter match', '', onChange=triggerDataResync)
    self._matchDelim = matchDelim.split(':')
    # delimeter based metadata setup for dataset and block level
    ds_key = config.get('delimeter dataset key', '', onChange=triggerDataResync)
    ds_mod = config.get('delimeter dataset modifier', '', onChange=triggerDataResync)
    b_key = config.get('delimeter block key', '', onChange=triggerDataResync)
    b_mod = config.get('delimeter block modifier', '', onChange=triggerDataResync)
    self._ds = self._setup(ds_key, ds_mod)
    self._b = self._setup(b_key, b_mod)
def __init__(self, config):
    """Read whether config information should be included in the metadata."""
    InfoScanner.__init__(self, config)
    self.includeConfig = config.getBool('include config infos', False)
def __init__(self, config):
    """Locate the source task's output directory and build the job selector.

    Raises DatasetError when the task output directory does not exist, so a
    misconfigured 'source directory' fails fast instead of producing an empty
    scan later on (consistent with the sibling task-output scanners).
    """
    InfoScanner.__init__(self, config)
    self._extWorkDir = config.get('source directory')
    self._extOutputDir = os.path.join(self._extWorkDir, 'output')
    # fail fast when the task has no output directory to scan
    if not os.path.isdir(self._extOutputDir):
        raise DatasetError('Unable to find task output directory %s' % repr(self._extOutputDir))
    self._selector = JobSelector.create(config.get('source job selector', ''))
def __init__(self, config):
    """Read the marker used to strip local path prefixes from lfns."""
    InfoScanner.__init__(self, config)
    self.stripPath = config.get('lfn marker', '/store/',
        onChange=triggerResync(['datasets', 'parameters']))
def __init__(self, config):
    """Read whether config information should be included in the metadata."""
    InfoScanner.__init__(self, config)
    self.includeConfig = config.getBool('include config infos', False,
        onChange=triggerResync(['datasets', 'parameters']))
def __init__(self, config, datasource_name):
    """Read whether config information should be included in the metadata."""
    InfoScanner.__init__(self, config, datasource_name)
    self._include_config = config.get_bool('include config infos', False)
def _create_scanner(scanner_name):
    # instantiate a single info scanner plugin for this datasource
    # (scanner_config and datasource_name are taken from the enclosing scope)
    return InfoScanner.create_instance(scanner_name, scanner_config, datasource_name)
def __init__(self, config, datasetExpr, datasetNick, sList):
    """Configure dataset/block naming and instantiate the info scanner chain."""
    DataProvider.__init__(self, config, datasetExpr, datasetNick)
    (self._ds_select, self._ds_name, self._ds_keys_user,
        self._ds_keys_guard) = self._setup(config, 'dataset')
    (self._b_select, self._b_name, self._b_keys_user,
        self._b_keys_guard) = self._setup(config, 'block')
    # the NullScanner terminates the scanner chain
    scanList = config.getList('scanner', sList) + ['NullScanner']
    self._scanner = lmap(lambda cls: InfoScanner.createInstance(cls, config), scanList)
def __init__(self, config, datasource_name):
    """Read the prefix that is prepended to all discovered file names."""
    InfoScanner.__init__(self, config, datasource_name)
    self._prefix = config.get('filename prefix', '')
def __init__(self, config):
    """Set up delimeter based file selection and key extraction."""
    InfoScanner.__init__(self, config)
    self._matchDelim = config.get('delimeter match', '').split(':')
    self._delimDS = config.get('delimeter dataset key', '')
    self._delimB = config.get('delimeter block key', '')
def __init__(self, config):
    """Read the marker used to strip local path prefixes from lfns."""
    InfoScanner.__init__(self, config)
    self.stripPath = config.get('lfn marker', '/store/',
        onChange=triggerResync(['datasets', 'parameters']))
def __init__(self, config):
    """Read whether config information should be included in the metadata."""
    InfoScanner.__init__(self, config)
    self.includeConfig = config.getBool('include config infos', False,
        onChange=triggerResync(['datasets', 'parameters']))
def __init__(self, config, datasource_name):
    """Read options describing how to determine the number of entries per file."""
    InfoScanner.__init__(self, config, datasource_name)
    # 'events ...' option names are kept as deprecated aliases of 'entries ...'
    self._entries_cmd = config.get(['events command', 'entries command'], '')
    self._entries_key = config.get(['events key', 'entries key'], '')
    self._entries_key_scale = config.get_float(
        ['events per key value', 'entries per key value'], 1.)
    self._entries_default = config.get_int(
        ['events default', 'entries default'], -1)
def __init__(self, config):
    """Read the marker used to strip local path prefixes from lfns."""
    InfoScanner.__init__(self, config)
    self.stripPath = config.get('lfn marker', '/store/')
def __init__(self, config):
    """Read the source directory and normalize local (non-URL) paths.

    Uses a native conditional instead of utils.QM: QM is a plain function, so
    it evaluated BOTH branches eagerly and called utils.cleanPath even when
    the path was a URL whose cleaned form was then discarded.
    """
    InfoScanner.__init__(self, config)
    self._path = config.get('source directory', '.')
    # only clean local filesystem paths - URLs are kept as-is
    if '://' not in self._path:
        self._path = utils.cleanPath(self._path)
def __init__(self, config, datasource_name):
    """Read the marker used to strip local path prefixes from lfns."""
    InfoScanner.__init__(self, config, datasource_name)
    self._strip_path = config.get('lfn marker', '/store/')
def __init__(self, config, datasource_name):
    """Set up shell-style filename filtering for discovered files."""
    InfoScanner.__init__(self, config, datasource_name)
    self._match = config.get_matcher('filename filter', '*.root',
        default_matcher='ShellStyleMatcher')
    self._relative = config.get_bool('filename filter relative', True)
def __init__(self, config, datasource_name):
    """Create a ListProvider for the configured source dataset file."""
    InfoScanner.__init__(self, config, datasource_name)
    source_dataset_path = config.get('source dataset path')
    self._source = DataProvider.create_instance('ListProvider', config,
        'source dataset', source_dataset_path)