def __init__(self, config, onChange):
    """Read partition-count targets (default -1) and set up per-nickname counters."""
    DataProcessor.__init__(self, config, onChange)
    self._targetJobs = config.getInt('target partitions', -1, onChange=onChange)
    self._targetJobsDS = config.getInt('target partitions per nickname', -1, onChange=onChange)
    # counters keyed by nickname; the None key collects the overall totals
    self._entries = {None: 0}
    self._files = {None: 0}
    self._config = config
def __init__(self, config, datasource_name):
    """Read the location filter configured for this datasource."""
    DataProcessor.__init__(self, config, datasource_name)
    filter_option = self._get_dproc_opt('location filter')
    self._location_filter = config.get_filter(filter_option, '',
        default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
def __init__(self, config, onChange):
    """Read the dataset location filter (black/white matcher, strict list filter)."""
    DataProcessor.__init__(self, config, onChange)
    self._locationfilter = config.getFilter('dataset location filter', '',
        defaultMatcher='blackwhite', defaultFilter='strict', onChange=onChange)
def __init__(self, config):
    """Read partition-count targets; option changes trigger a data resync."""
    DataProcessor.__init__(self, config)
    resync = DataProcessor.triggerDataResync
    self._targetJobs = config.getInt('target partitions', -1, onChange=resync)
    self._targetJobsDS = config.getInt('target partitions per nickname', -1, onChange=resync)
    # counters keyed by nickname; the None key collects the overall totals
    self._entries = {None: 0}
    self._files = {None: 0}
    self._config = config
def __init__(self, config, datasource_name):
    """Read the empty-file / empty-block removal switches (both on by default)."""
    DataProcessor.__init__(self, config, datasource_name)
    self._empty_files = config.get_bool(self._get_dproc_opt('remove empty files'), True)
    self._empty_block = config.get_bool(self._get_dproc_opt('remove empty blocks'), True)
    # running totals of what has been removed so far
    self._removed_files = 0
    self._removed_blocks = 0
def __init__(self, config, datasource_name):
    """Read absolute and fractional file limits (default -1 / -1.0)."""
    DataProcessor.__init__(self, config, datasource_name)
    self._limit_files = config.get_int(
        self._get_dproc_opt(['limit files', 'limit urls']), -1)
    self._limit_files_fraction = config.get_float(
        self._get_dproc_opt(['limit files fraction', 'limit urls fraction']), -1.)
    # per-dataset bookkeeping, filled in later
    self._limit_files_per_ds = {}
    self._files_per_ds = {}
def __init__(self, config): DataProcessor.__init__(self, config) # Ensure the same nickname is used consistently in all blocks of a dataset self._checkConsistency = config.getBool('nickname check consistency', True) self._checkConsistencyData = {} # Check if two different datasets have the same nickname self._checkCollision = config.getBool('nickname check collision', True) self._checkCollisionData = {}
def __init__(self, config):
    """Read partition-count targets and remember whether any target is set."""
    DataProcessor.__init__(self, config)
    self._targetJobs = config.getInt('target partitions', -1)
    self._targetJobsDS = config.getInt('target partitions per nickname', -1)
    # true iff at least one of the two targets was configured
    self._writeSettings = (self._targetJobs != -1) or (self._targetJobsDS != -1)
    # counters keyed by nickname; the None key collects the overall totals
    self._entries = {None: 0}
    self._files = {None: 0}
    self._config = config
def __init__(self, config):
    """Read the uniqueness-check modes for urls and blocks (abort by default)."""
    DataProcessor.__init__(self, config)
    # option names are kept on the instance as well
    self._checkURLOpt = 'dataset check unique url'
    self._checkURL = config.getEnum(self._checkURLOpt,
        DatasetUniqueMode, DatasetUniqueMode.abort)
    self._checkBlockOpt = 'dataset check unique block'
    self._checkBlock = config.getEnum(self._checkBlockOpt,
        DatasetUniqueMode, DatasetUniqueMode.abort)
def __init__(self, config, onChange):
    """Read the empty-file / empty-block removal switches (both on by default)."""
    DataProcessor.__init__(self, config, onChange)
    self._emptyFiles = config.getBool('dataset remove empty files', True, onChange=onChange)
    self._emptyBlock = config.getBool('dataset remove empty blocks', True, onChange=onChange)
    # running totals of what has been removed so far
    self._removedFiles = 0
    self._removedBlocks = 0
def __init__(self, config, datasource_name):
    """Read the sorting switches for datasets, blocks, files and locations."""
    DataProcessor.__init__(self, config, datasource_name)
    def _read_sort_flag(option):
        # all sorting is disabled unless explicitly enabled
        return config.get_bool(self._get_dproc_opt(option), False)
    self._sort_ds = _read_sort_flag('sort')
    self._sort_block = _read_sort_flag('block sort')
    self._sort_files = _read_sort_flag('files sort')
    self._sort_location = _read_sort_flag('location sort')
def __init__(self, config):
    """Set up the negated url filter used to drop ignored files/urls."""
    DataProcessor.__init__(self, config)
    # separate config view with the processor set to NullDataProcessor -
    # presumably so the filter parser does not re-enter this chain (TODO confirm)
    internal_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dataprocessor'])
    internal_config.set('dataset processor', 'NullDataProcessor')
    # negate=True: matched entries are removed; entries are matched on their URL field
    self._url_filter = config.getFilter(['dataset ignore files', 'dataset ignore urls'], '',
        negate = True,
        filterParser = lambda value: self._parseFilter(internal_config, value),
        filterStr = lambda value: str.join('\n', value.split()),
        matchKey = itemgetter(DataProvider.URL),
        defaultMatcher = 'blackwhite', defaultFilter = 'weak',
        onChange = DataProcessor.triggerDataResync)
def __init__(self, config, onChange):
    """Set up the negated url filter used to drop ignored files/urls."""
    DataProcessor.__init__(self, config, onChange)
    # separate config view with the processor set to NullDataProcessor -
    # presumably so the filter parser does not re-enter this chain (TODO confirm)
    internal_config = config.changeView(viewClass = 'SimpleConfigView', setSections = ['dataprocessor'])
    internal_config.set('dataset processor', 'NullDataProcessor')
    # url matching is forced to be case insensitive
    config.set('dataset ignore urls matcher case sensitive', 'False')
    # negate=True: matched entries are removed
    self._url_filter = config.getFilter(['dataset ignore files', 'dataset ignore urls'], '',
        negate = True,
        filterParser = lambda value: self._parseFilter(internal_config, value),
        filterStr = lambda value: str.join('\n', value.split()),
        defaultMatcher = 'blackwhite', defaultFilter = 'weak',
        onChange = onChange)
def __init__(self, config, datasource_name):
    """Read partition-count targets and keep the config only while enabled."""
    DataProcessor.__init__(self, config, datasource_name)
    self._target_jobs = config.get_int(
        join_config_locations(['', datasource_name], 'target partitions'), -1)
    self._target_jobs_ds = config.get_int(
        join_config_locations(['', datasource_name], 'target partitions per nickname'), -1)
    # counters keyed by nickname; the None key collects the overall totals
    self._entries = {None: 0}
    self._files = {None: 0}
    # only hold on to the config handle when the processor is actually enabled
    self._config = None
    if self.enabled():
        self._config = config
def __init__(self, config, datasource_name):
    """Set up the negated url filter used to drop ignored files/urls."""
    DataProcessor.__init__(self, config, datasource_name)
    # url matching is forced to be case insensitive for this datasource
    config.set('%s ignore urls matcher case sensitive' % datasource_name, 'False')
    # negate=True: matched entries are removed
    self._url_filter = config.get_filter(
        self._get_dproc_opt(['ignore files', 'ignore urls']), '', negate=True,
        default_matcher='BlackWhiteMatcher', default_filter='WeakListFilter',
        filter_parser=lambda value: self._parse_filter(config, value),
        filter_str=lambda value: str.join('\n', value.split()))
def __init__(self, config, onChange):
    """Read the four dataset sorting toggles (all disabled by default)."""
    DataProcessor.__init__(self, config, onChange)
    # table-driven setup keeps the attribute/option pairing in one place;
    # options are read in the same order as before
    for attr_name, option in [
            ('_sortDS', 'dataset sort'),
            ('_sortBlock', 'dataset block sort'),
            ('_sortFiles', 'dataset files sort'),
            ('_sortLocation', 'dataset location sort')]:
        setattr(self, attr_name, config.getBool(option, False, onChange=onChange))
def __init__(self, config, onChange):
    """Set up the negated url filter used to drop ignored files/urls."""
    DataProcessor.__init__(self, config, onChange)
    # separate config view with the processor set to NullDataProcessor -
    # presumably so the filter parser does not re-enter this chain (TODO confirm)
    internal_config = config.changeView(viewClass='SimpleConfigView', setSections=['dataprocessor'])
    internal_config.set('dataset processor', 'NullDataProcessor')
    # url matching is forced to be case insensitive
    config.set('dataset ignore urls matcher case sensitive', 'False')
    # negate=True: matched entries are removed
    self._url_filter = config.getFilter(
        ['dataset ignore files', 'dataset ignore urls'], '', negate=True,
        filterParser=lambda value: self._parseFilter(
            internal_config, value),
        filterStr=lambda value: str.join('\n', value.split()),
        defaultMatcher='blackwhite', defaultFilter='weak', onChange=onChange)
def __init__(self, config):
    """Read the empty-file / empty-block removal switches (both on by default)."""
    DataProcessor.__init__(self, config)
    self._emptyFiles = config.getBool('dataset remove empty files', True)
    self._emptyBlock = config.getBool('dataset remove empty blocks', True)
def __init__(self, config):
    """Start with zeroed entry and block counters."""
    DataProcessor.__init__(self, config)
    (self._entries, self._blocks) = (0, 0)
def __init__(self, config, onChange):
    """Read the entry limit (default -1); two option spellings are accepted."""
    DataProcessor.__init__(self, config, onChange)
    limit_options = ['dataset limit events', 'dataset limit entries']
    self._limitEntries = config.getInt(limit_options, -1, onChange=onChange)
def __init__(self, config, onChange):
    """Read the entry limit (default -1); two option spellings are accepted."""
    DataProcessor.__init__(self, config, onChange)
    self._limitEntries = config.getInt(
        ['dataset limit events', 'dataset limit entries'], -1, onChange=onChange)
def __init__(self, config, onChange):
    """Read the four dataset sorting toggles (all disabled by default)."""
    DataProcessor.__init__(self, config, onChange)
    # one switch per level: dataset list, block list, file list, location list
    self._sortDS = config.getBool('dataset sort', False, onChange=onChange)
    self._sortBlock = config.getBool('dataset block sort', False, onChange=onChange)
    self._sortFiles = config.getBool('dataset files sort', False, onChange=onChange)
    self._sortLocation = config.getBool('dataset location sort', False, onChange=onChange)
def __init__(self, config):
    """Initialize the base processor, then bring mutable state to its
    starting values via reset()."""
    DataProcessor.__init__(self, config)
    self.reset()
def __init__(self, config):
    """Read the uniqueness-check modes; option changes trigger a data resync."""
    DataProcessor.__init__(self, config)
    resync = DataProcessor.triggerDataResync
    self._checkURL = config.getEnum('dataset check unique url',
        DatasetUniqueMode, DatasetUniqueMode.abort, onChange=resync)
    self._checkBlock = config.getEnum('dataset check unique block',
        DatasetUniqueMode, DatasetUniqueMode.abort, onChange=resync)
def __init__(self, config):
    """Read the dataset location filter; option changes trigger a data resync."""
    DataProcessor.__init__(self, config)
    self._locationfilter = config.getFilter('dataset location filter', '',
        defaultMatcher='blackwhite', defaultFilter='strict',
        onChange=DataProcessor.triggerDataResync)
def __init__(self, config):
    """Read the empty-file / empty-block removal switches; changes resync."""
    DataProcessor.__init__(self, config)
    resync = DataProcessor.triggerDataResync
    self._emptyFiles = config.getBool('dataset remove empty files', True, onChange=resync)
    self._emptyBlock = config.getBool('dataset remove empty blocks', True, onChange=resync)
def __init__(self, config, onChange):
    """Read the file limit (default -1); two option spellings are accepted."""
    DataProcessor.__init__(self, config, onChange)
    limit_options = ['dataset limit files', 'dataset limit urls']
    self._limitFiles = config.getInt(limit_options, -1, onChange=onChange)
def __init__(self, config, onChange):
    """Plain pass-through constructor - no extra state beyond the base class."""
    DataProcessor.__init__(self, config, onChange)
def __init__(self, config):
    """Read the ignore list (empty by default); two option spellings accepted."""
    DataProcessor.__init__(self, config)
    ignore_options = ['dataset ignore urls', 'dataset ignore files']
    self._ignoreURLs = config.getList(ignore_options, [])
def __init__(self, config, onChange):
    """Read the empty-file / empty-block removal switches and zero the counters."""
    DataProcessor.__init__(self, config, onChange)
    self._emptyFiles = config.getBool('dataset remove empty files', True, onChange=onChange)
    self._emptyBlock = config.getBool('dataset remove empty blocks', True, onChange=onChange)
    # running totals of what has been removed so far
    self._removedFiles = 0
    self._removedBlocks = 0
def __init__(self, config):
    """Read the file limit (default -1); option changes trigger a data resync."""
    DataProcessor.__init__(self, config)
    self._limitFiles = config.getInt(['dataset limit files', 'dataset limit urls'], -1,
        onChange=DataProcessor.triggerDataResync)
def __init__(self, config, datasource_name):
    """Start with zeroed entry, block and file counters."""
    DataProcessor.__init__(self, config, datasource_name)
    self._entries = 0
    self._blocks = 0
    self._files = 0
def __init__(self, config, onChange):
    """Read the file limit (default -1); two option spellings are accepted."""
    DataProcessor.__init__(self, config, onChange)
    self._limitFiles = config.getInt(
        ['dataset limit files', 'dataset limit urls'], -1, onChange=onChange)
def __init__(self, config):
    """Read the ignore list (empty by default); two option spellings accepted."""
    DataProcessor.__init__(self, config)
    self._ignoreURLs = config.getList(['dataset ignore urls', 'dataset ignore files'], [])
def __init__(self, config):
    """Read the entry limit (default -1); option changes trigger a data resync."""
    DataProcessor.__init__(self, config)
    self._limitEntries = config.getInt(['dataset limit events', 'dataset limit entries'], -1,
        onChange=DataProcessor.triggerDataResync)
def __init__(self, config, datasource_name):
    """Read the entry limit (default -1); two option spellings are accepted."""
    DataProcessor.__init__(self, config, datasource_name)
    entry_option = self._get_dproc_opt(['limit events', 'limit entries'])
    self._limit_entries = config.get_int(entry_option, -1)