def __enter__(self):
    """
    Set up the processing context and return it.

    In order, this method:

    * resolves the checksum type and loads the ``project:<name>``
      configuration section,
    * checks the ``--commands-file`` argument,
    * warns once if ``directory_format`` contains a path element that does
      not start with a ``%(facet)s`` pattern,
    * reads the facets and the translated ``filename_format`` pattern,
    * builds the DRS tree and disables the file scan when a cached tree
      generated with a similar context can be reused,
    * creates the file collector with its netCDF / hidden-file filters,
    * replaces ``self.pbar`` with a ``tqdm`` progress bar when it is enabled,
    * creates the thread pool when ``self.use_pool`` is set.

    :returns: This context instance (``self``)
    """
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies existing file
    self.check_existing_commands_file()
    # Warn user about unconsidered hard-coded elements.
    # The pattern is compiled once instead of on every loop iteration.
    facet_pattern = re.compile(r'%\([\w]+\)s')
    for pattern_element in self.cfg.get('directory_format').strip().split('/'):
        if not facet_pattern.match(pattern_element):
            # The two literals are concatenated before ".format()" is applied;
            # the trailing space keeps the words of the sentence apart.
            msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                  'are not supported.'.format(pattern_element)
            if self.pbar:
                print(msg)
            logging.warning(msg)
            # One warning is enough, stop at the first offending element
            break
    self.facets = self.cfg.get_facets('directory_format')
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated using the
    # same context and the action is not "list"
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        # Built-in next() works on both Python 2.6+ and Python 3 iterators
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector (the "spinner" is switched off without progress bar)
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
    # And exclude hidden files
    self.sources.FileFilter[uuid()] = (r'^\..*$', True)
    # Init progress bar: the boolean flag is replaced by the tqdm instance
    if self.pbar:
        nfiles = len(self.sources)
        self.pbar = tqdm(desc='Scanning incoming files',
                         total=nfiles,
                         bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                         ncols=100,
                         file=sys.stdout)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self
def __enter__(self):
    """
    Enter the processing context and set up the source collector.

    After delegating to the parent ``__enter__``, the facet keys are taken
    from the named groups of the translated ``directory_format`` and
    ``dataset_id`` patterns, minus ``IGNORED_KEYS``. The collector,
    ``self.source_type`` and ``self.pattern`` are then chosen from the first
    available input, tested in this order: ``self.directory``,
    ``self.incoming``, ``self.dataset_id`` and finally ``self.dataset_list``.

    :returns: This context instance (``self``)
    """
    super(ProcessingContext, self).__enter__()
    # Get the DRS facet keys from the named groups of both translated patterns
    directory_regex = re.compile(
        self.cfg.translate('directory_format', add_ending_filename=True))
    dataset_regex = re.compile(self.cfg.translate('dataset_id'))
    facet_keys = set(directory_regex.groupindex) | set(dataset_regex.groupindex)
    self.facets = facet_keys.difference(IGNORED_KEYS)
    # Init data collector
    if self.directory or self.incoming:
        # File-based sources share the same file and directory filters
        self.source_type = 'file'
        if self.directory:
            # The source is a list of directories
            self.sources = PathCollector(sources=self.directory)
            pattern = self.cfg.translate('directory_format',
                                         add_ending_filename=True)
        else:
            # The source is taken from "incoming"; names are matched against
            # the filename format
            self.sources = Collector(sources=self.incoming)
            pattern = self.cfg.translate('filename_format')
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        self.pattern = pattern
    else:
        self.source_type = 'dataset'
        if self.dataset_id:
            # The source is a dataset ID (potentially from stdin)
            dataset_ids = [self.dataset_id]
        else:
            # The source is a list of files (i.e., several dataset lists)
            # Has to be tested at the end because args.dataset_list never None,
            # see __init__ comment.
            stripped_lines = (line.strip() for line in self.dataset_list.readlines())
            # Blank lines are skipped
            dataset_ids = [line for line in stripped_lines if line]
        self.sources = DatasetCollector(sources=dataset_ids, versioned=False)
        # Translate dataset_id format
        self.pattern = self.cfg.translate('dataset_id')
    # Get number of sources
    self.nbsources = len(self.sources)
    return self
def __enter__(self):
    """
    Enter the processing context and prepare the DRS tree and file collector.

    After delegating to the parent ``__enter__``, the facets of
    ``directory_format`` are read and must contain ``version``. Every path
    element of ``directory_format`` that does not start with a ``%(facet)s``
    pattern is treated as hard-coded: a generated key is inserted into
    ``self.facets`` at the position of the element, the literal text is
    stored in ``self.set_values`` under that key, and the key is appended to
    ``IGNORED_KEYS``.

    :returns: This context instance (``self``)
    :raises NoVersionPattern: If ``version`` is not among the facets
    """
    super(ProcessingContext, self).__enter__()
    # Get the DRS facet keys from pattern
    self.facets = self.cfg.get_facets('directory_format')
    # Check if --commands-file argument specifies existing file
    self.check_existing_commands_file()
    directory_format = self.cfg.get('directory_format')
    # Raise error when %(version)s is not part of the final directory format
    if 'version' not in self.facets:
        raise NoVersionPattern(directory_format, self.facets)
    # Consider hard-coded elements in directory format.
    # "idx" is the position in self.facets of the last element handled; it
    # starts at -1 so that a leading hard-coded element is inserted first.
    facet_pattern = re.compile(r'%\(([\w]+)\)s')
    idx = -1
    for pattern_element in directory_format.strip().split('/'):
        matched = facet_pattern.match(pattern_element)
        if matched:
            # If pattern is %(...)s, get its index in the list of facets
            idx = self.facets.index(matched.group(1))
            continue
        # If pattern is not %(...)s, generate a uuid()
        key = str(uuid())
        # Insert hard-coded string in self.facets to be part of DRS path.
        # Advancing "idx" keeps consecutive hard-coded elements in the order
        # they appear in the directory format.
        idx += 1
        self.facets.insert(idx, key)
        # Set the value using --set-value
        self.set_values[key] = pattern_element
        # Add the uuid to the ignored keys (IGNORED_KEYS is modified in place)
        IGNORED_KEYS.append(key)
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Init data collector
    self.sources = Collector(sources=self.directory)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter.add(regex=r'^.*\.nc$')
    # And exclude hidden files
    self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
    # Get number of sources
    self.nbsources = len(self.sources)
    return self
def __enter__(self):
    """
    Set up the processing context and return it.

    In order, this method:

    * resolves the checksum type and loads the ``project:<name>``
      configuration section,
    * checks the ``--commands-file`` argument,
    * reads the facets of ``directory_format``, which must contain
      ``version``,
    * registers every hard-coded path element of ``directory_format`` (one
      that does not start with a ``%(facet)s`` pattern) under a generated
      key in ``self.facets``, ``self.set_values`` and ``IGNORED_KEYS``,
    * builds the DRS tree and disables the file scan when a cached tree
      generated with a similar context can be reused,
    * creates the file collector with its netCDF / hidden-file filters,
    * either replaces ``self.pbar`` with a ``tqdm`` progress bar (scan
      enabled, progress bar requested and at least one file) or reports that
      the scan is skipped,
    * creates the thread pool when ``self.use_pool`` is set.

    :returns: This context instance (``self``)
    :raises NoVersionPattern: If ``version`` is not among the facets
    """
    # Get checksum client
    self.checksum_type = self.get_checksum_type()
    # Init configuration parser
    self.cfg = SectionParser(section='project:{}'.format(self.project),
                             directory=self.config_dir)
    # Check if --commands-file argument specifies existing file
    self.check_existing_commands_file()
    # Get DRS facets
    self.facets = self.cfg.get_facets('directory_format')
    directory_format = self.cfg.get('directory_format')
    # Raise error when %(version)s is not part of the final directory format
    if 'version' not in self.facets:
        raise NoVersionPattern(directory_format, self.facets)
    # Consider hard-coded elements in directory format.
    # "idx" is the position in self.facets of the last element handled; it
    # starts at -1 so that a leading hard-coded element is inserted first.
    facet_pattern = re.compile(r'%\(([\w]+)\)s')
    idx = -1
    for pattern_element in directory_format.strip().split('/'):
        matched = facet_pattern.match(pattern_element)
        if matched:
            # If pattern is %(...)s, get its index in the list of facets
            idx = self.facets.index(matched.group(1))
            continue
        # If pattern is not %(...)s, generate a uuid()
        key = str(uuid())
        # Insert hard-coded string in self.facets to be part of DRS path.
        # Advancing "idx" keeps consecutive hard-coded elements in the order
        # they appear in the directory format.
        idx += 1
        self.facets.insert(idx, key)
        # Set the value using --set-value
        self.set_values[key] = pattern_element
        # Add the uuid to the ignored keys (IGNORED_KEYS is modified in place)
        IGNORED_KEYS.append(key)
    self.pattern = self.cfg.translate('filename_format')
    # Init DRS tree
    self.tree = DRSTree(self.root, self.version, self.mode, self.commands_file)
    # Disable file scan if a previous DRS tree has been generated using the
    # same context and the action is not "list"
    if not self.rescan and self.action != 'list' and os.path.isfile(TREE_FILE):
        reader = load(TREE_FILE)
        # Built-in next() works on both Python 2.6+ and Python 3 iterators
        old_args = next(reader)
        # Ensure that processing context is similar to previous step
        if self.check_args(old_args):
            self.scan = False
    # Init data collector (the "spinner" is switched off without progress bar)
    if self.pbar:
        self.sources = Collector(sources=self.directory, data=self)
    else:
        self.sources = Collector(sources=self.directory, spinner=False, data=self)
    # Init file filter
    # Only supports netCDF files
    self.sources.FileFilter.add(regex=r'^.*\.nc$')
    # And exclude hidden files
    self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
    # Init progress bar
    if self.scan:
        nfiles = len(self.sources)
        # The boolean flag is replaced by the tqdm instance, but only when
        # there is at least one file to scan
        if self.pbar and nfiles:
            self.pbar = tqdm(desc='Scanning incoming files',
                             total=nfiles,
                             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                             ncols=100,
                             file=sys.stdout)
    else:
        msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
              'Using cached DRS tree from {}'.format(TREE_FILE)
        if self.pbar:
            print(msg)
        logging.warning(msg)
    # Init threads pool
    if self.use_pool:
        self.pool = ThreadPool(int(self.threads))
    return self