Example #1
 def __enter__(self):
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
     # Init data collector
     if self.directory:
         # The source is a list of directories
         # Instantiate file collector to walk through the tree
         self.source_type = 'files'
         if self.pbar:
             self.sources = PathCollector(sources=self.directory)
         else:
             self.sources = PathCollector(sources=self.directory, spinner=False)
         # Init file filter
         for file_filter in self.file_filter:
             self.sources.FileFilter[uuid()] = file_filter
         # Init dir filter
         self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
         self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
     else:
         # The source is a list of files (i.e., several dataset lists)
         # Instantiate dataset collector to parse the files
         self.source_type = 'datasets'
         if self.pbar:
             self.sources = DatasetCollector(self.dataset_list)
         else:
             self.sources = DatasetCollector(self.dataset_list, spinner=False)
         self.pattern = self.cfg.translate('dataset_id')
     # Get the facet keys from pattern
     self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
     # Init progress bar
     if self.pbar:
         self.sources = as_pbar(self.sources, desc='Harvesting facets values from source', units=self.source_type)
     return self
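The facet harvesting above leans on re's groupindex: every named group in the translated pattern becomes a facet key, minus the ignored ones. A minimal, self-contained sketch (the pattern and IGNORED_KEYS values here are hypothetical stand-ins for what cfg.translate() and esgprep's constants provide):

import re

# Hypothetical stand-in for the regex returned by cfg.translate()
pattern = r'(?P<project>\w+)\.(?P<model>[\w-]+)\.(?P<version>v\d+)'
IGNORED_KEYS = ['version']

# groupindex maps each named group to its position; its keys are
# exactly the facet names embedded in the pattern
facets = set(re.compile(pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
print(facets)  # -> {'project', 'model'} (order may vary)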
Example #2
    def get_mapfile_drs(self):

        try:
            _cfg = SectionParser(section='config:{}'.format(self.project), directory=self.config_dir)
            mapfile_drs = _cfg.get('mapfile_drs')
            _cfg.reset()
        except (NoConfigOption, NoConfigSection):
            mapfile_drs = None
        return mapfile_drs
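The same fallback pattern can be reproduced with the standard library parser; SectionParser and its NoConfigOption/NoConfigSection exceptions belong to esgprep, so this sketch only mirrors the behavior: a missing section or option yields None instead of an error.

from configparser import ConfigParser, NoOptionError, NoSectionError

def get_mapfile_drs(ini_path, project):
    # ConfigParser.read() silently ignores missing files
    cfg = ConfigParser()
    cfg.read(ini_path)
    try:
        return cfg.get('config:{}'.format(project), 'mapfile_drs')
    except (NoSectionError, NoOptionError):
        return None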
Example #3
 def __enter__(self):
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Check if --commands-file argument specifies an existing file
     self.check_existing_commands_file()
     # Warn user about unconsidered hard-coded elements
     for pattern_element in self.cfg.get('directory_format').strip().split(
             "/"):
         if not re.match(re.compile(r'%\([\w]+\)s'), pattern_element):
             msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                   'are not supported.'.format(pattern_element)
             if self.pbar:
                 print(msg)
             logging.warning(msg)
             break
     self.facets = self.cfg.get_facets('directory_format')
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
     # Disable file scan if a previous DRS tree was generated with the same context and the action is not "list"
     if not self.rescan and self.action != 'list' and os.path.isfile(
             TREE_FILE):
         reader = load(TREE_FILE)
         old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
     # Init file filter
     # Only supports netCDF files
     self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
     # And exclude hidden files
     self.sources.FileFilter[uuid()] = (r'^\..*$', True)
     # Init progress bar
     if self.pbar:
         nfiles = len(self.sources)
         self.pbar = tqdm(
             desc='Scanning incoming files',
             total=nfiles,
             bar_format=
             '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
             ncols=100,
             file=sys.stdout)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
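The hard-coded element check is plain re: each directory_format element must look like a %(facet)s placeholder. A runnable sketch with a hypothetical template:

import re

# Every element split from the DRS template must be a %(facet)s
# placeholder; anything else is a hard-coded element
DRS_ELEMENT = re.compile(r'%\([\w]+\)s')

for element in '%(root)s/%(project)s/output/%(variable)s'.strip().split('/'):
    if not DRS_ELEMENT.match(element):
        print('Hard-coded DRS element: "{}"'.format(element))  # prints once, for "output"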
Example #4
 def __enter__(self):
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     # Init data collector
     if self.directory:
         # The source is a list of directories
         # Instantiate file collector to walk through the tree
         self.source_type = 'files'
         if self.pbar:
             self.sources = PathCollector(sources=self.directory)
         else:
             self.sources = PathCollector(sources=self.directory,
                                          spinner=False)
         # Init file filter
         for regex, inclusive in self.file_filter:
             self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
         # Init dir filter
         self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
         self.pattern = self.cfg.translate('directory_format',
                                           add_ending_filename=True)
     else:
         # The source is a list of files (i.e., several dataset lists)
         # Instantiate dataset collector to parse the files
         self.source_type = 'datasets'
         if self.pbar:
             self.sources = DatasetCollector(source=[
                 x.strip() for x in self.dataset_list.readlines()
                 if x.strip()
             ],
                                             versioned=False)
         else:
             self.sources = DatasetCollector(source=[
                 x.strip() for x in self.dataset_list.readlines()
                 if x.strip()
             ],
                                             spinner=False,
                                             versioned=False)
         self.pattern = self.cfg.translate('dataset_id')
     # Get the facet keys from pattern
     self.facets = set(re.compile(
         self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
     # Init progress bar
     nfiles = len(self.sources)
     if self.pbar and nfiles:
         self.sources = tqdm(
             self.sources,
             desc='Harvesting facets values from data',
             total=nfiles,
             bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} '
             + self.source_type,
             ncols=100,
             file=sys.stdout)
     return self
Example #5
 def __enter__(self):
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     self.facets = self.cfg.get_facets('dataset_id')
     self.pattern = self.cfg.translate('directory_format',
                                       filename_pattern=True)
     # Get mapfile DRS if set in configuration file
     try:
         self.mapfile_drs = self.cfg.get('mapfile_drs')
     except NoConfigOption:
         self.mapfile_drs = None
     # Init data collector
     if self.pbar:
         self.sources = VersionedPathCollector(
             sources=self.directory,
             data=self,
             dir_format=self.cfg.translate('directory_format'))
     else:
         self.sources = VersionedPathCollector(
             sources=self.directory,
             data=self,
             spinner=False,
             dir_format=self.cfg.translate('directory_format'))
     # Init file filter
     for file_filter in self.file_filter:
         self.sources.FileFilter[uuid()] = file_filter
     # Init dir filter
     self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
     if self.all:
         # Pick up all encountered versions by adding "/latest" exclusion
         self.sources.PathFilter['version_filter'] = ('/latest', True)
     elif self.version:
         # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
         # If --latest-symlink, --version is set to "latest"
         self.sources.PathFilter['version_filter'] = '/{}'.format(
             self.version)
     # Init progress bar
     if self.pbar:
         nfiles = len(self.sources)
         self.pbar = tqdm(
             desc='Mapfile(s) generation',
             total=nfiles,
             bar_format=
             '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
             ncols=100,
             file=sys.stdout)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
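For reference, the custom bar_format used above can be exercised with a manually driven tqdm bar; the file list here is hypothetical:

import sys
from tqdm import tqdm

files = ['tas.nc', 'pr.nc', 'psl.nc']  # hypothetical inputs
pbar = tqdm(desc='Mapfile(s) generation',
            total=len(files),
            bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
            ncols=100,
            file=sys.stdout)
for _ in files:
    # one update per processed file
    pbar.update(1)
pbar.close()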
Example #6
def is_simulation_completed(card_path):
    """
    Returns True if the simulation is completed.

    :param str card_path: Directory including run.card
    :returns: True if the simulation is completed
    :rtype: *boolean*

    """
    # Check cards exist
    if RUN_CARD not in os.listdir(card_path):
        raise NoRunCardFound(card_path)
    else:
        run_card = os.path.join(card_path, RUN_CARD)
    # Extract info from cards
    config = SectionParser('Configuration')
    config.read(run_card)
    return config.get('periodstate').strip('"') == 'Completed'
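The completion check maps directly onto the standard library parser; the run.card content below is hypothetical and SectionParser is esgprep's own class, so this is only a behavioral sketch:

from configparser import ConfigParser

RUN_CARD_TEXT = """
[Configuration]
PeriodState = "Completed"
"""

config = ConfigParser()
config.read_string(RUN_CARD_TEXT)
# Option lookup is case-insensitive; values keep their surrounding quotes
print(config.get('Configuration', 'PeriodState').strip('"') == 'Completed')  # True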
Example #7
    def get_checksum_type(self):
        """
        Gets the checksum type to use.
        Be careful with exception constants when reading two different sections.

        :returns: The checksum type
        :rtype: *str*

        """
        _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
        if _cfg.has_option('checksum', section='DEFAULT'):
            checksum_type = _cfg.get_options_from_table(
                'checksum')[0][1].lower()
        else:  # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
            checksum_type = 'sha256'
        if checksum_type not in checksum_types:
            raise InvalidChecksumType(checksum_type)
        return checksum_type
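Once the checksum type is resolved, the matching constructor can be looked up by name with hashlib. esgprep's actual checksum client may differ; this is a sketch of the common pattern:

import hashlib

def checksum(path, checksum_type='sha256'):
    # hashlib.new() resolves the algorithm by name ('sha256', 'md5', ...)
    h = hashlib.new(checksum_type)
    with open(path, 'rb') as f:
        # Read in 1 MiB blocks to keep memory bounded
        for block in iter(lambda: f.read(1 << 20), b''):
            h.update(block)
    return h.hexdigest()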
Example #8
 def __enter__(self):
     super(MultiprocessingContext, self).__enter__()
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Get mapfile DRS
     self.mapfile_drs = self.get_mapfile_drs()
     # Configuration parser to be loaded at the end
     self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
     return self
Example #9
 def __enter__(self):
     # Init file filter
     for regex, inclusive in self.file_filter:
         self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
     # Exclude fixed frequency in any case
     self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)',
                                 inclusive=False)
     # Init dir filter
     self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
     # Set driving time properties
     tinit = TimeInit(ref=self.sources.first(),
                      tunits_default=self.tunits_default)
     if not self.ref_calendar:
         self.ref_calendar = tinit.calendar
     if not self.ref_units:
         self.ref_units = tinit.tunits
     # Get project id
     if not self.project:
         self.project = get_project(self.sources.first())
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
     self.pattern = self.cfg.translate('filename_format')
     return self
Example #10
def yield_xml_from_card(card_path):
    """
    Yields XML path from run.card and config.card attributes.

    :param str card_path: Directory including run.card and config.card
    :returns: The XML paths to use
    :rtype: *iter*

    """
    # Check cards exist
    if RUN_CARD not in os.listdir(card_path):
        raise NoRunCardFound(card_path)
    else:
        run_card = os.path.join(card_path, RUN_CARD)
    if CONF_CARD not in os.listdir(card_path):
        raise NoConfigCardFound(card_path)
    else:
        conf_card = os.path.join(card_path, CONF_CARD)
    # Extract config info from config.card
    config = SectionParser('UserChoices')
    config.read(conf_card)
    xml_attrs = dict()
    xml_attrs['root'] = FILEDEF_ROOT
    xml_attrs['longname'] = config.get('longname').strip('"')
    xml_attrs['experimentname'] = config.get('experimentname').strip('"')
    if config.has_option('modelname'):
        xml_attrs['modelname'] = config.get('modelname').strip('"')
    else:
        xml_attrs['modelname'] = 'IPSL-CM6A-LR'
    xml_attrs['member'] = config.get('member').strip('"')
    # Extract first and last simulated years from run.card
    with open(run_card, 'r') as f:
        lines = f.read().split('\n')
    # Get run table without header
    lines = [line for line in lines if line.count('|') == 8][1:]
    year_start = int(lines[0].split()[3][:4])
    year_end = int(lines[-1].split()[5][:4])
    for year in range(year_start, year_end + 1):
        xml_attrs['year'] = str(year)
        yield FILEDEF_DIRECTORY_FORMAT.format(**xml_attrs)
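The run table parse keys on lines containing exactly eight '|' characters and drops the header row; the dates are then read positionally from whitespace tokens. A sketch on hypothetical rows (the real run.card layout may differ):

# Hypothetical run.card table: 8 pipes per line, header first
RUN_CARD_LINES = [
    '| State | Begin    | End      | c1 | c2 | c3 | c4 |',
    '| OK    | 18500101 | 18541231 | a  | b  | c  | d  |',
    '| OK    | 18550101 | 18591231 | a  | b  | c  | d  |',
]
rows = [line for line in RUN_CARD_LINES if line.count('|') == 8][1:]
year_start = int(rows[0].split()[3][:4])   # token 3 of the first data row
year_end = int(rows[-1].split()[5][:4])    # token 5 of the last data row
print(year_start, year_end)  # 1850 1859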
Example #11
 def __enter__(self):
     # Init file filter
     for regex, inclusive in self.file_filter:
         self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
     # Exclude fixed frequency in any case
     self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)', inclusive=False)
     # Init dir filter
     self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
     # Set driving time properties
     tinit = TimeInit(ref=self.sources.first(), tunits_default=self.tunits_default)
     if not self.ref_calendar:
         self.ref_calendar = tinit.calendar
     if not self.ref_units:
         self.ref_units = tinit.tunits
     # Get project id
     if not self.project:
         self.project = get_project(self.sources.first())
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
     self.pattern = self.cfg.translate('filename_format')
     return self
Example #12
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """

    def __init__(self, args):
        self.pbar = args.pbar
        self.project = args.project
        self.config_dir = args.i
        self.directory = args.directory
        self.dataset_list = args.dataset_list
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, False) for f in args.include_file])
        else:
            # Default includes netCDF only
            self.file_filter.append((r'^.*\.nc$', False))
        if args.exclude_file:
            self.file_filter.extend([(f, True) for f in args.exclude_file])
        else:
            # Default: exclude hidden files
            self.file_filter.append((r'^\..*$', True))
        self.scan_errors = 0
        self.any_undeclared = False

    def __enter__(self):
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
        # Init data collector
        if self.directory:
            # The source is a list of directories
            # Instantiate file collector to walk through the tree
            self.source_type = 'files'
            if self.pbar:
                self.sources = PathCollector(sources=self.directory)
            else:
                self.sources = PathCollector(sources=self.directory, spinner=False)
            # Init file filter
            for file_filter in self.file_filter:
                self.sources.FileFilter[uuid()] = file_filter
            # Init dir filter
            self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
            self.pattern = self.cfg.translate('directory_format', filename_pattern=True)
        else:
            # The source is a list of files (i.e., several dataset lists)
            # Instantiate dataset collector to parse the files
            self.source_type = 'datasets'
            if self.pbar:
                self.sources = DatasetCollector(self.dataset_list)
            else:
                self.sources = DatasetCollector(self.dataset_list, spinner=False)
            self.pattern = self.cfg.translate('dataset_id')
        # Get the facet keys from pattern
        self.facets = set(re.compile(self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
        # Init progress bar
        if self.pbar:
            self.sources = as_pbar(self.sources, desc='Harvesting facets values from source', units=self.source_type)
        return self

    def __exit__(self, *exc):
        # Default is sys.exit(0)
        if self.scan_errors > 0:
            print('{}: {} (see {})'.format('Scan errors',
                                           self.scan_errors,
                                           logging.getLogger().handlers[0].baseFilename))
            sys.exit(1)
        if self.any_undeclared:
            print('Please update "esg.{}.ini" following: {}'.format(self.project,
                                                                    logging.getLogger().handlers[0].baseFilename))
            sys.exit(2)
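Used as a context manager, the class wires sources, pattern and facets in __enter__ and turns the error counters into exit codes in __exit__. A hedged usage sketch, where get_args() is a hypothetical stand-in for the calling script's argument parsing:

import re

def main():
    args = get_args()  # hypothetical helper building the argparse namespace
    with ProcessingContext(args) as ctx:
        for source in ctx.sources:
            # Count sources that do not match the translated pattern
            if re.match(ctx.pattern, source) is None:
                ctx.scan_errors += 1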
Example #13
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """
    def __init__(self, args):
        self.pbar = args.pbar
        self.config_dir = args.i
        self.project = args.project
        self.directory = args.directory
        self.mapfile_name = args.mapfile
        self.outdir = args.outdir
        self.notes_title = args.tech_notes_title
        self.notes_url = args.tech_notes_url
        self.no_version = args.no_version
        self.threads = args.max_threads
        self.use_pool = (self.threads > 1)
        self.dataset = args.dataset
        if not args.no_cleanup:
            self.clean()
        self.no_cleanup = args.no_cleanup
        self.no_checksum = args.no_checksum
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, False) for f in args.include_file])
        else:
            # Default includes netCDF only
            self.file_filter.append((r'^.*\.nc$', False))
        if args.exclude_file:
            self.file_filter.extend([(f, True) for f in args.exclude_file])
        else:
            # Default: exclude hidden files
            self.file_filter.append((r'^\..*$', True))
        self.all = args.all_versions
        if self.all:
            self.no_version = False
        self.version = None
        if args.version:
            self.version = 'v{}'.format(args.version)
        if args.latest_symlink:
            self.version = 'latest'
        self.scan_errors = None
        self.scan_files = None
        self.scan_err_log = logging.getLogger().handlers[0].baseFilename
        self.nb_map = None

    def __enter__(self):
        # Get checksum client
        self.checksum_type = self.get_checksum_type()
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        self.facets = self.cfg.get_facets('dataset_id')
        self.pattern = self.cfg.translate('directory_format',
                                          filename_pattern=True)
        # Get mapfile DRS if set in configuration file
        try:
            self.mapfile_drs = self.cfg.get('mapfile_drs')
        except NoConfigOption:
            self.mapfile_drs = None
        # Init data collector
        if self.pbar:
            self.sources = VersionedPathCollector(
                sources=self.directory,
                data=self,
                dir_format=self.cfg.translate('directory_format'))
        else:
            self.sources = VersionedPathCollector(
                sources=self.directory,
                data=self,
                spinner=False,
                dir_format=self.cfg.translate('directory_format'))
        # Init file filter
        for file_filter in self.file_filter:
            self.sources.FileFilter[uuid()] = file_filter
        # Init dir filter
        self.sources.PathFilter['base_filter'] = (self.dir_filter, True)
        if self.all:
            # Pick up all encountered versions by adding "/latest" exclusion
            self.sources.PathFilter['version_filter'] = ('/latest', True)
        elif self.version:
            # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
            # If --latest-symlink, --version is set to "latest"
            self.sources.PathFilter['version_filter'] = '/{}'.format(
                self.version)
        # Init progress bar
        if self.pbar:
            nfiles = len(self.sources)
            self.pbar = tqdm(
                desc='Mapfile(s) generation',
                total=nfiles,
                bar_format=
                '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                ncols=100,
                file=sys.stdout)
        # Init threads pool
        if self.use_pool:
            self.pool = ThreadPool(int(self.threads))
        return self

    def __exit__(self, *exc):
        # Close threads pool
        if self.use_pool:
            self.pool.close()
            self.pool.join()
        # Adapt outputs depending on the scan results
        # Raise errors when one or several files have been skipped or failed
        # Default is sys.exit(0)
        if self.scan_files and not self.scan_errors:
            # All files have been successfully scanned
            # Print number of generated mapfiles
            if self.pbar:
                print('{}: {} (see {})'.format('Mapfile(s) generated',
                                               self.nb_map, self.outdir))
            logging.info('{} mapfile(s) generated'.format(self.nb_map))
            logging.info('==> Scan completed ({} file(s) scanned)'.format(
                self.scan_files))
        if not self.scan_files and not self.scan_errors:
            # Results list is empty = no files scanned/found
            if self.pbar:
                print('No files found')
            logging.warning('==> No files found')
            sys.exit(1)
        if self.scan_files and self.scan_errors:
            # Print number of scan errors in any case
            if self.pbar:
                print('{}: {} (see {})'.format('Scan errors', self.scan_errors,
                                               self.scan_err_log))
            logging.warning('{} file(s) have been skipped'
                            ' (see {})'.format(self.scan_errors,
                                               self.scan_err_log))
            if self.scan_errors == self.scan_files:
                # All files have been skipped or failed during the scan
                logging.warning(
                    '==> All files have been ignored or have failed leading to no mapfile.'
                )
                sys.exit(3)
            else:
                # Some files have been skipped or failed during the scan
                logging.info('==> Scan completed ({} file(s) scanned)'.format(
                    self.scan_files))
                sys.exit(2)

    def get_checksum_type(self):
        """
        Gets the checksum type to use.
        Be careful with exception constants when reading two different sections.

        :returns: The checksum type
        :rtype: *str*

        """
        if self.no_checksum:
            return None
        _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
        if _cfg.has_option('checksum', section='DEFAULT'):
            checksum_type = _cfg.get_options_from_table(
                'checksum')[0][1].lower()
        else:  # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
            checksum_type = 'sha256'
        if checksum_type not in checksum_types:
            raise InvalidChecksumType(checksum_type)
        return checksum_type

    def clean(self):
        """
        Clean directory from incomplete mapfiles.
        Incomplete mapfiles from a previous run are silently removed.

        """
        for root, _, filenames in os.walk(self.outdir):
            for filename in fnmatch.filter(filenames,
                                           '*{}'.format(WORKING_EXTENSION)):
                os.remove(os.path.join(root, filename))
        logging.info('{} cleaned'.format(self.outdir))
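clean() is small enough to run standalone; WORKING_EXTENSION is assumed here to be the suffix of in-progress mapfiles:

import fnmatch
import os

def clean(outdir, working_extension='.part'):  # '.part' is an assumption
    # Remove incomplete mapfiles left over from a previous run
    for root, _, filenames in os.walk(outdir):
        for filename in fnmatch.filter(filenames, '*{}'.format(working_extension)):
            os.remove(os.path.join(root, filename))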
Example #14
 def __enter__(self):
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
     self.facets = self.cfg.get_facets('dataset_id')
      # Get mapfile DRS if set in configuration file
     try:
         _cfg = SectionParser(section='config:{}'.format(self.project), directory=self.config_dir)
         self.mapfile_drs = _cfg.get('mapfile_drs')
     except (NoConfigOption, NoConfigSection):
         self.mapfile_drs = None
     # Init data collector
     if self.directory:
         # The source is a list of directories
         # Instantiate file collector to walk through the tree
         self.source_type = 'file'
         if self.pbar:
             self.sources = VersionedPathCollector(sources=self.directory,
                                                   data=self,
                                                   dir_format=self.cfg.translate('directory_format'))
         else:
             self.sources = VersionedPathCollector(sources=self.directory,
                                                   data=self,
                                                   spinner=False,
                                                   dir_format=self.cfg.translate('directory_format'))
         # Translate directory format pattern
         self.pattern = self.cfg.translate('directory_format', add_ending_filename=True)
         # Init file filter
         for regex, inclusive in self.file_filter:
             self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
         # Init dir filter
         self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
         if self.all:
             # Pick up all encountered versions by adding "/latest" exclusion
             self.sources.PathFilter.add(name='version_filter', regex='/latest', inclusive=False)
         elif self.version:
             # Pick up the specified version only (--version flag) by adding "/v{version}" inclusion
             # If --latest-symlink, --version is set to "latest"
             self.sources.PathFilter.add(name='version_filter', regex='/{}'.format(self.version))
         else:
             # Default behavior: pick up the latest version among encountered versions
             self.sources.default = True
     elif self.dataset_list:
         # The source is a list of dataset from a TXT file
         self.source_type = 'dataset'
         self.sources = DatasetCollector(sources=[x.strip() for x in self.dataset_list.readlines() if x.strip()],
                                         data=self,
                                         spinner=False)
         # Translate dataset_id format
         self.pattern = self.cfg.translate('dataset_id', add_ending_version=True, sep='.')
     else:
         # The source is a dataset ID (potentially from stdin)
         self.source_type = 'dataset'
         self.sources = DatasetCollector(sources=[self.dataset_id],
                                         data=self,
                                         spinner=False)
         # Translate dataset_id format
         self.pattern = self.cfg.translate('dataset_id', add_ending_version=True, sep='.')
     # Init progress bar
     nfiles = len(self.sources)
     if self.pbar and nfiles:
         self.pbar = tqdm(desc='Mapfile(s) generation',
                          total=nfiles,
                          bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} '
                                     + SOURCE_TYPE[self.source_type],
                          ncols=100,
                          file=sys.stdout)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
Example #15
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """
    def __init__(self, args):
        self.pbar = args.pbar
        self.config_dir = args.i
        self.directory = args.directory
        self.root = os.path.normpath(args.root)
        self.rescan = args.rescan
        self.commands_file = args.commands_file
        self.overwrite_commands_file = args.overwrite_commands_file
        self.upgrade_from_latest = args.upgrade_from_latest
        self.set_values = {}
        if args.set_value:
            self.set_values = dict(args.set_value)
        self.set_keys = {}
        if args.set_key:
            self.set_keys = dict(args.set_key)
        self.threads = args.max_threads
        self.use_pool = (self.threads > 1)
        self.project = args.project
        self.action = args.action
        if args.copy:
            self.mode = 'copy'
        elif args.link:
            self.mode = 'link'
        elif args.symlink:
            self.mode = 'symlink'
        else:
            self.mode = 'move'
        self.version = args.version
        DRSPath.TREE_VERSION = 'v{}'.format(args.version)
        self.scan = True
        self.scan_errors = None
        self.scan_files = None
        self.scan_err_log = logging.getLogger().handlers[0].baseFilename
        if self.commands_file and self.action != 'todo':
            print('"{}" action ignores "--commands-file" argument.'.format(
                self.action))
            self.commands_file = None
        if self.overwrite_commands_file and not self.commands_file:
            print('--overwrite-commands-file ignored')

    def __enter__(self):
        # Get checksum client
        self.checksum_type = self.get_checksum_type()
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        # Check if --commands-file argument specifies an existing file
        self.check_existing_commands_file()
        # Warn user about unconsidered hard-coded elements
        for pattern_element in self.cfg.get('directory_format').strip().split(
                "/"):
            if not re.match(re.compile(r'%\([\w]+\)s'), pattern_element):
                msg = 'Hard-coded DRS elements (as "{}") in "directory_format" ' \
                      'are not supported.'.format(pattern_element)
                if self.pbar:
                    print(msg)
                logging.warning(msg)
                break
        self.facets = self.cfg.get_facets('directory_format')
        self.pattern = self.cfg.translate('filename_format')
        # Init DRS tree
        self.tree = DRSTree(self.root, self.version, self.mode,
                            self.commands_file)
        # Disable file scan if a previous DRS tree was generated with the same context and the action is not "list"
        if not self.rescan and self.action != 'list' and os.path.isfile(
                TREE_FILE):
            reader = load(TREE_FILE)
            old_args = next(reader)
            # Ensure that processing context is similar to previous step
            if self.check_args(old_args):
                self.scan = False
        # Init data collector
        if self.pbar:
            self.sources = Collector(sources=self.directory, data=self)
        else:
            self.sources = Collector(sources=self.directory,
                                     spinner=False,
                                     data=self)
        # Init file filter
        # Only supports netCDF files
        self.sources.FileFilter[uuid()] = (r'^.*\.nc$', False)
        # And exclude hidden files
        self.sources.FileFilter[uuid()] = (r'^\..*$', True)
        # Init progress bar
        if self.pbar:
            nfiles = len(self.sources)
            self.pbar = tqdm(
                desc='Scanning incoming files',
                total=nfiles,
                bar_format=
                '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                ncols=100,
                file=sys.stdout)
        # Init threads pool
        if self.use_pool:
            self.pool = ThreadPool(int(self.threads))
        return self

    def check_existing_commands_file(self):
        """
        Check for existing commands file,
        and depending on ``--overwrite-commands-file`` setting,
        either delete it or throw a fatal error.

        """
        if self.commands_file and os.path.exists(self.commands_file):
            if self.overwrite_commands_file:
                os.remove(self.commands_file)
            else:
                print("File '{}' already exists and '--overwrite-commands-file' "
                      "option not used.".format(self.commands_file))
                sys.exit(1)

    def __exit__(self, *exc):
        # Close threads pool
        if self.use_pool:
            self.pool.close()
            self.pool.join()
        # Adapt outputs depending on the scan results
        # Raise errors when one or several files have been skipped or failed
        # Default is sys.exit(0)
        if self.scan_files and not self.scan_errors:
            # All files have been successfully scanned
            logging.info('==> Scan completed ({} file(s) scanned)'.format(
                self.scan_files))
        if not self.scan_files and not self.scan_errors:
            # Results list is empty = no files scanned/found
            if self.pbar:
                print('No files found')
            logging.warning('==> No files found')
            sys.exit(1)
        if self.scan_files and self.scan_errors:
            if self.scan:
                msg = 'Scan errors: {} (see {})'
            else:
                msg = 'Original scan errors: {} (previously written to {})'
            # Print number of scan errors in any case
            if self.pbar:
                print(msg.format(self.scan_errors, self.scan_err_log))
            logging.warning('{} file(s) have been skipped'
                            ' (see {})'.format(self.scan_errors,
                                               self.scan_err_log))
            if self.scan_errors == self.scan_files:
                # All files have been skipped or failed during the scan
                logging.warning(
                    '==> All files have been ignored or have failed leading to no DRS tree.'
                )
                sys.exit(3)
            else:
                # Some files have been skipped or failed during the scan
                logging.info('==> Scan completed ({} file(s) scanned)'.format(
                    self.scan_files))
                sys.exit(2)

    def get_checksum_type(self):
        """
        Gets the checksum type to use.
        Be careful with exception constants when reading two different sections.

        :returns: The checksum type
        :rtype: *str*

        """
        _cfg = SectionParser(section='DEFAULT', directory=self.config_dir)
        if _cfg.has_option('checksum', section='DEFAULT'):
            checksum_type = _cfg.get_options_from_table(
                'checksum')[0][1].lower()
        else:  # Use SHA256 as default because esg.ini is not mandatory in the configuration directory
            checksum_type = 'sha256'
        if checksum_type not in checksum_types:
            raise InvalidChecksumType(checksum_type)
        return checksum_type

    def check_args(self, old_args):
        """
        Checks command-line argument to avoid discrepancies between ``esgprep drs`` steps.

        :param *dict* old_args: The recorded arguments
        :raises Error: If one argument differs

        """
        for k in CONTROLLED_ARGS:
            if self.__getattribute__(k) != old_args[k]:
                logging.warning(
                    '"{}" argument has changed: "{}" instead of "{}". '
                    'File rescan needed.'.format(k, self.__getattribute__(k),
                                                 old_args[k]))
                return False
        return True
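check_args() compares the live context with the arguments recorded alongside the cached DRS tree and requests a rescan on the first mismatch. A minimal sketch of that comparison with hypothetical values:

# Hypothetical controlled arguments and recorded values
CONTROLLED_ARGS = ['project', 'root', 'version']

class Ctx(object):
    project, root, version = 'CMIP6', '/data', '20200101'

old_args = {'project': 'CMIP6', 'root': '/data', 'version': '20190101'}
# 'version' differs, so a rescan would be needed
print(all(getattr(Ctx(), k) == old_args[k] for k in CONTROLLED_ARGS))  # False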
Example #16

def declare_map(config, facet):
    maps = []
    if config.has_option('maps'):
        maps = [m.strip() for m in config.get('maps').split(',')]
    maps.append('{}_map'.format(facet))
    config.set('maps', build_line(tuple(maps), sep=', '))


if __name__ == "__main__":
    args = get_args()
    auth = HTTPBasicAuth(
        args.gh_user,
        args.gh_password) if args.gh_user and args.gh_password else None
    config = SectionParser(section='project:{}'.format(args.project))
    # Get all facet keys from format elements
    facets = get_facets()
    config.set('categories', get_categories(facets), newline=True)
    defaults = [('project', 'CMIP6')]
    defaults = tuple([
        build_line(default, length=lengths(defaults), indent=True)
        for default in sorted(defaults)
    ])
    config.set('category_defaults',
               build_line(defaults, sep='\n'),
               newline=True)
    config.set('filename_format', FILENAME_FORMAT)
    config.set('directory_format', DIRECTORY_FORMAT)
    config.set('dataset_id', DATASET_ID)
    config.set('dataset_name_format', DATASET_FORMAT)
Example #17
 def __enter__(self):
     # Get checksum client
     self.checksum_type = self.get_checksum_type()
     # Init configuration parser
     self.cfg = SectionParser(section='project:{}'.format(self.project),
                              directory=self.config_dir)
      # Check if --commands-file argument specifies an existing file
     self.check_existing_commands_file()
     # Get DRS facets
     self.facets = self.cfg.get_facets('directory_format')
     # Raise error when %(version)s is not part of the final directory format
     if 'version' not in self.facets:
         raise NoVersionPattern(self.cfg.get('directory_format'),
                                self.facets)
     # Consider hard-coded elements in directory format
     idx = 0
     for pattern_element in self.cfg.get('directory_format').strip().split(
             "/"):
         try:
             # If pattern is %(...)s
             # Get its index in the list of facets
             key = re.match(re.compile(r'%\(([\w]+)\)s'),
                            pattern_element).groups()[0]
             idx = self.facets.index(key)
         except AttributeError:
             # If pattern is not %(...)s
             # Generate a uuid()
             key = str(uuid())
             # Insert hard-coded string in self.facets to be part of DRS path
             self.facets.insert(idx + 1, key)
             # Set the value using --set-value
             self.set_values[key] = pattern_element
             # Add the uuid to the ignored keys
             IGNORED_KEYS.append(key)
     self.pattern = self.cfg.translate('filename_format')
     # Init DRS tree
     self.tree = DRSTree(self.root, self.version, self.mode,
                         self.commands_file)
      # Disable file scan if a previous DRS tree was generated with the same context and the action is not "list"
     if not self.rescan and self.action != 'list' and os.path.isfile(
             TREE_FILE):
         reader = load(TREE_FILE)
          old_args = next(reader)
         # Ensure that processing context is similar to previous step
         if self.check_args(old_args):
             self.scan = False
     # Init data collector
     if self.pbar:
         self.sources = Collector(sources=self.directory, data=self)
     else:
         self.sources = Collector(sources=self.directory,
                                  spinner=False,
                                  data=self)
      # Init file filter
      # Only supports netCDF files
      self.sources.FileFilter.add(regex=r'^.*\.nc$')
      # And exclude hidden files
      self.sources.FileFilter.add(regex=r'^\..*$', inclusive=False)
     # Init progress bar
     if self.scan:
         nfiles = len(self.sources)
         if self.pbar and nfiles:
             self.pbar = tqdm(
                 desc='Scanning incoming files',
                 total=nfiles,
                 bar_format=
                 '{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} files',
                 ncols=100,
                 file=sys.stdout)
     else:
         msg = 'Skipping incoming files scan (use "--rescan" to force it) -- ' \
               'Using cached DRS tree from {}'.format(TREE_FILE)
         if self.pbar:
             print(msg)
         logging.warning(msg)
     # Init threads pool
     if self.use_pool:
         self.pool = ThreadPool(int(self.threads))
     return self
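The hard-coded element handling can be exercised in isolation: %(facet)s placeholders keep their name, while any literal path element gets a generated key, a fixed value, and a slot in the facet list. A sketch under those assumptions (the directory_format value is hypothetical):

import re
from uuid import uuid4 as uuid

directory_format = '%(root)s/%(project)s/output/%(variable)s'
facets, set_values, idx = [], {}, 0
for element in directory_format.strip().split('/'):
    match = re.match(r'%\(([\w]+)\)s', element)
    if match:
        facets.append(match.groups()[0])
        idx = len(facets) - 1
    else:
        # Literal element: insert a generated key and pin its value
        key = str(uuid())
        facets.insert(idx + 1, key)
        set_values[key] = element
print(facets[2] in set_values)  # True: 'output' is hard-coded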
Example #18
class BaseContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *BaseContext*

    """
    def __init__(self, args):
        # Init print management
        Print.init(log=args.log, debug=args.debug, all=args.all, cmd=args.prog)
        # Print command-line
        Print.command()
        self._process_color_arg(args)
        # Get project and related configuration
        self.project = args.project
        self.config_dir = args.i
        self.processes = args.max_processes \
            if args.max_processes <= cpu_count() else cpu_count()
        self.use_pool = (self.processes != 1)
        self.lock = Lock()
        self.nbfiles = 0
        self.nbskip = 0
        self.nberrors = 0
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, True) for f in args.include_file])
        else:
            # Default includes netCDF only
            self.file_filter.append((r'^.*\.nc$', True))
        if args.exclude_file:
            self.file_filter.extend([(f, False) for f in args.exclude_file])
        else:
            # Default: exclude hidden files
            self.file_filter.append((r'^\..*$', False))
        self.dir_filter = args.ignore_dir
        # Init process manager
        if self.use_pool:
            manager = SyncManager()
            manager.start()
            Print.BUFFER = manager.Value(c_char_p, '')
            self.progress = manager.Value('i', 0)
        else:
            self.progress = Value('i', 0)
        self.tunits_default = None
        if self.project in DEFAULT_TIME_UNITS.keys():
            self.tunits_default = DEFAULT_TIME_UNITS[self.project]
        # Change frequency increment
        if args.set_inc:
            for table, frequency, increment, units in args.set_inc:
                if table not in {t for t, _ in FREQ_INC}:
                    raise InvalidTable(table)
                if frequency not in {f for _, f in FREQ_INC}:
                    raise InvalidFrequency(frequency)
                keys = [(table, frequency)]
                if table == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[1] == frequency]
                if frequency == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[0] == table]
                for key in keys:
                    FREQ_INC[key] = [float(increment), str(units)]
        # Get reference time properties if submitted
        # Default is to deduce them from first file scanned
        self.ref_calendar = args.calendar
        self.ref_units = args.units
        # Init collector
        self.sources = None

    def __enter__(self):
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Exclude fixed frequency in any case
        self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)',
                                    inclusive=False)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        # Set driving time properties
        tinit = TimeInit(ref=self.sources.first(),
                         tunits_default=self.tunits_default)
        if not self.ref_calendar:
            self.ref_calendar = tinit.calendar
        if not self.ref_units:
            self.ref_units = tinit.tunits
        # Get project id
        if not self.project:
            self.project = get_project(self.sources.first())
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        self.pattern = self.cfg.translate('filename_format')
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        # Adapt outputs depending on the scan results
        msg = COLORS.HEADER('Number of file(s) scanned: {}\n'.format(
            self.nbfiles))
        m = 'Number of file(s) skipped: {}'.format(self.nbskip)
        if self.nbskip:
            msg += COLORS.FAIL(m)
        else:
            msg += COLORS.SUCCESS(m)
        # Print summary
        Print.summary(msg)
        # Print log path if exists
        Print.log()

    def _process_color_arg(self, args):
        # process --color / --no-color arg if present
        if 'color' in args and args.color:
            enable_colors()
        if 'no_color' in args and args.no_color:
            disable_colors()
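The --set-inc handling treats (table, frequency) pairs as dictionary keys, with 'all' acting as a wildcard on either axis. A sketch with a hypothetical FREQ_INC table:

# Hypothetical (table, frequency) -> [increment, units] table
FREQ_INC = {('day', 'mon'): [1.0, 'month'],
            ('day', 'day'): [1.0, 'day'],
            ('3hr', 'day'): [1.0, 'day']}

def matching_keys(table, frequency):
    keys = [(table, frequency)]
    if table == 'all':
        keys = [k for k in FREQ_INC if k[1] == frequency]
    if frequency == 'all':
        keys = [k for k in FREQ_INC if k[0] == table]
    return keys

print(matching_keys('all', 'day'))  # [('day', 'day'), ('3hr', 'day')]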
Example #19
class ProcessingContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *ProcessingContext*

    """
    def __init__(self, args):
        self.pbar = args.pbar
        self.project = args.project
        self.config_dir = args.i
        self.directory = args.directory
        self.dataset_list = args.dataset_list
        self.dir_filter = args.ignore_dir
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, True) for f in args.include_file])
        else:
            # Default includes netCDF only
            self.file_filter.append((r'^.*\.nc$', True))
        if args.exclude_file:
            self.file_filter.extend([(f, False) for f in args.exclude_file])
        else:
            # Default: exclude hidden files
            self.file_filter.append((r'^\..*$', False))
        self.scan_errors = 0
        self.any_undeclared = False

    def __enter__(self):
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project),
                                 directory=self.config_dir)
        # Init data collector
        if self.directory:
            # The source is a list of directories
            # Instantiate file collector to walk through the tree
            self.source_type = 'files'
            if self.pbar:
                self.sources = PathCollector(sources=self.directory)
            else:
                self.sources = PathCollector(sources=self.directory,
                                             spinner=False)
            # Init file filter
            for regex, inclusive in self.file_filter:
                self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
            # Init dir filter
            self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
            self.pattern = self.cfg.translate('directory_format',
                                              add_ending_filename=True)
        else:
            # The source is a list of files (i.e., several dataset lists)
            # Instantiate dataset collector to parse the files
            self.source_type = 'datasets'
            if self.pbar:
                self.sources = DatasetCollector(source=[
                    x.strip() for x in self.dataset_list.readlines()
                    if x.strip()
                ],
                                                versioned=False)
            else:
                self.sources = DatasetCollector(source=[
                    x.strip() for x in self.dataset_list.readlines()
                    if x.strip()
                ],
                                                spinner=False,
                                                versioned=False)
            self.pattern = self.cfg.translate('dataset_id')
        # Get the facet keys from pattern
        self.facets = set(re.compile(
            self.pattern).groupindex.keys()).difference(set(IGNORED_KEYS))
        # Init progress bar
        nfiles = len(self.sources)
        if self.pbar and nfiles:
            self.sources = tqdm(
                self.sources,
                desc='Harvesting facets values from data',
                total=nfiles,
                bar_format='{desc}: {percentage:3.0f}% | {n_fmt}/{total_fmt} '
                + self.source_type,
                ncols=100,
                file=sys.stdout)
        return self

    def __exit__(self, *exc):
        # Default is sys.exit(0)
        if self.scan_errors > 0:
            print('{}: {} (see {})'.format(
                'Scan errors', self.scan_errors,
                logging.getLogger().handlers[0].baseFilename))
            sys.exit(1)
        if self.any_undeclared:
            print('Please update "esg.{}.ini" following: {}'.format(
                self.project,
                logging.getLogger().handlers[0].baseFilename))
            sys.exit(2)
Example #20
class BaseContext(object):
    """
    Encapsulates the processing context/information for main process.

    :param ArgumentParser args: Parsed command-line arguments
    :returns: The processing context
    :rtype: *BaseContext*

    """

    def __init__(self, args):
        # Init print management
        Print.init(log=args.log, debug=args.debug, all=args.all, cmd=args.prog)
        # Print command-line
        Print.command()
        self._process_color_arg(args)
        # Get project and related configuration
        self.project = args.project
        self.config_dir = args.i
        self.processes = args.max_processes if args.max_processes <= cpu_count() else cpu_count()
        self.use_pool = (self.processes != 1)
        self.lock = Lock()
        self.nbfiles = 0
        self.nbskip = 0
        self.nberrors = 0
        self.file_filter = []
        if args.include_file:
            self.file_filter.extend([(f, True) for f in args.include_file])
        else:
            # Default includes netCDF only
            self.file_filter.append((r'^.*\.nc$', True))
        if args.exclude_file:
            self.file_filter.extend([(f, False) for f in args.exclude_file])
        else:
            # Default: exclude hidden files
            self.file_filter.append((r'^\..*$', False))
        self.dir_filter = args.ignore_dir
        # Init process manager
        if self.use_pool:
            manager = SyncManager()
            manager.start()
            Print.BUFFER = manager.Value(c_char_p, '')
            self.progress = manager.Value('i', 0)
        else:
            self.progress = Value('i', 0)
        self.tunits_default = None
        if self.project in DEFAULT_TIME_UNITS.keys():
            self.tunits_default = DEFAULT_TIME_UNITS[self.project]
        # Change frequency increment
        if args.set_inc:
            for table, frequency, increment, units in args.set_inc:
                if table not in {t for t, _ in FREQ_INC}:
                    raise InvalidTable(table)
                if frequency not in {f for _, f in FREQ_INC}:
                    raise InvalidFrequency(frequency)
                keys = [(table, frequency)]
                if table == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[1] == frequency]
                if frequency == 'all':
                    keys = [k for k in FREQ_INC.keys() if k[0] == table]
                for key in keys:
                    FREQ_INC[key] = [float(increment), str(units)]
        # Get reference time properties if submitted
        # Default is to deduce them from first file scanned
        self.ref_calendar = args.calendar
        self.ref_units = args.units
        # Init collector
        self.sources = None

    def __enter__(self):
        # Init file filter
        for regex, inclusive in self.file_filter:
            self.sources.FileFilter.add(regex=regex, inclusive=inclusive)
        # Exclude fixed frequency in any case
        self.sources.FileFilter.add(regex='(_fx_|_fixed_|_fx.|_fixed.|_.fx_)', inclusive=False)
        # Init dir filter
        self.sources.PathFilter.add(regex=self.dir_filter, inclusive=False)
        # Set driving time properties
        tinit = TimeInit(ref=self.sources.first(), tunits_default=self.tunits_default)
        if not self.ref_calendar:
            self.ref_calendar = tinit.calendar
        if not self.ref_units:
            self.ref_units = tinit.tunits
        # Get project id
        if not self.project:
            self.project = get_project(self.sources.first())
        # Init configuration parser
        self.cfg = SectionParser(section='project:{}'.format(self.project), directory=self.config_dir)
        self.pattern = self.cfg.translate('filename_format')
        return self

    def __exit__(self, exc_type, exc_val, traceback):
        # Adapt outputs depending on the scan results
        msg = COLORS.HEADER('Number of file(s) scanned: {}\n'.format(self.nbfiles))
        m = 'Number of file(s) skipped: {}'.format(self.nbskip)
        if self.nbskip:
            msg += COLORS.FAIL(m)
        else:
            msg += COLORS.SUCCESS(m)
        # Print summary
        Print.summary(msg)
        # Print log path if exists
        Print.log()

    def _process_color_arg(self, args):
        # process --color / --no-color arg if present
        if 'color' in args and args.color:
            enable_colors()
        if 'no_color' in args and args.no_color:
            disable_colors()