Пример #1
0
def parse_filename(project, filename):
    """Parses a filename against the project's filename template.

    The parser is kept in module-level state and is only rebuilt when
    ``project`` differs from the project cached by the previous call.

    :param str project: Project code.
    :param str filename: Filename.

    :returns: Set of terms extracted from the filename.
    :rtype: set

    """
    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(filename, basestring), 'Invalid filename'

    if _PROJECT != project:

        # Locate the scope whose name equals the project code.
        matching = [item for item in all_scopes() if item.name == project]
        assert matching, 'Unsupported project'
        scope = matching[0]

        # The scope data must declare a filename parser section that
        # carries both a template and its collections.
        assert 'filename' in scope.data.keys(), 'Filename parser not found'
        for key, message in (
                ('template', 'Filename parser template not found'),
                ('collections',
                 'Filename parser template collections not found')):
            assert key in scope.data['filename'].keys(), message

        # Template as declared in the scope data.
        _TEMPLATE = scope.data['filename']['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Resolve each configured collection name to a namespace.
        _COLLECTIONS = []
        for raw_name in scope.data['filename']['collections']:
            # Underscores in the configured name are turned into hyphens
            # before matching against the scope's collection names.
            wanted = raw_name.replace('_', '-')
            namespaces = [
                item.namespace for item in scope.collections
                if item.name == wanted
            ]
            # First match wins; an unknown name raises IndexError here.
            _COLLECTIONS.append(namespaces[0])
        assert _COLLECTIONS, 'Invalid collections'

        # Build the parser; template elements are separated by '_'.
        _PARSER = create_template_parser(
            _TEMPLATE,
            tuple(_COLLECTIONS),
            PARSING_STRICTNESS_1,
            separator='_')

        # Remember which project the cached parser belongs to.
        _PROJECT = project

    # Parse the filename without its extension.
    stem = splitext(filename)[0]

    try:
        return _PARSER.parse(stem)
    except TemplateParsingError:
        # Retry once with a '_fixed' suffix appended to the stem.
        # NOTE(review): presumably targets filenames that lack a period
        # element -- confirm against the template definition.
        return _PARSER.parse(stem + '_fixed')
Пример #2
0
def parse_directory(project, directory):
    """Parses a directory against the project's directory structure template.

    The parser is kept in module-level state and is only rebuilt when
    ``project`` differs from the project cached by the previous call.

    :param str project: Project code.
    :param str directory: Data directory.

    :returns: Set of terms extracted from the directory.
    :rtype: set

    """
    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(directory, basestring), 'Invalid directory'

    if _PROJECT != project:

        # Clear the cached template and collections before rebuilding.
        _TEMPLATE = _COLLECTIONS = None

        # Locate the scope whose name equals the project code.
        matching = [item for item in all_scopes() if item.name == project]
        assert matching, 'Unsupported project'
        scope = matching[0]

        # The scope data must declare a directory structure section that
        # carries both a template and its collections.
        assert 'directory_structure' in scope.data.keys(
        ), 'Directory parser not found'
        for key, message in (
                ('template', 'Directory parser template not found'),
                ('collections',
                 'Directory parser template collections not found')):
            assert key in scope.data['directory_structure'].keys(), message

        # Template as declared in the scope data.
        _TEMPLATE = scope.data['directory_structure']['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Resolve each configured collection name to a namespace.
        _COLLECTIONS = []
        for raw_name in scope.data['directory_structure']['collections']:
            # Underscores in the configured name are turned into hyphens
            # before matching against the scope's collection names.
            wanted = raw_name.replace('_', '-')
            namespaces = [
                item.namespace for item in scope.collections
                if item.name == wanted
            ]
            # First match wins; an unknown name raises IndexError here.
            _COLLECTIONS.append(namespaces[0])
        assert _COLLECTIONS, 'Invalid collections'

        # Build the parser; template elements are separated by '/'.
        _PARSER = create_template_parser(
            _TEMPLATE,
            tuple(_COLLECTIONS),
            PARSING_STRICTNESS_1,
            separator='/')

        # Remember which project the cached parser belongs to.
        _PROJECT = project

    return _PARSER.parse(directory)
Пример #3
0
def parse(identifier):
    """Parses a CMIP6 dataset identifier.

    Everything from the first ``#`` onwards (the version suffix) is dropped
    before the identifier is handed to the template parser.

    :param str identifier: Dataset identifier, with or without a ``#`` suffix.

    :returns: Result of the template parser for the stripped identifier.

    """
    template_parser = create_template_parser(
        _TEMPLATE, _COLLECTIONS, PARSING_STRICTNESS_1)

    # Strip version suffix; leaves the identifier intact when no '#' exists.
    dataset_id = identifier.split('#')[0]

    return template_parser.parse(dataset_id)
Пример #4
0
def parse_dataset_identifier(project, identifier):
    """Parses a dataset identifier against the project's dataset ID template.

    The parser is kept in module-level state and is only rebuilt when
    ``project`` differs from the project cached by the previous call.

    :param str project: Project code.
    :param str identifier: Dataset identifier.

    :returns: Set of terms extracted from the identifier.
    :rtype: set

    """
    global _PROJECT, _PARSER, _TEMPLATE, _COLLECTIONS

    assert isinstance(project, basestring), 'Invalid project'
    assert isinstance(identifier, basestring), 'Invalid identifier'

    if _PROJECT != project:

        # Locate the scope whose name equals the project code.
        matching = [item for item in all_scopes() if item.name == project]
        assert matching, 'Unsupported project'
        scope = matching[0]

        # The scope data must declare a dataset ID section that carries
        # both a template and its collections.
        assert 'dataset_id' in scope.data.keys(), 'Dataset ID parser not found'
        for key, message in (
                ('template', 'Dataset ID parser template not found'),
                ('collections',
                 'Dataset ID parser template collections not found')):
            assert key in scope.data['dataset_id'].keys(), message

        # Template as declared in the scope data.
        _TEMPLATE = scope.data['dataset_id']['template']
        assert isinstance(_TEMPLATE, basestring), 'Invalid template'

        # Resolve each configured collection name to a namespace.
        _COLLECTIONS = []
        for raw_name in scope.data['dataset_id']['collections']:
            # Underscores in the configured name are turned into hyphens
            # before matching against the scope's collection names.
            wanted = raw_name.replace('_', '-')
            namespaces = [
                item.namespace for item in scope.collections
                if item.name == wanted
            ]
            # First match wins; an unknown name raises IndexError here.
            _COLLECTIONS.append(namespaces[0])
        assert _COLLECTIONS, 'Invalid collections'

        # Build the parser (no explicit separator is passed here).
        _PARSER = create_template_parser(
            _TEMPLATE,
            tuple(_COLLECTIONS),
            PARSING_STRICTNESS_1)

        # Remember which project the cached parser belongs to.
        _PROJECT = project

    # A '#<version>' suffix becomes a regular '.v<version>' element.
    return _PARSER.parse(identifier.replace('#', '.v'))