Exemplo n.º 1
0
def test_constructor(meta_json_path):
    """
    Check that Manifest can be instantiated with either a str or a
    pathlib.Path as cache_dir, and that a cache_dir of another type
    (here a float) is rejected with a ValueError
    """
    # both of these cache_dir flavors must be accepted without error
    accepted_cache_dirs = (
        'my/cache/dir',
        pathlib.Path('my/other/cache/dir'),
    )
    for cache_dir in accepted_cache_dirs:
        Manifest(cache_dir, meta_json_path)

    # a float is neither a str nor a pathlib.Path
    expected_message = r"cache_dir must be either a str.*"
    with pytest.raises(ValueError, match=expected_message):
        Manifest(1234.2, meta_json_path)
Exemplo n.º 2
0
    def _load_manifest(self, manifest_name: str) -> Manifest:
        """
        Read one of this dataset's manifests from the cache directory,
        downloading it first when no local copy exists.

        Parameters
        ----------
        manifest_name: str
            Name of the manifest to read. It has to be one of the
            entries in self.manifest_file_names

        Returns
        -------
        Manifest
            The manifest built from the locally cached file

        Raises
        ------
        ValueError
            If manifest_name is not in self.manifest_file_names

        Notes
        -----
        As a side effect, manifest_name is written to the file at
        self._manifest_last_used.
        """
        if manifest_name not in self.manifest_file_names:
            msg = (f"manifest: {manifest_name}\n"
                   "is not one of the valid manifest names "
                   "for this dataset:\n"
                   f"{self.manifest_file_names}")
            raise ValueError(msg)

        # fetch the manifest only when it is not cached locally yet
        local_path = os.path.join(self._cache_dir, manifest_name)
        already_cached = os.path.exists(local_path)
        if not already_cached:
            self._download_manifest(manifest_name)

        with open(local_path) as manifest_file:
            manifest = Manifest(cache_dir=self._cache_dir,
                                json_input=manifest_file)

        # record which manifest was loaded most recently
        with open(self._manifest_last_used, 'w') as last_used_file:
            last_used_file.write(manifest_name)

        return manifest
Exemplo n.º 3
0
def test_metadata_file_attributes(manifest_for_metadata):
    """
    Check that Manifest.metadata_file_attributes hands back a
    CacheFileAttributes object with the expected url, version_id,
    file_hash and local_path for each known metadata file, and that
    asking for an unknown metadata file raises a ValueError
    """

    mfest = Manifest('/my/cache/dir/', manifest_for_metadata)

    # one row per metadata file:
    # (file name, url, version_id, file_hash, un-resolved local path)
    expected_rows = [
        ('a.txt',
         'http://my.url.com/path/to/a.txt',
         '12345',
         'abcde',
         '/my/cache/dir/some-project-000/to/a.txt'),
        ('b.txt',
         'http://my.other.url.com/different/path/to/b.txt',
         '67890',
         'fghijk',
         '/my/cache/dir/some-project-000/path/to/b.txt'),
    ]

    for file_name, url, version_id, file_hash, raw_path in expected_rows:
        attr = mfest.metadata_file_attributes(file_name)
        assert attr.url == url
        assert attr.version_id == version_id
        assert attr.file_hash == file_hash
        local_path = pathlib.Path(safe_system_path(raw_path)).resolve()
        assert attr.local_path == local_path

    # a metadata file that is not in the manifest must raise
    # an error with the expected message

    with pytest.raises(ValueError) as context:
        mfest.metadata_file_attributes('c.txt')
    expected_msg = "c.txt\nis not in self.metadata_file_names"
    assert expected_msg in context.value.args[0]
Exemplo n.º 4
0
def test_data_file_attributes(manifest_with_data):
    """
    Check that Manifest.data_file_attributes hands back a
    CacheFileAttributes object with the expected url, version_id,
    file_hash and local_path for each known data file, and that
    asking for an unknown data file raises a ValueError
    """
    mfest = Manifest('/my/cache/dir', manifest_with_data)

    # one row per data file:
    # (file_id, url, version_id, file_hash, un-resolved local path)
    expected_rows = [
        ('a',
         'http://my.url.com/myproject/path/to/a.nwb',
         '12345',
         'abcde',
         '/my/cache/dir/myproject-0/path/to/a.nwb'),
        ('b',
         'http://my.other.url.com/different/path/b.nwb',
         '67890',
         'fghijk',
         '/my/cache/dir/myproject-0/path/b.nwb'),
    ]

    for file_id, url, version_id, file_hash, raw_path in expected_rows:
        attr = mfest.data_file_attributes(file_id)
        assert attr.url == url
        assert attr.version_id == version_id
        assert attr.file_hash == file_hash
        local_path = pathlib.Path(safe_system_path(raw_path)).resolve()
        assert attr.local_path == local_path

    # a file_id that is not in the manifest must raise
    # an error with the expected message
    with pytest.raises(ValueError) as context:
        mfest.data_file_attributes('c')
    expected_msg = "file_id: c\nIs not a data file listed in manifest:"
    assert expected_msg in context.value.args[0]
Exemplo n.º 5
0
def test_file_attribute_errors(meta_json_path):
    """
    Check that both Manifest.metadata_file_attributes and
    Manifest.data_file_attributes raise a ValueError with the
    expected message when asked for the file names used below
    """
    mfest = Manifest("/my/cache/dir", meta_json_path)

    # (lookup method, file name to request, regex the error must match)
    failing_lookups = [
        (mfest.metadata_file_attributes,
         'some_file.txt',
         r".* not in self.metadata_file_names"),
        (mfest.data_file_attributes,
         'other_file.txt',
         r".* not a data file listed in manifest"),
    ]

    for lookup, file_name, pattern in failing_lookups:
        with pytest.raises(ValueError, match=pattern):
            lookup(file_name)
Exemplo n.º 6
0
def test_create_file_attributes(meta_json_path):
    """
    Check that Manifest._create_file_attributes passes its input
    parameters through to the returned CacheFileAttributes (this is
    mostly a test of local_path generation)
    """
    url = 'http://my.url.com/path/to/file.txt'
    version_id = '12345'
    file_hash = 'aaabbbcccddd'

    mfest = Manifest('/my/cache/dir', meta_json_path)
    attr = mfest._create_file_attributes(url, version_id, file_hash)

    assert isinstance(attr, CacheFileAttributes)

    # the inputs must come back unchanged
    assert attr.url == url
    assert attr.version_id == version_id
    assert attr.file_hash == file_hash

    # local_path is compared against the fully resolved expected path
    expected_path = pathlib.Path('/my/cache/dir/X-Y/to/file.txt').resolve()
    assert attr.local_path == expected_path
Exemplo n.º 7
0
    def _load_manifest(self,
                       manifest_name: str,
                       use_static_project_dir: bool = False) -> Manifest:
        """
        Read one of this dataset's manifests from the cache directory
        and return it.

        Parameters
        ----------
        manifest_name: str
            Name of the manifest to read. It has to be one of the
            entries in self.manifest_file_names
        use_static_project_dir: bool
            Controls how local paths for remote resources (data or
            metadata files) are laid out. Normally the Manifest class
            places them in a versioned project subdirectory of the user
            provided `cache_dir`
            (e.g. f"{cache_dir}/{project_name}-{manifest_version}")
            so that several manifest (and data) versions can coexist.
            In some situations, such as using a project's s3 bucket
            directly as the cache_dir, the project directory name has to
            be static instead (e.g. f"{cache_dir}/{project_name}").
            Passing True makes the Manifest class use the static project
            directory; it also makes this method read the manifest from
            f"{cache_dir}/{project_name}/manifests/{manifest_name}"
            rather than f"{cache_dir}/{manifest_name}".
            Defaults to False.

        Returns
        -------
        Manifest

        Raises
        ------
        ValueError
            If manifest_name is not in self.manifest_file_names
        """
        if manifest_name not in self.manifest_file_names:
            msg = (
                f"Manifest to load ({manifest_name}) is not one of the "
                "valid manifest names for this dataset. Valid names include:\n"
                f"{self.manifest_file_names}")
            raise ValueError(msg)

        # assemble the path component by component; the static layout
        # inserts "<project_name>/manifests" in front of the file name
        path_parts = [self._cache_dir]
        if use_static_project_dir:
            path_parts.extend([self.project_name, "manifests"])
        path_parts.append(manifest_name)
        manifest_path = os.path.join(*path_parts)

        with open(manifest_path, "r") as manifest_file:
            return Manifest(
                cache_dir=self._cache_dir,
                json_input=manifest_file,
                use_static_project_dir=use_static_project_dir)
Exemplo n.º 8
0
    def load_manifest(self, manifest_name: str):
        """
        Read one of this dataset's manifests and store it in
        self._manifest, downloading the manifest file first when it
        is not present in the cache directory.

        Parameters
        ----------
        manifest_name: str
            Name of the manifest to read. It has to be one of the
            entries in self.manifest_file_names

        Raises
        ------
        ValueError
            If manifest_name is not in self.manifest_file_names
        """
        if manifest_name not in self.manifest_file_names:
            msg = (f"manifest: {manifest_name}\n"
                   "is not one of the valid manifest names "
                   "for this dataset:\n"
                   f"{self.manifest_file_names}")
            raise ValueError(msg)

        # fetch the manifest only when it is not cached locally yet
        local_path = os.path.join(self._cache_dir, manifest_name)
        already_cached = os.path.exists(local_path)
        if not already_cached:
            self._download_manifest(manifest_name)

        with open(local_path) as manifest_file:
            self._manifest = Manifest(cache_dir=self._cache_dir,
                                      json_input=manifest_file)
Exemplo n.º 9
0
def test_windows_path_to_isilon(monkeypatch, tmpdir):
    """
    This test is just meant to verify on Windows CI instances
    that, if a path to the `/allen/` shared file store is used as
    cache_dir, the path to files will come out useful (i.e. without any
    spurious C:/ prepended as in AllenSDK issue #1964)
    """

    cache_dir = Path(tmpdir)

    # minimal manifest with two metadata files and one data file
    manifest_1 = {
        'manifest_version': '1',
        'metadata_file_id_column_name': 'file_id',
        'data_pipeline': 'placeholder',
        'project_name': 'my-project',
        'metadata_files': {
            'a.csv': {
                'url': 'http://www.junk.com/path/to/a.csv',  # noqa: E501
                'version_id': '1111',
                'file_hash': 'abcde'
            },
            'b.csv': {
                'url': 'http://silly.com/path/to/b.csv',  # noqa: E501
                'version_id': '2222',
                'file_hash': 'fghijk'
            }
        },
        'data_files': {
            'data_1': {
                'url': 'http://www.junk.com/data/path/data.csv',  # noqa: E501
                'version_id': '1111',
                'file_hash': 'lmnopqrst'
            }
        }
    }
    manifest_path = tmpdir / "manifest.json"
    with open(manifest_path, "w") as f:
        json.dump(manifest_1, f)

    # stand-in for _file_exists that reports every file as present
    def dummy_file_exists(self, m):
        return True

    # we do not want paths to `/allen` to be resolved to
    # a local drive on the user's machine
    # (raw strings are used so that the backslashes are not treated
    # as invalid string escape sequences)
    bad_windows_pattern = re.compile(r'^[A-Z]:')

    # make sure pattern is correctly formulated
    m = bad_windows_pattern.search(r'C:\a\windows\path')
    assert m is not None

    with monkeypatch.context() as ctx:

        # concrete subclass whose download/list hooks do nothing
        class TestCloudCache(CloudCacheBase):
            def _download_file(self, m, o):
                pass

            def _download_manifest(self, m, o):
                pass

            def _list_all_manifests(self):
                pass

        ctx.setattr(TestCloudCache, '_file_exists', dummy_file_exists)

        cache = TestCloudCache(cache_dir, 'proj')
        # inject the manifest directly rather than loading it by name
        cache._manifest = Manifest(cache_dir, json_input=manifest_path)

        # neither path may start with a Windows drive letter
        m_path = cache.metadata_path('a.csv')
        assert bad_windows_pattern.match(str(m_path)) is None
        d_path = cache.data_path('data_1')
        assert bad_windows_pattern.match(str(d_path)) is None