def test_missing_ida_files(testpath, requests_mock):
    """Test that the task fails when a file is missing from Ida.

    Ida is mocked to serve one file and to respond with HTTP 404 to the
    download request of another file. Running the task should raise
    ``FileNotAvailableError``, the task should stay incomplete, and no
    ``dataset_files`` directory should appear in the workspace.

    :param testpath: Temporary directory fixture
    :param requests_mock: Mocker object
    :returns: ``None``
    """
    # Mock Ida: the first file is served, the second one is not found
    requests_mock.get(
        'https://ida.test/files/pid:urn:1/download',
        content=b'foo\n'
    )
    requests_mock.get(
        'https://ida.test/files/pid:urn:does_not_exist/download',
        status_code=404
    )

    # Create the workspace and initialize the task
    workspace_dir = os.path.join(testpath, 'workspaces', 'workspace')
    os.makedirs(workspace_dir)
    get_files_task = get_files.GetFiles(
        workspace=workspace_dir,
        dataset_id="get_files_test_dataset_ida_missing_file",
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )
    assert not get_files_task.complete()

    # Running the task should fail because of the missing file
    with pytest.raises(FileNotAvailableError) as error:
        get_files_task.run()
    assert str(error.value) == "File '/path/to/file4' not found in Ida"

    # The failed task must not be reported as completed
    assert not get_files_task.complete()

    # The failed run must not leave workspace/dataset_files behind
    dataset_files_dir = os.path.join(workspace_dir, 'dataset_files')
    assert not os.path.exists(dataset_files_dir)
def test_missing_local_files(testpath):
    """Test that the task fails when a local file is not available.

    Two files are registered in the ``upload.files`` collection, but
    only the first one is created on disk. Running the task should
    raise ``FileNotAvailableError``, the task should stay incomplete,
    and no ``dataset_files`` directory should appear in the workspace.

    :param testpath: Temporary directory fixture
    :returns: ``None``
    """
    workspace_dir = os.path.join(testpath, 'workspace', 'workspace')
    os.makedirs(workspace_dir)

    existing_file = os.path.join(testpath, "file1")
    missing_file = os.path.join(testpath, "file2")

    # Register both files in the upload.files collection
    client = pymongo.MongoClient()
    registered_files = (
        ("pid:urn:get_files_1_local", existing_file),
        ("pid:urn:does_not_exist_local", missing_file),
    )
    for file_id, file_path in registered_files:
        document = {"_id": file_id, "file_path": os.path.abspath(file_path)}
        client.upload.files.insert_one(document)

    # Only the first file actually exists in the test directory
    with open(existing_file, 'w') as handle:
        handle.write('foo\n')

    # Initialize the task
    get_files_task = get_files.GetFiles(
        workspace=workspace_dir,
        dataset_id="get_files_test_dataset_local_missing_file",
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )
    assert not get_files_task.complete()

    # Running the task should fail because of the missing file
    with pytest.raises(FileNotAvailableError) as error:
        get_files_task.run()

    # The exception should name the unavailable file
    expected_message = \
        "File '/path/to/file4' not found in pre-ingest file storage"
    assert str(error.value) == expected_message

    # The failed task must not be reported as completed
    assert not get_files_task.complete()

    # The failed run must not leave workspace/dataset_files behind
    dataset_files_dir = os.path.join(workspace_dir, 'dataset_files')
    assert not os.path.exists(dataset_files_dir)
def test_forbidden_relative_path(testpath, requests_mock, path):
    """Test that files can not be saved outside the workspace.

    A relative file path in Metax must not allow writing files outside
    the workspace. Without validation the tested path would be
    downloaded to
    `<packaging_root>/workspaces/<workspace>/dataset_files/../../file1`,
    i.e. `<packaging_root>/workspaces/file1`. The task is expected to
    raise ``InvalidFileMetadataError`` instead.

    :param testpath: Temporary workspace path fixture
    :param requests_mock: Request mocker
    :param path: sample file path
    :returns: ``None``
    """
    # Mock Metax with a dataset that contains one file with the
    # sample path
    file_metadata = {
        "file_path": path,
        "parent_directory": {'identifier': 'foo'},
        "identifier": "pid:urn:1",
        "file_storage": {
            "identifier": "urn:nbn:fi:att:file-storage-ida"
        }
    }
    tests.utils.add_metax_dataset(requests_mock, files=[file_metadata])

    # Create the workspace and initialize the task
    workspace_dir = os.path.join(testpath, 'workspaces', 'workspace')
    os.makedirs(workspace_dir)
    get_files_task = get_files.GetFiles(
        workspace=workspace_dir,
        dataset_id="dataset_identifier",
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )

    # The invalid path should be rejected when the task is run
    expected_message = \
        'The file path of file pid:urn:1 is invalid: %s' % path
    with pytest.raises(InvalidFileMetadataError) as error:
        get_files_task.run()
    assert str(error.value) == expected_message

    # Nothing extra should have been written to the test directory: it
    # should contain only these three entries
    assert sorted(os.listdir(testpath)) \
        == ['file_cache', 'tmp', 'workspaces']
def test_getfiles(testpath, requests_mock):
    """Test a successful run of the ``GetFiles`` task.

    - ``Task.complete()`` is false before and true after ``Task.run()``
    - The files are written to the expected paths under
      ``dataset_files`` with the content served by the mocked Ida

    :param testpath: Testpath fixture
    :param requests_mock: Mocker object
    :returns: ``None``
    """
    # Mock Ida downloads for both files
    requests_mock.get(
        "https://ida.test/files/pid:urn:1/download",
        content=b'foo\n'
    )
    requests_mock.get(
        "https://ida.test/files/pid:urn:2/download",
        content=b'bar\n'
    )

    # Create the workspace and initialize the task
    workspace_dir = os.path.join(testpath, 'workspaces', 'workspace')
    os.makedirs(workspace_dir)
    get_files_task = get_files.GetFiles(
        workspace=workspace_dir,
        dataset_id="get_files_test_dataset",
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )
    assert not get_files_task.complete()

    # The task should be complete after a successful run
    get_files_task.run()
    assert get_files_task.complete()

    # Each file should exist in the expected path with the mocked
    # content
    expected_files = (
        ('dataset_files/path/to/file1', 'foo\n'),
        ('dataset_files/path/to/file2', 'bar\n'),
    )
    for relative_path, expected_content in expected_files:
        with open(os.path.join(workspace_dir, relative_path)) as handle:
            assert handle.read() == expected_content
def test_allowed_relative_paths(testpath, requests_mock, path):
    """Test that a file is downloaded to the correct location.

    After the task has been run with the sample path, the
    ``dataset_files`` directory of the workspace should contain
    exactly one entry, ``file1``.

    :param testpath: Temporary workspace path fixture
    :param requests_mock: Request mocker
    :param path: sample file path
    :returns: ``None``
    """
    # Mock the Ida download of the file
    requests_mock.get('https://ida.test/files/pid:urn:1/download')

    # Mock Metax with a dataset that contains one file with the
    # sample path
    file_metadata = {
        "file_path": path,
        "parent_directory": {'identifier': 'foo'},
        "identifier": "pid:urn:1",
        "file_storage": {
            "identifier": "urn:nbn:fi:att:file-storage-ida"
        }
    }
    tests.utils.add_metax_dataset(requests_mock, files=[file_metadata])

    # Create the workspace and initialize the task
    workspace_dir = os.path.join(testpath, 'workspaces', 'workspace')
    os.makedirs(workspace_dir)
    get_files_task = get_files.GetFiles(
        workspace=workspace_dir,
        dataset_id="dataset_identifier",
        config=tests.conftest.UNIT_TEST_CONFIG_FILE
    )

    # Download the file and check that it is found in the expected
    # location
    get_files_task.run()
    dataset_files_dir = os.path.join(workspace_dir, 'dataset_files')
    assert os.listdir(dataset_files_dir) == ['file1']