def test_sync_hdfs_store(self, mock_get_fs_fn):
    """Sync a local run directory through HDFSStore and check the recorded uploads.

    The filesystem handed to the store is a ``mock.Mock``, so the only thing
    observed here is which ``upload`` calls the sync function issues.

    ``mock_get_fs_fn`` is presumably injected by a ``mock.patch`` decorator
    that sits outside this view -- TODO confirm.
    """
    fake_fs = mock.Mock()
    mock_get_fs_fn.return_value = lambda: fake_fs

    store = HDFSStore('/user/test/output')

    run = 'run_001'
    local_output_dir_fn = store.get_local_output_dir_fn(run)
    sync = store.sync_fn(run)
    remote_run_dir = store.get_run_path(run)

    # Fixed (atime, mtime) pairs so the file timestamps are deterministic.
    initial_times = (1330712280, 1330712280)
    later_times = (1330712292, 1330712292)

    def touch(path, stamp):
        # Create the file if it is missing, then pin its access/modify times.
        with open(path, 'a'):
            os.utime(path, stamp)

    with local_output_dir_fn() as workdir:
        # Add top-level files one at a time; after each sync the most recent
        # upload call must target the file that was just added.
        for name in ('a.txt', 'b.txt'):
            touch(os.path.join(workdir, name), initial_times)
            sync(workdir)
            fake_fs.upload.assert_called_with(
                os.path.join(remote_run_dir, name), mock.ANY)

        # A file inside a nested directory is uploaded under the same
        # relative path below the run directory.
        nested_dir = os.path.join(workdir, 'subdir')
        os.mkdir(nested_dir)
        touch(os.path.join(nested_dir, 'c.txt'), initial_times)
        sync(workdir)
        fake_fs.upload.assert_called_with(
            os.path.join(remote_run_dir, 'subdir/c.txt'), mock.ANY)

        # Bump the timestamps of the two top-level files. Nothing is uploaded
        # until the next sync, which is expected to add exactly two calls.
        for name in ('a.txt', 'b.txt'):
            touch(os.path.join(workdir, name), later_times)
        assert fake_fs.upload.call_count == 3

        sync(workdir)
        assert fake_fs.upload.call_count == 5
def test_hdfs_store_parse_url(self, mock_get_filesystem_fn):
    """Check how HDFSStore interprets root URLs and explicit data-path overrides.

    ``mock_get_filesystem_fn`` is not used in the body; it is presumably
    injected by a ``mock.patch`` decorator outside this view -- TODO confirm.
    """
    local_path = '/user/test/output'

    # Each row: (hdfs_root, expected prefix, expected full path,
    #            expected host kwarg, expected port kwarg).
    url_cases = [
        # Case 1: full path
        ('hdfs://namenode01:8020/user/test/output', 'hdfs://namenode01:8020',
         'hdfs://namenode01:8020/user/test/output', 'namenode01', 8020),
        # Case 2: no host and port
        ('hdfs:///user/test/output', 'hdfs://',
         'hdfs:///user/test/output', 'default', 0),
        # Case 3: no prefix
        ('/user/test/output', 'hdfs://',
         'hdfs:///user/test/output', 'default', 0),
        # Case 4: no namespace
        # NOTE(review): this row uses the same root URL as case 1.
        ('hdfs://namenode01:8020/user/test/output', 'hdfs://namenode01:8020',
         'hdfs://namenode01:8020/user/test/output', 'namenode01', 8020),
    ]
    for hdfs_root, prefix, full_path, host, port in url_cases:
        store = HDFSStore(hdfs_root)
        assert store.path_prefix() == prefix, hdfs_root
        assert store.get_full_path(local_path) == full_path, hdfs_root
        assert store.get_localized_path(full_path) == local_path, hdfs_root
        assert store._hdfs_kwargs['host'] == host, hdfs_root
        assert store._hdfs_kwargs['port'] == port, hdfs_root

    # Case 5: bad prefix
    hdfs_root = 'file:///user/test/output'
    with pytest.raises(ValueError):
        HDFSStore(hdfs_root)

    # Each row: (hdfs_root, train_path, val_path, test_path). Both rows are
    # checked against the same 'hdfs://'-prefixed data paths.
    override_cases = [
        # Case 6: override paths, no prefix
        ('/user/prefix',
         '/user/train_path', '/user/val_path', '/user/test_path'),
        # Case 7: override paths, prefix
        ('hdfs:///user/prefix',
         'hdfs:///user/train_path', 'hdfs:///user/val_path',
         'hdfs:///user/test_path'),
    ]
    for hdfs_root, train, val, test in override_cases:
        store = HDFSStore(hdfs_root,
                          train_path=train,
                          val_path=val,
                          test_path=test)
        assert store.get_train_data_path() == 'hdfs:///user/train_path', hdfs_root
        assert store.get_val_data_path() == 'hdfs:///user/val_path', hdfs_root
        assert store.get_test_data_path() == 'hdfs:///user/test_path', hdfs_root