def crawldir( ctx, path, foreign_id, language=None, casefile=False, noindex=False, nojunk=False, parallel=1, ): """Crawl a directory recursively and upload the documents in it to a collection.""" try: config = {"languages": language, "casefile": casefile} api = ctx.obj["api"] crawl_dir( api, path, foreign_id, config, index=not noindex, nojunk=nojunk, parallel=parallel, ) except AlephException as exc: raise click.ClickException(str(exc))
def test_new_collection(self, mocker): mocker.patch.object(self.api, "filter_collections", return_value=[]) mocker.patch.object(self.api, "create_collection") mocker.patch.object(self.api, "update_collection") mocker.patch.object(self.api, "ingest_upload") crawl_dir(self.api, "alephclient/tests/testdata", "test153", {}) self.api.create_collection.assert_called_once_with({ 'category': 'other', 'foreign_id': 'test153', 'label': 'test153', 'languages': [], 'summary': '', 'casefile': False })
def test_new_collection(self, mocker): mocker.patch.object(self.api, "filter_collections", return_value=[]) mocker.patch.object(self.api, "create_collection") mocker.patch.object(self.api, "update_collection") mocker.patch.object(self.api, "ingest_upload") crawl_dir(self.api, "alephclient/tests/testdata", "test153", {}) self.api.create_collection.assert_called_once_with({ "category": "other", "foreign_id": "test153", "label": "test153", "languages": [], "summary": "", "casefile": False, })
def test_ingest(self, mocker): mocker.patch.object(self.api, "ingest_upload", return_value={"id": 42}) mocker.patch.object(self.api, "load_collection_by_foreign_id", return_value={"id": 2}) mocker.patch.object(self.api, "update_collection") crawl_dir(self.api, "alephclient/tests/testdata", "test153", {}) base_path = os.path.abspath("alephclient/tests/testdata") assert self.api.ingest_upload.call_count == 5 expected_calls = [ mocker.call( 2, Path(os.path.join(base_path, "feb")), metadata={ 'foreign_id': 'feb', 'file_name': 'feb' }, index=True ), mocker.call( 2, Path(os.path.join(base_path, "jan")), metadata={ 'foreign_id': 'jan', 'file_name': 'jan' }, index=True ), mocker.call( 2, Path(os.path.join(base_path, "feb/2.txt")), metadata={ 'parent_id': 42, 'foreign_id': 'feb/2.txt', 'file_name': '2.txt' }, index=True ), mocker.call( 2, Path(os.path.join(base_path, "jan/week1")), metadata={ 'parent_id': 42, 'foreign_id': 'jan/week1', 'file_name': 'week1' }, index=True ), mocker.call( 2, Path(os.path.join(base_path, "jan/week1/1.txt")), metadata={ 'parent_id': 42, 'foreign_id': 'jan/week1/1.txt', 'file_name': '1.txt' }, index=True ), ] for call in expected_calls: assert call in self.api.ingest_upload.mock_calls
def test_ingest(self, mocker): mocker.patch.object(self.api, "ingest_upload", return_value={"id": 42}) mocker.patch.object(self.api, "load_collection_by_foreign_id", return_value={"id": 2}) mocker.patch.object(self.api, "update_collection") crawl_dir(self.api, "alephclient/tests/testdata", "test153", {}, True, True) base_path = os.path.abspath("alephclient/tests/testdata") assert self.api.ingest_upload.call_count == 6 expected_calls = [ mocker.call( 2, Path(os.path.join(base_path, "feb")), metadata={ "foreign_id": "feb", "file_name": "feb" }, index=True, ), mocker.call( 2, Path(os.path.join(base_path, "jan")), metadata={ "foreign_id": "jan", "file_name": "jan" }, index=True, ), mocker.call( 2, Path(os.path.join(base_path, "feb/2.txt")), metadata={ "parent_id": 42, "foreign_id": "feb/2.txt", "file_name": "2.txt", }, index=True, ), mocker.call( 2, Path(os.path.join(base_path, "jan/week1")), metadata={ "parent_id": 42, "foreign_id": "jan/week1", "file_name": "week1", }, index=True, ), mocker.call( 2, Path(os.path.join(base_path, "jan/week1/1.txt")), metadata={ "parent_id": 42, "foreign_id": "jan/week1/1.txt", "file_name": "1.txt", }, index=True, ), ] for call in expected_calls: assert call in self.api.ingest_upload.mock_calls