def test_compound_indexer_rw():
    """Write to three shards of a compound indexer and read the vectors back.

    Each shard ``j`` is loaded from the same YAML, receives key/bytes pairs in
    component 0 and two rows of ``all_vecs`` in component 1, and is saved.
    The shards are then reloaded and the concatenated ``query_handler`` of
    component 1 is compared against ``all_vecs``.
    """
    yaml_path = 'yaml/test-compound-indexer2.yml'
    all_vecs = np.random.random([6, 5])

    for j in range(3):
        with BaseExecutor.load_config(yaml_path, True, j) as a:
            assert a[0] == a['test_meta']
            meta_part, vec_part = a[0], a[1]

            assert not meta_part.is_updated
            assert not a.is_updated
            # the very same batch is added twice, as in the original test
            for _ in range(2):
                meta_part.add([j * k for k in (1, 2, 3)],
                              [bytes(j * k) for k in (1, 2, 3)])
            assert meta_part.is_updated
            assert a.is_updated

            assert not vec_part.is_updated
            rows = (j * 2, j * 2 + 1)
            vec_part.add(np.array(rows), all_vecs[rows, :])
            assert vec_part.is_updated

            a.save()
            # the compound executor itself is not modified, therefore should not generate a save
            assert not os.path.exists(a.save_abspath)
            for part in (meta_part, vec_part):
                assert os.path.exists(part.save_abspath)
                assert os.path.exists(part.index_abspath)

    recovered_vecs = []
    for j in range(3):
        with BaseExecutor.load_config(yaml_path, True, j) as a:
            recovered_vecs.append(a[1].query_handler)

    np.testing.assert_almost_equal(all_vecs, np.concatenate(recovered_vecs))
def test_load_driver():
    """Compare the ControlRequest drivers of ``yaml/route.yml`` and ``'_route'``.

    Both executors are loaded through ``BaseExecutor.load_config`` and must
    expose the same number of drivers under ``'ControlRequest'``. The driver
    tables are pretty-printed for inspection.
    """
    from_file = BaseExecutor.load_config(os.path.join(cur_dir, 'yaml/route.yml'))
    pprint(from_file._drivers)

    from_name = BaseExecutor.load_config('_route')
    n_file_drivers = len(from_file._drivers['ControlRequest'])
    n_name_drivers = len(from_name._drivers['ControlRequest'])
    assert n_file_drivers == n_name_drivers
    pprint(from_name._drivers)
def test_exec_type(tmpdir):
    """Exercise executor registration when ``BaseIndexer`` is redefined locally.

    The local name ``BaseIndexer`` is deliberately rebound twice below;
    ``assert_bi`` resolves it at call time, so it always uses the most recent
    definition.
    """
    from jina.executors.indexers import BaseIndexer

    dump_path = os.path.join(tmpdir, 'tmp.yml')

    assert 'BaseIndexer' in BaseExecutor._registered_class

    # init from YAML should be okay as well
    BaseExecutor.load_config('BaseIndexer')

    BaseIndexer().save_config(dump_path)
    with open(dump_path) as stream:
        JAML.load(stream)

    def assert_bi():
        # dump an instance built with a=1 and expect the reloaded object to carry it
        dumped = BaseIndexer(1)
        dumped.save_config(dump_path)
        with open(dump_path) as stream:
            reloaded = JAML.load(stream)
        assert reloaded.a == 1

    # we override BaseIndexer now, without force it shall not store all init values
    class BaseIndexer(BaseExecutor):

        def __init__(self, a=0):
            super().__init__()
            self.a = a

    with pytest.raises(AssertionError):
        assert_bi()

    # same class again, this time opting in via ``force_register``
    class BaseIndexer(BaseExecutor):
        force_register = True

        def __init__(self, a=0):
            super().__init__()
            self.a = a

    assert_bi()
def test_compound_indexer_ref_indexer(test_workspace, pea_id):
    """Dump component 1 of a compound indexer and reload it via a second YAML.

    ``num_dim`` is set to 512 on the component before it is dumped. The test
    checks where the ``.bin`` file lands under ``JINA_TEST_WORKSPACE`` and that
    the second config sees ``num_dim == 512`` on its own component 1.
    """
    workspace = os.environ['JINA_TEST_WORKSPACE']

    first_yaml = os.path.join(cur_dir, 'yaml/test-compound-indexer2.yml')
    with BaseExecutor.load_config(first_yaml, pea_id=pea_id) as compound_indexer:
        ref_indexer = compound_indexer[1]
        ref_indexer.num_dim = 512
        ref_indexer.touch()

    # the compound folder only carries the pea_id suffix for pea_id > 0
    if pea_id > 0:
        compound_dir = f'{compound_indexer.name}-{compound_indexer.pea_id}'
    else:
        compound_dir = f'{compound_indexer.name}'
    expected_bin = os.path.join(
        workspace,
        compound_dir,
        f'{ref_indexer.name}-{ref_indexer.pea_id}',
        f'{ref_indexer.name}.bin',
    )
    assert os.path.exists(expected_bin)

    second_yaml = os.path.join(cur_dir, 'yaml/test_compount_indexer2-ref-indexer.yml')
    with BaseExecutor.load_config(second_yaml, pea_id=pea_id) as compound_indexer:
        indexer = compound_indexer[1]
        assert indexer.num_dim == 512
def test_simple_indexer_workspace_move_to_docker(test_workspace_move, tmpdir, pea_id):
    """Index two entries, relocate the workspace folder, and query them again.

    The workspace named by ``JINA_TEST_WORKSPACE`` is copied to
    ``<tmpdir>/docker``, the original folder is removed, and the environment
    variable is repointed before the indexer is loaded a second time.
    """
    keys = ['0', '1']
    content = [b'a', b'b']
    old_tmpdir = os.environ['JINA_TEST_WORKSPACE']
    docker_tmpdir = os.path.join(tmpdir, 'docker')
    yaml_path = os.path.join(cur_dir, 'yaml/test-kvindexer-workspace.yml')

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as indexer:
        indexer.add(keys, content)

    dumped_bin = os.path.join(
        old_tmpdir, f'{indexer.name}-{indexer.pea_id}', f'{indexer.name}.bin')
    assert os.path.exists(dumped_bin)

    # move the whole workspace and make the YAML resolve to the new location
    shutil.copytree(os.environ['JINA_TEST_WORKSPACE'], docker_tmpdir)
    shutil.rmtree(os.environ['JINA_TEST_WORKSPACE'])
    os.environ['JINA_TEST_WORKSPACE'] = str(docker_tmpdir)

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as indexer:
        for key, expected in zip(keys, content):
            assert indexer.query(key) == expected
        assert indexer.workspace == docker_tmpdir
def test_compound_indexer_rw(test_workspace):
    """Write to three separated workspaces of a compound indexer and read back.

    For each ``pea_id`` the first component receives one batch keyed by
    ``UniqueId`` and one batch keyed by plain ints; the second component
    receives two rows of ``all_vecs``. After saving, the shards are reloaded
    and the concatenated ``query_handler`` of component 1 must match
    ``all_vecs``.
    """
    yaml_path = os.path.join(cur_dir, 'yaml/test-compound-indexer2.yml')
    all_vecs = np.random.random([6, 5])

    for j in range(3):
        with BaseExecutor.load_config(yaml_path, separated_workspace=True, pea_id=j) as indexer:
            assert indexer[0] == indexer['test_meta']
            meta_part, vec_part = indexer[0], indexer[1]

            assert not meta_part.is_updated
            assert not indexer.is_updated
            meta_part.add([UniqueId(j * k) for k in (1, 2, 3)],
                          [bytes(j * k) for k in (1, 2, 3)])
            meta_part.add([j * k for k in (1, 2, 3)],
                          [bytes(j * k) for k in (1, 2, 3)])
            assert meta_part.is_updated
            assert indexer.is_updated

            assert not vec_part.is_updated
            rows = (j * 2, j * 2 + 1)
            vec_part.add(np.array(rows), all_vecs[rows, :])
            assert vec_part.is_updated

            indexer.save()
            # the compound executor itself is not modified, therefore should not generate a save
            assert not os.path.exists(indexer.save_abspath)
            for part in (meta_part, vec_part):
                assert os.path.exists(part.save_abspath)
                assert os.path.exists(part.index_abspath)

    recovered_vecs = []
    for j in range(3):
        with BaseExecutor.load_config(yaml_path, separated_workspace=True, pea_id=j) as indexer:
            recovered_vecs.append(indexer[1].query_handler)

    np.testing.assert_almost_equal(all_vecs, np.concatenate(recovered_vecs))
def test_compound_indexer_rw(self):
    """Write to three shards of a compound indexer and read the vectors back.

    Files produced by each shard are registered through ``self.add_tmpfile``.
    """
    yaml_path = 'yaml/test-compound-indexer2.yml'
    all_vecs = np.random.random([6, 5])

    for j in range(3):
        executor = BaseExecutor.load_config(yaml_path, True, j)
        assert executor[0] == executor['test_meta']

        self.assertFalse(executor[0].is_updated)
        self.assertFalse(executor.is_updated)
        executor[0].add([j * k for k in (1, 2, 3)],
                        [bytes(j * k) for k in (1, 2, 3)])
        self.assertTrue(executor[0].is_updated)
        self.assertTrue(executor.is_updated)

        self.assertFalse(executor[1].is_updated)
        rows = (j * 2, j * 2 + 1)
        executor[1].add(np.array(rows), all_vecs[rows, :])
        self.assertTrue(executor[1].is_updated)

        executor.save()
        # the compound executor itself is not modified, therefore should not generate a save
        self.assertFalse(os.path.exists(executor.save_abspath))
        for idx in (0, 1):
            self.assertTrue(os.path.exists(executor[idx].save_abspath))
            self.assertTrue(os.path.exists(executor[idx].index_abspath))

        self.add_tmpfile(
            executor[0].save_abspath,
            executor[1].save_abspath,
            executor[0].index_abspath,
            executor[1].index_abspath,
            executor.current_workspace,
        )

    recovered_vecs = []
    for j in range(3):
        executor = BaseExecutor.load_config(yaml_path, True, j)
        recovered_vecs.append(executor[1].query_handler)

    np.testing.assert_almost_equal(all_vecs, np.concatenate(recovered_vecs))
def test_compound_indexer_with_workspace_in_components(test_workspace, pea_id, dump_compound):
    """Dump a compound executor whose components are covered by extra workspace variables.

    Checks where the compound ``.bin`` and the component ``.bin`` files are
    written, then reloads the same YAML and verifies the attributes set before
    dumping are restored.
    """
    # original note: the workspace in components will be ignored in compound
    # NOTE(review): the component dumps are nevertheless looked up under
    # JINA_TEST_WORKSPACE_COMP1/COMP2 below - confirm which statement is intended.
    tmpdir = os.environ['JINA_TEST_WORKSPACE']
    comp1_dir = os.environ['JINA_TEST_WORKSPACE_COMP1']
    comp2_dir = os.environ['JINA_TEST_WORKSPACE_COMP2']
    yaml_path = os.path.join(
        cur_dir, 'yaml/test-compound-indexer-components-with-workspace.yml')

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        assert len(executor.components) == 2
        assert executor.pea_id == pea_id
        for i, component in enumerate(executor):
            assert component.pea_id == executor.pea_id
            component.index_filename = f'index_filename-component-{i}'
            component.touch()
        executor._attached_pea = 'hey'
        if dump_compound:
            executor.touch()

    if pea_id > 0:
        compound_bin_expected = os.path.join(
            tmpdir, f'{executor.name}-{executor.pea_id}', f'{executor.name}.bin')
    else:
        compound_bin_expected = os.path.join(tmpdir, f'{executor.name}.bin')

    # the compound dump must exist exactly when the compound itself was touched
    if dump_compound:
        assert os.path.exists(compound_bin_expected)
    else:
        assert not os.path.exists(compound_bin_expected)

    for i, component in enumerate(executor):
        component_root = comp1_dir if i == 0 else comp2_dir
        if pea_id > 0:
            component_bin = os.path.join(
                component_root,
                f'{component.name}-{component.pea_id}',
                f'{component.name}.bin',
            )
        else:
            component_bin = os.path.join(component_root, f'{component.name}.bin')
        assert os.path.exists(component_bin)

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        assert len(executor.components) == 2
        for i, component in enumerate(executor):
            assert component.index_filename == f'index_filename-component-{i}'
        if dump_compound:
            assert executor._attached_pea == 'hey'
def test_drivers_renewed_from_yml_when_loaded_from_dump(temp_workspace):
    """Check that the driver flag follows the YAML that is loaded.

    ``example_1.yml`` yields a first SearchRequest driver with ``_is_update``
    true. After that executor is touched inside a ``with`` block, loading
    ``example_2.yml`` must yield ``_is_update`` false.
    """
    first = BaseExecutor.load_config(f'{cur_dir}/yaml/example_1.yml')
    first_driver = first._drivers['SearchRequest'][0]
    assert first_driver._is_update is True

    with first:
        first.touch()

    second = BaseExecutor.load_config(f'{cur_dir}/yaml/example_2.yml')
    second_driver = second._drivers['SearchRequest'][0]
    assert second_driver._is_update is False
def test_load_from_dict():
    """Load executors from plain dicts instead of YAML sources.

    ``d1`` describes a ``BaseEncoder`` whose metas use ``${{...}}``
    placeholders filled from the ``context`` dict. ``d2`` describes a
    ``CompoundExecutor`` with two ``BinaryPbIndexer`` components.
    """
    # YAML sketch of ``encoder_spec``:
    #   !BaseEncoder
    #   metas:
    #     name: ${{BE_TEST_NAME}}
    #     batch_size: ${{BATCH_SIZE}}
    #     pea_id: ${{pea_id}}
    #     workspace: ${{this.name}} -${{this.batch_size}}
    # NOTE(review): the workspace template contains a space before the dash;
    # the test does not assert on ``workspace`` - confirm whether it is intended.
    encoder_metas = {
        'name': '${{BE_TEST_NAME}}',
        'batch_size': '${{BATCH_SIZE}}',
        'pea_id': '${{pea_id}}',
        'workspace': '${{this.name}} -${{this.batch_size}}',
    }
    encoder_spec = {'jtype': 'BaseEncoder', 'metas': encoder_metas}

    # YAML sketch of ``compound_spec``:
    #   !CompoundExecutor
    #   components:
    #     - !BinaryPbIndexer
    #       with:
    #         index_filename: tmp1
    #       metas:
    #         name: test1
    #     - !BinaryPbIndexer
    #       with:
    #         index_filename: tmp2
    #       metas:
    #         name: test2
    # NOTE(review): no top-level ``metas`` (e.g. ``name: compound1``) is set here.
    first_component = {
        'jtype': 'BinaryPbIndexer',
        'with': {'index_filename': 'tmp1'},
        'metas': {'name': 'test1'},
    }
    second_component = {
        'jtype': 'BinaryPbIndexer',
        'with': {'index_filename': 'tmp2'},
        'metas': {'name': 'test2'},
    }
    compound_spec = {
        'jtype': 'CompoundExecutor',
        'components': [first_component, second_component],
    }

    context = {'BE_TEST_NAME': 'hello123', 'BATCH_SIZE': 256}
    encoder = BaseExecutor.load_config(encoder_spec, context=context)
    compound = BaseExecutor.load_config(compound_spec, context=context)

    assert isinstance(encoder, BaseExecutor)
    assert isinstance(compound, CompoundExecutor)
    assert encoder.batch_size == 256
    assert encoder.name == 'hello123'
def test_load_yaml2(self):
    """Load ``yaml/dummy_exec1.yml`` twice; save and dump the second instance.

    Produced files are registered through ``self.add_tmpfile``.
    """
    from jina.executors import BaseExecutor

    config = 'yaml/dummy_exec1.yml'

    first = BaseExecutor.load_config(config)
    first.close()
    self.add_tmpfile('test.gzip')

    second = BaseExecutor.load_config(config)
    # binary dump first, then the YAML config
    second.save()
    self.add_tmpfile(second.save_abspath)
    second.save_config()
    self.add_tmpfile(second.config_abspath)
    second.close()
def test_shard_workspace(test_workspace, pea_id):
    """Dump an executor for a given ``pea_id`` and reload it.

    For ``pea_id > 0`` the ``.bin`` file is expected in a
    ``<name>-<pea_id>`` sub-folder of ``JINA_TEST_WORKSPACE``; otherwise it is
    expected directly in the workspace. The reloaded executor must carry the
    ``index_filename`` that was set before dumping.
    """
    workspace = os.environ['JINA_TEST_WORKSPACE']
    yaml_path = os.path.join(cur_dir, 'yaml/test-workspace.yml')

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        executor.index_filename = 'index_filename'
        executor.touch()

    bin_name = f'{executor.name}.bin'
    if pea_id > 0:
        expected_bin = os.path.join(
            workspace, f'{executor.name}-{executor.pea_id}', bin_name)
    else:
        expected_bin = os.path.join(workspace, bin_name)
    assert os.path.exists(expected_bin)

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        assert executor.index_filename == 'index_filename'
def test_resource_executor(self):
    """Load the packaged executor YAMLs and check their name and driver count.

    Each resource ``resources/executors.<stem>.yml`` of the ``jina`` package
    must load to an executor with the paired name and four driver groups.
    """
    expectations = (
        ('_route', 'route'),
        ('_pass', 'forward'),
        ('_merge', 'merge'),
        ('_clear', 'clear'),
    )
    for stem, expected_name in expectations:
        resource = '/'.join(('resources', f'executors.{stem}.yml'))
        a = BaseExecutor.load_config(resource_filename('jina', resource))
        assert a.name == expected_name
        assert len(a._drivers) == 4
def test_load_yaml2(test_metas):
    """Check that drivers survive a config round trip and a binary round trip."""
    yaml_path = os.path.join(cur_dir, 'yaml/test-exec-with-driver.yml')
    original = BaseExecutor.load_config(yaml_path)

    assert len(original._drivers) == 2
    # should be able to auto fill in ControlRequest
    assert 'ControlRequest' in original._drivers

    # round trip 1: dump the YAML config and load it again
    original.save_config()
    from_config = BaseExecutor.load_config(original.config_abspath)
    assert original._drivers == from_config._drivers

    # round trip 2: binary dump, loaded with ``BaseExecutor.load``
    original.touch()
    original.save()
    from_dump = BaseExecutor.load(original.save_abspath)
    assert original._drivers == from_dump._drivers
def test_compound_indexer_no_workspace_in_components(test_workspace, pea_id, dump_compound):
    """Dump a compound executor and its two components, then reload them.

    Verifies the on-disk location of the compound and component ``.bin`` files
    under ``JINA_TEST_WORKSPACE`` and that attributes set before dumping are
    restored on reload.
    """
    workspace = os.environ['JINA_TEST_WORKSPACE']
    yaml_path = os.path.join(cur_dir, 'yaml/test-compound-indexer.yml')

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        assert executor.pea_id == pea_id
        assert len(executor.components) == 2
        for i, component in enumerate(executor):
            assert component.pea_id == executor.pea_id
            component.index_filename = f'index_filename-component-{i}'
            component.touch()
        executor._attached_pea = 'hey'
        if dump_compound:
            executor.touch()

    # NOTE(review): unlike the component paths below, this path always carries
    # the ``-<pea_id>`` suffix, even for pea_id == 0 - confirm this is intended.
    compound_bin_expected = os.path.join(
        workspace, f'{executor.name}-{executor.pea_id}', f'{executor.name}.bin')
    if dump_compound:
        assert os.path.exists(compound_bin_expected)
    else:
        assert not os.path.exists(compound_bin_expected)

    for component in executor:
        if pea_id > 0:
            compound_dir = f'{executor.name}-{executor.pea_id}'
        else:
            compound_dir = f'{executor.name}'
        component_bin = os.path.join(
            workspace,
            compound_dir,
            f'{component.name}-{component.pea_id}',
            f'{component.name}.bin',
        )
        assert os.path.exists(component_bin)

    with BaseExecutor.load_config(yaml_path, pea_id=pea_id) as executor:
        assert len(executor.components) == 2
        for i, component in enumerate(executor):
            assert component.index_filename == f'index_filename-component-{i}'
        if dump_compound:
            assert executor._attached_pea == 'hey'
def test_load_yaml2(tmpdir):
    """Check that drivers survive a config round trip and a binary round trip.

    ``JINA_TEST_EXEC_WITH_DRIVER`` is pointed at ``tmpdir`` for the duration of
    the test. The variable is removed in a ``finally`` clause so that a failing
    assertion does not leak it into later tests.
    """
    os.environ['JINA_TEST_EXEC_WITH_DRIVER'] = str(tmpdir)
    try:
        a = BaseExecutor.load_config(os.path.join(cur_dir, 'yaml/test-exec-with-driver.yml'))
        assert len(a._drivers) == 2
        # should be able to auto fill in ControlRequest
        assert 'ControlRequest' in a._drivers

        # round trip 1: dump the YAML config and load it again
        a.save_config()
        p = a.config_abspath
        b = BaseExecutor.load_config(p)
        assert a._drivers == b._drivers

        # round trip 2: binary dump, loaded with ``BaseExecutor.load``
        a.touch()
        a.save()
        c = BaseExecutor.load(a.save_abspath)
        assert a._drivers == c._drivers
    finally:
        # always undo the environment change, even when an assertion fails
        del os.environ['JINA_TEST_EXEC_WITH_DRIVER']
def test_load_yaml2(self):
    """Check that drivers survive a config round trip and a binary round trip.

    Dumped files are registered through ``self.add_tmpfile``.
    """
    original = BaseExecutor.load_config('yaml/test-exec-with-driver.yml')
    assert len(original._drivers) == 2
    # should be able to auto fill in ControlRequest
    self.assertIn('ControlRequest', original._drivers)

    # round trip 1: dump the YAML config and load it again
    original.save_config()
    config_path = original.config_abspath
    from_config = BaseExecutor.load_config(config_path)
    assert original._drivers == from_config._drivers
    self.add_tmpfile(config_path)

    # round trip 2: binary dump, loaded with ``BaseExecutor.load``
    original.touch()
    original.save()
    from_dump = BaseExecutor.load(original.save_abspath)
    assert original._drivers == from_dump._drivers
    self.add_tmpfile(original.save_abspath)
def test_save_load_config(tmp_path):
    """Compare a crafter's transforms with those of two loaded configs.

    The crafter is built with ``tmp_path`` as workspace and dumped with
    ``save_config``. Its ``transforms._to_dict()`` must equal that of the
    executor loaded from ``tests/config.yaml`` and of the one loaded from the
    freshly written config file.
    """
    metas = get_default_metas()
    metas['workspace'] = str(tmp_path)
    transforms = ['VerticalFlip', {'Resize': {'width': 200, 'height': 300}}]

    orig_crafter = AC(transforms, metas=metas)
    orig_crafter.save_config()
    expected = orig_crafter.transforms._to_dict()

    # load both executors first, then compare, as in the original order
    sources = ('tests/config.yaml', orig_crafter.config_abspath)
    loaded_crafters = [BaseExecutor.load_config(source) for source in sources]
    for crafter in loaded_crafters:
        assert expected == crafter.transforms._to_dict()
def test_encoder_name_dict_replace():
    """Check that values from a ``context`` dict are substituted into the YAML.

    With ``BE_TEST_NAME='hello123'`` and ``BATCH_SIZE=256`` the loaded
    executor must report the matching ``name`` and ``batch_size``, and the
    workspace ``'hello123-256'``.
    """
    context = {'BE_TEST_NAME': 'hello123', 'BATCH_SIZE': 256}
    with BaseExecutor.load_config('yaml/test-encoder-env.yml', context=context) as encoder:
        assert encoder.name == 'hello123'
        assert encoder.batch_size == 256
        assert encoder.workspace == 'hello123-256'
def test_save_and_load_config():
    """Dump an encoder's config, load it back, and compare ``channel_axis``.

    The files are removed in a ``finally`` clause so that a failing assertion
    does not leave them behind for later tests.
    """
    encoder = get_encoder()
    try:
        encoder.save_config()
        assert os.path.exists(encoder.config_abspath)
        encoder_loaded = BaseExecutor.load_config(encoder.config_abspath)
        assert encoder_loaded.channel_axis == encoder.channel_axis
    finally:
        # same cleanup call as before, now also reached on failure
        rm_files([encoder.save_abspath, encoder.config_abspath])
def test_compound_from_yaml(tmp_workspace):
    """Load ``yaml/npvec.yml`` and check the compound exposes the routed methods.

    The loaded object must be a ``CompoundExecutor`` with callable ``add``,
    ``query``, ``meta_add`` and ``meta_query`` attributes.
    """
    compound = BaseExecutor.load_config(os.path.join(cur_dir, 'yaml/npvec.yml'))
    assert isinstance(compound, CompoundExecutor)
    for method_name in ('add', 'query', 'meta_add', 'meta_query'):
        assert callable(getattr(compound, method_name))
def test_import_with_new_module_structure_should_pass():
    """Load a custom executor laid out in the **recommended** package structure.

    - python code for the executor is organized as a package inside the
      ``executor/`` folder
    - core logic lives in ``executor/my_executor.py``
    - ``executor/__init__.py`` contains
      ``from .my_executor import GoodCrafterNew``, which makes sure the custom
      executor class gets registered
    - all imports are relative, so ``executor/my_executor.py`` imports the
      ``helper`` module as ``from .helper import foo``

    File structure::

        my_cust_module/
          |- executor/
              |- __init__.py
              |- my_executor.py
              |- helper.py
          |- config.yml
              |- py_modules
                  |- executor/__init__.py
    """
    crafter = BaseExecutor.load_config('good_new/crafter.yml')
    assert crafter.__class__.__name__ == 'GoodCrafterNew'
def test_save_and_load_config(*args, **kwargs):
    """Dump a ``FlairTextEncoder`` config, load it back, and compare ``max_length``.

    The files are removed in a ``finally`` clause so that a failing assertion
    does not leave them behind for later tests.
    """
    encoder = FlairTextEncoder(embeddings=('flair:news-forward',), pooling_strategy='mean')
    try:
        encoder.save_config()
        assert os.path.exists(encoder.config_abspath)
        encoder_loaded = BaseExecutor.load_config(encoder.config_abspath)
        assert encoder_loaded.max_length == encoder.max_length
    finally:
        # same cleanup call as before, now also reached on failure
        rm_files([encoder.config_abspath, encoder.save_abspath])
def test_compound_indexer(self):
    """Save three shards of a compound indexer and check their workspaces differ.

    For every shard each component is touched and saved; its save and index
    paths must exist and start with the compound's ``current_workspace``. The
    compound itself is then touched and saved. Three distinct workspaces are
    expected overall.
    """
    yaml_path = os.path.join(cur_dir, 'yaml/test-compound-indexer.yml')
    workspaces_seen = set()

    for j in range(3):
        compound = BaseExecutor.load_config(yaml_path, True, j)

        for component in compound:
            component.touch()
            print(component.save_abspath)
            print(component.index_abspath)
            component.save()
            self.assertTrue(os.path.exists(component.save_abspath))
            self.assertTrue(os.path.exists(component.index_abspath))
            self.add_tmpfile(component.save_abspath, component.index_abspath)
            # component files must live under the compound's workspace
            self.assertTrue(
                component.save_abspath.startswith(compound.current_workspace))
            self.assertTrue(
                component.index_abspath.startswith(compound.current_workspace))

        compound.touch()
        compound.save()
        self.assertTrue(os.path.exists(compound.save_abspath))
        self.add_tmpfile(compound.save_abspath)
        self.add_tmpfile(compound.current_workspace)
        workspaces_seen.add(compound.current_workspace)

    assert len(workspaces_seen) == 3
def test_import_with_old_module_structure_should_pass():
    """Load a custom executor laid out in the old, flat module structure.

    This is a valid structure, but not recommended:

    - ``my_cust_module`` is a python module
    - all core logic of the customized executor goes into ``__init__.py``
    - ``foo.py`` has to be imported relatively, e.g. ``from .foo import bar``

    It is not recommended because:

    - putting core logic inside ``__init__.py`` is not how python packages are
      usually written
    - importing from the workspace prevents trying out the executor in the
      console or in test files at the root of the workspace, which makes
      development more cumbersome
    - the main directory gets cluttered with python files
    - extracting all python files to a separate directory is how python
      packages are usually composed

    File structure::

        my_cust_module
          |- __init__.py
          |- helper.py
          |- config.yml
              |- py_modules
                  |- __init__.py
    """
    crafter = BaseExecutor.load_config('good_old/crafter.yml')
    assert crafter.__class__.__name__ == 'GoodCrafterOld'
def test_pod_new_api_from_kwargs(self):
    """Load an executor with a custom driver, then start a ``Pod`` via ``uses=``.

    The first ControlRequest driver of the loaded executor must be an instance
    of a class named ``MyAwesomeDriver``.
    """
    executor = BaseExecutor.load_config('mwu-encoder/mwu_encoder_driver.yml')
    control_driver = executor._drivers['ControlRequest'][0]
    assert control_driver.__class__.__name__ == 'MyAwesomeDriver'

    pod_yaml = os.path.join(cur_dir, 'mwu-encoder/mwu_encoder_driver.yml')
    with Pod(uses=pod_yaml):
        # will print a cust task_name from the driver when terminate
        pass
def test_load_yaml2(self):
    """Check that drivers survive a config round trip and a binary round trip.

    Dumped files are registered through ``self.add_tmpfile``.
    """
    yaml_path = os.path.join(cur_dir, 'yaml/test-exec-with-driver.yml')
    original = BaseExecutor.load_config(yaml_path)
    self.assertEqual(len(original._drivers), 2)
    # should be able to auto fill in ControlRequest
    self.assertIn('ControlRequest', original._drivers)

    # round trip 1: dump the YAML config and load it again
    original.save_config()
    config_path = original.config_abspath
    from_config = BaseExecutor.load_config(config_path)
    self.assertEqual(original._drivers, from_config._drivers)
    self.add_tmpfile(config_path)

    # round trip 2: binary dump, loaded with ``BaseExecutor.load``
    original.touch()
    original.save()
    from_dump = BaseExecutor.load(original.save_abspath)
    self.assertEqual(original._drivers, from_dump._drivers)
    self.add_tmpfile(original.save_abspath)
def test_load_external(self):
    """Loading an external executor fails for one YAML and succeeds for another.

    ``yaml/dummy_ext_exec.yml`` must raise ruamel's ``ConstructorError``, while
    ``yaml/dummy_ext_exec_sucess.yml`` must load an object whose class is named
    ``DummyExternalIndexer``.
    """
    from jina.executors import BaseExecutor

    with self.assertRaises(ruamel.yaml.constructor.ConstructorError):
        BaseExecutor.load_config('yaml/dummy_ext_exec.yml')

    loaded = BaseExecutor.load_config('yaml/dummy_ext_exec_sucess.yml')
    assert loaded.__class__.__name__ == 'DummyExternalIndexer'
def test_with_common_kwargs_on_index():
    """Check the IndexRequest drivers built from the shared-kwargs config.

    The loaded executor must have six driver groups. Its IndexRequest drivers
    are a ``FilterQL`` followed by an ``EncodeDriver``, both with
    ``_traversal_paths == ['mmm']``.
    """
    be = BaseExecutor.load_config(y_fill_default_with_index_request_with_common)
    assert len(be._drivers) == 6

    index_drivers = be._drivers['IndexRequest']
    assert isinstance(index_drivers[1], EncodeDriver)
    assert isinstance(index_drivers[0], FilterQL)
    for position in (0, 1):
        assert index_drivers[position]._traversal_paths == ['mmm']
def test_load_cust_with_driver():
    """Load an executor with a custom driver, then start a ``Pod`` from parsed args.

    The first ControlRequest driver of the loaded executor must be an instance
    of a class named ``MyAwesomeDriver``.
    """
    yaml_path = os.path.join(cur_dir, 'mwu-encoder/mwu_encoder_driver.yml')

    executor = BaseExecutor.load_config(yaml_path)
    control_driver = executor._drivers['ControlRequest'][0]
    assert control_driver.__class__.__name__ == 'MyAwesomeDriver'

    pod_args = set_pod_parser().parse_args(['--uses', yaml_path])
    with Pod(pod_args):
        # will print a cust task_name from the driver when terminate
        pass