def test_matches_if_catch_all_pattern_is_defined(self):
    """An index matches both catch-all patterns, "*" and "_all"."""
    # Fixed: the original passed the test class ``TrackTests`` as
    # ``auto_managed`` — a boolean flag — which only worked because any
    # class object is truthy. Pass ``True`` explicitly.
    self.assertTrue(track.Index("test", auto_managed=True, types=[]).matches(pattern="*"))
    self.assertTrue(track.Index("test", auto_managed=True, types=[]).matches(pattern="_all"))
def test_index_names(self):
    """index_names() returns the names in declaration order."""
    indices = [track.Index(name="foo"), track.Index(name="bar")]
    track_ = track.Track(name="unittest", description="unittest track", indices=indices)
    assert track_.index_names() == ["foo", "bar"]
def test_matches_exactly(self):
    """matches() compares the exact name; leading whitespace must not match."""
    # Fixed: the original passed the test class ``TrackTests`` as
    # ``auto_managed`` — a boolean flag — which only worked because any
    # class object is truthy. Pass ``True`` explicitly.
    self.assertTrue(track.Index("test", auto_managed=True, types=[]).matches("test"))
    self.assertFalse(track.Index("test", auto_managed=True, types=[]).matches(" test"))
def test_passes_all_indices_by_default(self):
    """Without an "index" filter, every track index ends up in the partition."""
    all_indices = [
        track.Index(name="index1", auto_managed=True, types=[]),
        track.Index(name="index2", auto_managed=True, types=[]),
    ]
    source = params.BulkIndexParamSource(indices=all_indices, params={
        "conflicts": "random",
        "bulk-size": 5000,
        "batch-size": 20000,
        "pipeline": "test-pipeline"
    })
    partition = source.partition(0, 1)
    self.assertEqual(partition.indices, all_indices)
def test_generate_two_bulks(self):
    """Eight documents with bulk_size=5 yield one full and one partial bulk."""
    doc_type = track.Type("type1", mapping_file="", number_of_documents=10)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    bulks = params.bulk_data_based(
        num_clients=1,
        client_index=0,
        indices=[index],
        action_metadata=params.ActionMetaData.NoMetaData,
        batch_size=5,
        bulk_size=5,
        id_conflicts=params.IndexIdConflict.NoConflicts,
        pipeline=None,
        original_params={
            "my-custom-parameter": "foo",
            "my-custom-parameter-2": True
        },
        create_reader=BulkDataGeneratorTests.create_test_reader([["1", "2", "3", "4", "5"], ["6", "7", "8"]]))
    all_bulks = list(bulks)
    self.assertEqual(2, len(all_bulks))

    def expected_bulk(bulk_id, body):
        # Fields shared by every generated bulk; custom params pass through unchanged.
        return {
            "action_metadata_present": False,
            "body": body,
            "bulk-id": bulk_id,
            "bulk-size": len(body),
            "index": index,
            "type": doc_type,
            "my-custom-parameter": "foo",
            "my-custom-parameter-2": True
        }

    self.assertEqual(expected_bulk("0-1", ["1", "2", "3", "4", "5"]), all_bulks[0])
    self.assertEqual(expected_bulk("0-2", ["6", "7", "8"]), all_bulks[1])
def _create_index(self, index_spec, mapping_dir, data_dir):
    """Create a ``track.Index`` from its specification.

    :param index_spec: The index specification (dict-like) read from the track.
    :param mapping_dir: Directory containing the mapping files for the index's types.
    :param data_dir: Directory containing the document data files.
    :return: A fully initialized ``track.Index``.
    """
    index_name = self._r(index_spec, "name")
    auto_managed = self._r(index_spec, "auto-managed", mandatory=False, default_value=True)
    # "types" is only mandatory when Rally manages the index itself.
    types = [self._create_type(type_spec, mapping_dir, data_dir)
             for type_spec in self._r(index_spec, "types", mandatory=auto_managed, default_value=[])]
    # Fixed: the original search loop shadowed the builtin ``type``;
    # any() expresses the same check without the manual break.
    if not any(t.has_valid_document_data() for t in types):
        console.warn("None of the types for index [%s] defines documents. Please check that you either don't want to index data or "
                     "parameter sources are defined for indexing." % index_name, logger=logger)
    return track.Index(name=index_name, auto_managed=auto_managed, types=types)
def _create_index(self, index_spec, mapping_dir):
    """Create a ``track.Index`` from its specification.

    The command-line override ``override_auto_manage_indices`` takes
    precedence over the track's own "auto-managed" setting.

    :param index_spec: The index specification (dict-like) read from the track.
    :param mapping_dir: Directory containing the mapping files for the index's types.
    :return: A fully initialized ``track.Index``.
    """
    index_name = self._r(index_spec, "name")
    if self.override_auto_manage_indices is not None:
        auto_managed = self.override_auto_manage_indices
        logger.info("User explicitly forced auto-managed indices to [%s] on the command line." % str(auto_managed))
    else:
        auto_managed = self._r(index_spec, "auto-managed", mandatory=False, default_value=True)
        logger.info("Using index auto-management setting from track which is set to [%s]." % str(auto_managed))
    # "types" is only mandatory when Rally manages the index itself.
    types = [self._create_type(type_spec, mapping_dir)
             for type_spec in self._r(index_spec, "types", mandatory=auto_managed, default_value=[])]
    # Fixed: the original search loop shadowed the builtin ``type``;
    # any() expresses the same check without the manual break.
    if not any(t.has_valid_document_data() for t in types):
        console.warn("None of the types for index [%s] defines documents. Please check that you either don't want to index data or "
                     "parameter sources are defined for indexing." % index_name, logger=logger)
    return track.Index(name=index_name, auto_managed=auto_managed, types=types)
def test_internal_params_take_precedence(self):
    """A user-supplied "body" param is overridden by the generated bulk body."""
    doc_type = track.Type("type1", mapping={}, number_of_documents=3)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    bulks = params.bulk_data_based(
        num_clients=1,
        client_index=0,
        indices=[index],
        batch_size=3,
        bulk_size=3,
        id_conflicts=params.IndexIdConflict.NoConflicts,
        pipeline=None,
        original_params={"body": "foo", "custom-param": "bar"},
        create_reader=BulkDataGeneratorTests.create_test_reader([["1", "2", "3"]]))
    all_bulks = list(bulks)
    self.assertEqual(1, len(all_bulks))
    # body must not contain 'foo'!
    self.assertEqual({
        "action_metadata_present": True,
        "body": ["1", "2", "3"],
        "bulk-id": "0-1",
        "bulk-size": 3,
        "index": index,
        "type": doc_type,
        "custom-param": "bar"
    }, all_bulks[0])
def test_passes_request_parameters(self):
    """"request-params" from the track are forwarded verbatim to the query."""
    doc_type = track.Type("type1", mapping={}, number_of_documents=3)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    source = params.SearchParamSource(indices=[index], params={
        "request-params": {"_source_include": "some_field"},
        "body": {"query": {"match_all": {}}}
    })
    p = source.params()
    self.assertEqual(5, len(p))
    # All fixed-value entries of the generated parameter dict.
    expectations = {
        "index": "index1",
        "type": "type1",
        "request_params": {"_source_include": "some_field"},
        "body": {"query": {"match_all": {}}}
    }
    for key, expected in expectations.items():
        self.assertEqual(expected, p[key])
    self.assertFalse(p["use_request_cache"])
def test_filters_indices(self):
    """An explicit "index" param restricts the partition to the matching index."""
    ignored = track.Index(name="index1", auto_managed=True, types=[])
    selected = track.Index(name="index2", auto_managed=True, types=[])
    source = params.BulkIndexParamSource(indices=[ignored, selected], params={
        "index": "index2",
        "action-and-meta-data": "generate",
        "conflicts": "random",
        "bulk-size": 5000,
        "batch-size": 20000,
        "pipeline": "test-pipeline"
    })
    partition = source.partition(0, 1)
    self.assertEqual(partition.indices, [selected])
def test_create_with_metadata_in_source_file_but_conflicts(self):
    """Requesting id conflicts is rejected when the source data already contains action/meta-data lines."""
    type1 = track.Type("type1", mapping={}, number_of_documents=10, includes_action_and_meta_data=True)
    index1 = track.Index(name="index1", auto_managed=True, types=[type1])
    with self.assertRaises(exceptions.InvalidSyntax) as ctx:
        params.BulkIndexParamSource(indices=[index1], params={"conflicts": "random"})
    # NOTE(review): the expected message interpolates index and type in swapped
    # order ("type [index1] in index [type1]"); presumably the production error
    # message has the same swap — confirm and fix both sides together, not just
    # this assertion.
    self.assertEqual("Cannot generate id conflicts [random] as type [index1] in index [type1] already contains "
                     "an action and meta-data line.", ctx.exception.args[0])
def test_raises_exception_if_no_index_matches(self):
    """A filter matching no track index raises RallyAssertionError at partition time."""
    only_index = track.Index(name="index1", auto_managed=True, types=[])
    source = params.BulkIndexParamSource(indices=[only_index], params={
        "index": "does_not_exist",
        "conflicts": "random",
        "bulk-size": 5000,
        "batch-size": 20000,
        "pipeline": "test-pipeline"
    })
    with self.assertRaises(exceptions.RallyAssertionError) as ctx:
        source.partition(0, 1)
    expected_message = "The provided index [does_not_exist] does not match any of the indices [index1]."
    self.assertEqual(expected_message, ctx.exception.args[0])
def test_sets_absolute_path(self):
    """set_absolute_data_path() prefixes document files with the local dataset cache dir."""
    from esrally import config
    from esrally.track import track

    cfg = config.Config()
    cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data")

    index_operation = track.Operation("index", operation_type=track.OperationType.Index)
    default_challenge = track.Challenge("default", description="default challenge", default=True,
                                        schedule=[track.Task(operation=index_operation, clients=4)])
    another_challenge = track.Challenge("other", description="non-default challenge", default=False)
    docs_type = track.Type("docs", mapping={},
                           document_file="docs/documents.json",
                           document_archive="docs/documents.json.bz2")
    t = track.Track(name="unittest",
                    short_description="unittest track",
                    challenges=[another_challenge, default_challenge],
                    indices=[track.Index(name="test", auto_managed=True, types=[docs_type])])

    loader.set_absolute_data_path(cfg, t)

    self.assertEqual("/data/docs/documents.json", t.indices[0].types[0].document_file)
    self.assertEqual("/data/docs/documents.json.bz2", t.indices[0].types[0].document_archive)
def test_matches_if_catch_all_pattern_is_defined(self):
    """Both "*" and "_all" act as catch-all patterns."""
    index = track.Index("test")
    for catch_all in ("*", "_all"):
        self.assertTrue(index.matches(pattern=catch_all))
def test_str(self):
    """str() of an index is its name."""
    index = track.Index("test")
    self.assertEqual("test", str(index))
def test_matches_exactly(self):
    """Matching is exact — surrounding whitespace is significant."""
    index = track.Index("test")
    self.assertTrue(index.matches("test"))
    self.assertFalse(index.matches(" test"))
def test_matches_if_no_pattern_is_defined(self):
    """A missing (None) pattern matches any index."""
    index = track.Index("test")
    self.assertTrue(index.matches(pattern=None))
def test_matches_if_no_pattern_is_defined(self):
    """A missing (None) pattern matches any index."""
    # Fixed: the original passed the test class ``TrackTests`` as
    # ``auto_managed`` — a boolean flag — which only worked because any
    # class object is truthy. Pass ``True`` explicitly.
    self.assertTrue(track.Index("test", auto_managed=True, types=[]).matches(pattern=None))
def test_str(self):
    """str() of an index is its name."""
    # Fixed: the original passed the test class ``TrackTests`` as
    # ``auto_managed`` — a boolean flag — which only worked because any
    # class object is truthy. Pass ``True`` explicitly.
    self.assertEqual("test", str(track.Index("test", auto_managed=True, types=[])))
def test_str(self):
    """str() of an index is its name."""
    index = track.Index("test")
    assert str(index) == "test"
# Track specification for the "tiny" track: a 2,000-document cut of the
# geonames corpus intended for fast local smoke tests.
tinyTrackSpec = track.Track(
    name="tiny",
    short_description="First 2k documents of the geonames track for local tests",
    # Fixed: the description was copied verbatim from the full geonames track
    # (8.6M documents, 2.8 GB); this track indexes only the first 2,000
    # documents (see number_of_documents below).
    description="This test indexes the first 2,000 documents of the geonames track (POIs from Geonames) using 8 client threads "
                "and 5000 docs per bulk request against Elasticsearch",
    source_root_url="http://benchmarks.elastic.co/corpora/tiny",
    indices=[
        track.Index(name=TINY_INDEX_NAME, types=[
            track.Type(name=TINY_TYPE_NAME,
                       mapping_file_name="mappings.json",
                       document_file_name="documents.json.bz2",
                       number_of_documents=2000,
                       compressed_size_in_bytes=28333,
                       uncompressed_size_in_bytes=564930)
        ])
    ],
    # Queries to use in the search benchmark
    queries=[
        DefaultQuery(),
        TermQuery(),
        PhraseQuery(),
        CountryAggQuery(use_request_cache=False),
        CountryAggQuery(suffix="_cached", use_request_cache=True),
        ScrollQuery()
    ],
    track_setups=track.track_setups)
def test_matches_if_catch_all_pattern_is_defined(self):
    """Both "*" and "_all" act as catch-all patterns."""
    index = track.Index("test")
    for catch_all in ("*", "_all"):
        assert index.matches(pattern=catch_all)
}''') percolatorTrackSpec = track.Track( name="percolator", description= "This test indexes 2M AOL queries and use the percolator query to match", source_root_url="http://benchmarks.elastic.co/corpora/percolator", indices=[ track.Index( name="queries", types=[ # The type for the percolator queries: track.Type(name=".percolator", mapping_file_name="queries-mapping.json", document_file_name="queries.json.bz2", number_of_documents=2000000, compressed_size_in_bytes=123502, uncompressed_size_in_bytes=148039748), # The used for documents being percolated: track.Type(name="content", mapping_file_name="document-mapping.json") ]) ], estimated_benchmark_time_in_minutes=5, # Queries to use in the search benchmark queries=[ PercolatorQuery(content="president bush"), PercolatorQuery(content="saddam hussein"), PercolatorQuery(content="hurricane katrina"), PercolatorQuery(content="google"), PercolatorQueryNoScoring(content="google"),
def test_matches_exactly(self):
    """Matching is exact — surrounding whitespace is significant."""
    index = track.Index("test")
    assert index.matches("test")
    assert not index.matches(" test")