def __init__(self, underlying_runner=direct_runner.BundleBasedDirectRunner()):
  # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
  # once interactive_runner works with FnAPI mode
  self._underlying_runner = underlying_runner
  self._cache_manager = cache.LocalFileCacheManager()
  self._in_session = False

def test_priority_filter(self):
  simulation_config = config_pb2.SimulationConfig()
  simulation_config.filter.priority_range.lower_bound = 3
  simulation_config.filter.priority_range.upper_bound = 7
  correct_output = [
      {
          "time": 20,
          "info": {
              "priority": 4,
              "scheduling_class": 1,
              "machine_id": 200,
              "alloc_collection_id": 1,
          },
      },
      {
          "time": 30,
          "info": {
              "priority": 6,
              "scheduling_class": 2,
              "machine_id": 300,
              "alloc_collection_id": 0,
          },
      },
  ]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vmsamples = p | "Create priority test input" >> beam.Create(
        self.multiple_vmsamples)
    output = FilterVMSample(input_vmsamples, simulation_config)
    assert_that(output, equal_to(correct_output))

def test_all_filters(self):
  simulation_config = config_pb2.SimulationConfig()
  simulation_config.filter.remove_non_top_level_vms = True
  simulation_config.filter.priority_range.lower_bound = 1
  simulation_config.filter.priority_range.upper_bound = 7
  simulation_config.filter.scheduling_class_range.lower_bound = 0
  simulation_config.filter.scheduling_class_range.upper_bound = 1
  simulation_config.filter.start_time = 5
  simulation_config.filter.end_time = 15
  correct_output = [
      {
          "time": 10,
          "info": {
              "priority": 2,
              "scheduling_class": 0,
              "machine_id": 100,
              "alloc_collection_id": 0,
          },
      },
  ]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vmsamples = p | "Create all filter input" >> beam.Create(
        self.multiple_vmsamples)
    output = FilterVMSample(input_vmsamples, simulation_config)
    assert_that(output, equal_to(correct_output))

def test_top_level_filter(self):
  simulation_config = config_pb2.SimulationConfig()
  simulation_config.filter.remove_non_top_level_vms = True
  correct_output = [
      {
          "time": 10,
          "info": {
              "priority": 2,
              "scheduling_class": 0,
              "machine_id": 100,
              "alloc_collection_id": 0,
          },
      },
      {
          "time": 30,
          "info": {
              "priority": 6,
              "scheduling_class": 2,
              "machine_id": 300,
              "alloc_collection_id": 0,
          },
      },
  ]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vmsamples = p | "Create top level filter input" >> beam.Create(
        self.multiple_vmsamples)
    output = FilterVMSample(input_vmsamples, simulation_config)
    assert_that(output, equal_to(correct_output))

def test_any_field_present(self):
  any_field_present = [{"start_time": 10}]
  correct_output = [{
      "time": 10,
      "info": {
          "unique_id": "None-None",
          "collection_id": None,
          "instance_index": None,
          "priority": -1,
          "scheduling_class": -1,
          "machine_id": None,
          "alloc_collection_id": -1,
          "alloc_instance_index": -100,
          "collection_type": -1,
      },
      "metrics": {
          "avg_cpu_usage": -1,
          "avg_memory_usage": -1,
          "max_cpu_usage": -1,
          "max_memory_usage": -1,
          "random_sample_cpu_usage": -1,
          "assigned_memory": -1,
          "sample_rate": -1,
          "p0_cpu_usage": -1,
          "p10_cpu_usage": -1,
          "p20_cpu_usage": -1,
          "p30_cpu_usage": -1,
          "p40_cpu_usage": -1,
          "p50_cpu_usage": -1,
          "p60_cpu_usage": -1,
          "p70_cpu_usage": -1,
          "p80_cpu_usage": -1,
          "p90_cpu_usage": -1,
          "p91_cpu_usage": -1,
          "p92_cpu_usage": -1,
          "p93_cpu_usage": -1,
          "p94_cpu_usage": -1,
          "p95_cpu_usage": -1,
          "p96_cpu_usage": -1,
          "p97_cpu_usage": -1,
          "p98_cpu_usage": -1,
          "p99_cpu_usage": -1,
          "memory_limit": -1,
          "cpu_limit": -1,
      },
      "abstract_metrics": {
          "usage": 0,
          "limit": 0
      },
  }]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vm_sample = p | "Create any field present test input" >> beam.Create(
        any_field_present)
    output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
        MapVMSampleToSchema)
    assert_that(output, equal_to(correct_output))

def test_empty_input(self):
  empty_input = []
  correct_output = []
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vm_sample = p | "Create empty test input" >> beam.Create(
        empty_input)
    output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
        MapVMSampleToSchema)
    assert_that(output, equal_to(correct_output))

def test_dofn_lifecycle(self):
  from apache_beam.runners.direct import direct_runner
  from apache_beam.runners.portability import fn_api_runner
  runners = [
      direct_runner.BundleBasedDirectRunner(),
      fn_api_runner.FnApiRunner()
  ]
  for r in runners:
    with TestPipeline(runner=r) as p:
      _ = (
          p
          | 'Start' >> beam.Create([1, 2, 3])
          | 'Do' >> beam.ParDo(CallSequenceEnforcingDoFn()))

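# The DoFn under test, CallSequenceEnforcingDoFn, is defined elsewhere in the
# test module. As a rough illustration of the pattern it relies on (a
# hypothetical sketch, not the actual implementation), a lifecycle-checking
# DoFn records which hooks have fired and asserts they run in the documented
# order: setup -> start_bundle -> process -> finish_bundle -> teardown.
class _LifecycleCheckingDoFn(beam.DoFn):

  def __init__(self):
    self._setup_called = False
    self._bundle_started = False

  def setup(self):
    self._setup_called = True

  def start_bundle(self):
    assert self._setup_called, 'setup() must run before start_bundle()'
    self._bundle_started = True

  def process(self, element):
    assert self._bundle_started, 'start_bundle() must run before process()'
    yield element

  def finish_bundle(self):
    self._bundle_started = False

  def teardown(self):
    assert self._setup_called, 'teardown() should follow a completed setup()'
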
def test_single_usage_multiple_events(self):
  single_usage = [
      {
          "start_time": 15,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.4,
      },
  ]
  multiple_events = [
      {
          "time": 5,
          "collection_id": 2,
          "instance_index": 1,
          "resource_request": {"cpus": 0.6, "memory": 2},
      },
      {
          "time": 17,
          "collection_id": 2,
          "instance_index": 1,
          "resource_request": {"cpus": 0, "memory": 1},
      },
      {
          "time": 23,
          "collection_id": 2,
          "instance_index": 1,
          "resource_request": {"cpus": 0.7, "memory": 0.5},
      },
  ]
  correct_output = [
      {
          "start_time": 15,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.4,
          "time": 5,
          "resource_request": {"cpus": 0.6, "memory": 2},
      },
  ]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_usages = p | "Create standard usage test input" >> beam.Create(
        single_usage)
    input_events = p | "Create zero request event test input" >> beam.Create(
        multiple_events)
    output = JoinUsageAndEvent(input_usages, input_events)
    assert_that(output, equal_to(correct_output))

def test_basic(self):
  # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
  # once interactive_runner works with FnAPI mode
  p = beam.Pipeline(runner=interactive_runner.InteractiveRunner(
      direct_runner.BundleBasedDirectRunner()))
  p.run().wait_until_finish()
  pc0 = (
      p
      | 'read' >> beam.Create([1, 2, 3])
      | 'Print1.1' >> beam.Map(print_with_message('Run1.1')))
  pc = pc0 | 'Print1.2' >> beam.Map(print_with_message('Run1.2'))
  p.run().wait_until_finish()
  _ = pc | 'Print2' >> beam.Map(print_with_message('Run2'))
  p.run().wait_until_finish()
  _ = pc0 | 'Print3' >> beam.Map(print_with_message('Run3'))
  p.run().wait_until_finish()

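# `print_with_message` is a helper defined elsewhere in this test module. A
# minimal sketch of what it presumably does (an assumption for readability,
# not the verbatim helper): return a callable that logs a tag next to each
# element and passes the element through unchanged, so intermediate
# PCollections can be observed across the incremental runs above.
def _print_with_message_sketch(msg):

  def printer(elem):
    print(msg, elem)
    return elem

  return printer
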
def test_multiple_usages_no_event(self):
  multiple_usages = [
      {
          "start_time": 10,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.5,
      },
      {
          "start_time": 15,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.4,
      },
      {
          "start_time": 20,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.3,
      },
      {
          "start_time": 25,
          "collection_id": 2,
          "instance_index": 1,
          "avg_usage.cpus": 0.2,
      },
  ]
  no_event = []
  correct_output = []
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_usages = p | "Create multiple usage test input" >> beam.Create(
        multiple_usages)
    input_events = p | "Create no event test input" >> beam.Create(no_event)
    output = JoinUsageAndEvent(input_usages, input_events)
    assert_that(output, equal_to(correct_output))

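# Read together with test_single_usage_multiple_events above, these cases
# suggest JoinUsageAndEvent behaves like an inner join on
# (collection_id, instance_index): each usage record is paired with the latest
# event at or before its start_time, and usages with no matching event are
# dropped. This reading is inferred from the expected outputs, not taken from
# the transform's implementation.
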
def test_wordcount(self):

  class WordExtractingDoFn(beam.DoFn):

    def process(self, element):
      text_line = element.strip()
      words = text_line.split()
      return words

  # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
  # once interactive_runner works with FnAPI mode
  p = beam.Pipeline(
      runner=interactive_runner.InteractiveRunner(
          direct_runner.BundleBasedDirectRunner()))

  # Count the occurrences of each word.
  counts = (
      p
      | beam.Create(['to be or not to be that is the question'])
      | 'split' >> beam.ParDo(WordExtractingDoFn())
      | 'pair_with_one' >> beam.Map(lambda x: (x, 1))
      | 'group' >> beam.GroupByKey()
      | 'count' >> beam.Map(lambda wordones: (wordones[0], sum(wordones[1]))))

  result = p.run()
  result.wait_until_finish()

  actual = dict(result.get(counts))
  self.assertDictEqual(
      actual, {
          'to': 2,
          'be': 2,
          'or': 1,
          'not': 1,
          'that': 1,
          'is': 1,
          'the': 1,
          'question': 1
      })

def test_time_filter(self):
  simulation_config = config_pb2.SimulationConfig()
  simulation_config.filter.start_time = 15
  simulation_config.filter.end_time = 25
  correct_output = [
      {
          "time": 20,
          "info": {
              "priority": 4,
              "scheduling_class": 1,
              "machine_id": 200,
              "alloc_collection_id": 1,
          },
      },
  ]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vmsamples = p | "Create time test input" >> beam.Create(
        self.multiple_vmsamples)
    output = FilterVMSample(input_vmsamples, simulation_config)
    assert_that(output, equal_to(correct_output))

def test_setting_memory_metric(self):
  simulation_config = config_pb2.SimulationConfig()
  simulation_config.metric.max_memory_usage = True
  correct_output = [{
      "simulated_time": 1,
      "simulated_machine": 1,
      "sample": {
          "time": 300000000,
          "info": {
              "unique_id": "1-2",
              "collection_id": 1,
              "instance_index": 2,
              "priority": 6,
              "scheduling_class": 3,
              "machine_id": 3,
              "alloc_collection_id": 0,
              "alloc_instance_index": 5,
              "collection_type": 0,
          },
          "metrics": {
              "avg_cpu_usage": 0.8,
              "avg_memory_usage": 8,
              "max_cpu_usage": 0.1,
              "max_memory_usage": 0.1,
              "random_sample_cpu_usage": 0.11,
              "random_sample_memory_usage": 12,
              "assigned_memory": 13,
              "sample_rate": 17,
              "p0_cpu_usage": 0,
              "p10_cpu_usage": 0.1,
              "p20_cpu_usage": 0.2,
              "p30_cpu_usage": 0.3,
              "p40_cpu_usage": 0.4,
              "p50_cpu_usage": 0.5,
              "p60_cpu_usage": 0.6,
              "p70_cpu_usage": 0.7,
              "p80_cpu_usage": 0.8,
              "p90_cpu_usage": 0.9,
              "p91_cpu_usage": 0.91,
              "p92_cpu_usage": 0.92,
              "p93_cpu_usage": 0.93,
              "p94_cpu_usage": 0.94,
              "p95_cpu_usage": 0.95,
              "p96_cpu_usage": 0.96,
              "p97_cpu_usage": 0.97,
              "p98_cpu_usage": 0.98,
              "p99_cpu_usage": 0.99,
              "memory_limit": 0.8,
              "cpu_limit": 0.6,
          },
          "abstract_metrics": {
              "usage": 0.1,
              "limit": 0.8
          },
      },
  }]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vmsample = p | "Create test input" >> beam.Create(self.vmsample)
    output = SetAbstractMetrics(input_vmsample, simulation_config)
    assert_that(output, equal_to(correct_output))

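# The expected output above implies that, with metric.max_memory_usage set,
# SetAbstractMetrics copies the selected memory metric into the abstract view:
# abstract_metrics.usage takes metrics.max_memory_usage and
# abstract_metrics.limit takes metrics.memory_limit. A minimal per-element
# sketch of that mapping (illustrative assumption only; the helper name and
# the handling of other metric choices are not part of the transform under
# test):
def _apply_max_memory_metric(vmsample):
  sample = vmsample["sample"]
  sample["abstract_metrics"]["usage"] = sample["metrics"]["max_memory_usage"]
  sample["abstract_metrics"]["limit"] = sample["metrics"]["memory_limit"]
  return vmsample
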
"time": 600_000_000, "info": { "unique_id": "1-2", }, "metrics": { "avg_cpu_usage": 0.1, }, "abstract_metrics": { "usage": 1, "limit": 1 }, }, }, ] with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p: input_vmsamples = p | "Create time test input" >> beam.Create( ordered_vmsamples) output = ResetAndShiftSimulatedTime(input_vmsamples, self.simulation_config) assert_that(output, equal_to(correct_output)) def test_unordered_samples(self): unordered_vmsamples = [ { "simulated_time": 600_000_000, "simulated_machine": 1, "sample": { "time": 600_000_000,
def test_all_fields_present(self):
  all_fields_present = [{
      "start_time": 10,
      "end_time": 15,
      "collection_id": 1,
      "instance_index": 2,
      "machine_id": 3,
      "alloc_collection_id": 4,
      "alloc_instance_index": 5,
      "collection_type": 5,
      "average_usage": {"cpus": 0.7, "memory": 8},
      "maximum_usage": {"cpus": 0.9, "memory": 10},
      "random_sample_usage": {"cpus": 0.11, "memory": 12},
      "assigned_memory": 13,
      "sample_rate": 17,
      "cpu_usage_distribution": [
          0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1,
      ],
      "tail_cpu_usage_distribution": [
          0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
      ],
      "time": 5,
      "scheduling_class": 3,
      "priority": 6,
      "resource_request": {"cpus": 0.6, "memory": 2},
  }]
  correct_output = [{
      "time": 10,
      "info": {
          "unique_id": "1-2",
          "collection_id": 1,
          "instance_index": 2,
          "priority": 6,
          "scheduling_class": 3,
          "machine_id": 3,
          "alloc_collection_id": 4,
          "alloc_instance_index": 5,
          "collection_type": 5,
      },
      "metrics": {
          "avg_cpu_usage": 0.7,
          "avg_memory_usage": 8,
          "max_cpu_usage": 0.9,
          "max_memory_usage": 10,
          "random_sample_cpu_usage": 0.11,
          "assigned_memory": 13,
          "sample_rate": 17,
          "p0_cpu_usage": 0,
          "p10_cpu_usage": 0.1,
          "p20_cpu_usage": 0.2,
          "p30_cpu_usage": 0.3,
          "p40_cpu_usage": 0.4,
          "p50_cpu_usage": 0.5,
          "p60_cpu_usage": 0.6,
          "p70_cpu_usage": 0.7,
          "p80_cpu_usage": 0.8,
          "p90_cpu_usage": 0.9,
          "p91_cpu_usage": 0.91,
          "p92_cpu_usage": 0.92,
          "p93_cpu_usage": 0.93,
          "p94_cpu_usage": 0.94,
          "p95_cpu_usage": 0.95,
          "p96_cpu_usage": 0.96,
          "p97_cpu_usage": 0.97,
          "p98_cpu_usage": 0.98,
          "p99_cpu_usage": 0.99,
          "memory_limit": 2,
          "cpu_limit": 0.6,
      },
      "abstract_metrics": {
          "usage": 0,
          "limit": 0
      },
  }]
  with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
    input_vm_sample = p | "Create all fields present test input" >> beam.Create(
        all_fields_present)
    output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
        MapVMSampleToSchema)
    assert_that(output, equal_to(correct_output))