Example #1
  def __init__(self,
               underlying_runner=direct_runner.BundleBasedDirectRunner()):
    # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
    # once interactive_runner works with FnAPI mode
    self._underlying_runner = underlying_runner
    self._cache_manager = cache.LocalFileCacheManager()
    self._in_session = False
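
For context, a minimal usage sketch of this constructor (a hypothetical snippet; it assumes only the apache_beam modules already referenced in these examples):

import apache_beam as beam
from apache_beam.runners.direct import direct_runner
from apache_beam.runners.interactive import interactive_runner

# Wrap the direct runner in the interactive runner; the interactive layer
# adds caching while BundleBasedDirectRunner performs the actual execution.
p = beam.Pipeline(runner=interactive_runner.InteractiveRunner(
    direct_runner.BundleBasedDirectRunner()))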
    def test_priority_filter(self):
        simulation_config = config_pb2.SimulationConfig()
        simulation_config.filter.priority_range.lower_bound = 3
        simulation_config.filter.priority_range.upper_bound = 7

        correct_output = [
            {
                "time": 20,
                "info": {
                    "priority": 4,
                    "scheduling_class": 1,
                    "machine_id": 200,
                    "alloc_collection_id": 1,
                },
            },
            {
                "time": 30,
                "info": {
                    "priority": 6,
                    "scheduling_class": 2,
                    "machine_id": 300,
                    "alloc_collection_id": 0,
                },
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsamples = p | "Create priority test input" >> beam.Create(
                self.multiple_vmsamples)
            output = FilterVMSample(input_vmsamples, simulation_config)

            assert_that(output, equal_to(correct_output))
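
Judging from the expected output, the priority filter keeps samples whose info.priority lies within the configured bounds (here the priority-4 and priority-6 samples survive bounds of 3 and 7). A hypothetical sketch of that predicate, not the actual FilterVMSample implementation:

def _in_priority_range(vmsample, simulation_config):
    # Boundary inclusivity is not pinned down by this test; an inclusive
    # range is assumed here.
    lower = simulation_config.filter.priority_range.lower_bound
    upper = simulation_config.filter.priority_range.upper_bound
    return lower <= vmsample["info"]["priority"] <= upper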
    def test_all_filters(self):
        simulation_config = config_pb2.SimulationConfig()
        simulation_config.filter.remove_non_top_level_vms = True
        simulation_config.filter.priority_range.lower_bound = 1
        simulation_config.filter.priority_range.upper_bound = 7
        simulation_config.filter.scheduling_class_range.lower_bound = 0
        simulation_config.filter.scheduling_class_range.upper_bound = 1
        simulation_config.filter.start_time = 5
        simulation_config.filter.end_time = 15

        correct_output = [
            {
                "time": 10,
                "info": {
                    "priority": 2,
                    "scheduling_class": 0,
                    "machine_id": 100,
                    "alloc_collection_id": 0,
                },
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsamples = p | "Create all filter input" >> beam.Create(
                self.multiple_vmsamples)
            output = FilterVMSample(input_vmsamples, simulation_config)

            assert_that(output, equal_to(correct_output))
    def test_top_level_filter(self):
        simulation_config = config_pb2.SimulationConfig()
        simulation_config.filter.remove_non_top_level_vms = True

        correct_output = [
            {
                "time": 10,
                "info": {
                    "priority": 2,
                    "scheduling_class": 0,
                    "machine_id": 100,
                    "alloc_collection_id": 0,
                },
            },
            {
                "time": 30,
                "info": {
                    "priority": 6,
                    "scheduling_class": 2,
                    "machine_id": 300,
                    "alloc_collection_id": 0,
                },
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsamples = p | "Create top level filter input" >> beam.Create(
                self.multiple_vmsamples)
            output = FilterVMSample(input_vmsamples, simulation_config)

            assert_that(output, equal_to(correct_output))
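
The expected output suggests that remove_non_top_level_vms drops samples with a non-zero alloc_collection_id: the time-20 sample (alloc_collection_id 1) is removed while the alloc_collection_id-0 samples survive. A hypothetical predicate sketch under that assumption:

def _is_top_level(vmsample):
    # Assumption: alloc_collection_id == 0 marks a top-level VM in the trace.
    return vmsample["info"]["alloc_collection_id"] == 0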
    def test_any_field_present(self):
        any_field_present = [{"start_time": 10}]
        correct_output = [{
            "time": 10,
            "info": {
                "unique_id": "None-None",
                "collection_id": None,
                "instance_index": None,
                "priority": -1,
                "scheduling_class": -1,
                "machine_id": None,
                "alloc_collection_id": -1,
                "alloc_instance_index": -100,
                "collection_type": -1,
            },
            "metrics": {
                "avg_cpu_usage": -1,
                "avg_memory_usage": -1,
                "max_cpu_usage": -1,
                "max_memory_usage": -1,
                "random_sample_cpu_usage": -1,
                "assigned_memory": -1,
                "sample_rate": -1,
                "p0_cpu_usage": -1,
                "p10_cpu_usage": -1,
                "p20_cpu_usage": -1,
                "p30_cpu_usage": -1,
                "p40_cpu_usage": -1,
                "p50_cpu_usage": -1,
                "p60_cpu_usage": -1,
                "p70_cpu_usage": -1,
                "p80_cpu_usage": -1,
                "p90_cpu_usage": -1,
                "p91_cpu_usage": -1,
                "p92_cpu_usage": -1,
                "p93_cpu_usage": -1,
                "p94_cpu_usage": -1,
                "p95_cpu_usage": -1,
                "p96_cpu_usage": -1,
                "p97_cpu_usage": -1,
                "p98_cpu_usage": -1,
                "p99_cpu_usage": -1,
                "memory_limit": -1,
                "cpu_limit": -1,
            },
            "abstract_metrics": {
                "usage": 0,
                "limit": 0
            },
        }]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vm_sample = p | "Create any field present test input" >> beam.Create(
                any_field_present)
            output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
                MapVMSampleToSchema)

            assert_that(output, equal_to(correct_output))
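
This test pins down the schema's defaulting behavior: absent input fields become sentinel values (-1 for most numerics, None for identifiers, -100 for alloc_instance_index, and a unique_id string built from collection_id and instance_index). A hypothetical sketch of the info-block defaults implied here, not the actual MapVMSampleToSchema code:

def _info_with_defaults(vmsample):
    collection_id = vmsample.get("collection_id")
    instance_index = vmsample.get("instance_index")
    return {
        # An f-string over None values yields the "None-None" seen above.
        "unique_id": f"{collection_id}-{instance_index}",
        "collection_id": collection_id,
        "instance_index": instance_index,
        "priority": vmsample.get("priority", -1),
        "scheduling_class": vmsample.get("scheduling_class", -1),
        "machine_id": vmsample.get("machine_id"),
        "alloc_collection_id": vmsample.get("alloc_collection_id", -1),
        "alloc_instance_index": vmsample.get("alloc_instance_index", -100),
        "collection_type": vmsample.get("collection_type", -1),
    }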
    def test_empty_input(self):
        empty_input = []
        correct_output = []

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vm_sample = p | "Create empty test input" >> beam.Create(
                empty_input)
            output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
                MapVMSampleToSchema)

            assert_that(output, equal_to(correct_output))
Example #7
  def test_dofn_lifecycle(self):
    from apache_beam.runners.direct import direct_runner
    from apache_beam.runners.portability import fn_api_runner
    runners = [
        direct_runner.BundleBasedDirectRunner(),
        fn_api_runner.FnApiRunner()
    ]
    for r in runners:
      with TestPipeline(runner=r) as p:
        _ = (p
             | 'Start' >> beam.Create([1, 2, 3])
             | 'Do' >> beam.ParDo(CallSequenceEnforcingDoFn()))
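
CallSequenceEnforcingDoFn is defined elsewhere in the Beam test suite. A hypothetical sketch of such a DoFn, asserting that the lifecycle hooks fire in the documented order (setup, then repeated start_bundle/process/finish_bundle cycles):

class _CallSequenceEnforcingDoFn(beam.DoFn):
    def __init__(self):
        self._setup_done = False
        self._bundle_open = False

    def setup(self):
        self._setup_done = True

    def start_bundle(self):
        # A bundle may only start after setup, and after any prior bundle closed.
        assert self._setup_done and not self._bundle_open
        self._bundle_open = True

    def process(self, element):
        assert self._bundle_open
        yield element

    def finish_bundle(self):
        assert self._bundle_open
        self._bundle_open = False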
    def test_single_usage_multiple_events(self):
        single_usage = [
            {
                "start_time": 15,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.4,
            },
        ]

        multiple_events = [
            {
                "time": 5,
                "collection_id": 2,
                "instance_index": 1,
                "resource_request": {"cpus": 0.6, "memory": 2},
            },
            {
                "time": 17,
                "collection_id": 2,
                "instance_index": 1,
                "resource_request": {"cpus": 0, "memory": 1},
            },
            {
                "time": 23,
                "collection_id": 2,
                "instance_index": 1,
                "resource_request": {"cpus": 0.7, "memory": 0.5},
            },
        ]

        correct_output = [
            {
                "start_time": 15,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.4,
                "time": 5,
                "resource_request": {"cpus": 0.6, "memory": 2},
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_usages = p | "Create single usage test input" >> beam.Create(
                single_usage
            )
            input_events = p | "Create multiple events test input" >> beam.Create(
                multiple_events
            )
            output = JoinUsageAndEvent(input_usages, input_events)

            assert_that(output, equal_to(correct_output))
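
The expected output is consistent with an as-of join: the usage (start_time 15) pairs with the event at time 5, the only event at or before it, rather than the later events at 17 and 23. A hypothetical sketch of that matching rule, not the actual JoinUsageAndEvent transform:

def _latest_event_at_or_before(usage, events):
    # Pick the most recent event that is not newer than the usage window start.
    candidates = [e for e in events if e["time"] <= usage["start_time"]]
    return max(candidates, key=lambda e: e["time"], default=None)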
Example #9
  def test_basic(self):
    # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
    # once interactive_runner works with FnAPI mode
    p = beam.Pipeline(runner=interactive_runner.InteractiveRunner(
        direct_runner.BundleBasedDirectRunner()))
    p.run().wait_until_finish()
    pc0 = (p | 'read' >> beam.Create([1, 2, 3])
           | 'Print1.1' >> beam.Map(print_with_message('Run1.1')))
    pc = pc0 | 'Print1.2' >> beam.Map(print_with_message('Run1.2'))
    p.run().wait_until_finish()
    _ = pc | 'Print2' >> beam.Map(print_with_message('Run2'))
    p.run().wait_until_finish()
    _ = pc0 | 'Print3' >> beam.Map(print_with_message('Run3'))
    p.run().wait_until_finish()
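
What this fragment exercises: the pipeline is run after each batch of appended stages, and the interactive runner (note the cache manager in the Example #1 constructor) can serve previously computed PCollections such as pc0 and pc from its cache on later runs, which is why the print labels distinguish Run1.1/Run1.2 from Run2 and Run3.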
    def test_multiple_usages_no_event(self):
        multiple_usages = [
            {
                "start_time": 10,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.5,
            },
            {
                "start_time": 15,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.4,
            },
            {
                "start_time": 20,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.3,
            },
            {
                "start_time": 25,
                "collection_id": 2,
                "instance_index": 1,
                "avg_usage.cpus": 0.2,
            },
        ]

        no_event = []

        correct_output = []

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_usages = p | "Create multiple usage test input" >> beam.Create(
                multiple_usages
            )
            input_events = p | "Create no event test input" >> beam.Create(no_event)
            output = JoinUsageAndEvent(input_usages, input_events)

            assert_that(output, equal_to(correct_output))
Example #11
  def test_wordcount(self):

    class WordExtractingDoFn(beam.DoFn):

      def process(self, element):
        text_line = element.strip()
        words = text_line.split()
        return words

    # TODO(qinyeli, BEAM-4755) remove explicitly overriding underlying runner
    # once interactive_runner works with FnAPI mode
    p = beam.Pipeline(
        runner=interactive_runner.InteractiveRunner(
            direct_runner.BundleBasedDirectRunner()))

    # Count the occurrences of each word.
    counts = (
        p
        | beam.Create(['to be or not to be that is the question'])
        | 'split' >> beam.ParDo(WordExtractingDoFn())
        | 'pair_with_one' >> beam.Map(lambda x: (x, 1))
        | 'group' >> beam.GroupByKey()
        | 'count' >> beam.Map(lambda wordones: (wordones[0], sum(wordones[1]))))

    result = p.run()
    result.wait_until_finish()

    actual = dict(result.get(counts))
    self.assertDictEqual(
        actual, {
            'to': 2,
            'be': 2,
            'or': 1,
            'not': 1,
            'that': 1,
            'is': 1,
            'the': 1,
            'question': 1
        })
    def test_time_filter(self):
        simulation_config = config_pb2.SimulationConfig()
        simulation_config.filter.start_time = 15
        simulation_config.filter.end_time = 25

        correct_output = [
            {
                "time": 20,
                "info": {
                    "priority": 4,
                    "scheduling_class": 1,
                    "machine_id": 200,
                    "alloc_collection_id": 1,
                },
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsamples = p | "Create time test input" >> beam.Create(
                self.multiple_vmsamples)
            output = FilterVMSample(input_vmsamples, simulation_config)

            assert_that(output, equal_to(correct_output))
    def test_setting_memory_metric(self):
        simulation_config = config_pb2.SimulationConfig()
        simulation_config.metric.max_memory_usage = True
        correct_output = [{
            "simulated_time": 1,
            "simulated_machine": 1,
            "sample": {
                "time": 300000000,
                "info": {
                    "unique_id": "1-2",
                    "collection_id": 1,
                    "instance_index": 2,
                    "priority": 6,
                    "scheduling_class": 3,
                    "machine_id": 3,
                    "alloc_collection_id": 0,
                    "alloc_instance_index": 5,
                    "collection_type": 0,
                },
                "metrics": {
                    "avg_cpu_usage": 0.8,
                    "avg_memory_usage": 8,
                    "max_cpu_usage": 0.1,
                    "max_memory_usage": 0.1,
                    "random_sample_cpu_usage": 0.11,
                    "random_sample_memory_usage": 12,
                    "assigned_memory": 13,
                    "sample_rate": 17,
                    "p0_cpu_usage": 0,
                    "p10_cpu_usage": 0.1,
                    "p20_cpu_usage": 0.2,
                    "p30_cpu_usage": 0.3,
                    "p40_cpu_usage": 0.4,
                    "p50_cpu_usage": 0.5,
                    "p60_cpu_usage": 0.6,
                    "p70_cpu_usage": 0.7,
                    "p80_cpu_usage": 0.8,
                    "p90_cpu_usage": 0.9,
                    "p91_cpu_usage": 0.91,
                    "p92_cpu_usage": 0.92,
                    "p93_cpu_usage": 0.93,
                    "p94_cpu_usage": 0.94,
                    "p95_cpu_usage": 0.95,
                    "p96_cpu_usage": 0.96,
                    "p97_cpu_usage": 0.97,
                    "p98_cpu_usage": 0.98,
                    "p99_cpu_usage": 0.99,
                    "memory_limit": 0.8,
                    "cpu_limit": 0.6,
                },
                "abstract_metrics": {
                    "usage": 0.1,
                    "limit": 0.8
                },
            },
        }]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsample = p | "Create test input" >> beam.Create(
                self.vmsample)
            output = SetAbstractMetrics(input_vmsample, simulation_config)
            assert_that(output, equal_to(correct_output))
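
With metric.max_memory_usage set, the expected output copies metrics.max_memory_usage (0.1) into abstract_metrics.usage and memory_limit (0.8) into abstract_metrics.limit. A hypothetical sketch of that branch, not the actual SetAbstractMetrics code:

def _set_abstract_memory_metrics(vmsample, simulation_config):
    metrics = vmsample["sample"]["metrics"]
    abstract = vmsample["sample"]["abstract_metrics"]
    if simulation_config.metric.max_memory_usage:
        abstract["usage"] = metrics["max_memory_usage"]
        abstract["limit"] = metrics["memory_limit"]
    return vmsample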
Example #14
                    "time": 600_000_000,
                    "info": {
                        "unique_id": "1-2",
                    },
                    "metrics": {
                        "avg_cpu_usage": 0.1,
                    },
                    "abstract_metrics": {
                        "usage": 1,
                        "limit": 1
                    },
                },
            },
        ]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vmsamples = p | "Create time test input" >> beam.Create(
                ordered_vmsamples)
            output = ResetAndShiftSimulatedTime(input_vmsamples,
                                                self.simulation_config)

            assert_that(output, equal_to(correct_output))

    def test_unordered_samples(self):
        unordered_vmsamples = [
            {
                "simulated_time": 600_000_000,
                "simulated_machine": 1,
                "sample": {
                    "time": 600_000_000,
    def test_all_fields_present(self):
        all_fields_present = [{
            "start_time": 10,
            "end_time": 15,
            "collection_id": 1,
            "instance_index": 2,
            "machine_id": 3,
            "alloc_collection_id": 4,
            "alloc_instance_index": 5,
            "collection_type": 5,
            "average_usage": {"cpus": 0.7, "memory": 8},
            "maximum_usage": {"cpus": 0.9, "memory": 10},
            "random_sample_usage": {"cpus": 0.11, "memory": 12},
            "assigned_memory": 13,
            "sample_rate": 17,
            "cpu_usage_distribution": [
                0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1,
            ],
            "tail_cpu_usage_distribution": [
                0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
            ],
            "time": 5,
            "scheduling_class": 3,
            "priority": 6,
            "resource_request": {"cpus": 0.6, "memory": 2},
        }]
        correct_output = [{
            "time": 10,
            "info": {
                "unique_id": "1-2",
                "collection_id": 1,
                "instance_index": 2,
                "priority": 6,
                "scheduling_class": 3,
                "machine_id": 3,
                "alloc_collection_id": 4,
                "alloc_instance_index": 5,
                "collection_type": 5,
            },
            "metrics": {
                "avg_cpu_usage": 0.7,
                "avg_memory_usage": 8,
                "max_cpu_usage": 0.9,
                "max_memory_usage": 10,
                "random_sample_cpu_usage": 0.11,
                "assigned_memory": 13,
                "sample_rate": 17,
                "p0_cpu_usage": 0,
                "p10_cpu_usage": 0.1,
                "p20_cpu_usage": 0.2,
                "p30_cpu_usage": 0.3,
                "p40_cpu_usage": 0.4,
                "p50_cpu_usage": 0.5,
                "p60_cpu_usage": 0.6,
                "p70_cpu_usage": 0.7,
                "p80_cpu_usage": 0.8,
                "p90_cpu_usage": 0.9,
                "p91_cpu_usage": 0.91,
                "p92_cpu_usage": 0.92,
                "p93_cpu_usage": 0.93,
                "p94_cpu_usage": 0.94,
                "p95_cpu_usage": 0.95,
                "p96_cpu_usage": 0.96,
                "p97_cpu_usage": 0.97,
                "p98_cpu_usage": 0.98,
                "p99_cpu_usage": 0.99,
                "memory_limit": 2,
                "cpu_limit": 0.6,
            },
            "abstract_metrics": {
                "usage": 0,
                "limit": 0
            },
        }]

        with TestPipeline(runner=direct_runner.BundleBasedDirectRunner()) as p:
            input_vm_sample = p | "Create all fields present test input" >> beam.Create(
                all_fields_present)
            output = input_vm_sample | "Apply MapVMSampleToSchema Transform" >> beam.Map(
                MapVMSampleToSchema)

            assert_that(output, equal_to(correct_output))
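
The expected output also pins the percentile mapping: cpu_usage_distribution[i] becomes p{10*i}_cpu_usage for the first ten entries (the eleventh does not appear in the output), and tail_cpu_usage_distribution[j] becomes p{91+j}_cpu_usage, while resource_request.cpus and resource_request.memory land in cpu_limit and memory_limit. A hypothetical sketch of the percentile step, not the actual MapVMSampleToSchema code:

def _map_cpu_percentiles(vmsample):
    metrics = {}
    dist = vmsample.get("cpu_usage_distribution", [])
    for i in range(min(10, len(dist))):
        metrics["p%d_cpu_usage" % (10 * i)] = dist[i]
    tail = vmsample.get("tail_cpu_usage_distribution", [])
    for j, value in enumerate(tail):
        metrics["p%d_cpu_usage" % (91 + j)] = value
    return metrics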