Exemplo n.º 1
0
    def test_left_outer_join_without_where(self):
        """Left-outer-join two CSV-backed sources with the predicate given to the join.

        The join condition ``a = d`` is passed straight to ``left_outer_join``
        (no separate ``where``). The first source contains ``a = 1``, which has
        no matching ``d`` in the second source; the expected output carries
        that row as ``'1,null'``.
        """
        # Let os.path.join insert the separator instead of concatenating a
        # hard-coded '/' and then calling join with a single argument.
        source_path = os.path.join(self.tempdir, 'streaming.csv')
        field_names = ["a", "b", "c"]
        field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
        data = [(1, "Hi", "Hello"), (2, "Hi", "Hello"), (3, "Hello", "Hello")]
        csv_source = self.prepare_csv_source(source_path, data, field_types, field_names)

        source_path2 = os.path.join(self.tempdir, 'streaming2.csv')
        field_names2 = ["d", "e"]
        field_types2 = [DataTypes.INT(), DataTypes.STRING()]
        data2 = [(2, "Flink"), (3, "Python"), (3, "Flink")]
        csv_source2 = self.prepare_csv_source(source_path2, data2, field_types2, field_names2)

        t_env = self.t_env
        t_env.register_table_source("Source1", csv_source)
        t_env.register_table_source("Source2", csv_source2)
        source1 = t_env.scan("Source1")
        source2 = t_env.scan("Source2")

        # Schema of the sink that receives the joined rows.
        sink_field_names = ["a", "b"]
        sink_field_types = [DataTypes.INT(), DataTypes.STRING()]
        t_env.register_table_sink(
            "Results",
            sink_field_names, sink_field_types, source_sink_utils.TestRetractSink())

        result = source1.left_outer_join(source2, "a = d").select("a, b + e")
        result.insert_into("Results")
        t_env.execute()
        actual = source_sink_utils.results()

        expected = ['1,null', '2,HiFlink', '3,HelloPython', '3,HelloFlink']
        self.assert_equals(actual, expected)
 def load_model(self, table_env):
     """
     Materialize the model data table into a retract sink and store the
     resulting max_sum value, which is later used for prediction.

     :param table_env: table environment on which the "Model_Results" sink
                       is registered.
     """
     sink = source_sink_utils.TestRetractSink(["max_sum"], [DataTypes.BIGINT()])
     table_env.register_table_sink("Model_Results", sink)

     # Block until the insert job has finished before reading the results.
     insert_job = self._model_data_table.execute_insert("Model_Results")
     insert_job.wait()

     collected = source_sink_utils.results()
     # NOTE(review): `apply(0)` presumably fetches the first element of a
     # JVM-backed collection returned by results() -- confirm against
     # source_sink_utils.
     self.max_sum = collected.apply(0)
Exemplo n.º 3
0
    def test_print_schema(self):
        """Build a grouped aggregation over in-memory rows and print its schema."""
        env = self.t_env
        rows = [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')]
        table = env.from_elements(rows, ['a', 'b', 'c'])

        # A "Results" sink is registered although this test never inserts into it.
        env.register_table_sink(
            "Results",
            ["a", "b"],
            [DataTypes.BIGINT(), DataTypes.STRING()],
            source_sink_utils.TestRetractSink())

        grouped = table.group_by("c")
        grouped.select("a.sum, c as b").print_schema()
Exemplo n.º 4
0
    def test_group_by(self):
        """Group in-memory rows by column ``c`` and verify the summed ``a``."""
        env = self.t_env
        rows = [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')]
        table = env.from_elements(rows, ['a', 'b', 'c'])

        env.register_table_sink(
            "Results",
            ["a", "b"],
            [DataTypes.BIGINT(), DataTypes.STRING()],
            source_sink_utils.TestRetractSink())

        # All three rows share c == 'Hello', so one group with a.sum == 1 + 2 + 2.
        aggregated = table.group_by("c").select("a.sum, c as b")
        aggregated.insert_into("Results")
        env.execute()

        self.assert_equals(source_sink_utils.results(), ['5,Hello'])
Exemplo n.º 5
0
    def test_distinct(self):
        """Deduplicate in-memory rows with ``distinct`` and verify the projection."""
        env = self.t_env
        rows = [(1, "Hi", "Hello"), (2, "Hello", "Hello"), (2, "Hello", "Hello")]
        table = env.from_elements(rows, ['a', 'b', 'c'])

        env.register_table_sink(
            "Results",
            ["a", "b"],
            [DataTypes.BIGINT(), DataTypes.STRING()],
            source_sink_utils.TestRetractSink())

        # The last two input rows are identical, so only two rows are expected.
        deduplicated = table.distinct()
        projected = deduplicated.select("a, c as b")
        projected.insert_into("Results")
        env.execute()

        self.assert_equals(source_sink_utils.results(), ['1,Hello', '2,Hello'])
Exemplo n.º 6
0
    def test_print_schema(self):
        """Build a grouped aggregation over a CSV-backed source and print its schema."""
        # Let os.path.join insert the separator instead of concatenating a
        # hard-coded '/' and then calling join with a single argument.
        source_path = os.path.join(self.tempdir, 'streaming.csv')
        field_names = ["a", "b", "c"]
        # NOTE(review): the DataTypes members are passed without being called
        # here -- confirm this matches the DataTypes API of the pyflink version
        # this test targets.
        field_types = [DataTypes.INT, DataTypes.STRING, DataTypes.STRING]
        data = [(1, "Hi", "Hello"), (2, "Hello", "Hello"), (2, "Hello", "Hello")]
        csv_source = self.prepare_csv_source(source_path, data, field_types, field_names)
        t_env = self.t_env
        t_env.register_table_source("Source", csv_source)
        source = t_env.scan("Source")

        # A "Results" sink is registered although this test never inserts into it.
        sink_field_names = ["a", "b"]
        sink_field_types = [DataTypes.INT, DataTypes.STRING]
        t_env.register_table_sink(
            "Results",
            sink_field_names, sink_field_types, source_sink_utils.TestRetractSink())

        result = source.group_by("c").select("a.sum, c as b")
        result.print_schema()
Exemplo n.º 7
0
    def test_left_outer_join_with_where(self):
        """Left-outer-join two in-memory tables and filter with a separate ``where``.

        The predicate ``a = d`` is applied after the join, and the expected
        output contains only the rows with a match on both sides.
        """
        env = self.t_env
        left_rows = [(1, "Hi", "Hello"), (2, "Hi", "Hello"), (3, "Hello", "Hello")]
        right_rows = [(2, "Flink"), (3, "Python"), (3, "Flink")]
        left = env.from_elements(left_rows, ['a', 'b', 'c'])
        right = env.from_elements(right_rows, ['d', 'e'])

        env.register_table_sink(
            "Results",
            ["a", "b"],
            [DataTypes.BIGINT(), DataTypes.STRING()],
            source_sink_utils.TestRetractSink())

        joined = left.left_outer_join(right)
        filtered = joined.where("a = d")
        filtered.select("a, b + e").insert_into("Results")
        env.execute()

        self.assert_equals(
            source_sink_utils.results(),
            ['2,HiFlink', '3,HelloPython', '3,HelloFlink'])
Exemplo n.º 8
0
    def test_distinct(self):
        """Deduplicate rows of a CSV-backed source with ``distinct`` and verify them."""
        # Let os.path.join insert the separator instead of concatenating a
        # hard-coded '/' and then calling join with a single argument.
        source_path = os.path.join(self.tempdir, 'streaming.csv')
        field_names = ["a", "b", "c"]
        field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
        data = [(1, "Hi", "Hello"), (2, "Hello", "Hello"),
                (2, "Hello", "Hello")]
        csv_source = self.prepare_csv_source(source_path, data, field_types,
                                             field_names)
        t_env = self.t_env
        t_env.register_table_source("Source", csv_source)
        source = t_env.scan("Source")

        # Schema of the sink that receives the deduplicated, projected rows.
        sink_field_names = ["a", "b"]
        sink_field_types = [DataTypes.INT(), DataTypes.STRING()]
        t_env.register_table_sink("Results", sink_field_names, sink_field_types,
                                  source_sink_utils.TestRetractSink())

        # The last two input rows are identical, so only two rows are expected.
        result = source.distinct().select("a, c as b")
        result.insert_into("Results")
        t_env.execute()
        actual = source_sink_utils.results()

        expected = ['1,Hello', '2,Hello']
        self.assert_equals(actual, expected)