def test_left_outer_join_without_where(self):
    """Left outer join with an inline join predicate and no ``where`` clause.

    Two CSV-backed table sources are registered and joined on ``a = d``.
    The projection ``a, b + e`` is written to a retract sink and compared
    with the expected rows; the left row with ``a = 1`` has no partner in
    the second source and is expected to surface as ``1,null``.
    """
    # Pass the path components separately so os.path.join actually joins
    # them, instead of receiving one pre-concatenated string.
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    field_names = ["a", "b", "c"]
    field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    data = [(1, "Hi", "Hello"), (2, "Hi", "Hello"), (3, "Hello", "Hello")]
    csv_source = self.prepare_csv_source(source_path, data, field_types, field_names)

    source_path2 = os.path.join(self.tempdir, 'streaming2.csv')
    field_names2 = ["d", "e"]
    field_types2 = [DataTypes.INT(), DataTypes.STRING()]
    data2 = [(2, "Flink"), (3, "Python"), (3, "Flink")]
    csv_source2 = self.prepare_csv_source(source_path2, data2, field_types2, field_names2)

    t_env = self.t_env
    t_env.register_table_source("Source1", csv_source)
    t_env.register_table_source("Source2", csv_source2)
    source1 = t_env.scan("Source1")
    source2 = t_env.scan("Source2")

    # The sink schema gets its own names rather than rebinding the
    # source-schema variables above.
    sink_field_names = ["a", "b"]
    sink_field_types = [DataTypes.INT(), DataTypes.STRING()]
    t_env.register_table_sink(
        "Results",
        sink_field_names, sink_field_types, source_sink_utils.TestRetractSink())

    result = source1.left_outer_join(source2, "a = d").select("a, b + e")
    result.insert_into("Results")
    t_env.execute()

    actual = source_sink_utils.results()
    expected = ['1,null', '2,HiFlink', '3,HelloPython', '3,HelloFlink']
    self.assert_equals(actual, expected)
def load_model(self, table_env):
    """
    Run the model data table once and remember its ``max_sum`` value.

    A retract sink with a single BIGINT column named "max_sum" is registered
    on ``table_env`` as "Model_Results". ``self._model_data_table`` is then
    inserted into that sink and the returned handle is waited on. Finally
    ``self.max_sum`` is set to ``apply(0)`` of the collected sink results.

    :param table_env: table environment on which the sink is registered.
    """
    sink = source_sink_utils.TestRetractSink(["max_sum"], [DataTypes.BIGINT()])
    table_env.register_table_sink("Model_Results", sink)

    insert_handle = self._model_data_table.execute_insert("Model_Results")
    insert_handle.wait()

    # NOTE(review): `.apply(0)` presumably indexes a JVM-side collection
    # returned by results() -- confirm against source_sink_utils.
    self.max_sum = source_sink_utils.results().apply(0)
def test_print_schema(self):
    """Call ``print_schema`` on a grouped aggregation over in-memory rows.

    Builds a three-column table with ``from_elements``, registers a
    "Results" retract sink, then groups by ``c``, selects ``a.sum, c as b``
    and prints the resulting schema. Nothing is inserted or asserted.
    """
    env = self.t_env
    rows = [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')]
    table = env.from_elements(rows, ['a', 'b', 'c'])

    # The sink is registered but never written to by this test.
    env.register_table_sink(
        "Results",
        ["a", "b"],
        [DataTypes.BIGINT(), DataTypes.STRING()],
        source_sink_utils.TestRetractSink())

    grouped = table.group_by("c")
    grouped.select("a.sum, c as b").print_schema()
def test_group_by(self):
    """Group in-memory rows by ``c`` and sum column ``a``.

    All three input rows share ``c == 'Hello'`` and their ``a`` values are
    1, 2 and 2, so the single expected output row is ``5,Hello``.
    """
    env = self.t_env
    rows = [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')]
    table = env.from_elements(rows, ['a', 'b', 'c'])

    env.register_table_sink(
        "Results",
        ["a", "b"],
        [DataTypes.BIGINT(), DataTypes.STRING()],
        source_sink_utils.TestRetractSink())

    summed = table.group_by("c").select("a.sum, c as b")
    summed.insert_into("Results")
    env.execute()

    self.assert_equals(source_sink_utils.results(), ['5,Hello'])
def test_distinct(self):
    """Apply ``distinct`` to in-memory rows, then project ``a, c as b``.

    The input holds one duplicated row; the expected sink contents are
    ``1,Hello`` and ``2,Hello``.
    """
    env = self.t_env
    rows = [(1, "Hi", "Hello"), (2, "Hello", "Hello"), (2, "Hello", "Hello")]
    table = env.from_elements(rows, ['a', 'b', 'c'])

    env.register_table_sink(
        "Results",
        ["a", "b"],
        [DataTypes.BIGINT(), DataTypes.STRING()],
        source_sink_utils.TestRetractSink())

    deduplicated = table.distinct()
    deduplicated.select("a, c as b").insert_into("Results")
    env.execute()

    self.assert_equals(source_sink_utils.results(), ['1,Hello', '2,Hello'])
def test_print_schema(self):
    """Call ``print_schema`` on a grouped aggregation over a CSV source.

    Registers a CSV-backed table source and a "Results" retract sink, then
    groups by ``c``, selects ``a.sum, c as b`` and prints the resulting
    schema. Nothing is inserted or asserted.
    """
    # Pass the path components separately so os.path.join actually joins
    # them, instead of receiving one pre-concatenated string.
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    field_names = ["a", "b", "c"]
    # Call the DataTypes factories, as the other tests here do, so that
    # data type objects are passed rather than the factories themselves.
    field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    data = [(1, "Hi", "Hello"), (2, "Hello", "Hello"), (2, "Hello", "Hello")]
    csv_source = self.prepare_csv_source(source_path, data, field_types, field_names)

    t_env = self.t_env
    t_env.register_table_source("Source", csv_source)
    source = t_env.scan("Source")

    # The sink is registered but never written to by this test.
    sink_field_names = ["a", "b"]
    sink_field_types = [DataTypes.INT(), DataTypes.STRING()]
    t_env.register_table_sink(
        "Results",
        sink_field_names, sink_field_types, source_sink_utils.TestRetractSink())

    result = source.group_by("c").select("a.sum, c as b")
    result.print_schema()
def test_left_outer_join_with_where(self):
    """Left outer join without a join predicate, filtered by ``where``.

    Two in-memory tables are joined with ``left_outer_join`` and the
    condition ``a = d`` is applied afterwards through ``where``. The
    expected output holds only the matching rows: ``2,HiFlink``,
    ``3,HelloPython`` and ``3,HelloFlink``.
    """
    env = self.t_env
    left_rows = [(1, "Hi", "Hello"), (2, "Hi", "Hello"), (3, "Hello", "Hello")]
    right_rows = [(2, "Flink"), (3, "Python"), (3, "Flink")]
    left = env.from_elements(left_rows, ['a', 'b', 'c'])
    right = env.from_elements(right_rows, ['d', 'e'])

    env.register_table_sink(
        "Results",
        ["a", "b"],
        [DataTypes.BIGINT(), DataTypes.STRING()],
        source_sink_utils.TestRetractSink())

    joined = left.left_outer_join(right)
    matched = joined.where("a = d")
    matched.select("a, b + e").insert_into("Results")
    env.execute()

    self.assert_equals(
        source_sink_utils.results(),
        ['2,HiFlink', '3,HelloPython', '3,HelloFlink'])
def test_distinct(self):
    """Apply ``distinct`` to a CSV-backed source, then project ``a, c as b``.

    The CSV data holds one duplicated row; the expected sink contents are
    ``1,Hello`` and ``2,Hello``.
    """
    # Pass the path components separately so os.path.join actually joins
    # them, instead of receiving one pre-concatenated string.
    source_path = os.path.join(self.tempdir, 'streaming.csv')
    field_names = ["a", "b", "c"]
    field_types = [DataTypes.INT(), DataTypes.STRING(), DataTypes.STRING()]
    data = [(1, "Hi", "Hello"), (2, "Hello", "Hello"), (2, "Hello", "Hello")]
    csv_source = self.prepare_csv_source(source_path, data, field_types, field_names)

    t_env = self.t_env
    t_env.register_table_source("Source", csv_source)
    source = t_env.scan("Source")

    # The sink schema gets its own names rather than rebinding the
    # source-schema variables above.
    sink_field_names = ["a", "b"]
    sink_field_types = [DataTypes.INT(), DataTypes.STRING()]
    t_env.register_table_sink(
        "Results",
        sink_field_names, sink_field_types, source_sink_utils.TestRetractSink())

    result = source.distinct().select("a, c as b")
    result.insert_into("Results")
    t_env.execute()

    actual = source_sink_utils.results()
    expected = ['1,Hello', '2,Hello']
    self.assert_equals(actual, expected)