Пример #1
0
def demo01():
    # environment configuration
    t_env = BatchTableEnvironment.create(environment_settings=EnvironmentSettings.new_instance().in_batch_mode().use_blink_planner().build())

    # register Orders table and Result table sink in table environment
    source_data_path = "/path/to/source/directory/"
    result_data_path = "/path/to/result/directory/"
    source_ddl = f"""
            create table Orders(
                a VARCHAR,
                b BIGINT,
                c BIGINT,
                rowtime TIMESTAMP(3),
                WATERMARK FOR rowtime AS rowtime - INTERVAL '1' SECOND
            ) with (
                'connector' = 'filesystem',
                'format' = 'csv',
                'path' = '{source_data_path}'
            )
            """
    t_env.execute_sql(source_ddl)

    sink_ddl = f"""
        create table `Result`(
            a VARCHAR,
            cnt BIGINT
        ) with (
            'connector' = 'filesystem',
            'format' = 'csv',
            'path' = '{result_data_path}'
        )
        """
    t_env.execute_sql(sink_ddl)

    # specify table program
    orders = t_env.from_path("Orders")  # schema (a, b, c, rowtime)

    orders.group_by("a").select(orders.a, orders.b.count.alias('cnt')).execute_insert("result").wait()

    orders.where(orders.a == 'red')
    orders.filter(orders.b % 2 == 0)
    orders.add_columns(concat(orders.c, 'sunny'))
    orders.add_or_replace_columns(concat(orders.c, 'sunny').alias('desc'))
    orders.drop_columns(orders.b, orders.c)
    orders.rename_columns(orders.b.alias('b2'), orders.c.alias('c2'))
    orders.group_by(orders.a).select(orders.a, orders.b.sum.alias('d'))

    # tab.group_by(tab.key).select(tab.key, tab.value.avg.alias('average'))
    # tab.group_by("key").select("key, value.avg as average")
    result = orders.filter(orders.a.is_not_null & orders.b.is_not_null & orders.c.is_not_null) \
        .select(orders.a.lower_case.alias('a'), orders.b, orders.rowtime) \
        .window(Tumble.over(lit(1).hour).on(orders.rowtime).alias("hourly_window")) \
        .group_by(col('hourly_window'), col('a')) \
        .select(col('a'), col('hourly_window').end.alias('hour'), col('b').avg.alias('avg_billing_amount'))
    """
Пример #2
0
    def test_expressions(self):
        expr1 = col('a')
        expr2 = col('b')
        expr3 = col('c')

        self.assertEqual('10', str(lit(10, DataTypes.INT(False))))
        self.assertEqual('rangeTo(1, 2)', str(range_(1, 2)))
        self.assertEqual('and(a, b, c)', str(and_(expr1, expr2, expr3)))
        self.assertEqual('or(a, b, c)', str(or_(expr1, expr2, expr3)))

        from pyflink.table.expressions import UNBOUNDED_ROW, UNBOUNDED_RANGE, CURRENT_ROW, \
            CURRENT_RANGE
        self.assertEqual('unboundedRow()', str(UNBOUNDED_ROW))
        self.assertEqual('unboundedRange()', str(UNBOUNDED_RANGE))
        self.assertEqual('currentRow()', str(CURRENT_ROW))
        self.assertEqual('currentRange()', str(CURRENT_RANGE))

        self.assertEqual('currentDate()', str(current_date()))
        self.assertEqual('currentTime()', str(current_time()))
        self.assertEqual('currentTimestamp()', str(current_timestamp()))
        self.assertEqual('localTime()', str(local_time()))
        self.assertEqual('localTimestamp()', str(local_timestamp()))
        self.assertEquals('toTimestampLtz(123, 0)', str(to_timestamp_ltz(123, 0)))
        self.assertEqual("temporalOverlaps(cast('2:55:00', TIME(0)), 3600000, "
                         "cast('3:30:00', TIME(0)), 7200000)",
                         str(temporal_overlaps(
                             lit("2:55:00").to_time,
                             lit(1).hours,
                             lit("3:30:00").to_time,
                             lit(2).hours)))
        self.assertEqual("dateFormat(time, '%Y, %d %M')",
                         str(date_format(col("time"), "%Y, %d %M")))
        self.assertEqual("timestampDiff(DAY, cast('2016-06-15', DATE), cast('2016-06-18', DATE))",
                         str(timestamp_diff(
                             TimePointUnit.DAY,
                             lit("2016-06-15").to_date,
                             lit("2016-06-18").to_date)))
        self.assertEqual('array(1, 2, 3)', str(array(1, 2, 3)))
        self.assertEqual("row('key1', 1)", str(row("key1", 1)))
        self.assertEqual("map('key1', 1, 'key2', 2, 'key3', 3)",
                         str(map_("key1", 1, "key2", 2, "key3", 3)))
        self.assertEqual('4', str(row_interval(4)))
        self.assertEqual('pi()', str(pi()))
        self.assertEqual('e()', str(e()))
        self.assertEqual('rand(4)', str(rand(4)))
        self.assertEqual('randInteger(4)', str(rand_integer(4)))
        self.assertEqual('atan2(1, 2)', str(atan2(1, 2)))
        self.assertEqual('minusPrefix(a)', str(negative(expr1)))
        self.assertEqual('concat(a, b, c)', str(concat(expr1, expr2, expr3)))
        self.assertEqual("concat_ws(', ', b, c)", str(concat_ws(', ', expr2, expr3)))
        self.assertEqual('uuid()', str(uuid()))
        self.assertEqual('null', str(null_of(DataTypes.BIGINT())))
        self.assertEqual('log(a)', str(log(expr1)))
        self.assertEqual('ifThenElse(a, b, c)', str(if_then_else(expr1, expr2, expr3)))
        self.assertEqual('withColumns(a, b, c)', str(with_columns(expr1, expr2, expr3)))
        self.assertEqual('a.b.c(a)', str(call('a.b.c', expr1)))