def execute(self, function_context: FlinkFunctionContext, input_list: List[Table]) -> List[Table]:
    """Register the ``build_index`` UDF and queue an insert that applies it.

    The first table of ``input_list`` is projected through
    ``build_index(uuid, feature_data)`` and the result is inserted into a
    temporary CSV filesystem sink named ``train_sink``.

    Args:
        function_context: Supplies the table environment and the statement
            set that the insert is added to.
        input_list: Input tables; only the first element is used.

    Returns:
        An empty list -- this step exposes no output tables.
    """

    def _clear_path(path: str, ignore_errors: bool = False) -> None:
        # Remove a file, symlink or directory tree if one is present.
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path)
            elif os.path.lexists(path):
                os.remove(path)
        except OSError:
            if not ignore_errors:
                raise

    t_env = function_context.get_table_env()
    statement_set = function_context.get_statement_set()
    table = input_list[0]

    # Previously done with an un-awaited ``Popen('rm -rf ...', shell=True)``:
    # the delete ran asynchronously (so it could still be in flight while the
    # rest of this method executed) and the child process was never reaped.
    # Doing it in-process is synchronous and needs no shell. Failures are
    # deliberately ignored, matching the unchecked exit status of ``rm -rf``.
    _clear_path('/root/debug', ignore_errors=True)

    t_env.register_function(
        "build_index",
        udf(BuildIndexUDF(self.path, self.element_type, self.dimension),
            [DataTypes.STRING(), DataTypes.STRING()],
            DataTypes.STRING()))

    # Start from a clean sink path; unlike the debug directory, a failure here
    # is propagated to the caller.
    dummy_output_path = '/tmp/indexed_key'
    _clear_path(dummy_output_path)

    t_env.connect(FileSystem().path(dummy_output_path)) \
        .with_format(OldCsv()
                     .field('key', DataTypes.STRING())) \
        .with_schema(Schema()
                     .field('key', DataTypes.STRING())) \
        .create_temporary_table('train_sink')

    statement_set.add_insert(
        "train_sink",
        table.select("build_index(uuid, feature_data)"))
    return []
def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
    """Queue an insert of ``input_table`` into a tab-delimited CSV sink.

    The sink location is the ``batch_uri`` of the example metadata obtained
    from ``function_context``. Anything already present at that location is
    removed first. The sink is registered as the temporary table ``mySink``
    with columns ``word`` (STRING) and ``count`` (BIGINT).

    Args:
        function_context: Supplies the example metadata, the table
            environment and the statement set that the insert is added to.
        input_table: Table whose rows are written to the sink.
    """
    # Function-scope import so this method does not rely on the module's
    # import block, which is not visible here.
    import shutil

    example_meta: af.ExampleMeta = function_context.get_example_meta()
    output_file = example_meta.batch_uri

    # ``os.remove`` alone raises when the path is a directory. NOTE(review):
    # the legacy CSV sink can leave a directory of part files when run with
    # parallelism > 1 -- confirm against the deployed Flink version.
    if os.path.isdir(output_file) and not os.path.islink(output_file):
        shutil.rmtree(output_file)
    elif os.path.lexists(output_file):
        os.remove(output_file)

    t_env = function_context.get_table_env()
    statement_set = function_context.get_statement_set()

    t_env.connect(FileSystem().path(output_file)) \
        .with_format(OldCsv()
                     .field_delimiter('\t')
                     .field('word', DataTypes.STRING())
                     .field('count', DataTypes.BIGINT())) \
        .with_schema(Schema()
                     .field('word', DataTypes.STRING())
                     .field('count', DataTypes.BIGINT())) \
        .create_temporary_table('mySink')

    statement_set.add_insert('mySink', input_table)