Exemplo n.º 1
0
 def execute(self, function_context: FlinkFunctionContext,
             input_list: List[Table]) -> List[Table]:
     """Register the ``build_index`` UDF and queue an insert that applies it.

     The first table in ``input_list`` is selected through
     ``build_index(uuid, feature_data)`` and the result is inserted into a
     temporary CSV sink named ``train_sink`` located at ``/tmp/indexed_key``.
     The insert is only added to the context's statement set; it is not
     executed here.

     Args:
         function_context: Provides the table environment and the statement
             set the insert is added to.
         input_list: Input tables; only the first one is used.

     Returns:
         An empty list -- this component produces no downstream tables.
     """
     t_env = function_context.get_table_env()
     statement_set = function_context.get_statement_set()
     table = input_list[0]
     # Clear the debug directory. The command is passed as an argument list
     # (no shell involved) and waited on, so the deletion has finished before
     # anything below runs and the child process is reaped. The exit status
     # is deliberately ignored: this cleanup is best-effort.
     Popen(['rm', '-rf', '/root/debug']).wait()
     t_env.register_function(
         "build_index",
         udf(BuildIndexUDF(self.path, self.element_type, self.dimension),
             [DataTypes.STRING(), DataTypes.STRING()], DataTypes.STRING()))
     dummy_output_path = '/tmp/indexed_key'
     # Remove whatever a previous run left at the sink location, whether it
     # is a directory or a single file.
     if os.path.exists(dummy_output_path):
         if os.path.isdir(dummy_output_path):
             shutil.rmtree(dummy_output_path)
         else:
             os.remove(dummy_output_path)
     t_env.connect(FileSystem().path(dummy_output_path)) \
         .with_format(OldCsv()
                      .field('key', DataTypes.STRING())) \
         .with_schema(Schema()
                      .field('key', DataTypes.STRING())) \
         .create_temporary_table('train_sink')
     statement_set.add_insert(
         "train_sink", table.select("build_index(uuid, feature_data)"))
     return []
Exemplo n.º 2
0
 def execute(self, function_context: FlinkFunctionContext) -> Table:
     """Expose the example's batch file as a single-column CSV source table.

     A temporary table named ``mySource`` with one STRING column, ``word``,
     is created over the path given by the example meta's ``batch_uri``.

     Args:
         function_context: Provides the example meta and the table
             environment.

     Returns:
         The ``mySource`` table looked up from the table environment.
     """
     meta: af.ExampleMeta = function_context.get_example_meta()
     table_env = function_context.get_table_env()
     source_name = 'mySource'

     # Build the connector descriptor step by step: path, format, schema.
     descriptor = table_env.connect(FileSystem().path(meta.batch_uri))
     descriptor = descriptor.with_format(
         OldCsv().field('word', DataTypes.STRING()))
     descriptor = descriptor.with_schema(
         Schema().field('word', DataTypes.STRING()))
     descriptor.create_temporary_table(source_name)

     return table_env.from_path(source_name)
Exemplo n.º 3
0
    def execute(self, function_context: FlinkFunctionContext, input_table: Table) -> None:
        """Queue an insert of ``input_table`` into a tab-delimited CSV sink.

        A temporary table named ``mySink`` with columns ``word`` (STRING) and
        ``count`` (BIGINT) is created over the path given by the example
        meta's ``batch_uri``. Any existing output at that path is removed
        first. The insert is only added to the context's statement set; it is
        not executed here.

        Args:
            function_context: Provides the example meta, the table
                environment and the statement set.
            input_table: Table whose rows are written to the sink.
        """
        # Local import so this method does not depend on the module's
        # top-level imports providing ``shutil``.
        import shutil

        example_meta: af.ExampleMeta = function_context.get_example_meta()
        output_file = example_meta.batch_uri
        # Remove stale output from a previous run. ``os.remove`` alone fails
        # when the path is a directory, so directories are removed as a tree.
        # NOTE(review): presumably the sink can leave a directory behind
        # (e.g. one file per parallel task) -- confirm for the Flink version
        # in use. Symlinks are unlinked rather than followed.
        if os.path.isdir(output_file) and not os.path.islink(output_file):
            shutil.rmtree(output_file)
        elif os.path.exists(output_file):
            os.remove(output_file)

        t_env = function_context.get_table_env()
        statement_set = function_context.get_statement_set()
        t_env.connect(FileSystem().path(output_file)) \
            .with_format(OldCsv()
                         .field_delimiter('\t')
                         .field('word', DataTypes.STRING())
                         .field('count', DataTypes.BIGINT())) \
            .with_schema(Schema()
                         .field('word', DataTypes.STRING())
                         .field('count', DataTypes.BIGINT())) \
            .create_temporary_table('mySink')
        statement_set.add_insert('mySink', input_table)
Exemplo n.º 4
0
 def execute(self, function_context: FlinkFunctionContext,
             input_list: List[Table]) -> List[Table]:
     """Register the ``search`` function and apply it to the first input.

     Args:
         function_context: Provides the table environment the function is
             registered in.
         input_list: Input tables; only the first one is used.

     Returns:
         A one-element list holding the projection
         ``face_id, search(feature_data) as near_id`` of the first input.
     """
     table_env = function_context.get_table_env()
     source = input_list[0]

     # STRING in, STRING out.
     search_fn = udf(SearchUDTF(self.path, self.element_type),
                     DataTypes.STRING(), DataTypes.STRING())
     table_env.register_function("search", search_fn)

     projected = source.select("face_id, search(feature_data) as near_id")
     return [projected]
Exemplo n.º 5
0
 def execute(self, function_context: FlinkFunctionContext,
             input_list: List[Table]) -> List[Table]:
     """Register the ``search`` function and apply it to the first input.

     Before registering, ``/root/test`` is removed.

     Args:
         function_context: Provides the table environment the function is
             registered in.
         input_list: Input tables; only the first one is used.

     Returns:
         A one-element list holding the projection
         ``face_id, device_id, search(feature_data) as near_id`` of the
         first input.
     """
     t_env = function_context.get_table_env()
     table = input_list[0]
     # Clear the test directory. The command is passed as an argument list
     # (no shell involved) and waited on, so the deletion has finished before
     # anything below runs and the child process is reaped. The exit status
     # is deliberately ignored: this cleanup is best-effort.
     Popen(['rm', '-rf', '/root/test']).wait()
     t_env.register_function(
         "search",
         udf(SearchUDTF3(self.path, self.element_type), DataTypes.STRING(),
             DataTypes.INT()))
     return [
         table.select("face_id, device_id, search(feature_data) as near_id")
     ]