示例#1
0
def generate_code(args):
    """Generate code that calls a DataFrame operation on the single input df.

    Args:
        args: dict providing "node", "requireds_info" and "edges". The node
            is read for "id", "parameters" and "ddfo_name".

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when the incoming-edge validation reports an error.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    # Exactly one incoming dataframe and no incoming model are accepted.
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges,
        {"df_count": {1}, "model_count": {0}})

    shared_function_set = set()
    if error != ErrorTypes.NO_ERROR:
        return [], shared_function_set, error

    # Name of the upstream dataframe variable, indexed by its portion when
    # the incoming edge carries one.
    incoming = extra["dfs"][0]
    if "portion" in incoming:
        df_name = "".join(
            ("df_", incoming["source_id"], "[", str(incoming["portion"]), "]"))
    else:
        df_name = "df_" + incoming["source_id"]

    additional_local_code = []
    call_args = {
        "node_id": node["id"],
        "input_dfs": [df_name],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": [],
    }

    parameters = node["parameters"]
    # Opening of the emitted statement: df_<id>=<input df>.<operation>(
    call_prefix = "".join(
        ("df_", node["id"], "=", df_name, ".", node["ddfo_name"], "("))
    gen_code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, call_prefix, call_args)

    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)
    return final_code, shared_function_set, error
示例#2
0
def generate_code(args):
    """Generate code that reads a data source into ``df_<node id>``.

    The node must have no incoming dataframes and no incoming models. When
    ``DataSourceValidityChecker`` reports the schema as appropriate, the
    emitted statement starts with ``spark.read.<file_type>(``; otherwise it
    starts with ``spark.read.format(<file_type>).load(``.

    Args:
        args: dict providing "node", "requireds_info" and "edges". The node
            is read for "id", "parameters", "file_type" and
            "can_infer_schema".

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when either validity check reports an error.

    Note:
        In the fallback branch ``node["parameters"]`` is modified in place:
        any "schema" entry is removed and, if the node can infer its schema,
        an "inferSchema" entry is added.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    checklist = {"df_count": {0}, "model_count": {0}}
    error, _ = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges, checklist)
    final_code = []
    shared_function_set = set()
    additional_local_code = []
    errors = []

    if error != ErrorTypes.NO_ERROR:
        return final_code, shared_function_set, error

    error, is_schema_appropriate = DataSourceValidityChecker.check_validity(
        node)
    if error != ErrorTypes.NO_ERROR:
        return final_code, shared_function_set, error

    my_args = {
        "node_id": node["id"],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": errors
    }

    if is_schema_appropriate:
        gen_code = CodeGenerationUtils.handle_instantination_or_call(
            node["parameters"], "df_" + node["id"] + "=" +
            "spark.read." + node["file_type"] + "(", my_args)
    else:
        # The schema was judged not appropriate, so it is dropped rather than
        # passed to the reader. NOTE(review): original author flagged this
        # removal as something to reconsider.
        node["parameters"].pop("schema", None)

        if node["can_infer_schema"]:
            node["parameters"]["inferSchema"] = {
                "value": True,
                "type": "boolean"
            }

        gen_code = CodeGenerationUtils.handle_instantination_or_call(
            node["parameters"],
            "df_" + node["id"] + "=" + "spark.read.format(" +
            CodeGenerationUtils.handle_primitive(node["file_type"]) +
            ").load(", my_args)

    # Merge for BOTH branches. Previously this ran only in the fallback
    # branch, so the schema-appropriate path discarded its generated code and
    # returned an empty list with NO_ERROR.
    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)

    return final_code, shared_function_set, error
示例#3
0
def __generate_code_for_transformer_instantination(node, df_name, args):
    """Generate the instantiation code for a transformer node.

    Delegates to ``MultiInstanceHandlerUtils.multi_instance_generation`` when
    that utility says multiple instances are needed (after setting
    ``args["in_pipeline"]`` to True); otherwise emits a single
    ``pipeline_stage_<id> = <transformer_name>(`` instantiation.

    Args:
        node: node dict; read for "parameters", "id" and "transformer_name".
        df_name: input dataframe name, used only for multi-instance generation.
        args: argument dict forwarded to the generation utilities.

    Returns:
        Whatever the selected generation utility returns.
    """
    if not MultiInstanceHandlerUtils.should_generate_multiple_instances(node):
        parameters = node["parameters"]
        stage_prefix = "".join(
            ("pipeline_stage_", node["id"], " = ",
             node["transformer_name"], "("))
        return CodeGenerationUtils.handle_instantination_or_call(
            parameters, stage_prefix, args)

    # Multi-instance generation is flagged as running inside a pipeline.
    args["in_pipeline"] = True
    return MultiInstanceHandlerUtils.multi_instance_generation(
        node, df_name, args)
示例#4
0
def generate_code(args):
    """Generate code that writes the single incoming dataframe to storage.

    The emitted statement starts with
    ``<input df>.write.format(<file_type>).save(``. On success the written
    path is also appended to ``args["additional_info"]["written_tables"]``.

    Args:
        args: dict providing "node", "requireds_info", "edges" and
            "additional_info". The node is read for "id", "parameters"
            (including ``parameters["path"]["value"]``) and "file_type".

    Returns:
        A ``(final_code, shared_function_set, error)`` tuple. ``final_code``
        is an empty list when the incoming-edge validation reports an error.
    """
    node = args["node"]
    requireds_info = args["requireds_info"]
    edges = args["edges"]

    # Exactly one incoming dataframe and no incoming model are accepted.
    error, extra = IncomingEdgeValidityChecker.check_validity(
        node["id"], requireds_info, edges,
        {"df_count": {1}, "model_count": {0}})

    shared_function_set = set()
    if error != ErrorTypes.NO_ERROR:
        return [], shared_function_set, error

    # Name of the upstream dataframe variable, indexed by its portion when
    # the incoming edge carries one.
    incoming = extra["dfs"][0]
    df_name = "df_" + incoming["source_id"]
    if "portion" in incoming:
        df_name = df_name + "[" + str(incoming["portion"]) + "]"

    additional_local_code = []
    call_args = {
        "node_id": node["id"],
        "input_dfs": [df_name],
        "shared_function_set": shared_function_set,
        "additional_local_code": additional_local_code,
        "errors": [],
    }

    parameters = node["parameters"]
    file_type_literal = CodeGenerationUtils.handle_primitive(node["file_type"])
    save_prefix = df_name + ".write.format(" + file_type_literal + ").save("
    gen_code = CodeGenerationUtils.handle_instantination_or_call(
        parameters, save_prefix, call_args)

    final_code = CodeGenerationUtils.merge_with_additional_code(
        gen_code, additional_local_code)

    # Record the target path of this write in the shared bookkeeping list.
    written_path = node["parameters"]["path"]["value"]
    args["additional_info"]["written_tables"].append(
        {"table_path": written_path})

    return final_code, shared_function_set, error
示例#5
0
def __single_generation(node, df_name, args):
    """Generate code that instantiates an estimator, fits it and transforms.

    The returned code instantiates ``estimator_<id>``, then appends a line
    fitting it on ``df_name`` into ``model_<id>`` and a line transforming
    ``df_name`` with that model into ``df_<id>``.

    Args:
        node: node dict; read for "parameters", "id" and "estimator_name".
        df_name: name of the dataframe variable used for fit and transform.
        args: argument dict forwarded to the instantiation utility.

    Returns:
        The code sequence returned by the instantiation utility, extended
        with the fit and transform lines (each followed by ``os.linesep``).
    """
    code = CodeGenerationUtils.handle_instantination_or_call(
        node["parameters"],
        'estimator_' + node["id"] + ' = ' + node["estimator_name"] + '(',
        args)

    fit_line = "".join(
        ('model_', node["id"], "=", 'estimator_', node["id"],
         ".fit(", df_name, ")"))
    code.extend([fit_line, os.linesep])

    transform_line = "".join(
        ('df_', node["id"], "=", 'model_', node["id"],
         '.transform(', df_name, ')'))
    code.extend([transform_line, os.linesep])

    return code
def __generate_code_for_evaluator_instantination(node, args):
    """Generate the ``evaluator_<id> = <evaluator_name>(`` instantiation code.

    Args:
        node: node dict; read for "parameters", "id" and "evaluator_name".
        args: argument dict forwarded to the instantiation utility.

    Returns:
        Whatever ``CodeGenerationUtils.handle_instantination_or_call`` returns.
    """
    parameters = node["parameters"]
    constructor_prefix = "".join(
        ('evaluator_', node["id"], ' = ', node["evaluator_name"], '('))
    return CodeGenerationUtils.handle_instantination_or_call(
        parameters, constructor_prefix, args)