Exemplo n.º 1
0
        def run_udf_on_global_node(
            self,
            func_name: str,
            positional_args: List[GlobalNodeTable],
            share_to_locals: bool = False,
        ):  # -> GlobalNodeTable or LocalNodeTable
            """
            Run a UDF on the global node and wrap the table it produces.

            Every positional argument is serialized as a ``UDFArgument``, the udf
            is queued on the global node and the call blocks until the name of
            the result table is available.

            Parameters
            ----------
                func_name: str
                    Name of the udf to run, forwarded to ``queue_run_udf``.
                positional_args: List[GlobalNodeTable]
                    Arguments of the udf call. A GlobalNodeTable is sent as a
                    "table" argument, any other value as a "literal" argument
                    holding ``str(value)``.
                share_to_locals: bool
                    When True, a remote table pointing at the result is created
                    on every local node.

            Returns
            -------
                GlobalNodeTable or LocalNodeTable
                    A LocalNodeTable mapping each local node to the result table
                    when ``share_to_locals`` is True, otherwise a GlobalNodeTable
                    mapping the global node to the result table.

            Raises
            ------
                Exception
                    If one of the positional arguments is a LocalNodeTable.
            """
            # TODO: the queued task and the node calls can raise; nothing is caught yet
            command_id = get_a_uniqueid()

            serialized_args = []
            for arg in positional_args:
                if isinstance(arg, self.GlobalNodeTable):
                    # Only the first table held by the GlobalNodeTable is used.
                    # NOTE(review): presumably it always holds exactly one entry
                    # (the global node's table) -- confirm.
                    tables_on_global = list(arg.node_table.values())
                    udf_argument = UDFArgument(
                        type="table",
                        value=tables_on_global[0].full_table_name,
                    )
                elif isinstance(arg, self.LocalNodeTable):
                    raise Exception(
                        "(run_udf_on_global_node) LocalNodeTable types are not accepted from run_udf_on_global_nodes"
                    )
                else:
                    udf_argument = UDFArgument(type="literal", value=str(arg))
                serialized_args.append(udf_argument.to_json())

            # Queue the udf on the global node, then block until it finishes
            udf_task = self._global_node.queue_run_udf(
                command_id=command_id,
                func_name=func_name,
                positional_args=serialized_args,
                keyword_args={},
            )
            result_table_name: str = udf_task.get()

            if not share_to_locals:
                return self.GlobalNodeTable(
                    node_table={self._global_node: TableName(result_table_name)}
                )

            # Expose the freshly generated table to each local node as a remote table
            result_schema: TableSchema = self._global_node.get_table_schema(
                TableName(result_table_name)
            )
            result_table_info: TableInfo = TableInfo(
                name=result_table_name, schema=result_schema
            )
            tables_per_local_node = {}
            for local_node in self._local_nodes:
                # TODO do not block here, first send the request to all local nodes and then block for the result
                local_node.create_remote_table(
                    table_info=result_table_info, native_node=self._global_node
                )
                tables_per_local_node[local_node] = TableName(result_table_name)

            return self.LocalNodeTable(nodes_tables=tables_per_local_node)
Exemplo n.º 2
0
def run_udf(
    command_id: str,
    context_id: str,
    func_name: str,
    positional_args_json: List[str],
    keyword_args_json: Dict[str, str],
) -> str:
    """
    Generates the udf statements for the given call and runs them.

    The json-serialized arguments are turned back into UDFArgument objects,
    the statements are produced by `_generate_udf_statements` and the creation
    and execution statements are handed to `udfs.run_udf`.

    Parameters
    ----------
        command_id: str
            The command identifier, common among all nodes for this action.
        context_id: str
            The experiment identifier, common among all experiment related actions.
        func_name: str
            Name of function from which to generate UDF.
        positional_args_json: list[str(UDFArgument)]
            Json-serialized positional arguments of the udf call.
        keyword_args_json: dict[str, str(UDFArgument)]
            Json-serialized keyword arguments of the udf call, by argument name.

    Returns
    -------
        str
            The name of the table where the udf execution results are in.
    """
    parsed_positional = []
    for serialized in positional_args_json:
        parsed_positional.append(UDFArgument.from_json(serialized))

    parsed_keyword = {}
    for arg_name, serialized in keyword_args_json.items():
        parsed_keyword[arg_name] = UDFArgument.from_json(serialized)

    statements = _generate_udf_statements(
        command_id,
        context_id,
        func_name,
        parsed_positional,
        parsed_keyword,
    )
    creation_stmt, execution_stmt, result_table = statements

    udfs.run_udf(creation_stmt, execution_stmt)
    return result_table
Exemplo n.º 3
0
def get_run_udf_query(
    command_id: str,
    context_id: str,
    func_name: str,
    positional_args_json: List[str],
    keyword_args_json: Dict[str, str],
) -> Tuple[str, str, str]:
    """
    Builds, without running them, the sql statements of a udf execution.

    The json-serialized arguments are turned back into UDFArgument objects and
    passed to `_generate_udf_statements`, whose result is returned unchanged.

    Parameters
    ----------
        command_id: str
            The command identifier, common among all nodes for this action.
        context_id: str
            The experiment identifier, common among all experiment related actions.
        func_name: str
            Name of function from which to generate UDF.
        positional_args_json: list[str(UDFArgument)]
            Json-serialized positional arguments of the udf call.
        keyword_args_json: dict[str, str(UDFArgument)]
            Json-serialized keyword arguments of the udf call, by argument name.

    Returns
    -------
        Tuple[str, str, str]
            The tuple produced by `_generate_udf_statements`.
            NOTE(review): `run_udf` unpacks the same helper call as
            (udf creation statement, udf execution statement, result table
            name) -- confirm that order against the helper.
    """
    deserialized_positional = list(
        map(UDFArgument.from_json, positional_args_json)
    )
    deserialized_keyword = {
        arg_name: UDFArgument.from_json(serialized)
        for arg_name, serialized in keyword_args_json.items()
    }

    statements = _generate_udf_statements(
        command_id,
        context_id,
        func_name,
        deserialized_positional,
        deserialized_keyword,
    )
    return statements
Exemplo n.º 4
0
        def run_udf_on_local_nodes(
            self,
            func_name: str,
            positional_args: Dict[str, LocalNodeTable],
            share_to_global: bool = False,
        ):  # -> GlobalNodeTable or LocalNodeTable
            """
            Run a UDF on every local node and wrap the tables it produces.

            One ``queue_run_udf`` task is queued per node in ``self._local_nodes``.
            Only after all tasks are queued are the results collected, so the
            nodes are not waited on one at a time. Each node yields one new table.

            Parameters
            ----------
                func_name: str
                    Name of the udf to run, forwarded to ``queue_run_udf``.
                positional_args: Dict[str, LocalNodeTable]
                    Mapping whose values are sent, in iteration order, as the
                    positional arguments of the udf call. A LocalNodeTable is
                    sent as a "table" argument naming that node's own table, any
                    other value as a "literal" argument holding ``str(value)``.
                    The keys are not forwarded (presumably argument names --
                    TODO confirm).
                share_to_global: bool
                    When True, each result table is registered on the global
                    node as a remote table and a merge table over all of them
                    is created there.

            Returns
            -------
                GlobalNodeTable or LocalNodeTable
                    A GlobalNodeTable holding the merge table when
                    ``share_to_global`` is True, otherwise a LocalNodeTable
                    mapping each local node to its result table.

            Raises
            ------
                Exception
                    If one of the argument values is a GlobalNodeTable.
            """
            command_id = get_a_uniqueid()

            tasks = {}
            # TODO get the nodes from the LocalNodeTables in the positional_args
            for node in self._local_nodes:
                positional_args_transformed = []
                for val in positional_args.values():
                    if isinstance(val, self.LocalNodeTable):
                        # Each node receives the name of its own table
                        udf_argument = UDFArgument(
                            type="table",
                            value=val.nodes_tables[node].full_table_name,
                        )
                    elif isinstance(val, self.GlobalNodeTable):
                        raise Exception(
                            "(run_udf_on_local_nodes) GlobalNodeTable types are not accepted from run_udf_on_local_nodes"
                        )
                    else:
                        udf_argument = UDFArgument(type="literal", value=str(val))
                    positional_args_transformed.append(udf_argument.to_json())

                # Arguments are forwarded positionally only; keyword_args is
                # always sent empty.
                tasks[node] = node.queue_run_udf(
                    command_id=command_id,
                    func_name=func_name,
                    positional_args=positional_args_transformed,
                    keyword_args={},
                )

            udf_result_tables = {}
            for node, task in tasks.items():
                # Blocks until this node's udf has finished
                table_name = TableName(task.get())
                udf_result_tables[node] = table_name

                # create remote table on global node
                if share_to_global:
                    # TODO: try block missing
                    table_schema = node.get_table_schema(table_name)
                    table_info = TableInfo(
                        name=table_name.full_table_name, schema=table_schema
                    )
                    self._global_node.create_remote_table(
                        table_info=table_info, native_node=node
                    )

            if not share_to_global:
                return self.LocalNodeTable(nodes_tables=udf_result_tables)

            # create merge table on global over the remote tables
            merge_table_global = self._global_node.create_merge_table(
                command_id=command_id,
                table_names=list(udf_result_tables.values()),
            )
            return self.GlobalNodeTable(
                node_table={self._global_node: merge_table_global}
            )