示例#1
0
    async def execute_result_async(self, a: ast.AST):
        '''Render the query ast as qastle text instead of executing it.

        WARNING: this code is fragile - the top level node of the ast must be a call to the
        plain name `ResultTTree` (presumably what a trailing `AsROOTTtree` in the query
        produces, going by the error message - TODO confirm).
        WARNING: Really will only work for xAOD backend due to separate logic required for each backend.

        This code was taken from the `ServiceX.py` file located in `func_adl_servicex`.

        Arguments:

            a:                  The ast that should be translated

        Returns:
            v                   Whatever `python_ast_to_text_ast` produces for `a`

        Raises:
            Exception           If `a` is not a call whose callee is the name `ResultTTree`
        '''
        # `cast` performs no runtime check, so validate the shape explicitly. Without this a
        # non-call node (or a call through an attribute) fails with an unrelated
        # AttributeError instead of the intended error below.
        callee = a.func if isinstance(a, ast.Call) else None
        if not isinstance(callee, ast.Name) or callee.id != 'ResultTTree':
            raise Exception('Must be a call to AsROOTTtree at end of query for now')

        # Get the qastle we are going to use!
        return python_ast_to_text_ast(a)
示例#2
0
def translate(tree_name: str,
              selected_columns: str = "",
              tcut_selection: str = "",
              verbose: bool = False):
    '''Build a func-adl query for a tree and return it as qastle text.

    The query selects from `EventDataset("ServiceXDatasetSource", tree_name)`. When
    `tcut_selection` is non-empty a `Where` clause is inserted before the `Select`.

    Arguments:

        tree_name:          Name of the tree to read; must not be empty
        selected_columns:   Column specification, handed to `_translate_selected_columns`
        tcut_selection:     TCut selection string, handed to `_translate_selection`;
                            an empty string means no `Where` clause
        verbose:            If true, print the TCut input and the generated queries

    Returns:
        v                   The qastle text produced by `qastle.python_ast_to_text_ast`

    Raises:
        Exception           If `tree_name` is empty. `_check_parentheses` is called first
                            and presumably raises on unbalanced parentheses - TODO confirm.
    '''
    if verbose:
        print(f'\033[32mTCut selection syntax:\033[0m\n{tcut_selection}\n\n')
    _check_parentheses(tcut_selection)
    # Compare by value: `is ""` tests object identity, which only works by accident of
    # string interning and raises a SyntaxWarning on Python 3.8+.
    if tree_name == "":
        raise Exception("Tree name is missing")
    if tcut_selection == "":
        query = f"EventDataset(\"ServiceXDatasetSource\", \"{tree_name}\").Select(\"lambda event:  {_translate_selected_columns(selected_columns)} \")"
    else:
        query = f"EventDataset(\"ServiceXDatasetSource\", \"{tree_name}\").Where(\"lambda event: {_translate_selection(tcut_selection, verbose)} \").Select(\"lambda event: {_translate_selected_columns(selected_columns)} \")"
    query_qastle = qastle.python_ast_to_text_ast(
        qastle.insert_linq_nodes(ast.parse(query)))
    if verbose:
        print(f'\033[32mFull func-adl query:\033[0m\n{query}\n\n')
        print(f'\033[32mFull qastle query:\033[0m\n{query_qastle}\n\n')
    return query_qastle
示例#3
0
    async def execute_result_async(self, a: ast.AST, title: str) -> Any:
        '''Evaluate the ast with the dummy executor and hand that executor back.

        When `self._q_roundtrip` is set, the ast is first serialized to qastle text and
        parsed back again; the ast is dumped to stdout before and after the round trip.

        Arguments:

            a:                  The ast that should be evaluated
            title:              Not used by this implementation

        Returns:
            v                   The dummy executor object, after it has evaluated the ast
        '''
        query = a

        # Optional qastle round trip.
        if self._q_roundtrip:
            import qastle
            print(f'before: {ast.dump(query)}')
            as_text = qastle.python_ast_to_text_ast(query)
            reparsed = qastle.text_ast_to_python_ast(as_text)
            query = reparsed.body[0].value
            print(f'after: {ast.dump(query)}')

        # Attach a placeholder sequence representation to the dataset node.
        from func_adl import find_EventDataset
        dataset_node = find_EventDataset(query)
        placeholder = cpp_variable("bogus-do-not-use",
                                   top_level_scope(),
                                   cpp_type=None)
        sequence = cpp_sequence(placeholder, placeholder, top_level_scope())
        set_rep(dataset_node, sequence)

        # Run the dummy executor over the ast; the executor itself is the result.
        executor = self.get_dummy_executor_obj()
        executor.evaluate(query)
        return executor
def test_as_qastle_uproot():
    '''The query ast of a fresh `ServiceXDatasetSource` renders to the expected qastle text.'''
    dataset = ServiceXDatasetSource("junk.root", 'MainTree')
    from qastle import python_ast_to_text_ast
    expected = "(call EventDataset 'ServiceXDatasetSource' 'MainTree')"
    rendered = python_ast_to_text_ast(dataset.query_ast)
    assert rendered == expected
示例#5
0
    async def execute_result_async(self, a: ast.AST) -> Any:
        r'''
        Run a query against a func-adl ServiceX backend. The appropriate part of the AST is
        shipped there, and it is interpreted.

        Arguments:

            a:                  The ast that we should evaluate. It must be a call to a plain
                                name (`ResultParquet`, `ResultPandasDF`, ...) that says in
                                which form the data should come back.

        Returns:
            v                   Whatever the data that is requested (awkward arrays, etc.)

        Raises:
            FuncADLServerException  If `a` is not a call to a plain name, or the name is not
                                    a result type known for the selected backend.
            NotImplementedError     For result types that are recognized but not wired up yet.
        '''
        # Now, make sure the ast is formed in a way we can deal with.
        if not isinstance(a, ast.Call):
            raise FuncADLServerException(
                f'Unable to use ServiceX to fetch a {a}.')
        a_func = a.func
        if not isinstance(a_func, ast.Name):
            raise FuncADLServerException(
                f'Unable to use ServiceX to fetch a call from {a_func}')

        # Make the servicex call, asking for the appropriate return type. Depending on the return-type
        # alter it so it can return something that ServiceX can understand.

        if self._is_uproot:
            # The uproot transformer only returns parquet files at the moment. So we had better look something like that, or something
            # we can convert from.

            if a_func.id == 'ResultParquet':
                # For now, we have to strip off the ResultParquet and send the rest down to uproot.
                source = a.args[0]
                q_str = python_ast_to_text_ast(
                    qastle.insert_linq_nodes(source))
                logging.debug('Qastle string sent to uproot query: %s', q_str)
                return await self._ds.get_data_parquet_async(q_str)
            elif a_func.id in ('ResultPandasDF', 'ResultAwkwardArray'):
                raise NotImplementedError()
            else:
                raise FuncADLServerException(
                    f'Unable to use ServiceX to fetch a result in the form {a_func.id} - Only ResultParquet, ResultPandasDF and ResultAwkwardArray are supported'
                )

        # If we are xAOD then we can come back with a pandas df, awkward array, or root files.
        # TODO: #2 Add root files as a legal return type here.
        if a_func.id in ('ResultPandasDF', 'ResultAwkwardArray'):
            import sys

            # `ast.Str` is deprecated since Python 3.8 and removed in 3.14. From 3.8 on it
            # already produced an `ast.Constant`, so this builds the same node there while
            # still using `ast.Str` on older interpreters.
            str_node = ast.Constant if sys.version_info >= (3, 8) else ast.Str

            # Both result types are requested from the backend as the same ResultTTree
            # query; only the call used to fetch the data differs.
            source = a.args[0]
            cols = a.args[1]
            top_level_ast = ast.Call(func=ast.Name('ResultTTree', ast.Load()),
                                     args=[
                                         source, cols,
                                         str_node('treeme'),
                                         str_node('file.root')
                                     ],
                                     keywords=[])
            q_str = python_ast_to_text_ast(top_level_ast)
            logging.debug('Qastle string sent to xAOD query: %s', q_str)
            if a_func.id == 'ResultPandasDF':
                return await self._ds.get_data_pandas_df_async(q_str)
            return await self._ds.get_data_awkward_async(q_str)
        elif a_func.id == 'ResultTTree':
            raise NotImplementedError()
        else:
            raise FuncADLServerException(
                f'Unable to use ServiceX to fetch a result in the form {a_func.id} - Only ResultTTree, ResultPandasDF and ResultAwkwardArray are supported'
            )
 async def execute_result_async(self, a: ast.AST) -> Any:
     '''Return the qastle text rendering of the ast instead of executing it.'''
     qastle_text = python_ast_to_text_ast(a)
     return qastle_text
def _expand_bare_flags(expression, names):
    """Turn branches used as bare booleans into explicit comparisons.

    `expression` must already have its branches prefixed with `event.`. A branch that
    stands alone between two of `and`, `or`, `(`, `)` becomes `event.x > 0`, and
    `!event.x` in the same position becomes `event.x == 0`.
    """
    def _rewrite(match):
        left, negated, branch, right = match.groups()
        if left == '(' and right == ')':
            # `(event.x)` is left alone: it may be a function argument, e.g. abs(event.x).
            return match.group(0)
        comparison = ' == 0' if negated else ' > 0'
        lead = left if left == '(' else left + ' '
        tail = '' if right == ')' else ' '
        return lead + branch + comparison + tail

    for name in names:
        # The right-hand delimiter is only looked at, not consumed, so it can serve as the
        # left-hand delimiter of the next flag. `(?!\w)` keeps `event.x` from matching the
        # start of a longer name such as `event.xor`.
        pattern = (r'(\band|\bor|\()\s*(!?)(event\.%s)(?!\w)\s*(?=(and\b|or\b|\)))'
                   % re.escape(name))
        expression = re.sub(pattern, _rewrite, expression)
    return expression


def tcut_to_qastle(selection, variable):
    """Translate a TCut-style selection and a branch list into a qastle query string.

    Arguments:

        selection:      TCut selection expression, or "none" (any case) for no selection
        variable:       Comma separated branch names to return, or "all" (any case) to
                        return the whole event

    Returns:
        The qastle text for `EventDataset()[.Where(...)].Select(...)`. Named branches are
        returned as a dictionary keyed by branch name.

    NOTE(review): relies on `multiple_replace(patterns, text)`, presumably replacing every
    key of `patterns` found in `text` with its value - confirm against its definition.
    """
    has_selection = selection.lower() != "none"

    if has_selection:
        # 1st step: recognize all variable names
        ignore_patterns = {  # These are supported by Qastle
            "abs": " ",
            "(": " ",
            ")": " ",
            "*": " ",
            "/": " ",
            "+": " ",
            "-": " "
        }
        stripped = multiple_replace(ignore_patterns, selection)
        stripped = re.sub('[<&>!=|-]', ' ', stripped)
        variables = []
        for token in stripped.split():
            try:
                float(token)  # Numeric literals are not branches
            except ValueError:
                variables.append(token)
        variables = list(dict.fromkeys(variables))  # Remove duplicates, keep order

        # 2nd step: prefix variable names with `event.`. The name is escaped so that
        # characters such as `.` or `[` in it are matched literally.
        for name in variables:
            selection = re.sub(r'(?<!\w)(%s)(?!\w)' % re.escape(name),
                               r'event.\1', selection)

        # 3rd step: replace operators
        replace_patterns = {
            "&&": " and ",
            "||": " or ",
            "!=": " != ",
            ">=": " >= ",
            "<=": " <= ",
            ">": " > ",
            "<": " < "
        }
        output = multiple_replace(replace_patterns, selection)
        output = " ".join(output.split())  # Remove duplicate whitespace

        # 4th step: bool (!! Still missing many combinations!!)
        # Sentinel `and`s let a flag at either end of the expression match like any other.
        output = "and " + output + " and"
        output = _expand_bare_flags(output, variables)

        # `!(something)` -> `(something)==0`. The back-reference keeps each group's own
        # text; nested parentheses are not handled.
        output = re.sub(r'!(\([^()]*\))', r'\1==0', output)

        output = output.rsplit(' ', 1)[0].split(
            ' ', 1)[1]  # Delete `and` at the beginning and the last

    if variable.lower() == 'all':
        variable_text = 'event'
    else:
        branches = [name.strip() for name in variable.split(',')]
        entries = [f'\'{name}\': event.{name}' for name in branches]
        variable_text = '{' + ', '.join(entries) + '}'

    # Add Func ADL wrapper
    if has_selection:
        query = "EventDataset().Where('lambda event: " + output + "').Select(\"lambda event: " + variable_text + "\")"
    else:
        query = "EventDataset().Select(\"lambda event: " + variable_text + "\")"
    text_ast = qastle.python_ast_to_text_ast(
        qastle.insert_linq_nodes(ast.parse(query)))

    return text_ast
示例#8
0
 async def translate(a: ast.AST):
     '''Return the qastle text rendering of the given python ast.'''
     from qastle import python_ast_to_text_ast as render
     return render(a)