Example #1
    class abc_base_2(ABC):
        @abstractmethod
        async def doit_async_1(self):
            raise NotImplementedError()

        @abstractmethod
        async def doit_async_2(self):
            raise NotImplementedError()

        doit_1 = make_sync(doit_async_1)
        doit_2 = make_sync(doit_async_2)
Example #2
def test_wrap_kwargs_specified_loop():
    t_wrap = make_sync(func_with_kwargs)

    async def doit():
        assert t_wrap(bins=5, range=(5, 10)) == 10

    loop = get_event_loop()
    loop.run_until_complete(doit())
Example #3
def test_wrap_with_running_loop():
    t_wrap = make_sync(simple_func)

    async def doit():
        assert t_wrap(5) == 6

    loop = get_event_loop()
    loop.run_until_complete(doit())
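
The two tests above exercise the key edge case: calling the synchronous wrapper while an event loop is already running, where a plain `run_until_complete` of the coroutine would raise a `RuntimeError`. A minimal self-contained sketch of the same situation (`simple_demo` below is a hypothetical stand-in for the test fixtures `simple_func`/`func_with_kwargs`):

    import asyncio
    from make_it_sync import make_sync

    async def simple_demo(a: int) -> int:
        await asyncio.sleep(0.01)
        return a + 1

    async def caller():
        # Safe even though caller() itself executes inside a running loop.
        return make_sync(simple_demo)(4)

    assert asyncio.get_event_loop().run_until_complete(caller()) == 5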
Example #4
class tester:
    def __init__(self, b: int):
        self._b = b

    async def my_async(self, a: int) -> int:
        await sleep(0.01)
        return a + self._b

    my = make_sync(my_async)
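
A usage sketch for the class above (hypothetical driver code, not part of the source): the wrapper stored as a class attribute behaves like an ordinary bound method, so callers never see the coroutine.

    t = tester(10)
    assert t.my(5) == 15  # blocks until my_async completes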
Example #5
async def test_client_session_different_threads():
    async def get_a_client_async():
        return await default_client_session()

    from make_it_sync import make_sync
    get_a_client = make_sync(get_a_client_async)

    c1 = get_a_client()
    c2 = await get_a_client_async()

    assert c1 is not c2
Example #6
def test_wrap_normal():
    t_wrap = make_sync(simple_func)
    assert t_wrap(4) == 5
Example #7
 def get_playlist(self,
                  channel_id: str,
                  use_cache: bool = True) -> Union[str, None]:
     return make_sync(self.async_client.get_playlist)(channel_id=channel_id,
                                                      use_cache=use_cache)
Example #8
 def get_segment(self, path: str) -> Union[bytes, None]:
     return make_sync(self.async_client.get_segment)(path=path)
Example #9
    class abc_base(ABC):
        @abstractmethod
        async def doit_async(self):
            raise NotImplementedError()

        doit = make_sync(doit_async)
Example #10
def test_wrap_docstring():
    s_orig = simple_func.__doc__
    s_new = make_sync(simple_func).__doc__

    assert s_orig == s_new
Example #11
 def get_configuration(self) -> Optional[Dict[str, Any]]:
     return make_sync(self.async_client.get_configuration)()
Example #12
def test_wrap_kwargs_default():
    t_wrap = make_sync(simple_kwargs)
    assert t_wrap(1) == 11
Example #13
 def get_now_playing(self,
                     channel: XMChannel) -> Union[Dict[str, Any], None]:
     return make_sync(self.async_client.get_now_playing)(channel)
Example #14
from typing import Tuple

from dataframe_expressions import DataFrame
import hep_tables
from make_it_sync import make_sync
import matplotlib.pyplot as plt
import numpy as np

import hl_tables.local as local


async def histogram_async(df: DataFrame,
                          bins: int = 10,
                          range: Tuple[float, float] = (0, 1)):
    hist_data = hep_tables.histogram(df, bins=bins, range=range)
    h, bins = await local.make_local_async(hist_data)
    f, ax = plt.subplots()
    ax.fill_between(bins, np.r_[h, h[-1]], step='post')


histogram = make_sync(histogram_async)
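
With the wrapper in place, notebook users can plot without touching asyncio themselves. A hedged usage sketch (`df` stands for a hypothetical `DataFrame` built elsewhere):

    histogram(df, bins=20, range=(0, 100))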
Example #15
 def get_channels(self) -> List[dict]:
     return make_sync(self.async_client.get_channels)()
Example #16
class ObjectStream:
    r'''
    The objects can be events, jets, electrons, or just floats, or arrays of floats.

    `ObjectStream` holds onto the AST that will produce this stream of objects. The chain
    of `ObjectStream` objects, linked together, is a DAG that stores the user's intent.

    Every stream has an _object type_: the type of the elements of the stream. For example,
    in the top-level stream the objects are of type `Event` (or `xAODEvent`). If you transform
    an `Event` into a list of jets, the object type becomes a list of `Jet` objects, and each
    element of the stream is an array. You can flatten such a nested array into a plain stream
    of `Jet` objects using the `SelectMany` method below. In that case, you'll no longer be
    able to tell where the boundaries between events lie.
    '''
    def __init__(self, the_ast: ast.AST):
        r"""
        Initialize the stream with the ast that will produce this stream of objects.
        The user will almost never use this initializer.
        """
        self._q_ast = the_ast

    @property
    def query_ast(self) -> ast.AST:
        '''Return the query `ast` that this `ObjectStream` represents

        Returns:
            ast.AST: The python `ast` that is represented by this query
        '''
        return self._q_ast

    def SelectMany(self, func: Union[str, ast.Lambda,
                                     Callable]) -> 'ObjectStream':
        r"""
        Given the current stream's object type is an array or other iterable, return
        the items in this objects type, one-by-one. This has the effect of flattening a
        nested array.

        Arguments:

            func:   The function that should be applied to this stream's objects to return
                    an iterable. Each item of the iterable is now the stream of objects.

        Returns:
            A new ObjectStream of the type of the elements.

        Notes:
            - The function can be a `lambda`, the name of a one-line function, a string that
              contains a lambda definition, or a python `ast` of type `ast.Lambda`.
        """
        return ObjectStream(
            function_call(
                "SelectMany",
                [self._q_ast, cast(ast.AST, parse_as_ast(func))]))

    def Select(self, f: Union[str, ast.Lambda, Callable]) -> 'ObjectStream':
        r"""
        Apply a transformation function to each object in the stream, yielding a new type of
        object. There is a one-to-one correspondence between the input objects and output objects.

        Arguments:

            f:      selection function (lambda)

        Returns:

            A new ObjectStream of the transformed elements.

        Notes:
            - The function can be a `lambda`, the name of a one-line function, a string that
              contains a lambda definition, or a python `ast` of type `ast.Lambda`.
        """
        return ObjectStream(
            function_call(
                "Select",
                [self._q_ast, cast(ast.AST, parse_as_ast(f))]))

    def Where(self, filter: Union[str, ast.Lambda,
                                  Callable]) -> 'ObjectStream':
        r'''
        Filter the object stream, allowing through only those items for which `filter`
        evaluates to true.

        Arguments:

            filter      A filter lambda that returns True/False.

        Returns:

            A new ObjectStream that contains only elements that pass the filter function

        Notes:
            - The function can be a `lambda`, the name of a one-line function, a string that
              contains a lambda definition, or a python `ast` of type `ast.Lambda`.
        '''
        return ObjectStream(
            function_call(
                "Where",
                [self._q_ast, cast(ast.AST, parse_as_ast(filter))]))

    def AsPandasDF(self, columns=[]) -> 'ObjectStream':
        r"""
        Return a stream that contains one item: a pandas `DataFrame`.
        This `DataFrame` will contain all the data fed to it. Only non-array datatypes are
        permitted: the data must look like an Excel table.

        Arguments:

            columns     Array of names of the columns. Will default to "col0", "col1", etc.
                        An exception will be thrown if the number of names does not match the
                        number of columns.

        """

        # To get Pandas use the ResultPandasDF function call.
        return ObjectStream(
            function_call("ResultPandasDF",
                          [self._q_ast, as_ast(columns)]))

    def AsROOTTTree(self, filename, treename, columns=[]) -> 'ObjectStream':
        r"""
        Return the sequence of items as a ROOT TTree. Each item in the ObjectStream
        will get one entry in the file. The items must be of types that the infrastructure
        can work with:

            Float               A tree with a single float in each entry will be written.
            vector<float>       A tree with a list of floats in each entry will be written.
            (<tuple>)           A tree with multiple items (leaves) will be written. Each leaf
                                must have one of the above types. Nested tuples are not supported.

        Arguments:

            filename        Name of the file in which a TTree of the objects will be written.
            treename        Name of the tree to be written to the file
            columns         Array of names of the columns. This must match the number of items
                            in a tuple to be written out.

        Returns:

            A new ObjectStream with type [(filename, treename)]. This is because multiple trees
            may be written by the back end, and they need to be concatenated together to get the
            full dataset. The order of the files returned is consistent for different queries on
            the same dataset.
        """
        return ObjectStream(
            function_call("ResultTTree", [
                self._q_ast,
                as_ast(columns),
                as_ast(treename),
                as_ast(filename)
            ]))

    def AsParquetFiles(self,
                       filename: str,
                       columns: Union[str, List[str]] = []) -> 'ObjectStream':
        '''Returns the sequence of items as a `parquet` file. Each item in the ObjectStream
        gets a separate entry in the file. The items must be of types that the infrastructure
        can work with:

            Float               A tree with a single float in each entry will be written.
            vector<float>       A tree with a list of floats in each entry will be written.
            (<tuple>)           A tree with multiple items (leaves) will be written. Each leaf
                                must have one of the above types. Nested tuples are not supported.
            {k:v, }             A dictionary with named columns. v is either a float or a vector
                                of floats.

        Arguments:

            filename            Name of a file in which the data will be written. Depending on
                                where the data comes from this may not be used - consider it a
                                suggestion.
            columns             If the data does not arrive by dictionary, then these are the
                                column names.

        Returns:

            A new `ObjectStream` with type `[filename]`. This is because multiple files may be
            written by the backend - the data should be concatenated together to get a final
            result. The order of the files returned is consistent for different queries on the
            same dataset.
        '''
        return ObjectStream(
            function_call(
                "ResultParquet",
                [self._q_ast, as_ast(columns),
                 as_ast(filename)]))

    def AsAwkwardArray(self, columns=[]) -> 'ObjectStream':
        r'''
        Return a stream that contains one item: an `awkward` array, or a dictionary of
        `awkward` arrays. This array will contain all the data fed to it.

        Arguments:

            columns     Array of names of the columns. Will default to "col0", "col1", etc.
                        An exception will be thrown if the number of names does not match the
                        number of columns.

        Returns:

            An `ObjectStream` with the `awkward` array data as its one and only element.
        '''
        return ObjectStream(
            function_call("ResultAwkwardArray",
                          [self._q_ast, as_ast(columns)]))

    def _get_executor(self,
                      executor: Optional[Callable[[ast.AST], Awaitable[Any]]] = None) \
            -> Callable[[ast.AST], Awaitable[Any]]:
        r'''
        Returns an executor that can be used to run this query.
        Logic separated out as it is used from several different places.

        Arguments:
            executor            Callback to run the AST. Can be synchronous or coroutine.

        Returns:
            An executor that is either synchronous or a coroutine.
        '''
        if executor is not None:
            return executor

        from .event_dataset import find_ed_in_ast
        ed = find_ed_in_ast(self._q_ast)

        return ed.execute_result_async

    async def value_async(self,
                          executor: Optional[Callable[[ast.AST], Any]] = None) -> Any:
        r'''
        Evaluate the ObjectStream computation graph. Tracks back to the source dataset to
        understand how to evaluate the AST. It is possible to pass in an executor to override that
        behavior (used mostly for testing).

        Arguments:

            executor        A function that, when called with the ast, will return a future for
                            the result. Normally this is None, in which case the default executor
                            specified by the `EventDataset` is used instead.

        Returns:

            The first element of the ObjectStream after evaluation.

        Note:

            This is the non-blocking version: it returns an awaitable that completes
            when the query is done.
        '''
        # Fetch the executor
        exe = self._get_executor(executor)

        # Run it
        return await exe(self._q_ast)

    value = make_sync(value_async)
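
A hedged sketch of how the methods above compose (the `events` source stream is hypothetical; in practice the root of the chain comes from an `EventDataset`-style object that supplies the initial `ast`):

    jet_pts = (events
               .SelectMany(lambda e: e.jets())
               .Select(lambda j: j.pt())
               .AsAwkwardArray(['JetPt'])
               .value())

Each call adds one node to the query `ast`; nothing executes until `value()` (or an awaited `value_async()`) hands the completed `ast` to the executor.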
Example #17
from dataframe_expressions import DataFrame
from make_it_sync import make_sync


async def count_async(df: DataFrame) -> int:
    '''
    Given a dataframe, run the full computation and return the count at the outermost
    level as an int.
    '''
    from hl_tables.local import make_local_async
    return await make_local_async(df.Count(axis=0))


count = make_sync(count_async)
Example #18
 def close_session(self):
     return make_sync(self.async_client.close_session)()
Example #19
    class abc_base(ABC):
        @abstractmethod
        async def doit_async(self, a1: int, a2: int = 20, a3: int = 30):
            raise NotImplementedError()

        doit = make_sync(doit_async)
Example #20
def test_wrap_no_await():
    t_wrap = make_sync(simple_no_wait)
    assert t_wrap(1) == 2
Example #21
def test_wrap_with_loop():
    t_wrap = make_sync(simple_func)
    _ = get_event_loop()
    assert t_wrap(4) == 5
Example #22
 def get_channel(self, name: str) -> Union[XMChannel, None]:
     return make_sync(self.async_client.get_channel)(name)
Example #23
def test_wrap_exception():
    t_wrap = make_sync(simple_raise)
    with pytest.raises(Exception) as e:
        t_wrap(5)

    assert "hi there" in str(e.value)
Example #24
from typing import Any, List

from dataframe_expressions import DataFrame
from make_it_sync import make_sync

from .runner import runner, result
from .servicex.xaod_runner import xaod_runner
from .awkward.awkward_runner import awkward_runner

runners: List[runner] = [xaod_runner(), awkward_runner()]


async def make_local_async(df: DataFrame) -> Any:
    '''
    Fetch the data represented by `df` from the remote system and materialize it locally,
    on this computer.
    '''
    modified_df = df
    for r in runners:
        modified_df = await r.process(modified_df)
        if isinstance(modified_df, result):
            break

    if not isinstance(modified_df, result):
        raise Exception('Unable to process data frame!')

    return modified_df.result


make_local = make_sync(make_local_async)
Example #25
def test_wrap_kwargs_given():
    t_wrap = make_sync(simple_kwargs)
    assert t_wrap(1, me=11) == 12
Example #26
class ServiceXABC(ABC):
    '''
    Abstract base class for accessing the ServiceX front-end for a particular dataset. It does
    provide some implementations, but not a full set (hence it remains abstract).

    A lightweight, mostly immutable base class that holds basic configuration information for use
    with ServiceX file access, including the dataset name. Subclasses implement the various access
    methods. Note that not all methods may be accessible!
    '''
    def __init__(
        self,
        dataset: str,
        image: Optional[str] = None,
        max_workers: int = 20,
        status_callback_factory: Optional[
            StatusUpdateFactory] = _run_default_wrapper,
    ):
        '''
        Create and configure a ServiceX object for a dataset.

        Arguments

            dataset                     Name of a dataset from which queries will be selected.
            image                       Name of the transformer image used to transform the data.
                                        If None, the default image is used.
            max_workers                 Maximum number of transformers to run simultaneously on
                                        ServiceX.
            status_callback_factory     Factory to create a status notification callback for each
                                        query. One is created per query.


        Notes:

            -  The `status_callback_factory` argument, by default, uses the `tqdm` library to
               render progress bars in a terminal window or a graphic in a Jupyter notebook (with
               the proper Jupyter extensions installed). If it is specified as None, no updates
               will be rendered. A custom callback function can also be specified, which takes
               `(total_files, transformed, downloaded, skipped)` as arguments. The `total_files`
               parameter may be `None` until the system knows how many files need to be processed
               (and some files can even be completed before that is known).
        '''
        self._dataset = dataset
        self._image = image
        self._max_workers = max_workers

        # We can't create the notifier until the actual query,
        # so only need to save the status update.
        self._status_callback_factory = \
            status_callback_factory if status_callback_factory is not None \
            else _null_progress_feedback

    def _create_notifier(self, downloading: bool) -> _status_update_wrapper:
        'Internal method to create an updater from the status callback'
        return _status_update_wrapper(
            self._status_callback_factory(self._dataset, downloading))

    @abstractmethod
    async def get_data_rootfiles_async(self,
                                       selection_query: str) -> List[Path]:
        '''
        Fetch query data from ServiceX matching `selection_query` and return it as
        a list of root files. The files are uniquely ordered (the same query will always
        return the same order).

        Arguments:
            selection_query     The `qastle` string specifying the data to be queried

        Returns:
            root_files          The list of root files
        '''

    @abstractmethod
    async def get_data_pandas_df_async(self,
                                       selection_query: str) -> pd.DataFrame:
        '''
        Fetch query data from ServiceX matching `selection_query` and return it as
        a pandas dataframe. The data is uniquely ordered (the same query will always
        return the same order).

        Arguments:
            selection_query     The `qastle` string specifying the data to be queried

        Returns:
            df                  The pandas dataframe

        Exceptions:
            xxx                 If the data is not the correct shape (i.e. not a flat,
                                rectangular table).
        '''

    @abstractmethod
    async def get_data_awkward_async(self, selection_query: str) \
            -> Dict[bytes, ak.Array]:
        '''
        Fetch query data from ServiceX matching `selection_query` and return it as
        dictionary of awkward arrays, an entry for each column. The data is uniquely
        ordered (the same query will always return the same order).

        Arguments:
            selection_query     The `qastle` string specifying the data to be queried

        Returns:
            a                   Dictionary of jagged arrays (as needed), one for each
                                column. The dictionary keys are `bytes` to support possible
                                unicode characters.
        '''

    @abstractmethod
    async def get_data_parquet_async(self, selection_query: str) -> List[Path]:
        '''
        Fetch query data from ServiceX matching `selection_query` and return it as
        a list of parquet files. The files are uniquely ordered (the same query will always
        return the same order).

        Arguments:
            selection_query     The `qastle` string specifying the data to be queried

        Returns:
            root_files          The list of parquet files
        '''

    # Define the synchronous versions of the async methods for ease of use

    get_data_rootfiles = make_sync(get_data_rootfiles_async)
    get_data_pandas_df = make_sync(get_data_pandas_df_async)
    get_data_awkward = make_sync(get_data_awkward_async)
    get_data_parquet = make_sync(get_data_parquet_async)
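
A minimal sketch of how a concrete backend plugs in (hypothetical subclass, method bodies elided). Only the `_async` methods need implementing; the `make_sync` assignments above then give every subclass the blocking variants for free, assuming (as this class's own usage implies) that the wrapper dispatches through the subclass override:

    class MyServiceX(ServiceXABC):
        async def get_data_rootfiles_async(self, selection_query: str) -> List[Path]:
            ...

        async def get_data_pandas_df_async(self, selection_query: str) -> pd.DataFrame:
            ...

        async def get_data_awkward_async(self, selection_query: str) -> Dict[bytes, ak.Array]:
            ...

        async def get_data_parquet_async(self, selection_query: str) -> List[Path]:
            ...

    # files = MyServiceX('some_dataset_name').get_data_rootfiles(qastle_query)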
Example #27
def test_wrap_signature():
    s_orig = inspect.signature(simple_func)
    t_wrap = make_sync(simple_func)
    s_new = inspect.signature(t_wrap)

    assert str(s_orig) == str(s_new)
Example #28
def test_wrap_kwargs_defaults():
    wrap_it = make_sync(func_with_kwargs)
    assert wrap_it() == 14
Example #29
def test_wrap_kwargs_specified():
    wrap_it = make_sync(func_with_kwargs)
    assert wrap_it(bins=5, range=(5, 10)) == 10
Example #30
 def authenticate(self) -> bool:
     return make_sync(self.async_client.authenticate)()