Example #1
0
def test_list_trees():
    """Check that list_trees reports the expected tree names."""
    # A plain TTree is listed under its name.
    assert_equal(rnp.list_trees(load('vary1.root')), ['tree'])
    # A TNtuple (a TTree subclass) must be listed as well.
    assert_equal(rnp.list_trees(load('ntuple.root')), ['ntuple'])
def get_any_tree(tfilepath):
    """Return the name of the single tree stored in a ROOT file.

    :param tfilepath: path to the ROOT file to inspect
    :returns: the name of the unique tree (str)
    :raises ValueError: if the file contains no tree at all, or more
        than one tree (the candidate names are listed in the message)
    """
    trees = list_trees(tfilepath)  # list of tree names found in the file
    if len(trees) == 1:
        return trees[0]
    # Report the empty and the ambiguous case separately so the user
    # knows whether to fix the file or to pick a tree explicitly.
    if not trees:
        raise ValueError('No tree found in {}'.format(tfilepath))
    raise ValueError('More than one tree found in {}\nPossible trees: {}'.format(tfilepath, trees))
Example #3
0
File: tmva.py  Project: 0x0all/rep
    def _run_tmva_training(self, info):
        """
        Run subprocess to train tmva factory

        Spawns a child Python process that runs ``_tmvaFactory.main()``,
        feeds it this estimator and *info* as two pickled objects on
        stdin, then validates the artifacts the training produced.

        :param info: class with additional information
            (attributes used here: ``directory``, ``tmva_root``,
            ``tmva_job`` — presumably a training-config object; confirm
            against the caller)
        :raises AssertionError: if the subprocess exits non-zero or the
            result ROOT file has no TrainTree
        """
        # shell=True is required for the `cd {directory};` prefix; the
        # command string is built from internal paths, not user input.
        tmva_process = subprocess.Popen(
            'cd {directory}; {executable} -c "from rep.estimators import _tmvaFactory; _tmvaFactory.main()"'.format(
                directory=info.directory,
                executable=sys.executable),
            stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT,
            shell=True)

        # Hand the estimator and the auxiliary info to the child on stdin.
        cPickle.dump(self, tmva_process.stdin)
        cPickle.dump(info, tmva_process.stdin)
        # NOTE(review): stderr was merged into stdout above, so `stderr`
        # here is always None; the useful log text arrives in `stdout`.
        stdout, stderr = tmva_process.communicate()
        assert tmva_process.returncode == 0, \
            'ERROR: TMVA process is incorrect finished \n LOG: %s \n %s' % (stderr, stdout)

        # Training must have written a TrainTree into the result file.
        assert 'TrainTree' in root_numpy.list_trees(os.path.join(info.directory, info.tmva_root)), \
            'ERROR: Result file has not TrainTree'

        # Cache the trained method's weights XML for later application.
        xml_filename = os.path.join(info.directory, 'weights',
                                    '{job}_{name}.weights.xml'.format(job=info.tmva_job, name=self._method_name))
        with open(xml_filename, 'r') as xml_file:
            self.formula_xml = xml_file.read()
Example #4
0
File: root.py  Project: stefco/gwpy
def table_from_root(source, treename=None, include_names=None, **kwargs):
    """Read a Table from a ROOT tree

    Parameters
    ----------
    source : str or file-like
        path of the ROOT file, or an open file object exposing ``.name``
    treename : str, optional
        name of the tree to read; required when the file contains more
        than one tree
    include_names : list of str, optional
        branches to read (all branches by default)
    **kwargs
        remaining keyword arguments (including ``selection``) are passed
        to `root_numpy.root2array`

    Returns
    -------
    Table
        table built from the matching rows of the tree

    Raises
    ------
    ValueError
        if ``treename`` is not given and the file does not contain
        exactly one tree
    """
    import root_numpy

    # `columns` was the old name of `include_names`; accept it with a
    # deprecation warning for backwards compatibility.
    if include_names is None:
        try:
            include_names = kwargs.pop('columns')
        except KeyError:
            pass
        else:
            warnings.warn("Keyword argument `columns` has been renamed to "
                          "`include_names` to better match default "
                          "astropy.table.Table.read kwargs, please update "
                          "your call.", DeprecationWarning)

    # parse column filters into tree2array ``selection`` keyword
    # NOTE: not all filters can be passed directly to root_numpy, so we store
    #       those separately and apply them after-the-fact before returning
    try:
        selection = kwargs.pop('selection')
    except KeyError:  # no filters
        filters = None
    else:
        rootfilters = []
        filters = []
        for col, op_, value in parse_column_filters(selection):
            try:
                # reverse-map the operator function back to its string form
                opstr = [key for key in OPERATORS if OPERATORS[key] is op_][0]
            except (IndexError, KeyError):  # cannot filter with root_numpy
                filters.append((col, op_, value))
            else:  # can filter with root_numpy
                rootfilters.append('{0} {1} {2!r}'.format(col, opstr, value))
        kwargs['selection'] = ' && '.join(rootfilters)

    # pass file name (not path)
    if not isinstance(source, string_types):
        source = source.name

    # find single tree (if only one tree present)
    if treename is None:
        trees = root_numpy.list_trees(source)
        if len(trees) == 1:
            treename = trees[0]
        elif not trees:
            raise ValueError("No trees found in %s" % source)
        else:
            # message previously read "please select on via" (typo)
            raise ValueError("Multiple trees found in %s, please select one "
                             "via the `treename` keyword argument, e.g. "
                             "`treename='events'`. Available trees are: %s."
                             % (source, ', '.join(map(repr, trees))))

    # read, filter, and return
    t = Table(root_numpy.root2array(source, treename,
                                    branches=include_names, **kwargs))
    if filters:
        return filter_table(t, *filters)
    return t
Example #5
0
def test_list_trees():
    """list_trees should report each tree once, including subdirectories."""
    # A plain TTree and a TNtuple are both listed by name.
    assert_equal(rnp.list_trees(load('vary1.root')), ['tree'])
    assert_equal(rnp.list_trees(load('ntuple.root')), ['ntuple'])
    # Writing the file repeatedly creates multiple key cycles for the
    # same tree; the tree must still be reported exactly once.
    with temp() as rfile:
        keep_alive = ROOT.TTree('tree', 'tree')
        rfile.Write()
        assert_equal(len(rnp.list_trees(rfile.GetName())), 1)
        rfile.Write()
        assert_equal(len(rnp.list_trees(rfile.GetName())), 1)
        # A same-named tree inside a subdirectory is reported with its
        # directory prefix.
        subdir = rfile.mkdir('dir')
        subdir.cd()
        keep_alive = ROOT.TTree('tree', 'tree')
        rfile.Write()
        assert_equal(set(rnp.list_trees(rfile.GetName())),
                     set(['tree', 'dir/tree']))
Example #6
0
def export_root_to_csv(filename, branches=None):
    """From selected file exports all the trees in separate files, exports all the branches,
    requires rootpy and root_numpy modules

    :param filename: path to the input ROOT file
    :param branches: optional sequence of branch names to export;
        all branches are exported by default
    :return: list of paths of the CSV files written, one per tree
    """
    import root_numpy
    import os
    trees = root_numpy.list_trees(filename)
    # list_trees returns tree names, so say "trees", not "branches"
    print("The following trees are found:\n %s" % trees)
    result = []
    for tree_name in trees:
        x = root_numpy.root2array(filename, treename=tree_name, branches=branches)
        # name each CSV after the source file plus the tree name
        new_file_name = os.path.splitext(filename)[0] + '_' + tree_name + '.csv'
        pandas.DataFrame(x).to_csv(new_file_name)
        result.append(new_file_name)
    print("Successfully converted")
    return result
Example #7
0
def check_truncate_impute(filename):
    """Parametrized check of truncating/imputing variable-length branches
    via the ``(name, fill_value[, length])`` branch-tuple syntax.

    :param filename: fixture ROOT file name, resolved via ``load``
    """
    filename = load(filename)
    # first convert array and find object columns
    arr = rnp.root2array(filename)
    assert_true(len(arr))
    # dtype 'O' fields hold per-entry variable-length arrays
    object_fields = [field for field in arr.dtype.names if arr.dtype[field] == 'O']
    # 1-D object fields can be truncated; multi-dimensional ones cannot
    fields_1d = [field for field in object_fields
                 if arr[field][0].dtype != 'O' and len(arr[field][0].shape) == 1]
    fields_md = list(set(object_fields) - set(fields_1d))
    # the fixture must provide both kinds for the checks below
    assert_true(fields_1d)
    assert_true(fields_md)
    # sort for deterministic ordering across runs
    fields_1d.sort()
    fields_md.sort()

    rfile = ROOT.TFile.Open(filename)
    tree = rfile.Get(rnp.list_trees(filename)[0])

    # test both root2array and tree2array
    for func, arg in [(rnp.root2array, filename), (rnp.tree2array, tree)]:

        # (name, fill_value) selects only those fields, truncated
        arr1 = func(arg, branches=[(f, 0) for f in fields_1d])
        assert_true(len(arr1))
        assert_equal(set(arr1.dtype.names), set(fields_1d))
        # Giving length of 1 will result in the same output
        arr2 = func(arg, branches=[(f, 0, 1) for f in fields_1d])
        assert_array_equal(arr1, arr2)
        # fill_value of 1 instead of 0 should change output array
        arr2 = func(arg, branches=[(f, 1, 1) for f in fields_1d])
        assert_raises(AssertionError, assert_array_equal, arr1, arr2)
        # check dtype shape
        arr3 = func(arg, branches=[(f, 0, 3) for f in fields_1d])
        for field in fields_1d:
            assert_equal(arr3.dtype[field].shape, (3,))

        # length must be at least 1
        assert_raises(ValueError, func, arg, branches=[(fields_1d[0], 0, 0)])
        # tuple is not of length 2 or 3
        assert_raises(ValueError, func, arg, branches=[(fields_1d[0], 1, 1, 1)])
        assert_raises(ValueError, func, arg, branches=(fields_1d[0], 1, 1, 1))
        # can only truncate 1d arrays
        assert_raises(TypeError, func, arg, branches=(fields_md[0], 0))

        # expressions
        # an expression branch without truncation yields an object column...
        arr1 = func(arg, branches='{0}==0'.format(fields_1d[0]))
        assert_equal(arr1.dtype, 'O')
        # ...while adding a fill_value truncates it to a scalar column
        arr2 = func(arg, branches=('{0}==0'.format(fields_1d[0]), 0))
        assert_equal(arr2.dtype, arr1[0].dtype)
Example #8
0
File: tests.py  Project: jpata/root_numpy
def test_list_trees():
    """The vary1.root fixture contains exactly one tree, named 'tree'."""
    found = rnp.list_trees(load('vary1.root'))
    assert_equal(found, ['tree'])
Example #9
0
def read_root(path, tree_key=None, columns=None, ignore=None, chunksize=None, where=None, *kargs, **kwargs):
    """
    Read a ROOT file into a pandas DataFrame.
    Further *kargs and *kwargs are passed to root_numpy's root2array.
    If the root file contains a branch called index, it will become the DataFrame's index.

    Parameters
    ----------
    path: string
        The path to the root file
    tree_key: string
        The key of the tree to load.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument)
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with `chunksize` rows
    where: str
        Only rows that match the expression will be read

    Returns
    -------
        DataFrame created from matching data in the specified TTree

    Raises
    ------
    ValueError
        If `tree_key` is not given and the file does not contain exactly
        one tree, or if the index column is explicitly ignored.

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """
    if not tree_key:
        trees = list_trees(path)
        if len(trees) == 1:
            tree_key = trees[0]
        elif not trees:
            # previously the "more than one tree" error was raised for an
            # empty file as well; report the empty case distinctly
            raise ValueError('No trees found in {}'.format(path))
        else:
            raise ValueError('More than one tree found in {}'.format(path))

    branches = list_branches(path, tree_key)

    if not columns:
        all_vars = branches
    else:
        # index is always loaded if it exists
        if isinstance(columns, string_types):
            columns = [columns]
        if 'index' in branches:
            # copy before appending so the caller's list is not mutated
            columns = columns[:]
            columns.append('index')
        columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns)

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        ignored = get_matching_variables(branches, ignore, fail=False)
        ignored = list(itertools.chain.from_iterable(list(map(expand_braces, ignored))))
        if 'index' in ignored:
            raise ValueError('index variable is being ignored!')
        for var in ignored:
            all_vars.remove(var)

    if chunksize:
        # count entries once up front so we know how many chunks to yield
        f = ROOT.TFile(path)
        n_entries = f.Get(tree_key).GetEntries()
        f.Close()
        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(path, tree_key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *kargs, **kwargs)
                yield convert_to_dataframe(arr)
        return genchunks()

    arr = root2array(path, tree_key, all_vars, selection=where, *kargs, **kwargs)
    return convert_to_dataframe(arr)
Example #10
0
    # NOTE(review): fragment of a CLI option-handling routine; the function
    # header and the tail of the final `else` branch lie outside this view.
    if len(options.data) > 2:
        raise SystemExit("ERROR: To many arguments for the data file with -d. Use: -d filename.root -d tree.")


    # Fall back to 100 bins when -n was not given.
    if options.bins == None:
        logging.info("No binning with -n specified, use the default value 100")
        bins = 100
    else:
        bins = options.bins


    #Reference MC
    referenceMC = options.montecarlo[0]
    #if only one argument to -m or -d is given it is assumed that there is only one tree
    if len(options.montecarlo) == 1:
        trees = list_trees(referenceMC)
        if len(trees) == 1:
            referenceMC_tree = trees[0]
        else:
            # NOTE(review): two arguments to SystemExit print as a tuple,
            # not a formatted message — likely meant concatenation/format.
            raise SystemExit('No tree or more than one found in ', referenceMC )
    else:
        referenceMC_tree = options.montecarlo[1]

    #Reference Data
    referenceData = options.data[0]
    #if only one argument to -m or -d is given it is assumed that there is only one tree
    if len(options.data) == 1:
        trees = list_trees(referenceData)
        if len(trees) == 1:
            referenceData_tree = trees[0]
        else:
Example #11
0
def read_root(paths, key=None, columns=None, ignore=None, chunksize=None, where=None, flatten=False, *args, **kwargs):
    """
    Read a ROOT file, or list of ROOT files, into a pandas DataFrame.
    Further *args and *kwargs are passed to root_numpy's root2array.
    If the root file contains a branch matching __index__*, it will become the DataFrame's index.

    Parameters
    ----------
    paths: string or list
        The path(s) to the root file(s)
    key: string
        The key of the tree to load.
    columns: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). Matching columns are read.
        The columns beginning with `noexpand:` are not interpreted as shell-patterns,
        allowing formula columns such as `noexpand:2*x`. The column in the returned DataFrame
        will not have the `noexpand:` prefix.
    ignore: str or sequence of str
        A sequence of shell-patterns (can contain *, ?, [] or {}). All matching columns are ignored (overriding the columns argument).
    chunksize: int
        If this parameter is specified, an iterator is returned that yields DataFrames with `chunksize` rows.
    where: str
        Only rows that match the expression will be read.
    flatten: sequence of str
        A sequence of column names. Will use root_numpy.stretch to flatten arrays in the specified columns into
        individual entries. All arrays specified in the columns must have the same length for this to work.
        Be careful if you combine this with chunksize, as chunksize will refer to the number of unflattened entries,
        so you will be iterating over a number of entries that is potentially larger than chunksize.
        The index of each element within its former array will be saved in the __array_index column.

    Returns
    -------
        DataFrame created from matching data in the specified TTree

    Notes
    -----

        >>> df = read_root('test.root', 'MyTree', columns=['A{B,C}*', 'D'], where='ABB > 100')

    """

    if not isinstance(paths, list):
        paths = [paths]
    # Use a single file to search for trees and branches
    seed_path = paths[0]

    # Auto-detect the tree only when the seed file holds exactly one.
    if not key:
        trees = list_trees(seed_path)
        if len(trees) == 1:
            key = trees[0]
        elif len(trees) == 0:
            raise ValueError('No trees found in {}'.format(seed_path))
        else:
            raise ValueError('More than one tree found in {}'.format(seed_path))

    branches = list_branches(seed_path, key)

    if not columns:
        # no filter given: read every branch
        all_vars = branches
    else:
        if isinstance(columns, string_types):
            columns = [columns]
        # __index__* is always loaded if it exists
        # XXX Figure out what should happen with multi-dimensional indices
        index_branches = list(filter(lambda x: x.startswith('__index__'), branches))
        if index_branches:
            # copy before appending so the caller's list is not mutated
            columns = columns[:]
            columns.append(index_branches[0])
        # `noexpand:`-prefixed entries bypass shell-pattern expansion
        columns, noexpand = filter_noexpand_columns(columns)
        columns = list(itertools.chain.from_iterable(list(map(expand_braces, columns))))
        all_vars = get_matching_variables(branches, columns) + noexpand

    if ignore:
        if isinstance(ignore, string_types):
            ignore = [ignore]
        ignored = get_matching_variables(branches, ignore, fail=False)
        ignored = list(itertools.chain.from_iterable(list(map(expand_braces, ignored))))
        if any(map(lambda x: x.startswith('__index__'), ignored)):
            raise ValueError('__index__* branch is being ignored!')
        for var in ignored:
            all_vars.remove(var)

    def do_flatten(arr, flatten):
        # Expand per-entry arrays into one row per element, recording each
        # element's position within its former array in __array_index.
        if flatten is True:
            warnings.warn(" The option flatten=True is deprecated. Please specify the branches you would like "
                          "to flatten in a list: flatten=['foo', 'bar']", FutureWarning)
            arr_, idx = stretch(arr, return_indices=True)
        else:
            # keep scalar columns plus the explicitly requested ones;
            # other non-scalar columns are dropped with a warning
            nonscalar = get_nonscalar_columns(arr)
            fields = [x for x in arr.dtype.names if (x not in nonscalar or x in flatten)]
            will_drop = [x for x in arr.dtype.names if x not in fields]
            if will_drop:
                warnings.warn("Ignored the following non-scalar branches: {bad_names}"
                      .format(bad_names=", ".join(will_drop)), UserWarning)
            arr_, idx = stretch(arr, fields=fields, return_indices=True)
        arr = append_fields(arr_, '__array_index', idx, usemask=False, asrecarray=True)
        return arr

    if chunksize:
        # Count the total entries across all files via a TChain so we know
        # how many chunks the generator must yield.
        tchain = ROOT.TChain(key)
        for path in paths:
            tchain.Add(path)
        n_entries = tchain.GetEntries()
        # XXX could explicitly clean up the opened TFiles with TChain::Reset

        def genchunks():
            for chunk in range(int(ceil(float(n_entries) / chunksize))):
                arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs)
                if flatten:
                    arr = do_flatten(arr, flatten)
                yield convert_to_dataframe(arr)
        return genchunks()

    arr = root2array(paths, key, all_vars, selection=where, *args, **kwargs)
    if flatten:
        arr = do_flatten(arr, flatten)
    return convert_to_dataframe(arr)