Пример #1
0
def convertTree(config, treeName, category):
    """Configure one Dataset from ``config`` for ``category`` and process it.

    Args:
        config: Configuration object. The attributes read here are
            outputFolder, outputPrefix, sampleName, categories,
            sampleSelection, excludeBranches, files, outputVariables,
            indexVariables, addRatio, sampleSF and maxEvents.
        treeName: Passed through unchanged to the Dataset constructor.
        category: Key used to look up the entry in ``config.categories``.

    Returns:
        None. The result of ``Dataset.process`` is not propagated.
    """
    logging.info("Starting conversion")

    checkNcreateFolder(config.outputFolder)

    # Output name pattern: <prefix>_<sample>_<category name>
    nameParts = (
        config.outputPrefix,
        config.sampleName,
        config.categories[category].name,
    )
    dataset = Dataset("_".join(nameParts), config.outputFolder, treeName)

    sampleSelection = config.sampleSelection
    logging.info("Setting sample selection: %s", sampleSelection)
    dataset.sampleSelection = sampleSelection

    categorySelection = config.categories[category].selection
    logging.info("Setting category selection: %s", categorySelection)
    dataset.selection = categorySelection

    # Only override the ignore list of the Dataset if the config provides one.
    branchesToSkip = config.excludeBranches
    if branchesToSkip is not None:
        dataset.ignoreBranches = branchesToSkip

    logging.info("Setting files")
    dataset.addFiles(config.files)

    logging.info("Setting output branches")
    dataset.setOutputBranches(config.outputVariables)

    indexBranches = config.indexVariables
    logging.debug("Setting indexing branches: %s", indexBranches)
    dataset.outputIndex = indexBranches

    if config.addRatio:
        dataset.setSF(config.sampleSF, "sampleRatio")

    logging.info("Starting processing dataset")
    dataset.process(config.maxEvents)

    logging.info("Finished processing")
Пример #2
0
def convertTreeMulti(config, treeName, category):
    """Process every sample in ``config.samples`` and write one merged output.

    One Dataset is created per sample and processed with output disabled.
    The per-sample dataframes are then concatenated and written through a
    copy of the first sample's Dataset, so the first sample determines the
    output name (built from ``config.sampleName``).

    Args:
        config: Configuration object. The attributes read here are
            outputFolder, outputPrefix, sampleName, samples, sampleInfo,
            categories, excludeBranches, outputVariables, indexVariables,
            addRatio and maxEvents.
        treeName: Passed through unchanged to each Dataset constructor.
        category: Key used to look up the entry in ``config.categories``.

    Raises:
        ValueError: If ``config.samples`` is empty. Without a first sample
            there is no Dataset to write the output with.
    """
    logging.info("Starting conversion using multi method")

    samples = list(config.samples)
    if not samples:
        # Fail early with a clear message and before creating the output
        # folder, instead of an AttributeError on ``None`` after the loop.
        raise ValueError(
            "convertTreeMulti requires at least one sample in config.samples"
        )

    checkNcreateFolder(config.outputFolder)

    # The category is the same for every sample, so look it up once.
    categoryConfig = config.categories[category]

    # For multi mode, we generate a dataset per sample. In the loop the output
    # is disabled and in the end the dataframes of all samples are merged and
    # saved through the dataset of the first sample.

    # Event budget shared by all samples; every sample consumes as many
    # events as rows it returned.
    # NOTE(review): once the budget is used up, later samples are still
    # processed with a non-positive limit -- confirm Dataset.process handles
    # that as intended.
    eventsLeft = config.maxEvents
    dfs = []
    baseDataset = None
    for iSample, sample in enumerate(samples):
        logging.info("Processing sample %s", sample)
        sampleInfo = config.sampleInfo[sample]

        # The first sample carries the name of the merged output.
        sampleLabel = config.sampleName if iSample == 0 else sampleInfo.name
        datasetName = (
            config.outputPrefix + "_" + sampleLabel + "_" + categoryConfig.name
        )
        dataset = Dataset(datasetName, config.outputFolder, treeName)

        logging.info("Setting sample selection: %s", sampleInfo.selection)
        dataset.sampleSelection = sampleInfo.selection
        logging.info("Setting category selection: %s", categoryConfig.selection)
        dataset.selection = categoryConfig.selection

        if config.excludeBranches is not None:
            dataset.ignoreBranches = config.excludeBranches

        logging.info("Setting files")
        dataset.addFiles(sampleInfo.files)

        logging.info("Setting output branches")
        dataset.setOutputBranches(config.outputVariables)

        logging.debug("Setting indexing branches: %s", config.indexVariables)
        dataset.outputIndex = config.indexVariables

        if config.addRatio:
            dataset.setSF(sampleInfo.addSF, "sampleRatio")

        logging.info("Starting processing dataset")
        thisSampleDF = dataset.process(eventsLeft, skipOutput=True)
        eventsLeft -= len(thisSampleDF)
        dfs.append(thisSampleDF)
        if iSample == 0:
            baseDataset = copy(dataset)

    baseDataset.makeOutput(pd.concat(dfs))
    logging.info("Finished processing")
Пример #3
0
def test_Dataset_getSelectedDataframe(sampleSel, sel, mockTree, mocker):
    """Check getSelectedDataframe against a manually filtered dataframe.

    The expected frame is the dataframe of ``mockTree`` with the sample
    selection and then the category selection applied; an empty string
    means the corresponding selection is skipped.
    """
    datasetUnderTest = Dataset("someName")
    datasetUnderTest.sampleSelection = sampleSel
    datasetUnderTest.selection = sel

    expected = mockTree.pandas.df()
    print(expected)
    # Apply the selections in the same order: sample first, then category.
    for queryString in (sampleSel, sel):
        if queryString != "":
            expected = expected.query(queryString)
    print(expected)

    result = datasetUnderTest.getSelectedDataframe(mockTree)
    assert result.equals(expected)
Пример #4
0
def test_Dataset_process(mockTree, mocker):
    """Check Dataset.process on two mocked input files.

    ``uproot.open`` is patched so that each file name yields a context
    manager returning a dict with a copy of a mock tree. The expected result
    is the concatenation of both tree dataframes after the selection, without
    ``branch2``, which is not listed in the output branches.
    """
    selection = "branch1 >= 7 and branch2 >=2"
    dataset = Dataset("someName")

    # Two trees that differ from the fixture in one branch each.
    treeOfFile1 = copy.deepcopy(mockTree)
    treeOfFile2 = copy.deepcopy(mockTree)

    treeOfFile1.dataframe.update(pd.DataFrame({'branch2': list(range(11, 1, -1))}))
    treeOfFile1.setDF()
    treeOfFile2.dataframe.update(pd.DataFrame({'branch1': list(range(9, -1, -1))}))
    treeOfFile2.setDF()

    # Put the dataset into the state it has after adding files and branches.
    dataset.filesAdded = True
    dataset.files = ["file1.root", "file2.root"]
    dataset.branches = ["branch1", "branch2", "branch3"]

    dataset.outputBranchesSet = True
    dataset.outputBranches = ["branch1", "branch3"]

    def fakeOpen(*args, **kwargs):
        # The first positional argument is the file name; everything that is
        # not file1.root is served with the second tree.
        sourceTree = treeOfFile1 if args[0] == "file1.root" else treeOfFile2
        handle = mocker.MagicMock()
        # Entering the with statement hands out a fresh copy of the tree.
        handle.__enter__ = mocker.Mock(
            return_value={dataset.treeName: copy.deepcopy(sourceTree)}
        )
        return handle

    # Replace the open call; the side effect supplies the context manager.
    openMock = mocker.MagicMock(side_effect=fakeOpen)
    mocker.patch("uproot.open", openMock, create=True)

    dataset.selection = selection

    selectedFrames = [
        treeOfFile1.dataframe.query(selection),
        treeOfFile2.dataframe.query(selection),
    ]
    expected = pd.concat(selectedFrames).drop(columns=["branch2"])

    outputDF = dataset.process(skipOutput=True)

    assert outputDF.equals(expected)