Пример #1
0
def convertTree(inputs, outFolder, name, treeName, indexVars):
    """Build a Dataset from the ROOT files listed in text files and process it.

    Args:
        inputs: Iterable of paths to text files. Every line that contains
            ".root" is taken as one input file; other lines are skipped.
        outFolder: Output folder; handed to checkNcreateFolder and Dataset.
        name: First argument handed to Dataset.
        treeName: Third argument handed to Dataset.
        indexVars: Value assigned to ``dataset.outputIndex``.
    """
    logging.info("Starting conversion")

    checkNcreateFolder(outFolder)

    dataset = Dataset(name, outFolder, treeName)

    files = []
    for _input in inputs:
        with open(_input, "r") as f:
            # Stream the list file line by line instead of loading it whole.
            for line in f:
                # Strip the newline and any stray surrounding whitespace so
                # it does not end up inside the file path.
                entry = line.strip()
                if ".root" in entry:
                    files.append(entry)

    logging.info("Setting files")
    dataset.addFiles(files)
    logging.info("Setting output branches")
    dataset.setOutputBranches("*")

    logging.debug("Setting indexing branches: %s", indexVars)
    dataset.outputIndex = indexVars

    logging.info("Starting processing dataset")
    # NOTE(review): the huge literal presumably means "no event limit" --
    # confirm against Dataset.process.
    dataset.process(999999999999999999999)
    logging.info("Finished processing")
Пример #2
0
def convertTree(config, treeName, category):
    """Configure and run a single Dataset conversion for one category.

    The dataset name is "<outputPrefix>_<sampleName>_<category name>".
    Selections, excluded branches, input files, output branches, index
    variables and (optionally) the sample scale factor are copied from
    ``config`` onto the Dataset before ``process`` is called.

    Args:
        config: Configuration object providing the attributes read below.
        treeName: Third argument handed to Dataset.
        category: Key into ``config.categories``.
    """
    logging.info("Starting conversion")

    checkNcreateFolder(config.outputFolder)

    datasetName = "_".join(
        [config.outputPrefix, config.sampleName, config.categories[category].name]
    )
    dataset = Dataset(datasetName, config.outputFolder, treeName)

    sampleSelection = config.sampleSelection
    logging.info("Setting sample selection: %s", sampleSelection)
    dataset.sampleSelection = sampleSelection

    categorySelection = config.categories[category].selection
    logging.info("Setting category selection: %s", categorySelection)
    dataset.selection = categorySelection

    # Only override the Dataset's ignored branches when the config sets them.
    branchesToSkip = config.excludeBranches
    if branchesToSkip is not None:
        dataset.ignoreBranches = branchesToSkip

    logging.info("Setting files")
    dataset.addFiles(config.files)

    logging.info("Setting output branches")
    dataset.setOutputBranches(config.outputVariables)

    indexBranches = config.indexVariables
    logging.debug("Setting indexing branches: %s", indexBranches)
    dataset.outputIndex = indexBranches

    if config.addRatio:
        dataset.setSF(config.sampleSF, "sampleRatio")

    logging.info("Starting processing dataset")
    dataset.process(config.maxEvents)

    logging.info("Finished processing")
Пример #3
0
def test_Dataset_addFiles_exception(mockTree, mocker):
    """addFiles raises RuntimeError for a dataset already marked as filled.

    ``uproot.open`` is replaced by a stub that returns the mock tree under
    the dataset's tree name. The dataset is flagged as having files and its
    branches are set to ["branch5"] before addFiles is called.
    NOTE(review): presumably the error comes from a branch mismatch with the
    mockTree fixture -- confirm against Dataset.addFiles.
    """
    dataset = Dataset("someName")

    # The tree name is looked up lazily, at the time the stub is called.
    mocker.patch(
        "uproot.open",
        new=lambda inputfile: {dataset.treeName: mockTree},
    )

    dataset.filesAdded = True
    dataset.branches = ["branch5"]

    with pytest.raises(RuntimeError):
        dataset.addFiles(["someOtherFile.root"])
Пример #4
0
def test_Dataset_addFiles(mockTree, mocker):
    """addFiles records files and appends on repeated calls.

    ``uproot.open`` is replaced by a stub that returns the mock tree under
    the dataset's tree name, so no real ROOT file is opened.
    """
    dataset = Dataset("someName")

    # The tree name is looked up lazily, at the time the stub is called.
    mocker.patch(
        "uproot.open",
        new=lambda inputfile: {dataset.treeName: mockTree},
    )

    dataset.addFiles(["someFile.root"])
    assert dataset.filesAdded
    assert dataset.files == ["someFile.root"]

    # A second call extends the existing file list.
    dataset.addFiles(["someOtherFile.root"])
    assert dataset.files == ["someFile.root", "someOtherFile.root"]
Пример #5
0
def convertTreeMulti(config, treeName, category):
    """Convert several samples into one combined output for a category.

    One Dataset is built per entry of ``config.samples`` and processed with
    its own output disabled (``skipOutput=True``). The resulting dataframes
    are concatenated and written through a copy of the first sample's
    Dataset. The first dataset is named after ``config.sampleName``; the
    others use the name stored in ``config.sampleInfo``.

    Args:
        config: Configuration object providing the attributes read below.
        treeName: Third argument handed to each Dataset.
        category: Key into ``config.categories``.
    """
    logging.info("Starting conversion using multi method")
    checkNcreateFolder(config.outputFolder)

    remainingEvents = config.maxEvents
    sampleFrames = []
    baseDataset = None

    for iSample, sample in enumerate(config.samples):
        logging.info("Processing sample %s", sample)
        isFirstSample = iSample == 0

        if isFirstSample:
            sampleLabel = config.sampleName
        else:
            sampleLabel = config.sampleInfo[sample].name
        datasetName = "_".join(
            [config.outputPrefix, sampleLabel, config.categories[category].name]
        )
        dataset = Dataset(datasetName, config.outputFolder, treeName)

        sampleCfg = config.sampleInfo[sample]
        sampleSelection = sampleCfg.selection
        logging.info("Setting sample selection: %s", sampleSelection)
        dataset.sampleSelection = sampleSelection

        categorySelection = config.categories[category].selection
        logging.info("Setting category selection: %s", categorySelection)
        dataset.selection = categorySelection

        # Only override the Dataset's ignored branches when the config sets them.
        branchesToSkip = config.excludeBranches
        if branchesToSkip is not None:
            dataset.ignoreBranches = branchesToSkip

        logging.info("Setting files")
        dataset.addFiles(sampleCfg.files)

        logging.info("Setting output branches")
        dataset.setOutputBranches(config.outputVariables)

        indexBranches = config.indexVariables
        logging.debug("Setting indexing branches: %s", indexBranches)
        dataset.outputIndex = indexBranches

        if config.addRatio:
            dataset.setSF(sampleCfg.addSF, "sampleRatio")

        logging.info("Starting processing dataset")
        frame = dataset.process(remainingEvents, skipOutput=True)
        # The rows taken by this sample reduce the count handed to the next one.
        remainingEvents -= len(frame)
        sampleFrames.append(frame)

        # Keep a copy of the first (already processed) dataset to write the
        # combined output with.
        if isFirstSample:
            baseDataset = copy(dataset)

    baseDataset.makeOutput(pd.concat(sampleFrames))
    logging.info("Finished processing")