Python Dataset.selection примеры использования

Язык программирования: Python

Пространство имен/Пакет: preprocessing.dataset

Класс/Тип: Dataset

Метод/Функция: selection

Примеров на hotexamples.com: 4

Найдено 4 примера использования Python Dataset.selection. Это лучшие примеры кода на Python для preprocessing.dataset.Dataset.selection, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Dataset(21)

branches(8)

filesAdded(8)

setOutputBranches(8)

addFiles(5)

selection(4)

process(4)

sampleSelection(3)

outputIndex(3)

load_questions(3)

addFlatSFtoDataframe(2)

ignoreBranches(2)

_resolveWildcardBranch(2)

rescale_labels(1)

outputBranchesSet(1)

outputBranches(1)

get_validation(1)

index_to_text(1)

cleanBranchList(1)

get_training(1)

get_counters(1)

getSelectedDataframe(1)

getBranchesFromFile(1)

generate_dataset(1)

files(1)

encode_single_question(1)

get_test(1)

Пример #1

Показать файл

def convertTree(config, treeName, category):
    """Configure a single ``Dataset`` from ``config`` and process it.

    Thin wrapper around the functionality of preprocessing.dataset: the
    dataset is named from the output prefix, the sample name and the
    category name, then receives its selections, input files, output
    branches and index branches before ``process`` is called.

    Args:
        config: Configuration object; the attributes read here are
            ``outputFolder``, ``outputPrefix``, ``sampleName``,
            ``categories``, ``sampleSelection``, ``excludeBranches``,
            ``files``, ``outputVariables``, ``indexVariables``,
            ``addRatio``, ``sampleSF`` and ``maxEvents``.
        treeName: Tree name handed to the ``Dataset`` constructor.
        category: Key into ``config.categories``.
    """
    logging.info("Starting conversion")

    checkNcreateFolder(config.outputFolder)

    categoryInfo = config.categories[category]
    nameParts = [config.outputPrefix, config.sampleName, categoryInfo.name]
    dataset = Dataset("_".join(nameParts), config.outputFolder, treeName)

    # Selections: first the sample-level one, then the category-level one.
    logging.info("Setting sample selection: %s", config.sampleSelection)
    dataset.sampleSelection = config.sampleSelection
    categorySelection = categoryInfo.selection
    logging.info("Setting category selection: %s", categorySelection)
    dataset.selection = categorySelection

    # Branch exclusion is optional; None means nothing was configured.
    branchesToSkip = config.excludeBranches
    if branchesToSkip is not None:
        dataset.ignoreBranches = branchesToSkip

    logging.info("Setting files")
    dataset.addFiles(config.files)

    logging.info("Setting output branches")
    dataset.setOutputBranches(config.outputVariables)

    indexBranches = config.indexVariables
    logging.debug("Setting indexing branches: %s", indexBranches)
    dataset.outputIndex = indexBranches

    if config.addRatio:
        dataset.setSF(config.sampleSF, "sampleRatio")

    logging.info("Starting processing dataset")
    dataset.process(config.maxEvents)

    logging.info("Finished processing")

Пример #2

Показать файл

def convertTreeMulti(config, treeName, category):
    """Process every sample in ``config.samples`` and write one merged output.

    One ``Dataset`` is built per sample and processed with
    ``skipOutput=True``. The dataframes returned by ``process`` are collected,
    concatenated with ``pd.concat`` and passed to ``makeOutput`` of a copy of
    the first sample's dataset.

    The event budget starts at ``config.maxEvents`` and is reduced by the
    length of each returned dataframe before the next sample is processed.

    Args:
        config: Configuration object; the attributes read here are
            ``outputFolder``, ``maxEvents``, ``samples``, ``sampleInfo``,
            ``outputPrefix``, ``sampleName``, ``categories``,
            ``excludeBranches``, ``outputVariables``, ``indexVariables``
            and ``addRatio``.
        treeName: Tree name handed to each ``Dataset`` constructor.
        category: Key into ``config.categories``.

    Raises:
        ValueError: If ``config.samples`` is empty, so there is no dataset
            to write the output with.
    """
    logging.info("Starting conversion using multi method")
    checkNcreateFolder(config.outputFolder)

    # For multi mode, we generate a dataset per sample. In the loop the output
    # is disabled and in the end the dataframes of the 1:: samples will be
    # added to the first and saved.

    categoryInfo = config.categories[category]
    eventsLeft = config.maxEvents
    dfs = []
    baseDataset = None
    for iSample, sample in enumerate(config.samples):
        logging.info("Processing sample %s", sample)
        sampleInfo = config.sampleInfo[sample]
        # The first sample is named after the overall sample name; later
        # samples use their own name from the sample info.
        if iSample == 0:
            sampleLabel = config.sampleName
        else:
            sampleLabel = sampleInfo.name
        datasetName = config.outputPrefix + "_" + sampleLabel + "_" + categoryInfo.name
        dataset = Dataset(datasetName, config.outputFolder, treeName)

        logging.info("Setting sample selection: %s", sampleInfo.selection)
        dataset.sampleSelection = sampleInfo.selection
        logging.info("Setting category selection: %s", categoryInfo.selection)
        dataset.selection = categoryInfo.selection

        if config.excludeBranches is not None:
            dataset.ignoreBranches = config.excludeBranches
        logging.info("Setting files")
        dataset.addFiles(sampleInfo.files)

        logging.info("Setting output branches")
        dataset.setOutputBranches(config.outputVariables)

        logging.debug("Setting indexing branches: %s", config.indexVariables)
        dataset.outputIndex = config.indexVariables

        if config.addRatio:
            dataset.setSF(sampleInfo.addSF, "sampleRatio")

        logging.info("Starting processing dataset")
        thisSampleDF = dataset.process(eventsLeft, skipOutput=True)
        eventsLeft -= len(thisSampleDF)
        dfs.append(thisSampleDF)
        if iSample == 0:
            baseDataset = copy(dataset)

    # Without any sample there is neither a base dataset nor a dataframe to
    # concatenate; fail with a clear message instead of an AttributeError on
    # None.
    if baseDataset is None:
        raise ValueError("config.samples is empty: no dataset to write output for")

    baseDataset.makeOutput(pd.concat(dfs))
    logging.info("Finished processing")

Пример #3

Показать файл

def test_Dataset_getSelectedDataframe(sampleSel, sel, mockTree, mocker):
    """Compare ``Dataset.getSelectedDataframe`` with a manual pandas query.

    The expected frame is built by applying each non-empty selection string
    (sample selection first, then category selection) to the dataframe of
    ``mockTree``; the dataset must return an equal frame.
    """
    datasetUnderTest = Dataset("someName")
    datasetUnderTest.sampleSelection = sampleSel
    datasetUnderTest.selection = sel

    expected = mockTree.pandas.df()
    print(expected)
    # An empty string means "no selection", so it is skipped.
    for expression in (sampleSel, sel):
        if expression != "":
            expected = expected.query(expression)
    print(expected)

    result = datasetUnderTest.getSelectedDataframe(mockTree)
    assert result.equals(expected)

Пример #4

Показать файл

def test_Dataset_process(mockTree, mocker):
    """Run ``Dataset.process`` over two mocked input files and check the result.

    ``uproot.open`` is patched so that "file1.root" yields a copy of the first
    tree and any other file name yields a copy of the second. The expected
    frame is the concatenation of both trees' dataframes after the selection
    query, with ``branch2`` dropped.
    """
    selectionExpr = "branch1 >= 7 and branch2 >=2"
    datasetUnderTest = Dataset("someName")

    # Two independent copies of the fixture tree, one per fake input file,
    # each with one branch overwritten by a reversed range.
    firstTree = copy.deepcopy(mockTree)
    secondTree = copy.deepcopy(mockTree)

    firstTree.dataframe.update(
        pd.DataFrame({'branch2': list(reversed(range(2, 12)))})
    )
    firstTree.setDF()
    secondTree.dataframe.update(
        pd.DataFrame({'branch1': list(reversed(range(0, 10)))})
    )
    secondTree.setDF()

    # Set the dataset state directly instead of going through addFiles /
    # setOutputBranches.
    datasetUnderTest.filesAdded = True
    datasetUnderTest.files = ["file1.root", "file2.root"]
    datasetUnderTest.branches = ["branch1", "branch2", "branch3"]

    datasetUnderTest.outputBranchesSet = True
    datasetUnderTest.outputBranches = ["branch1", "branch3"]

    def openROOTFile(*args, **kwargs):
        # Returns a mock usable in a ``with`` statement; entering it gives a
        # mapping from the dataset's tree name to a fresh copy of the tree.
        contextMock = mocker.MagicMock()
        sourceTree = firstTree if args[0] == "file1.root" else secondTree
        contextMock.__enter__ = mocker.Mock(
            return_value={datasetUnderTest.treeName: copy.deepcopy(sourceTree)}
        )
        return contextMock

    # This mock stands in for the open call itself.
    openMock = mocker.MagicMock(side_effect=openROOTFile)
    mocker.patch("uproot.open", openMock, create=True)

    datasetUnderTest.selection = selectionExpr

    sourceFrames = (firstTree.dataframe, secondTree.dataframe)
    expected = pd.concat([frame.query(selectionExpr) for frame in sourceFrames])
    expected = expected.drop(columns=["branch2"])

    outputDF = datasetUnderTest.process(skipOutput=True)

    assert outputDF.equals(expected)