def CLAMI_data(data, target, positiveLabel, percentileCutoff, suppress=0,
               experimental=0, stats={"tp": 0, "p": 0}, label="Label"):
    '''
    CLAMI - Clustering, Labeling, Metric/Features Selection,
    Instance selection, and Supervised Learning.

    Returns
    -------
    trainingInstancesByCLAMI : training instances after CLAMI feature and
        instance selection.
    newTestInstances : test instances restricted to the selected features.
    '''
    treatment = Treatment(data, target)
    treatment.preprocess()
    data = treatment.full_train
    testdata = treatment.full_test
    cutoffsForHigherValuesOfAttribute = getHigherValueCutoffs(
        data, percentileCutoff, "Label")
    print("get cutoffs")
    data = getInstancesByCLA(data, percentileCutoff, positiveLabel)
    print("get CLA instances")
    metricIdxWithTheSameViolationScores = getMetricIndicesWithTheViolationScores(
        data, cutoffsForHigherValuesOfAttribute, positiveLabel, label=label)
    print("get Features and the violation scores")
    keys = list(metricIdxWithTheSameViolationScores.keys())
    # start with the features that have the lowest violation scores
    keys.sort()
    for i in range(len(keys)):
        k = keys[i]
        selectedMetricIndices = metricIdxWithTheSameViolationScores[k]
        # while len(selectedMetricIndices) < 3:
        #     index = i + 1
        #     selectedMetricIndices += metricIdxWithTheSameViolationScores[keys[index]]
        print(selectedMetricIndices)
        # keep those features in both the train and test sets
        trainingInstancesByCLAMI = getInstancesByRemovingSpecificAttributes(
            data, selectedMetricIndices, True, label=label)
        newTestInstances = getInstancesByRemovingSpecificAttributes(
            testdata, selectedMetricIndices, True, label="Label")
        # recompute the cutoffs on the reduced training set
        cutoffsForHigherValuesOfAttribute = getHigherValueCutoffs(
            trainingInstancesByCLAMI, percentileCutoff, "Label")
        # find instances that violate the assumption in the training set
        instIndicesNeedToRemove = getSelectedInstances(
            trainingInstancesByCLAMI, cutoffsForHigherValuesOfAttribute,
            positiveLabel)
        # remove the violating instances
        trainingInstancesByCLAMI = getInstancesByRemovingSpecificInstances(
            trainingInstancesByCLAMI, instIndicesNeedToRemove, False)
        # make sure both classes are present in the training set
        zero_count = trainingInstancesByCLAMI[
            trainingInstancesByCLAMI["Label"] == 0].shape[0]
        one_count = trainingInstancesByCLAMI[
            trainingInstancesByCLAMI["Label"] == 1].shape[0]
        if zero_count > 0 and one_count > 0:
            break
    return trainingInstancesByCLAMI, newTestInstances
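
# Illustrative sketch (not part of the original pipeline): feeding the CLAMI
# train/test frames to a scikit-learn learner. It assumes CLAMI_data returns
# pandas DataFrames whose feature columns are the selected metrics plus a
# "Label" column; the learner choice and its parameters here are hypothetical.
def demo_clami_with_sklearn(data, target, positiveLabel=1, percentileCutoff=50):
    from sklearn.ensemble import RandomForestClassifier
    train, test = CLAMI_data(data, target, positiveLabel, percentileCutoff)
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    clf.fit(train.drop(columns=["Label"]), train["Label"])
    # predictions for the CLAMI-filtered test set
    return clf.predict(test.drop(columns=["Label"]))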
def CLA(data, target, positiveLabel, percentileCutoff, suppress=0,
        experimental=0, both=False):
    treatment = Treatment(data, target)
    treatment.preprocess()
    testdata = treatment.full_test
    # apply CLA (cluster + label) to the test set only
    data = getInstancesByCLA(testdata, percentileCutoff, positiveLabel)
    treatment.y_label = ["yes" if y == 1 else "no" for y in data["Label"]]
    treatment.decisions = ["yes" if y == 1 else "no" for y in data["CLA"]]
    treatment.probs = data["K"]
    return treatment.eval()
def CLA_SL(data, target, model="RF", est=False, T_rec=0.90, inc=False, seed=0,
           both=False, stats={"tp": 0, "p": 0}):
    # map learner names to learner classes
    learners = {"RF": RF, "SVM": SVM, "LR": LR, "NB": NB, "DT": DT, "TM": TM}
    treatment = Treatment(data, target)
    treatment.preprocess()
    traindata = treatment.full_train
    # pseudo-label the training data with CLA using a 90th-percentile cutoff
    full_data = getInstancesByCLA(traindata, 90, None)
    clf = learners[model](data, target)
    print(target, model)
    clf.preprocess()
    # replace the true training labels with the CLA pseudo-labels
    clf.x_label = ["yes" if x == 1 else "no" for x in full_data['CLA']]
    clf.train()
    clf.stats = stats
    results = clf.eval()
    return results
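
# Illustrative sketch: running the CLA-pseudo-labelled supervised baselines for
# each learner key registered in CLA_SL. `data` and `target` are assumed to
# follow the same Treatment conventions used throughout this module.
def demo_cla_sl_all_models(data, target):
    results = {}
    for model in ("RF", "SVM", "LR", "NB", "DT"):
        # each call preprocesses, pseudo-labels with CLA, trains, and evaluates
        results[model] = CLA_SL(data, target, model=model)
    return results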
def ECLA(data, target, model="RF", est=False, T_rec=0.90, inc=False, seed=0,
         both=False):
    # run Jitterbug's pattern-based "easy" step first
    jitterbug = Jitterbug(data, target)
    jitterbug.find_patterns()
    jitterbug.easy_code()
    jitterbug.test_patterns()
    rest_data = jitterbug.rest
    treatment = Treatment(rest_data, target)
    treatment.preprocess()
    test_data = treatment.full_test
    if both:
        test_data = [test_data, treatment.full_train]
        test_data = pd.concat(test_data, ignore_index=True)
    # apply CLA to the data left over after the easy step
    final_data = getInstancesByCLA(test_data, 90, None)
    final_data = final_data[:treatment.full_test.shape[0]]
    treatment.y_label = ["yes" if y == 1 else "no" for y in final_data["Label"]]
    treatment.decisions = ["yes" if y == 1 else "no" for y in final_data["CLA"]]
    treatment.probs = final_data["K"]
    treatment.stats = jitterbug.easy.stats_test
    return treatment, rest_data
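
# Illustrative sketch: running ECLA and evaluating the CLA decisions recorded
# on the returned Treatment object. Assumes Treatment.eval() behaves as it does
# elsewhere in this module.
def demo_ecla(data, target):
    treatment, rest_data = ECLA(data, target)
    results = treatment.eval()
    return results, rest_data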
def CLA(data, positiveLabel, percentileCutoff, suppress=0, experimental=0,
        both=False):
    # variant of CLA that operates directly on an already-prepared DataFrame
    # (no Treatment preprocessing of raw project data)
    try:
        treatment = Treatment({}, "")
    except:
        treatment = Treatment(data, "")
    final_data = getInstancesByCLA(data, percentileCutoff, positiveLabel)
    treatment.y_label = ["yes" if y == 1 else "no" for y in final_data["Label"]]
    treatment.decisions = ["yes" if y == 1 else "no" for y in final_data["CLA"]]
    summary = collections.Counter(treatment.decisions)
    print(summary, summary["yes"] / (summary["yes"] + summary["no"]))
    treatment.probs = final_data["K"]
    results = treatment.eval()
    # proportion of instances that CLA labels as "yes"
    results["read"] = summary["yes"] / (summary["yes"] + summary["no"])
    return results
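
# Illustrative sketch: calling the DataFrame-based CLA variant directly. The
# toy frame below is hypothetical; a real call would pass a preprocessed frame
# with numeric metric columns plus a "Label" column, which is what
# getInstancesByCLA is assumed to expect.
def demo_cla_on_frame():
    import pandas as pd
    toy = pd.DataFrame({
        "metric_a": [1.0, 5.0, 2.0, 7.0, 3.0, 6.0],
        "metric_b": [0.3, 0.9, 0.1, 0.8, 0.2, 0.7],
        "Label":    [0, 1, 0, 1, 0, 1],
    })
    return CLA(toy, positiveLabel=1, percentileCutoff=50)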
def tune_CLAMI(data, target, positiveLabel, percentileCutoff, suppress=0,
               experimental=0, metric="APFD"):
    treatment = Treatment(data, target)
    treatment.preprocess()
    data = treatment.full_train
    sss = StratifiedShuffleSplit(n_splits=1, test_size=.25, random_state=47)
    testdata = treatment.full_test
    X, y = data[data.columns[:-1]], data[data.columns[-1]]
    for train_index, tune_index in sss.split(X, y):
        train_df = data.iloc[train_index]
        tune_df = data.iloc[tune_index]
        train_df.reset_index(drop=True, inplace=True)
        tune_df.reset_index(drop=True, inplace=True)

        cutoffsForHigherValuesOfAttribute = getHigherValueCutoffs(
            train_df, percentileCutoff, "Label")
        print("get cutoffs")
        train_df = getInstancesByCLA(train_df, percentileCutoff, positiveLabel)
        print("get CLA instances")
        metricIdxWithTheSameViolationScores = getMetricIndicesWithTheViolationScores(
            train_df, cutoffsForHigherValuesOfAttribute, positiveLabel)
        keys = list(metricIdxWithTheSameViolationScores.keys())
        # keys.sort()

        # evaluate an initial random sample of feature subsets on the tuning split
        evaluated_configs = random.sample(keys, INIT_POOL_SIZE * 2)
        evaluated_configs = [metricIdxWithTheSameViolationScores[k]
                             for k in evaluated_configs]
        tmp_scores = []
        tmp_configs = []
        for selectedMetricIndices in evaluated_configs:
            selectedMetricIndices, res = MI(train_df, tune_df,
                                            selectedMetricIndices,
                                            percentileCutoff, positiveLabel,
                                            target)
            if isinstance(res, dict):
                tmp_configs.append(transform_metric_indices(
                    data.shape[1], selectedMetricIndices))
                tmp_scores.append(res)

        # best configuration found so far
        ids = np.argsort([x[metric] for x in tmp_scores])[::-1][:1]
        best_res = tmp_scores[ids[0]]
        best_config = np.where(tmp_configs[ids[0]] == 1)[0]

        # evaluation budget and early-stopping lives
        this_budget = BUDGET
        evals = 0
        lives = 5
        print("Initial Population: %s" % len(tmp_scores))
        searchspace = [
            transform_metric_indices(data.shape[1],
                                     metricIdxWithTheSameViolationScores[k])
            for k in keys
        ]
        while this_budget > 0:
            # fit a surrogate model on the configurations evaluated so far
            cart_model = DecisionTreeRegressor()
            cart_model.fit(tmp_configs, [x[metric] for x in tmp_scores])
            cart_models = [cart_model]

            # pick the next configuration suggested by the acquisition function
            next_config_id = acquisition_fn(searchspace, cart_models)
            next_config = metricIdxWithTheSameViolationScores[
                keys.pop(next_config_id)]
            searchspace.pop(next_config_id)
            next_config, next_res = MI(train_df, tune_df, next_config,
                                       percentileCutoff, positiveLabel, target)
            if not isinstance(next_res, dict):
                continue
            next_config_normal = transform_metric_indices(data.shape[1],
                                                          next_config)
            tmp_scores.append(next_res)
            tmp_configs.append(next_config_normal)
            try:
                # reset the lives counter when the score moves by at least
                # 0.03; otherwise lose a life
                if abs(next_res[metric] - best_res[metric]) >= 0.03:
                    lives = 5
                else:
                    lives -= 1
                if isBetter(next_res, best_res, metric):
                    best_config = next_config
                    best_res = next_res
                if lives == 0:
                    print("***" * 5)
                    print("EARLY STOPPING!")
                    print("***" * 5)
                    break
                this_budget -= 1
                evals += 1
            except:
                pdb.set_trace()

        # evaluate the best configuration found on the held-out test set
        _, res = MI(train_df, testdata, best_config, percentileCutoff,
                    positiveLabel, target)
        return res
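
# Illustrative sketch: tuning the CLAMI feature subset for one target project
# and reading the optimisation metric from the returned result dict. The metric
# name "APFD" comes from tune_CLAMI's default above; the percentile cutoff used
# here is hypothetical.
def demo_tune_clami(data, target):
    res = tune_CLAMI(data, target, positiveLabel=1, percentileCutoff=50,
                     metric="APFD")
    if isinstance(res, dict):
        print("tuned APFD:", res.get("APFD"))
    return res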