Python Diff примеры использования

Язык программирования: Python

Пространство имен/Пакет: patsy.contrasts

Класс/Тип: Diff

Примеров на hotexamples.com: 4

Найдено 4 примера использования Python Diff. Это лучшие примеры кода на Python для patsy.contrasts.Diff, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Diff(4)

Основные методы

Diff (4)

Пример #1

Показать файл

    def fit_backward_difference_coding(col, values, handle_missing, handle_unknown):
        """Build the backward-difference contrast table for one column.

        Args:
            col: Column identifier; ``str(col)`` prefixes the output column
                names ``<col>_0``, ``<col>_1``, ...
            values: pandas Series of level codes to encode.
                NOTE(review): presumably an ordinal mapping whose index holds
                the original category values -- confirm against the caller.
            handle_missing: ``'value'`` drops non-positive codes before coding
                and appends an all-zero row labelled ``-2``; ``'return_nan'``
                appends an all-NaN row labelled with the code stored under
                the NaN index label. Other settings add no row.
            handle_unknown: ``'indicator'`` codes ``-1`` as an extra level;
                ``'return_nan'`` appends an all-NaN row labelled ``-1``;
                ``'value'`` appends an all-zero row labelled ``-1``.

        Returns:
            A DataFrame indexed by level code with one column per contrast.
            With fewer than two levels, a column-less DataFrame indexed by
            the levels that were given.
        """
        if handle_missing == 'value':
            values = values[values > 0]

        # ``Series.get_values()`` was removed in pandas 1.0; ``.values`` is
        # available on both old and current pandas releases.
        values_to_encode = values.values

        # A contrast needs at least two levels; return an empty coding.
        if len(values) < 2:
            return pd.DataFrame(index=values_to_encode)

        if handle_unknown == 'indicator':
            values_to_encode = np.append(values_to_encode, -1)

        backwards_difference_matrix = Diff().code_without_intercept(values_to_encode)
        column_names = ['%s_%d' % (col, i)
                        for i in range(len(backwards_difference_matrix.column_suffixes))]
        df = pd.DataFrame(data=backwards_difference_matrix.matrix,
                          index=values_to_encode,
                          columns=column_names)

        # One contrast column per level except the first.
        n_contrasts = len(values_to_encode) - 1

        if handle_unknown == 'return_nan':
            df.loc[-1] = np.nan
        elif handle_unknown == 'value':
            df.loc[-1] = np.zeros(n_contrasts)

        if handle_missing == 'return_nan':
            # The row label is the code that ``values`` stores under NaN.
            df.loc[values.loc[np.nan]] = np.nan
        elif handle_missing == 'value':
            df.loc[-2] = np.zeros(n_contrasts)

        return df

Пример #2

Показать файл

    def fit_backward_difference_coding(values):
        """Return the backward-difference coding table for ``values``.

        With fewer than two levels an empty DataFrame is returned. Otherwise
        the contrast matrix rows are labelled starting at 1 and an extra
        all-zero row labelled 0 is appended.
        """
        level_count = len(values)
        if level_count < 2:
            return pd.DataFrame()

        contrast = Diff().code_without_intercept(values)
        coding = pd.DataFrame(
            data=contrast.matrix,
            columns=contrast.column_suffixes,
        )
        # Shift the row labels up by one so that label 0 is free for the
        # all-zero row added below.
        coding.index = coding.index + 1
        coding.loc[0] = np.zeros(level_count - 1)
        return coding

Пример #3

Показать файл

Файл: contrasts.py · Проект: patmosxx-v2/Pyto

# This corresponds to a parameterization that forces all the coefficients
# to sum to zero. Notice that the intercept here is the grand mean, where the
# grand mean is the mean of the per-level means of the dependent variable.

hsb2.groupby('race')['write'].mean().mean()

# ### Backward Difference Coding

# In backward difference coding, the mean of the dependent variable for a
# level is compared with the mean of the dependent variable for the prior
# level. This type of coding may be useful for a nominal or an ordinal
# variable.

from patsy.contrasts import Diff
contrast = Diff().code_without_intercept(levels)
print(contrast.matrix)

mod = ols("write ~ C(race, Diff)", data=hsb2)
res = mod.fit()
print(res.summary())

# For example, here the coefficient on level 1 is the mean of `write` at
# level 2 minus the mean at level 1, i.e.,

res.params["C(race, Diff)[D.1]"]

# Select `write` before aggregating (as in the grand-mean expression above)
# and compute the per-race means once instead of re-running the groupby.
write_mean_by_race = hsb2.groupby('race')['write'].mean()
write_mean_by_race[2] - write_mean_by_race[1]

# ### Helmert Coding

Пример #4

Показать файл

def _append_report_text(path, *chunks):
    """Append each string in ``chunks`` to the file at ``path``, in order."""
    # The context manager closes the handle even if one of the writes raises.
    with open(path, "a") as handle:
        for chunk in chunks:
            handle.write(chunk)


def contrasting():
    """Fit and report OLS models under five contrast codings.

    Works entirely on module-level state: reads ``c`` (the contrast
    variable name, or several separated by commas), ``dep_var``,
    ``full_model_variable_list``, ``df_final``, ``levels``, ``full_model``
    and ``o``. Does nothing when ``c`` is falsy.

    For each of Treatment, Simple, Sum, Diff (backward difference) and
    Helmert coding it fits ``dep_var ~ <other variables> + C(<contrast>,
    <coding>)`` on ``df_final``, prints a description and the model summary,
    and, when ``o`` is not None, appends the same text to the file ``o``.

    Side effects: rebinds the global ``c`` to its normalized form and
    normalizes the entries of ``full_model_variable_list`` in place (last
    "/"-separated segment, spaces replaced by underscores).
    """
    global c
    if not c:
        return

    # To account for multiple contrast variables: strip each name, replace
    # spaces with underscores, and keep only the last URL path segment.
    contrastvars = []
    if "," in c:
        contrastvars = [
            name.strip().replace(" ", "_").split("/")[-1]
            for name in c.split(",")
        ]
    # NOTE(review): the original attached this to a ``for ... else`` with no
    # ``break``, so it runs whether or not ``c`` held several variables.
    # That behavior is kept here; confirm whether an ``if/else`` was meant.
    c = c.split("/")[-1]  # to account for URLs

    # Normalize the model variables in place (the list is shared module state).
    for position, name in enumerate(full_model_variable_list):
        full_model_variable_list[position] = name.split("/")[-1].replace(" ", "_")

    # Every model variable that is not a contrast variable, joined for the formula.
    ind_vars_no_contrast_var = " + ".join(
        var for var in full_model_variable_list
        if var != c and var not in contrastvars
    )

    if contrastvars:
        contraststring = ' + '.join(contrastvars)
    else:
        c = c.replace(" ", "_")
        contraststring = c

    # Shared left part of every formula; each section appends "<Coding>)".
    # NOTE(review): the ``ols`` calls stay in this function body because the
    # coding name in the formula presumably has to be resolvable from the
    # calling scope, and ``Simple`` is local to this function -- confirm
    # before moving them into a helper.
    formula_head = (dep_var + " ~ " + ind_vars_no_contrast_var + " + C(" +
                    contraststring + ", ")

    # With contrast (treatment coding)
    treatment_text = (
        "Treatment (Dummy) Coding: Dummy coding compares each level of the categorical variable to a base reference level. The base reference level is the value of the intercept."
    )
    print("\n\n" + treatment_text)
    # The contrast matrices built in this function are not used afterwards;
    # the calls are kept so that invalid ``levels`` still fail here.
    ctrst = Treatment(reference=0).code_without_intercept(levels)
    mod = ols(formula_head + "Treatment)", data=df_final)
    res = mod.fit()
    print("With contrast (treatment coding)")
    print(res.summary())
    if o is not None:
        _append_report_text(
            o,
            "\n" + full_model,
            "\n\n***********************************************************************************************************",
            "\n\n\n\n" + treatment_text,
            "With contrast (treatment coding)",
            res.summary().as_text(),
        )

    # Defining the Simple class
    def _name_levels(prefix, levels):
        return ["[%s%s]" % (prefix, level) for level in levels]

    class Simple(object):
        """Simple coding: fixed reference level, intercept at the grand mean."""

        def _simple_contrast(self, levels):
            nlevels = len(levels)
            # Every entry starts at -1/k; the diagonal of rows 1..k-1 is then
            # set to (k-1)/k.
            contr = -1. / nlevels * np.ones((nlevels, nlevels - 1))
            contr[1:][np.diag_indices(nlevels - 1)] = (nlevels - 1.) / nlevels
            return contr

        def code_with_intercept(self, levels):
            matrix = np.column_stack(
                (np.ones(len(levels)), self._simple_contrast(levels)))
            return ContrastMatrix(matrix, _name_levels("Simp.", levels))

        def code_without_intercept(self, levels):
            matrix = self._simple_contrast(levels)
            return ContrastMatrix(matrix, _name_levels("Simp.", levels[:-1]))

    ctrst = Simple().code_without_intercept(levels)
    mod = ols(formula_head + "Simple)", data=df_final)
    res = mod.fit()
    simple_text = (
        "Simple Coding: Like Treatment Coding, Simple Coding compares each level to a fixed reference level. However, with simple coding, the intercept is the grand mean of all the levels of the factors."
    )
    print("\n\n" + simple_text)
    print(res.summary())
    if o is not None:
        _append_report_text(o, "\n\n\n" + simple_text, res.summary().as_text())

    # With contrast (sum/deviation coding)
    ctrst = Sum().code_without_intercept(levels)
    mod = ols(formula_head + "Sum)", data=df_final)
    res = mod.fit()
    sum_text = (
        "Sum (Deviation) Coding: Sum coding compares the mean of the dependent variable for a given level to the overall mean of the dependent variable over all the levels."
    )
    print("\n\n" + sum_text)
    print(res.summary())
    if o is not None:
        _append_report_text(o, "\n\n\n" + sum_text, res.summary().as_text())

    # With contrast (backward difference coding)
    ctrst = Diff().code_without_intercept(levels)
    mod = ols(formula_head + "Diff)", data=df_final)
    res = mod.fit()
    diff_text = (
        "Backward Difference Coding: In backward difference coding, the mean of the dependent variable for a level is compared with the mean of the dependent variable for the prior level."
    )
    print("\n\n" + diff_text)
    print(res.summary())
    if o is not None:
        _append_report_text(o, "\n\n\n" + diff_text, res.summary().as_text())

    # With contrast (Helmert coding)
    ctrst = Helmert().code_without_intercept(levels)
    mod = ols(formula_head + "Helmert)", data=df_final)
    res = mod.fit()
    helmert_text = (
        "Helmert Coding: Our version of Helmert coding is sometimes referred to as Reverse Helmert Coding. The mean of the dependent variable for a level is compared to the mean of the dependent variable over all previous levels. Hence, the name ‘reverse’ being sometimes applied to differentiate from forward Helmert coding."
    )
    print("\n\n" + helmert_text)
    print(res.summary())
    if o is not None:
        _append_report_text(o, "\n\n\n" + helmert_text, res.summary().as_text())