示例#1
0
 def __init__(self, mode=0, elem=None, sample=None):
     """Store logistic parameters and cache the matching mean and variance.

     Parameters
     ----------
     mode : int
         ``0`` takes the parameters from ``elem``; any other value
         estimates them from ``sample`` via ``logistic.fit``.
     elem : sequence or None
         ``elem[0]`` is stored as ``mu`` and ``elem[1]`` as ``sigma``.
         Only read when ``mode == 0``.
     sample : array-like or None
         Data handed to ``logistic.fit``. Only read when ``mode != 0``.
     """
     if mode != 0:
         # Estimate both parameters from the data.
         fitted = logistic.fit(sample)
         self.mu, self.sigma = fitted
     else:
         # Parameters are supplied directly by the caller.
         self.mu = elem[0]
         self.sigma = elem[1]

     # Derived moments of the distribution with the stored parameters.
     loc_scale = {"loc": self.mu, "scale": self.sigma}
     self.math_average = logistic.mean(**loc_scale)
     self.dispersion = logistic.var(**loc_scale)
示例#2
0
 def sig(self, phi: OrderedDict[Subspace, Number]) -> Number:
     """Score how much the first value of ``phi`` stands out from a linear trend.

     Steps:
         1. Regress the values of ``phi`` (in iteration order) on their
            1-based positions using ``self.lr``.
         2. Take the regression's ``score`` (r^2 for a scikit-learn style
            regressor -- presumably what ``self.lr`` is; confirm).
         3. Fit a logistic distribution to every residual except the first
            and evaluate its CDF at the first residual.

     Returns the product of the score from step 2 and the CDF value from
     step 3.
     """
     values = np.array(list(phi.values()))
     # Column vector 1..n, the single-feature design matrix for the fit.
     positions = np.arange(1, len(values) + 1).reshape(-1, 1)

     model = self.lr.fit(positions, values)
     goodness = model.score(positions, values)
     residuals = values - model.predict(positions)

     # The first residual is held out and scored against the remaining ones.
     head, tail = residuals[0], residuals[1:]
     loc, scale = logistic.fit(tail)
     return goodness * logistic.cdf(head, loc, scale)
 def dilutionLine(self, row, label, save_loc):
     """Plot one series as a line chart, save it, and report two fits.

     Parameters
     ----------
     row : str
         Key selecting the series inside ``self.data[label]``; also used
         in the plot title and the file name.
     label : hashable
         Key into both ``self.data`` and the module-level ``LABELS``.
     save_loc : str
         Directory the ``"<row> <LABELS[label]>_lineplot.png"`` file is
         written to.

     Returns
     -------
     list of str
         Two lines: the ``linregress`` result and the ``logistic.fit``
         result, each prefixed with a caption.
     """
     # Look the data up before creating a figure so a bad key leaves no
     # figure behind.
     series = self.data[label][row]
     caption = row + " " + LABELS[label]

     fig = plt.figure(figsize=(5, 5))
     try:
         plt.plot(series)
         plt.title(caption)

         # NOTE(review): the series values are passed as x and the index as
         # y -- confirm this regression direction is the intended one.
         regression = linregress(series, self.data[label].index)
         # NOTE(review): if `logistic` is scipy.stats.logistic, fit() returns
         # (loc, scale) of a distribution, not a regression or (mean,
         # variance). The caption text is kept as-is since callers may rely
         # on it.
         fitted = logistic.fit(list(series))

         Output = [
             LABELS[label] + " " + str(row) + ": " + str(regression),
             "Logistic Regression (mean, variance)" + ": " + str(fitted),
         ]

         plt.savefig(os.path.join(save_loc, caption + "_lineplot.png"))
     finally:
         # Always release the figure, even when plotting or saving fails.
         plt.close(fig)
     return Output
示例#4
0
def estimate_dropout_prob(data):
    """Fit logistic parameters to per-column log-means and zero fractions.

    Parameters
    ----------
    data : array-like
        Two-dimensional array. Statistics are taken down axis 0, giving
        one value per column (one per gene, going by the comments below).

    Returns
    -------
    tuple
        The result of ``logistic.fit`` on the pooled per-column values.
    """
    def log_mean(x):
        positive = x[x > 0]
        # Columns without any positive entry get a large negative sentinel.
        if len(positive) == 0:
            return -1e5
        return np.mean(np.log(positive))

    data = np.asarray(data)

    # Mean of the log of the positive values of each gene.
    log_means = np.apply_along_axis(log_mean, 0, data)
    # Fraction of zeros of each gene.
    dropout = np.mean(data == 0, axis=0)

    # Pool both statistics into one explicit 1-D sample (log-means first,
    # then dropout fractions) instead of passing a 2-row tuple to fit().
    # NOTE(review): if `logistic` is scipy.stats.logistic this fits a
    # distribution to the pooled values, not a logistic curve of dropout
    # against log-mean -- confirm that this is the intended model.
    pooled = np.concatenate((log_means, dropout))
    return logistic.fit(pooled)
示例#5
0
import seaborn as sns

# Predict whether a person made a savings deposit after hearing the advertisement.

df = pd.read_csv('bank_marketing_simple.csv', sep=';')

# Columns that are one-hot encoded and used as predictors.
categorical_columns = [
    'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
    'day', 'month', 'poutcome'
]

# Build the feature matrix from the encoded predictors. get_dummies replaces
# each listed column with "<column>_<value>" indicator columns, so the
# original names cannot be selected from the encoded frame afterwards.
features = pd.get_dummies(df[categorical_columns], columns=categorical_columns)

df = pd.get_dummies(df, columns=categorical_columns)

logistic = LogisticRegression(solver='newton-cg')
logistic.fit(features, df["y"])

# s1 = ["age","job","marital","education","default","balance","housing","loan","contact","day","month","duration","campaign","pdays","previous","poutcome"]
# pd.get_dummies(df, columns= ["age","balance","loan","contact","duration","campaign"])

# logistic = LogisticRegression(solver='newton-cg')
# logistic.fit(df[["age","job","marital","education","default","balance","housing","loan","contact","day","month","duration","campaign","pdays","previous","poutcome"]], df["y"])

#train = df.sample(frac=0.8, random_state=200)
#test = df.drop(train.index)

#train_y = train['y']
#del train['y']
#train_x = train
示例#6
0
def _resample_nonparametric(a, b, method):
    """Draw ``b`` resamples of ``a`` with the global NumPy RNG (no re-seeding)."""
    n = a.shape[0]
    if method == "ordinary":
        # i.i.d. sampling from ecdf of a
        picks = np.random.choice(range(n), n * b)
        return np.reshape(a[picks], (b, ) + a.shape)
    if method == "balanced":
        # permute b concatenated copies of a, so every observation appears
        # exactly b times across all resamples
        pooled = np.reshape([a] * b, (b * n, ) + a.shape[1:])
        order = np.random.permutation(range(pooled.shape[0]))
        return np.reshape(pooled[order], (b, ) + a.shape)
    raise ValueError("method must be either 'ordinary'"
                     " , 'balanced', or 'parametric',"
                     " '{method}' was supplied".format(method=method))


def _sample_parametric(a, family, size, random_state):
    """Fit ``family`` to ``a`` and draw ``size`` variates from the fit."""
    # fit parameters by maximum likelihood; theta is ordered
    # (shape parameters..., loc, scale), which is also the positional
    # order rvs() expects
    if family == "gaussian":
        dist, theta = norm, norm.fit(a)
    elif family == "t":
        dist, theta = t, t.fit(a, fscale=1)
    elif family == "laplace":
        dist, theta = laplace, laplace.fit(a)
    elif family == "logistic":
        dist, theta = logistic, logistic.fit(a)
    elif family == "F":
        dist, theta = F, F.fit(a, floc=0, fscale=1)
    elif family == "gamma":
        dist, theta = gamma, gamma.fit(a, floc=0)
    elif family == "log-normal":
        dist, theta = lognorm, lognorm.fit(a, floc=0)
    elif family == "inverse-gaussian":
        dist, theta = invgauss, invgauss.fit(a, floc=0)
    elif family == "pareto":
        dist, theta = pareto, pareto.fit(a, floc=0)
    elif family == "beta":
        dist, theta = beta, beta.fit(a)
    elif family == "poisson":
        # the sample mean is used as the rate
        dist, theta = poisson, (np.mean(a), )
    else:
        raise ValueError("Invalid family")
    return dist.rvs(*theta, size=size, random_state=random_state)


def bootstrap(a,
              f=None,
              b=100,
              method="balanced",
              family=None,
              strata=None,
              smooth=False,
              random_state=None):
    """
    Calculate function values from bootstrap samples or
    optionally return bootstrap samples themselves

    Parameters
    ----------
    a : array-like
        Original sample
    f : callable or None
        Function to be bootstrapped
    b : int
        Number of bootstrap samples
    method : string
        * 'ordinary'
        * 'balanced'
        * 'parametric'
    family : string or None
        * 'gaussian'
        * 't'
        * 'laplace'
        * 'logistic'
        * 'F'
        * 'gamma'
        * 'log-normal'
        * 'inverse-gaussian'
        * 'pareto'
        * 'beta'
        * 'poisson'
    strata : array-like or None
        Stratification labels, ignored when method
        is parametric
    smooth : boolean
        Whether or not to add noise to bootstrap
        samples, ignored when method is parametric
    random_state : int or None
        Random number seed

    Returns
    -------
    y | X : np.array
        Function applied to each bootstrap sample
        or bootstrap samples if f is None

    Raises
    ------
    ValueError
        If ``strata`` and ``a`` differ in length, if ``method`` or
        ``family`` is not one of the listed options, or if ``a`` has more
        than one dimension when method is parametric.

    Notes
    -----
    The global NumPy random generator is seeded once per call. With
    stratification every stratum draws from that single stream, so strata
    are resampled independently of each other while the result stays
    reproducible for a fixed ``random_state``.
    """
    np.random.seed(random_state)
    a = np.asarray(a)
    n = len(a)

    if method == "parametric":
        # stratification not meaningful for parametric sampling
        if a.ndim > 1:
            raise ValueError("a must be one-dimensional")
        arr = _sample_parametric(a, family, n * b, random_state)
        X = np.reshape(arr, (b, n))
    elif strata is not None:
        strata = np.asarray(strata)
        if len(strata) != len(a):
            raise ValueError("a and strata must have" " the same length")
        # resample each stratum on its own, without re-seeding in between
        # (re-seeding would hand equal-sized strata identical indices)
        boot_strata = [
            _resample_nonparametric(a[strata == label], b, method)
            for label in np.unique(strata)
        ]
        # concatenate resampled strata along first column axis
        X = np.concatenate(boot_strata, axis=1)
    else:
        X = _resample_nonparametric(a, b, method)

    # samples are already smooth in the parametric case
    if smooth and (method != "parametric"):
        # out-of-place add: an in-place += cannot store float noise in an
        # integer-typed sample array
        X = X + np.random.normal(size=X.shape, scale=1 / np.sqrt(n))

    if f is None:
        return X
    else:
        return np.asarray([f(x) for x in X])
 def precip_test_logistic(self, start, end):
     """Kolmogorov-Smirnov test of precipitation data against a fitted logistic.

     The sample returned by ``self.get_precip(start, end)`` is fetched once,
     so the fitted parameters and the tested data are the same object.

     Returns the result of ``kstest`` for that sample against the
     ``"logistic"`` distribution with the fitted parameters.
     """
     sample = self.get_precip(start, end)
     args = logistic.fit(sample)
     # NOTE(review): the parameters are estimated from the tested sample, so
     # the reported p-value tends to be optimistic.
     return kstest(sample, "logistic", args=args)
 def discharge_test_logistic(self, start, end):
     """Kolmogorov-Smirnov test of discharge data against a fitted logistic.

     The sample returned by ``self.get_discharge(start, end)`` is fetched
     once, so the fitted parameters and the tested data are the same object.

     Returns the result of ``kstest`` for that sample against the
     ``"logistic"`` distribution with the fitted parameters.
     """
     sample = self.get_discharge(start, end)
     args = logistic.fit(sample)
     # NOTE(review): the parameters are estimated from the tested sample, so
     # the reported p-value tends to be optimistic.
     return kstest(sample, "logistic", args=args)
示例#9
0
from scipy import stats
from scipy.stats import logistic

# Read one number per line. The context manager closes the file even when
# reading or parsing fails.
with open("list.txt", "r") as f:
    # Blank lines (such as a trailing empty line) are skipped because
    # float() cannot parse them.
    ls = [float(line) for line in f if line.strip()]

# Fit location and scale of a logistic distribution to the values.
param = logistic.fit(ls)
mode = param[0]
scale = param[1]
mean = logistic.mean(loc=mode, scale=scale)
print(mean)
print(scale)