def __init__(self, mode=0, elem=None, sample=None):
    """Set up the parameters and moments of a logistic distribution.

    Parameters
    ----------
    mode : int
        ``0`` takes the parameters from ``elem``; any other value
        estimates them from ``sample`` with ``logistic.fit``.
    elem : sequence or None
        ``elem[0]`` is stored as ``mu`` and ``elem[1]`` as ``sigma``
        (only read when ``mode == 0``).
    sample : array-like or None
        Observations to fit (only read when ``mode != 0``).

    The mean and variance of the resulting distribution are stored in
    ``math_average`` and ``dispersion``.
    """
    if mode != 0:
        location, spread = logistic.fit(sample)
    else:
        location = elem[0]
        spread = elem[1]

    self.mu = location
    self.sigma = spread

    # Freeze the distribution once and read both moments from it.
    frozen = logistic(loc=self.mu, scale=self.sigma)
    self.math_average = frozen.mean()
    self.dispersion = frozen.var()
def sig(self, phi: OrderedDict[Subspace, Number]) -> Number:
    '''
    Score how strongly the first value of ``phi`` deviates from a linear trend.

    Algorithm:
        1. Regress the values of ``phi`` (in iteration order) on their
           1-based positions using ``self.lr``
           (presumably a scikit-learn style regressor -- confirm).
        2. Take the goodness-of-fit r^2 and the residuals of that fit.
        3. Fit a logistic distribution to all residuals except the first.
        4. Return r^2 times the fitted CDF evaluated at the first residual.
    '''
    values = np.array(list(phi.values()))
    positions = np.arange(1, len(values) + 1).reshape(-1, 1)

    model = self.lr.fit(positions, values)
    goodness = model.score(positions, values)
    residuals = values - model.predict(positions)

    # The first residual is the one being scored; the rest form the reference.
    loc, scale = logistic.fit(residuals[1:])
    reference = logistic(loc, scale)
    return goodness * reference.cdf(residuals[0])
def dilutionLine(self, row, label, save_loc):
    """Plot one series of ``self.data[label]`` and summarise it.

    The series ``self.data[label][row]`` is drawn on a 5x5 inch figure that
    is saved to ``save_loc`` as ``"<row> <LABELS[label]>_lineplot.png"``.

    Parameters
    ----------
    row : hashable
        Key selecting the series inside ``self.data[label]``; it is
        converted with ``str`` wherever it is used in text.
    label : hashable
        Key into ``self.data`` and ``LABELS``.
    save_loc : str
        Directory the PNG file is written to.

    Returns
    -------
    list of str
        Two lines: the ``linregress`` result of the series against the
        index of ``self.data[label]``, and the ``logistic.fit`` result for
        the series.
    """
    series = self.data[label][row]
    caption = str(row) + " " + LABELS[label]

    Output = []
    figure = plt.figure(figsize=(5, 5))
    try:
        plt.plot(series)
        plt.title(caption)
        Output.append(
            LABELS[label] + " " + str(row) + ": " +
            str(linregress(series, self.data[label].index)))
        # NOTE(review): logistic.fit returns the (loc, scale) of a fitted
        # logistic distribution, which is neither a regression nor a
        # (mean, variance) pair; the label text is kept unchanged in case
        # downstream consumers depend on it.
        Output.append("Logistic Regression (mean, variance)" + ": " +
                      str(logistic.fit(list(series))))
        plt.savefig(os.path.join(save_loc, caption + "_lineplot.png"))
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(figure)
    return Output
def estimate_dropout_prob(data): def log_mean(x): x_nz = x[x > 0] if len(x_nz) == 0: return -1e5 else: return np.mean(np.log(x_nz)) # Get mean of positive values of each gene log_means = np.apply_along_axis(log_mean, 0, truth) np.mean(np.log1p(data > 0), axis=0) # Get each gene average number of zeros dropout = np.mean(data == 0, axis=0) # Fit logistic function params = logistic.fit((log_means, dropout)) return params
import seaborn as sns

# Predict whether a person opened a savings deposit after hearing the advertisement.
categorical_columns = [
    'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
    'day', 'month', 'poutcome'
]

df = pd.read_csv('bank_marketing_simple.csv', sep=';')

# get_dummies replaces every listed column with one indicator column per
# category, so the original names no longer exist afterwards.  Remember the
# raw column names to identify the generated indicator columns.
raw_columns = set(df.columns)
df = pd.get_dummies(df, columns=categorical_columns)
feature_columns = [col for col in df.columns if col not in raw_columns]

logistic = LogisticRegression(solver='newton-cg')
logistic.fit(df[feature_columns], df["y"])

# Earlier experiments, kept for reference:
# s1 = ["age","job","marital","education","default","balance","housing","loan","contact","day","month","duration","campaign","pdays","previous","poutcome"]
# pd.get_dummies(df, columns= ["age","balance","loan","contact","duration","campaign"])
# logistic = LogisticRegression(solver='newton-cg')
# logistic.fit(df[["age","job","marital","education","default","balance","housing","loan","contact","day","month","duration","campaign","pdays","previous","poutcome"]], df["y"])
#train = df.sample(frac=0.8, random_state=200)
#test = df.drop(train.index)
#train_y = train['y']
#del train['y']
#train_x = train
def _sample_parametric(a, size, family, random_state):
    """Fit ``family`` to ``a`` by maximum likelihood and draw ``size`` variates."""
    if family == "gaussian":
        theta = norm.fit(a)
        return norm.rvs(size=size, loc=theta[0], scale=theta[1],
                        random_state=random_state)
    if family == "t":
        theta = t.fit(a, fscale=1)
        return t.rvs(size=size, df=theta[0], loc=theta[1], scale=theta[2],
                     random_state=random_state)
    if family == "laplace":
        theta = laplace.fit(a)
        return laplace.rvs(size=size, loc=theta[0], scale=theta[1],
                           random_state=random_state)
    if family == "logistic":
        theta = logistic.fit(a)
        return logistic.rvs(size=size, loc=theta[0], scale=theta[1],
                            random_state=random_state)
    if family == "F":
        theta = F.fit(a, floc=0, fscale=1)
        return F.rvs(size=size, dfn=theta[0], dfd=theta[1], loc=theta[2],
                     scale=theta[3], random_state=random_state)
    if family == "gamma":
        theta = gamma.fit(a, floc=0)
        return gamma.rvs(size=size, a=theta[0], loc=theta[1], scale=theta[2],
                         random_state=random_state)
    if family == "log-normal":
        theta = lognorm.fit(a, floc=0)
        return lognorm.rvs(size=size, s=theta[0], loc=theta[1],
                           scale=theta[2], random_state=random_state)
    if family == "inverse-gaussian":
        theta = invgauss.fit(a, floc=0)
        return invgauss.rvs(size=size, mu=theta[0], loc=theta[1],
                            scale=theta[2], random_state=random_state)
    if family == "pareto":
        theta = pareto.fit(a, floc=0)
        return pareto.rvs(size=size, b=theta[0], loc=theta[1], scale=theta[2],
                          random_state=random_state)
    if family == "beta":
        theta = beta.fit(a)
        return beta.rvs(size=size, a=theta[0], b=theta[1], loc=theta[2],
                        scale=theta[3], random_state=random_state)
    if family == "poisson":
        # The maximum-likelihood estimate of the Poisson rate is the mean.
        theta = np.mean(a)
        return poisson.rvs(size=size, mu=theta, random_state=random_state)
    raise ValueError("Invalid family")


def bootstrap(a,
              f=None,
              b=100,
              method="balanced",
              family=None,
              strata=None,
              smooth=False,
              random_state=None):
    """
    Calculate function values from bootstrap samples or
    optionally return bootstrap samples themselves

    Parameters
    ----------
    a : array-like
        Original sample
    f : callable or None
        Function to be bootstrapped
    b : int
        Number of bootstrap samples
    method : string
        * 'ordinary'
        * 'balanced'
        * 'parametric'
    family : string or None
        * 'gaussian'
        * 't'
        * 'laplace'
        * 'logistic'
        * 'F'
        * 'gamma'
        * 'log-normal'
        * 'inverse-gaussian'
        * 'pareto'
        * 'beta'
        * 'poisson'
    strata : array-like or None
        Stratification labels, ignored when method
        is parametric
    smooth : boolean
        Whether or not to add noise to bootstrap
        samples, ignored when method is parametric
    random_state : int or None
        Random number seed

    Returns
    -------
    y | X : np.array
        Function applied to each bootstrap sample
        or bootstrap samples if f is None

    Raises
    ------
    ValueError
        If ``a`` and ``strata`` differ in length, if ``method`` or
        ``family`` is not recognised, or if ``a`` is not one-dimensional
        for parametric sampling.

    Notes
    -----
    The global NumPy random state is re-seeded with ``random_state`` on
    every call, including the recursive per-stratum calls.
    """
    np.random.seed(random_state)
    a = np.asarray(a)
    n = len(a)

    # stratification not meaningful for parametric sampling
    if strata is not None and method != "parametric":
        strata = np.asarray(strata)
        if len(strata) != len(a):
            raise ValueError("a and strata must have" " the same length")
        # recursively call bootstrap without stratification
        # on the different strata
        masks = [strata == x for x in np.unique(strata)]
        boot_strata = [
            bootstrap(a=a[m],
                      f=None,
                      b=b,
                      method=method,
                      strata=None,
                      random_state=random_state) for m in masks
        ]
        # concatenate resampled strata along first column axis
        X = np.concatenate(boot_strata, axis=1)
    elif method == "ordinary":
        # i.i.d. sampling from ecdf of a
        # (shape is passed positionally: the ``newshape`` keyword of
        # np.reshape is deprecated in recent NumPy releases)
        picks = np.random.choice(range(a.shape[0]), a.shape[0] * b)
        X = np.reshape(a[picks], (b, ) + a.shape)
    elif method == "balanced":
        # permute b concatenated copies of a
        r = np.reshape([a] * b, (b * a.shape[0], ) + a.shape[1:])
        X = np.reshape(r[np.random.permutation(range(r.shape[0]))],
                       (b, ) + a.shape)
    elif method == "parametric":
        if len(a.shape) > 1:
            raise ValueError("a must be one-dimensional")
        # fit parameters by maximum likelihood and sample
        arr = _sample_parametric(a, n * b, family, random_state)
        X = np.reshape(arr, (b, n))
    else:
        raise ValueError("method must be either 'ordinary'"
                         " , 'balanced', or 'parametric',"
                         " '{method}' was supplied".format(method=method))

    # samples are already smooth in the parametric case
    if smooth and method != "parametric":
        # Out-of-place addition: an in-place ``+=`` fails for integer
        # samples because float noise cannot be cast back to int.
        X = X + np.random.normal(size=X.shape, scale=1 / np.sqrt(n))

    if f is None:
        return X
    return np.asarray([f(x) for x in X])
def precip_test_logistic(self, start, end):
    """Kolmogorov-Smirnov test of precipitation against a fitted logistic law.

    The sample returned by ``self.get_precip(start, end)`` is fetched once,
    so the parameters are fitted on exactly the data that is tested.

    Returns
    -------
    The result of ``kstest`` for the sample against a logistic distribution
    with the parameters estimated by ``logistic.fit``.
    """
    sample = self.get_precip(start, end)
    args = logistic.fit(sample)
    return kstest(sample, "logistic", args=args)
def discharge_test_logistic(self, start, end):
    """Kolmogorov-Smirnov test of discharge against a fitted logistic law.

    The sample returned by ``self.get_discharge(start, end)`` is fetched
    once, so the parameters are fitted on exactly the data that is tested.

    Returns
    -------
    The result of ``kstest`` for the sample against a logistic distribution
    with the parameters estimated by ``logistic.fit``.
    """
    sample = self.get_discharge(start, end)
    args = logistic.fit(sample)
    return kstest(sample, "logistic", args=args)
from scipy import stats
from scipy.stats import logistic

# Read one number per line from list.txt.  The context manager closes the
# file even if a line fails to parse; blank lines (such as a trailing
# newline at the end of the file) are skipped instead of crashing float().
with open("list.txt", "r") as f:
    ls = [float(line) for line in f if line.strip()]

# Maximum-likelihood fit of a logistic distribution: (loc, scale).
param = logistic.fit(ls)
mode = param[0]
scale = param[1]
mean = logistic.mean(loc=mode, scale=scale)
print(mean)
print(scale)