def threshold_generator(waveform, bins):
    try:
        new_waveform = np.abs(waveform[:, 0])
    except:
        new_waveform = np.abs(waveform)
    new_waveform_hist, new_waveform_bin = np.histogram(new_waveform, bins=bins)
    s_max = (0, -10)
    len_of_new_waveform_hist = len(new_waveform_hist)
    for th in range(len(new_waveform_hist) + 1):
        n1 = npsum(new_waveform_hist[:th])
        n2 = npsum(new_waveform_hist[th:])
        if n1 == 0:
            mu1 = 0
        else:
            mu1 = npsum(np.arange(0, th) * new_waveform_hist[0:th]) / n1
        if n2 == 0:
            mu2 = 0
        else:
            mu2 = npsum(
                np.arange(th, len_of_new_waveform_hist) *
                new_waveform_hist[th:len_of_new_waveform_hist]) / n2
        s = n1 * n2 * (mu1 - mu2)**2
        if s > s_max[1]:
            s_max = (th, s)
    return s_max[0]
def _autocorr_func2(mags, lag, maglen, magmed, magstd):
    '''
    This is an alternative function to calculate the autocorrelation.

    mags MUST be an array with no nans.
    lag is the current lag to calculate the autocorr for. MUST be less than
    the total number of observations in mags (maglen).
    maglen, magmed, magstd are provided by auto_correlation below.

    This version is from (first definition):
    https://en.wikipedia.org/wiki/Correlogram#Estimation_of_autocorrelations
    '''

    lagindex = nparange(0, maglen - lag)
    products = (mags[lagindex] - magmed) * (mags[lagindex + lag] - magmed)
    autocovarfunc = npsum(products) / lagindex.size
    varfunc = npsum(
        (mags[lagindex] - magmed) * (mags[lagindex] - magmed)) / mags.size
    acorr = autocovarfunc / varfunc

    return acorr
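
# A minimal usage sketch for _autocorr_func2 (an assumption for illustration:
# the module-level aliases used above, e.g. `from numpy import sum as npsum,
# arange as nparange`, are in scope). The magstd argument is accepted but
# unused by this estimator.
import numpy as np

mags = np.array([10.1, 10.3, 10.2, 10.4, 10.1, 10.3, 10.2, 10.4])
acf_lag1 = _autocorr_func2(mags, lag=1,
                           maglen=mags.size,
                           magmed=np.median(mags),
                           magstd=np.std(mags))  # autocorrelation at lag 1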
def _replace_missing_values_in_matrix(all_data, missing_value_indicator,
                                      data_max_missing_values,
                                      samples_max_missing_values,
                                      replace=False, col_names=None,
                                      row_names=None):
    number_of_data, number_of_samples = all_data.shape
    na_count_per_sample = zeros(number_of_samples)
    data_indices_to_remove = []

    print("Replacing missing values by mean...")

    na_count_per_sample = npsum(isnan(all_data), axis=0)
    samples_indices_to_keep = where(
        na_count_per_sample <= samples_max_missing_values * number_of_data)[0]
    print("%s samples were not replaced because they have more than %s missing values"
          % (number_of_samples - len(samples_indices_to_keep),
             samples_max_missing_values))

    na_count_per_site = npsum(isnan(all_data), axis=1)
    sites_indices_to_keep = where(
        na_count_per_site <= data_max_missing_values * number_of_samples)[0]
    print("%s sites were not replaced because they have more than %s missing values"
          % (number_of_data - len(sites_indices_to_keep),
             data_max_missing_values))

    print("replacing each missing value by its site mean...")
    all_data = all_data[sites_indices_to_keep, :][:, samples_indices_to_keep]
    sites_mean = nanmean(all_data, axis=1)
    for i, data_for_all_samples in enumerate(all_data):
        na_indices = where(isnan(all_data[i, :]))[0]
        all_data[i][na_indices] = sites_mean[i]

    # return the relevant samples ids and sites ids
    if col_names is not None:
        col_names = col_names[samples_indices_to_keep]
    if row_names is not None:
        row_names = row_names[sites_indices_to_keep]

    return all_data, col_names, row_names
def chaowangjost(counts):
    """Entropy calculation using Chao, Wang, Jost correction.
    doi: 10.1111/2041-210X.12108

    Parameters
    ----------
    counts : list
        bin counts

    Returns
    -------
    entropy : float
    """
    n_samples = npsum(counts)
    bcbc = bincount(counts.astype(int))
    if len(bcbc) < 3:
        return grassberger(counts)
    if bcbc[2] == 0:
        if bcbc[1] == 0:
            A = 1.
        else:
            A = 2. / ((n_samples - 1.) * (bcbc[1] - 1.) + 2.)
    else:
        A = 2. * bcbc[2] / ((n_samples - 1.) * (bcbc[1] - 1.) +
                            2. * bcbc[2])
    pr = arange(1, int(n_samples))
    pr = 1. / pr * (1. - A) ** pr
    entropy = npsum(counts / n_samples * (psi(n_samples) -
                                          nan_to_num(psi(counts))))

    if bcbc[1] > 0 and A != 1.:
        entropy += nan_to_num(bcbc[1] / n_samples *
                              (1 - A) ** (1 - n_samples *
                                          (-log(A) - npsum(pr))))
    return entropy
def NormEmpHistFP(Epsi, p, tau, k_):
    # This function estimates the empirical histogram with Flexible
    # Probabilities of an invariant whose distribution is
    # represented in terms of simulations/historical realizations
    #  INPUT
    #   Epsi  :[vector](1 x t_end) MC scenarios/historical realizations
    #   p     :[vector](1 x t_end) flexible probabilities
    #   tau   :[scalar] projection horizon
    #   k_    :[scalar] coarseness level
    #  OP
    #   xi    :[1 x k_] centers of the bins
    #   f     :[1 x k_] discretized pdf of invariant

    ## code
    # bins width
    a = -norm.inv(10**(-15), 0, sqrt(tau))
    h = 2 * a / k_

    # centers of the bins
    xi = arange(-a + h, a + h, h)

    # frequency
    p_bin = zeros((len(xi), 1))
    for k in range(len(xi)):
        index = (Epsi > xi[k] - h / 2) & (Epsi <= xi[k] + h / 2)
        p_bin[k] = npsum(p[index])

    # discretized pdf of an invariant
    f = 1 / h * p_bin

    # normalized height of the last bin
    f[-1] = 1 / h * (1 - npsum(p_bin[:-1]))

    return xi, f
def stetson_kindex(fmags, ferrs):
    '''
    This calculates the Stetson K index (robust measure of the kurtosis).

    Requires finite mags and errs.
    '''

    # use a fill-in value for the errors if they're none
    if ferrs is None:
        ferrs = npfull_like(fmags, 0.005)

    ndet = len(fmags)

    if ndet > 9:

        # get the median and ndet
        medmag = npmedian(fmags)

        # get the stetson index elements
        delta_prefactor = (ndet / (ndet - 1))
        sigma_i = delta_prefactor * (fmags - medmag) / ferrs

        stetsonk = (
            npsum(npabs(sigma_i)) /
            (npsqrt(npsum(sigma_i * sigma_i))) *
            (ndet**(-0.5))
        )

        return stetsonk

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate stetson K index')
        return npnan
def calculate_K(self, K0, P, T):
    def funct(V):
        F = self.mole_fraction*(K0 - 1)/(V*(K0 - 1) + 1)
        if V > 0:
            return F.sum()
        else:
            return np.nan

    (root, root_res) = brenth(funct, 0.1e-8, 1, disp=False, full_output=True)
    x = self.mole_fraction/(root*(K0 - 1) + 1)
    self.x = x/npsum(x)
    y = x*K0
    self.y = y/sum(y)
    phiL, self.ZL = self.fugacity_coeffecient(x, P, T, 'liquid')
    phiV, self.ZV = self.fugacity_coeffecient(y, P, T, 'vapor')
    Knew = phiL/phiV
    diff = abs(npsum(Knew) - npsum(K0))
    K = Knew
    self.V = root
    self.L = 1 - root
    return K, diff
def softmax_cost(theta, nclasses, dim, wdecay, data, labels):
    # unroll parameters from theta
    theta = reshape(theta, (dim, nclasses))  # This was wrong
    theta = theta.T
    nsamp = data.shape[1]

    # generate ground truth matrix
    onevals = squeeze(ones((1, nsamp)))
    rows = squeeze(labels) - 1  # This was wrong
    cols = arange(nsamp)
    ground_truth = csr_matrix((onevals, (rows, cols))).todense()

    # compute hypothesis; use some in-place computations
    theta_dot_prod = dot(theta, data)
    theta_dot_prod = theta_dot_prod - numpy.amax(theta_dot_prod, axis=0)  # This was wrong
    soft_theta = npexp(theta_dot_prod)
    soft_theta_sum = npsum(soft_theta, axis=0)
    soft_theta_sum = tile(soft_theta_sum, (nclasses, 1))
    hyp = soft_theta/soft_theta_sum

    # compute cost
    log_hyp = nplog(hyp)
    temp = array(multiply(ground_truth, log_hyp))
    temp = npsum(npsum(temp, axis=1), axis=0)
    cost = (-1.0/nsamp)*temp + 0.5*wdecay*pow(norm(theta, 'fro'), 2)

    return cost
def lightcurve_moments(ftimes, fmags, ferrs):
    '''This calculates the weighted mean, stdev, median, MAD, percentiles,
    skew, kurtosis, fraction of LC beyond 1-stdev, and IQR.

    Parameters
    ----------

    ftimes,fmags,ferrs : np.array
        The input mag/flux time-series with all non-finite elements removed.

    Returns
    -------

    dict
        A dict with all of the light curve moments calculated.

    '''

    ndet = len(fmags)

    if ndet > 9:

        # now calculate the various things we need
        series_median = npmedian(fmags)
        series_wmean = (
            npsum(fmags * (1.0 / (ferrs * ferrs))) /
            npsum(1.0 / (ferrs * ferrs))
        )
        series_mad = npmedian(npabs(fmags - series_median))
        series_stdev = 1.483 * series_mad
        series_skew = spskew(fmags)
        series_kurtosis = spkurtosis(fmags)

        # get the beyond1std fraction
        series_above1std = len(fmags[fmags > (series_median + series_stdev)])
        series_below1std = len(fmags[fmags < (series_median - series_stdev)])

        # this is the fraction beyond 1 stdev
        series_beyond1std = (series_above1std + series_below1std) / float(ndet)

        # get the magnitude percentiles
        series_mag_percentiles = nppercentile(
            fmags,
            [5.0, 10, 17.5, 25, 32.5, 40, 60, 67.5, 75, 82.5, 90, 95]
        )

        return {
            'median': series_median,
            'wmean': series_wmean,
            'mad': series_mad,
            'stdev': series_stdev,
            'skew': series_skew,
            'kurtosis': series_kurtosis,
            'beyond1std': series_beyond1std,
            'mag_percentiles': series_mag_percentiles,
            'mag_iqr': series_mag_percentiles[8] - series_mag_percentiles[3],
        }

    else:

        LOGERROR('not enough detections in this magseries '
                 'to calculate light curve moments')
        return None
def rombextrap(StepRatio, der_init, rombexpon):
    # do romberg extrapolation for each estimate
    #
    #  StepRatio - Ratio decrease in step
    #  der_init - initial derivative estimates
    #  rombexpon - higher order terms to cancel using the romberg step
    #
    #  der_romb - derivative estimates returned
    #  errest - error estimates
    #  amp - noise amplification factor due to the romberg step

    srinv = 1 / StepRatio

    # do nothing if no romberg terms
    nexpon = len(rombexpon)
    rombexpon = array(rombexpon)
    rmat = ones((nexpon + 2, nexpon + 1))
    if nexpon == 0:
        pass
        # rmat is simple: ones((2,1))
    elif nexpon == 1:
        # only one romberg term
        rmat[1, 1] = srinv ** rombexpon
        rmat[2, 1] = srinv ** (2 * rombexpon)
    elif nexpon == 2:
        # two romberg terms
        rmat[1, 1:3] = srinv ** rombexpon
        rmat[2, 1:3] = srinv ** (2 * rombexpon)
        rmat[3, 1:3] = srinv ** (3 * rombexpon)
    elif nexpon == 3:
        # three romberg terms
        rmat[1, 1:4] = srinv ** rombexpon
        rmat[2, 1:4] = srinv ** (2 * rombexpon)
        rmat[3, 1:4] = srinv ** (3 * rombexpon)
        rmat[4, 1:4] = srinv ** (4 * rombexpon)

    # qr factorization used for the extrapolation as well
    # as the uncertainty estimates
    qromb, rromb = qr(rmat)

    # the noise amplification is further amplified by the Romberg step.
    # amp = cond(rromb)

    # this does the extrapolation to a zero step size.
    ne = len(der_init)
    rhs = vec2mat(der_init, nexpon + 2, max(1, ne - (nexpon + 2)))
    rombcoefs = solve(rromb, qromb.T @ rhs.astype(np.float64))
    der_romb = rombcoefs[0].T

    # uncertainty estimate of derivative prediction
    s = sqrt(npsum((rhs - rmat @ rombcoefs) ** 2, 0))
    rinv = solve(rromb, eye(nexpon + 1))
    cov1 = npsum(rinv ** 2, 1)  # 1 spare dof
    errest = s.T * 12.7062047361747 * sqrt(cov1[0])

    return der_romb, errest
def facquisition(xx, X, F, N, alpha, delta_E, dF, W, rbf, useRBF,
                 isUnknownFeasibilityConstrained,
                 isUnknownSatisfactionConstrained,
                 Feasibility_unkn, SatConst_unkn, delta_G, delta_S,
                 iw_ibest, maxevals):
    # Acquisition function to minimize to get next sample

    d = npsum((X[0:N, ] - xx)**2, axis=-1)

    ii = where(d < 1e-12)
    if ii[0].size > 0:
        fhat = F[ii[0]][0]
        dhat = 0
        if isUnknownFeasibilityConstrained:
            Ghat = Feasibility_unkn[ii]
        else:
            Ghat = 1
        if isUnknownSatisfactionConstrained:
            Shat = SatConst_unkn[ii]
        else:
            Shat = 1
    else:
        w = exp(-d) / d
        sw = sum(w)

        if useRBF:
            v = rbf(X[0:N, :], xx)
            fhat = v.ravel().dot(W.ravel())
        else:
            fhat = npsum(F[0:N, ] * w) / sw

        if maxevals <= 30:
            # for comparison, used in the original GLIS and when N_max <= 30 in C-GLIS
            dhat = delta_E * atan(1 / sum(1 / d)) * 2 / pi * dF + \
                alpha * sqrt(sum(w * (F[0:N, ] - fhat).flatten("c")**2) / sw)
        else:
            dhat = delta_E * ((1 - N / maxevals) * atan((1 / sum(1. / d)) / iw_ibest) +
                              N / maxevals * atan(1 / sum(1. / d))) * 2 / pi * dF + \
                alpha * sqrt(sum(w * (F[0:N, ] - fhat).flatten("c")**2) / sw)

        # to account for the unknown constraints
        if isUnknownFeasibilityConstrained:
            Ghat = npsum(Feasibility_unkn[0:N].T * w) / sw
        else:
            Ghat = 1
        if isUnknownSatisfactionConstrained:
            Shat = npsum(SatConst_unkn[0:N].T * w) / sw
        else:
            Shat = 1

    f = fhat - dhat + (delta_G * (1 - Ghat) + delta_S * (1 - Shat)) * dF

    return f
def get_tfidf(self):
    terms_per_doc = npsum(self.term_matrix, axis=0)
    docs_per_term = npsum(asarray(self.term_matrix > 0, 'i'), axis=1)
    rows, cols = self.term_matrix.shape
    for i in range(rows):
        for j in range(cols):
            self.term_matrix[i, j] = ((self.term_matrix[i, j] / terms_per_doc[j]) *
                                      log(cols / docs_per_term[j]))
def approx_pnd(X_pred, X_cov, X_train, signs, n=int(1e4), seed=None):
    r"""Approximate the PND via mixture importance sampling

    Approximate the probability non-dominated (PND) for a set of predictive
    points using a mixture importance sampling approach. Predictive points are
    assumed to have predictive gaussian distributions (with specified mean and
    covariance matrix).

    Args:
        X_pred (2d numpy array): Predictive values
        X_cov (iterable of 2d numpy arrays): Predictive covariance matrices
        X_train (2d numpy array): Training values, used to determine
            existing Pareto frontier
        signs (numpy array of +/-1 values): Array of optimization signs:
            {-1: Minimize, +1 Maximize}

    Kwargs:
        n (int): Number of draws for importance sampler
        seed (int): Seed for random state

    Returns:
        pr_scores (array): Estimated PND values
        var_values (array): Estimated variance values

    References:
        Owen *Monte Carlo theory, methods and examples* (2013)

    """
    ## Setup
    X_wk_train = -X_train * signs
    X_wk_pred = -X_pred * signs
    n_train, n_dim = X_train.shape
    n_pred = X_pred.shape[0]

    ## Find the training Pareto frontier
    idx_pareto = pareto_min_rel(X_wk_train)
    n_pareto = len(idx_pareto)

    ## Sample the mixture points
    Sig_mix = make_proposal_sigma(X_wk_train, idx_pareto, X_cov)
    X_mix = rprop(n, Sig_mix, X_wk_train[idx_pareto, :], seed=seed)

    ## Take non-dominated points only
    idx_ndom = pareto_min_rel(X_mix, X_base=X_wk_train[idx_pareto, :])
    X_mix = X_mix[idx_ndom, :]

    ## Evaluate the Pr[non-dominated]
    d_mix = dprop(X_mix, Sig_mix, X_wk_train[idx_pareto, :])
    pr_scores = zeros(n_pred)
    var_values = zeros(n_pred)
    for i in range(n_pred):
        dist_test = mvnorm(mean=X_wk_pred[i], cov=X_cov[i])
        w_test = dist_test.pdf(X_mix) / d_mix
        # Owen (2013), Equation (9.3)
        pr_scores[i] = npsum(w_test) / n
        # Owen (2013), Equation (9.5)
        var_values[i] = npsum((w_test - pr_scores[i])**2) / n

    return pr_scores, var_values
def likelihood(intensity, cases):
    '''
    Sum over care homes i and dates t:
    ln(\\lambda^k exp(-\\lambda) / (k!)) = (k ln \\lambda - \\lambda - ln(k!))
    where k = cases and \\lambda = intensity
    '''
    non_zero_cases = (cases > 0)
    # gammaln(n) = ln((n-1)!) for integer n
    return (npsum(cases[non_zero_cases] * log(intensity[non_zero_cases]))
            - npsum(intensity)
            - npsum(gammaln(cases[cases > 1] + 1)))
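
# A minimal usage sketch for likelihood (an assumption for illustration: the
# module-level imports used above, e.g. `from numpy import sum as npsum, log`
# and `from scipy.special import gammaln`, are in scope).
import numpy as np

intensity = np.array([0.5, 1.2, 2.0, 0.8])   # lambda for each care home/date
cases = np.array([0, 1, 3, 0])               # observed counts k
ll = likelihood(intensity, cases)            # Poisson log-likelihood (scalar)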
def _van_der_walls_mixing(self, mole_fraction, P, T):
    alpha = self._Twu91(T)
    attraction = 0.45724 * alpha * (self.R * self.Tc).pow(2)/(self.Pc * 100000)
    cohesion = 0.07780 * self.R * self.Tc / (self.Pc * 100000)

    # aij = [(ai.aj)^0.5 (1 - kij)] = aji
    mixture = outer(attraction, attraction)
    mixture = power(mixture, 0.5)
    mixture = multiply(mixture, subtract(1, self.interaction_params))

    a_ij = mixture * P * 100000 / (self.R * T) ** 2
    b = cohesion * P * 100000 / (self.R * T)

    A = npsum(npsum(multiply(a_ij, outer(mole_fraction, mole_fraction))))
    B = npsum(b*mole_fraction)
    return A, B, a_ij, b
def cof(self, mode="log"):
    from numpy import sum as npsum
    data = None
    if mode == "log":
        data = self.get_logpow()
    elif mode in ("linear", 'lin'):
        data = self.get_pow()
    freq = self.get_xdata()
    c = npsum(data * freq) / npsum(data)
    return c
def _calc_accel(jack_dist):
    from numpy import mean as npmean
    from numpy import sum as npsum
    from numpy import errstate

    jack_mean = npmean(jack_dist)
    numer = npsum((jack_mean - jack_dist)**3)
    denom = 6.0 * (npsum((jack_mean - jack_dist)**2)**1.5)
    with errstate(invalid='ignore'):
        # does not raise warning if invalid division encountered.
        return numer / denom
def logdet(a):
    # Fast log-determinant of a large matrix

    ## code
    try:
        v = 2 * npsum(log(diag(cholesky(a))))
    except:
        dummy, u, p = lu(a)
        du = diag(u)
        c = det(p) * prod(sign(du))
        v = log(c) + npsum(log(abs(du)))
        v = real(v)
    return v
def mU1F1C2(U_in, F_in, C_in, C_slab, dt):
    """
    Model of a simple room that has heating/cooling applied in a different
    node than that of the air, e.g. a radiant slab system.

    Node Number: Object
        0: room air node, connected to ambient air (F0)
        1: under slab node, connected to capacitor 1 (slab) and Node 0

    Node Number with known temperatures: Object
        0: ambient air

    External input:
        U_in: conductance under slab to slab surface
        F_in: conductance room air to slab surface
        C_in: capacitance of air
        C_slab: capacitance of slab
    """
    # Load dependencies
    from numpy import zeros
    from numpy import sum as npsum
    from numpy.linalg import inv

    nN = 2  # number of nodes
    nM = 1  # number of nodes with known temperatures

    #%% Nodal Connections
    # Declare variables
    Uin = zeros((nN, nN))  # W/K
    F = zeros((nN, nM))    # W/K
    C = zeros((nN, 1))     # J/K

    # How are the nodes connected?
    Uin[0, 1] = U_in

    # Connected to temperature sources
    F[0, 0] = F_in

    # Nodes with capacitance
    C[0] = C_in
    C[1] = C_slab

    #%% U-matrix completion, and its inverse
    U = -Uin - Uin.T  # U is symmetrical, non-diagonals are -ve
    s = -npsum(U, 1)
    for i in range(0, nN):
        U[i, i] = s[i] + npsum(F[i, ]) + C[i] / dt
    Uinv = inv(U)

    #%% Ship it
    return (Uinv, F, C, nN, nM)
def specwindow_lsp_value(times, mags, errs, omega):
    '''
    This calculates the peak associated with the spectral window function
    for times and at the specified omega.
    '''

    norm_times = times - times.min()

    tau = ((1.0 / (2.0 * omega)) *
           nparctan(npsum(npsin(2.0 * omega * norm_times)) /
                    npsum(npcos(2.0 * omega * norm_times))))

    lspval_top_cos = (npsum(1.0 * npcos(omega * (norm_times - tau))) *
                      npsum(1.0 * npcos(omega * (norm_times - tau))))
    lspval_bot_cos = npsum((npcos(omega * (norm_times - tau))) *
                           (npcos(omega * (norm_times - tau))))

    lspval_top_sin = (npsum(1.0 * npsin(omega * (norm_times - tau))) *
                      npsum(1.0 * npsin(omega * (norm_times - tau))))
    lspval_bot_sin = npsum((npsin(omega * (norm_times - tau))) *
                           (npsin(omega * (norm_times - tau))))

    lspval = 0.5 * ((lspval_top_cos / lspval_bot_cos) +
                    (lspval_top_sin / lspval_bot_sin))

    return lspval
def stellingwerf_pdm_theta(times, mags, errs, frequency,
                           binsize=0.05, minbin=9):
    '''
    This calculates the Stellingwerf PDM theta value at a test frequency.
    '''

    period = 1.0/frequency
    fold_time = times[0]

    phased = phase_magseries(times,
                             mags,
                             period,
                             fold_time,
                             wrap=False,
                             sort=True)

    phases = phased['phase']
    pmags = phased['mags']
    bins = np.arange(0.0, 1.0, binsize)
    nbins = bins.size

    binnedphaseinds = npdigitize(phases, bins)

    binvariances = []
    binndets = []
    goodbins = 0

    for x in npunique(binnedphaseinds):

        thisbin_inds = binnedphaseinds == x
        thisbin_phases = phases[thisbin_inds]
        thisbin_mags = pmags[thisbin_inds]

        if thisbin_mags.size > minbin:
            thisbin_variance = npvar(thisbin_mags, ddof=1)
            binvariances.append(thisbin_variance)
            binndets.append(thisbin_mags.size)
            goodbins = goodbins + 1

    # now calculate theta
    binvariances = nparray(binvariances)
    binndets = nparray(binndets)

    theta_top = npsum(binvariances*(binndets - 1)) / (npsum(binndets) -
                                                      goodbins)
    theta_bot = npvar(pmags, ddof=1)
    theta = theta_top/theta_bot

    return theta
def eval(self, x, y):
    xfound = False
    yfound = False
    itest = self.itest
    if itest > 0:
        Xtest = self.Xtest
        Ftest = self.Ftest
    else:
        fx = self.f(x)
        itest = 1
        Xtest = array([x])
        Ftest = array([fx])
        xfound = True

    for i in range(itest):
        if not (xfound) and npsum(abs(Xtest[i, :] - x)) <= 1e-10:
            xfound = True
            fx = Ftest[i]
        if not (yfound) and npsum(abs(Xtest[i, :] - y)) <= 1e-10:
            yfound = True
            fy = Ftest[i]

    if not (xfound):
        fx = self.f(x)
        Xtest = vstack((Xtest, x))
        Ftest = append(Ftest, fx)
        itest = itest + 1
    if not (yfound):
        fy = self.f(y)
        Xtest = vstack((Xtest, y))
        Ftest = append(Ftest, fy)
        itest = itest + 1

    # Make comparison
    if fx < fy - self.comparetol:
        out = -1
    elif fx > fy + self.comparetol:
        out = 1
    else:
        out = 0

    self.Xtest = Xtest
    self.Ftest = Ftest
    self.itest = itest

    return out
def Stats(epsi, FP=None):
    # Given a time series (epsi) and the associated probabilities FP,
    # this function computes the statistics: mean, standard deviation,
    # VaR, CVaR, skewness and kurtosis.
    #  INPUT
    #  epsi  :[vector] (1 x t_end)
    #  FP    :[matrix] (q_ x t_end) statistics are computed for each of the q_ sets of probabilities.
    #  OUTPUT
    #  m     :[vector] (q_ x 1) mean of epsi with FP (for each set of FP)
    #  stdev :[vector] (q_ x 1) standard deviation of epsi with FP (for each set of FP)
    #  VaR   :[vector] (q_ x 1) value at risk with FP
    #  CVaR  :[vector] (q_ x 1) conditional value at risk with FP
    #  sk    :[vector] (q_ x 1) skewness with FP
    #  kurt  :[vector] (q_ x 1) kurtosis with FP
    ###########################################################################

    # size check
    if epsi.shape[0] > epsi.shape[1]:
        epsi = epsi.T  # epsi: row vector

    t_ = epsi.shape[1]

    # if FP argument is missing, set equally weighted FP
    if FP is None:
        FP = ones((1, t_)) / t_
    if FP.shape[1] != epsi.shape[1]:
        FP = FP.T

    q_ = FP.shape[0]

    m = zeros((q_, 1))
    stdev = zeros((q_, 1))
    VaR = zeros((q_, 1))
    CVaR = zeros((q_, 1))
    sk = zeros((q_, 1))
    kurt = zeros((q_, 1))
    for q in range(q_):
        m[q] = (epsi * FP[[q], :]).sum()
        stdev[q] = sqrt(npsum(((epsi - m[q])**2) * FP[q, :]))
        SortedEps, idx = sort(epsi), argsort(epsi)
        SortedP = FP[[q], idx]
        VarPos = where(cumsum(SortedP) >= 0.01)[0][0]
        VaR[q] = -SortedEps[:, VarPos]
        CVaR[q] = -FPmeancov(
            SortedEps[[0], :VarPos + 1],
            SortedP[:, :VarPos + 1].T / npsum(SortedP[:, :VarPos + 1]))[0]
        sk[q] = npsum(FP[q, :] * ((epsi - m[q])**3)) / (stdev[q]**3)
        kurt[q] = npsum(FP[q, :] * ((epsi - m[q])**4)) / (stdev[q]**4)

    return m, stdev, VaR, CVaR, sk, kurt
def cren1D(etaC, nulamC, M):
    "square-wave (crenel) function"
    # reshape to column: .reshape(-1,1) ; reshape to row: .reshape(1,-1)
    eta1, eta2 = vstack([0, etaC.reshape(-1, 1)]), vstack([etaC.reshape(-1, 1), 1])
    epr, m_ = nulamC.reshape(-1, 1)**2, -2*pi*(arange(2*M)+1)
    # print(eta1, eta2, epr)
    epr0 = npsum((eta2 - eta1) * epr)
    eprm = dot(epr, ones((1, 2*M)))
    m = m_.reshape(1, -1)
    e1m, e2m = 1j*dot(eta1, m), 1j*dot(eta2, m)
    epr_m = hstack([0., -1j * npsum((exp(e2m) - exp(e1m)) * eprm, 0) / m_])
    epr_p = hstack([0.,  1j * npsum((exp(-e2m) - exp(-e1m)) * eprm, 0) / m_])
    return (diagflat(epr0*ones(2*M+1))
            + triu(toeplitz(epr_m.real) + 1j*toeplitz(epr_m.imag))
            + tril(toeplitz(epr_p.real) + 1j*toeplitz(epr_p.imag)))
def GarchResiduals(x, t_garch=None, p_garch=None, g=0.95, p0=[0, 0.01, 0.8, 0]):
    # This function computes the residuals of a GARCH(1,1) fit on x.
    # If t_garch < t_obs=x.shape[1] the fit is performed on a rolling window of
    # t_garch observations
    #  INPUTS
    #  x       [matrix]: (n_ x t_obs) dataset of observations
    #  t_garch [scalar]: number of observations processed at every iteration
    #  p_garch [vector]: (1 x t_end) flexible probabilities (optional; default =
    #                    exponential decay flexible probabilities with half life 6 months)
    #  g       [scalar]: we impose the constraint a+b <= g on the GARCH(1,1) parameters (default: g=0.95)
    #  p0      [vector]: (1 x 4) initial guess (for compatibility with OCTAVE)
    #  OPS
    #  epsi    [matrix]: (n_ x t_end) residuals
    # note: sigma**2 is initialized with a forward exponential smoothing

    ## Code
    if t_garch is None:
        t_garch = x.shape[1]
    if p_garch is None:
        lambda1 = log(2) / 180
        p_garch = exp(-lambda1 * arange(t_garch, 0, -1)).reshape(1, -1)
        p_garch = p_garch / npsum(p_garch)

    n_, t_obs = x.shape
    lam = 0.7

    if t_garch == t_obs:
        # no rolling window
        epsi = zeros((n_, t_obs))
        for n in range(n_):
            s2_0 = lam * var(x[n, :], ddof=1) + (1 - lam) * npsum(
                (lam**arange(0, t_obs)) * (x[n, :]**2))
            _, _, epsi[n, :], _ = FitGARCHFP(x[[n], :], s2_0, p0, g, p_garch)  # GARCH fit
    else:
        # use rolling window
        t_ = t_obs - t_garch
        epsi = zeros((n_, t_))
        for t in range(t_):
            for n in range(n_):
                x_t = x[n, t:t + t_garch - 1]
                s2_0 = lam * var(x_t) + (1 - lam) * npsum(
                    (lam**arange(0, t_garch)) * (x_t**2))
                _, _, e, _ = FitGARCHFP(x_t, s2_0, p0, g, p_garch)
                epsi[n, t] = e[-1]

    return epsi
def cosine_similarity_low_rank_multi(G, y):
    """
    Cosine similarity between matrices from outer products of matrix
    :math:`\mathbf{G}` and vector :math:`\mathbf{y}`.

    :param G: (``numpy.ndarray``) Low-rank matrix.

    :param y: (``numpy.ndarray``) Column vector.

    :return: (``float``) Cosine similarity (kernel alignment).
    """
    enum = npsum(G.T.dot(y)**2)
    denom = y.T.dot(y) * sqrt(npsum([npsum(G[:, i] * G[:, j])**2
                                     for i, j in product(range(G.shape[1]),
                                                         range(G.shape[1]))]))
    return 1.0 * enum / denom
def gaussian_kernel(**kwargs):
    s = kwargs.get('size', 2)
    mu = kwargs.get('mu', 0.0)
    sigma = kwargs.get('sigma', 1.0)
    integer = kwargs.get('integer', False)

    x = array(range(-s, s+1))
    g = gaussian(x, mu=mu, sigma=sigma)
    m = outer(g, g)
    if integer:
        gi = (m/m[0, 0]).astype(int)
        return gi, npsum(gi)
    else:
        return m, npsum(m)
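
# A minimal usage sketch for gaussian_kernel (an assumption for illustration:
# the helper gaussian() and the numpy names array/outer/npsum used above are
# in scope). Builds a 5x5 integer kernel and its normalisation constant.
kern, kern_sum = gaussian_kernel(size=2, sigma=1.0, integer=True)
# smoothed = npsum(kern * patch) / kern_sum   # patch: a hypothetical 5x5 image window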
def multivariate_student_t_pdf(x, mu, Sigma2, df):
    x = np.atleast_2d(x)  # requires x as 2d
    n_ = Sigma2.shape[0]  # dimensionality

    R = cholesky(Sigma2)
    z = solve(R, x)
    logSqrtDetC = npsum(log(diag(R)))
    logNumer = -((df + n_) / 2) * log(1 + npsum(z**2, axis=0) / df)
    logDenom = logSqrtDetC + (n_ / 2) * log(df * np.pi)
    y = exp(gammaln((df + n_) / 2) - gammaln(df / 2) + logNumer - logDenom)
    return y
def grassberger(counts):
    """Entropy calculation using Grassberger correction.
    doi:10.1016/0375-9601(88)90193-4

    Parameters
    ----------
    counts : list
        bin counts

    Returns
    -------
    entropy : float
    """
    n_samples = npsum(counts)
    return npsum(counts * (log(n_samples) -
                           nan_to_num(psi(counts)) -
                           ((-1.)**counts / (counts + 1.)))) / n_samples
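
# A minimal usage sketch for grassberger (an assumption for illustration: the
# module-level imports used above, e.g. `from numpy import sum as npsum, log,
# nan_to_num` and `from scipy.special import psi`, are in scope).
import numpy as np

counts = np.array([8., 4., 2., 1., 1.])   # histogram bin counts
H = grassberger(counts)                   # bias-corrected entropy, in nats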
def bubble(P):
    y = self.mole_fraction
    phiL, ZL = self.fugacity_coeffecient(y, P, self.T, 'liquid')
    fL = phiL*y*P
    K_est = self.Pc/P * exp(5.37*(1 - self.w)*(1 - self.Tc/self.T))
    x = y*K_est
    x = x/npsum(x)
    phiV, ZV = self.fugacity_coeffecient(x, P, self.T, 'vapor')
    fV = phiV*x*P
    for i in range(50):
        x = fL/fV*x
        phiV, ZV = self.fugacity_coeffecient(x, P, self.T, 'vapor')
        fV = phiV*x*P
    sum2 = npsum(x)
    return abs(sum2 - 1)
def gen_data(self):
    self.num_bg_meas = self.peakdata[:, :, :, :].shape[2] - 1
    channels_per_step = self.med_stepsize
    max_i = int(floor(npmax(self.mset) - 1))
    self.peak_ranges = self.peak_ranges[:max_i]
    self.raw_signal = npsum(self.peakdata[:, :, 0, :max_i], axis=1) * self.numadds
    self.raw_background = npsum(npsum(self.peakdata[:, :, 1:, :max_i], axis=2),
                                axis=1) * self.numadds
    out_array = zeros([len(self.wl_inds), self.raw_signal.shape[1]])
    bg_out_array = zeros([len(self.wl_inds), self.raw_background.shape[1]])
    for i, ind0 in enumerate(self.wl_inds):
        out_array[i, :] = npsum(
            self.raw_signal[ind0:int(ind0 + channels_per_step), :max_i], axis=0)
        bg_out_array[i, :] = npsum(
            self.raw_background[ind0:int(ind0 + channels_per_step), :max_i], axis=0)
    self.signal = out_array
    self.background = bg_out_array
    signal_err = sqrt(self.signal)
    background_err = sqrt(self.background)
    with catch_warnings():
        simplefilter("ignore", category=RuntimeWarning)
        self.diff_signal = self.signal - (self.background / self.num_bg_meas)
        self.flat_signal = self.signal / (self.background / self.num_bg_meas)
        self.flat_signal_err = sqrt(
            signal_err**2 * (self.num_bg_meas / self.background)**2 +
            background_err**2 * (self.num_bg_meas * self.signal / self.background**2)**2)
    self.flat_signal[isnan(self.flat_signal)] = 0
    self.flat_signal_err[isnan(self.flat_signal_err)] = 0
    self.flat_signal[self.flat_signal == 0] = 1e-6
    self.flat_signal_err[self.flat_signal_err == 0] = 1e-6
    self.wls_pow, self.flat_signal_pow, self.flat_signal_err_pow = laser_corr(
        self.wls, self.flat_signal, self.flat_signal_err)
def pinball_loss(y_true, y_pred, probs):
    """Compute the pinball loss.

    Parameters
    ----------
    y_pred : {array-like}, shape = [n_quantiles, n_samples] or [n_samples]
        Predictions.
    y_true : {array-like}, shape = [n_samples]
        Targets.

    Returns
    -------
    l : {array}, shape = [n_quantiles]
        Average loss for each quantile level.
    """
    probs = asarray(probs).reshape(-1)
    check_consistent_length(y_true, y_pred.T)
    y_true = check_array(y_true.reshape((-1, 1)), ensure_2d=True)
    y_pred = check_array(y_pred.T.reshape((y_true.shape[0], -1)),
                         ensure_2d=True)
    residual = y_true - y_pred
    loss = npsum([fmax(prob * res, (prob - 1) * res)
                  for (res, prob) in zip(residual.T, probs)], axis=1)
    return loss / y_true.size
def EffectiveScenarios(p, Type=None):
    # This function computes the Effective Number of Scenarios of Flexible
    # Probabilities via different types of functions
    #  INPUTS
    #   p     : [vector] (1 x t_) vector of Flexible Probabilities
    #   Type  : [struct] type of function: 'ExpEntropy', 'GenExpEntropy'
    #  OUTPUTS
    #   ens   : [scalar] Effective Number of Scenarios
    # NOTE:
    #  The exponential of the entropy is set as default; otherwise set
    #  Type.Entropy = 'GenExp' and supply the scalar Type.g to use the
    #  generalized exponential of the entropy.

    if Type is None:
        Type = namedtuple('type', ['Entropy'])
        Type.Entropy = 'Exp'
    if Type.Entropy != 'Exp':
        Type.Entropy = 'GenExp'

    ## Code
    if Type.Entropy == 'Exp':
        p[p == 0] = 10**(-250)  # avoid log(0) in ens computation
        ens = exp(-p@log(p.T))
    else:
        ens = npsum(p ** Type.g) ** (-1 / (Type.g - 1))

    return ens
def softmax_grad(theta, nclasses, dim, wdecay, data, labels):
    # unroll parameters from theta
    theta = reshape(theta, (dim, nclasses))  # Do this
    theta = theta.T
    nsamp = data.shape[1]

    # generate ground truth matrix
    onevals = squeeze(ones((1, nsamp)))
    rows = squeeze(labels) - 1  # Here should -1 to align zero-indexing
    cols = arange(nsamp)
    ground_truth = csr_matrix((onevals, (rows, cols))).todense()
    # plt.imshow(ground_truth, interpolation='nearest')
    # plt.draw()
    # print(ground_truth)

    # compute hypothesis; use some in-place computations
    theta_dot_prod = dot(theta, data)
    theta_dot_prod = theta_dot_prod - numpy.amax(theta_dot_prod, axis=0)  # This was wrong
    soft_theta = npexp(theta_dot_prod)
    soft_theta_sum = npsum(soft_theta, axis=0)
    soft_theta_sum = tile(soft_theta_sum, (nclasses, 1))
    hyp = soft_theta/soft_theta_sum

    # compute gradient
    thetagrad = (-1.0/nsamp)*dot(ground_truth - hyp, transpose(data)) + wdecay*theta
    thetagrad = asarray(thetagrad)
    thetagrad = thetagrad.flatten('F')

    return thetagrad
def countStoppedVehiclesVissim(filename, lanes=None, proportionStationaryTime=0.7):
    '''Counts the number of vehicles stopped for a long time in a VISSIM
    trajectory file and the total number of vehicles

    Vehicles are considered finally stationary if they spend more than
    proportionStationaryTime of their total time at the same position

    If lanes is not None, only the data for the selected lanes will be provided
    (format as string x_y where x is link index and y is lane index)'''
    from pandas import read_csv
    from numpy import array, sum as npsum

    columns = ['NO', '$VEHICLE:SIMSEC', 'POS']
    if lanes is not None:
        columns += ['LANE\\LINK\\NO', 'LANE\\INDEX']
    data = read_csv(filename, delimiter=';', comment='*', header=0,
                    skiprows=1, usecols=columns)
    data = selectPDLanes(data, lanes)
    data.sort_values(['$VEHICLE:SIMSEC'], inplace=True)

    nStationary = 0
    nVehicles = 0
    for name, group in data.groupby(['NO'], sort=False):
        nVehicles += 1
        positions = array(group['POS'])
        diff = positions[1:] - positions[:-1]
        if npsum(diff == 0.) >= proportionStationaryTime*len(positions):
            nStationary += 1

    return nStationary, nVehicles
def project(self, tdata):
    """
    This function projects test data :math:`D'` onto the mean map, i.e. it
    computes and returns :math:`\\langle\mu_{{D_1}},\\psi(x)\\rangle_\mathcal{H}`
    for each :math:`x\in D'`, where :math:`\\psi:\mathbb{R}^{d}\\to\mathcal{H}`
    is the feature map associated to the RKHS :math:`\mathcal{H}`.

    :param tdata: test data matrix :math:`D'\in\mathbb{R}^{d \\times n}`, where
        :math:`d` is the dimensionality and :math:`n` is the number of samples
    :type tdata: numpy array

    :return: projected data
    :rtype: numpy array
    """
    # parse data and ensure it meets requirements
    test_dim = tdata.shape[0]
    ntest = tdata.shape[1]
    if test_dim != self.dim:
        raise Exception("ERROR: dimensionality of data must be consistent with training set.")

    # compute kernel matrix between data and tdata
    kmat = kernel(self.data, tdata, self.kernel)
    kmat = npsum(kmat, axis=0)
    kmat = (1/self.nsamp)*kmat

    return kmat
def object_val_calc(self, codebook_comps, ksi, gamma, theta, vecs):
    '''
    Calculate objective function value
    '''
    _bs_ = np.dot(codebook_comps, vecs)
    square_term = 0.5 * npsum((ksi - _bs_)**2, axis=0)
    res = (square_term + gamma * dot(theta.T, vecs)).ravel()
    return res
def gaussian_kde(data, x, w):
    """
    kernel density estimate of the pdf represented by data
    at point x with bandwidth w
    """
    N = float(len(data))
    return npsum([_gauss_kern(x, xn, w) for xn in data])/(N*w*sqrt(2.*pi))
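
# A minimal usage sketch for gaussian_kde (an assumption for illustration: the
# helper _gauss_kern and the numpy names npsum/sqrt/pi used above are in scope).
data = [1.0, 1.5, 2.0, 2.5, 3.0]
density_at_2 = gaussian_kde(data, x=2.0, w=0.5)   # KDE value at x=2, bandwidth 0.5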
def sparse_CostFunc(weightAllLayers, *args):
    r'''
    Vectorized/regulated sparse cost function (described in the
    sparseae_reading.pdf on the `UFLDL Tutorial Exercise:Sparse_Autoencoder
    <http://ufldl.stanford.edu/wiki/index.php/Exercise:Sparse_Autoencoder>`_)
    that computes the total cost over multiple inputs:

    .. math::

        &define: \hat{\rho}_j=\frac{1}{m}\sum_{i=1}^{m}[actvh_j(x^i)]\\
        &define: \sum_{j=1}^{h}KL(\rho||\hat{\rho}_j)=\sum_{j=1}^{h}\rho~log\frac{\rho}{\hat{\rho}_j}+(1-\rho)log\frac{1-\rho}{1-\hat{\rho}_j}\\
        &costFunction:~\frac{1}{m}\sum_{i=1}^{m}(0.5~||forwardThruAllLayers(x^i)-y^i||^2)+\frac{\lambda}{2}\sum^{allLayers~excludeAnyBias}(weight^2)\\
        &+\beta\sum_{j=1}^{h}KL(\rho||\hat{\rho}_j)

    where :math:`\hat{\rho}_j` = average activation of hidden unit j;
    :math:`m` = the number of inputs; :math:`h` = number of hidden units
    excluding bias units; :math:`y^i` = a single target array; :math:`x^i` = a
    single input array; :math:`\beta` = this is sparseParam.

    :param weightAllLayers: A flattened array containing all forward weights.
    :param *args: Must be in the following order:

        **inputArr2D**: 1 training example per row.

        **targets**: Must be a 2D ndarray instead of matrix. And the number of
        labels must match the number of units in output layer.

        **weightDecayParam**: For model complexity regulation.

        **sparsity**: Setting the sparsity of neural network.

        **sparseParam**: For sparsity regulation.

        **nn**: An instance of class FeedforwardNeuNet.

    :returns: A scalar representing the cost of current input using weightAllLayers.
    '''
    inputArr2D, targets, weightDecayParam, sparsity, sparseParam, nn = args
    startIndex, weightsExcBias = 0, 0
    avgEx = 1.0 / targets.shape[0]
    for ly in nn.layersExOutputLy:  # update all forward weights
        newWeight = reshape(weightAllLayers[startIndex:startIndex + ly.forwardWeight.size], ly.forwardWeight.shape)
        ly.forwardWeight = asmatrix(newWeight)
        startIndex += ly.forwardWeight.size
        weightsExcBias = append(weightsExcBias, newWeight[:-1])  # exclude weights for bias unit with [:-1]
    output = asarray(nn.forwardPropogateAllInput(inputArr2D))
    assert output.shape[1] == targets.shape[1], 'dimension mismatch in next layer'
    avgActvArrAllLyAllEx = 0
    for ly in nn.layersExOutputLy[1:]:
        ly.avgActvArrAllEx = avgEx * npsum(ly.self2D[:, :-1], 0)
        avgActvArrAllLyAllEx = append(avgActvArrAllLyAllEx, ly.avgActvArrAllEx)  # not sure whether I should include bias here?
    avgActvArrAllLyAllEx = avgActvArrAllLyAllEx[1:]  # discard 0 at the beginning
    return (avgEx * npsum(0.5 * (output - targets) ** 2)
            + 0.5 * weightDecayParam * npsum(weightsExcBias ** 2)
            + sparseParam * npsum(sparsity * log(sparsity / avgActvArrAllLyAllEx)
                                  + (1 - sparsity) * log((1 - sparsity) / (1 - avgActvArrAllLyAllEx))))
def sigma_distribution(xoff, n, sigma_s, x_range, angles, interp_kind='slinear'):
    s = sigma_integrate(x_range, xoff, sigma_s, angles)
    # weight the contribution of each radial bin
    sarray = [ni * si(x_range) for ni, si in izip(n, s)]
    s = npsum(sarray, axis=0) / n.sum()
    # go back to the required format
    s = interp1d(x_range, s, kind=interp_kind)
    return s
def AvAnimalNSum_f(NYrs, NGPctManApp, GrazingAnimal_0, NumAnimals, AvgAnimalWt,
                   AnimalDailyN, NGAppNRate, Prec, DaysMonth, NGPctSoilIncRate,
                   GRPctManApp, GRAppNRate, GRPctSoilIncRate, NGBarnNRate,
                   AWMSNgPct, NgAWMSCoeffN, RunContPct, RunConCoeffN,
                   PctGrazing, GRBarnNRate, AWMSGrPct, GrAWMSCoeffN,
                   PctStreams, GrazingNRate):
    return npsum(
        AvAnimalN_f(NYrs, NGPctManApp, GrazingAnimal_0, NumAnimals, AvgAnimalWt,
                    AnimalDailyN, NGAppNRate, Prec, DaysMonth, NGPctSoilIncRate,
                    GRPctManApp, GRAppNRate, GRPctSoilIncRate, NGBarnNRate,
                    AWMSNgPct, NgAWMSCoeffN, RunContPct, RunConCoeffN,
                    PctGrazing, GRBarnNRate, AWMSGrPct, GrAWMSCoeffN,
                    PctStreams, GrazingNRate))
def mmd(self, data2):
    """
    Compute the maximum mean discrepancy between :math:`D_1` and :math:`D_2`,
    i.e. :math:`\\|\\mu_{{D_1}} - \\mu_{{D_2}}\\|_{\mathcal{H}}`

    :param data2: data matrix :math:`D_2\in\mathbb{R}^{d \\times n}`, where
        :math:`d` is the dimensionality and :math:`n` is the number of samples
    :type data2: numpy array

    :return: maximum mean discrepancy
    :rtype: float
    """
    # parse data and ensure it meets requirements
    dim_data2 = data2.shape[0]
    ntest = data2.shape[1]
    if dim_data2 != self.dim:
        raise Exception("ERROR: dimensionality of data must be consistent with training set.")

    # now, compute MMD equation, using in-place computations
    kmat = kernel(data2, data2, self.kernel)
    ktestsum = npsum(npsum(kmat, axis=0), axis=1)/(pow(ntest, 2))
    kmat = kernel(self.data, self.data, self.kernel)
    ktrainsum = npsum(npsum(kmat, axis=0), axis=1)/(pow(self.nsamp, 2))
    kmat = kernel(self.data, data2, self.kernel)
    kcrossum = npsum(npsum(kmat, axis=0), axis=1)/(self.nsamp*ntest)

    mmdcomputed = ktestsum + ktrainsum - 2*kcrossum
    return mmdcomputed
def AvStreamBankNSum_f(NYrs, DaysMonth, Temp, InitSnow_0, Prec, NRur, NUrb,
                       Area, CNI_0, AntMoist_0, Grow_0, CNP_0, Imper, ISRR,
                       ISRA, CN, UnsatStor_0, KV, PcntET, DayHrs, MaxWaterCap,
                       SatStor_0, RecessionCoef, SeepCoef, Qretention,
                       PctAreaInfil, n25b, Landuse, TileDrainDensity,
                       PointFlow, StreamWithdrawal, GroundWithdrawal,
                       NumAnimals, AvgAnimalWt, StreamFlowVolAdj,
                       SedAFactor_0, AvKF, AvSlope, SedAAdjust, StreamLength,
                       n42b, n46c, n85d, AgLength, n42, n54, n85, UrbBankStab,
                       SedNitr, BankNFrac, n69c, n45, n69):
    return npsum(
        StreamBankN_1_f(NYrs, DaysMonth, Temp, InitSnow_0, Prec, NRur, NUrb,
                        Area, CNI_0, AntMoist_0, Grow_0, CNP_0, Imper, ISRR,
                        ISRA, CN, UnsatStor_0, KV, PcntET, DayHrs, MaxWaterCap,
                        SatStor_0, RecessionCoef, SeepCoef, Qretention,
                        PctAreaInfil, n25b, Landuse, TileDrainDensity,
                        PointFlow, StreamWithdrawal, GroundWithdrawal,
                        NumAnimals, AvgAnimalWt, StreamFlowVolAdj,
                        SedAFactor_0, AvKF, AvSlope, SedAAdjust, StreamLength,
                        n42b, AgLength, UrbBankStab, SedNitr, BankNFrac, n69c,
                        n45, n69, n46c, n42)) / NYrs
def auto_roi(self):
    """
    Attempts to automatically determine the location of the atom clouds using
    find_clouds, and then creates a region of interest for the Cycle that is
    the union of the ROI for all frames
    """
    if self.clouds == []:
        self.find_clouds()
    if self.clouds == []:
        self.roi = ROI(tblr=[0, self.height - 1, 0, self.width - 1])
        return
    self.roi = npsum([cloud.roi for cloud in self.clouds])
    for cloud in self.clouds:
        cloud.roi = self.roi
def auto_roi(self):
    """
    Invokes auto_roi for each cycle, which attempts to automatically determine
    the location of the atom clouds using find_clouds, and then creates a
    region of interest for the Cycle that is the union of the ROI for all
    frames
    """
    roilist = []
    for cycle in self.cycles:
        cycle.auto_roi()
        for frame in cycle.frames:
            roilist.append(frame.roi)
    allroi = npsum(roilist)
    self.set_roi(tblr=allroi.tblr)
def find_clouds(self, thresh_OD=0.05, thresh_sum_OD=500):
    """
    Finds all clouds in the image frame with an OD > thresh_OD. If many are
    found, the clouds with "mass" below sum_OD are eliminated. These clouds
    are stored in frame.clouds, sorted in order of total mass.

    keywords
    --------
    'thresh_OD' : number, default = 0.05
        threshold value above which a pixel is counted as part of a cloud

    'thresh_sum_OD' : number, default = 500
        Minimum "mass" of a cloud for it to be counted

    Usage
    ------
    ::

        > Frame.find_clouds()   # default values
        > Frame.find_clouds( thresh_OD = 0.1, thresh_sum_OD = 1000 )
    """
    self.clouds = []
    self.limg = []
    self.limg, object_regions = binary_threshold(self.OD, thresh_OD)

    if len(object_regions) == 0:
        print('%s: 0 clouds with OD > %.2f' % (self.frame_id, thresh_OD))
        return
    else:
        sum_OD_list = [npsum(self.OD[obj]) for obj in object_regions]
        obj_info = zip(sum_OD_list, object_regions)
        big_obj_info = [bo for bo in obj_info if bo[0] > thresh_sum_OD]

        if len(big_obj_info) == 0:
            print('%s: %d clouds with OD > %.2f and sum_OD > %d' %
                  (self.frame_id, len(self.clouds), thresh_OD, thresh_sum_OD))
        else:
            # sort the clouds in order of decreasing mass
            for num, o in enumerate(sorted(big_obj_info, reverse=True)):
                roi = ROI(slices=o[1])
                roi.square()
                roi.scale(factor=1.2)
                next_cloud = Cloud(frame_OD=self.OD,
                                   cloudnum=num,
                                   sumOD=o[0],
                                   init_slices=roi.slices,
                                   frame_id=self.frame_id,
                                   cam_info=self.cam_info,)
                self.clouds.append(next_cloud)
            print('%s: %d clouds (OD > %.2f, sum_OD > %d)' %
                  (self.frame_id, len(self.clouds), thresh_OD, thresh_sum_OD))
def fro_prod(A, B):
    """
    The Frobenius product is an inner product between matrices :math:`\mathbf{A}`
    and :math:`\mathbf{B}` of same shape.

    .. math::
        <\mathbf{A}, \mathbf{B}>_F = \sum_{i, j} \mathbf{A}_{ij} \mathbf{B}_{ij}

    :param A: (``numpy.ndarray``) a matrix.

    :param B: (``numpy.ndarray``) a matrix.

    :return: (``float``) Frobenius product value.
    """
    return npsum(multiply(A, B))
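
# A minimal usage sketch for fro_prod (an assumption for illustration: the
# numpy names npsum/multiply used above are in scope).
import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0]])
B = np.eye(2)
val = fro_prod(A, B)   # elementwise product summed: 1*1 + 4*1 = 5.0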
def fit(self, Ks, y, holdout=None):
    """Learn weights for kernel matrices or Kinterfaces.

    :param Ks: (``list``) of (``numpy.ndarray``) or of (``Kinterface``) to be aligned.

    :param y: (``numpy.ndarray``) Class labels :math:`y_i \in {-1, 1}` or regression targets.

    :param holdout: (``list``) List of indices to exclude from alignment.
    """
    model = CSI(**self.csi_args)
    p = len(Ks)
    Gs = []
    Qs = []
    Rs = []
    for K in Ks:
        model.fit(K, y, holdout)
        Gs.append(model.G)
        Rs.append(model.R)
        Qs.append(model.Q)

    # Construct holdin set if doing transductive learning
    holdin = None
    if holdout is not None:
        n = Ks[0].shape[0]
        holdin = list(set(range(n)) - set(holdout))

    # Solve for the best linear combination of weights
    a = zeros((p, 1))
    M = zeros((p, p))
    for (i, Gu), (j, Hu) in combinations(enumerate(list(Gs)), 2):
        G = center_kernel_low_rank(Gu)
        H = center_kernel_low_rank(Hu)
        M[i, j] = M[j, i] = npsum(G.T.dot(H)**2)
        if a[i] == 0:
            M[i, i] = npsum(G.T.dot(G)**2)
            if holdin is None:
                a[i] = npsum(G.T.dot(y)**2)
            else:
                a[i] = npsum(G[holdin, :].T.dot(y[holdin])**2)
        if a[j] == 0:
            M[j, j] = npsum(H.T.dot(H)**2)
            if holdin is None:
                a[j] = npsum(H.T.dot(y)**2)
            else:
                a[j] = npsum(H[holdin, :].T.dot(y[holdin])**2)

    Mi = inv(M)
    mu = Mi.dot(a) / norm(Mi.dot(a), ord=2)

    self.Gs = map(center_kernel_low_rank, Gs)
    self.G = hstack(Gs)
    self.mu = mu
    self.trained = True
def cost(self, theta):
    """
    This function computes the cost and gradient associated to the softmax
    classifier.

    :param theta: flattened array of softmax parameters, reshaped internally
        to :math:`\mathbb{R}^{d \\times n_{classes}}`, where :math:`d` is the
        dimensionality of the training data
    :type theta: numpy array
    """
    # unroll parameters from theta
    theta = reshape(theta, (self.dim, self.nclasses))
    theta = theta.T
    nsamp = self.data.shape[1]

    # generate ground truth matrix
    onevals = squeeze(ones((1, nsamp)))
    rows = squeeze(self.labels) - 1
    cols = arange(nsamp)
    ground_truth = csr_matrix((onevals, (rows, cols))).todense()

    # compute hypothesis; use some in-place computations
    theta_dot_prod = dot(theta, self.data)
    theta_dot_prod = theta_dot_prod - numpy.amax(theta_dot_prod, axis=0)
    soft_theta = npexp(theta_dot_prod)
    soft_theta_sum = npsum(soft_theta, axis=0)
    soft_theta_sum = tile(soft_theta_sum, (self.nclasses, 1))
    hyp = soft_theta / soft_theta_sum

    # compute cost
    log_hyp = nplog(hyp)
    temp = array(multiply(ground_truth, log_hyp))
    temp = npsum(npsum(temp, axis=1), axis=0)
    cost = (-1.0 / nsamp) * temp + 0.5 * self.wdecay * pow(norm(theta, "fro"), 2)

    # compute gradient
    thetagrad = (-1.0 / nsamp) * dot(ground_truth - hyp, transpose(self.data)) + self.wdecay * theta
    thetagrad = thetagrad.flatten('F')

    return cost, thetagrad
def train(self, inputArr2D, targets, costFunc, costFuncGrad, maxIter=100):
    '''
    This method will fit the weights of the neural network to the targets.

    :param inputArr2D: 1 input per row.
    :param targets: ground truth class label for each input
    :param costFunc: callable *f(paramToOptimize, \*arg)* that will be used as cost function.
    :param costFuncGrad: callable *f'(paramToOptimize, \*arg)* that will be used to compute
        partial derivative of cost function over each parameter in paramToOptimize.
    '''
    self.forwardPropogateAllInput(inputArr2D)  # perform forward propagation to set self.outputs
    avgEx = 1.0 / targets.shape[0]
    flatWeights = asarray(self.layersExOutputLy[0].forwardWeight)
    for ly in self.layersExOutputLy[1:]:
        ly.avgActvArrAllEx = avgEx * npsum(ly.self2D[:, :-1], 0)
        flatWeights = append(flatWeights, asarray(ly.forwardWeight))
    fmin_cg(costFunc, flatWeights, costFuncGrad,
            (inputArr2D, targets, self.__weightDecayParam, self.__sparsity, self.__sparseParam, self),
            maxiter=maxIter, full_output=True)  # fmin_cg calls grad before cost func
def softmax_predict(theta, nclasses, dim, data):
    # unroll parameters from theta
    theta = reshape(theta, (dim, nclasses))  # Do this
    theta = theta.T

    # compute hypothesis; use some in-place computations
    theta_dot_prod = dot(theta, data)
    theta_dot_prod = theta_dot_prod - numpy.amax(theta_dot_prod, axis=0)  # This was wrong
    soft_theta = npexp(theta_dot_prod)
    soft_theta_sum = npsum(soft_theta, axis=0)
    soft_theta_sum = tile(soft_theta_sum, (nclasses, 1))
    hyp = soft_theta/soft_theta_sum

    print("hyp.shape")
    print(hyp.shape)

    pred = numpy.argmax(hyp, axis=0)

    return numpy.asarray(pred)
def courseraML_CostFunc(weightAllLayers, *args):
    r'''
    Vectorized/regulated cost function (described in the Coursera Stanford
    Machine Learning course) that computes the total cost over multiple inputs:

    .. math::

        &\frac{1}{m}\sum_{i=1}^{m}\sum_{k=1}^{K}[-y_k^i log(forwardThruAllLayers(x^i)_k)-(1-y_k^i) log(1-forwardThruAllLayers(x^i)_k)]\\
        &+\frac{\lambda}{2m}\sum^{allLayers~excludeAnyBias} (weight^2)\\

    where :math:`m` = the number of inputs; :math:`k` = the number of labels in
    targets; :math:`y` = a single target array; :math:`x` = a single input array.

    :param weightAllLayers: A flatten array contains all forward weights.
    :param *args: Must be in the following order:

        **inputArr2D**: 1 training example per row.

        **targets**: Must be a 2D ndarray instead of matrix. And the number of
        labels must match the number of units in output layer.

        **weightDecayParam**: For model complexity regulation.

        **sparsity**: Ignored.

        **sparseParam**: Ignored.

        **nn**: An instance of class FeedforwardNeuNet.

    :returns: A scalar representing the cost of current input using weightAllLayers.
    '''
    inputArr2D, targets, weightDecayParam, _, _, nn = args
    startIndex, weightsExcBias = 0, 0
    for ly in nn.layersExOutputLy:  # update all forward weights
        newWeight = reshape(weightAllLayers[startIndex:startIndex + ly.forwardWeight.size], ly.forwardWeight.shape)
        ly.forwardWeight = asmatrix(newWeight)
        startIndex += ly.forwardWeight.size
        weightsExcBias = append(weightsExcBias, newWeight[:-1])  # exclude weights for bias unit with [:-1]
    output = asarray(nn.forwardPropogateAllInput(inputArr2D))
    assert output.shape[1] == targets.shape[1], 'dimension mismatch in next layer'
    return 1.0 / targets.shape[0] * (npsum(-targets * log(output) - (1 - targets) * log(1 - output))
                                     + weightDecayParam / 2.0 * npsum(weightsExcBias ** 2))
def sliding_window(self, x, coeff):
    """
    The principle of the sliding window is the following: we have an array
    that we want to reduce by a certain factor (16 by default). We only reduce
    the array in the column dimension, as the row dimension is critical for
    certain features (such as the 12-step chromagram). So we take the array
    and average over coeff + coeff/2 cases (50 percent overlapping) and store
    it as a new array.

    Example: if the initial shape of x is (50,1600), after the sliding window
    has been applied, its new shape is (50,100)
    """
    new_array = []
    half = coeff // 2
    # Determine the dimensions of x
    if x.shape != (x.shape[0],):
        for i in x:
            template = []
            for j in arange(int(len(i)/coeff)):
                if j != 0:
                    # If it is neither the first, nor the last case
                    template.append(float(npsum(i[j*coeff - half:(j+1)*coeff])/(coeff + half)))
                elif j == int(len(i)/coeff) - 1:
                    # Last case, filling up
                    template.append(float(npsum(i[j*coeff - half:])/len(i[j*coeff - half:])))
                else:
                    # First case
                    template.append(float(npsum(i[:(j+1)*coeff])/coeff))
            new_array.append(template)
        xshape = (len(new_array), len(new_array[0]))
    else:
        template = []
        for j in arange(int(x.shape[0]/coeff)):
            if j != 0:
                template.append(float(npsum(x[j*coeff - half:(j+1)*coeff])/(coeff + half)))
            elif j == int(x.shape[0]/coeff) - 1:
                # Last case, filling up
                template.append(float(npsum(x[j*coeff - half:])/len(x[j*coeff - half:])))
            else:
                # First case
                template.append(float(npsum(x[:(j+1)*coeff])/coeff))
        new_array = template
        xshape = (len(new_array), 1)
    return new_array, xshape
def sampleV(self, v):
    self.KE.append(npsum(v**2)/2)
def currentTemperature(v):
    # script 6.39
    mvsq = npsum(v**2)
    mvsq /= N
    currentT = mvsq/(3.0*N)
    return currentT
from numpy import array, sum as npsum

NA = 39.5e-3  # m^2
D = 10
k = 1.78e-6   # from last assignment

S = array([998, 945, 953, 989, 934, 995])
S_mean = npsum(S) / float(S.size)

B = k * D * S_mean / float(NA)

with open("Oppgave2.2.txt", 'w') as f:
    f.write("B = %.3g \\text{ T}" % B)