def display_data(X, width=None, save=False):
    m, n = X.shape
    width = sp.int_(width or sp.around(sp.sqrt(n)))
    height = sp.int_(n / width)
    display_rows = sp.int_(sp.floor(sp.sqrt(m)))
    display_cols = sp.int_(sp.ceil(m / display_rows))

    def rightward(acc, curr):
        return sp.hstack([acc, curr])

    def downward(acc, curr):
        return sp.vstack([acc, curr])

    def merge(func, init):
        return lambda arr: reduce(func, arr, init)

    init_rightward = sp.matrix([]).reshape([height, 0])
    init_downward = sp.matrix([]).reshape([0, width * display_cols])

    img_list = [X[i].reshape([height, width]).T for i in range(0, m)]
    img_list_split = [img_list[i:i + display_cols]
                      for i in range(0, len(img_list), display_cols)]
    img = merge(downward, init_downward)(
        map(merge(rightward, init_rightward), img_list_split))

    plt.figure(1)
    plt.imshow(img, cmap='gray')
    plt.tick_params(labelbottom='off', labelleft='off')
    if save:
        plt.savefig('1.png')
    else:
        plt.show()
    return None
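# A minimal usage sketch (hypothetical data) for display_data above: each row
# of X is one flattened 8x8 image, so the grid layout is inferred from m and n.
# Assumes an older SciPy/Matplotlib stack where sp.matrix, sp.int_ and the
# string tick_params flags used above are still accepted.
import scipy as sp
import matplotlib.pyplot as plt
from functools import reduce     # display_data relies on reduce; builtin only in Python 2

m, n = 16, 64                    # 16 images of 8x8 pixels each
X = sp.random.rand(m, n)         # random pixel intensities in [0, 1]
display_data(X)                  # shows a 4x4 grid of 8x8 patches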
def tree_sample(self):
    if self.subsampling:
        n_sample = SP.int_(self.n * self.sampsize)
        subsample = SP.random.permutation(self.n)[:n_sample]
    else:
        subsample = SP.random.random_integers(0, self.n - 1, self.n)
    return subsample
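# A standalone sketch of the two sampling modes used by tree_sample above,
# assuming SP is the scipy/numpy namespace and using hypothetical values for
# n and sampsize.
import scipy as SP

n, sampsize = 10, 0.5
# subsampling=True: sampsize*n distinct indices, drawn without replacement
subsample = SP.random.permutation(n)[:SP.int_(n * sampsize)]
# subsampling=False: a bootstrap sample of size n, drawn with replacement
bootstrap = SP.random.random_integers(0, n - 1, n)
print(subsample, bootstrap)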
def updateW(self, m):
    M = self.components

    if self.noise == 'gauss':
        YmeanX = self.Z.E1
    elif self.noise == 'hurdle' or self.noise == 'poisson':
        YmeanX = self.meanX

    if (m < self.nKnown) or (m in self.iLatentSparse) or (m in self.iLatent):
        logPi = SP.log(self.Pi[:, m] / (1 - self.Pi[:, m]))
    elif self.nScale > 0 and self.nScale < YmeanX.shape[0]:
        logPi = SP.log(self.Pi[:, m] / (1 - self.Pi[:, m]))
        isOFF_ = self.Pi[:, m] < .5
        logPi[isOFF_] = (YmeanX.shape[0] / self.nScale) * SP.log(
            self.Pi[isOFF_, m] / (1 - self.Pi[isOFF_, m]))
        isON_ = self.Pi[:, m] > .5
        if self.onF > 1.:
            logPi[isON_] = self.onF * SP.log(
                self.Pi[isON_, m] / (1 - self.Pi[isON_, m]))
    else:
        onF = 1.
        logPi = SP.log(self.Pi[:, m] / (1 - self.Pi[:, m]))

    sigma2Sigmaw = (1.0 / self.Eps.E1) * self.Alpha.E1[m]

    setMinus = SP.int_(
        SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]]))

    SmTSk = SP.sum(
        SP.tile(self.S.E1[:, m:m + 1], (1, M - 1)) * self.S.E1[:, setMinus], 0)
    SmTSm = SP.dot(self.S.E1[:, m].transpose(),
                   self.S.E1[:, m]) + self.S.diagSigmaS[:, m].sum()

    b = SP.dot((self.W.C[:, setMinus, 0] * self.W.E1[:, setMinus]),
               (SmTSk.transpose()))
    diff = SP.dot(self.S.E1[:, m].transpose(), YmeanX) - b

    SmTSmSig = SmTSm + sigma2Sigmaw

    # update C and W
    u_qm = logPi + 0.5 * SP.log(sigma2Sigmaw) - 0.5 * SP.log(SmTSmSig) + (
        0.5 * self.Eps.E1) * ((diff ** 2) / SmTSmSig)
    self.W.C[:, m, 0] = 1. / (1 + SP.exp(-u_qm))
    self.W.C[:, m, 1] = 1 - self.W.C[:, m, 0]
    self.W.E1[:, m] = (diff / SmTSmSig)  # q(w_qm | s_qm=1), q=1,...,Q
    self.W.sigma2[:, m] = (1. / self.Eps.E1) / SmTSmSig
    self.W.E2diag[:, m] = self.W.E1[:, m] ** 2 + self.W.sigma2[:, m]
def updateS(self, m):
    M = self.components

    if m >= self.nKnown:
        if self.noise == 'gauss':
            YmeanX = self.Z.E1
        elif self.noise == 'hurdle' or self.noise == 'poisson':
            YmeanX = self.meanX

        setMinus = SP.int_(
            SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]]))
        # only account for factors that haven't been switched off already
        setMinus = setMinus[self.doUpdate[setMinus] == 1]

        # update S
        SW_sigma = (self.W.C[:, m, 0] * self.W.E1[:, m]) * self.Eps.E1
        SW2_sigma = (self.W.C[:, m, 0] * (self.W.E2diag[:, m])) * self.Eps.E1

        setMinus = SP.int_(
            SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]]))

        b0 = SP.dot(self.S.E1[:, setMinus],
                    (self.W.C[:, setMinus, 0] * self.W.E1[:, setMinus]).transpose())
        b = SP.dot(b0, SW_sigma)
        alphaSm = SP.sum(SW2_sigma, 0)
        barmuS = SP.dot(YmeanX, SW_sigma) - b
        self.S.diagSigmaS[:, m] = 1. / (1 + alphaSm)
        self.S.E1[:, m] = barmuS / (1. + alphaSm)

        # keep diagSigmaS
        self.Eps.diagSigmaS[m] = SP.sum(self.S.diagSigmaS[:, m])
    else:
        SW2_sigma = (self.W.C[:, m, 0] * (self.W.E2diag[:, m])) * self.Eps.E1
        alphaSm = SP.sum(SW2_sigma, 0)
        self.S.diagSigmaS[:, m] = 1. / (1 + alphaSm)
def compute(nn_params):
    m = Y.shape[0]

    # Reshape nn_params back into the parameters theta_1 and theta_2
    theta_1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))]. \
        reshape([hidden_layer_size, input_layer_size+1])
    theta_2 = nn_params[(hidden_layer_size*(input_layer_size+1)):]. \
        reshape([num_labels, hidden_layer_size+1])

    theta_1_reg = sp.copy(theta_1)
    theta_1_reg[:, 0] = 0
    theta_2_reg = sp.copy(theta_2)
    theta_2_reg[:, 0] = 0

    # Forward propagation
    f = forward_prop(X)(theta_1, theta_2)

    # Initialize variables for back propagation
    a = f['a']
    # Add bias
    a_1 = a[0]
    a_2 = a[1]
    a_3 = a[2]
    z = f['z']
    z_2 = z[0]
    z_3 = z[1]

    # Transform Y
    b = sp.matrix(
        sp.apply_along_axis(
            lambda n: sp.int_(sp.array(range(1, num_labels + 1)) == n), 1, Y))

    DEL_1 = sp.matrix(sp.zeros((hidden_layer_size, input_layer_size + 1)))
    DEL_2 = sp.matrix(sp.zeros((num_labels, hidden_layer_size + 1)))

    for i in range(0, m):
        del_3 = a_3[i, :].T - b[i, :].T
        del_2 = sp.multiply(theta_2[:, 1:].T * del_3,
                            sigmoid_gradient(z_2[i, :].T))
        DEL_2 = DEL_2 + del_3 * a_2[i, :]
        DEL_1 = DEL_1 + del_2 * a_1[i, :]

    # Regularize
    theta_1_grad = DEL_1 / m + (_lambda / m) * theta_1_reg
    theta_2_grad = DEL_2 / m + (_lambda / m) * theta_2_reg

    grad = sp.concatenate([sp.ravel(theta_1_grad), sp.ravel(theta_2_grad)])

    return grad
def CenteredLagProduct(rawbeams, numtype=sp.complex128, pulse=sp.ones(14)):
    """This function will create a centered lag product for each range using the
    raw IQ given to it. It will form each lag for each pulse and then integrate
    all of the pulses.
    Inputs:
        rawbeams - This is a Np x Ns complex numpy array where Ns is the number
            of samples per pulse and Np is the number of pulses.
        N - The number of lags that will be created, default is 14.
        numtype - The type of numbers used to create the data. Default is
            sp.complex128.
    Output:
        acf_cent - This is a Nr x Nl complex numpy array where Nr is the number
            of range gates and Nl is the number of lags.
    """
    N = len(pulse)
    # It will be assumed the data will be pulses vs range
    rawbeams = rawbeams.transpose()
    (Nr, Np) = rawbeams.shape

    # Make masks for each piece of data
    arex = sp.arange(0, N / 2.0, 0.5)
    arback = sp.array([-sp.int_(sp.floor(k)) for k in arex])
    arfor = sp.array([sp.int_(sp.ceil(k)) for k in arex])

    # figure out how much range space will be kept
    ap = sp.nanmax(abs(arback))
    ep = Nr - sp.nanmax(arfor)
    rng_ar_all = sp.arange(ap, ep)
    # wearr = (1./(N-sp.tile((arfor-arback)[:,sp.newaxis],(1,Np)))).astype(numtype)
    # acf_cent = sp.zeros((ep-ap,N))*(1+1j)
    acf_cent = sp.zeros((ep - ap, N), dtype=numtype)
    for irng in sp.arange(len(rng_ar_all)):
        rng_ar1 = sp.int_(rng_ar_all[irng]) + arback
        rng_ar2 = sp.int_(rng_ar_all[irng]) + arfor
        # get all of the acfs across pulses
        # sum along the pulses
        acf_tmp = sp.conj(rawbeams[rng_ar1, :]) * rawbeams[rng_ar2, :]  # *wearr
        acf_ave = sp.sum(acf_tmp, 1)
        acf_cent[irng, :] = acf_ave  # might need to transpose this
    return acf_cent
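# A minimal usage sketch with synthetic IQ data (hypothetical sizes), assuming
# sp is the scipy/numpy namespace used by CenteredLagProduct above.
import scipy as sp

Np, Ns = 100, 50                                       # pulses x samples per pulse
iq = sp.random.randn(Np, Ns) + 1j * sp.random.randn(Np, Ns)
acf = CenteredLagProduct(iq)                           # 14 lags by default
print(acf.shape)                                       # -> (37, 14): Ns - 13 range gates, 14 lags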
def compare(self, chunk, tiles):
    assert (chunk.shape[0] == self.compareChunkSize)
    chunk = scipy.int_(chunk)
    S = chunk.shape[0]
    # distances will contain the distance for each tile, for each position
    distances = scipy.zeros((self.shiftDim[0], self.shiftDim[1], tiles.shape[0]))
    for i in range(self.shiftDim[0]):
        for j in range(self.shiftDim[1]):
            distances[i, j, :] = self.distance(chunk, tiles[:, i:i + S, j:j + S, :])
    combinedIndex = scipy.unravel_index(scipy.argmin(distances), distances.shape)
    idx = combinedIndex[-1]
    pos = self.translatePos(combinedIndex[:-1])
    dist = distances[combinedIndex]
    return (idx, pos, dist)
def test_delta_updating(self):
    n_sample = 100
    # A 20 x 2 random integer matrix
    X = SP.empty((n_sample, 2))
    X[:, 0] = SP.arange(0, 1, 1.0 / n_sample)
    X[:, 1] = SP.random.rand(n_sample)
    sd_noise = .5
    sd_conf = .5
    noise = SP.random.randn(n_sample, 1) * sd_noise
    # print 'true delta equals', (sd_noise**2)/(sd_conf**2)
    # Here, the observed y is just a linear function of the first column
    # in X and a little independent gaussian noise
    y_fixed = (X[:, 0:1] > .5) * 1.0
    y_fn = y_fixed + noise
    # Divide into training and test sample using 2/3 of data for training
    training_sample = SP.zeros(n_sample, dtype='bool')
    training_sample[SP.random.permutation(n_sample)
                    [:SP.int_(.66 * n_sample)]] = True
    test_sample = ~training_sample
    kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
        1e-3 * SP.eye(n_sample)
    # The confounded version of y_lin is computed as
    y_conf = sd_conf * SP.random.multivariate_normal(
        SP.zeros(n_sample), kernel, 1).reshape(-1, 1)
    y_tot = y_fn + y_conf
    # Selects rows and columns
    kernel_train = kernel[SP.ix_(training_sample, training_sample)]
    kernel_test = kernel[SP.ix_(test_sample, training_sample)]
    lm_forest = MF(kernel=kernel_train, update_delta=False, max_depth=1,
                   verbose=0)
    # Returns prediction for random effect
    lm_forest.fit(X[training_sample], y_tot[training_sample])
    response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)
    # print 'fitting forest (delta-update)'
    # Learn random forest, not accounting for the confounding
    random_forest = MF(kernel=kernel_train, update_delta=True, max_depth=5,
                       verbose=0)
    random_forest.fit(X[training_sample], y_tot[training_sample])
    response_rf = random_forest.predict(X[test_sample], k=kernel_test)
def get_distribution(list):
    """Returns all possible probability distributions for the given list"""
    size = len(list)
    x = scipy.arange(size)
    y = scipy.int_(scipy.round_(scipy.stats.vonmises.rvs(5, size=size) * 255))
    plt.figure(1)
    h = plt.hist(y, bins=range(256), color='w')
    dist_names = ['gamma', 'beta', 'rayleigh', 'norm', 'rayleigh']
    for dist_name in dist_names:
        dist = getattr(scipy.stats, dist_name)
        param = dist.fit(y)
        pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2],
                              scale=param[-1]) * size
        plt.plot(pdf_fitted, label=dist_name)
        plt.xlim(0, 255)
    plt.legend(loc='upper right')
    plt.savefig('distribuciones.png', bbox_inches='tight')
def compute(nn_params):
    m = Y.shape[0]

    # Reshape nn_params back into the parameters theta_1 and theta_2
    theta_1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))]. \
        reshape([hidden_layer_size, input_layer_size+1])
    theta_2 = nn_params[(hidden_layer_size*(input_layer_size+1)):]. \
        reshape([num_labels, hidden_layer_size+1])

    theta_1_reg = sp.copy(theta_1)
    theta_1_reg[:, 0] = 0
    theta_2_reg = sp.copy(theta_2)
    theta_2_reg[:, 0] = 0

    # Forward propagation
    f = forward_prop(X)(theta_1, theta_2)
    a = f['a']
    a_3 = a[2]

    # Transform Y
    b = sp.matrix(
        sp.apply_along_axis(
            lambda n: sp.int_(sp.array(range(1, num_labels + 1)) == n), 1, Y))

    J = 0
    for i in range(0, m):
        J = J + (1 / m) * (-b[i, :] * sp.log(a_3[i, :].T) -
                           (1 - b[i, :]) * sp.log(1 - a_3[i, :].T))[0, 0]

    # Regularize
    J = J + (_lambda / (2 * m)) * (sp.sum(sp.power(theta_1_reg, 2)) +
                                   sp.sum(sp.power(theta_2_reg, 2))).real

    return J
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """ Find vectors in an array that are equivalent to within a specified tolerance

      USAGE:

          eqv = DuplicateVectors(vec, *tol)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the default
                           tolerance is 1e-14.
         *3) set equivPM to True if vec and -vec are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.

      NOTES:

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]
    """
    vlen = vec.shape[1]
    vlen0 = vlen
    orid = asarray(range(vlen), dtype="int")

    torid = orid.copy()
    tvec = vec.copy()

    eqv = []
    eqvTot = 0
    uid = 0

    ii = 1
    while vlen > 1 and ii < vlen0:
        dupl = tile(tvec[:, 0], (vlen, 1))

        if not equivPM:
            diff = abs(tvec - dupl.T).sum(0)
            match = abs(diff[1:]) <= tol    # logical to find duplicates
        else:
            diffn = abs(tvec - dupl.T).sum(0)
            matchn = abs(diffn[1:]) <= tol
            diffp = abs(tvec + dupl.T).sum(0)
            matchp = abs(diffp[1:]) <= tol
            match = matchn + matchp

        kick = hstack([True, match])    # pick self too

        if kick.sum() > 1:
            eqv += [torid[kick].tolist()]
            eqvTot = hstack([eqvTot, torid[kick]])
            uid = hstack([uid, torid[kick][0]])

        cmask = ones((vlen, ))
        cmask[kick] = 0
        cmask = cmask != 0

        tvec = tvec[:, cmask]
        torid = torid[cmask]

        vlen = tvec.shape[1]
        ii += 1

    if len(eqv) == 0:
        eqvTot = []
        uid = []
    else:
        eqvTot = eqvTot[1:].tolist()
        uid = uid[1:].tolist()

    # find all single-instance vectors
    singles = sort(setxor1d(eqvTot, range(vlen0)))

    # now construct list of unique vector column indices
    uid = int_(sort(union1d(uid, singles))).tolist()

    # make sure is a 1D list
    if not hasattr(uid, '__len__'):
        uid = [uid]

    return eqv, uid
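# Reproducing the worked example from the docstring above. This is a hedged
# usage sketch: it assumes findDuplicateVectors and the NumPy names it relies
# on (asarray, tile, hstack, setxor1d, union1d, int_, ...) are already
# imported, and that vTol defaults to something like 1e-14.
from numpy import array

vec = array([[1., 2., 2., 2., 1., 2., 7.],
             [2., 3., 5., 3., 2., 3., 3.]])
eqv, uid = findDuplicateVectors(vec, tol=1e-14)
print(eqv)   # [[0, 4], [1, 3, 5]]  -- groups of duplicate column indices
print(uid)   # [0, 1, 2, 6]         -- one representative per distinct column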
def var_ksFit(data, npoints, perc, extra=None):
    diag_vksf = dict()
    diag_vksf['data'] = data
    diag_vksf['npoints'] = npoints
    diag_vksf['perc'] = perc
    sio.savemat(home + '/diag_vksf.mat', diag_vksf)

    # kde_pdf = stats.gaussian_kde(flattened)
    kde_pdf = stats.gaussian_kde(data)
    # xi, dx = sp.linspace(flattened.min(), flattened.max(), npoints, retstep=True)
    xi, dx = sp.linspace(data.min(), data.max(), npoints, retstep=True)
    diag_vksf['xi'] = xi
    diag_vksf['dx'] = dx
    f = kde_pdf(xi)
    diag_vksf['f'] = f

    plt.figure()
    plt.title(extra)
    # plt.hist(flattened, bins=npoints, color=extra)
    plt.hist(data, bins=npoints, color=extra, alpha=0.5)

    mdx = sp.where(f == f.max())  # [0][0]
    diag_vksf['mdx'] = mdx
    mu = xi[mdx]
    diag_vksf['mu'] = mu
    # sigma = sp.std(flattened)
    sigma = sp.std(data)
    diag_vksf['sigma'] = sigma
    err_lookforward = sp.int_(sp.floor(mdx + 0.5 * sigma / dx))
    diag_vksf['err_lookforward'] = err_lookforward

    diag_vksf['sigma_hat_0'] = list()
    diag_vksf['sigma_hat_1'] = list()
    diag_vksf['mu_hat_0'] = list()
    diag_vksf['mu_hat_1'] = list()
    diag_vksf['local_norm'] = list()
    diag_vksf['y_sigma'] = list()
    diag_vksf['y_mu'] = list()
    diag_vksf['s_sigma'] = list()
    diag_vksf['s_mu'] = list()
    diag_vksf['my_sigma'] = list()
    diag_vksf['my_mu'] = list()
    diag_vksf['delta_sigma'] = list()
    diag_vksf['delta_mu'] = list()
    diag_vksf['ci'] = list()

    for kk in xrange(3):
        sigma_hat = sp.arange(sigma * 0.5, sigma * 1.5 + sigma / 200, sigma / 200)
        diag_vksf['sigma_hat_0'].append(sigma_hat)
        delta = list()
        for i in xrange(len(sigma_hat)):
            local_norm = stats.norm(mu, sigma_hat[i])
            y = local_norm.pdf(xi)
            my = y.max()
            s = (y[sp.arange(0, err_lookforward)] / my -
                 f[sp.arange(0, err_lookforward)] / f.max()) ** 2
            delta.append(s.sum())
        diag_vksf['y_sigma'].append(y)
        diag_vksf['my_sigma'].append(my)
        diag_vksf['s_sigma'].append(s)
        diag_vksf['delta_sigma'].append(delta)
        delta = sp.array(delta)
        mx, mdx = delta.min(), sp.where(delta == delta.min())
        diag_vksf['mx_sigma'], diag_vksf['mdx_sigma'] = mx, mdx
        sigma_hat = sigma_hat[mdx]
        sigma = sigma_hat
        diag_vksf['sigma_hat_1'].append(sigma_hat)

        mu_hat = sp.arange(mu * 0.5, mu * 1.5 + mu / 200, mu / 200)
        diag_vksf['mu_hat_0'].append(mu_hat)
        delta = list()
        for i in xrange(len(mu_hat)):
            local_norm = stats.norm(mu_hat[i], sigma_hat)
            y = local_norm.pdf(xi)
            my = y.max()
            s = (y[sp.arange(0, err_lookforward)] / my -
                 f[sp.arange(0, err_lookforward)] / f.max()) ** 2
            delta.append(s.sum())
        diag_vksf['y_mu'].append(y)
        diag_vksf['my_mu'].append(my)
        diag_vksf['s_mu'].append(s)
        diag_vksf['delta_mu'].append(delta)
        delta = sp.array(delta)
        sio.savemat(home + '/diag_vksf.mat', diag_vksf)
        mx, mdx = delta.min(), sp.where(delta == delta.min())
        diag_vksf['mx_mu'], diag_vksf['mdx_mu'] = mx, mdx
        mu_hat = mu_hat[mdx]
        mu = mu_hat
        diag_vksf['mu_hat_1'].append(mu_hat)

        local_norm = stats.norm(mu_hat, sigma_hat)
        y = local_norm.pdf(xi)
        ci = local_norm.ppf(perc)
        diag_vksf['ci'].append(ci)
        sio.savemat(home + '/diag_vksf.mat', diag_vksf)

        # plt.plot(xi, y * f.max()/y.max() * len(flattened) * dx,
        plt.plot(xi, y * f.max() / y.max() * len(data) * dx,
                 marker='', linestyle='--', color='k')
        plt.plot((ci, ci), plt.ylim(), marker='', linestyle='-', color='k')
        plt.savefig(home + '/cell_profiler_hist_' + extra + str(kk) + '.pdf')
    return ci
def one_vs_all(X, Y, num_labels, _lambda, cost_func, grad_func):
    m, n = X.shape
    X = sp.hstack((sp.ones((m, 1)), X))
    all_theta = sp.zeros([num_labels, n + 1])
    for c in range(1, num_labels + 1):
        init_theta = sp.ones(X.shape[1])
        all_theta[c % num_labels, :] = fmin_bfgs(cost_func, init_theta,
                                                 fprime=grad_func,
                                                 args=(X, sp.int_(Y == c), 0),
                                                 maxiter=100)
    return all_theta
def fit(self, X, y, recycle=True, **grow_params):
    """Build a linear mixed forest of trees from the training set (X, y).

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The training input samples.

    y : array-like, shape = [n_samples] or [n_samples, 1]
        The real valued targets

    Returns
    -------
    self : object
        Returns self.
    """
    if self.kernel == 'data':
        self.kernel = SC.estimateKernel(X, maf=1.0/X.shape[0])
    elif self.kernel == 'iid':
        self.kernel = SP.identity(X.shape[0])
    # Use dedicated part of data as background model
    elif self.kernel.size == X.shape[1]:
        tmp_ind = self.kernel
        self.kernel = utils.estimateKernel(X[:, self.kernel],
                                           maf=1.0/X.shape[0])
        X = X[:, ~tmp_ind]

    # Extract and reshape data
    self.y = y.reshape(-1, 1)
    self.X = X
    self.n, self.m = self.X.shape

    if self.delta is None:
        self.BLUP = BLUP.BLUP()
        if self.verbose > 1:
            print('fitting BLUP')
        self.BLUP.fit(XTrain=self.X, yTrain=self.y, KTrain=self.kernel,
                      delta=self.delta)
        if self.verbose > 1:
            print('done fitting BLUP')
        # Update delta if it used to be 'None'
        self.delta = self.BLUP.delta

    self.max_features = SP.maximum(SP.int_(self.ratio_features*self.m), 1)
    self.var_used = SP.zeros(self.m)
    self.log_importance = SP.zeros(self.m)
    self.depth = 0
    if self.verbose > 0:
        print(('log(delta) fitted to ', SP.log(self.delta)))

    # Initialize individual trees
    if recycle and self.trees != []:
        for tree in self.trees:
            tree.cut_to_stump()
    else:
        n_trees = 0
        self.trees = []
        while n_trees < self.n_estimators:
            if self.verbose > 1:
                print(('init. tree number ', n_trees))
            subsample = self.tree_sample()
            tree = MixedForestTree(self, subsample)
            self.trees.append(tree)
            n_trees += 1

    # Fitting with optimal depth constraint
    if self.fit_optimal_depth or self.update_delta:
        self.opt_depth = 0
        self.min_oob_err = self.get_oob_error(self.depth)
        if self.verbose > 0:
            print(('initial oob error is:', self.min_oob_err))
        grow_further = True
        curr_depth = self.depth
        while grow_further:
            # Updating ensemble increasing its depth by one
            self.further(depth=self.depth+1)
            if self.update_delta:
                self.delta = self.delta_update()
                if self.verbose > 0:
                    print(('delta was fitted to', self.delta))
            if self.verbose > 0:
                print(('depth is:', self.depth))
            oob_err = self.get_oob_error(self.depth)
            if self.verbose > 0:
                print(('oob error is:', oob_err))
            if oob_err < self.min_oob_err:
                self.min_oob_err = oob_err
                self.opt_depth = self.depth
            # Decide whether tree needs to be furthered
            grow_further = (curr_depth < self.depth) and\
                (self.depth < self.max_depth)
            if self.build_to_opt_depth and (self.depth >= self.min_depth):
                grow_further = grow_further and\
                    (oob_err == self.min_oob_err)
                pass
            curr_depth = self.depth
    #####################################################
    # Growing full tree one by one
    else:
        self.further(depth=self.max_depth)
    return self
# [opt_model_params,opt_lml]=GPR.optHyper(gpr_BP,hyperparams,priors=priors,gradcheck=True,Ifilter=Ifilter)
import pygp.plot.gpr_plot as gpr_plot

first = True
[M, S] = gpr_opt_hyper.predict(opt_model_params, X)

gpr_plot.plot_sausage(X, M[0], SP.sqrt(S[0]))
gpr_plot.plot_sausage(X, M[1], SP.sqrt(S[1]))
gpr_plot.plot_training_data(x1, C[1], replicate_indices=x1_rep.reshape(-1))
gpr_plot.plot_training_data(x2, T[1], replicate_indices=x2_rep.reshape(-1))

# norm = PL.Normalize()
break_lml = []
plots = SP.int_(SP.sqrt(24) + 1)
PL.figure()
for i, BP in enumerate(x1[0, :]):
    # PL.subplot(plots,plots,i+1)
    _hyper = copy.deepcopy(opt_model_params)
    _logtheta = _hyper["covar"]
    _logtheta = SP.concatenate((_logtheta, [BP, 10]))  # SP.var(y[:,i])]))
    _hyper["covar"] = _logtheta
    priors_BP[3] = [lnpriors.lnGauss, [BP, 3]]
    # [opt_model_params,opt_lml] = opt_hyper(gpr_BP,_hyper,priors=priors_BP,gradcheck=False,Ifilter=Ifilter_BP)
    # break_lml.append(opt_lml)
    try:
        break_lml.append(gpr_BP.LML(_hyper, priors_BP))
        print "Variance: %s" % (_logtheta)
@author: james
"""
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import scipy.stats

df = pd.read_excel(
    r"C:\Users\james\Documents\School\Machine Learning & Data mining\Project1\Data\hprice2.xls",
    header=None)
cols = range(0, 11)
raw_data = df.get_values()
X = raw_data[:, cols]

size = 30000
x = scipy.arange(size)
y = scipy.int_(scipy.round_(scipy.stats.vonmises.rvs(5, size=size) * 47))
h = plt.hist(y, bins=range(48))

dist_names = ['gamma', 'beta', 'rayleigh', 'norm', 'pareto']

for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1]) * size
    plt.plot(pdf_fitted, label=dist_name)
    plt.xlim(0, 47)
plt.legend(loc='upper right')
plt.show()
def get_ancestors(node_ind, node, parents):
    ancestors = SP.empty(SP.int_(SP.floor(SP.log2(node + 1))), dtype='int')
    for i in SP.arange(ancestors.size):
        node_ind = parents[node_ind]
        ancestors[i] = node_ind
    return ancestors
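# A small usage sketch on a heap-indexed tree (hypothetical arrays), assuming
# SP is the scipy/numpy namespace: node k sits at depth floor(log2(k + 1)),
# which is exactly how many ancestors get_ancestors collects by walking the
# parents array back to the root.
import scipy as SP

parents = SP.array([0, 0, 0, 1, 1, 2, 2])   # parents[i] = index of node i's parent
print(get_ancestors(6, 6, parents))         # -> [2 0]: parent of node 6 is 2, then the root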
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)
    levels = list(map(SP.unique, X[noderange].T))
    feature_map = []
    s = []
    UXt = []
    cnt = 0
    for i in range(X.shape[1]):
        lev = levels[i]
        for j in range(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
            cnt += 1
    UXt = SP.array(UXt).T

    if UXt.size == 0:
        # predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best
    else:
        # print UXt
        # print X[noderange]
        # test all transformed predictors
        scores = -NP.ones(cnt) * float('inf')
        UC = SP.dot(U.T, C)
        ########################
        # finding the best split
        ########################
        score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
        for snp_cnt in SP.arange(cnt):
            UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
            scores[snp_cnt] = -lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)
            scores[snp_cnt] += score_0

        ############################
        # evaluate the new means
        ############################
        kBest = SP.argmax(scores)
        score_best = scores[kBest]
        sBest = s[kBest]
        if score_best > 0:
            sBest = s[kBest]
            score_best = scores[kBest]
            UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
            _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S, MLparams=True)
            mBest = feature_map[kBest]
            CX = SP.zeros_like(Uy)
            CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
            C_new = SP.hstack((CX, C))
            mean = SP.dot(C_new, beta.reshape(beta.size, -1))
            # TODO: is this the correct way?
            left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
            right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
        return mBest, sBest, left_mean, right_mean, score_best
def process(pars, data=None):
    #%% load the parameters that CAN be specified from the command line
    NPlacers = pars['NPlacers']
    NScrapers = pars['NScrapers']
    iters = pars['iters']
    MaxTilesVert = pars['MaxTilesVert']
    per_page = pars['per_page']
    fidelity = pars['fidelity']
    poolSize = pars['poolSize']

    if (data != None):
        tags = data['search'].split(', ')
    else:
        #tags = ('Minimalism',)
        tags = ('Face', 'Leuven', 'Belgium', 'Computer')
        #tags = ('Bussum','Football','PSV','Minimalism','urbex')

    #%% MPI stuff
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    status = MPI.Status()

    #%% initiate plogger
    #execfile('../mosaic_gui/daemon/params.par')
    logger = plogger.PLogger(rank, host_url=LOGGER_HOST)

    #%% print the values of those parameters that CAN be specified via the command line
    #for key, value in pars.iteritems():
    #    print "M{}: {} is now {}".format(rank, key, value)

    #%% identify oneself
    #print "Master, process {} out of {}".format(rank, size)
    #print "M{}: > init".format(rank)
    logger.write('Initializing', status=plogger.INIT)

    #%% initialize the photo matcher
    pmPars = {'fidelity': fidelity}
    pm = photo_match.photoMatch(pmPars)

    # create empty save-path
    if not pars['useDB']:
        if (os.path.exists(pars['savepath'])):
            shutil.rmtree(pars['savepath'], ignore_errors=True)
        os.mkdir(pars['savepath'])

    #%% call the scrapers right at the beginning, as it is probably the slowest
    PixPerTile = scipy.array((75, 75))
    ComparePixPerTile = scipy.array((fidelity, fidelity))
    scraperPars = {'pm': pm, 'tags': tags, 'PixPerTile': PixPerTile,
                   'poolSize': poolSize}
    for scraper in range(1, 1 + NScrapers):
        comm.send(scraperPars, dest=scraper, tag=0)

    TilesVert = int(MaxTilesVert / NPlacers) * NPlacers
    TargetImg = Image.open('./output/doesnotmatter.jpg')
    #TargetImg = Image.open('./Matilda.JPG')
    #TargetImg = Image.open('./rainbow_flag_by_kelly.jpg')
    #TargetImg = Image.open('./korneel_test.jpg')
    TargetSize = TargetImg.size
    TilesHor = (TargetSize[0]*PixPerTile[1]*TilesVert)/(TargetSize[1]*PixPerTile[0])
    Tiles = scipy.array((TilesHor, TilesVert), dtype=int)
    TilesPerNode = scipy.array((TilesHor, TilesVert/NPlacers), dtype=int)
    Pixels = Tiles*PixPerTile
    ratio = 2.0 / 3.0
    TargetChunkPixels = Tiles*scipy.int_(PixPerTile*ratio)
    ComparePixels = Tiles*scipy.int_(ComparePixPerTile*ratio)

    #%% adjust the image to have the correct shape (aspect ratio) for turning it into a mosaic
    # the width of the original size image to yield the correct aspect ratio
    UnscaledWidth = (TargetSize[1]*Tiles[0])/Tiles[1]
    CropMargin = (TargetSize[0] - UnscaledWidth)/2
    #TargetImg.crop((0,0,))
    #TargetImg.resize(Pixels)
    CroppedImg = TargetImg.transform((ComparePixels[0], ComparePixels[1]),
                                     Image.EXTENT,
                                     (CropMargin, 0,
                                      CropMargin + UnscaledWidth, TargetImg.size[1]))
    CroppedArr = color.rgb2lab(scipy.array(CroppedImg))

    #%% send each placer some parameters
    placerPars = {'TilesPerNode': TilesPerNode, 'UnscaledWidth': UnscaledWidth,
                  'Tiles': Tiles, 'pm': pm, 'iters': iters,
                  'PixPerTile': PixPerTile,
                  'ComparePixPerTile': ComparePixPerTile}
    for placer in range(NPlacers):
        comm.send(placerPars, dest=1 + NScrapers + placer, tag=0)
    #print "M{}: < init".format(rank)

    #print "M{}: > dividing image".format(rank)
    #%% reduce CroppedArr to NPlacers NodeArrs
    NodeArrs = scipy.split(CroppedArr, NPlacers, axis=0)

    #%% send each of the placers its piece of the picture
    for placer in range(NPlacers):
        comm.send(NodeArrs[placer], dest=1 + NScrapers + placer, tag=1)

    #%% create the final image and divide it into pieces for the placers to
    FinalArr = CroppedArr.copy()  # now the division has to be accurate!
FinalArr = scipy.zeros((TargetChunkPixels[1], TargetChunkPixels[0], 3), dtype='i')
# FinalArr = scipy.zeros((Tiles[1]*PixPerTile[1], Tiles[0]*PixPerTile[0], 3), dtype='i')
NodeFinalArrs = scipy.split(FinalArr, NPlacers, axis=0)
#print "M{}: < dividing image".format(rank)

#%% listen to the placers' intermediate results
# for receiving the data, before it is known whence it came
tempNodeFinalArr = NodeFinalArrs[0].copy()
for it in range(iters):
    #print "M{}: > not listening to the placer to scraper broadcast".format(rank)
    dummy_arrs = scipy.zeros((per_page, PixPerTile[1], PixPerTile[0], 3),
                             dtype=scipy.uint8)
    for scraper in range(1, 1 + NScrapers):
        #print "M{}: not listening to scraper {}".format(rank, scraper)
        comm.Bcast(dummy_arrs, root=scraper)
    #print "M{}: < not listening to the placer to scraper broadcast".format(rank)

    #print "M{}: now listening for placer results at iter {} out of {}".format(rank, it, iters)
    #print "M{}: > listening for results".format(rank)
    logger.write('Listening for placers', status=plogger.RECEIVING)
    for p in range(NPlacers):  # listen for the placers
        #print "M{}: NodeFinalArrs[{}] has shape ".format(rank, placer), NodeFinalArrs[placer].shape
        #print "M{}: NodeFinalArrs[{}] has type ".format(rank, placer), type(NodeFinalArrs[placer][0,0,0])
        comm.Recv([tempNodeFinalArr, MPI.INT], source=MPI.ANY_SOURCE,
                  tag=4, status=status)
        placer = status.Get_source()
        NodeFinalArrs[placer - (1 + NScrapers)][:, :, :] = tempNodeFinalArr
    #print "M{}: < listening for results".format(rank)

    #print "M{}: > writing image".format(rank)
    partial_filename = 'output/mosaic_{}.png'.format(it)
    FinalImg = Image.fromarray(scipy.array(FinalArr, dtype=scipy.uint8), 'RGB')
    FinalImg.save(partial_filename)  # for fewer output images
    # Notify gui
    logger.emit_partial(partial_filename)
    #print "M{}: < writing image at iter {}".format(rank, it)

writepars = pars.copy()
del(writepars['savepath'])
strrep = '_'.join(['{}{:d}'.format(item, value)
                   for item, value in sorted(writepars.items())])
final_filename = 'output/final{}_{}.png'.format(strrep, int(time.time()))
FinalImg.save(final_filename)
os.chmod(final_filename, 0744)
print "M{}: Final image saved".format(rank)
shutil.copy('log', 'output/log_' + strrep)

# email result
if (data != None):
    msg = MIMEMultipart()
    msg['Subject'] = "KU Leuven openbedrijvendag - uw mozaiek"
    msg['From'] = "SuperPi <*****@*****.**>"
    msg['To'] = data['email']
    fp = open(final_filename, 'rb')
    img = MIMEImage(fp.read())
    fp.close()
    msg.attach(img)
    s = smtplib.SMTP('mail4.cs.kuleuven.be')
    s.sendmail('*****@*****.**', [msg['To']], msg.as_string())
    s.quit()

logger.emit_finished(final_filename)

#%% signal completion
logger.write('Finished', status=plogger.FINISHED)
comm.barrier()
# from https://stackoverflow.com/questions/6620471/fitting-empirical-distribution-to-theoretical-ones-with-scipy-python
# Saullo's answer
import matplotlib.pyplot as plt
import scipy
import scipy.stats

size = 20000
x = scipy.arange(size)

# creating the dummy sample (using beta distribution)
y = scipy.int_(scipy.round_(scipy.stats.beta.rvs(6, 2, size=size) * 47))

# creating the histogram
h = plt.hist(y, bins=range(48), density=True)

dist_names = ['alpha', 'beta', 'arcsine', 'weibull_min', 'weibull_max', 'rayleigh']

for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1])
    plt.plot(pdf_fitted, label=dist_name)
plt.xlim(0, 47)
plt.legend(loc='upper left')
plt.show()

# NB: from scipy.stats._continuous_distns import _distn_names  # all dist names
#
# The fit() method mentioned by @Saullo Castro provides maximum likelihood
# estimates (MLE). The best distribution for your data can be determined in
# several different ways, such as:
# 1. the one that gives you the highest log likelihood;
# 2. the one that gives you the smallest AIC, BIC or BICc value (see wiki:
#    http://en.wikipedia.org/wiki/Akaike_information_criterion; basically log
#    likelihood adjusted for the number of parameters, as distributions with
#    more parameters are expected to fit better);
# 3. the one that maximizes the Bayesian posterior probability (see wiki:
#    http://en.wikipedia.org/wiki/Posterior_probability).
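# A hedged follow-up sketch of criteria (1) and (2) from the notes above: rank
# the candidate fits by total log-likelihood and by AIC = 2k - 2*logL. It
# reuses y and dist_names from the snippet above and is not part of the
# original answer.
import scipy.stats

results = []
for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    loglik = dist.logpdf(y, *param).sum()    # total log-likelihood of the fit
    aic = 2 * len(param) - 2 * loglik        # smaller is better
    results.append((dist_name, loglik, aic))

# Print the candidates from best (smallest AIC) to worst
for name, loglik, aic in sorted(results, key=lambda r: r[2]):
    print('{:12s} logL = {:12.1f} AIC = {:12.1f}'.format(name, loglik, aic))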
Those are the same as python types and may be interchanged.

Fixed widths:

- int32       Integer (-2147483648 to 2147483647)
- uint32      Unsigned integer (0 to 4294967295)
- float32     Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
- float64     Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
- complex64   Complex number, represented by two 32-bit floats (real and imaginary components)

Those have fixed width on all systems, and may not be compatible with the python types.
"""

assert sp.array_equal(sp.array([1, 2, 3], dtype=sp.int_), sp.int_([1, 2, 3]))

# Different types evaluate to equal sp.arrays
assert sp.array_equal(sp.array([1, 2, 3], dtype=sp.int_),
                      sp.array([1, 2, 3], dtype=sp.float_))

# Get type
v = sp.array([1, 2], dtype=sp.int32)
assert v.dtype == sp.int32

# Subtype:
sp.issubdtype(sp.int32, sp.int_)
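# A short follow-up sketch on the fixed-width types listed above, assuming an
# older SciPy that still re-exports the NumPy names used throughout this file:
# the bit widths are guaranteed on every platform, and issubdtype checks dtype
# relationships.
import scipy as sp

assert sp.dtype(sp.int32).itemsize == 4        # 32 bits on every platform
assert sp.dtype(sp.float64).itemsize == 8      # 64 bits
assert sp.dtype(sp.complex64).itemsize == 8    # two 32-bit floats
assert sp.issubdtype(sp.int32, sp.integer)     # int32 is an integer dtype
assert not sp.issubdtype(sp.float32, sp.integer)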