def getLocalCorrelation(a, b, scope=(7, 7), verbose=True):
    """Compute the local correlation of a and b in a neighbourhood of every point.

    Sample output:
        (811401,) (811401,) (811401,)
        208.300295858 54067.9823039
        localCorr.max() 1.0
        the mean and var for local corr: 0.18482584644 0.189520182231
        time spent: 19.9111940861
    >>> LC=dbz(matrix=lc)
    """
    #localCov = getLocalCovariance(a=a, b=b, scope=scope)
    #aVar = getLocalVariance(a=a, scope=scope)
    #bVar = getLocalVariance(a=b, scope=scope)
    #localCorr = localCov / (aVar * bVar)**0.5
    tic()
    aa = a.copy()
    bb = b.copy()
    height, width = aa.matrix.shape
    commonMask = aa.matrix.mask + bb.matrix.mask
    aa.matrix.mask = commonMask
    bb.matrix.mask = commonMask
    aa.matrix.unshare_mask()  # call the method; the bare attribute access was a no-op
    bb.matrix.unshare_mask()
    aShifts = getMatrixShifts(aa, scope)
    bShifts = getMatrixShifts(bb, scope)
    aShifts = [v.reshape(height*width) for v in aShifts]  # flatten before stacking
    aShifts = ma.vstack(aShifts)
    bShifts = [v.reshape(height*width) for v in bShifts]  # flatten before stacking
    bShifts = ma.vstack(bShifts)
    #print "\n........................."
    #print aShifts.shape
    localProduct = (aShifts * bShifts)
    localProduct = localProduct.mean(axis=0)
    #print 'local product', localProduct.shape
    aVar = aShifts.var(axis=0)
    bVar = bShifts.var(axis=0)
    aMean = aShifts.mean(axis=0)
    bMean = bShifts.mean(axis=0)
    #print "........................."
    #print aVar.shape, aMean.shape, localProduct.shape
    #print (localProduct-aMean*bMean).max()
    #print (aVar*bVar).max()
    localCorr = (localProduct - aMean*bMean) / (aVar * bVar)**.5
    #localCorr = localCorr * (localCorr>=-1.0000001) * (localCorr<=1.0000001)  # cutting the pathologies
    localCorr.mask += (localCorr < -1.0000001) + (localCorr > 1.0000001)  # masking the pathologies
    #print "localCorr.max()", localCorr.max()
    if verbose:
        print "the mean and var for local corr:", localCorr.mean(), localCorr.var()
    localCorr = localCorr.reshape(height, width)
    toc()
    return localCorr

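# A minimal sketch (not part of the armor package) of the identity used above:
# corr = (E[ab] - E[a]E[b]) / sqrt(Var(a) Var(b)), evaluated column-wise on
# ma.vstack'ed neighbourhood samples.  Toy data stand in for getMatrixShifts output.
import numpy as np
import numpy.ma as ma

aShifts = ma.masked_invalid(np.array([[1.0, 2.0, np.nan],
                                      [2.0, 1.0, 4.0],
                                      [3.0, 5.0, 6.0]]))
bShifts = 2.0 * aShifts + 1.0          # perfectly correlated neighbourhoods
prod = (aShifts * bShifts).mean(axis=0)
corr = (prod - aShifts.mean(axis=0) * bShifts.mean(axis=0)) / \
       (aShifts.var(axis=0) * bShifts.var(axis=0)) ** 0.5
print(corr)                            # ~[1.0, 1.0, 1.0]
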
def getLocalProduct(a, b, scope=(9, 9)):
    """To get the local "dot product" in a neighbourhood of every point of a.
    11 march 2013
    """
    height, width = a.matrix.shape
    aShifts = getMatrixShifts(a, scope)
    bShifts = getMatrixShifts(b, scope)
    aShifts = [v.reshape(height*width) for v in aShifts]  # flatten before stacking
    aShifts = ma.vstack(aShifts)
    bShifts = [v.reshape(height*width) for v in bShifts]  # flatten before stacking
    bShifts = ma.vstack(bShifts)
    localProduct = (aShifts * bShifts)
    localProduct = 1. * localProduct.mean(axis=0)
    localProduct = localProduct.reshape(height, width)
    return localProduct

def calculate_aic(eigenworms_matrix_path, shapes_file, coiled_modes_file, num_modes):
    eigenworms_matrix = np.loadtxt(eigenworms_matrix_path, delimiter=",").astype(np.float32)

    # Load angle library
    f = scipy.io.loadmat(shapes_file)
    thetas_w = ma.array(f["theta_ensemble"])
    thetas_w[thetas_w == 0] = ma.masked
    thetas_library_raw = ma.compress_rows(ma.vstack(thetas_w))
    raw_samples = thetas_library_raw[::2]

    # Load coiled modes library
    with h5py.File(coiled_modes_file, "r") as mat:
        refs = list(mat["#refs#"].keys())[1:]
        tseries_w = [
            ma.masked_invalid(np.array(mat["#refs#"][ref]).T)[:, :num_modes]
            for ref in refs
        ]
    modes_library = ma.compress_rows(ma.vstack(tseries_w))

    # find indices with larger curvature (on the tail of the distribution of angles that can be solved)
    indices_curved = np.abs(modes_library[:, 2]) > np.percentile(
        raw_samples.dot(eigenworms_matrix[:, 2]), 95)
    curved_samples = modes_library[indices_curved].dot(
        eigenworms_matrix[:, :num_modes].T)

    # combine samples
    thetas_library_combined = np.vstack((curved_samples, raw_samples))

    # sample uniformly from various degrees of curvature
    indices = _uniform_samples(
        thetas_library_combined.dot(eigenworms_matrix[:, 2]))
    training_data = thetas_library_combined[indices]

    aic = []
    n_components_range = np.arange(150, 350, 10)
    for n_components in n_components_range:
        # Fit a Gaussian mixture with EM
        try:
            gmm = GaussianMixture(n_components=n_components)
            gmm.fit(training_data)
            aic.append(gmm.aic(training_data))
        except:
            aic.append(np.nan)

    return np.vstack((n_components_range, aic)).T

def sum_running_stats():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """
    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST
        print("Loading realization no {}".format(irel))
        srf = xtgeo.surface_from_file(EXPATH1)

        nnum = float(irel + 1)
        srf.values += irel * 1  # just to mimic variability

        if irel == 0:
            pcum = srf.values1d
        else:
            pavg = srf.values1d / nnum
            pcum = pcum * (nnum - 1) / nnum
            pcum = npma.vstack([pcum, pavg])
            pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()

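# A minimal sketch (toy vectors instead of xtgeo surfaces) of the running-mean
# recurrence used above: mean_n = mean_{n-1} * (n-1)/n + x_n / n, with the two
# terms combined through npma.vstack(...).sum(axis=0).
import numpy as np
import numpy.ma as npma

realisations = [np.array([1.0, 4.0, 3.0]),
                np.array([2.0, 6.0, 5.0]),
                np.array([3.0, 8.0, 7.0])]
for irel, values1d in enumerate(realisations):
    nnum = float(irel + 1)
    if irel == 0:
        pcum = values1d
    else:
        pavg = values1d / nnum
        pcum = pcum * (nnum - 1) / nnum
        pcum = npma.vstack([pcum, pavg]).sum(axis=0)
print(pcum)   # [2. 6. 5.], the element-wise mean of the three realisations
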
def insert(self, key, record):
    if key in self.index_table:
        raise KeyError("key %s already exists in table" % str(key))
    added_row_index = self.index_table[key]
    num_current_rows = len(self.index_table)
    if num_current_rows >= self.data_table.shape[0]:
        num_new_rows = int(self._data_table_growth_factor * num_current_rows)
        new_rows = ma.masked_all((num_new_rows, len(self.column_table)),
                                 dtype=self._data_type)
        self.data_table = ma.vstack((self.data_table, new_rows))
        print "Table enlarged to %d rows" % self.data_table.shape[0]
    for key, in_value in record.items():
        if key not in self.column_table:
            raise KeyError(
                'Variable "%s" is not registered as a table column' % str(key))
        if key in self.hash_table:
            value = self.hash_table[key][in_value]
        else:
            value = in_value
        index = self.column_table[key]
        self.data_table[added_row_index, index] = value

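# A minimal, self-contained sketch (hypothetical sizes, not the class above) of the
# growth step: append a block of fully masked rows to a preallocated table when it
# runs out of space.
import numpy.ma as ma

data_table = ma.masked_all((4, 3), dtype=float)   # 4 preallocated rows, 3 columns
new_rows = ma.masked_all((2, 3), dtype=float)     # grow by 2 rows
data_table = ma.vstack((data_table, new_rows))
print(data_table.shape)                           # (6, 3)
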
def sum_running_stats_bytestream():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """
    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST
        print('Loading realization no {}'.format(irel))
        with open(EXPATH1, "rb") as myfile:
            stream = io.BytesIO(myfile.read())

        srf = xtgeo.RegularSurface(stream, fformat="irap_binary")

        nnum = float(irel + 1)
        srf.values += irel * 1  # just to mimic variability

        if irel == 0:
            pcum = srf.values1d
        else:
            pavg = srf.values1d / nnum
            pcum = pcum * (nnum - 1) / nnum
            pcum = npma.vstack([pcum, pavg])
            pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()

def _expected(self, transpose=False):
    data = self.data
    if transpose:
        data = self.data.T
    # Expected raster weights per target grid cell.
    # This is the (fractional) source cell contribution
    # to each target cell (out of 255).
    weights = np.array([[[63, 127, 127],     # top left hand cell (tlhc)
                         [127, 255, 255]],
                        [[127, 127, 63],     # top right hand cell (trhc)
                         [255, 255, 127]],
                        [[127, 255, 255],    # bottom left hand cell (blhc)
                         [63, 127, 127]],
                        [[255, 255, 127],    # bottom right hand cell (brhc)
                         [127, 127, 63]]],
                       dtype=np.uint8)
    weights = weights / 255
    # Expected source points per target grid cell.
    tmp = data[1:-1, 1:-1]
    shape = (-1, 2, 3)
    cells = [tmp[slice(0, 2), slice(0, 3)].reshape(shape),        # tlhc
             tmp[slice(0, 2), slice(3, None)].reshape(shape),     # trhc
             tmp[slice(2, None), slice(0, 3)].reshape(shape),     # blhc
             tmp[slice(2, None), slice(3, None)].reshape(shape)]  # brhc
    cells = ma.vstack(cells)
    # Expected fractional weighted result.
    num = (cells * weights).sum(axis=(1, 2))
    dom = weights.sum(axis=(1, 2))
    expected = num / dom
    expected = ma.asarray(expected.reshape(2, 2))
    if transpose:
        expected = expected.T
    return expected

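# A minimal sketch (toy numbers, not the test fixture above) of the fraction-weighted
# cell average: sum(points * weights) / sum(weights), evaluated per target cell after
# ma.vstack has stacked one (2, 3) block of source points per cell.
import numpy as np
import numpy.ma as ma

cells = ma.vstack([np.arange(6, dtype=float).reshape(-1, 2, 3),       # target cell 0
                   np.arange(6, 12, dtype=float).reshape(-1, 2, 3)])  # target cell 1
weights = np.array([[[63, 127, 127],
                     [127, 255, 255]]] * 2) / 255.0                   # same raster weights per cell
expected = (cells * weights).sum(axis=(1, 2)) / weights.sum(axis=(1, 2))
print(expected)   # weighted mean of the six source points in each target cell
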
def postProcessor_characteristic_collector(self):
    collected_characteristics_vector = self.collected_burst
    for key in self.converted_characteristic.keys():
        collected_characteristics_vector = vstack(
            (collected_characteristics_vector, self.converted_characteristic[key]))
    return collected_characteristics_vector

def generate(shapes_file, coiled_modes_file, eigenworms_matrix_path, out_file, num_gaussians):
    # Load angle library from Greg
    f = scipy.io.loadmat(shapes_file)
    thetas_w = ma.array(f["theta_ensemble"])
    thetas_w[thetas_w == 0] = ma.masked
    thetas_library_raw = ma.compress_rows(ma.vstack(thetas_w))

    # Load library from Onno
    mat = h5py.File(coiled_modes_file, "r")
    refs = list(mat["#refs#"].keys())[1:]
    tseries_w = [ma.masked_invalid(np.array(mat["#refs#"][ref]).T)[:, :5] for ref in refs]
    mat.close()
    modes_library = ma.compress_rows(ma.vstack(tseries_w))

    eigenworms_matrix = np.loadtxt(eigenworms_matrix_path, delimiter=",").astype(np.float32)

    # same number of samples from full theta
    # raw_samples = thetas_library_raw[np.random.choice(np.arange(len(thetas_library_raw)), np.sum(indices_curved), replace=False)]
    raw_samples = thetas_library_raw[::2]

    # find indices with larger curvature
    indices_curved = np.abs(modes_library[:, 2]) > np.percentile(raw_samples.dot(eigenworms_matrix[:, 2]), 97.5)

    # get same number of samples from raw angles and projected modes
    curved_samples = modes_library[indices_curved].dot(eigenworms_matrix[:, :5].T)
    thetas_library_combined = np.vstack((curved_samples, raw_samples))

    indices = uniform_samples(thetas_library_combined.dot(eigenworms_matrix[:, 2]))
    training_data = thetas_library_combined[indices]

    # fit gaussian mixture model
    gmm = GaussianMixture(n_components=num_gaussians)
    gmm.fit(training_data)

    # sort according to curvature
    sorting_indices = np.argsort(np.sum(np.abs(np.diff(gmm.means_, axis=1)), axis=1))
    means = gmm.means_[sorting_indices]
    covariances = gmm.covariances_[sorting_indices]
    weights = gmm.weights_[sorting_indices]

    with gzip.open(out_file, "wt") as f:
        json.dump({"means": means.tolist(),
                   "covariances": covariances.tolist(),
                   "weights": weights.tolist()}, f)

def compute_master_theta(model_indices, all_models, windows_sim, tseries_sim):
    master_tseries = []
    for model in all_models[model_indices]:
        sim_idx, kw = model
        t0, tf = windows_sim[sim_idx][kw]
        ts = tseries_sim[sim_idx][t0:tf]
        master_tseries.append(ts)
        master_tseries.append([np.nan] * ts.shape[1])
    master_tseries = ma.masked_invalid(ma.vstack(master_tseries))
    master_theta, eps = lvarc.get_theta_masked(master_tseries)
    return master_theta

def compute_master_theta(models, windows, tseries):
    master_tseries = []
    for window_idx in models:
        window = windows[window_idx]
        t0, tf = window
        ts = tseries[t0:tf]
        master_tseries.append(ts)
        master_tseries.append([np.nan] * ts.shape[1])
    master_tseries = ma.masked_invalid(ma.vstack(master_tseries))
    master_theta, eps = lvarc.get_theta_masked(master_tseries)
    return master_theta

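# A minimal sketch (toy segments, not lvarc) of the concatenation trick used above:
# a row of NaNs is appended after each segment and then masked, so the stacked series
# keeps the segments separated before the masked model fit.
import numpy as np
import numpy.ma as ma

segments = [np.array([[1., 2.], [3., 4.]]), np.array([[5., 6.]])]
master = []
for ts in segments:
    master.append(ts)
    master.append([np.nan] * ts.shape[1])
master = ma.masked_invalid(ma.vstack(master))
print(master.shape)   # (5, 2), with the two NaN separator rows fully masked
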
def getLocalMean(a, scope=(5, 5), verbose=True):
    matrixShifts = getMatrixShifts(a, scope)
    height, width = a.matrix.shape
    matrixShifts = [v.reshape(height*width) for v in matrixShifts]  # flatten before stacking
    matrixShifts = ma.vstack(matrixShifts)
    localMean = matrixShifts.mean(axis=0)
    localMean = localMean.reshape(height, width)
    if verbose:
        print "the mean and var for localMean:", localMean.mean(), localMean.var()
    return localMean

def test_steepest():
    data = mlclass.ex4()
    x = add_bias(StandardScaler().fit_transform(data['x']))
    y = data['y']
    theta = array([.01, .01, .01])
    c = lambda theta: logistic.model.cost(x, y, theta)
    g = lambda theta: logistic.model.grad(x, y, theta)
    assert_array_almost_equal([-0.0254469, 1.14114, 1.21333],
                              steepest_gd(c, g, theta, max_iter=500)[0], decimal=1)

    y = vstack([data['y'], 1 - data['y']]).T
    theta = array([[.01, .01, .01]])
    c = lambda theta: maxent.model.cost(x, y, theta)
    g = lambda theta: maxent.model.grad(x, y, theta)
    assert_array_almost_equal([[-0.0254469, 1.14114, 1.21333]],
                              steepest_gd(c, g, theta, max_iter=500)[0], decimal=1)

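# A minimal sketch (toy labels) of the target construction used in the test above:
# stacking y and 1 - y and transposing turns a binary label vector into the
# two-column one-hot matrix that the maxent model appears to expect.
from numpy import array, vstack

y = array([1., 0., 1.])
print(vstack([y, 1 - y]).T)   # [[1, 0], [0, 1], [1, 0]]
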
def test_corrcoef(self):
    r = ma.masked_equal(np.load("data/ml-1m/rating.npy"), 0)
    # sim = ma.corrcoef(r[0], r[2412])
    # print(sim)
    # print(np.corrcoef(r[0].filled(0), r[2412].filled(0)))
    sim2 = ma.corrcoef(ma.vstack([r[0], r[2412]]))
    print(sim2)
    print(ma.dot(r[0], r[2412])
          / math.sqrt(ma.dot(r[0], r[0]))
          / math.sqrt(ma.dot(r[2412], r[2412])))
    r0_m = r[0] - ma.mean(r[0])
    r1_m = r[2412] - ma.mean(r[2412])
    print(ma.dot(r0_m, r1_m)
          / math.sqrt(ma.dot(r0_m, r0_m))
          / math.sqrt(ma.dot(r1_m, r1_m)))

def test_newton():
    data = mlclass.ex4()
    x = add_bias(data['x'])
    y = data['y']
    theta = array([.01, .01, .01])
    c = lambda theta: logistic.model.cost(x, y, theta, 0.)
    g = lambda theta: logistic.model.grad(x, y, theta, 0.)
    h = lambda theta: logistic.model.hessian(x, theta, 0.)
    assert_array_almost_equal([-16.3787, 0.1483, 0.1589],
                              newton(c, g, h, theta)[0], decimal=3)

    y = vstack([data['y'], 1 - data['y']]).T
    theta = array([[.01, .01, .01]])
    c = lambda theta: maxent.model.cost(x, y, theta, 0.)
    g = lambda theta: maxent.model.grad(x, y, theta, 0.)
    h = lambda theta: maxent.model.hessian(x, theta, 0.)
    assert_array_almost_equal([[-16.3787, 0.1483, 0.1589]],
                              newton(c, g, h, theta)[0], decimal=3)

def XWrap2(x, P0, fill_value=0, pow2=False):
    """
    Extend and wrap array.

    Fold array every P0 indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------
    x : input
    P0 : Base period, units of elements
    pow2 : If true, pad out nRows so that it's the next power of 2.

    Return
    ------
    xwrap : Wrapped array.
    """
    ncad = x.size  # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Pad out remainder of array with 0s.

    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))

    xwrap = x.reshape(nrow, -1)

    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])

    return xwrap

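# A hypothetical usage sketch of XWrap2 (assuming it is importable as defined above):
# fold a 10-element masked series at a base period of 4 samples, padding the hanging
# part with masked values and rounding the row count up to a power of 2.
import numpy as np
import numpy.ma as ma

x = ma.arange(10, dtype=float)
x[3] = ma.masked                  # a masked cadence survives the fold
xwrap = XWrap2(x, P0=4, pow2=True)
print(xwrap.shape)                # (4, 4): 3 folded rows padded up to 2**2 rows
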
def getLocalVariance(a, scope=(5, 5), verbose=True):
    """To get the local variance in a neighbourhood of every point of a.

    variance = mean(X^2) - mean(X)^2
    """
    matrixShifts = getMatrixShifts(a, scope, verbose=verbose)
    # matrixShifts[i+N*j] = a.shiftMatrix(i, j), where N = 9 for the moment
    height, width = a.matrix.shape
    matrixShifts = [v.reshape(height*width) for v in matrixShifts]  # flatten before stacking
    matrixShifts = ma.vstack(matrixShifts)
    #matrixCounts = (1-matrixShifts.mask).sum(axis=0)  # count the valid entries at each position
    #matrixSums = matrixShifts.sum(axis=0)
    #matrixSquares = matrixShifts**2
    #matrixSquareSums = matrixSquares.sum(axis=0)
    #localVariance = matrixSquareSums*1./matrixCounts - (matrixSums*1./matrixCounts)**2
    localVariance = matrixShifts.var(axis=0)
    localVariance = localVariance.reshape(height, width)  # reform the matrix
    if verbose:
        print "local variance sum, var=", localVariance.sum(), localVariance.var()
    return localVariance

def sum_stats():
    """Accumulate numpies for all realisations and then do stats.

    This will be quite memory intensive, and memory consumption will
    increase linearly.
    """
    propsd = {}

    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST
        print("Loading realization no {}".format(irel))
        grd = xtgeo.grid3d.Grid()
        grd.from_file(
            GRIDFILEROOT,
            fformat="eclipserun",
            initprops=INITPROPS,
            restartprops=RESTARTPROPS,
            restartdates=RDATES,
        )

        for prop in grd.props:
            if prop.name not in propsd:
                propsd[prop.name] = []
            if prop.name == "PORO":
                prop.values += irel * 0.001  # mimic variability aka ensembles
            else:
                prop.values += irel * 1  # just to mimic variability
            propsd[prop.name].append(prop.values1d)

    # find the averages:
    porovalues = npma.vstack(propsd["PORO"])
    poromeanarray = porovalues.mean(axis=0)
    porostdarray = porovalues.std(axis=0)

    print(poromeanarray)
    print(poromeanarray.mean())
    print(porostdarray)
    print(porostdarray.mean())
    return poromeanarray.mean()

def rotate_rects(rects, center, angle):
    """
    @param rects: n by 5 by 2 array of n rectangles. Each rectangle consists
        of five (x, y) coordinates. Any polygon would work, in fact.
    @type rects: ndarray
    @type center: tuple
    @type angle: float
    @rtype: ndarray
    """
    # 2 by 3 rotation matrix
    rot_mat = cv2.getRotationMatrix2D(center, angle, 1.0)
    n, p, d = rects.shape
    # 3 by n*5 array: the 2 by n*5 coordinate block plus a row of ones (homogeneous coordinates)
    points = vstack((rects.T.reshape(d, n * p, order='F'), ones((1, n * p))))
    rotated_points = num.dot(rot_mat, points)
    rotated_rects = rotated_points.T.reshape(n, p, d)
    return rotated_rects

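# A minimal sketch (a single unit square, hypothetical values) of the affine step above:
# stack the corner coordinates with a row of ones so the 2x3 rotation matrix can be
# applied in a single dot product.
import cv2
import numpy as np

rect = np.array([[[0., 0.], [1., 0.], [1., 1.], [0., 1.], [0., 0.]]])  # 1 x 5 x 2
rot_mat = cv2.getRotationMatrix2D((0.5, 0.5), 90, 1.0)
n, p, d = rect.shape
points = np.vstack((rect.T.reshape(d, n * p, order='F'), np.ones((1, n * p))))
print(np.dot(rot_mat, points).T.reshape(n, p, d))   # corners rotated 90 deg about the centre
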
def sum_running_stats():
    """Find avg per realisation and do a cumulative rolling mean.

    Memory consumption shall be very low.
    """
    for irel in range(NRUN):
        # load as Eclipse run; this will look for EGRID, INIT, UNRST
        print("Loading realization no {}".format(irel))
        grd = xtgeo.grid3d.Grid()
        grd.from_file(
            GRIDFILEROOT,
            fformat="eclipserun",
            restartprops=RESTARTPROPS,
            restartdates=RDATES,
            initprops=INITPROPS,
        )

        nnum = float(irel + 1)

        for prop in grd.props:
            if prop.name == "PORO":
                prop.values += irel * 0.001  # mimic variability aka ensembles
            else:
                prop.values += irel * 1  # just to mimic variability

            if prop.name == "PORO":
                if irel == 0:
                    pcum = prop.values1d
                else:
                    pavg = prop.values1d / nnum
                    pcum = pcum * (nnum - 1) / nnum
                    pcum = npma.vstack([pcum, pavg])
                    pcum = pcum.sum(axis=0)

    # find the averages:
    print(pcum)
    print(pcum.mean())
    return pcum.mean()

def probAdjustEquil(binProb, rates, uncert, threshold=0.0, fullCalcClust=False, fullCalcBins=False):
    """This function adjusts bin pops in binProb using rates and uncert matrices.

    fullCalcBins  --> True for weighted avg, False for simple calc
    fullCalcClust --> True for weighted avg, False for simple calc
    threshold     --> minimum weight (relative to max) for another value to be averaged
                      only matters if fullCalcBins == True (or later perhaps if fullCalcClust == True)
    """
    # Check that rate matrix is square
    Ni, Nj = rates.shape
    if Ni != Nj:
        print('\nWARNING: Not a square matrix!\n')

    zi = np.where(binProb == 0.0)[0]  # indices of bins with zero probability

    rates_uncert = UncertMath.UncertContainer(rates, rates - uncert, rates + uncert)

    # STEP 1a: Create matrix of ratios of probabilities based on DIRECT estimates
    # that is, ij element is p_i / p_j = k_ji / k_ij
    ratios_direct = rates_uncert.transpose() / rates_uncert

    # STEP 1b: Create averaged matrix of ratios of probabilities based on both direct and indirect estimates
    # Indirect means '3rd bin' estimates: p_i / p_j = ( k_ki / k_ik ) ( k_jk / k_kj )
    # Turns out this is not helpful, so generally set fullCalcBins = 0
    if fullCalcBins:
        # Calculate indirect ratios using Einstein Summation convention where
        # ratios_indirect_kij = ( k_ki / k_ik ) ( k_jk / k_kj ) = ratios_direct_ik * ratios_direct_kj
        ri_vals = np.einsum('ik,kj->kij', ratios_direct.vals, ratios_direct.vals)
        ri_min = np.einsum('ik,kj->kij', ratios_direct.dmin, ratios_direct.dmin)
        ri_max = np.einsum('ik,kj->kij', ratios_direct.dmax, ratios_direct.dmax)

        ratios_indirect = UncertMath.UncertContainer(ri_vals, ri_min, ri_max, mask=ratios_direct.vals.mask)

        # Threshold indirect ratios
        ti = ratios_indirect.wt < ratios_direct * threshold
        ratios_indirect.mask = ti
        ratios_indirect.update_mask()

        ratios_indirect.concatenate(ratios_direct, axis=0)
        ratios_average = ratios_indirect.weighted_average(axis=0)
    else:
        ratios_average = ratios_direct.weighted_average(axis=0, expaxis=0)

    # STEP 2: Form clusters
    # STEP 2a: Sort probability ratios based on uncertainty
    # Sort uncertainties of ratios_average subject to the convention that p_i < p_j
    i, j = np.triu_indices(Ni, 1)  # indices of ij pairs where i != j

    # Remove pairs that include a bin that has zero probability
    nzi = (binProb[i] != 0.0) & (binProb[j] != 0.0)
    i = i[nzi]
    j = j[nzi]

    vals = ma.vstack((ratios_average.vals[i, j], ratios_average.vals[j, i]))
    ias = ma.argsort(vals, axis=0, fill_value=np.inf)

    ordered_ind = np.vstack((i, j))
    flip_ind = np.nonzero(ias[0, :])  # Find pairs in which to select ji rather than ij
    ordered_ind[:, flip_ind[0]] = ordered_ind[:, flip_ind[0]][::-1]

    iind = ordered_ind[0, :]
    jind = ordered_ind[1, :]

    uncertij = ratios_average.uncert[iind, jind]  # Get the uncert for ij pairs
    count = uncertij.count()  # Count of the unmasked uncertainties

    ias = ma.argsort(uncertij, fill_value=np.inf)  # Get the indices that would sort uncertij
    iind = iind[ias[:count]]  # Sort the indices excluding masked/undefined values
    jind = jind[ias[:count]]

    # STEP 2b: Create ClusterList object and cluster bins
    clusters = BinCluster.ClusterList(ratios_average, Ni)

    if fullCalcClust:
        clusters.join((iind, jind))
    else:
        clusters.join_simple((iind, jind))

    total_prob = 0.0  # total probability in all clusters
    for cid in clusters.cluster_contents:
        binlist = list(clusters.cluster_contents[cid])
        if len(binlist):
            prob_cluster = binProb[binlist].sum()
            total_prob += prob_cluster
            binProb[binlist] = prob_cluster * clusters.bin_data[binlist].vals

    binProb[zi] = 0.0  # re-zero bins that previously had zero prob

    #for bi, p in enumerate(binProb):
    #    print('bin: {} -- {}'.format(bi, p))

    print('.........Total Probability: {}'.format(binProb.sum()))

def gaussianSmooothNormalisedCorrelation(obs, wrf, sigma=20, sigmaWRF=5, thres=15,
                                         showImage=True, saveImage=True,
                                         outputFolder="", outputType="correlation",
                                         *args, **kwargs):
    """Use normalised correlation to study the similarity between obs and wrf.

    Codes from armor.tests.gaussianSmoothNormalisedCorrelation2.

    input:
        sigma    = sigma for obs
        sigmaWRF = sigma for wrf
    """
    if outputFolder == "":
        try:
            outputFolder = obs.imageFolder
        except AttributeError:
            outputFolder = pattern.defaultOutputFolderForImages
    if showImage:
        import pylab
        pylab.ion()

    k = obs  # alias
    w = wrf
    matrix0 = copy.copy(k.matrix)

    k.getCentroid()
    k.setThreshold(thres)  # 2014-05-30
    k.matrix = k.gaussianFilter(sigma).matrix
    #k.matrix = 100.* (k.matrix>=thres)
    k.matrix.mask = np.zeros(k.matrix.shape)
    #k.makeImage(closeAll=True)
    #pylab.draw()
    #correlations = []

    w.getCentroid()
    w.setThreshold(thres)  # 2014-05-30
    w1 = w.gaussianFilter(sigmaWRF)
    topRowName = w.name + ', gaussian(' + str(sigmaWRF) + ') and ' + k.name
    topRow = ma.hstack([w.matrix, w1.matrix, matrix0])
    #w1.matrix = 100.*(w1.matrix>=thres)
    w1.matrix.mask = np.zeros(w1.matrix.shape)

    try:
        ############################################
        # key lines
        w2 = w1.momentNormalise(k)
        w3 = w1.momentNormalise(k, extraAngle=np.pi)
        if outputType == "correlation" or outputType == "corr":
            corr = w2.corr(k)
            corr2 = w3.corr(k)
            if corr2 > corr:
                print '180 degree switch: '
                print ' ', k.name, w.name, corr, corr2, '\n................................'
                corr = corr2
                w2 = w3
            returnValue = corr
        #elif outputType=="regression" or outputType=="regress":
        else:
            x, residuals = w2.regress(k)
            x2, residuals2 = w3.regress(k)
            if residuals2 < residuals:
                print '180 degree switch: '
                print ' ', k.name, w.name, residuals2, "<", residuals, '\n................................'
                x = x2
                w2 = w3
            returnValue = x
        #
        #############################################

        #######
        # making the output image
        w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
        w2.name = w.name + ', normalised, and ' + k.name + '\nnormalised '
        if outputType == "corr" or outputType == "correlation":
            w2.name += 'correlation: ' + str(corr)
        w2.matrix = ma.vstack([w2.matrix, topRow])
        w2.name = topRowName + '\n' + "bottom row:" + w2.name
        w2.imagePath = outputFolder + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
        w2.vmin = -20.
        w2.vmax = 100.
        if saveImage:
            w2.saveImage()
        if showImage:
            w2.makeImage(closeAll=True)
            pylab.draw()
        #
        ############################################
    #except IndexError:
    except SyntaxError:
        returnValue = -999

    # restoring the matrix
    k.backupMatrix('gaussian smooth normalised correlations, sigma=' + str(sigma) + 'threshold=' + str(thres))
    k.matrix = matrix0
    return returnValue

def update(i):
    offsets = ma.vstack((targets_array[i, :, :], centroids[i, :]))
    scat.set_offsets(offsets)
    scat.set_facecolors(['b'] * (np.shape(offsets)[0] - 1) + ['r'])
    return

def fuzzyfy(features, cfg):
    """
    FIXME: Looks like skfuzzy.trapmf does not handle well masked values.
           I must think better what to do with masked input values.
           What to do when there is one feature, but the other features
           are masked?
    """
    features_list = list(cfg['features'].keys())

    N = features[features_list[0]].size

    # The fuzzy sets are usually: low, medium, high.
    # The membership of each fuzzy set is each feature scaled.
    membership = {}
    for f in cfg['output'].keys():
        membership[f] = {}

    for t in features_list:
        for m in membership:
            assert m in cfg['features'][t], \
                "Missing %s in %s" % (m, cfg['features'][t])

            membership[m][t] = ma.masked_all_like(features[t])
            ind = ~ma.getmaskarray(features[t])
            if m == 'low':
                membership[m][t][ind] = zmf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])
            elif m == 'high':
                membership[m][t][ind] = smf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])
            else:
                membership[m][t][ind] = trapmf(
                    np.asanyarray(features[t])[ind], cfg['features'][t][m])

    # Rule Set
    rules = {}

    # Low: u_low = mean(S_l(spike), S_l(clim)...)
    #u_low = np.mean([weights['spike']['low'],
    #                 weights['woa_relbias']['low']], axis=0)
    tmp = membership['low'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['low'][f]))
    # FIXME: If there is only one feature, it will return 1 value
    #        instead of an array with N values.
    rules['low'] = ma.mean(tmp, axis=0)

    # IMPROVE IT: Morello2014 doesn't even use the medium uncertainty,
    #   so there is no reason to estimate it. Generalize this once the
    #   membership combining rules are defined in the cfg, so I can
    #   decide to use mean or max.
    if 'medium' in membership:
        # Medium: u_medium = mean(S_l(spike), S_l(clim)...)
        #u_medium = np.mean([weights['spike']['medium'],
        #                    weights['woa_relbias']['medium']], axis=0)
        tmp = membership['medium'][features_list[0]]
        for f in features_list[1:]:
            tmp = ma.vstack((tmp, membership['medium'][f]))
        rules['medium'] = ma.mean(tmp, axis=0)

    # High: u_high = max(S_l(spike), S_l(clim)...)
    #u_high = np.max([weights['spike']['high'],
    #                 weights['woa_relbias']['high']], axis=0)
    tmp = membership['high'][features_list[0]]
    for f in features_list[1:]:
        tmp = ma.vstack((tmp, membership['high'][f]))
    rules['high'] = ma.max(tmp, axis=0)

    return rules

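# A minimal sketch (toy membership values; the feature names come from the comments
# above) of the rule combination used in fuzzyfy: the "low" rule averages the
# per-feature memberships and the "high" rule takes their maximum, each computed
# column-wise on an ma.vstack'ed membership matrix.
import numpy as np
import numpy.ma as ma

low_memberships = {'spike': ma.masked_invalid([0.2, 0.8, np.nan]),
                   'woa_relbias': ma.masked_invalid([0.4, 0.6, 0.1])}
high_memberships = {'spike': ma.masked_invalid([0.7, 0.1, np.nan]),
                    'woa_relbias': ma.masked_invalid([0.5, 0.3, 0.9])}
rules = {
    'low': ma.mean(ma.vstack(list(low_memberships.values())), axis=0),
    'high': ma.max(ma.vstack(list(high_memberships.values())), axis=0),
}
print(rules['low'])    # [0.3, 0.7, 0.1]
print(rules['high'])   # [0.7, 0.3, 0.9]
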
w1.matrix.mask = np.zeros(w1.matrix.shape)
#w1.vmax = 2
#w1.vmin = -2
#w.makeImage(closeAll=True)
#pylab.draw()
#print "w.matrix.shape, w.matrix.mask.shape", w.matrix.shape, w.matrix.mask.shape

try:
    ############################################
    # punchlines
    w2 = w1.momentNormalise(k)
    corr = w2.corr(k)
    #w2.vmax = 2
    #w2.vmin = -2
    w2.matrix = ma.hstack([w1.matrix, w2.matrix, k.matrix])
    w2.name = w.name + ', normalised, and ' + k.name + '\nnormalised correlation: ' + str(corr)
    w2.matrix = ma.vstack([w2.matrix, topRow])
    w2.name = topRowName + '\n' + "bottom row:" + w2.name
    w2.imagePath = '/home/k/ARMOR/python/testing/' + w.name + '_' + k.name + '_sigma' + str(sigma) + '_thres' + str(thres) + '.png'
    w2.vmin = -20.
    w2.vmax = 100.
    if saveImages:
        w2.saveImage()
    if makeImages:
        w2.makeImage(closeAll=True)
        pylab.draw()
    #
    ############################################
#except IndexError:
except SyntaxError:
    corr = -999

              linestyle='dotted')
ax[0].set_ylim(-0.006, 0.064)
ax[0].set_xlim(np.min(t_in), np.max(t_in))
ax[0].tick_params(axis='x', which='major', labelsize=14)
ax[1].tick_params(axis='x', which='major', labelsize=14)
ax[1].hlines(mlim, np.min(t_in), np.max(t_in), color='black', alpha=0.9,
             linestyle='dotted')

# we stack up the separate instruments
tstack = ma.vstack([tmabrite, tmaastep, tmabring])
astack = ma.vstack([amabrite, amaastep, amabring])
estack = ma.vstack([emabrite, emaastep, emabring])

# now find which instrument has the smallest error and make an array from that
min_amp_ind = ma.argmin(estack, axis=0)

# pull out the lowest values
min_amp = astack[min_amp_ind, np.arange(min_amp_ind.size)]
min_tim = tstack[min_amp_ind, np.arange(min_amp_ind.size)]
min_err = estack[min_amp_ind, np.arange(min_amp_ind.size)]

ax[1].errorbar(min_tim, min_amp,

def choose_for_augmentation(X, Y_class, Y_loc, n_per_class):
    """A function to randomly select only some of the data, but in a fashion that
    ensures that the classes are balanced (i.e. there are equal numbers of each
    class).  Particularly useful if working with real data, as the classes are
    usually very unbalanced (lots of no_def labels, normally).
    Inputs:
        X           | rank 4 array | data.
        Y_class     | rank 2 array | One hot encoding of class labels
        Y_loc       | rank 2 array | locations of deformation
        n_per_class | int          | number of data per class. e.g. 3
    Returns:
        X_sample       | rank 4 array | data.
        Y_class_sample | rank 2 array | One hot encoding of class labels
        Y_loc_sample   | rank 2 array | locations of deformation
    History:
        2019/??/?? | MEG | Written
        2019/10/28 | MEG | Update to handle dicts
        2020/10/29 | MEG | Write the docs.
        2020/10/30 | MEG | Fix bug that was causing Y_class and Y_loc to become masked arrays.
    """
    import numpy as np
    import numpy.ma as ma

    n_classes = Y_class.shape[1]  # only works if one hot encoding is used
    X_sample = []
    Y_class_sample = []
    Y_loc_sample = []

    for i in range(n_classes):  # loop through each class
        args_class = np.ravel(np.argwhere(Y_class[:, i] != 0))  # get the args of the data of this label
        args_sample = args_class[np.random.randint(0, len(args_class), n_per_class)]  # choose n_per_class of these (ie so we always choose the same number from each label)
        X_sample.append(X[args_sample, :, :, :])  # choose the data, and keep adding to a list (each item in the list is n_per_class x ny x nx x n_channels)
        Y_class_sample.append(Y_class[args_sample, :])  # and class labels
        Y_loc_sample.append(Y_loc[args_sample, :])  # and location labels

    X_sample = ma.vstack(X_sample)  # masked array, merge along the first axis, so now have n_class x n_per_class of data
    Y_class_sample = np.vstack(Y_class_sample)  # normal numpy array, note that these would be in order of the class (ie class 0 first, then class 1 etc.)
    Y_loc_sample = np.vstack(Y_loc_sample)  # also normal numpy array

    data_dict = {'X': X_sample,  # package the data and labels together into a dict
                 'Y_class': Y_class_sample,
                 'Y_loc': Y_loc_sample}
    data_dict_shuffled = shuffle_arrays(data_dict)  # shuffle (so that these aren't in the order of the class labels)
    X_sample = data_dict_shuffled['X']  # and unpack as this function doesn't use dictionaries
    Y_class_sample = data_dict_shuffled['Y_class']
    Y_loc_sample = data_dict_shuffled['Y_loc']

    return X_sample, Y_class_sample, Y_loc_sample

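# A minimal sketch (toy arrays, no shuffling, no location labels) of the
# balanced-sampling idea above: pick n_per_class examples from each one-hot class
# and merge them with ma.vstack.
import numpy as np
import numpy.ma as ma

X = ma.masked_invalid(np.random.rand(10, 4, 4, 1))              # 10 tiny "interferograms"
Y_class = np.eye(2)[np.array([0, 0, 0, 0, 0, 0, 0, 1, 1, 1])]   # unbalanced one-hot labels
n_per_class = 2
X_sample = []
for i in range(Y_class.shape[1]):
    args_class = np.ravel(np.argwhere(Y_class[:, i] != 0))
    args_sample = args_class[np.random.randint(0, len(args_class), n_per_class)]
    X_sample.append(X[args_sample])
X_sample = ma.vstack(X_sample)
print(X_sample.shape)                                           # (4, 4, 4, 1)
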
def _do_combine(hdu_no: int, progress: float, progress_step: float,
                data_width: int, data_height: int,
                input_data: List[Union[pyfits.HDUList,
                                       Tuple[ndarray, pyfits.Header]]],
                mode: str = 'average', scaling: Optional[str] = None,
                rejection: Optional[str] = None, min_keep: int = 2,
                percentile: float = 50.0,
                lo: Optional[float] = None, hi: Optional[float] = None,
                max_mem_mb: float = 100.0,
                callback: Optional[callable] = None) \
        -> Tuple[Union[ndarray, ma.MaskedArray], float]:
    """
    Combine the given HDUs from all input images; used by :func:`combine`
    to get a stack of either all input images or, if lucky imaging is
    enabled, of their subset

    :return: image stack data and rejection percent
    """
    n = len(input_data)

    # Calculate scaling factors
    k_ref, k = None, []
    if scaling:
        for data_no, f in enumerate(input_data):
            if isinstance(f, pyfits.HDUList):
                data = f[hdu_no].data
            else:
                data = f[0]
            if scaling == 'average':
                k.append(data.mean())
            elif scaling == 'percentile':
                if percentile == 50:
                    k.append(
                        median(data) if not isinstance(data, ma.MaskedArray)
                        else ma.median(data))
                else:
                    k.append(
                        np_percentile(data, percentile)
                        if not isinstance(data, ma.MaskedArray)
                        else np_percentile(data.compressed(), percentile))
            elif scaling == 'mode':
                # Compute modal values from histograms; convert to integer
                # and assume 2 x 16-bit data range
                if isinstance(data, ma.MaskedArray):
                    data = data.compressed()
                else:
                    data = data.ravel()
                min_val = data.min(initial=0)
                k.append(
                    argmax(bincount(
                        (data - min_val).clip(0, 2*0x10000 - 1)
                        .astype(int32))) + min_val)
            else:
                raise ValueError(
                    'Unknown scaling mode "{}"'.format(scaling))
            if callback is not None:
                callback(progress + (data_no + 1)/n/2*progress_step)

        # Normalize to the first frame with non-zero average; keep images
        # with zero or same average as is
        k_ref = k[0]
        if not k_ref:
            for ki in k[1:]:
                if ki:
                    k_ref = ki
                    break

    # Process data in chunks to fit in the maximum amount of RAM allowed
    rowsize = 0
    for data in input_data:
        if isinstance(data, pyfits.HDUList):
            data = data[hdu_no].data
        else:
            data = data[0]
        rowsize += data[0].nbytes
        if rejection or isinstance(data, ma.MaskedArray):
            rowsize += data_width
    chunksize = min(max(int(max_mem_mb*(1 << 20)/rowsize), 1), data_height)
    while chunksize > 1:
        # Use as small chunks as possible but keep their total number
        if len(list(range(0, data_height, chunksize - 1))) > \
                len(list(range(0, data_height, chunksize))):
            break
        chunksize -= 1

    chunks = []
    rej_percent = 0
    for chunk in range(0, data_height, chunksize):
        datacube = [
            f[hdu_no].data[chunk:chunk + chunksize]
            if isinstance(f, pyfits.HDUList)
            else f[0][chunk:chunk + chunksize]
            for f in input_data
        ]

        if k_ref:
            # Scale data
            for data, ki in zip(datacube, k):
                if ki not in (0, k_ref):
                    data *= k_ref/ki

        # Reject outliers
        if rejection or any(isinstance(data, ma.MaskedArray)
                            for data in datacube):
            datacube = ma.masked_array(datacube)
            if not datacube.mask.shape:
                # No initially masked data, but we'll need an array instead
                # of mask=False to do slicing operations
                datacube.mask = full(datacube.shape, datacube.mask)
        else:
            datacube = array(datacube)

        if rejection == 'chauvenet':
            datacube.mask = chauvenet(datacube, min_vals=min_keep)
        elif rejection == 'iraf':
            if lo is None:
                lo = 1
            if hi is None:
                hi = 1
            if n - (lo + hi) < min_keep:
                raise ValueError(
                    'IRAF rejection with lo={}, hi={} would keep less than '
                    '{} values for a {}-image set'.format(lo, hi, min_keep, n))
            if lo or hi:
                # Mask "lo" smallest values and "hi" largest values along
                # the 0th axis
                order = datacube.argsort(0)
                mg = tuple(i.ravel() for i in indices(datacube.shape[1:]))
                for j in range(-hi, lo):
                    datacube.mask[(order[j].ravel(),) + mg] = True
                del order, mg
        elif rejection == 'minmax':
            if lo is not None and hi is not None:
                if lo > hi:
                    raise ValueError(
                        'lo={} > hi={} for minmax rejection'.format(lo, hi))
                datacube.mask[((datacube < lo) |
                               (datacube > hi)).nonzero()] = True
                if datacube.mask.all(0).any():
                    logging.warning(
                        '%d completely masked pixels left after minmax '
                        'rejection', datacube.mask.all(0).sum())
        elif rejection == 'sigclip':
            if lo is None:
                lo = 3
            if hi is None:
                hi = 3
            if lo < 0 or hi < 0:
                raise ValueError(
                    'Lower and upper limits for sigma clipping must be '
                    'positive, got lo={}, hi={}'.format(lo, hi))
            max_rej = n - min_keep
            while True:
                avg = datacube.mean(0)
                sigma = datacube.std(0)
                resid = datacube - avg
                outliers = (datacube.mask.sum(0) < max_rej) & \
                    (sigma > 0) & ((resid < -lo*sigma) | (resid > hi*sigma))
                if not outliers.any():
                    del avg, sigma, resid, outliers
                    break
                datacube.mask[outliers.nonzero()] = True
        elif rejection:
            raise ValueError(
                'Unknown rejection mode "{}"'.format(rejection))

        if isinstance(datacube, ma.MaskedArray):
            if datacube.mask is None or not datacube.mask.any():
                # Nothing was rejected
                datacube = datacube.data
            else:
                # Calculate the percentage of rejected pixels
                rej_percent += datacube.mask.sum()

        # Combine data
        if mode == 'average':
            res = datacube.mean(0)
        elif mode == 'sum':
            res = datacube.sum(0)
        elif mode == 'percentile':
            if percentile == 50:
                if isinstance(datacube, ma.MaskedArray):
                    res = ma.median(datacube, 0)
                else:
                    res = median(datacube, 0)
            else:
                if isinstance(datacube, ma.MaskedArray):
                    res = nanpercentile(
                        datacube.filled(nan), percentile, 0)
                else:
                    res = np_percentile(datacube, percentile, 0)
        else:
            raise ValueError('Unknown stacking mode "{}"'.format(mode))

        chunks.append(res)

        if callback is not None:
            callback(
                progress +
                ((0.5 if scaling else 0) +
                 min(chunk + chunksize, data_height)/data_height /
                 (2 if scaling else 1))*progress_step)

    if len(chunks) > 1:
        res = ma.vstack(chunks)
    else:
        res = chunks[0]
    if isinstance(res, ma.MaskedArray) and (
            res.mask is None or not res.mask.any()):
        res = res.data

    return res, rej_percent

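# A minimal sketch (toy chunks, hypothetical sizes) of the final assembly step above:
# each processed chunk is a horizontal band of the combined image, and the bands are
# re-joined with ma.vstack; a single chunk is returned as-is.
import numpy as np
import numpy.ma as ma

chunks = [ma.masked_invalid(np.random.rand(3, 5)),   # rows 0..2 of the stacked result
          ma.masked_invalid(np.random.rand(3, 5)),   # rows 3..5
          ma.masked_invalid(np.random.rand(2, 5))]   # rows 6..7 (last, smaller chunk)
res = ma.vstack(chunks) if len(chunks) > 1 else chunks[0]
print(res.shape)                                     # (8, 5)
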