def mcosine(u, v):
    r"""
    Computes the Cosine distance between two n-vectors u and v,
    which is defined as

    .. math::

       1 - \frac{u v^T}{||u||_2 ||v||_2}.

    Parameters
    ----------
    u : ndarray
        An :math:`n`-dimensional vector.
    v : ndarray
        An :math:`n`-dimensional vector.

    Returns
    -------
    d : double
        The Cosine distance between vectors ``u`` and ``v``.
    """
    u = ma.asarray(u, order='c')
    v = ma.asarray(v, order='c')
    return 1.0 - (ma.dot(u, v.T) /
                  (ma.sqrt(ma.dot(u, u.T)) * ma.sqrt(ma.dot(v, v.T))))
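
# A minimal usage sketch (hypothetical data, assuming ``numpy.ma`` is imported
# as ``ma`` in the defining module).  With the default ``strict=False``,
# ``ma.dot`` treats masked entries as zeros, so missing components simply
# drop out of the sums.
import numpy.ma as ma

u = ma.array([1.0, 0.0, 2.0], mask=[False, False, True])
v = ma.array([1.0, 1.0, 2.0])
print(mcosine(u, v))
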
def filter(s0, s1):
    for i, dindex in enumerate(s0.bands):
        s1flux = ma.array(data=s1.flux[dindex], mask=s1.mask[dindex])
        s0flux = ma.array(data=s0.flux[dindex], mask=s0.mask[dindex])
        s1ivar = ma.array(data=s1.ivar[dindex], mask=s1.mask[dindex])
        s0ivar = ma.array(data=s0.ivar[dindex], mask=s0.mask[dindex])
        ston1 = s1.flux[dindex].sum(axis=1) / (1 / ma.sqrt(s1.ivar[dindex])).sum(axis=1)
        ston0 = s0.flux[dindex].sum(axis=1) / (1 / ma.sqrt(s0.ivar[dindex])).sum(axis=1)
        # ston1 = s1.flux[dindex].sum(axis=1) / ma.sqrt((1/s1.ivar[dindex]).sum(axis=1))
        # ston0 = s0.flux[dindex].sum(axis=1) / ma.sqrt((1/s0.ivar[dindex]).sum(axis=1))
        if i == 0:
            ans = np.logical_and(np.greater(ston0, HasSignal.ston_cut),
                                 np.greater(ston1, HasSignal.ston_cut))
        else:
            ans = np.logical_or(
                ans,
                np.logical_and(np.greater(ston0, HasSignal.ston_cut),
                               np.greater(ston1, HasSignal.ston_cut)))
    return ans
def combine_nights(combined_catalog, filterlist, refcat):
    header = ['BEGIN CATALOG HEADER',
              'nfields 13',
              ' ra 1 0 d degrees %10.6f',
              ' dec 2 0 d degrees %10.6f',
              ' id 3 0 c INDEF %3d']
    for filt in filterlist:
        header.append(' {} {:2d} 0 r INDEF %6.3f'.format(filt, len(header) - 1))
        header.append(' {}err {:2d} 0 r INDEF %6.3f'.format(filt, len(header) - 1))
    header += ['END CATALOG HEADER', '']
    catalog = Table([refcat['ra'], refcat['dec'], refcat['id']],
                    meta={'comments': header}, masked=True)
    for filt in filterlist:
        mags = combined_catalog['mag'][combined_catalog['filter'] == filt]
        median = np.median(mags, axis=0)
        absdev_mag = mags - median
        mad = np.median(np.abs(absdev_mag), axis=0) * np.sqrt(pi / 2)
        mags.mask |= np.abs(absdev_mag) > 5 * mad
        catalog[filt] = np.median(mags, axis=0)
        catalog[filt + 'err'] = np.median(np.abs(mags - catalog[filt]),
                                          axis=0) * np.sqrt(pi / 2)
    return catalog
def cache_angles(tile_size=500):
    half_size = float(tile_size - 1) / 2
    get_zp = lambda x, y: arccos(1 / sqrt(x**2 + y**2 + 1))
    get_zm = lambda x, y: arccos(-1 / sqrt(x**2 + y**2 + 1))
    get_xypm = lambda x, y: arccos(y / sqrt(x**2 + y**2 + 1))
    get_phi = lambda x, y: arctan(float(y) / x)
    cache = {
        'zp': np.zeros((tile_size, tile_size), dtype=np.float64),
        'zm': np.zeros((tile_size, tile_size), dtype=np.float64),
        'xypm': np.zeros((tile_size, tile_size), dtype=np.float64),
        'phi': np.zeros((tile_size, tile_size), dtype=np.float64)
    }
    print('Caching angles...')
    for tile_y in range(tile_size):
        y = float(tile_y) / half_size - 1
        for tile_x in range(tile_size):
            x = float(tile_x) / half_size - 1
            cache['zp'][tile_y, tile_x] = get_zp(x, y)
            cache['zm'][tile_y, tile_x] = get_zm(x, y)
            cache['xypm'][tile_y, tile_x] = get_xypm(x, y)
            if x != 0:
                cache['phi'][tile_y, tile_x] = get_phi(x, y)
            # print('cache for: [{}, {}] -> [{},{}]'.format(tile_y, tile_x, y, x))
    return cache
def gamma(self, mkt_dict_, engine_, unit_=None):
    """calculate option GAMMA with market data and engine"""
    _rate, _spot, _vol, _div, _method, _param, _sign, _strike, _t = self._prepare_risk_data(
        mkt_dict_, engine_)
    _unit = unit_ or self.unit
    if _method == EngineMethod.BS.value:
        _d1 = (log(_spot / _strike) + (_rate + _vol**2 / 2) * _t) / _vol / sqrt(_t)
        return exp(-_d1**2 / 2) / sqrt(2 * pi) / _spot / _vol / sqrt(_t) * exp(-_div * _t) * _unit
    elif _method == EngineMethod.MC.value:
        from utils.monte_carlo import MonteCarlo
        _iteration = self._check_iter(_param[EngineParam.MCIteration.value])
        _spot = MonteCarlo.stock_price(_iteration, isp=_spot, rate=_rate,
                                       div=_div, vol=_vol, t=_t)
        _step = 0.01
        _gamma = [((max(_sign * (_s + 2 * _step - _strike), 0) - max(_sign * (_s - _strike), 0)) -
                   (max(_sign * (_s - _strike), 0) - max(_sign * (_s - 2 * _step - _strike), 0))) /
                  (4 * _step**2) for _s in _spot]
        return average(_gamma) * exp(-_rate * _t) * _unit
def showDataFigure(dataArr, *args):
    m, n = shape(dataArr)
    print(m, n)
    example_width = round(sqrt(n))
    print("number of features:", n)
    print("example_width:", example_width)  # 20
    example_height = n / example_width  # 20
    display_rows = floor(sqrt(m))  # 10
    display_cols = ceil(m / display_rows)  # 10
    pad = 1
    display_array = -ones((pad + display_rows * (example_height + pad),
                           pad + display_cols * (example_width + pad)))
    print("shape of display_array:", shape(display_array))
    curr_ex = 0
    for j in range(0, int(display_rows)):
        for i in range(0, int(display_cols)):
            if curr_ex >= m:
                break
            max_val = max(abs(display_array[curr_ex, :]))
            # print("################## maxval", max_val)
            display_array[
                int(pad + j * (example_height + pad)):
                int(pad + j * (example_height + pad)) + int(example_height),
                int(pad + i * (example_width + pad)):
                int(pad + i * (example_width + pad)) + int(example_width)] = \
                reshape(dataArr[curr_ex, :],
                        (int(example_height), int(example_width))) / max_val
            curr_ex = curr_ex + 1
        if curr_ex >= m:
            break
    plt.imshow(display_array.T, CM.gray)  # similar to imagesc in MATLAB
    # scaledimage.scaledimage(display_array)
    plt.show()
def _ttest_ind(Sample_mean_ArrayA, Sample_var_ArrayA, n_subjectsA,
               Sample_mean_ArrayB, Sample_var_ArrayB, n_subjectsB,
               equal_var=True, sign=1):
    if equal_var:
        # force df to be an array for masked division not to throw a warning
        df = ma.asanyarray(n_subjectsA + n_subjectsB - 2.0)
        svar = ((n_subjectsA - 1) * Sample_var_ArrayA +
                (n_subjectsB - 1) * Sample_var_ArrayB) / df
        denom = ma.sqrt(svar * (1.0 / n_subjectsA + 1.0 / n_subjectsB))  # n-D computation here!
    else:
        vn1 = Sample_var_ArrayA / n_subjectsA
        vn2 = Sample_var_ArrayB / n_subjectsB
        df = (vn1 + vn2)**2 / (vn1**2 / (n_subjectsA - 1) + vn2**2 / (n_subjectsB - 1))
        # If df is undefined, variances are zero.
        # It doesn't matter what df is as long as it is not NaN.
        df = np.where(np.isnan(df), 1, df)
        denom = ma.sqrt(vn1 + vn2)
    with np.errstate(divide='ignore', invalid='ignore'):
        ttest_ind = (Sample_mean_ArrayA - Sample_mean_ArrayB) * sign / denom
        pvalues = special.betainc(
            0.5 * df, 0.5, df / (df + ttest_ind * ttest_ind)).reshape(ttest_ind.shape)
    # ttest_ind, pvalues = ma.filled(ttest_ind), ma.filled(pvalues)
    return ttest_ind, pvalues
def pv(self, mkt_dict_, engine_, unit_=None):
    """calculate option PV with market data and engine"""
    _rate, _spot, _vol, _div, _method, _param, _sign, _strike, _t = self._prepare_risk_data(
        mkt_dict_, engine_)
    _unit = unit_ or self.unit
    if _method == EngineMethod.BS.value:
        _d1 = (log(_spot / _strike) + (_rate - _div + _vol**2 / 2) * _t) / _vol / sqrt(_t)
        _d2 = _d1 - _vol * sqrt(_t)
        return _sign * (_spot * exp(-_div * _t) * norm.cdf(_sign * _d1) -
                        _strike * exp(-_rate * _t) * norm.cdf(_sign * _d2)) * _unit
    elif _method == EngineMethod.MC.value:
        from utils.monte_carlo import MonteCarlo
        _iteration = self._check_iter(_param[EngineParam.MCIteration.value])
        _spot = MonteCarlo.stock_price(_iteration, isp=_spot, rate=_rate,
                                       div=_div, vol=_vol, t=_t)
        _price = [max(_sign * (_s - _strike), 0) for _s in _spot]
        return average(_price) * exp(-_rate * _t) * _unit
def mcorrelation(u, v):
    r"""
    Computes the correlation distance between two n-vectors ``u`` and ``v``,
    which is defined as

    .. math::

       1 - \frac{(u - \bar{u}) {(v - \bar{v})}^T}
                {{||u - \bar{u}||}_2 \ {||v - \bar{v}||}_2}

    where :math:`\bar{u}` is the mean of a vector's elements and ``n``
    is the common dimensionality of ``u`` and ``v``.

    Parameters
    ----------
    u : ndarray
        An :math:`n`-dimensional vector.
    v : ndarray
        An :math:`n`-dimensional vector.

    Returns
    -------
    d : double
        The correlation distance between vectors ``u`` and ``v``.
    """
    umu = u.mean()
    vmu = v.mean()
    um = u - umu
    vm = v - vmu
    return 1.0 - (ma.dot(um, vm) /
                  (ma.sqrt(ma.dot(um, um)) * ma.sqrt(ma.dot(vm, vm))))
def test_forest_topology():
    """Test building a forest topology.
    """
    t = build_forest_topology(2, depth=3, arity=3, dx=2.5, dy=1.25, roe=0.4, dt=5)
    nodes = t.nodes.all(order_by='address')
    connections = t.connections.all(order_by='from_addr')

    assert len(nodes) == 80
    assert len(connections) == 78

    # - ranges on levels 0, 1, 2 are the same and determined by sufficient
    #   distances for level 1;
    # - range on level 3 is determined by sufficient distances for connecting
    #   to parents on level 2.
    range_1 = 1.4 * sqrt(22.5**2 + 1.25**2)
    range_2 = 1.4 * sqrt(7.5**2 + 1.25**2)

    # Estimate positions and addresses by level:
    y = [3.75, 2.5, 1.25, 0]
    x1 = [
        np.asarray([32.5]),
        10.0 + np.arange(3) * 22.5,
        2.5 + np.arange(9) * 7.5,
        np.arange(27) * 2.5
    ]
    x = [x1, [pos + 70 for pos in x1]]
    addr1 = [
        np.asarray([1]),
        np.arange(3) + 2,
        np.arange(9) + 5,
        np.arange(27) + 14
    ]
    addr = [addr1, [a + 40 for a in addr1]]

    # Validate nodes level by level:
    for level in range(4):
        for tree in range(2):
            for index in range(3**level):
                address = addr[tree][level][index]
                expected_node = Node(
                    address,
                    GATEWAY_NODE if level == 0 else SENSOR_NODE,
                    x[tree][level][index],
                    y[level],
                    range_1 if level < 3 else range_2)
                assert expected_node == t.nodes.get(address)

    # Validate connections:
    for level, tree in product(range(3), range(2)):
        offset = 0 if tree == 0 else 41
        for index in range(3**level):
            address = addr[tree][level][index]
            children_addresses = addr[tree][level + 1][(3 * index):(3 * (index + 1))]
            for child_address in children_addresses:
                assert (child_address, address) in connections
def ttest_ind(a, b, axis=0, equal_var=True):
    """
    Calculates the T-test for the means of two independent samples of scores.

    Parameters
    ----------
    a, b : array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int or None, optional
        Axis along which to compute test. If None, compute over the whole
        arrays, `a`, and `b`.
    equal_var : bool, optional
        If True, perform a standard independent 2 sample test that assumes
        equal population variances.
        If False, perform Welch's t-test, which does not assume equal
        population variance.

        .. versionadded:: 0.17.0

    Returns
    -------
    statistic : float or array
        The calculated t-statistic.
    pvalue : float or array
        The two-tailed p-value.

    Notes
    -----
    For more details on `ttest_ind`, see `stats.ttest_ind`.
    """
    a, b, axis = _chk2_asarray(a, b, axis)

    if a.size == 0 or b.size == 0:
        return 'One of the vectors is empty'

    (mean1, mean2) = (a.mean(axis), b.mean(axis))
    (var1, var2) = (a.var(axis=axis, ddof=1), b.var(axis=axis, ddof=1))
    (n1, n2) = (a.count(axis), b.count(axis))

    if equal_var:
        # force df to be an array for masked division not to throw a warning
        df = ma.asanyarray(n1 + n2 - 2.0)
        svar = ((n1 - 1) * var1 + (n2 - 1) * var2) / df
        denom = ma.sqrt(svar * (1.0 / n1 + 1.0 / n2))  # n-D computation here!
    else:
        vn1 = var1 / n1
        vn2 = var2 / n2
        with np.errstate(divide='ignore', invalid='ignore'):
            df = (vn1 + vn2)**2 / (vn1**2 / (n1 - 1) + vn2**2 / (n2 - 1))

        # If df is undefined, variances are zero.
        # It doesn't matter what df is as long as it is not NaN.
        df = np.where(np.isnan(df), 1, df)
        denom = ma.sqrt(vn1 + vn2)

    with np.errstate(divide='ignore', invalid='ignore'):
        t = (mean1 - mean2) / denom
    probs = special.betainc(0.5 * df, 0.5, df / (df + t * t)).reshape(t.shape)

    return Ttest_ind(t, probs.squeeze())
def __getitem__(self, index):
    data_r = self.arguments[0]
    data = data_r if is_constant(data_r) else data_r[index]
    if isinstance(data, PhysArray):
        return PhysArray(sqrt(data), units=self._units,
                         name='sqrt({})'.format(data.name),
                         dimensions=data.dimensions, positive=data.positive)
    else:
        return sqrt(data)
def coscalc(arr1, arr2):
    x = 0
    y = 0
    xy = 0
    for num in range(arr1.size):
        x += arr1[num] * arr1[num]
        y += arr2[num] * arr2[num]
        xy += arr1[num] * arr2[num]
    y = sqrt(y)
    x = sqrt(x)
    cosenocal = 1 - (xy / (x * y))
    return cosenocal
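
# Quick sanity check (hypothetical data, assuming ``sqrt`` is available in the
# defining module, e.g. ``from math import sqrt``): the loop above should
# agree with a vectorized NumPy formulation of the cosine distance.
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([3.0, 2.0, 1.0])
vectorized = 1 - np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert abs(coscalc(a, b) - vectorized) < 1e-12
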
def _get_noise_mask(size: int) -> np.matrix:
    """Return random noise"""
    density = 0.1
    mask = np.zeros((size, size))
    for ind in np.random.choice(size - 1, int(floor(size * density))):  # pylint: disable=no-member
        how_many_elmns = random.randint(1, int(sqrt(ind)) + 1)
        for var in range(1, how_many_elmns):  # pylint: disable=unused-variable
            if random.randint(0, 1):
                length = random.randint(1, int(sqrt(size * density)))
                rnd_from = random.randint(1, ind) - length
                rnd_to = rnd_from + length
                mask[rnd_from:rnd_to, ind] = 1
                mask[ind, rnd_from:rnd_to] = 1
    return mask
def filter(pspectra0, pspectra1, zbest, norm=False, ston_cut=7., frac_inc_cut=.50):
    fibermap = pspectra0.fibermap  # Table.read(datafile0, 'FIBERMAP')
    isTGT = fibermap['OBJTYPE'] == 'TGT'
    okFibers = np.logical_and(pspectra0.fibermap['FIBERSTATUS'] == 0,
                              pspectra1.fibermap['FIBERSTATUS'] == 0)
    isGalaxy = zbest['SPECTYPE'] == 'GALAXY'
    hasSignal = HasSignal.filter(pspectra0, pspectra1)

    if norm:
        pspectra0, pspectra1 = renorm(pspectra0, pspectra1)

    diff = difference(pspectra0, pspectra1)
    skymask = maskskylines(diff)

    nspec = diff.flux[diff.bands[0]].shape[0]
    signal = np.zeros(nspec)
    var = np.zeros(nspec)
    ref_signal = np.zeros(nspec)
    if 'b' in diff.bands:
        for sindex in range(nspec):
            nmask = diff.mask['b'][sindex, :] == 0
            signal[sindex] = diff.flux['b'][sindex, nmask].sum()
            var[sindex] = (1 / diff.ivar['b'][sindex, nmask]).sum()
            ref_signal[sindex] = pspectra1.flux['b'][sindex, nmask].sum()

    # nan's should fail here
    brighter = np.logical_or(np.abs(signal / ref_signal) >= frac_inc_cut, ref_signal <= 0)
    significant = (np.abs(signal) / ma.sqrt(var) >= ston_cut)

    triggered = np.logical_and.reduce((significant, isTGT, hasSignal, okFibers,
                                       isGalaxy, brighter))
    return triggered, diff
def biweight(x, cst):
    """
    Computes the biweight average and midvariance for a given 1D array.
    Returns a tuple (biweight mean, biweight variance).

    Parameters
    ----------
    x : {ndarray}
        Input Array
    cst : {float}
        Parameter controlling how outliers are censored.

    Notes
    -----
    The function is restricted to 1D data only.
    """
    assert x.ndim == 1, "1D array only !"
    xmed = ma.median(x, 0)
    manom = x - xmed
    mad = ma.median(ma.absolute(manom))
    u_i = (manom / float(cst * mad))
    u_i *= ma.less_equal(ma.absolute(u_i), 1.).astype(float)
    w_i = (1 - u_i**2)
    if ma.count(w_i) > 0:
        biw_m = xmed + ma.sum(manom * w_i**2) / ma.sum(w_i**2)
    else:
        biw_m = xmed
    biw_sd = ma.sqrt(ma.count(x) * ma.sum(manom**2 * w_i**4))
    biw_sd *= 1. / ma.absolute(ma.sum(w_i * (1 - 5 * u_i**2)))
    return (biw_m, biw_sd.item())
def mag(u, v, missing=MISSING):
    '''
    Compute the magnitude of a vector from its components

    Parameters
    ----------
    u : number, array_like
        U-component of the wind
    v : number, array_like
        V-component of the wind
    missing : number (optional)
        Optional missing parameter. If not given, assume default missing
        value from sharppy.sharptab.constants.MISSING

    Returns
    -------
    mag : number, array_like
        The magnitude of the vector (units are the same as input)
    '''
    u = np.ma.asanyarray(u).astype(np.float64)
    v = np.ma.asanyarray(v).astype(np.float64)
    u.set_fill_value(missing)
    v.set_fill_value(missing)
    if u.shape:
        u[u == missing] = ma.masked
        v[v == missing] = ma.masked
    else:
        if u == missing or v == missing:
            return ma.masked
    return ma.sqrt(u**2 + v**2)
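
# A minimal usage sketch (hypothetical values): entries equal to the missing
# sentinel are masked before the magnitude is taken, so they stay masked in
# the result rather than contaminating it.
speed = mag([3.0, -9999.0, 6.0], [4.0, 2.0, 8.0], missing=-9999.0)
print(speed)       # [5.0 -- 10.0]
print(speed.mask)  # [False  True False]
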
def calculateCentroidMeasurements(self):
    self.X[self.badFrames, :] = ma.masked
    if not self.useSmoothingFilterDerivatives:
        self.v[1:-1] = (self.X[2:, :] - self.X[0:-2]) / (2.0 / self.frameRate)
    else:
        # use a cubic polynomial filter to estimate the velocity
        self.v = ma.zeros(self.X.shape)
        halfWindow = int(np.round(self.filterWindow / 2. * self.frameRate))
        for i in range(halfWindow, self.v.shape[0] - halfWindow):
            start = i - halfWindow
            mid = i
            finish = i + halfWindow + 1
            if not np.any(self.X.mask[start:finish, :]):
                px = np.polyder(np.polyfit(self.t[start:finish] - self.t[mid],
                                           self.X[start:finish, 0], 3))
                py = np.polyder(np.polyfit(self.t[start:finish] - self.t[mid],
                                           self.X[start:finish, 1], 3))
                self.v[i, :] = [np.polyval(px, 0), np.polyval(py, 0)]
            else:
                self.v[i, :] = ma.masked
    self.s = ma.sqrt(ma.sum(ma.power(self.v, 2), axis=1))
    self.phi = ma.arctan2(self.v[:, 1], self.v[:, 0])
    self.t[self.badFrames] = ma.masked
    self.X[self.badFrames, :] = ma.masked
    self.v[self.badFrames, :] = ma.masked
    self.s[self.badFrames] = ma.masked
    self.phi[self.badFrames] = ma.masked
def _calc_correlation(self, values_1, values_2, conf_level=0.95):
    """ Calculates Pearson's correlation coefficient.

    Arguments:
        values_1 -- first data
        values_2 -- second data
        conf_level -- confidence level

    Returns:
        (corr_coeff, significance) -- correlation coefficient and significance arrays
    """
    n_samples = values_1.shape[0]  # Sample length
    # Calculate Pearson's correlation coefficient
    values_cov = ma.sum((values_1 - ma.mean(values_1, axis=0)) *
                        (values_2 - ma.mean(values_2, axis=0)), axis=0)
    corr_coef = values_cov / (ma.std(values_1, axis=0) *
                              ma.std(values_2, axis=0)) / n_samples
    # Calculate significance using t-distribution with n-2 degrees of freedom.
    deg_fr = n_samples - 2  # Degrees of freedom.
    t_distr = ma.abs(corr_coef * ma.sqrt(deg_fr / (1. - corr_coef**2)))  # Student's t-statistic.
    prob = 0.5 + conf_level / 2  # Probability for two tails.
    cr_value = student_t.ppf(prob, deg_fr)  # Student's critical value.
    significance = ma.greater(t_distr, cr_value)
    return corr_coef, significance
def column_stdevs(dataset, means):
    stdevs = [0 for i in range(len(dataset[0]))]
    for i in range(len(dataset[0])):
        variance = [pow(row[i] - means[i], 2) for row in dataset]
        stdevs[i] = sum(variance)
    stdevs = [sqrt(x / float(len(dataset) - 1)) for x in stdevs]
    return stdevs
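
# A small worked example (hypothetical data, assuming ``sqrt`` comes from
# ``math`` in the defining module): two columns whose sample standard
# deviations are 1.0 and 2.0.
dataset = [[1.0, 2.0], [2.0, 4.0], [3.0, 6.0]]
means = [2.0, 4.0]
print(column_stdevs(dataset, means))  # -> [1.0, 2.0]
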
def all_pairs_euclidean(M):
    C = np.zeros((len(M), len(M)))
    for i in range(len(M)):
        for j in range(i + 1, len(M)):
            q = M[i] - M[j]
            C[i][j] = ma.sqrt((q * q.T).sum())
    return C
def find_an_approximation(self, function_table: dict) -> Function:
    try:
        SLNX = sum(log(x) for x in function_table.keys())
        SLNXX = sum(log(x) * log(x) for x in function_table.keys())
        SLNY = sum(log(y) for y in function_table.values())
        SLNXY = sum(log(x) * log(y) for x, y in function_table.items())
        n = len(function_table)
    except ValueError:
        return None
    try:
        b, a = self.solve_matrix22([[n, SLNX], [SLNX, SLNXX]], [SLNY, SLNXY])
        if a is None:
            return None
        a = exp(a)
        fun = lambda x: a * (x**b)
        s = sum((fun(x) - function_table[x])**2 for x in function_table.keys())
        root_mean_square_deviation = sqrt(s / n)
        f = Function(fun, f'ф = {round(a, 3)}*x^({round(b, 3)})', s,
                     root_mean_square_deviation)
        self.print_approximation_table(function_table, f, self.function_type)
        return f
    except TypeError:
        return None
def mmahalanobis(u, v, VI):
    r"""
    Computes the Mahalanobis distance between two n-vectors ``u`` and ``v``,
    which is defined as

    .. math::

       \sqrt{(u-v) V^{-1} (u-v)^T}

    where ``VI`` is the inverse covariance matrix :math:`V^{-1}`.

    Parameters
    ----------
    u : ndarray
        An :math:`n`-dimensional vector.
    v : ndarray
        An :math:`n`-dimensional vector.

    Returns
    -------
    d : double
        The Mahalanobis distance between vectors ``u`` and ``v``.
    """
    u = ma.asarray(u, order='c')
    v = ma.asarray(v, order='c')
    VI = ma.asarray(VI, order='c')
    return ma.sqrt(ma.dot(ma.dot((u - v), VI), (u - v).T).sum())
def _apply_function(func, arg):
    # type: (QuilParser.FunctionContext, Any) -> Any
    if isinstance(arg, Expression):
        if func.SIN():
            return parameters.quil_sin(arg)
        elif func.COS():
            return parameters.quil_cos(arg)
        elif func.SQRT():
            return parameters.quil_sqrt(arg)
        elif func.EXP():
            return parameters.quil_exp(arg)
        elif func.CIS():
            return parameters.quil_cis(arg)
        else:
            raise RuntimeError("Unexpected function to apply: " + func.getText())
    else:
        if func.SIN():
            return sin(arg)
        elif func.COS():
            return cos(arg)
        elif func.SQRT():
            return sqrt(arg)
        elif func.EXP():
            return exp(arg)
        elif func.CIS():
            return cos(arg) + complex(0, 1) * sin(arg)
        else:
            raise RuntimeError("Unexpected function to apply: " + func.getText())
def train_step_batch(self, epoch):
    D1 = ma.dot(self.vectors**2, self.weight_matrix)
    D2 = ma.dot(self.vectors, self.constant_matrix)
    Dist = D1 - D2

    best_nodes = ma.argmin(Dist, 0)
    distances = ma.min(Dist, 0)
    ## print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch)
    self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))

    if self.neighbourhood == Map.NeighbourhoodGaussian:
        H = numpy.exp(-self.unit_distances / (2 * self.radius(epoch))) * (
            self.unit_distances <= self.radius(epoch))
    elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
        H = 1.0 - (self.unit_distances / self.radius(epoch))**2
        H = H * (H >= 0.0)
    else:
        H = 1.0 * (self.unit_distances <= self.radius(epoch))

    P = numpy.zeros((self.vectors.shape[0], self.data.shape[0]))
    P[(best_nodes, list(range(len(best_nodes))))] = numpy.ones(len(best_nodes))

    S = ma.dot(H, ma.dot(P, self.data))
    A = ma.dot(H, ma.dot(P, ~self.data._mask))

    ## nonzero = (range(epoch%2, len(self.vectors), 2), )
    nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )
    self.vectors[nonzero] = S[nonzero] / A[nonzero]
def test_testUfuncs1(self):
    # Test various functions such as sin, cos.
    (x, y, a10, m1, m2, xm, ym, z, zm, xf, s) = self.d
    assert_(eq(np.cos(x), cos(xm)))
    assert_(eq(np.cosh(x), cosh(xm)))
    assert_(eq(np.sin(x), sin(xm)))
    assert_(eq(np.sinh(x), sinh(xm)))
    assert_(eq(np.tan(x), tan(xm)))
    assert_(eq(np.tanh(x), tanh(xm)))
    with np.errstate(divide='ignore', invalid='ignore'):
        assert_(eq(np.sqrt(abs(x)), sqrt(xm)))
        assert_(eq(np.log(abs(x)), log(xm)))
        assert_(eq(np.log10(abs(x)), log10(xm)))
    assert_(eq(np.exp(x), exp(xm)))
    assert_(eq(np.arcsin(z), arcsin(zm)))
    assert_(eq(np.arccos(z), arccos(zm)))
    assert_(eq(np.arctan(z), arctan(zm)))
    assert_(eq(np.arctan2(x, y), arctan2(xm, ym)))
    assert_(eq(np.absolute(x), absolute(xm)))
    assert_(eq(np.equal(x, y), equal(xm, ym)))
    assert_(eq(np.not_equal(x, y), not_equal(xm, ym)))
    assert_(eq(np.less(x, y), less(xm, ym)))
    assert_(eq(np.greater(x, y), greater(xm, ym)))
    assert_(eq(np.less_equal(x, y), less_equal(xm, ym)))
    assert_(eq(np.greater_equal(x, y), greater_equal(xm, ym)))
    assert_(eq(np.conjugate(x), conjugate(xm)))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, ym))))
    assert_(eq(np.concatenate((x, y)), concatenate((x, y))))
    assert_(eq(np.concatenate((x, y)), concatenate((xm, y))))
    assert_(eq(np.concatenate((x, y, x)), concatenate((x, ym, x))))
def compare_medians_ms(group_1, group_2, axis=None):
    """Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : {sequence}
        First dataset.
    group_2 : {sequence}
        Second dataset.
    axis : {integer}
        Axis along which the medians are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    A (p,) array of comparison values.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1**2 + std_2**2)
    return 1 - norm.cdf(W)
def forward(self, graph: dgl.DGLGraph) -> torch.Tensor:
    start_index = 0
    subtoken_ids = []
    node_slices = []
    for node in graph.ndata['token_id']:
        node_id = node.item()
        if node_id in self.full_token_id_to_subtokens:
            cur_subtokens = self.full_token_id_to_subtokens[node_id]
        else:
            unk_id = self.subtoken_to_id[UNK]
            cur_subtokens = [
                self.subtoken_to_id.get(st, unk_id)
                for st in self.full_token_id_to_token[node_id].split(self.delimiter)
            ]
        subtoken_ids += cur_subtokens
        node_slices.append(slice(start_index, start_index + len(cur_subtokens)))
        start_index += len(cur_subtokens)

    full_subtokens_embeds = self.subtoken_embedding(
        graph.ndata['token_id'].new_tensor(subtoken_ids)
    )

    token_embeds = graph.ndata['token_id'].new_empty((graph.number_of_nodes(), self.h_emb),
                                                     dtype=torch.float)
    for node in range(graph.number_of_nodes()):
        token_embeds[node] = full_subtokens_embeds[node_slices[node]].sum(0)

    if self.normalize:
        return token_embeds * sqrt(self.h_emb)
    return token_embeds
def compare_medians_ms(group_1, group_2, axis=None):
    """
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.
    group_2 : array_like
        Second dataset.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    """
    (med_1, med_2) = (ma.median(group_1, axis=axis), ma.median(group_2, axis=axis))
    (std_1, std_2) = (mstats.stde_median(group_1, axis=axis),
                      mstats.stde_median(group_2, axis=axis))
    W = np.abs(med_1 - med_2) / ma.sqrt(std_1 ** 2 + std_2 ** 2)
    return 1 - norm.cdf(W)
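
# A minimal usage sketch (hypothetical samples, assuming the module-level
# imports used above -- numpy, scipy's ``norm`` and ``mstats`` -- are in
# place).  The return value is the upper-tail normal probability of the
# statistic W, so small values indicate that the medians likely differ.
import numpy as np

rng = np.random.default_rng(0)
group_a = rng.normal(0.0, 1.0, size=50)
group_b = rng.normal(1.0, 1.0, size=50)
print(compare_medians_ms(group_a, group_b))  # small value -> medians differ
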
def train_step_batch(self, epoch):
    """A single step of batch training algorithm.
    """
    D1 = ma.dot(self.vectors**2, self.weight_matrix)
    D2 = ma.dot(self.vectors, self.constant_matrix)
    Dist = D1 - D2

    best_nodes = ma.argmin(Dist, 0)
    distances = ma.min(Dist, 0)
    ## print "q error:", ma.mean(ma.sqrt(distances + self.dist_cons)), self.radius(epoch)
    self.qerror.append(ma.mean(ma.sqrt(distances + self.dist_cons)))

    if self.neighbourhood == Map.NeighbourhoodGaussian:
        H = numpy.exp(-self.unit_distances**2 / (2 * self.radius(epoch)**2)) * \
            (self.unit_distances**2 <= self.radius(epoch)**2)
    elif self.neighbourhood == Map.NeighbourhoodEpanechicov:
        H = 1.0 - (self.unit_distances / self.radius(epoch))**2
        H = H * (H >= 0.0)
    else:
        H = 1.0 * (self.unit_distances <= self.radius(epoch))

    P = numpy.zeros((self.vectors.shape[0], self.data.shape[0]))
    P[(best_nodes, range(len(best_nodes)))] = numpy.ones(len(best_nodes))

    S = ma.dot(H, ma.dot(P, self.data))
    A = ma.dot(H, ma.dot(P, ~self.data._mask))

    ## nonzero = (range(epoch%2, len(self.vectors), 2), )
    nonzero = (numpy.array(sorted(set(ma.nonzero(A)[0]))), )
    self.vectors[nonzero] = S[nonzero] / A[nonzero]
def mseuclidean(u, v, V):
    """
    Returns the standardized Euclidean distance between two n-vectors
    ``u`` and ``v``. ``V`` is an m-dimensional vector of component
    variances. It is usually computed among a larger collection of vectors.

    Parameters
    ----------
    u : ndarray
        An :math:`n`-dimensional vector.
    v : ndarray
        An :math:`n`-dimensional vector.

    Returns
    -------
    d : double
        The standardized Euclidean distance between vectors ``u`` and ``v``.
    """
    u = ma.asarray(u, order='c')
    v = ma.asarray(v, order='c')
    V = ma.asarray(V, order='c')
    if len(V.shape) != 1 or V.shape[0] != u.shape[0] or u.shape[0] != v.shape[0]:
        raise TypeError('V must be a 1-D array of the same dimension as u and v.')
    return ma.sqrt(((u - v)**2 / V).sum())
def cryo_CTF_Relion(square_side, star_record):
    """
    Compute the contrast transfer function corresponding to an n x n image
    with the sampling interval DetectorPixelSize.
    """
    # wavelength in nm
    wave_length = 1.22639 / math.sqrt(star_record.voltage * 1000 +
                                      0.97845 * star_record.voltage**2)
    # Divide by 10 to make pixel size in nm. BW is the bandwidth of
    # the signal corresponding to the given pixel size
    bw = 1 / (star_record.pixel_size / 10)

    s, theta = radius_norm(square_side, origin=fctr(square_side))

    # RadiusNorm returns radii such that when multiplied by the
    # bandwidth of the signal, we get the correct radial frequencies
    # corresponding to each pixel in our nxn grid.
    s = s * bw

    DFavg = (star_record.DefocusU + star_record.DefocusV) / 2
    DFdiff = (star_record.DefocusU - star_record.DefocusV)
    df = DFavg + DFdiff * np.cos(2 * (theta - star_record.DefocusAngle)) / 2
    k2 = math.pi * wave_length * df

    # 10**6 converts spherical_aberration from mm to nm
    k4 = math.pi / 2 * 10**6 * star_record.spherical_aberration * wave_length**3
    chi = k4 * s**4 - k2 * s**2

    return (sqrt(1 - star_record.amplitude_contrast**2) * np.sin(chi) -
            star_record.amplitude_contrast * np.cos(chi))
def _get_filament_dx(self):
    '''
    Displacement of filament end-point to become stretched.
    '''
    y_arr, z_arr = self.cut_data[1:3]
    dy_free = self._free_length(y_arr)
    dz_free = self._free_length(z_arr)
    dx = ma.sqrt(self.fs_bond_free_length ** 2 - dy_free ** 2 - dz_free ** 2) - \
        self.fs_bond_free_length_x
    return dx
def DD_FF(u, v):
    '''
    Calculates wind/current speed and direction from u and v components.

    If u and v are the easterly and northerly components, DD is the heading
    direction of the wind; to get the meteorological standard, call
    DD_FF(-u, -v).
    '''
    DD = ma.arctan2(u, v) * 180 / sp.pi
    DD[DD < 0] = 360 + DD[DD < 0]
    FF = ma.sqrt(u**2 + v**2)
    return DD, FF
def _get_fs_length_between_cuts(self):
    '''
    Return linear filament length between cuts. Matrix contains mask
    information.
    '''
    x_arr = self.cut_data[0]
    y_arr = self.cut_data[1]
    z_arr = self.cut_data[2]
    length = ma.sqrt((x_arr[:, 1:] - x_arr[:, :-1]) ** 2 +
                     (y_arr[:, 1:] - y_arr[:, :-1]) ** 2 +
                     (z_arr[:, 1:] - z_arr[:, :-1]) ** 2)
    return length
def vector_sum(x_arr, y_arr):
    """
    Calculate the vector sum of arrays of x and y vectors.

    :param x_arr: array of x-directed vectors
    :type x_arr: numpy.array
    :param y_arr: array of y-directed vectors
    :type y_arr: numpy.array
    :return: array of vector sums
    :rtype: numpy.array
    """
    return ma.sqrt(x_arr**2 + y_arr**2)
def result(self):
    ddof = self.kwargs['ddof']
    if self.masked:
        mask = self.running_count == 0
        denominator = ma.array(self.running_count, mask=mask) - ddof
        q = ma.array(self.q, mask=mask) / denominator
        result = ma.sqrt(q)
        result.shape = self.array[0].shape
    else:
        self.q /= (self.k - ddof)
        result = np.sqrt(self.q)
    # Promote array-scalar to 0-dimensional array.
    if result.ndim == 0:
        result = self._mod.array(result)
    return result
def cart2polar(x, y, degrees=True):
    """
    Convert cartesian X and Y to polar RHO and THETA.

    :param x: x cartesian coordinate
    :param y: y cartesian coordinate
    :param degrees: True = return theta in degrees, False = return theta in
                    radians. [default: True]
    :return: r, theta
    """
    rho = ma.sqrt(x ** 2 + y ** 2)
    theta = ma.arctan2(y, x)
    if degrees:
        theta *= (180 / math.pi)
    return rho, theta
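
# A quick round-trip check (hypothetical values, assuming numpy is available):
# (x, y) = (1, 1) maps to rho = sqrt(2) and theta = 45 degrees.
import numpy as np

rho, theta = cart2polar(np.array([1.0, 0.0]), np.array([1.0, 2.0]))
print(rho)    # [1.41421356 2.        ]
print(theta)  # [45. 90.]
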
def crossmatch(cat0, cat1, threshold=1., racol0='ra', deccol0='dec',
               racol1='ra', deccol1='dec', right_join=False):
    dra = cat0[racol0] - cat1[racol1][:, newaxis]
    ddec = cat0[deccol0] - cat1[deccol1][:, newaxis]
    sep = np.sqrt(dra**2 + ddec**2) * 3600.
    matches = np.min(sep, axis=1) < threshold
    inds = np.argmin(sep, axis=1)
    out = Table(cat0[inds], masked=True)
    if right_join:
        for col in out.colnames:
            out[col].mask = ~matches
        cat1 = cat1.copy()
    else:
        out = out[matches]
        cat1 = cat1[matches]
    return out, cat1
def DD_FF(u, v, met=True):
    '''
    Calculates wind/current speed and direction from u and v components.

    If u and v are the easterly and northerly components, returns:
        FF : wind speed
        DD : wind direction in the meteorological standard (the direction
             the wind is coming from)
    Call with met=False for the oceanographic standard.
    '''
    if not met:
        u, v = -u, -v
    DD = ma.arctan2(-u, -v) * 180 / sp.pi
    DD[DD < 0] = 360 + DD[DD < 0]
    FF = ma.sqrt(u**2 + v**2)
    return DD, FF
def attest_ind(a, b, dim=None):
    """ Return the t-test statistics on arrays a and b over the dim axis.
        Returns both the t statistic as well as the p-value
    """
    # dim = a.ndim - 1 if dim is None else dim
    x1, x2 = ma.mean(a, dim), ma.mean(b, dim)
    v1, v2 = ma.var(a, dim), ma.var(b, dim)
    n1, n2 = (a.shape[dim], b.shape[dim]) if dim is not None else (a.size, b.size)
    df = float(n1 + n2 - 2)
    svar = ((n1 - 1) * v1 + (n2 - 1) * v2) / df
    t = (x1 - x2) / ma.sqrt(svar * (1.0 / n1 + 1.0 / n2))
    if t.ndim == 0:
        return (t, scipy.stats.betai(0.5 * df, 0.5, df / (df + t**2))
                if t is not ma.masked and df / (df + t**2) <= 1.0 else ma.masked)
    else:
        prob = [scipy.stats.betai(0.5 * df, 0.5, df / (df + tsq))
                if tsq is not ma.masked and df / (df + tsq) <= 1.0 else ma.masked
                for tsq in t * t]
        return t, prob
def flatten(ech, method='average'):
    """Flatten 2-D echelle spectrum to 1-D flat spectrum
    """
    wav = ech.wav[0]
    assert np.allclose(ech.wav - wav, 0), "ech.wav rows must be identical"

    ech.flux = ma.masked_invalid(ech.flux)
    ech.uflux = ma.masked_invalid(ech.uflux)
    if method == 'average':
        ivar = ech.uflux**-2
        # Weighted mean and uncertainty on weighted mean
        flux = ma.sum(ech.flux * ivar, axis=0) / ma.sum(ivar, axis=0)
        uflux = ma.sqrt(1 / ma.sum(ivar, axis=0))

    flux.fill_value = np.nan
    uflux.fill_value = np.nan
    flux = flux.filled()
    uflux = uflux.filled()
    return flux, uflux
def _get_gamma_cdf(aseries, condition):
    """
    Returns the CDF values for aseries.

    Parameters
    ----------
    aseries : TimeSeries
        Annual series of data (one column per period)
    condition : TimeSeries
        Period mask.
    """
    # Mask the months for which no precipitation was recorded
    aseries_ = ma.masked_values(aseries, 0)
    # Get the proportion of 0 precipitation for each period (MM/WW)
    pzero = 1. - aseries_.count(axis=0) / aseries.count(axis=0).astype(float)
    # Mask outside the reference period
    aseries_._mask |= condition._data
    meanrain = aseries_.mean(axis=0)
    aleph = ma.log(meanrain) - ma.log(aseries_).mean(axis=0)
    alpha = (1. + ma.sqrt(1. + 4. / 3 * aleph)) / (4. * aleph)
    beta = meanrain / alpha
    # Get the Gamma CDF (per month)
    gcdf = pzero + (1. - pzero) * ssd.gamma.cdf(aseries, alpha, scale=beta)
    return gcdf
        variables[var]['slabel'], total, variables[var]['tunits'])

print('%s' % (variables[var]['label']))

# compute min & max of annual (temporal) mean
mdatamin = vars()[var].min()
mdatamax = vars()[var].max()

# compute spatial mean & R.M.S. of annual (temporal) mean
mmask = MA.masked_values(vars()[var] / vars()[var], variables[var]['fillvalue'])
mdatamean = MA.masked_values(MA.sum(vars()[var] * area * mmask) / MA.sum(area * mmask),
                             variables[var]['fillvalue'])
mdatarms = MA.masked_values(MA.sqrt(MA.sum(vars()[var] * vars()[var] * area * mmask) /
                                    MA.sum(area * mmask)),
                            variables[var]['fillvalue'])

if POPDIAGPY == 'TRUE':
    if var in vintlist:  # vertically integrated
        variables[var]['title_mod'] = '%s: Min:%.2f, Max:%.2f, Mean:%.2f, RMS:%.2f ' % (
            variables[var]['klabel'], mdatamin, mdatamax, mdatamean, mdatarms)
    elif 'FLUX_IN' in var:  # particle flux
        variables[var]['title_mod'] = '%s at %.0f m: Min:%.2f, Max:%.2f, Mean:%.2f, RMS:%.2f ' % (
            variables[var]['slabel'], depth, mdatamin, mdatamax, mdatamean, mdatarms)
    else:
        variables[var]['title_mod'] = '%s: Min:%.2f, Max:%.2f, Mean:%.2f, RMS:%.2f ' % (
            variables[var]['slabel'], mdatamin, mdatamax, mdatamean, mdatarms)
else:
    variables[var]['title_mod'] = variables[var]['label']

# plot maps -----------------------------------------------------------
def MSE(statistic, knownParameter=THETA):
    return sqrt(np.mean((statistic - knownParameter) ** 2))
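
# Worked example (hypothetical numbers, assuming ``sqrt`` and ``np`` are
# imported in the defining module): despite its name, the function returns the
# root of the mean squared error, so estimates [1, 3] against a known
# parameter of 2 give sqrt(((1-2)^2 + (3-2)^2) / 2) = 1.0.
import numpy as np

estimates = np.array([1.0, 3.0])
print(MSE(estimates, knownParameter=2.0))  # -> 1.0
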
def multiply_by_cal(Data, CalData):
    """Function scales data by the noise cal temperature.
    """

    # For now we just assume that the cal and polarizations are arranged in a
    # certain way and then check to make sure we are right.
    calibrate_to_I = False
    if tuple(Data.field['CRVAL4']) == (-5, -7, -8, -6):
        xx_ind = 0
        yy_ind = 3
        xy_inds = [1, 2]
    elif tuple(Data.field['CRVAL4']) == (1, 2, 3, 4):
        # This is a hack. Completely temporary.
        calibrate_to_I = True
    else:
        raise ce.DataError('Polarization types not as expected in data.')

    cal_xx_ind = 0
    cal_yy_ind = 1
    if (CalData.field['CRVAL4'][cal_xx_ind] != -5 or
            CalData.field['CRVAL4'][cal_yy_ind] != -6):
        raise ce.DataError('Polarization types not as expected in cal.')

    # Cal should only have 1 time, 1 cal state and 2 polarizations.
    if CalData.dims[:3] != (1, 2, 1):
        raise ce.DataError('Cal temperature data has wrong dimensions.')

    # Cal state should be special state 'R'.
    if CalData.field['CAL'][0] != 'R':
        raise ce.DataError("Cal state in cal temperature data should be 'R'.")

    # Bring the Cal data to the same frequencies as the other data.
    Data.calc_freq()
    CalData.calc_freq()
    if sp.allclose(Data.freq, CalData.freq):
        cdata = CalData.data
    elif abs(Data.field['CDELT1']) <= abs(CalData.field['CDELT1']):
        calfunc = interpolate.interp1d(CalData.freq, CalData.data,
                                       fill_value=sp.nan, bounds_error=False)
        cdata = ma.array(calfunc(Data.freq))
        cdata[sp.logical_not(sp.isfinite(cdata))] = ma.masked
    else:
        nf = len(Data.freq)
        width = abs(Data.field['CDELT1'])
        cdata = ma.empty((1, 2, 1, nf))
        for find in range(nf):
            f = Data.freq[find]
            inds, = sp.where(sp.logical_and(CalData.freq >= f - width / 2.0,
                                            CalData.freq < f + width / 2.0))
            cdata[:, :, :, find] = ma.mean(CalData.data[:, :, :, inds], 3)

    if calibrate_to_I:
        Data.data *= (cdata[0, cal_xx_ind, 0, :] + cdata[0, cal_yy_ind, 0, :]) / 2.0
    else:
        # Loop over times and cal and scale each polarization appropriately.
        for tind in range(Data.dims[0]):
            for cind in range(Data.dims[2]):
                Data.data[tind, xx_ind, cind, :] *= cdata[0, cal_xx_ind, 0, :]
                Data.data[tind, yy_ind, cind, :] *= cdata[0, cal_yy_ind, 0, :]
                Data.data[tind, xy_inds, cind, :] *= ma.sqrt(
                    cdata[0, cal_yy_ind, 0, :] * cdata[0, cal_xx_ind, 0, :])
def scale_by_cal(Data, scale_t_ave=True, scale_f_ave=False, sub_med=False,
                 scale_f_ave_mod=False, rotate=False):
    """Puts all data in units of the cal temperature.

    Data is put into units of the cal temperature, thus removing dependence
    on the gain.  This can be done by dividing by the time average of the cal
    (scale_t_ave=True, Default), thus removing dependence on the frequency-
    dependent gain.  Alternatively, you can scale by the frequency average to
    remove the time-dependent gain (scale_f_ave=True).  Data is then in units
    of the frequency averaged cal temperature.  You can also do both
    (recommended).  After some scaling the data ends up in units of the cal
    temperature as a function of frequency.

    Optionally you can also subtract the time average of the data off here
    (subtract_time_median), since you might be done with the cal information
    at this point.
    """

    on_ind = 0
    off_ind = 1
    if (Data.field['CAL'][on_ind] != 'T' or
            Data.field['CAL'][off_ind] != 'F'):
        raise ce.DataError('Cal states not in expected order.')

    if tuple(Data.field['CRVAL4']) == (-5, -7, -8, -6):
        # Here we check the polarizations and cal indices
        xx_ind = 0
        yy_ind = 3
        xy_inds = [1, 2]

        # A bunch of calculations used to test phase closure.  Not actually
        # relevant to what is being done here.
        #a = (Data.data[5, xy_inds, on_ind, 15:20]
        #     - Data.data[5, xy_inds, off_ind, 15:20])
        #a /= sp.sqrt(Data.data[5, xx_ind, on_ind, 15:20]
        #             - Data.data[5, xx_ind, off_ind, 15:20])
        #a /= sp.sqrt(Data.data[5, yy_ind, on_ind, 15:20]
        #             - Data.data[5, yy_ind, off_ind, 15:20])
        #print a[0,:]**2 + a[1,:]**2

        diff_xx = Data.data[:, xx_ind, on_ind, :] - Data.data[:, xx_ind, off_ind, :]
        diff_yy = Data.data[:, yy_ind, on_ind, :] - Data.data[:, yy_ind, off_ind, :]

        if scale_t_ave:
            # Find the cal means (in time) and scale by them.
            # Means work much better than medians.  Medians seem to bias the
            # result by up to 10%.  This seems to be discretization noise.
            # Cal switches fast enough that we shouldn't need this anyway.
            cal_tmed_xx = ma.mean(diff_xx, 0)
            cal_tmed_yy = ma.mean(diff_yy, 0)
            cal_tmed_xx[sp.logical_or(cal_tmed_xx <= 0, cal_tmed_yy <= 0)] = ma.masked
            cal_tmed_yy[cal_tmed_xx.mask] = ma.masked

            Data.data[:, xx_ind, :, :] /= cal_tmed_xx
            Data.data[:, yy_ind, :, :] /= cal_tmed_yy
            Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_tmed_yy * cal_tmed_xx)

        if scale_f_ave:
            # The frequency gains have systematic structure to them, they are
            # not by any approximation gaussian distributed.  Use means, not
            # medians across frequency.
            operation = ma.mean
            cal_fmea_xx = operation(diff_xx, -1)
            cal_fmea_yy = operation(diff_yy, -1)

            # Flag data with weird cal power.  Still experimental.
            cal_fmea_xx[sp.logical_or(cal_fmea_xx <= 0, cal_fmea_yy <= 0)] = ma.masked
            cal_fmea_yy[cal_fmea_xx.mask] = ma.masked
            cal_xx = ma.mean(cal_fmea_xx)
            cal_yy = ma.mean(cal_fmea_yy)
            cal_fmea_xx[sp.logical_or(abs(cal_fmea_xx.anom()) >= 0.1 * cal_xx,
                                      abs(cal_fmea_yy.anom()) >= 0.1 * cal_yy)] = ma.masked
            cal_fmea_yy[cal_fmea_xx.mask] = ma.masked

            ntime = len(cal_fmea_xx)
            cal_fmea_xx.shape = (ntime, 1, 1)
            cal_fmea_yy.shape = (ntime, 1, 1)
            Data.data[:, xx_ind, :, :] /= cal_fmea_xx
            Data.data[:, yy_ind, :, :] /= cal_fmea_yy
            cal_fmea_xx.shape = (ntime, 1, 1, 1)
            cal_fmea_yy.shape = (ntime, 1, 1, 1)
            Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_fmea_yy * cal_fmea_xx)

        if scale_f_ave_mod:
            # The frequency gains have systematic structure to them, they are
            # not by any approximation gaussian distributed.  Use means, not
            # medians across frequency.
            operation = ma.mean
            cal_fmea_xx = operation(diff_xx, -1)
            cal_fmea_yy = operation(diff_yy, -1)
            cal_fmea_xx_off = operation(Data.data[:, xx_ind, off_ind, :], -1)
            cal_fmea_yy_off = operation(Data.data[:, yy_ind, off_ind, :], -1)

            sys_xx = cal_fmea_xx_off / cal_fmea_xx
            sys_yy = cal_fmea_yy_off / cal_fmea_yy
            percent_ok = 0.03
            sys_xx_tmed = ma.median(sys_xx)
            sys_yy_tmed = ma.median(sys_yy)

            maskbad_xx = ((sys_xx > sys_xx_tmed + sys_xx_tmed * percent_ok) |
                          (sys_xx < sys_xx_tmed - sys_xx_tmed * percent_ok))
            maskbad_yy = ((sys_yy > sys_yy_tmed + sys_yy_tmed * percent_ok) |
                          (sys_yy < sys_yy_tmed - sys_yy_tmed * percent_ok))

            cal_fmea_xx[sp.logical_or(cal_fmea_xx <= 0, cal_fmea_yy <= 0)] = ma.masked
            cal_fmea_yy[cal_fmea_xx.mask] = ma.masked
            cal_fmea_xx[maskbad_xx] = ma.masked
            cal_fmea_yy[maskbad_yy] = ma.masked
            cal_xx = ma.mean(cal_fmea_xx)
            cal_yy = ma.mean(cal_fmea_yy)

            ntime = len(cal_fmea_xx)
            cal_fmea_xx.shape = (ntime, 1, 1)
            cal_fmea_yy.shape = (ntime, 1, 1)
            Data.data[:, xx_ind, :, :] /= cal_fmea_xx
            Data.data[:, yy_ind, :, :] /= cal_fmea_yy
            cal_fmea_xx.shape = (ntime, 1, 1, 1)
            cal_fmea_yy.shape = (ntime, 1, 1, 1)
            Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_fmea_yy * cal_fmea_xx)

        if scale_f_ave and scale_t_ave:
            # We have divided out t_cal twice so we need to put one factor
            # back in.
            cal_xx = operation(cal_tmed_xx)
            cal_yy = operation(cal_tmed_yy)
            Data.data[:, xx_ind, :, :] *= cal_xx
            Data.data[:, yy_ind, :, :] *= cal_yy
            Data.data[:, xy_inds, :, :] *= ma.sqrt(cal_yy * cal_xx)

        if scale_f_ave_mod and scale_t_ave:
            # Same divide-out-twice problem.
            cal_xx = operation(cal_tmed_xx)
            cal_yy = operation(cal_tmed_yy)
            Data.data[:, xx_ind, :, :] *= cal_xx
            Data.data[:, yy_ind, :, :] *= cal_yy
            Data.data[:, xy_inds, :, :] *= ma.sqrt(cal_yy * cal_xx)

        if scale_f_ave and scale_f_ave_mod:
            raise ce.DataError("time averaging twice")

        if rotate:
            # Define the differential cal phase to be zero and rotate all data
            # such that this is true.
            cal_real_mean = ma.mean(Data.data[:, 1, 0, :] - Data.data[:, 1, 1, :], 0)
            cal_imag_mean = ma.mean(Data.data[:, 2, 0, :] - Data.data[:, 2, 1, :], 0)
            # Get the cal phase angle as a function of frequency.
            cal_phase = -ma.arctan2(cal_imag_mean, cal_real_mean)

            # Rotate such that the cal phase is zero.  Imperative to have a
            # temporary variable.
            New_data_real = (ma.cos(cal_phase) * Data.data[:, 1, :, :] -
                             ma.sin(cal_phase) * Data.data[:, 2, :, :])
            New_data_imag = (ma.sin(cal_phase) * Data.data[:, 1, :, :] +
                             ma.cos(cal_phase) * Data.data[:, 2, :, :])
            Data.data[:, 1, :, :] = New_data_real
            Data.data[:, 2, :, :] = New_data_imag

    elif tuple(Data.field['CRVAL4']) == (1, 2, 3, 4):
        # For the short term, just divide everything by on-off in I.
        I_ind = 0
        cal_I_t = Data.data[:, I_ind, on_ind, :] - Data.data[:, I_ind, off_ind, :]
        cal_I = ma.mean(cal_I_t, 0)
        Data.data /= cal_I
    else:
        raise ce.DataError("Unsupported polarization states.")

    # Subtract the time median if desired.
    if sub_med:
        Data.data -= ma.median(Data.data, 0)
    sys.exit()

fpmod.close()

# compute temporal mean
datam[var] = datam[var].mean(axis=0)

if 'cfac' in variables[var]:  # apply unit conversions if any
    datam[var] = datam[var] * variables[var]['cfac']

# compute max & min
mdatamin = datam[var].min()
mdatamax = datam[var].max()

# compute mean & r.m.s.
mdatamean = MA.sum(datam[var] * area) / MA.sum(area)
mdatarms = MA.sqrt(MA.sum(datam[var] * datam[var] * area) / MA.sum(area))

# compute annual totals and include them in plot labels
if 'tcfac' in variables[var]:
    totalm = MA.sum(datam[var] * area) * variables[var]['tcfac']
    if 'FLUX_IN' in var:  # particle flux
        variables[var]['title_exp'] = 'Total %s at %.0f m = %.2f %s' % (
            variables[var]['label'], depth, totalm, variables[var]['tunits'])
    else:  # vertical integrals
        variables[var]['title_exp'] = 'Total %s = %.2f %s' % (
            variables[var]['label'], totalm, variables[var]['tunits'])

if POPDIAGPY == 'TRUE':
    variables[var]['title_exp'] = '%s: Min:%.2f, Max:%.2f, Mean:%.2f, RMS:%.2f ' % (
        variables[var]['slabel'], mdatamin, mdatamax, mdatamean, mdatarms)
def __call__(self, value, clip=None):

    # read in parameters
    method = self.stretch
    exponent = self.exponent
    midpoint = self.midpoint

    # ORIGINAL MATPLOTLIB CODE

    if clip is None:
        clip = self.clip

    if cbook.iterable(value):
        vtype = 'array'
        val = ma.asarray(value).astype(np.float)
    else:
        vtype = 'scalar'
        val = ma.array([value]).astype(np.float)

    self.autoscale_None(val)
    vmin, vmax = self.vmin, self.vmax
    if vmin > vmax:
        raise ValueError("minvalue must be less than or equal to maxvalue")
    elif vmin == vmax:
        return 0.0 * val
    else:
        if clip:
            mask = ma.getmask(val)
            val = ma.array(np.clip(val.filled(vmax), vmin, vmax), mask=mask)
        result = (val - vmin) * (1.0 / (vmax - vmin))

        # CUSTOM APLPY CODE

        # Keep track of negative values
        negative = result < 0.

        if self.stretch == 'linear':
            pass
        elif self.stretch == 'log':
            result = ma.log10(result * (self.midpoint - 1.) + 1.) \
                / ma.log10(self.midpoint)
        elif self.stretch == 'sqrt':
            result = ma.sqrt(result)
        elif self.stretch == 'arcsinh':
            result = ma.arcsinh(result / self.midpoint) \
                / ma.arcsinh(1. / self.midpoint)
        elif self.stretch == 'power':
            result = ma.power(result, exponent)
        else:
            raise Exception("Unknown stretch in APLpyNormalize: %s" % self.stretch)

        # Now set previously negative values to 0, as these are
        # different from true NaN values in the FITS image
        result[negative] = -np.inf

    if vtype == 'scalar':
        result = result[0]

    return result
def weighted_rms_var_from_yr(var, reg_name, reg_num, mask_var, wgt_var, year,
                             hist_dict, ave_info, file_dict, avg_test_slice,
                             obs_file, nlev):
    '''
    Computes the weighted rms for a year

    @param var            The name of the variable that is being averaged.
    @reg_name             The name of the region to average over.
    @reg_num              The number of the region in the region_mask.
    @mask_var             The name of the netCDF variable that contains the region mask.
    @wgt_var              The name of the netCDF variable that contains the weight information.
    @param year           The year to average over.
    @param hist_dict      A dictionary that holds file references for all years/months.
    @param ave_info       A dictionary of the type of average that is to be done.
                          Includes: type, months_to_average, fn, and weights
                          (weights are not used in this function/average).
    @param file_dict      A dictionary which holds file pointers to the input files
                          that are needed by this average calculation.
    @param avg_test_slice Averaged slice used in this calculation.
    @param obs_file       Observation file that contains the values to be used in
                          the calculation.
    @param nlev           Number of ocean vertical levels.
    @return nrms          The normalized rms results for this variable.
    '''
    import warnings

    # Get the weighted values from the yearly average file
    slev_weights = rover.fetch_slice(hist_dict, year, 0, wgt_var, file_dict, time=False)
    # Get the region mask
    slev_mask = rover.fetch_slice(hist_dict, year, 0, mask_var, file_dict, time=False)

    # Since weights and region mask are only one level, we need to expand them
    # to all levels
    region_mask = MA.expand_dims(slev_mask, axis=0)
    weights = MA.expand_dims(slev_weights, axis=0)
    for lev in range(1, nlev):
        new_region_mask = MA.expand_dims(slev_mask, axis=0)
        region_mask = np.vstack((region_mask, new_region_mask))
        new_weights = MA.expand_dims(slev_weights, axis=0)
        weights = np.vstack((weights, new_weights))

    # Calculate the weighted average
    # First, we need to reshape the arrays to average along two dims
    if reg_name == 'Glo':
        temp_mask = MA.masked_where(region_mask <= int(reg_num), avg_test_slice)
    else:
        temp_mask = MA.masked_where(region_mask != int(reg_num), avg_test_slice)

    ma_to_average = temp_mask.reshape(temp_mask.shape[0], -1)
    weights_flattened = weights.reshape(weights.shape[0], -1)
    warnings.filterwarnings("ignore")
    rms_Ave = MA.sqrt(MA.average((ma_to_average * ma_to_average), axis=1,
                                 weights=weights_flattened))
    warnings.filterwarnings("default")

    # nrms = rms_Ave/(MA.max(rms_Ave) - MA.min(rms_Ave))
    nrms = rms_Ave
    return nrms
def initialize_cluster_centers(self, pXY, K):
    """ Initializes the cluster assignments along each axis, by first selecting k centers,
    and then mapping each row to its closest center under cosine similarity.

    Args:
        pXY: original data matrix
        K: numbers of clusters desired in each dimension

    Return:
        new_C: a list of lists of cluster ids that the current index in the current
            axis is assigned to.
    """
    if not isinstance(pXY, SparseMatrix):
        raise Exception("Matrix argument to initialize_cluster_centers is not an instance of SparseMatrix.")
    new_C = [[-1] * Ni for Ni in pXY.N]
    for axis in range(len(K)):  # loop over each dimension
        # choose cluster centers
        axis_length = pXY.N[axis]
        center_indices = random.sample(range(axis_length), K[axis])
        cluster_ids = {}
        for i in range(K[axis]):  # assign identifiers to clusters
            center_index = center_indices[i]
            cluster_ids[center_index] = i
        centers = defaultdict(lambda: defaultdict(float))  # all nonzero indices for each center
        for coords in pXY.nonzero_elements:
            coord_this_axis = coords[axis]
            if coord_this_axis in cluster_ids:  # is a center
                # coords without the current axis
                reduced_coords = tuple([coords[i] for i in range(len(coords)) if i != axis])
                # (cluster_id, other coords) -> value
                centers[cluster_ids[coord_this_axis]][reduced_coords] = pXY.nonzero_elements[coords]
        # assign rows to clusters
        scores = np.zeros(shape=(pXY.N[axis], K[axis]))  # scores: axis_size x cluster_number
        denoms_P = np.zeros(shape=(pXY.N[axis]))
        denoms_Q = np.zeros(shape=(K[axis]))
        for coords in pXY.nonzero_elements:
            coord_this_axis = coords[axis]
            if coord_this_axis in center_indices:
                continue  # don't reassign cluster centers, please
            reduced_coords = tuple([coords[i] for i in range(len(coords)) if i != axis])
            for cluster_index in cluster_ids:
                xhat = cluster_ids[cluster_index]  # need cluster ID, not the axis index
                if reduced_coords in centers[xhat]:  # overlapping point
                    P_i = pXY.nonzero_elements[coords]
                    Q_i = centers[xhat][reduced_coords]
                    scores[coords[axis]][xhat] += P_i * Q_i  # now doing based on cosine similarity
                    denoms_P[coords[axis]] += P_i * P_i  # magnitude of this slice of original matrix
                    denoms_Q[xhat] += Q_i * Q_i  # magnitude of cluster centers
        # normalize scores
        scores = divide(scores, outer(sqrt(denoms_P), sqrt(denoms_Q)))
        scores[scores == 0] = -1.0
        # add random jitter to scores to handle tie-breaking
        scores += self.jitter_max * random_sample(scores.shape)
        new_cXYi = list(scores.argmax(1))  # this needs to be argmax because cosine similarity
        # make sure to assign the cluster centers to themselves
        for center_index in cluster_ids:
            new_cXYi[center_index] = cluster_ids[center_index]
        # ensure numbers of clusters are correct
        self.ensure_correct_number_clusters(new_cXYi, K[axis])
        new_C[axis] = new_cXYi
    return new_C