def project(x, mask=None):
    """
    Take a vector x (possibly with negative entries and not normalized) and
    project it onto the unit simplex.

    mask: do not project these entries;
          project the remaining entries onto a lower-dimensional simplex.
    """
    if mask is not None:
        mask = np.asarray(mask)
        xsorted = np.sort(x[~mask])[::-1]
        # remaining entries need to sum up to 1 - sum x[mask]
        sum_ = 1.0 - np.sum(x[mask])
    else:
        xsorted = np.sort(x)[::-1]
        # entries need to sum up to 1 (unit simplex)
        sum_ = 1.0
    lambda_a = (np.cumsum(xsorted) - sum_) / np.arange(1.0, len(xsorted) + 1.0)
    for i in range(len(lambda_a) - 1):
        if lambda_a[i] >= xsorted[i + 1]:
            astar = i
            break
    else:
        astar = -1
    p = np.maximum(x - lambda_a[astar], 0)
    if mask is not None:
        p[mask] = x[mask]
    return p
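# A minimal usage sketch for project() above, assuming numpy is imported as np
# and project() is defined as written; the example vectors are made up.
# The projected vector should be nonnegative and sum to 1, and masked entries
# pass through unchanged.
x = np.array([0.4, -0.2, 1.3, 0.1])
p = project(x)
print(p, p.sum())                         # entries >= 0, total 1.0

mask = np.array([False, False, True, False])
p_masked = project(np.array([0.1, 0.2, 0.3, 0.9]), mask=mask)
print(p_masked, p_masked.sum())           # third entry kept at 0.3, total 1.0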
def testMutableHashTableOfTensors(self): with self.test_session(): default_val = tf.constant([-1, -1], tf.int64) keys = tf.constant(["brain", "salad", "surgery"]) values = tf.constant([[0, 1], [2, 3], [4, 5]], tf.int64) table = tf.contrib.lookup.MutableHashTable(tf.string, tf.int64, default_val) self.assertAllEqual(0, table.size().eval()) table.insert(keys, values).run() self.assertAllEqual(3, table.size().eval()) input_string = tf.constant(["brain", "salad", "tank"]) output = table.lookup(input_string) self.assertAllEqual([3, 2], output.get_shape()) result = output.eval() self.assertAllEqual([[0, 1], [2, 3], [-1, -1]], result) exported_keys, exported_values = table.export() self.assertAllEqual([None], exported_keys.get_shape().as_list()) self.assertAllEqual([None, 2], exported_values.get_shape().as_list()) # exported data is in the order of the internal map, i.e. undefined sorted_keys = np.sort(exported_keys.eval()) sorted_values = np.sort(exported_values.eval()) self.assertAllEqual([b"brain", b"salad", b"surgery"], sorted_keys) self.assertAllEqual([[4, 5], [2, 3], [0, 1]], sorted_values)
def test_vectorizer_unicode(): # tests that the count vectorizer works with cyrillic. document = ( "\xd0\x9c\xd0\xb0\xd1\x88\xd0\xb8\xd0\xbd\xd0\xbd\xd0\xbe\xd0" "\xb5 \xd0\xbe\xd0\xb1\xd1\x83\xd1\x87\xd0\xb5\xd0\xbd\xd0\xb8\xd0" "\xb5 \xe2\x80\x94 \xd0\xbe\xd0\xb1\xd1\x88\xd0\xb8\xd1\x80\xd0\xbd" "\xd1\x8b\xd0\xb9 \xd0\xbf\xd0\xbe\xd0\xb4\xd1\x80\xd0\xb0\xd0\xb7" "\xd0\xb4\xd0\xb5\xd0\xbb \xd0\xb8\xd1\x81\xd0\xba\xd1\x83\xd1\x81" "\xd1\x81\xd1\x82\xd0\xb2\xd0\xb5\xd0\xbd\xd0\xbd\xd0\xbe\xd0\xb3" "\xd0\xbe \xd0\xb8\xd0\xbd\xd1\x82\xd0\xb5\xd0\xbb\xd0\xbb\xd0" "\xb5\xd0\xba\xd1\x82\xd0\xb0, \xd0\xb8\xd0\xb7\xd1\x83\xd1\x87" "\xd0\xb0\xd1\x8e\xd1\x89\xd0\xb8\xd0\xb9 \xd0\xbc\xd0\xb5\xd1\x82" "\xd0\xbe\xd0\xb4\xd1\x8b \xd0\xbf\xd0\xbe\xd1\x81\xd1\x82\xd1\x80" "\xd0\xbe\xd0\xb5\xd0\xbd\xd0\xb8\xd1\x8f \xd0\xb0\xd0\xbb\xd0\xb3" "\xd0\xbe\xd1\x80\xd0\xb8\xd1\x82\xd0\xbc\xd0\xbe\xd0\xb2, \xd1\x81" "\xd0\xbf\xd0\xbe\xd1\x81\xd0\xbe\xd0\xb1\xd0\xbd\xd1\x8b\xd1\x85 " "\xd0\xbe\xd0\xb1\xd1\x83\xd1\x87\xd0\xb0\xd1\x82\xd1\x8c\xd1\x81\xd1" "\x8f.") vect = CountVectorizer() X_counted = vect.fit_transform([document]) assert_equal(X_counted.shape, (1, 15)) vect = HashingVectorizer(norm=None, non_negative=True) X_hashed = vect.transform([document]) assert_equal(X_hashed.shape, (1, 2 ** 20)) # No collisions on such a small dataset assert_equal(X_counted.nnz, X_hashed.nnz) # When norm is None and non_negative, the tokens are counted up to # collisions assert_array_equal(np.sort(X_counted.data), np.sort(X_hashed.data))
def sortrows(A, col=None): A = np.asarray(A) if not ismatrix(A): if isrow(A): return np.sort(A), np.argsort(A) else: return np.sort(A, axis=0), np.argsort(A, axis=0) # Sort the whole row if not col: col = list(range(A.shape[1])) nrows = A.shape[0] I = np.arange(nrows)[:, np.newaxis] A = np.concatenate((A, I), axis=1) A = np.asarray(sorted(A, key=operator.itemgetter(*col))) I = list(A[:, -1]) # get the index # convert to numeric if index in string for n, i in enumerate(I): if not isnumeric(i): I[n] = str2num(i) # I = I[:, np.newaxis] I = np.asarray(I) A = A[:, :-1] return A, I
def compute_steepness_vector(self): gradient_vec0 = np.zeros((self.size_ws)) tempvec = np.zeros((self.Y.shape[1])) _interactive_rls_classifier.compute_gradient(self.Y_ws, gradient_vec0, self.classcounts_ws, self.classvec_ws, self.size_ws, self.size, self.DVTY, self.sqrtRx2_ws, self.sqrtR.shape[1], 0, tempvec, self.Y.shape[1]) gradient_vec1 = np.zeros((self.size_ws)) tempvec = np.zeros((self.Y.shape[1])) _interactive_rls_classifier.compute_gradient(self.Y_ws, gradient_vec1, self.classcounts_ws, self.classvec_ws, self.size_ws, self.size, self.DVTY, self.sqrtRx2_ws, self.sqrtR.shape[1], 1, tempvec, self.Y.shape[1]) steepness_vector = np.zeros((self.size_ws)) steepness_vector[0:self.classcounts_ws[1]] = np.sort(gradient_vec0)[0:self.classcounts_ws[1]][::-1] steepness_vector[self.classcounts_ws[1]:] = np.sort(gradient_vec1)[0:self.classcounts_ws[0]] #print steepness_vector return steepness_vector
def brightestPxl(img, threshold, **kwargs):
    """
    Centroids using brightest Pixel Algorithm
    (A. G. Basden et al, MNRAS, 2011)

    Finds the nPxlsth brightest pixel, subtracts that value from frame,
    sets anything below 0 to 0, and finally takes centroid.

    Parameters:
        img (ndarray): 2d or greater rank array of imgs to centroid
        threshold (float): Fraction of pixels to use for centroid

    Returns:
        ndarray: Array of centroid values
    """
    nPxls = int(round(threshold * img.shape[-1] * img.shape[-2]))

    if len(img.shape) == 2:
        pxlValue = numpy.sort(img.flatten())[-nPxls]
        img -= pxlValue
        img.clip(0, img.max(), out=img)

    elif len(img.shape) == 3:
        pxlValues = numpy.sort(
            img.reshape(img.shape[0], img.shape[-1] * img.shape[-2]))[:, -nPxls]
        img[:] = (img.T - pxlValues).T
        img.clip(0, img.max(), out=img)

    return centreOfGravity(img)
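# A quick standalone check of the brightest-pixel thresholding step used in
# brightestPxl() (assumes numpy is imported; centreOfGravity is not exercised
# here and the test image is made up).
img = numpy.random.rand(8, 8)
threshold = 0.25                                    # keep the brightest 25% of pixels
nPxls = int(round(threshold * img.size))
pxlValue = numpy.sort(img.ravel())[-nPxls]          # value of the nPxls-th brightest pixel
clipped = numpy.clip(img - pxlValue, 0, None)
assert (clipped > 0).sum() < nPxls                  # only pixels above the cut survive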
def _cells_to_rects(self, cells): """ Converts the extents of a list of cell grid coordinates (i,j) into a list of rect tuples (x,y,w,h). The set should be disjoint, but may or may not be minimal. """ # Since this function is generally used to generate clipping regions # or other screen-related graphics, we should try to return large # rectangular blocks if possible. # For now, we just look for horizontal runs and return those. cells = array(cells) y_sorted = sort_points(cells, index=1) # sort acoording to row rownums = sort(array(tuple(set(cells[:,1])))) row_start_indices = searchsorted(y_sorted[:,1], rownums) row_end_indices = left_shift(row_start_indices, len(cells)) rects = [] for rownum, start, end in zip(rownums, row_start_indices, row_end_indices): # y_sorted is sorted by the J (row) coordinate, so after we # extract the column indices, we need to sort them before # passing them to find_runs(). grid_column_indices = sort(y_sorted[start:end][:,0]) #pdb.set_trace() #print grid_column_indices.shape for span in find_runs(grid_column_indices): x = self._cell_lefts[span[0]] y = self._cell_bottoms[rownum] w = (span[-1] - span[0] + 1) * self._cell_extents[0] h = self._cell_extents[1] rects.append((x,y,w,h)) return rects
def intervalo_confianza(muestra_x, muestra_y, err_x, err_y, porcentaje):
    '''
    Finds a confidence interval. Generates random samples based on the
    experimental data and, from each sample, obtains the value of the
    appropriate constants for the linear model (Monte Carlo method).
    porcentaje is the percentage level of the confidence interval sought.
    '''
    N = len(muestra_x)
    Nmc = 10000
    promedios_a = np.zeros(Nmc)
    promedios_b = np.zeros(Nmc)
    for i in range(Nmc):
        r = np.random.normal(0, 1, size=len(muestra_x))
        x_i = muestra_x + err_x * r
        y_i = muestra_y + err_y * r
        a_i, b_i = biseccion(x_i, y_i)
        promedios_a[i] = a_i
        promedios_b[i] = b_i
    promedios_a = np.sort(promedios_a)
    promedios_b = np.sort(promedios_b)
    minim = ((100 - porcentaje) / 2) * 0.01
    maxim = 1 - minim
    lim_min_a = promedios_a[int(Nmc * minim)]
    lim_max_a = promedios_a[int(Nmc * maxim)]
    lim_min_b = promedios_b[int(Nmc * minim)]
    lim_max_b = promedios_b[int(Nmc * maxim)]
    histograma_confianza(promedios_a, promedios_b, lim_min_a, lim_max_a,
                         lim_min_b, lim_max_b)
    return lim_min_a, lim_max_a, lim_min_b, lim_max_b
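# A small standalone check of the percentile-index selection used above,
# assuming numpy is imported as np; for a 95% interval, minim = 0.025 and
# maxim = 0.975, so the bounds are the 2.5th and 97.5th percentile samples.
Nmc = 10000
samples = np.sort(np.random.normal(0, 1, Nmc))
porcentaje = 95
minim = ((100 - porcentaje) / 2) * 0.01
maxim = 1 - minim
print(samples[int(Nmc * minim)], samples[int(Nmc * maxim)])   # roughly -1.96 and 1.96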
def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5), transform=lambda x: x): R"""Returns a dictionary of requested quantiles from array Parameters ---------- x : Numpy array An array containing MCMC samples qlist : tuple or list A list of desired quantiles (defaults to (2.5, 25, 50, 75, 97.5)) transform : callable Function to transform data (defaults to identity) Returns ------- `dictionary` with the quantiles {quantile: value} """ # Make a copy of trace x = transform(x.copy()) # For multivariate node if x.ndim > 1: # Transpose first, then sort, then transpose back sx = np.sort(x.T).T else: # Sort univariate node sx = np.sort(x) try: # Generate specified quantiles quants = [sx[int(len(sx) * q / 100.0)] for q in qlist] return dict(zip(qlist, quants)) except IndexError: pm._log.warning("Too few elements for quantile calculation")
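# A minimal usage sketch for quantiles() above (assumes numpy as np and that
# quantiles() is defined as written; the samples are synthetic).
samples = np.random.randn(10000)
q = quantiles(samples)
print(q[2.5], q[50], q[97.5])    # roughly -1.96, 0.0 and 1.96 for standard normal draws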
def computeError(self, Res, method="None"): """ Compute median absolute and relative errors """ absErr = np.abs(Res - self.trueRes) idx_nonzero = np.where(self.trueRes != 0) absErr_nonzero = absErr[idx_nonzero] true_nonzero = self.trueRes[idx_nonzero] relErr = absErr_nonzero / true_nonzero # log_str_rel = "\n".join(map(str, relErr)) # log_str_abs = "\n".join(map(str, absErr)) if Params.IS_LOGGING: log_str = "" for i in range(len(self.query_list)): area = rect_area(self.query_list[i]) query_str = str(self.query_list[i][0][0]) + "\t" + str(self.query_list[i][0][1]) + "\t" + str( self.query_list[i][1][0]) + "\t" + str(self.query_list[i][1][1]) + "\t" + str(area) err_str = str(self.trueRes[i]) + "\t" + str(Res[i]) + "\t" + str(absErr[i]) + "\t" + str(relErr[i]) log_str = log_str + query_str + "\t" + err_str + "\n" log(method, log_str) absErr = np.sort(absErr) relErr = np.sort(relErr) n_abs = len(absErr) n_rel = len(relErr) return absErr[int(n_abs / 2)], relErr[int(n_rel / 2)]
def plot_raw_data(ratings): """plot the statistics result on raw rating data.""" # do statistics. num_items_per_user = np.array((ratings != 0).sum(axis=0)).flatten() num_users_per_item = np.array((ratings != 0).sum(axis=1).T).flatten() sorted_num_movies_per_user = np.sort(num_items_per_user)[::-1] sorted_num_users_per_movie = np.sort(num_users_per_item)[::-1] # plot fig = plt.figure() ax1 = fig.add_subplot(1, 2, 1) ax1.plot(sorted_num_movies_per_user, color='blue') ax1.set_xlabel("users") ax1.set_ylabel("number of ratings (sorted)") ax1.grid() ax2 = fig.add_subplot(1, 2, 2) ax2.plot(sorted_num_users_per_movie) ax2.set_xlabel("items") ax2.set_ylabel("number of ratings (sorted)") ax2.set_xticks(np.arange(0, 2000, 300)) ax2.grid() plt.tight_layout() plt.savefig("stat_ratings") plt.show() # plt.close() return num_items_per_user, num_users_per_item
def test_fetch_rcv1(): try: data1 = fetch_rcv1(shuffle=False, download_if_missing=False) except IOError as e: if e.errno == errno.ENOENT: raise SkipTest("Download RCV1 dataset to run this test.") X1, Y1 = data1.data, data1.target cat_list, s1 = data1.target_names.tolist(), data1.sample_id # test sparsity assert_true(sp.issparse(X1)) assert_true(sp.issparse(Y1)) assert_equal(60915113, X1.data.size) assert_equal(2606875, Y1.data.size) # test shapes assert_equal((804414, 47236), X1.shape) assert_equal((804414, 103), Y1.shape) assert_equal((804414,), s1.shape) assert_equal(103, len(cat_list)) # test ordering of categories first_categories = [u'C11', u'C12', u'C13', u'C14', u'C15', u'C151'] assert_array_equal(first_categories, cat_list[:6]) # test number of sample for some categories some_categories = ('GMIL', 'E143', 'CCAT') number_non_zero_in_cat = (5, 1206, 381327) for num, cat in zip(number_non_zero_in_cat, some_categories): j = cat_list.index(cat) assert_equal(num, Y1[:, j].data.size) # test shuffling and subset data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77, download_if_missing=False) X2, Y2 = data2.data, data2.target s2 = data2.sample_id # test return_X_y option fetch_func = partial(fetch_rcv1, shuffle=False, subset='train', download_if_missing=False) check_return_X_y(data2, fetch_func) # The first 23149 samples are the training samples assert_array_equal(np.sort(s1[:23149]), np.sort(s2)) # test some precise values some_sample_ids = (2286, 3274, 14042) for sample_id in some_sample_ids: idx1 = s1.tolist().index(sample_id) idx2 = s2.tolist().index(sample_id) feature_values_1 = X1[idx1, :].toarray() feature_values_2 = X2[idx2, :].toarray() assert_almost_equal(feature_values_1, feature_values_2) target_values_1 = Y1[idx1, :].toarray() target_values_2 = Y2[idx2, :].toarray() assert_almost_equal(target_values_1, target_values_2)
def test_weaklimit(self): a = distributions.CRP(10,1) b = distributions.GammaCompoundDirichlet(1000,10,1) a.concentration = b.concentration = 10. from matplotlib import pyplot as plt plt.figure() crp_counts = np.zeros(10) gcd_counts = np.zeros(10) for itr in range(500): crp_rvs = np.sort(a.rvs(25))[::-1][:10] crp_counts[:len(crp_rvs)] += crp_rvs gcd_counts += np.sort(b.rvs(25))[::-1][:10] plt.plot(crp_counts/200,gcd_counts/200,'bx-') plt.xlim(0,10) plt.ylim(0,10) import os from mixins import mkdir figpath = os.path.join(os.path.dirname(__file__),'figures', self.__class__.__name__,'weaklimittest.pdf') mkdir(os.path.dirname(figpath)) plt.savefig(figpath)
def check_obs_scheme(self): " Checks the internal validity of provided observation schemes " # check sub_pops idx_union = np.sort(self._sub_pops[0]) i = 1 while idx_union.size < self._p and i < len(self._sub_pops): idx_union = np.union1d(idx_union, self._sub_pops[i]) i += 1 if idx_union.size != self._p or np.any(idx_union!=np.arange(self._p)): raise Exception(('all subpopulations together have to cover ' 'exactly all included observed varibles y_i in y.' 'This is not the case. Change the difinition of ' 'subpopulations in variable sub_pops or reduce ' 'the number of observed variables p. ' 'The union of indices of all subpopulations is'), idx_union ) # check obs_time if not self._obs_time[-1]==self._T: raise Exception(('Entries of obs_time give the respective ends of ' 'the periods of observation for any ' 'subpopulation. Hence the last entry of obs_time ' 'has to be the full recording length. The last ' 'entry of obs_time before is '), self._obs_time[-1]) if np.any(np.diff(self._obs_time)<1): raise Exception(('lengths of observation have to be at least 1. ' 'Minimal observation time for a subpopulation: '), np.min(np.diff(self._obs_time))) # check obs_pops if not self._obs_time.size == self._obs_pops.size: raise Exception(('each entry of obs_pops gives the index of the ' 'subpopulation observed up to the respective ' 'time given in obs_time. Thus the sizes of the ' 'two arrays have to match. They do not. ' 'no. of subpop. switch points and no. of ' 'subpopulations ovserved up to switch points ' 'are '), (self._obs_time.size, self._obs_pops.size)) idx_pops = np.sort(np.unique(self._obs_pops)) if not np.min(idx_pops)==0: raise Exception(('first subpopulation has to have index 0, but ' 'is given the index '), np.min(idx_pops)) elif not idx_pops.size == len(self._sub_pops): raise Exception(('number of specified subpopulations in variable ' 'sub_pops does not meet the number of ' 'subpopulations indexed in variable obs_pops. ' 'Delete subpopulations that are never observed, ' 'or change the observed subpopulations in ' 'variable obs_pops accordingly. The number of ' 'indexed subpopulations is '), len(self._sub_pops)) elif not np.all(np.diff(idx_pops)==1): raise Exception(('subpopulation indices have to be consecutive ' 'integers from 0 to the total number of ' 'subpopulations. This is not the case. ' 'Given subpopulation indices are '), idx_pops)
def test_multiindex_objects(self): mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], labels=[[0, 1, 0, 2], [2, 0, 0, 1]], names=['col1', 'col2']) recons = mi._sort_levels_monotonic() # these are equal assert mi.equals(recons) assert Index(mi.values).equals(Index(recons.values)) # _hashed_values and hash_pandas_object(..., index=False) # equivalency expected = hash_pandas_object( mi, index=False).values result = mi._hashed_values tm.assert_numpy_array_equal(result, expected) expected = hash_pandas_object( recons, index=False).values result = recons._hashed_values tm.assert_numpy_array_equal(result, expected) expected = mi._hashed_values result = recons._hashed_values # values should match, but in different order tm.assert_numpy_array_equal(np.sort(result), np.sort(expected))
def remove_outliers(inpd, min_fiber_distance, n_jobs=0, distance_method='Mean'):
    """ Remove fibers that have no other nearby fibers, i.e. outliers.

    The pairwise fiber distance matrix is computed, then fibers are rejected
    if their average neighbor distance (using closest 3 neighbors) is higher
    than min_fiber_distance.
    """

    fiber_array = fibers.FiberArray()
    #fiber_array.points_per_fiber = 5
    fiber_array.points_per_fiber = 10
    fiber_array.convert_from_polydata(inpd)

    fiber_indices = range(0, fiber_array.number_of_fibers)

    # squared distances are computed
    min_fiber_distance = min_fiber_distance * min_fiber_distance

    # pairwise distance matrix
    if USE_PARALLEL and n_jobs > 0:
        distances = Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(similarity.fiber_distance)(
                fiber_array.get_fiber(lidx),
                fiber_array,
                threshold=0,
                distance_method=distance_method)
            for lidx in fiber_indices)
        distances = numpy.array(distances)

        # now we check where there are no nearby fibers in d
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            dist = numpy.sort(distances[lidx, :])
            # robust minimum distance
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0
            #mindist[lidx] = (dist[1] + dist[2]) / 2.0
    else:
        # do this in a loop to use less memory. then parallelization can
        # happen over the number of subjects.
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            distances = similarity.fiber_distance(
                fiber_array.get_fiber(lidx), fiber_array, 0,
                distance_method=distance_method)
            dist = numpy.sort(distances)
            # robust minimum distance
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0

    # keep only fibers who have nearby similar fibers
    fiber_mask = mindist < min_fiber_distance

    num_fibers = "%d / %d" % (len(numpy.nonzero(fiber_mask)[0]), len(fiber_mask))
    print("<filter.py> Number retained after outlier removal: ", num_fibers)

    outpd = mask(inpd, fiber_mask, mindist)
    outpd_reject = mask(inpd, ~fiber_mask, mindist)

    return outpd, fiber_mask, outpd_reject
def index_trim_outlier(resid, k): '''returns indices to residual array with k outliers removed Parameters ---------- resid : array_like, 1d data vector, usually residuals of a regression k : int number of outliers to remove Returns ------- trimmed_index : array, 1d index array with k outliers removed outlier_index : array, 1d index array of k outliers Notes ----- Outliers are defined as the k observations with the largest absolute values. ''' sort_index = np.argsort(np.abs(resid)) # index of non-outlier trimmed_index = np.sort(sort_index[:-k]) outlier_index = np.sort(sort_index[-k:]) return trimmed_index, outlier_index
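# A minimal usage sketch for index_trim_outlier() (assumes numpy as np).
resid = np.array([0.1, -5.0, 0.3, 4.0, -0.2])
trimmed, outliers = index_trim_outlier(resid, 2)
print(trimmed)     # [0 2 4] -- the three small residuals, in original order
print(outliers)    # [1 3]   -- the two largest |resid|, in original order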
def indices_from_grid(c, ref): """ Convert coordinates to indices defined by grid of reference values. Parameters ---------- c : array of floats, shape (M,) Coordinates. ref : array of floats, shape (N,) Reference grid coordinates. They must be equally spaced. Returns ------- ind : arrays of floats Coordinates mapped onto the indices of the reference grid. """ ref = np.sort(ref) dref = ref[1:] - ref[:-1] dref0 = float(dref[0]) assert np.allclose(dref0, dref[1:]) c = np.sort(c) assert c[0] >= ref[0] and c[-1] <= ref[-1] ind = (c - ref[0]) / dref0 return ind
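# A minimal usage sketch for indices_from_grid() (assumes numpy as np).
ref = np.arange(0.0, 10.0, 0.5)            # equally spaced reference grid
c = np.array([1.0, 2.5, 7.25])
print(indices_from_grid(c, ref))           # [ 2.   5.  14.5] -- fractional grid indices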
def by_lblimg(self, lbldata): """ Get specific template regions by rois given by user All regions overlapped with a specific label region will be covered Parameters: ----------- lbldata: rois given by user Return: ------- out_template: new template contains part of regions if lbldata has multiple different rois, then new template will extract regions with each of roi given by user Example: -------- >>> glr_cls = GetLblRegion(template) >>> out_template = glr_cls.by_lblimg(lbldata) """ assert lbldata.shape == self._template.shape, "the shape of template should be equal to the shape of lbldata" labels = np.sort(np.unique(lbldata)[1:]).astype('int') out_template = np.zeros_like(lbldata) out_template = out_template[...,np.newaxis] out_template = np.tile(out_template, (1, len(labels))) for i,lbl in enumerate(labels): lbldata_tmp = tools.get_specificroi(lbldata, lbl) lbldata_tmp[lbldata_tmp!=0] = 1 part_template = self._template*lbldata_tmp template_lbl = np.sort(np.unique(part_template)[1:]) out_template[...,i] = tools.get_specificroi(self._template, template_lbl) return out_template
def get_fn(data, fp):
    """
    Given some scores data and a false positive rate, find the corresponding
    false negative rate and threshold in the ROC curve. If the point does not
    exist, it is interpolated.
    """
    if fp in data.fpr:
        pos = np.where(data.fpr == fp)
        fnr, thr = np.mean(data.fnr[pos]), np.mean(data.thrs[pos])
    else:
        # Set data for interpolation
        x = np.sort(data.fpr)
        # Set new range which includes the wanted value
        xnew = np.arange(fp, x[-1])
        # Interpolate the FN
        y = np.sort(data.tpr)
        f = interpolate.interp1d(x, y)
        tpr = f(xnew)[0]
        fnr = 1 - tpr
        # Interpolate the threshold
        y = np.sort(data.thrs)
        f = interpolate.interp1d(x, y)
        thr = f(xnew)[0]

    print("Given the fp value: {0}, the fnr value is: {1} and the threshold: {2} "
          .format(fp, fnr, thr))
    return fnr, thr
def stats(arr): """ Show the minimum, maximum median, mean, shape and size of an array. Also show the number of NaN entries (if any). """ arr = np.asarray(arr) shape = arr.shape arr = arr.ravel() size = len(arr) bad = np.isnan(arr) nbad = bad.sum() if nbad == size: return "#NaN %i of %i" % (nbad, size) elif nbad == 0: arr = np.sort(arr) else: arr = np.sort(arr[~bad]) if len(arr) % 2 == 0: i = len(arr) // 2 median = 0.5 * (arr[i - 1] + arr[i]) else: median = arr[len(arr) // 2] return "min %.5g max %.5g median %.5g mean %.5g shape %s #NaN %i of %i" % ( arr[0], arr[-1], median, arr.mean(), shape, nbad, size, )
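# A minimal usage sketch for stats() (assumes numpy as np); the NaN entry is
# excluded from the statistics but counted in the "#NaN" field.
arr = np.array([[1.0, 2.0], [np.nan, 4.0]])
print(stats(arr))
# -> something like "min 1 max 4 median 2 mean 2.3333 shape (2, 2) #NaN 1 of 4"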
def pick_channels(ch_names, include, exclude=[]):
    """Pick channels by names

    Returns the indices of the good channels in ch_names.

    Parameters
    ----------
    ch_names : list of string
        List of channels.
    include : list of string
        List of channels to include (if empty include all available).
    exclude : list of string
        List of channels to exclude (if empty do not exclude any channel).
        Defaults to [].

    Returns
    -------
    sel : array of int
        Indices of good channels.
    """
    if len(np.unique(ch_names)) != len(ch_names):
        raise RuntimeError('ch_names is not a unique list, picking is unsafe')
    sel = []
    for k, name in enumerate(ch_names):
        if (len(include) == 0 or name in include) and name not in exclude:
            sel.append(k)
    # np.unique already returns a sorted array, so no extra sort is needed
    sel = np.unique(sel)
    return sel
def test_mass_grid(self): """ Check that the mass-based grid is constructed correctly. """ ## Test typical input - should be sorted levels = utl.define_density_mass_grid(self.unique_density) answer = np.sort(self.unique_density) assert_array_equal(answer, levels) ## Test more levels than density values (answer is the same as typical # input). levels = utl.define_density_mass_grid(self.unique_density, num_levels=self.n * 2) assert_array_equal(answer, levels) ## Test fewer levels than density values. levels = utl.define_density_mass_grid(self.unique_density, num_levels=2) answer = np.array([1, 10]) assert_array_equal(answer, levels) ## Test negative values. levels = utl.define_density_mass_grid(self.generic_array) answer = np.sort(self.generic_array) assert_array_equal(answer, levels) ## Test uniform input. levels = utl.define_density_mass_grid(self.uniform_density) self.assertItemsEqual(levels, [1.])
def read_multivector_griddata_ascii(name_or_obj): """Read 2-d grid data from a text file. Each line has values `x0 x1 y0 y1 ...`. Space separated. Assumed to be grid of values. Parameters ---------- name_or_obj : str or file-like object The name of the file or a file-like object containing the data. Returns ------- x0 : numpy.ndarray 1-d array. x1 : numpy.ndarray 1-d array. y : numpy.ndarray 3-d array of shape ``(n, len(x0), len(x1))`` where ``n`` is the number of y values on each line. """ data = np.loadtxt(name_or_obj) x0 = np.sort(np.unique(data[:, 0])) x1 = np.sort(np.unique(data[:, 1])) y = np.zeros((len(data[0]) - 2, len(x0), len(x1))) for i0, p in enumerate(x0): for i1, q in enumerate(x1): ind = (data[:, 0] == p) & (data[:, 1] == q) y[:, i0, i1] = data[ind, 2:] return x0, x1, y
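# A minimal usage sketch for read_multivector_griddata_ascii(); a file-like
# object works because np.loadtxt accepts one (assumes numpy as np).
import io
text = io.StringIO("0 0 1 10\n0 1 2 20\n1 0 3 30\n1 1 4 40\n")
x0, x1, y = read_multivector_griddata_ascii(text)
print(x0, x1, y.shape)    # [0. 1.] [0. 1.] (2, 2, 2)
print(y[0])               # [[1. 2.] [3. 4.]] -- first y value on the (x0, x1) grid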
def regenerate_dim(x): """ assume x in ns since epoch from the current time """ msg = None # msg allows us to see which shot/diag was at fault diffs = np.diff(x) # bincount needs a positive input and needs an array with N elts where N is the largest number input small = (diffs > 0) & (diffs < 1000000) sorted_diffs = np.sort(diffs[np.where(small)[0]]) counts = np.bincount(sorted_diffs) bigcounts, bigvals = myhist(diffs[np.where(~small)[0]]) if pyfusion.VERBOSE>0: print('[[diff, count],....]') print('small:', [[argc, counts[argc]] for argc in np.argsort(counts)[::-1][0:5]]) print('big or negative:', [[bigvals[argc], bigcounts[argc]] for argc in np.argsort(bigcounts)[::-1][0:10]]) dtns = 1 + np.argmax(counts[1:]) # skip the first position - it is 0 # wgt0 = np.where(sorted_diffs > 0)[0] # we are in ns, so no worry about rounding histo = plt.hist if pyfusion.DBG() > 1 else np.histogram cnts, vals = histo(x, bins=200)[0:2] # ignore the two end bins - hopefully there will be very few there wmin = np.where(cnts[1:-1] < np.max(cnts[1:-1]))[0] if len(wmin)>0: print('**********\n*********** Gap in data > {p:.2f}%'.format(p=100*len(wmin)/float(len(cnts)))) x01111 = np.ones(len(x)) # x01111 will be all 1s except for the first elt. x01111[0] = 0 errcnt = np.sum(bigcounts) + np.sum(np.sort(counts)[::-1][1:]) if errcnt>0 or (pyfusion.VERBOSE > 0): msg = str('** repaired length of {l:,}, dtns={dtns:,}, {e} erroneous utcs' .format(l=len(x01111), dtns=dtns, e=errcnt)) fixedx = np.cumsum(x01111)*dtns wbad = np.where((x - fixedx)>1e8)[0] fixedx[wbad] = np.nan debug_(pyfusion.DEBUG, 3, key="repair", msg="repair of W7-X scrambled Langmuir timebase") return(fixedx, msg)
def test_non_euclidean_kneighbors():
    rng = np.random.RandomState(0)
    X = rng.rand(5, 5)

    # Find a reasonable radius.
    dist_array = np.sort(pairwise_distances(X).flatten())
    radius = dist_array[15]

    # Test kneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.kneighbors_graph(
            X, 3, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(3, metric=metric).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.kneighbors_graph(X).toarray())

    # Test radiusneighbors_graph
    for metric in ['manhattan', 'chebyshev']:
        nbrs_graph = neighbors.radius_neighbors_graph(
            X, radius, metric=metric).toarray()
        nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X)
        assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).toarray())

    # Raise error when wrong parameters are supplied,
    X_nbrs = neighbors.NearestNeighbors(3, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.kneighbors_graph, X_nbrs, 3,
                  metric='euclidean')
    X_nbrs = neighbors.NearestNeighbors(radius=radius, metric='manhattan')
    X_nbrs.fit(X)
    assert_raises(ValueError, neighbors.radius_neighbors_graph, X_nbrs, radius,
                  metric='euclidean')
def test_calculate_landslide_probability_lognormal_method(): """Testing the main method 'calculate_landslide_probability()' with 'lognormal' method. """ grid_2 = RasterModelGrid((5, 4), spacing=(0.2, 0.2)) gridnum = grid_2.number_of_nodes np.random.seed(seed=6) grid_2.at_node['topographic__slope'] = np.random.rand(gridnum) scatter_dat = np.random.randint(1, 10, gridnum) grid_2.at_node['topographic__specific_contributing_area']= ( np.sort(np.random.randint(30, 900, gridnum))) grid_2.at_node['soil__transmissivity']= ( np.sort(np.random.randint(5, 20, gridnum), -1)) grid_2.at_node['soil__mode_total_cohesion']= ( np.sort(np.random.randint(30, 900, gridnum))) grid_2.at_node['soil__minimum_total_cohesion']= ( grid_2.at_node['soil__mode_total_cohesion'] - scatter_dat) grid_2.at_node['soil__maximum_total_cohesion']= ( grid_2.at_node['soil__mode_total_cohesion'] + scatter_dat) grid_2.at_node['soil__internal_friction_angle']= ( np.sort(np.random.randint(26, 37, gridnum))) grid_2.at_node['soil__thickness']= ( np.sort(np.random.randint(1, 10, gridnum))) grid_2.at_node['soil__density']= (2000. * np.ones(gridnum)) ls_prob_lognormal = LandslideProbability(grid_2, number_of_iterations=10, groundwater__recharge_distribution='lognormal', groundwater__recharge_mean=5., groundwater__recharge_standard_deviation=0.25, seed=6) ls_prob_lognormal.calculate_landslide_probability() np.testing.assert_almost_equal( grid_2.at_node['landslide__probability_of_failure'][5], 0.8) np.testing.assert_almost_equal( grid_2.at_node['landslide__probability_of_failure'][9], 0.4)
def quantiles(x, qlist=(2.5, 25, 50, 75, 97.5)): """Returns a dictionary of requested quantiles from array :Arguments: x : Numpy array An array containing MCMC samples qlist : tuple or list A list of desired quantiles (defaults to (2.5, 25, 50, 75, 97.5)) """ # Make a copy of trace x = x.copy() # For multivariate node if x.ndim > 1: # Transpose first, then sort, then transpose back sx = np.sort(x.T).T else: # Sort univariate node sx = np.sort(x) try: # Generate specified quantiles quants = [sx[int(len(sx)*q/100.0)] for q in qlist] return dict(zip(qlist, quants)) except IndexError: print("Too few elements for quantile calculation")
def unit_maker(func, func0): "Test bn.(arg)partsort gives same output as bn.slow.(arg)partsort." msg = '\nfunc %s | input %s (%s) | shape %s | n %d | axis %s\n' msg += '\nInput array:\n%s\n' for i, arr in enumerate(arrays()): for axis in list(range(-arr.ndim, arr.ndim)) + [None]: if axis is None: n = arr.size else: n = arr.shape[axis] n = max(n // 2, 1) with np.errstate(invalid='ignore'): actual = func(arr.copy(), n, axis=axis) actual[:n] = np.sort(actual[:n], axis=axis) actual[n:] = np.sort(actual[n:], axis=axis) desired = func0(arr.copy(), n, axis=axis) if 'arg' in func.__name__: desired[:n] = np.sort(desired[:n], axis=axis) desired[n:] = np.sort(desired[n:], axis=axis) tup = (func.__name__, 'a'+str(i), str(arr.dtype), str(arr.shape), n, str(axis), arr) err_msg = msg % tup assert_array_equal(actual, desired, err_msg) err_msg += '\n dtype mismatch %s %s' if hasattr(actual, 'dtype') or hasattr(desired, 'dtype'): da = actual.dtype dd = desired.dtype assert_equal(da, dd, err_msg % (da, dd))
def test_fit(self): self.kmeans.fit(input_fn=self.input_fn(), steps=10) centers = normalize(self.kmeans.clusters()) self.assertAllClose( np.sort( centers, axis=0), np.sort( self.true_centers, axis=0))
def bbox(self): """numpy.ndarray(dtype=int): The bounding box of the object. Its default format is [[x_ll, y_ll], [x_ur, y_ur]]""" return np.sort(np.array([self.xy[0, :], self.xy[1, :]]), axis=0)
etr_y = etr.predict(X_test)

# Gradient boosting model
gbr = GradientBoostingRegressor()
gbr.fit(X_train, y_train)
gbr_y = gbr.predict(X_test)

# Report results for the single regression tree
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
print('R_squared value of DecisionTreeRegressor is ', dtr.score(X_test, y_test))
print('The mean squared error of DecisionTreeRegressor is ',
      mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y)))
print('The mean absolute error of DecisionTreeRegressor is ',
      mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(dtr_y)))

# Report results for the random forest
print('R_squared value of RandomForestRegressor is ', rfr.score(X_test, y_test))
print('The mean squared error of RandomForestRegressor is ',
      mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y)))
print('The mean absolute error of RandomForestRegressor is ',
      mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(rfr_y)))

# Report results for the extremely randomized trees
print('R_squared value of ExtraTreesRegressor is ', etr.score(X_test, y_test))
print('The mean squared error of ExtraTreesRegressor is ',
      mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y)))
print('The mean absolute error of ExtraTreesRegressor is ',
      mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(etr_y)))

# Output the importance of each feature
print(np.sort(list(zip(etr.feature_importances_, boston.feature_names)), axis=0))

# Report results for gradient boosting
print('R_squared value of GradientBoostingRegressor is ', gbr.score(X_test, y_test))
print('The mean squared error of GradientBoostingRegressor is ',
      mean_squared_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y)))
print('The mean absolute error of GradientBoostingRegressor is ',
      mean_absolute_error(ss_y.inverse_transform(y_test), ss_y.inverse_transform(gbr_y)))
with open('jpm_quotes.csv', 'r') as quotes_csv: quotes = np.array(list(csv.reader(quotes_csv))[1:]).astype('double') prices = quotes[:,1] counts = quotes[:,2] Neff = 32 results = compute_ab_ema(Neff, prices, counts) ema_prices = results[0] vwap_prices = results[2] ema_counts = results[1] Neff_star = results[3] Neff_star_sorted = np.sort(Neff_star) prob_axis = np.arange(len(Neff_star)) / (len(Neff_star) - 1) axarr[0, 0].set_title('Prices, Ema(dashed) and VWAP') axarr[0, 0].plot(prices) axarr[0, 0].plot(ema_prices, '--') axarr[0, 0].plot(vwap_prices) axarr[0, 1].set_title('Intensity series (quote counts)') axarr[0, 1].plot(counts) axarr[1, 0].set_title('Neff*') axarr[1, 0].plot(Neff_star) axarr[1, 1].set_title('CDF of Neff*') axarr[1, 1].plot(Neff_star_sorted, prob_axis)
cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array else: for j in xrange(1, imdb.num_classes): all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() if vis: cv2.imwrite('./output/vis/result_%d.png' % (i), im2show) #pdb.set_trace() #cv2.imshow('test', im2show)
                    action='store',
                    type=float,
                    dest='window_size',
                    help='hamming window size in seconds (default: 0.01)',
                    default=0.01)
args = parser.parse_args()

Fs, signal = wavfile.read(args.input_file)
signal = signal / max(abs(signal))
sampsPerMilli = Fs / 1000
millisPerFrame = int(args.window_size * 1000)
sampsPerFrame = int(sampsPerMilli * millisPerFrame)
nFrames = int(len(signal) / sampsPerFrame)

STEs = []
for k in range(nFrames):
    startIdx = k * sampsPerFrame
    stopIdx = startIdx + sampsPerFrame
    window = signal[startIdx:stopIdx]
    STE = np.sum(window**2) / np.float64(len(window))
    STEs.append(STE)

F = np.sort(STEs)[args.step_size:args.num_results * args.step_size + args.step_size:args.step_size]
seconds = [[np.where(STEs == e)[0][0] * millisPerFrame / 1000, e] for e in F]
seconds = np.array(seconds)
for s in seconds[seconds[:, 1].argsort()]:
    print("%.2f %.7f" % (s[0], s[1]))
def bbox(self): """numpy.ndarray(dtype=int): The bounding box for this object, represented as a Numpy array [[x_ll, y_ll], [x_ur, y_ur]].""" return np.sort(np.array([self.xy, self.xy]), axis=0)
def bbox(self): """numpy.ndarray(dtype=int): The bounding box of the object. Its default format is [[x0, y0], [x1, y1]], where [x0, y0] is the lower-left corner of this object, and [x1, y1] is the upper-right one.""" return np.sort(np.array([self.xy[0, :], self.xy[1, :]]), axis=0)
def load_umc_sheets(data_dir="/home/matthias/Data/umc_mozart", require_performance=False): """ load unwarpped sheets """ import glob import cv2 # initialize omr system from omr.omr_app import OpticalMusicRecognizer from omr.utils.data import prepare_image from lasagne_wrapper.network import SegmentationNetwork from omr.models import system_detector, bar_detector net = system_detector.build_model() system_net = SegmentationNetwork(net, print_architecture=False) system_net.load('sheet_utils/omr_models/system_params.pkl') net = bar_detector.build_model() bar_net = SegmentationNetwork(net, print_architecture=False) bar_net.load('sheet_utils/omr_models/bar_params.pkl') piece_names = [] unwrapped_sheets = [] piece_paths = [] # get list of all pieces piece_dirs = np.sort(glob.glob(os.path.join(data_dir, '*'))) n_pieces = len(piece_dirs) # iterate pieces kept_pages = 0 for i_piece, piece_dir in enumerate(piece_dirs): piece_name = piece_dir.split('/')[-1] # if "214_" not in piece_name: # continue print(col.print_colored("Processing piece %d of %d (%s)" % (i_piece + 1, n_pieces, piece_name), col.OKBLUE)) # check if there is a performance if require_performance and len(glob.glob(os.path.join(piece_dir, "*performance*"))) == 0: print("No performance found!") continue # load pages page_paths = np.sort(glob.glob(os.path.join(piece_dir, "sheet/*.png"))) if len(page_paths) == 0: print("No sheet available!!!") continue unwrapped_sheet = np.zeros((SYSTEM_HEIGHT, 0), dtype=np.uint8) system_problem = False for i_page, page_path in enumerate(page_paths): kept_pages += 1 # load sheet image I = cv2.imread(page_path, 0) # load system coordinates # page_id = i_page + 1 # page_systems = np.load(os.path.join(piece_dir, "coords", "systems_%02d.npy" % (i_page + 1))) # detect systems I_prep = prepare_image(I) omr = OpticalMusicRecognizer(note_detector=None, system_detector=system_net, bar_detector=bar_net) try: page_systems = omr.detect_systems(I_prep, verbose=False) except: print("Problem in system detection!!!") system_problem = True continue # plt.figure("System Localization") # plt.clf() # plt.imshow(I, cmap=plt.cm.gray) # plt.xlim([0, I.shape[1] - 1]) # plt.ylim([I.shape[0] - 1, 0]) # for system in page_systems: # plt.plot(system[:, 1], system[:, 0], 'mo', alpha=0.5) # plt.show(block=True) # unwrap sheet for system in page_systems: r0 = int(np.mean([system[0, 0], system[2, 0]])) - SYSTEM_HEIGHT // 2 r1 = r0 + SYSTEM_HEIGHT c0 = int(system[0, 1]) c1 = int(system[1, 1]) # fix row slice coordinates r0 = max(0, r0) r1 = min(r1, I.shape[0]) r0 = max(r0, r1 - SYSTEM_HEIGHT) staff_img = I[r0:r1, c0:c1].astype(np.uint8) if staff_img.shape[0] < SYSTEM_HEIGHT: to_pad = SYSTEM_HEIGHT - staff_img.shape[0] if to_pad > (0.1 * SYSTEM_HEIGHT): print("Problem in system padding!!!") continue staff_img = np.pad(staff_img, ((0, to_pad), (0, 0)), mode="edge") unwrapped_sheet = np.hstack((unwrapped_sheet, staff_img)) # plt.figure("Unwrapped") # plt.imshow(unwrapped_sheet) # plt.show(block=True) if not system_problem: piece_names.append(piece_name) piece_paths.append(piece_dir) unwrapped_sheets.append(unwrapped_sheet) print("%d pieces covering %d pages of sheet music." % (len(piece_names), kept_pages)) return piece_names, piece_paths, unwrapped_sheets
def cluster(coord_array, tri_array, v, simil): """ Function to find clusters of points with similar heat persistence values :param coord_array: xyz coordinates per vertex in array of shape=(#points, 3) :param tri_array: vertex connection indices of the part in array of shape=(#triangles, 3) :param simil: array with cluster similarity fractions :param v: (#points x 2) array with cluster points for part specified :return clusters: array of shape=(2*len(simil), #points) with cluster index/persistence values on even/odd rows """ print("Finding clusters..") simil = np.sort(list( set(simil))) # Remove duplicate entries in simil, order unordered set clusters = np.zeros(shape=(2 * len(simil), coord_array.shape[0])) for l in range(len(simil)): starttime = datetime.datetime.now() newcluster = cluster5(coord_array, tri_array, simil[l], v) newcluster_i = newcluster[0, :] newcluster_v = newcluster[1, :] clmat = get_cluster_adj_matrix(newcluster_i, tri_array) # find very small clusters for tcli in range(int(np.amax(newcluster_i))): if np.count_nonzero(newcluster_i == tcli + 1) < 3: # combine small clusters to smallest cluster of their neighbors neis_tcl = np.nonzero(clmat[:, tcli])[0] count = 0 if np.size(neis_tcl) > 0: for nei in neis_tcl: # find biggest neighbor cluster nnei = np.count_nonzero(newcluster_i == nei + 1) if nnei > count: com_nei = nei count = nnei pts_nei = np.nonzero( newcluster_i == com_nei + 1)[0] # index of points in chosen cluster v_nei = np.amax(newcluster_v[pts_nei]) # value of points newcluster_v[newcluster_i == tcli + 1] = v_nei newcluster_i[newcluster_i == tcli + 1] = com_nei + 1 # resort the index of clusters, remove empty cluster while len(np.unique(newcluster_i)) != np.amax(newcluster_i): for i in range(1, int(np.amax(newcluster_i)) + 1): if i not in newcluster_i: for n in range(len(newcluster_i)): if newcluster_i[n] > i: newcluster_i[n] -= 1 newcluster[0, :] = newcluster_i newcluster[1, :] = newcluster_v clusters[2 * l:2 * l + 2, :] = newcluster endtime = datetime.datetime.now() elapsedtime = (endtime - starttime).seconds print( "%d%% similarity complete. %d clusters found. Elapsed time is %s seconds." % (simil[l] * 100, np.amax(newcluster_i), elapsedtime)) np.savez_compressed('temp/clusters', clusters=clusters) print("Clusters complete.\n") return clusters
def test_pi_positive(pts): pi = PersistenceImage(sigma=1) diagrams = np.expand_dims(np.concatenate([ np.sort(pts, axis=1), np.zeros((pts.shape[0], 1))], axis=1), axis=0) assert np.all(pi.fit_transform(diagrams) >= 0.)
# turn into timestamps times = np.cumsum(gaps, axis=0) # draw from each column according to distribution pass_on = np.random.uniform(size=times.shape) < probabilities # sanity check - are the correct probabilites demonstrated and the correct # lambdas? print('simulated probabilities:', p_e:= np.mean(pass_on, axis=0), 'expected:', probabilities, '\ndiff:', p_e - probabilities, end='\n\n') print('simultated lambdas:', l_e := np.mean(gaps, axis=0), 'expected:', lambdas, '\ndiff:', l_e - lambdas, end='\n\n') # concatenate arrays and remove unwanted supervisor = np.sort(np.concatenate(times)[np.concatenate(pass_on)]) # remove any past the last entry of the shortest simulation (to ensure all # streams run for the same amount of time) supervisor = supervisor[supervisor < np.min(times[-1,:])] # print the final estimate print('the mean time between customers for the supervisor was', res := np.diff(supervisor).mean(), '\nThis is accurate to' f' {np.abs(res - 1155/2648)/1155*2648 * 100:.2f}%.') # Bonus: save data for visualization import json data_size = 500 # number of samples to save maxtime = supervisor[data_size]
def evaluate_recall(json_dataset, roidb, thresholds=None, area='all', limit=None): """Evaluate detection proposal recall metrics. This function is a much faster alternative to the official COCO API recall evaluation code. However, it produces slightly different results. """ # Record max overlap value for each gt box # Return vector of overlap values areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3, '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7 } area_ranges = [ [0**2, 1e5**2], # all [0**2, 32**2], # small [32**2, 96**2], # medium [96**2, 1e5**2], # large [96**2, 128**2], # 96-128 [128**2, 256**2], # 128-256 [256**2, 512**2], # 256-512 [512**2, 1e5**2] ] # 512-inf assert area in areas, 'Unknown area range: {}'.format(area) area_range = area_ranges[areas[area]] gt_overlaps = np.zeros(0) num_pos = 0 for entry in roidb: gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0] gt_boxes = entry['boxes'][gt_inds, :] gt_areas = entry['seg_areas'][gt_inds] valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0] gt_boxes = gt_boxes[valid_gt_inds, :] num_pos += len(valid_gt_inds) non_gt_inds = np.where(entry['gt_classes'] == 0)[0] boxes = entry['boxes'][non_gt_inds, :] if boxes.shape[0] == 0: continue if limit is not None and boxes.shape[0] > limit: boxes = boxes[:limit, :] overlaps = box_utils.bbox_overlaps( boxes.astype(dtype=np.float32, copy=False), gt_boxes.astype(dtype=np.float32, copy=False)) _gt_overlaps = np.zeros((gt_boxes.shape[0])) for j in range(min(boxes.shape[0], gt_boxes.shape[0])): # find which proposal box maximally covers each gt box argmax_overlaps = overlaps.argmax(axis=0) # and get the iou amount of coverage for each gt box max_overlaps = overlaps.max(axis=0) # find which gt box is 'best' covered (i.e. 'best' = most iou) gt_ind = max_overlaps.argmax() gt_ovr = max_overlaps.max() assert gt_ovr >= 0 # find the proposal box that covers the best covered gt box box_ind = argmax_overlaps[gt_ind] # record the iou coverage of this gt box _gt_overlaps[j] = overlaps[box_ind, gt_ind] assert _gt_overlaps[j] == gt_ovr # mark the proposal box and the gt box as used overlaps[box_ind, :] = -1 overlaps[:, gt_ind] = -1 # append recorded iou coverage level gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps)) gt_overlaps = np.sort(gt_overlaps) if thresholds is None: step = 0.05 thresholds = np.arange(0.5, 0.95 + 1e-5, step) recalls = np.zeros_like(thresholds) # compute recall for each iou threshold for i, t in enumerate(thresholds): recalls[i] = (gt_overlaps >= t).sum() / float(num_pos) # ar = 2 * np.trapz(recalls, thresholds) ar = recalls.mean() return { 'ar': ar, 'recalls': recalls, 'thresholds': thresholds, 'gt_overlaps': gt_overlaps, 'num_pos': num_pos }
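# A hedged mini-demo of the greedy proposal-to-gt matching loop used in
# evaluate_recall() above, run on a hand-made overlaps matrix instead of real
# boxes (assumes numpy as np; no detectron box utilities are involved).
overlaps = np.array([[0.7, 0.2],
                     [0.4, 0.9],
                     [0.1, 0.3]])              # 3 proposals x 2 gt boxes
gt_overlaps = np.zeros(overlaps.shape[1])
for j in range(min(overlaps.shape)):
    max_overlaps = overlaps.max(axis=0)        # best coverage of each gt box
    argmax_overlaps = overlaps.argmax(axis=0)  # which proposal achieves it
    gt_ind = max_overlaps.argmax()             # best-covered gt box this round
    box_ind = argmax_overlaps[gt_ind]
    gt_overlaps[gt_ind] = overlaps[box_ind, gt_ind]
    overlaps[box_ind, :] = -1                  # mark the proposal as used
    overlaps[:, gt_ind] = -1                   # mark the gt box as used
print(np.sort(gt_overlaps))                    # [0.7 0.9]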
def generate_maps(): import random print 'Generating optic aberration maps using Proper' wfo = proper.prop_begin(tp.diam, 1., tp.grid_size, tp.beam_ratio) # rms_error = 5e-6#500.e-9 # RMS wavefront error in meters # c_freq = 0.005 # correlation frequency (cycles/meter) # high_power = 1. # high frewquency falloff (r^-high_power) rms_error = 2.5e-3 #500.e-9 # RMS wavefront error in meters c_freq = 0.000005 # correlation frequency (cycles/meter) high_power = 1. # high frewquency falloff (r^-high_power) # tp.abertime = [0.5,2,10] # characteristic time for each aberation in secs tp.abertime = [ 100 ] # if beyond numframes then abertime will be auto set to duration of simulation abercubes = [] for abertime in tp.abertime: # ap.numframes = 100 aberfreq = 1. / abertime # tp.abertime=2 # aberfreq: number of frames goals per sec? num_longframes = aberfreq * ap.numframes * cp.frame_time print num_longframes, ap.numframes, cp.frame_time aber_cube = np.zeros((ap.numframes + 1, tp.grid_size, tp.grid_size)) lin_size = tp.grid_size**2 # spacing = int(ap.numframes/num_longframes) # frame_idx = np.int_(np.linspace(0,ap.numframes,num_longframes+1)) c = range(0, ap.numframes) print num_longframes frame_idx = np.sort(random.sample(c, int(num_longframes + 1 - 2))) # frame_idx = np.int_(np.sort(np.round(np.random.uniform(0,ap.numframes,num_longframes+1-2)))) frame_idx = np.hstack(([0], frame_idx, [ap.numframes])) # frame_idx = [0, 15, 69, 278, 418, 703, 1287, 1900, 3030, 3228, 5000] print frame_idx for f in frame_idx: aber_cube[f] = proper.prop_psd_errormap( wfo, rms_error, c_freq, high_power, MAP="prim_map") #FILE=td.aberdir+'/telzPrimary_Map.fits') # quicklook_im(aber_cube[f], logAmp=False) for i, f in enumerate(frame_idx[:-1]): spacing = int(frame_idx[i + 1] - frame_idx[i]) # quicklook_im(aber_cube[f], logAmp=False, show=False) frame1 = aber_cube[f] frame2 = aber_cube[frame_idx[i + 1]] lin_map = [ np.linspace(f1, f2, spacing) for f1, f2 in zip( frame1.reshape(lin_size), frame2.reshape(lin_size)) ] interval_cube = np.array(lin_map).reshape(tp.grid_size, tp.grid_size, spacing) interval_cube = np.transpose(interval_cube) print i, f, frame_idx[i], frame_idx[i + 1], np.shape(interval_cube) # loop_frames(interval_cube, logAmp=False) aber_cube[f:frame_idx[i + 1]] = interval_cube abercubes.append(aber_cube) plt.plot(aber_cube[:, 20, 20]) plt.show() abercubes = np.array(abercubes) # print abercubes.shape # plt.plot(aber_cube[:,20,20]) aber_cube = np.sum(abercubes, axis=0) plt.plot(aber_cube[:, 20, 20]) plt.show() if not os.path.isdir(iop.aberdir): os.mkdir(iop.aberdir) for f in range(0, ap.numframes, 1): # print 'saving frame #', f if f % 100 == 0: misc.progressBar(value=f, endvalue=ap.numframes) rawImageIO.saveFITS(aber_cube[f], '%stelz%f.fits' % (iop.aberdir, f * cp.frame_time)) # quicklook_im(aber_cube[f], logAmp=False, show=True) plt.show()
def fit_variance(human_data_original, save_path, trial_type, model_params_df, constraints): # written to fit 4 parameters but we hold the likelihood width at 0 so its not stochastic at all, test_x = np.arange(0, 400, 20) collumn_values = [""] if constraints is not "": collumn_values = np.unique(human_data_original[constraints]) for collumn_value in collumn_values: # only filter results by the constraint if a constraint is parsed if collumn_value is not "": human_data = human_data_original[( human_data_original[constraints] == collumn_value)] else: human_data = human_data_original group_parameter_fits = model_params_df[ (model_params_df["ppid"] == "group") & (model_params_df["trial_type"] == trial_type) & (model_params_df[constraints] == collumn_value)] model_params.likelihood_width = 0 model_params.gamma = group_parameter_fits["gamma"].iloc[0] model_params.lambda_coef = group_parameter_fits["lambda"].iloc[0] model_params.k = group_parameter_fits["k"].iloc[0] human_data = human_data.dropna() human_mean_data = human_data.groupby([ 'Trial type', 'integration_length' ])['first_stop_location'].mean().reset_index() human_mean_data_with_id = human_data.groupby( ['Trial type', 'integration_length', "ppid"])['first_stop_location'].mean().reset_index() human_std_data_with_id = human_data.groupby( ['Trial type', 'integration_length', "ppid"])['first_stop_location'].std().reset_index() human_std_data_group = human_data.groupby([ 'Trial type', 'integration_length' ])['first_stop_location'].std().reset_index() human_mean_with_id_x = np.asarray( human_mean_data_with_id[(human_mean_data_with_id["Trial type"] == trial_type)]['integration_length']) human_mean_with_id_y = np.asarray(human_mean_data_with_id[( human_mean_data_with_id["Trial type"] == trial_type )]['first_stop_location']) human_mean_data_x = np.asarray( human_mean_data[(human_mean_data["Trial type"] == trial_type )]['integration_length']) human_mean_data_y = np.asarray( human_mean_data[(human_mean_data["Trial type"] == trial_type )]['first_stop_location']) human_data_x = np.asarray(human_data[( human_data["Trial type"] == trial_type)]['integration_length']) human_data_y = np.asarray( human_data[(human_data["Trial type"] == trial_type )]['first_stop_location']) human_data_y_std = compute_sigmas(human_data_x, human_data_y) #human_data_with_std_individual = get_std_with_id(human_data, human_std_data_with_id, y_column='first_stop_location') #human_data_with_std_group = get_std_with_group(human_data, human_std_data_group, y_column='first_stop_location') #human_data_std_id = np.asarray(human_data_with_std_individual[(human_data_with_std_individual["Trial type"] == trial_type)]['y_std']) #human_data_std_group = np.asarray(human_data_with_std_group[(human_data_with_std_group["Trial type"] == trial_type)]['y_std']) var_param = fit_variance_to_model(human_data_x, human_data_y_std) # plotting model fit test_x = np.sort(human_data_x) best_fit_responses, best_fit_sigmas = simple_variance_model_full( test_x, likelihood_width=var_param[0]) # plot optimised response target fig = plt.figure(figsize=(6, 6)) ax = fig.add_subplot(1, 1, 1) #stops per trial plt.title("All subjects", fontsize="20") plt.scatter(human_mean_with_id_x, human_mean_with_id_y, color="r", marker="o") plt.plot(human_mean_data_x, human_mean_data_y, "r", label="data") _, unique_idx = np.unique(test_x, return_index=True) unique_mask = create_mask(indices=unique_idx, size=len(test_x)) model_data_x_means, model_data_y_std_means = sort_by_other_array( first_array_orderby=test_x[unique_mask], 
second_array=compute_means(test_x, best_fit_responses)[unique_mask]) plt.plot(model_data_x_means, model_data_y_std_means, "g", label="model") plt.plot(np.arange(0, 400), np.arange(0, 400), "k--", label="Unity") plt.xlabel("Target (VU)", fontsize=20) plt.xlim((0, 400)) plt.ylim((0, 400)) plt.ylabel("Response (VU)", fontsize=20) plt.subplots_adjust(left=0.2) ax.tick_params(axis='both', which='major', labelsize=15) plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) textstr = '\n'.join( (r'$\Gamma=%.2f$' % (model_params.gamma, ), r'$\lambda=%.2f$' % (model_params.lambda_coef, ), r'$\mathrm{k}=%.2f$' % (model_params.k, ), r'$\L=%.2f$' % (var_param[0], ))) props = dict(boxstyle='round', facecolor='white', alpha=0.5) ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props) plt.legend(loc="upper left") if constraints is not "": plt.savefig(save_path + "\\" + trial_type + "_" + constraints + remove_dots(str(collumn_value)) + "_group_model_fit.png") else: plt.savefig(save_path + "\\" + trial_type + "_" + constraints + "_group_model_fit.png") plt.show() plt.close() # plot optimised variance target fig = plt.figure(figsize=(6, 6)) ax = fig.add_subplot(1, 1, 1) #stops per trial plt.title("All subjects", fontsize="20") plt.scatter(human_data_x, human_data_y_std, color="r", marker="o") _, unique_idx = np.unique(human_data_x, return_index=True) unique_mask = create_mask(indices=unique_idx, size=len(human_data_x)) human_data_x_means, human_data_y_std_means = sort_by_other_array( first_array_orderby=human_data_x[unique_mask], second_array=compute_means(human_data_x, human_data_y_std)[unique_mask]) plt.plot(human_data_x_means, human_data_y_std_means, "r", label="data") _, unique_idx = np.unique(test_x, return_index=True) unique_mask = create_mask(indices=unique_idx, size=len(test_x)) model_data_x_means, model_data_y_std_means = sort_by_other_array( first_array_orderby=test_x[unique_mask], second_array=compute_means(test_x, best_fit_sigmas)[unique_mask]) plt.plot(model_data_x_means, model_data_y_std_means, "g", label="model") plt.xlabel("Target (VU)", fontsize=20) plt.xlim((0, 400)) plt.ylim((0, 100)) plt.ylabel("Response STD (VU)", fontsize=20) plt.subplots_adjust(left=0.2) ax.tick_params(axis='both', which='major', labelsize=15) plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) textstr = '\n'.join( (r'$\Gamma=%.2f$' % (model_params.gamma, ), r'$\lambda=%.2f$' % (model_params.lambda_coef, ), r'$\mathrm{k}=%.2f$' % (model_params.k, ), r'$\L=%.2f$' % (var_param[0], ))) props = dict(boxstyle='round', facecolor='white', alpha=0.5) ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props) plt.legend(loc="upper left") if constraints is not "": plt.savefig(save_path + "\\" + trial_type + "_" + constraints + remove_dots(str(collumn_value)) + "_group_model_variance_fit.png") else: plt.savefig(save_path + "\\" + trial_type + "_" + constraints + "_group_model_variance_fit.png") plt.show() plt.close() ''' # now we do it per subject ppids = np.unique(human_data["ppid"]) subjects_model_params = np.zeros((len(ppids), 4)) # fitting 3 parameters for j in range(len(ppids)): subject_parameter_fits = model_params_df[(model_params_df["ppid"] == ppids[j]) & (model_params_df["trial_type"] == trial_type) & (model_params_df[constraints] == collumn_value)] model_params.likelihood_width = 0 model_params.gamma = subject_parameter_fits["gamma"].iloc[0] model_params.lambda_coef = subject_parameter_fits["lambda"].iloc[0] 
model_params.k = subject_parameter_fits["k"].iloc[0] subject_data_mean_x = np.asarray(human_mean_data_with_id[(human_mean_data_with_id["Trial type"] == trial_type) & (human_mean_data_with_id["ppid"] == ppids[j])]['integration_length']) subject_data_mean_y = np.asarray(human_mean_data_with_id[(human_mean_data_with_id["Trial type"] == trial_type) & (human_mean_data_with_id["ppid"] == ppids[j])]['first_stop_location']) subject_data_x = np.asarray(human_data[(human_data["Trial type"] == trial_type) & (human_data["ppid"] == ppids[j])]['integration_length']) subject_data_y = np.asarray(human_data[(human_data["Trial type"] == trial_type) & (human_data["ppid"] == ppids[j])]["first_stop_location"]) subject_data_y_std = compute_sigmas(subject_data_x, subject_data_y) subject_human_data_with_std_individual = np.asarray(human_data_with_std_individual[(human_data_with_std_individual["Trial type"] == trial_type) & (human_data_with_std_individual["ppid"] == ppids[j])]["y_std"]) sub_var_param = fit_variance_to_model(subject_data_x, subject_data_y_std) subjects_model_params[j] = sub_var_param # plotting model fit best_fit_responses, best_fit_sigmas = simple_variance_model_full(test_x, likelihood_width=sub_var_param[0]) # plot optimised response target fig = plt.figure(figsize = (6,6)) ax = fig.add_subplot(1,1,1) #stops per trial plt.title(ppids[j], fontsize="20") plt.scatter(subject_data_x, subject_data_y, color="r", marker="o") plt.plot(subject_data_mean_x, subject_data_mean_y, "r", label="data") plt.plot(test_x, best_fit_responses, "g", label="model") plt.plot(np.arange(0,400), np.arange(0,400), "k--", label="Unity") plt.xlabel("Target", fontsize=20) plt.xlim((0,400)) plt.ylim((0,400)) plt.ylabel("Optimal Response", fontsize=20) plt.subplots_adjust(left=0.2) ax.tick_params(axis='both', which='major', labelsize=15) plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) textstr = '\n'.join(( r'$\Gamma=%.2f$' % (model_params.gamma, ), r'$\lambda=%.2f$' % (model_params.lambda_coef, ), r'$\mathrm{k}=%.2f$' % (model_params.k,), r'$\L=%.2f$' % (sub_var_param[0],))) props = dict(boxstyle='round', facecolor='white', alpha=0.5) ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props) plt.legend(loc="upper left") plt.savefig(save_path+"\\"+trial_type+"_"+ppids[j]+"_model_stochastic_fit.png") plt.show() plt.close() # plot optimised variance target fig = plt.figure(figsize = (6,6)) ax = fig.add_subplot(1,1,1) #stops per trial plt.title(ppids[j], fontsize="20") plt.scatter(subject_data_x, subject_data_y_std, color="r", marker="o") _, unique_idx = np.unique(subject_data_x, return_index=True) unique_mask = create_mask(indices=unique_idx, size=len(subject_data_x)) subject_data_x_means, subject_data_y_std_means = sort_by_other_array(first_array_orderby= subject_data_x[unique_mask], second_array=compute_means(subject_data_x, subject_data_y_std)[unique_mask]) plt.plot(subject_data_x_means, subject_data_y_std_means, "r", label="data") plt.plot(test_x, best_fit_sigmas, "g", label="model") #plt.plot(np.arange(0,400), np.arange(0,400), "k--", label="Unity") plt.xlabel("Target (VU)", fontsize=20) plt.xlim((0,400)) plt.ylim((0,100)) plt.ylabel("Response SD (VU)", fontsize=20) plt.subplots_adjust(left=0.2) ax.tick_params(axis='both', which='major', labelsize=15) plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) textstr = '\n'.join(( r'$\Gamma=%.2f$' % (model_params.gamma, ), r'$\lambda=%.2f$' % (model_params.lambda_coef, ), 
r'$\mathrm{k}=%.2f$' % (model_params.k,), r'$\L=%.2f$' % (sub_var_param[0],))) props = dict(boxstyle='round', facecolor='white', alpha=0.5) ax.text(0.80, 0.05, textstr, transform=ax.transAxes, fontsize=14, bbox=props) plt.legend(loc="upper left") if constraints is not "": plt.savefig(save_path+"\\"+trial_type+"_"+constraints+remove_dots(str(collumn_value))+"_group_model_variance_fit.png") else: plt.savefig(save_path+"\\"+trial_type+"_"+constraints+"_group_model_variance_fit.png") plt.show() plt.close() ''' return model_params_df
def data_to_json(degree, points, weights): d = {"s1": [], "s2": [], "s3": []} idx = numpy.argsort(weights) weights = weights[idx] points = points[idx] # get groups of equal weights for s, length in zip(*_grp_start_len(weights, 1.0e-12)): weight = weights[s] pts = points[s:s + length] if length == 1: d["s3"].append([weight]) elif length == 3: # Symmetry group [[a, a, b], [a, b, a], [b, a, a]]. # Find the equal value `a`. tol = 1.0e-12 beta = pts[0] - pts[0][0] ct = numpy.count_nonzero(abs(beta) < tol) assert ct in [1, 2], beta val = pts[0][0] if ct == 2 else pts[0][1] d["s2"].append([weight, val]) else: # Symmetry group perm([[a, b, c]]). Deliberately take the two smallest of a, # b, c as representatives. assert length == 6 srt = numpy.sort(pts[0]) d["s1"].append([weight, srt[0], srt[1]]) d["degree"] = degree if len(d["s1"]) == 0: d.pop("s1") if len(d["s2"]) == 0: d.pop("s2") if len(d["s3"]) == 0: d.pop("s3") # Getting floats in scientific notation in python.json is almost impossible, so do # some work here. Compare with <https://stackoverflow.com/a/1733105/353337>. class PrettyFloat(float): def __repr__(self): return '{:.16e}'.format(self) def pretty_floats(obj): if isinstance(obj, float): return PrettyFloat(obj) elif isinstance(obj, dict): return dict((k, pretty_floats(v)) for k, v in obj.items()) elif isinstance(obj, (list, tuple)): return list(map(pretty_floats, obj)) return obj with open('wv{:02d}.json'.format(degree), "w") as f: string = pretty_floats(d).__repr__() \ .replace("'", "\"") \ .replace("[[", "[\n [") \ .replace("],", "],\n ") \ .replace("]],", "]\n ],") f.write(string) return
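# Note added for illustration: `_grp_start_len` is used above but not defined in
# this excerpt. From the call `zip(*_grp_start_len(weights, 1.0e-12))` it appears
# to return the start indices and lengths of runs of (near-)equal values in the
# sorted `weights` array. A minimal sketch under that assumption (hypothetical,
# not necessarily the original helper):
import numpy

def _grp_start_len(a, tol):
    # For a sorted 1-D array `a`, return (starts, lengths) of groups whose
    # consecutive entries differ by no more than `tol`.
    idx = numpy.flatnonzero(numpy.abs(numpy.diff(a)) > tol) + 1
    starts = numpy.concatenate([[0], idx])
    lengths = numpy.diff(numpy.concatenate([starts, [len(a)]]))
    return starts, lengths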
def histogramdd(sample, bins=10, range=None, normed=False, weights=None): """ Compute the multidimensional histogram of some data. Parameters ---------- sample : array_like The data to be histogrammed. It must be an (N,D) array or data that can be converted to such. The rows of the resulting array are the coordinates of points in a D dimensional polytope. bins : sequence or int, optional The bin specification: * A sequence of arrays describing the bin edges along each dimension. * The number of bins for each dimension (nx, ny, ... =bins) * The number of bins for all dimensions (nx=ny=...=bins). range : sequence, optional A sequence of lower and upper bin edges to be used if the edges are not given explicitly in `bins`. Defaults to the minimum and maximum values along each dimension. normed : bool, optional If False, returns the number of samples in each bin. If True, returns the bin density ``bin_count / sample_count / bin_volume``. weights : array_like (N,), optional An array of values `w_i` weighing each sample `(x_i, y_i, z_i, ...)`. Weights are normalized to 1 if normed is True. If normed is False, the values of the returned histogram are equal to the sum of the weights belonging to the samples falling into each bin. Weights can also be a list of (weight arrays or None), in which case a list of histograms is returned as H. Returns ------- H : ndarray The multidimensional histogram of sample x. See normed and weights for the different possible semantics. edges : list A list of D arrays describing the bin edges for each dimension. See Also -------- histogram: 1-D histogram histogram2d: 2-D histogram Examples -------- >>> r = np.random.randn(100,3) >>> H, edges = np.histogramdd(r, bins = (5, 8, 4)) >>> H.shape, edges[0].size, edges[1].size, edges[2].size ((5, 8, 4), 6, 9, 5) """ try: # Sample is an ND-array. N, D = sample.shape except (AttributeError, ValueError): # Sample is a sequence of 1D arrays. sample = atleast_2d(sample).T N, D = sample.shape if weights is None: W = None else: try: # Weights is a 1D-array weights.shape W = -1 except (AttributeError, ValueError): # Weights is a list of 1D-arrays or None's W = len(weights) if W == -1 and weights.ndim != 1: raise AttributeError('Weights must be a 1D-array, None, or a list of both') nbin = empty(D, int) edges = D*[None] dedges = D*[None] if weights is not None: if W == -1: weights = asarray(weights) assert weights.shape == (N,) else: for i in arange(W): if weights[i] is not None: weights[i] = asarray(weights[i]) assert weights[i].shape == (N,) try: M = len(bins) if M != D: raise AttributeError( 'The dimension of bins must be equal to the dimension of the ' ' sample x.') except TypeError: # bins is an integer bins = D*[bins] # Select range for each dimension # Used only if number of bins is given. if range is None: # Handle empty input. Range can't be determined in that case, use 0-1. if N == 0: smin = zeros(D) smax = ones(D) else: smin = atleast_1d(array(sample.min(0), float)) smax = atleast_1d(array(sample.max(0), float)) else: smin = zeros(D) smax = zeros(D) for i in arange(D): smin[i], smax[i] = range[i] # Make sure the bins have a finite width. for i in arange(len(smin)): if smin[i] == smax[i]: smin[i] = smin[i] - .5 smax[i] = smax[i] + .5 # Create edge arrays for i in arange(D): if isscalar(bins[i]): if bins[i] < 1: raise ValueError( "Element at index %s in `bins` should be a positive " "integer." 
% i) nbin[i] = bins[i] + 2 # +2 for outlier bins edges[i] = linspace(smin[i], smax[i], nbin[i]-1) else: edges[i] = asarray(bins[i], float) nbin[i] = len(edges[i]) + 1 # +1 for outlier bins dedges[i] = diff(edges[i]) if np.any(np.asarray(dedges[i]) <= 0): raise ValueError( "Found bin edge of size <= 0. Did you specify `bins` with" "non-monotonic sequence?") nbin = asarray(nbin) # Handle empty input. if N == 0: if W > 0: return [np.zeros(nbin-2) for _ in arange(W)], edges else: return np.zeros(nbin-2), edges # Compute the bin number each sample falls into. Ncount = {} for i in arange(D): # searchsorted is faster for many bins Ncount[i] = searchsorted(edges[i], sample[:, i], "right") #Ncount[i] = digitize(sample[:, i], edges[i]) # Using digitize, values that fall on an edge are put in the right bin. # For the rightmost bin, we want values equal to the right # edge to be counted in the last bin, and not as an outlier. for i in arange(D): # Rounding precision mindiff = dedges[i].min() if not np.isinf(mindiff): decimal = int(-log10(mindiff)) + 6 # Find which points are on the rightmost edge. not_smaller_than_edge = (sample[:, i] >= edges[i][-1]) on_edge = (around(sample[:, i], decimal) == around(edges[i][-1], decimal)) # Shift these points one bin to the left. Ncount[i][where(on_edge & not_smaller_than_edge)[0]] -= 1 # Compute the sample indices in the flattened histogram matrix. ni = nbin.argsort() xy = zeros(N, int) for i in arange(0, D-1): xy += Ncount[ni[i]] * nbin[ni[i+1:]].prod() xy += Ncount[ni[-1]] # Compute the number of repetitions in xy and assign it to the # flattened histmat. if len(xy) == 0: if W > 0: return [np.zeros(nbin-2) for _ in arange(W)], edges else: return zeros(nbin-2, int), edges # Flattened histogram matrix (1D) # Reshape is used so that overlarge arrays # will raise an error. Wd = W if W > 0 else 1 hists = [zeros(nbin, float).reshape(-1) for _ in arange(Wd)] for histidx, hist in enumerate(hists): weights_ = weights[histidx] if W > 0 else weights flatcount = bincount(xy, weights_) a = arange(len(flatcount)) hist[a] = flatcount # Shape into a proper matrix hist = hist.reshape(sort(nbin)) ni = nbin.argsort() for i in arange(nbin.size): j = ni.argsort()[i] hist = hist.swapaxes(i, j) ni[i], ni[j] = ni[j], ni[i] # Remove outliers (indices 0 and -1 for each dimension). core = D*[slice(1, -1)] hist = hist[core] # Normalize if normed is True if normed: s = hist.sum() for i in arange(D): shape = ones(D, int) shape[i] = nbin[i] - 2 hist = hist / dedges[i].reshape(shape) hist /= s if (hist.shape != nbin - 2).any(): raise RuntimeError( "Internal Shape Error: hist.shape != nbin-2 -> " + str(hist.shape) + " != " + str(nbin-2)) hists[histidx] = hist if W in [None, -1]: return hists[0], edges else: return hists, edges
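# Illustrative usage (added, not part of the original function): the nonstandard
# twist in this variant is that `weights` may also be a list of weight arrays
# (or None entries), in which case one histogram per entry is returned, all
# sharing the same bin edges. A small sketch:
import numpy as np

rng = np.random.RandomState(0)
sample_points = rng.randn(1000, 2)
w_uniform = np.ones(1000)
w_random = rng.rand(1000)

# A single weight array behaves like np.histogramdd ...
H, edges = histogramdd(sample_points, bins=(5, 5), weights=w_uniform)

# ... while a list of weight arrays yields a list of histograms on shared edges.
(H_uniform, H_random), edges = histogramdd(sample_points, bins=(5, 5),
                                           weights=[w_uniform, w_random])
assert H_uniform.shape == H_random.shape == (5, 5)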
def handle_crop(self, event_click, event_release):
    corners = (
        np.sort([event.xdata for event in (event_click, event_release)]),
        np.sort([event.ydata for event in (event_click, event_release)]),
    )
    self.roi_limits = np.rint(np.hstack(corners)).astype(int)
from sklearn import tree

X = [[0, 0], [3, 3]]
y = [0.75, 3]
tree_reg = tree.DecisionTreeRegressor(random_state=42)
tree_reg = tree_reg.fit(X, y)
tree_reg.predict([[1.5, 1.5]])

# Import the necessary modules and libraries
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import matplotlib.pyplot as plt

# Create a random dataset
rng = np.random.RandomState(1)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression model
regr_1 = DecisionTreeRegressor(max_depth=2)
regr_2 = DecisionTreeRegressor(max_depth=5)
regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Plot the results
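# The snippet above breaks off at "# Plot the results". A plausible continuation
# (sketched here; not necessarily the original author's code) overlays both tree
# fits on the noisy training samples:
plt.figure()
plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue", label="max_depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen", label="max_depth=5", linewidth=2)
plt.xlabel("data")
plt.ylabel("target")
plt.title("Decision Tree Regression")
plt.legend()
plt.show()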
from random import sample


def trimmed_Kmeans(data, k, trim=0.1, runs=100, points=None, printcrit=False, maxit=None):
    '''
    data: np.array of the dataset
    k: number of clusters
    trim: trimming proportion (fraction of points treated as outliers)
    runs: number of random restarts
    points: initial centroids; None (random initialisation) by default
    '''
    if maxit is None:
        maxit = 2 * len(data)
    data = np.asarray(data)
    n, p = data.shape
    nin = int(round((1 - trim) * n))  # number of points kept (not trimmed)
    crit = np.inf
    oldclass = np.zeros((n,))
    iclass = np.zeros((n,))
    optclass = np.zeros((n,))
    disttom = np.zeros((n,))
    optmeans = None

    for i in range(runs):
        if points is None:
            means = data[sample(range(n), k), :]
        else:
            means = points.copy()
        wend = False
        itcounter = 0
        while not wend:
            itcounter += 1
            # Assign every point to its closest centroid.
            for j in range(n):
                dj = np.zeros((k,))
                for l in range(k):
                    dj[l] = ((data[j, :] - means[l, :]) ** 2).sum()
                iclass[j] = dj.argmin()
                disttom[j] = dj.min()
            # Trim the (n - nin) points furthest from their centroid;
            # label -1 marks the outlier class.
            order_idx = np.argsort(disttom)[nin:]
            iclass[order_idx] = -1
            if itcounter >= maxit or np.all(oldclass == iclass):
                wend = True
            else:
                # Update the centroid of every non-empty cluster.
                for l in range(k):
                    members = data[iclass == l, :]
                    if len(members) == 0:
                        # Empty cluster: re-seed it from a trimmed point if any.
                        trimmed_points = data[iclass == -1, :]
                        if len(trimmed_points):
                            means[l, :] = trimmed_points[0]
                        else:
                            means[l, :] = data[np.random.randint(n), :]
                    else:
                        means[l, :] = members.mean(axis=0)
            oldclass = iclass.copy()
        # Criterion: total within-cluster distortion over the kept points.
        newcrit = disttom[iclass >= 0].sum()
        if printcrit:
            print("Iteration", i, "criterion value", newcrit / nin)
        if newcrit <= crit:
            optclass = iclass.copy()
            crit = float(newcrit)
            optmeans = means.copy()

    out = {'classification': optclass, 'means': optmeans, 'criterion': crit / nin,
           'disttom': disttom, 'ropt': np.sort(disttom)[nin - 1], 'k': k,
           'trim': trim, "runs": runs}
    return out
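# Illustrative usage (added): two well-separated Gaussian blobs plus a handful
# of far-away points that the trimming should absorb. In the implementation
# above, trimmed points end up with label -1.
import numpy as np

rng = np.random.RandomState(0)
blobs = np.vstack([rng.randn(50, 2), rng.randn(50, 2) + 6.0])
outlier_points = rng.uniform(-20, 20, size=(10, 2))
data = np.vstack([blobs, outlier_points])

result = trimmed_Kmeans(data, k=2, trim=0.1, runs=10)
print(result['means'])                               # estimated cluster centres
print(int(np.sum(result['classification'] == -1)))   # number of trimmed points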
def handle_crop(self, event_click, event_release):
    self.time_limits = np.sort(
        [event.xdata for event in (event_click, event_release)])
    self.position_limits = np.sort(
        [event.ydata for event in (event_click, event_release)])
def make_plots(datadf, settings): ''' Call plotting functions from nanoplotter settings["lengths_pointer"] is a column in the DataFrame specifying which lengths to use ''' plot_settings = dict(font_scale=settings["font_scale"]) nanoplotter.plot_settings(plot_settings, dpi=settings["dpi"]) color = nanoplotter.check_valid_color(settings["color"]) colormap = nanoplotter.check_valid_colormap(settings["colormap"]) plotdict = {type: settings["plots"].count(type) for type in ["kde", "hex", "dot", 'pauvre']} plots = [] if settings["N50"]: n50 = nanomath.get_N50(np.sort(datadf["lengths"])) else: n50 = None plots.extend( nanoplotter.length_plots( array=datadf[datadf["length_filter"]]["lengths"].astype('uint64'), name="Read length", path=settings["path"], n50=n50, color=color, figformat=settings["format"], title=settings["title"]) ) logging.info("Created length plots") if "quals" in datadf: plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')], y=datadf[datadf["length_filter"]]["quals"], names=['Read lengths', 'Average read quality'], path=settings["path"] + "LengthvsQualityScatterPlot", color=color, figformat=settings["format"], plots=plotdict, title=settings["title"], plot_settings=plot_settings) ) if settings["logBool"]: plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"]], y=datadf[datadf["length_filter"]]["quals"], names=['Read lengths', 'Average read quality'], path=settings["path"] + "LengthvsQualityScatterPlot", color=color, figformat=settings["format"], plots=plotdict, log=True, title=settings["title"], plot_settings=plot_settings) ) logging.info("Created LengthvsQual plot") if "channelIDs" in datadf: plots.extend( nanoplotter.spatial_heatmap( array=datadf["channelIDs"], title=settings["title"], path=settings["path"] + "ActivityMap_ReadsPerChannel", color=colormap, figformat=settings["format"]) ) logging.info("Created spatialheatmap for succesfull basecalls.") if "start_time" in datadf: plots.extend( nanoplotter.time_plots( df=datadf, path=settings["path"], color=color, figformat=settings["format"], title=settings["title"], plot_settings=plot_settings) ) if settings["logBool"]: plots.extend( nanoplotter.time_plots( df=datadf, path=settings["path"], color=color, figformat=settings["format"], title=settings["title"], log_length=True, plot_settings=plot_settings) ) logging.info("Created timeplots.") if "aligned_lengths" in datadf and "lengths" in datadf: plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]]["aligned_lengths"], y=datadf[datadf["length_filter"]]["lengths"], names=["Aligned read lengths", "Sequenced read length"], path=settings["path"] + "AlignedReadlengthvsSequencedReadLength", figformat=settings["format"], plots=plotdict, color=color, title=settings["title"], plot_settings=plot_settings) ) logging.info("Created AlignedLength vs Length plot.") if "mapQ" in datadf and "quals" in datadf: plots.extend( nanoplotter.scatter( x=datadf["mapQ"], y=datadf["quals"], names=["Read mapping quality", "Average basecall quality"], path=settings["path"] + "MappingQualityvsAverageBaseQuality", color=color, figformat=settings["format"], plots=plotdict, title=settings["title"], plot_settings=plot_settings) ) logging.info("Created MapQvsBaseQ plot.") plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')], y=datadf[datadf["length_filter"]]["mapQ"], names=["Read length", "Read mapping quality"], path=settings["path"] + 
"MappingQualityvsReadLength", color=color, figformat=settings["format"], plots=plotdict, title=settings["title"], plot_settings=plot_settings) ) if settings["logBool"]: plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"]], y=datadf[datadf["length_filter"]]["mapQ"], names=["Read length", "Read mapping quality"], path=settings["path"] + "MappingQualityvsReadLength", color=color, figformat=settings["format"], plots=plotdict, log=True, title=settings["title"], plot_settings=plot_settings) ) logging.info("Created Mapping quality vs read length plot.") if "percentIdentity" in datadf: minPID = np.percentile(datadf["percentIdentity"], 1) if "aligned_quals" in datadf: plots.extend( nanoplotter.scatter( x=datadf["percentIdentity"], y=datadf["aligned_quals"], names=["Percent identity", "Average Base Quality"], path=settings["path"] + "PercentIdentityvsAverageBaseQuality", color=color, figformat=settings["format"], plots=plotdict, stat=stats.pearsonr if not settings["hide_stats"] else None, minvalx=minPID, title=settings["title"], plot_settings=plot_settings) ) logging.info("Created Percent ID vs Base quality plot.") plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"].replace('log_', '')], y=datadf[datadf["length_filter"]]["percentIdentity"], names=["Aligned read length", "Percent identity"], path=settings["path"] + "PercentIdentityvsAlignedReadLength", color=color, figformat=settings["format"], plots=plotdict, stat=stats.pearsonr if not settings["hide_stats"] else None, minvaly=minPID, title=settings["title"], plot_settings=plot_settings) ) if settings["logBool"]: plots.extend( nanoplotter.scatter( x=datadf[datadf["length_filter"]][settings["lengths_pointer"]], y=datadf[datadf["length_filter"]]["percentIdentity"], names=["Aligned read length", "Percent identity"], path=settings["path"] + "PercentIdentityvsAlignedReadLength", color=color, figformat=settings["format"], plots=plotdict, stat=stats.pearsonr if not settings["hide_stats"] else None, log=True, minvaly=minPID, title=settings["title"], plot_settings=plot_settings) ) plots.append(nanoplotter.dynamic_histogram(array=datadf["percentIdentity"], name="percent identity", path=settings["path"] + "PercentIdentityHistogram", title=settings["title"], color=color)) logging.info("Created Percent ID vs Length plot") return plots
def best_times_day(coord, lat=-30.7133, lon=21.443, utcoff=2., date='2019-01-01', plot=True, show=False,\ distsun=5, distmoon=3, elev=20, night=False, leg=None, setrise=True, filename='auspiciousness_day.png', satellites=False): location = co.EarthLocation(lat=lat * u.deg, lon=lon * u.deg, height=1e3 * u.m) utcoffset = utcoff * u.hour timesteps = 120 delta_time = np.linspace(-12, 12, timesteps) * u.hour ind = range(0, len(delta_time)) midnight = Time(date) - utcoffset times = midnight + delta_time frame = co.AltAz(obstime=times, location=location) sun = co.get_sun(times).transform_to(frame) moon = co.get_moon(times).transform_to(frame) src = coord.transform_to(frame) ind_riseset = np.where( np.logical_and(sun.alt > -12 * u.deg, sun.alt < 12 * u.deg))[0] ind_sun = np.where(src.separation(sun).deg < distsun)[0] ind_moon = np.where(src.separation(moon).deg < distmoon)[0] alt_max = src.alt.max() ind_low = np.where(src.alt <= (alt_max - alt_max * (elev / 100.)))[0] if night == True: ind_day = np.where(sun.alt > 0 * u.deg)[0] else: ind_day = [None] g = np.sort( list( set(ind) - set(ind_riseset) - set(ind_sun) - set(ind_moon) - set(ind_low) - set(ind_day))) try: tg = delta_time[g] except: tg = None if satellites: print "Calculating satellite separations" sat_times = [ (times[g][0] + i * u.minute).datetime for i in range(int((tg[-1] - tg[0]).to(u.minute) / u.minute)) ] params = [ list(i) for i in zip([[coord.ra.rad, coord.dec.rad] for i in range(len(sat_times))], sat_times) ] min_seps = np.nanmin(np.array(parmap(sat_separations, params)), axis=1) sat_time_steps = np.linspace(delta_time[g][0], delta_time[g][-1], len(min_seps)) sat_frame = co.AltAz(obstime=sat_times, location=location) sat_alts = coord.transform_to(sat_frame).alt if plot == True: plt.plot(delta_time, sun.alt, 'r--', label='Sun') plt.plot(delta_time, moon.alt, 'b--', label='Moon') plt.fill_between(delta_time.to('hr').value, 0, 90, np.logical_and(sun.alt<12*u.deg,sun.alt>-12*u.deg), \ color='0.5', alpha=0.5) plt.fill_between(delta_time.to('hr').value, 0, 90, sun.alt < 0 * u.deg, color='0.9', alpha=0.7) plt.plot(delta_time, src.alt, 'g-', label='Target') if satellites: try: cb = plt.scatter(sat_time_steps, sat_alts, c=min_seps, s=30, alpha=0.5, vmin=0, vmax=10) except: None else: try: plt.scatter(delta_time[g], src.alt[g], s=30, c='g', alpha=0.5, label='Best time') except: None plt.ylim(0, 90) plt.xlim(-12, 12) plt.xticks(range(-12, 13, 2)) plt.legend(loc='best', ncol=2) plt.grid() plt.title("{0} to {1}".format( str(Time(date) - 1 * u.day).split()[0], date)) plt.xlabel('Time from midnight [hour]') plt.ylabel('Elevation [deg]') if satellites: cbar = plt.colorbar(cb) cbar.set_ticks(np.arange(0, 11, 2)) cbar.set_label('Nearest satellite distance [deg]', rotation=270, labelpad=+20) if show: plt.show() else: plt.tight_layout() plt.savefig(filename, dpi=80) else: return tg
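# Hypothetical call (added), assuming the aliases used in the function body:
# co = astropy.coordinates, u = astropy.units, Time = astropy.time.Time.
import astropy.coordinates as co
import astropy.units as u

target = co.SkyCoord(ra=83.8 * u.deg, dec=-5.4 * u.deg)  # example coordinates
good_hours = best_times_day(target, date='2019-06-01', plot=False)
print(good_hours)  # hour offsets from local midnight that pass all the cuts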
def add_measures_to_metrics(self, metrics_and_measures): """Update a metric with a new measures, computing new aggregations. :param metrics_and_measures: A dict there keys are `storage.Metric` objects and values are timeseries array of the new measures. """ with self.statistics.time("raw measures fetch"): raw_measures = self._get_or_create_unaggregated_timeseries( metrics_and_measures.keys()) self.statistics["raw measures fetch"] += len(metrics_and_measures) self.statistics["processed measures"] += sum( map(len, metrics_and_measures.values())) new_boundts = [] splits_to_delete = {} splits_to_update = {} for metric, measures in six.iteritems(metrics_and_measures): measures = numpy.sort(measures, order='timestamps') agg_methods = list(metric.archive_policy.aggregation_methods) block_size = metric.archive_policy.max_block_size back_window = metric.archive_policy.back_window # NOTE(sileht): We keep one more blocks to calculate rate of change # correctly if any(filter(lambda x: x.startswith("rate:"), agg_methods)): back_window += 1 if raw_measures[metric] is None: ts = None else: try: ts = carbonara.BoundTimeSerie.unserialize( raw_measures[metric], block_size, back_window) except carbonara.InvalidData: LOG.error("Data corruption detected for %s " "unaggregated timeserie, creating a new one", metric.id) ts = None if ts is None: # This is the first time we treat measures for this # metric, or data are corrupted, create a new one ts = carbonara.BoundTimeSerie(block_size=block_size, back_window=back_window) current_first_block_timestamp = None else: current_first_block_timestamp = ts.first_block_timestamp() # NOTE(jd) This is Python where you need such # hack to pass a variable around a closure, # sorry. computed_points = {"number": 0} def _map_compute_splits_operations(bound_timeserie): # NOTE (gordc): bound_timeserie is entire set of # unaggregated measures matching largest # granularity. 
the following takes only the points # affected by new measures for specific granularity tstamp = max(bound_timeserie.first, measures['timestamps'][0]) new_first_block_timestamp = ( bound_timeserie.first_block_timestamp() ) computed_points['number'] = len(bound_timeserie) aggregations = metric.archive_policy.aggregations grouped_timeseries = { granularity: bound_timeserie.group_serie( granularity, carbonara.round_timestamp(tstamp, granularity)) for granularity, aggregations # No need to sort the aggregation, they are already in itertools.groupby(aggregations, ATTRGETTER_GRANULARITY) } aggregations_and_timeseries = { aggregation: carbonara.AggregatedTimeSerie.from_grouped_serie( grouped_timeseries[aggregation.granularity], aggregation) for aggregation in aggregations } deleted_keys, keys_and_split_to_store = ( self._compute_split_operations( metric, aggregations_and_timeseries, current_first_block_timestamp, new_first_block_timestamp) ) return (new_first_block_timestamp, deleted_keys, keys_and_split_to_store) with self.statistics.time("aggregated measures compute"): (new_first_block_timestamp, deleted_keys, keys_and_splits_to_store) = ts.set_values( measures, before_truncate_callback=_map_compute_splits_operations, ) splits_to_delete[metric] = deleted_keys splits_to_update[metric] = (keys_and_splits_to_store, new_first_block_timestamp) new_boundts.append((metric, ts.serialize())) with self.statistics.time("splits delete"): self._delete_metric_splits(splits_to_delete) self.statistics["splits delete"] += len(splits_to_delete) with self.statistics.time("splits update"): self._update_metric_splits(splits_to_update) self.statistics["splits delete"] += len(splits_to_update) with self.statistics.time("raw measures store"): self._store_unaggregated_timeseries(new_boundts) self.statistics["raw measures store"] += len(new_boundts)
def hpd(x, credible_interval=0.94, transform=lambda x: x, circular=False): """ Calculate highest posterior density (HPD) of array for given credible_interval. The HPD is the minimum width Bayesian credible interval (BCI). This implementation works only for unimodal distributions. Parameters ---------- x : Numpy array An array containing posterior samples credible_interval : float, optional Credible interval to plot. Defaults to 0.94. transform : callable Function to transform data (defaults to identity) circular : bool, optional Whether to compute the error taking into account `x` is a circular variable (in the range [-np.pi, np.pi]) or not. Defaults to False (i.e non-circular variables). Returns ------- np.ndarray lower and upper value of the interval. """ if x.ndim > 1: return np.array([ hpd(row, credible_interval=credible_interval, transform=transform, circular=circular) for row in x.T ]) # Make a copy of trace x = transform(x.copy()) len_x = len(x) if circular: mean = st.circmean(x, high=np.pi, low=-np.pi) x = x - mean x = np.arctan2(np.sin(x), np.cos(x)) x = np.sort(x) interval_idx_inc = int(np.floor(credible_interval * len_x)) n_intervals = len_x - interval_idx_inc interval_width = x[interval_idx_inc:] - x[:n_intervals] if len(interval_width) == 0: raise ValueError( "Too few elements for interval calculation. " "Check that credible_interval meets condition 0 =< credible_interval < 1" ) min_idx = np.argmin(interval_width) hdi_min = x[min_idx] hdi_max = x[min_idx + interval_idx_inc] if circular: hdi_min = hdi_min + mean hdi_max = hdi_max + mean hdi_min = np.arctan2(np.sin(hdi_min), np.cos(hdi_min)) hdi_max = np.arctan2(np.sin(hdi_max), np.cos(hdi_max)) return np.array([hdi_min, hdi_max])
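# Quick sanity check (added): for a standard normal sample the 94% HPD interval
# should be roughly symmetric and close to +/-1.88.
import numpy as np

rng = np.random.RandomState(0)
samples = rng.normal(size=10000)
lower, upper = hpd(samples, credible_interval=0.94)
print(lower, upper)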
]].values[0]

# Compute distribution from Lagrange multipliers values
Pp = ccutils.maxent.maxEnt_from_lagrange(mRNA_space, protein_space,
                                         lagrange_sample, exponents=moments).T

# Compute mean protein copy number
mean_delta_p = np.sum(protein_space * Pp)

# Transform protein_space into fold-change
fc_space = protein_space / mean_delta_p

# Define operators to be included
# Define concentration to include in plot
inducer = np.sort(df_maxEnt.inducer_uM.unique())[::2]

# Define repressor copy number and operator
rep = [22, 260, 1740]
op = "O3"

# Define binstep for plot
binstep = 10
binstep_theory = 100

# Define colors
colors = sns.color_palette("Greens_r", n_colors=len(inducer) + 2)

# Initialize plot
fig, ax = plt.subplots(len(rep), len(inducer),
def nanoporeplots(inFastq, outputPrefix="output", genomeSizeMb=0, desiredCoverage=1): sequenceLengths = [] with open(inFastq, 'r') as infile: for line in infile: line = line.strip() if line.startswith(("A", "C", "G", "T", "N")): sequenceLengths.append(len(line)) else: pass if int(genomeSizeMb) != 0: genomeSize = int(genomeSizeMb)*1000000 else: genomeSize = 1000000 desiredAmountOfData = genomeSize*int(desiredCoverage) sequenceLengths = np.array(sequenceLengths) sequenceLengths = np.sort(sequenceLengths)[::-1] cumulativeSum = np.cumsum(sequenceLengths) x = np.arange(len(cumulativeSum)) y = cumulativeSum fewerX = x[0:len(x):len(x)*0.01] fewerY = y[0:len(y):len(y)*0.01] myBins = np.arange(1000, 70000, 1000, dtype=float) sumsInBins = np.array(calculateSumsInBins(myBins, sequenceLengths), dtype=float) proportionsInBins = sumsInBins/sum(sequenceLengths) countsInBins = np.array(countSeqsInBins(myBins, sequenceLengths), dtype=float) minimumSum = np.array(cumulativeSum[cumulativeSum < desiredAmountOfData]) idx = len(minimumSum)+1 if idx > len(sequenceLengths): print "You do not have enough sequence data to attain " + str(desiredCoverage) + "X coverage. " elif idx <= len(sequenceLengths): cutoff = sequenceLengths[idx] print "Retain sequences larger than "+str(cutoff)+" to achieve "+str(desiredCoverage)+"X coverage. " # plot accumulation curve fig = plt.figure() plt.scatter(fewerX,fewerY, c="#d62728") plt.xlim(max(x)*-0.05, max(x)+max(x)*0.05) plt.ylim(max(y)*-0.1, max(y)+max(y)*0.1) yTickLabels = [] if max(y) >= genomeSize*10: yArrayOfTicks = np.arange(0, max(y), genomeSize*10) if max(y) < genomeSize*10: yArrayOfTicks = np.arange(0, max(y), genomeSize*1) for tick in np.nditer(yArrayOfTicks): if int(genomeSizeMb) == 0: oneLabel = str(tick/(genomeSize*0.1)) plt.ylabel("Data (Mb)") else: oneLabel = str(tick/genomeSize) + "x" plt.ylabel("Genome coverage") yTickLabels.append(oneLabel) plt.yticks(yArrayOfTicks, yTickLabels, fontsize=10) plt.title("Accumulation curve") # plt.show() if int(desiredCoverage) == 1 or idx > len(sequenceLengths): plt.xlabel("Nth read") xArrayOfTicks = np.arange(0, max(x), len(x)*0.1) plt.xticks(xArrayOfTicks, fontsize=10, rotation='vertical') plt.savefig(outputPrefix+"_accumulationCurve.pdf", format='pdf') elif int(desiredCoverage) > 1: plt.plot([max(x)*-0.05, idx], [desiredAmountOfData, desiredAmountOfData], color='r', linestyle='-') plt.plot([idx, idx], [max(y)*-0.1, desiredAmountOfData], color='r', linestyle='-') plt.xticks([]) cutoffText = str(cutoff) + "bp" plt.text(idx, max(y)*-0.1275, cutoffText, rotation='vertical', fontsize=8, ha='center', ma='right') plt.savefig(outputPrefix+"_accumulationCurve_"+str(desiredCoverage)+"X.pdf", format='pdf') plt.close(fig) # plot read length histogram fig = plt.figure() plt.bar(range(len(myBins)), countsInBins, color="#d62728", align='edge') plt.title("Histogram of sequence lengths") plt.ylabel("Number of reads") plt.xlabel("Length (Kb)") # plt.show() plt.savefig(outputPrefix+"_rawHist.pdf", format='pdf') plt.close(fig) # plot read length histogram as proportion of dataset for real fig = plt.figure() plt.bar(range(len(myBins)), proportionsInBins, color="#328AFF", align='edge') plt.title("Histogram of sequence lengths as a proportion of data") plt.ylabel("Proportion of reads") plt.xlabel("Length (Kb)") # plt.show() plt.savefig(outputPrefix+"_proportionHist.pdf", format='pdf') plt.close(fig)
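# Compact restatement (added, hypothetical helper) of the cutoff logic above:
# sort read lengths from longest to shortest and find the read length at which
# the running total first reaches the desired amount of data.
import numpy as np

def coverage_cutoff(read_lengths, genome_size_bp, coverage):
    lengths = np.sort(np.asarray(read_lengths))[::-1]
    cumulative = np.cumsum(lengths)
    idx = np.searchsorted(cumulative, genome_size_bp * coverage)
    if idx >= len(lengths):
        return None  # not enough data for the requested coverage
    return lengths[idx]

# e.g. keep reads longer than this to reach ~30x over a 5 Mb genome:
# coverage_cutoff(sequenceLengths, 5000000, 30)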
                     activation='relu',
                     kernel_regularizer=keras.regularizers.l1_l2(l1=0.001, l2=0.001)))
dp.compile(loss=dnn_loss, optimizer=keras.optimizers.Adam())
dp.fit(Xnew_train, y_train, epochs=dnn_epoch, batch_size=bs, verbose=dnn_verb)
weights = dp.get_weights()
w3 = np.matmul(weights[1], weights[2]).reshape(d, )
w1 = np.multiply(weights[0][:d], w3)
w2 = np.multiply(weights[0][d:], w3)
W = w1**2 - w2**2
t = np.sort(np.concatenate(([0], abs(W))))
ratio = [
    float(sum(W <= -tt)) / float(max(1, sum(W >= tt))) for tt in t[:d]
]
ind = np.where(np.array(ratio) <= q)[0]
if len(ind) == 0:
    T = float('inf')
else:
    T = t[ind[0]]
selected = np.where(W >= T)[0]
print(selected)
mat_selected[i, :] = W >= T
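# Illustration (added) of the selection rule used above on a synthetic statistic
# vector W: pick the smallest threshold T whose estimated false discovery
# proportion #{W <= -t} / #{W >= t} stays below q, then keep features with
# W >= T. Names here are illustrative only.
import numpy as np

rng = np.random.RandomState(0)
W_demo = np.concatenate([rng.normal(3, 1, size=10),   # informative features
                         rng.normal(0, 1, size=90)])  # null features, roughly symmetric
q_demo = 0.2

t_grid = np.sort(np.concatenate(([0], np.abs(W_demo))))
ratios = [np.sum(W_demo <= -t) / max(1, np.sum(W_demo >= t)) for t in t_grid]
passing = np.where(np.array(ratios) <= q_demo)[0]
T_demo = t_grid[passing[0]] if len(passing) else float('inf')
print(np.where(W_demo >= T_demo)[0])  # indices of selected features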
def graph(): # # Pb Filter # mask_filenames = glob(PathManager.path_valid_masks + 'mask*.npy') valid_bits = np.sort( np.unique([ int(name.replace('\\', '/').split('/')[-1].split('_')[1]) for name in mask_filenames ])) # # Questions # reldist_filter = np.load( PathManager.path_questions_hamming_reldistance_keep_bit_idxs) questions = np.concatenate([ np.load(PathManager.path_questions_hamming_angles), np.load(PathManager.path_questions_hamming_distances), np.load( PathManager.path_questions_hamming_reldistances)[reldist_filter] ]) # # Posebyte # posebyte_conditional = np.load('../posebytes/posebyte_conditioned.npy') angles_val = np.load(PathManager.path_annotations_hamming_valtest_angle) distances_val = np.load( PathManager.path_annotations_hamming_valtest_distance) reldistances_val = np.load( PathManager.path_annotations_hamming_valtest_reldistance) posebyte_valtest = np.concatenate(( angles_val, distances_val, reldistances_val, ), axis=1)[1919:] # # Embeddings # embedding_conditional = np.load('../embeddings/embeddings_conditional.npy') embedding_test = np.load( '../../image/hamming/embeddings/embeddings_valtest_0.npy')[1919:] # # Distances # distances = cdist(embedding_conditional, embedding_test) nearest_indices = np.argsort(distances, axis=1) # # Display # output_path = 'predictions/' root_img_dir = PathManager.path_image_root sequence_file = PathManager.path_dataset_valtest_txt with open(sequence_file, 'r') as in_file: label_lines = in_file.readlines() image_list = [x.strip() for x in label_lines] image_list = [[' '.join(x.strip().split(' ')[:-16]) + '/'] + x.strip().split(' ')[-16:] for x in image_list] image_list = image_list[1919:] for anno_idx, anno in enumerate(embedding_conditional): question_idx = int(anno_idx / 2) answer = posebyte_conditional[anno_idx, question_idx] if question_idx in valid_bits: pass else: continue answer = bool(answer) question = str(question_idx) + ': ' + str(questions[question_idx]) question = question.replace('angle:', 'is bent:') question = question.replace('distance:', 'is near:') question = question.replace('beyond:', 'is beyond:') question = question + '? ' + str(answer) output_file_name = output_path + question + '.png' nearest = nearest_indices[anno_idx] fig = plt.figure() fig.set_size_inches(8.0, 8.0) for frame_idx in range(25): near_idx = nearest[frame_idx] image_name = root_img_dir + image_list[near_idx][0] + image_list[ near_idx][1].split('_')[1] + '.png' axes = fig.add_subplot(5, 5, frame_idx + 1) if posebyte_valtest[near_idx, question_idx] == answer: for spine in axes.spines.values(): spine.set_edgecolor('green') spine.set_linewidth(8) else: for spine in axes.spines.values(): spine.set_edgecolor('red') spine.set_linewidth(8) image_to_show = imread(image_name) plt.suptitle(question, fontsize=16) plt.imshow(imresize(image_to_show, (288, 288))) plt.setp(axes.get_xticklabels(), visible=False) plt.setp(axes.get_yticklabels(), visible=False) plt.show()
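# Minimal sketch (added) of the retrieval step used in graph(): for each query
# embedding, rank gallery embeddings by Euclidean distance via cdist + argsort.
import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
queries = rng.randn(4, 16)    # e.g. conditional embeddings
gallery = rng.randn(100, 16)  # e.g. val/test embeddings

pairwise = cdist(queries, gallery)          # shape (4, 100)
ranked = np.argsort(pairwise, axis=1)       # nearest first
top5 = ranked[:, :5]                        # five closest gallery items per query
print(top5)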
def cmp(a, b): tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
rmae = np.sqrt(mean_squared_error(y_test, y_pred)) / mae
r2 = r2_score(y_test, y_pred)
print('RMSE, MAE, RMSE/MAE, R^2 = {:.3f}, {:.3f}, {:.3f}, {:.3f}'\
    .format(rmse, mae, rmae, r2))


def print_gscv_score(gscv):
    print("Best parameters set found on development set:")
    print()
    print(gscv.best_params_)
    print()


X_train = np.sort(1 * np.pi * np.random.rand(40, 1), axis=0)
y_train = np.sin(X_train).ravel()
y_train[::5] += 3 * (0.5 - np.random.rand(8))

#
# test data: y = sin(x)
#
# X_test = X_train[:]
X_test = np.sort(4 * np.pi * np.random.rand(80, 1), axis=0)
y_test = np.sin(X_test).ravel()

start = time()

print('')
print('')
print('# 1. SVR with default hyper parameters')

# step 1. model