def createClusters(self, sdt=None, edt=None):  # pragma: no cover
    '''Query the database for data, convert to the standard X and y
    arrays for scikit-learn, and identify clusters in the data.
    '''
    if not sdt and not edt:
        sdt = datetime.strptime(self.args.start, '%Y%m%dT%H%M%S')
        edt = datetime.strptime(self.args.end, '%Y%m%dT%H%M%S')

    clf = self.algorithms[self.args.algorithm]
    try:
        x, y, x_ids, y_ids = self.loadData(sdt, edt)
    except NoPPDataException as e:
        # without data there is nothing to cluster
        print(str(e))
        return

    x = np.array(x)
    y = np.array(y)
    X = np.column_stack((x, y))

    x_ids = np.array(x_ids)
    y_ids = np.array(y_ids)
    X_ids = np.column_stack((x_ids, y_ids))

    clf.fit(X)
    y_clusters = clf.labels_
    if max(y_clusters) >= 26:
        print("Too many clusters, there probably aren't any significant patterns here.")
    else:
        return X, y_clusters, X_ids
def draw(self):
    cols, rows = self.size
    minx, maxx = self.xlimits
    miny, maxy = self.ylimits
    width, height = self.cell_dimensions
    # list() so the coordinates are subscriptable under Python 3
    x = list(map(lambda i: minx + width * i, range(cols + 1)))
    y = list(map(lambda i: miny + height * i, range(rows + 1)))

    f = plt.figure(figsize=self.figsize)
    hlines = np.column_stack(np.broadcast_arrays(x[0], y, x[-1], y))
    vlines = np.column_stack(np.broadcast_arrays(x, y[0], x, y[-1]))
    lines = np.concatenate([hlines, vlines]).reshape(-1, 2, 2)
    line_collection = LineCollection(lines, color="black", linewidths=0.5)
    ax = plt.gca()
    ax.add_collection(line_collection)
    ax.set_xlim(x[0] - 1, x[-1] + 1)
    ax.set_ylim(y[0] - 1, y[-1] + 1)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.axis('off')
    self.draw_obstacles(plt.gca())
    return plt.gca()
def all_GL(self, q, maxpiv=None):
    """Return (piv, f_binodal_gas, f_binodal_liquid, f_spinodal_gas,
    f_spinodal_liquid) at insertion works piv sampled between the critical
    point and maxpiv (defaults to 2.2 * critical pressure)."""
    fc, pivc = self.critical_point(q)
    Fc = np.log(fc)
    # start sensibly above the critical point
    startp = pivc * 1.1
    fm = fminbound(self.mu, fc, self.maxf(), args=(startp, q))
    fM = fminbound(lambda f: -self.pv(f, startp, q), 0, fc)
    initial_guess = np.log([0.5 * fM, 0.5 * (fm + self.maxf())])
    # construct the top of the GL binodal
    if maxpiv is None:
        maxpiv = startp * 2
    topp = 1. / np.linspace(1. / startp, 1. / maxpiv)
    topGL = [initial_guess]
    for piv in topp:
        topGL.append(self.binodalGL(piv, q, topGL[-1]))
    # construct the GL binodal between the starting piv and the critical point
    botp = np.linspace(startp, pivc)[:-1]
    botGL = [initial_guess]
    for piv in botp:
        botGL.append(self.binodalGL(piv, q, botGL[-1]))
    # join the two results and convert back from log
    binodal = np.vstack((
        [[pivc, fc, fc]],
        np.column_stack((botp, np.exp(botGL[1:])))[::-1],
        np.column_stack((topp, np.exp(topGL[1:])))[1:]
    ))
    # spinodal at the same pivs
    spinodal = self.spinodalGL(q, binodal[:, 0])
    # join everything
    return np.column_stack((binodal, spinodal[:, 1:]))
def center_galaxy(image, original_image, centroid=None):
    if centroid is None:
        # apply median filter to find the galaxy centroid
        centroid = median_filter(image, size=10).argmax()
        centroid = np.unravel_index(centroid, image.shape)
    # recenter image (// keeps the offsets integral under Python 3)
    roffset = centroid[0] - image.shape[0] // 2
    if roffset < 0:
        # add more white space to top of image
        extra_rows = image.shape[0] - 2 * centroid[0]
        image = np.vstack((np.zeros((extra_rows, image.shape[1])), image))
    elif roffset > 0:
        # add more white space to bottom of image
        extra_rows = 2 * centroid[0] - image.shape[0]
        image = np.vstack((image, np.zeros((extra_rows, image.shape[1]))))
    coffset = centroid[1] - image.shape[1] // 2
    if coffset > 0:
        # add more white space to right of image
        extra_columns = 2 * centroid[1] - image.shape[1]
        image = np.column_stack((image, np.zeros((image.shape[0], extra_columns))))
    elif coffset < 0:
        # add more white space to left of image
        extra_columns = image.shape[1] - 2 * centroid[1]
        image = np.column_stack((np.zeros((image.shape[0], extra_columns)), image))
    return image, centroid
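# A quick sanity check for the padding logic above (hypothetical toy frame;
# the centroid is passed explicitly so the median-filter step is skipped):
import numpy as np

img = np.zeros((20, 20))
img[3:8, 12:17] = 1.0  # blob centred at (5, 14)

padded, centroid = center_galaxy(img, img, centroid=(5, 14))
print(centroid)      # (5, 14)
print(padded.shape)  # (30, 28): 10 rows padded on top, 8 columns on the right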
def writeOut(self, outname='', include_state=False):
    """
    Writes out to file... only doing primitive variables for now:
    rho, u, p; maybe tack on e and h later.

    This needs to be nicer, but for debugging purposes we only write
    out to ASCII for now... just to be quick and easy, to focus on
    coding rather than fancy outputting.
    """
    x = self.grid.center()
    rho = self.getPrimitive('Density')
    u = self.getPrimitive('Velocity')
    P = self.getPrimitive('Pressure')

    if include_state:
        data = np.column_stack((x, rho, u, P, self.q[0], self.q[1], self.q[2]))
        header = '# x Density Velocity Pressure q0 q1 q2'
    else:
        data = np.column_stack((x, rho, u, P))
        header = '# x Density Velocity Pressure'

    # np.savetxt(outname + '_simstate_%3.3f_.txt' % (self.t), data,
    np.savetxt(outname + '_simstate.txt', data,
               header=header, fmt='%1.4e')
def import_scored_file_hdf5(scored_file, with_buffer=False):
    dat = h5py.File(scored_file, 'r')
    r = re.compile('.*tracks.*', re.IGNORECASE)
    dat = dat[list(filter(r.match, dat.keys()))[0]]
    # store data with one row for each puff-frame
    # the values in each row are a puff id, isPuff, frame number, x location,
    # y location, and an indicator for whether the puff-frame was one
    # originally identified (not one of the additional frames we later add)
    new_dat = np.concatenate(
        [np.column_stack((np.repeat(ind, dat[dat['y'][ind, 0]].shape[0]),
                          np.repeat(dat[dat['isPuff'][ind, 0]][0, 0],
                                    dat[dat['y'][ind, 0]].shape[0]),
                          dat[dat['f'][ind, 0]][:, 0],
                          dat[dat['x'][ind, 0]][:, 0],
                          dat[dat['y'][ind, 0]][:, 0],
                          dat[dat['c'][ind, 0]][:, 0],
                          dat[dat['A'][ind, 0]][:, 0],
                          np.repeat(1, dat[dat['y'][ind, 0]].shape[0])))
         for ind in range(dat['y'].shape[0])])
    # uncomment if you only want to extract puffs:
    # new_dat = new_dat[np.where(new_dat[:, 1] == 1)]
    if with_buffer:
        buffers = np.concatenate(
            [np.column_stack((
                np.repeat(ind, 3),
                np.array([0, 1, 2]),
                (np.repeat(0, 3)
                 if isinstance(dat[dat['startBuffer'][ind, 0]], h5py.Dataset)
                 else dat[dat['startBuffer'][ind, 0]]['A'][:, 0]),
                (np.repeat(0, 3)
                 if isinstance(dat[dat['endBuffer'][ind, 0]], h5py.Dataset)
                 else dat[dat['endBuffer'][ind, 0]]['A'][:, 0])))
             for ind in range(dat['y'].shape[0])])
        return new_dat, buffers
    else:
        return new_dat
def expected(self, window_length, k, closes):
    """Compute the expected data (without adjustments) for the given
    window, k, and closes array.

    This uses talib.BBANDS to generate the expected data.
    """
    lower_cols = []
    middle_cols = []
    upper_cols = []
    for n in range(self.nassets):
        close_col = closes[:, n]
        if np.isnan(close_col).all():
            # ta-lib doesn't deal well with all nans.
            upper, middle, lower = [np.full(self.ndays, np.nan)] * 3
        else:
            upper, middle, lower = talib.BBANDS(
                close_col,
                window_length,
                k,
                k,
            )

        upper_cols.append(upper)
        middle_cols.append(middle)
        lower_cols.append(lower)

    # Stack all of our uppers, middles, lowers into three 2d arrays
    # whose columns are the sids. After that, slice off only the
    # rows we care about.
    where = np.s_[window_length - 1:]
    uppers = np.column_stack(upper_cols)[where]
    middles = np.column_stack(middle_cols)[where]
    lowers = np.column_stack(lower_cols)[where]
    return uppers, middles, lowers
def absences_figure(date, names, vote_absences, vote_absences_percent):
    """Time series chart: x: session number vs y: absences, color: name."""
    if not do_plots:
        return
    datestr = date.strftime('%Y%m%d')
    datestr_human = date.strftime('%d/%m/%Y')
    vote_absences = np.column_stack(vote_absences)
    vote_absences_percent = np.column_stack(vote_absences_percent)
    f = plt.figure()
    # Bulgarian labels: 'Absences During Votes', 'Number of MPs',
    # '% of the Party', 'chronological order of the votes'
    f.suptitle(u'Отсъствия по Време на Гласуване %s' % datestr_human)
    gs = gridspec.GridSpec(2, 5)
    su = f.add_subplot(gs[0, :-1])
    su.plot(vote_absences, alpha=0.8)
    su.set_ylabel(u'Брой Депутати')
    su.set_ylim(0)
    su.set_xticks([])
    su.legend(names, loc='upper left', bbox_to_anchor=(1, 1))
    sd = f.add_subplot(gs[1, :-1], sharex=su)
    sd.plot(vote_absences_percent, alpha=0.8)
    sd.set_ylabel(u'% от Партията')
    sd.set_xlabel(u'хронологичен ред на гласуванията')
    sd.set_ylim(0, 100)
    sd.set_xticks([])
    sd.set_yticks([25, 50, 75])
    f.savefig('generated_html/absences%s.png' % datestr)
    plt.close()
def _recalc(self):
    self.clear()
    assert len(self.artists) == 0

    if self.layout is None:
        return

    # layout[0] is [x0, x0, x[parent0], nan, ...]
    # layout[1] is [y0, y[parent0], y[parent0], nan, ...]
    ids = 3 * np.arange(self.layer.data.size)

    try:
        if isinstance(self.layer, Subset):
            ids = ids[self.layer.to_mask()]

        x, y = self.layout
        blank = np.zeros(ids.size) * np.nan
        x = np.column_stack([x[ids], x[ids + 1], x[ids + 2], blank]).ravel()
        y = np.column_stack([y[ids], y[ids + 1], y[ids + 2], blank]).ravel()
    except IncompatibleAttribute as exc:
        self.disable_invalid_attributes(*exc.args)
        return False

    self.artists = self._axes.plot(x, y, '--')
    return True
def reg_m(y, x):
    ones = np.ones(len(x[0]))
    X = sm.add_constant(np.column_stack((x[0], ones)))
    for ele in x[1:]:
        X = sm.add_constant(np.column_stack((ele, X)))
    results = sm.OLS(y, X).fit()
    return results
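# A minimal usage sketch for reg_m with synthetic data (assumes numpy as np
# and statsmodels.api as sm, as in the snippet). Regressors are passed as a
# list of equal-length 1-D arrays; the ones column supplies the intercept,
# and sm.add_constant leaves it alone because a constant is already present.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
x1 = rng.normal(size=100)
x2 = rng.normal(size=100)
y = 1.0 + 2.0 * x1 - 3.0 * x2 + rng.normal(scale=0.1, size=100)

results = reg_m(y, [x1, x2])
print(results.params)    # approximately [-3, 2, 1]: coef(x2), coef(x1), intercept
print(results.rsquared)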
def combineTechnicalIndicators(ticker):
    dates, prices = getDateAndPrice(ticker)
    np_dates = np.chararray(len(dates), itemsize=len(dates[0]))
    for day in range(len(dates)):
        np_dates[day] = dates[day]

    percentChange = calcDailyPercentChange(prices)
    vol = calc30DayVol(percentChange)
    RSI = calcRSI(prices)

    if ticker == PREDICTED:
        np_prices = np.array(prices)
        label = np.zeros_like(np_prices)
        # create label for price of SPY
        for x in range(len(np_prices[:-lagTime])):
            print(x)
            if np_prices[x] < np_prices[x + lagTime]:
                label[x] = 1
            else:
                label[x] = 0
        features = np.column_stack((np_dates, percentChange, vol, RSI, label))
        headers = ['date', 'return_' + ticker, 'vol_' + ticker,
                   'RSI_' + ticker, 'label']
    else:
        features = np.column_stack((np_dates, percentChange, vol, RSI))
        headers = ['date', 'return_' + ticker, 'vol_' + ticker,
                   'RSI_' + ticker]

    df_features = pd.DataFrame(features, columns=headers)
    print(df_features[25:35])
    return df_features
def construct_query(keyword):
    fn = "cnn_cache"
    if fn in locmem._caches:
        cnn = locmem._caches[fn]
    else:
        fn_full = os.path.join(settings.BASE_DIR, fn)
        cnn = CNNFeatExtractor()
        locmem._caches[fn] = cnn
    # weights = load_mat_with_cache("obret/data/weight.mat")['weight']
    weights = load_mat_with_cache(sett.cfg.TRAINED_SVM)["weight"]
    neg_feats = load_mat_with_cache(sett.cfg.NEG_PATH)["samples"]
    queries = np.zeros([4096, 0])
    words = keyword.split()
    cate_ids = []
    for word in words:
        try:
            cate_id = PASCAL_CATEGORIES.index(word)
            cate_ids.append(cate_id)
            queries = np.column_stack([queries, weights[:, cate_id]])
        except ValueError:
            # not a PASCAL category: fall back to a cached or freshly
            # trained per-keyword SVM
            key = "classifier-" + word
            if key in locmem._caches:
                weight = locmem._caches[key]
            else:
                weight = learn_svm_from_keyword(word, neg_feats, cnn)
                weight = weight.reshape([4096, 1])
                locmem._caches[key] = weight
            queries = np.column_stack([queries, weight])
    if not cate_ids:
        cate_ids = [1]
    print(cate_ids)
    return queries
def wide_dataset_large():
    print("Reading in Arcene training data for binomial modeling.")
    trainDataResponse = np.genfromtxt(tests.locate("smalldata/arcene/arcene_train_labels.labels"), delimiter=' ')
    trainDataResponse = np.where(trainDataResponse == -1, 0, 1)
    trainDataFeatures = np.genfromtxt(tests.locate("smalldata/arcene/arcene_train.data"), delimiter=' ')
    trainData = h2o.H2OFrame(np.column_stack((trainDataResponse, trainDataFeatures)).tolist())

    print("Run model on 3250 columns of Arcene with strong rules off.")
    model = h2o.glm(x=trainData[1:3250], y=trainData[0].asfactor(),
                    family="binomial", lambda_search=False, alpha=[1])

    print("Test model on validation set.")
    validDataResponse = np.genfromtxt(tests.locate("smalldata/arcene/arcene_valid_labels.labels"), delimiter=' ')
    validDataResponse = np.where(validDataResponse == -1, 0, 1)
    validDataFeatures = np.genfromtxt(tests.locate("smalldata/arcene/arcene_valid.data"), delimiter=' ')
    validData = h2o.H2OFrame(np.column_stack((validDataResponse, validDataFeatures)).tolist())
    prediction = model.predict(validData)

    print("Check performance of predictions.")
    performance = model.model_performance(validData)

    print("Check that prediction AUC is better than guessing (0.5).")
    assert performance.auc() > 0.5, "predictions should be better than pure chance"
def summarize(self):
    """
    Print CV risk estimates for each candidate estimator in the library,
    coefficients for the weighted combination of estimators, and the
    estimated risk for the SuperLearner.

    Parameters
    ----------
    None

    Returns
    -------
    Nothing
    """
    if self.libnames is None:
        libnames = [est.__class__.__name__ for est in self.library]
    else:
        libnames = self.libnames
    print("Cross-validated risk estimates for each estimator in the library:")
    print(np.column_stack((libnames, self.risk_cv[:-1])))
    print("\nCoefficients:")
    print(np.column_stack((libnames, self.coef)))
    print("\n(Not cross-validated) estimated risk for SL:", self.risk_cv[-1])
def main():
    t0 = time.time()  # start time
    # output file paths
    TRAINX_OUTPUT = "../../New_Features/train_x_processed.csv"
    TEST_X_OUTPUT = "../../New_Features/test__x_processed.csv"
    # input file paths
    TRAIN_FILE_X1 = "../../ML_final_project/sample_train_x.csv"
    TRAIN_FILE_X2 = "../../ML_final_project/log_train.csv"
    TEST__FILE_X1 = "../../ML_final_project/sample_test_x.csv"
    TEST__FILE_X2 = "../../ML_final_project/log_test.csv"
    # load files
    TRAIN_DATA_X1 = np.loadtxt(TRAIN_FILE_X1, delimiter=',', skiprows=1, usecols=(range(1, 18)))
    TEST__DATA_X1 = np.loadtxt(TEST__FILE_X1, delimiter=',', skiprows=1, usecols=(range(1, 18)))
    TRAIN_DATA_X2 = logFileTimeCount(np.loadtxt(TRAIN_FILE_X2, delimiter=',', skiprows=1, dtype=object))
    TEST__DATA_X2 = logFileTimeCount(np.loadtxt(TEST__FILE_X2, delimiter=',', skiprows=1, dtype=object))
    # combine files
    TRAIN_DATA_X0 = np.column_stack((TRAIN_DATA_X1, TRAIN_DATA_X2))
    TEST__DATA_X0 = np.column_stack((TEST__DATA_X1, TEST__DATA_X2))
    # data preprocessing
    scaler = StandardScaler()
    TRAIN_DATA_X = scaler.fit_transform(TRAIN_DATA_X0)
    TEST__DATA_X = scaler.transform(TEST__DATA_X0)
    # output processed files
    outputXFile(TRAINX_OUTPUT, TRAIN_DATA_X)
    outputXFile(TEST_X_OUTPUT, TEST__DATA_X)
    t1 = time.time()  # end time
    print("...This task costs " + str(t1 - t0) + " seconds.")
def fitcurve(x, source_name, hml):
    try:
        source = sncosmo.get_source(source_name)
        model = sncosmo.Model(source=source)

        # add zpsys and filter columns
        ab = np.zeros(len(hml[:, 1]), dtype='|S2')
        ab[:] = 'ab'
        hml = np.column_stack((hml, ab))
        band = np.zeros(len(hml[:, 1]), dtype='|S6')
        band[:] = 'ptf48r'
        hml = np.column_stack((hml, band))

        # fit to model
        z0 = float(spec_z(x[:-7]))
        hml_dat = astropy.table.Table(
            data=hml,
            names=('ptfname', 'time', 'magnitude', 'mag_err', 'flux',
                   'flux_err', 'zp_new', 'zp', 'ra', 'dec', 'zpsys', 'filter'),
            dtype=('str', 'float', 'float', 'float', 'float', 'float',
                   'float', 'float', 'float', 'float', 'str', 'str'))
        res, fitted_model = sncosmo.fit_lc(
            hml_dat, model, ['z', 't0', 'amplitude'],
            bounds={'z': (z0, z0 + 0.0001)}, nburn=10000, nsamples=50000)

        # The following excludes data points not in the range of the model
        # and data sets with fewer than 4 data points
        limit = modellimit(source_name, x[:-7], res.parameters[1])
        hml2 = []
        for j in range(len(hml[:, 1])):
            datapoint = hml[:, 1][j]
            if (res.parameters[1] - limit[0]) < float(datapoint) < (res.parameters[1] + limit[1]):
                hml2.append(hml[j])
        hml2 = np.array(hml2)
        if len(hml2) > 3:
            return finalfitcurve(x, source_name, hml2)
    except ValueError:
        print('error')
def main():
    LAMB = 10.0
    SPLIT = 40
    t0 = time.time()
    TRAIN19_FILE = 'hw4_train.dat'
    TRAIN19_DATA = np.loadtxt(TRAIN19_FILE, dtype=np.float)
    xTrain19 = np.column_stack((np.ones(TRAIN19_DATA.shape[0]),
                                TRAIN19_DATA[:, 0:(TRAIN19_DATA.shape[1] - 1)]))
    yTrain19 = TRAIN19_DATA[:, (TRAIN19_DATA.shape[1] - 1)]
    TEST19_FILE = 'hw4_test.dat'
    TEST19_DATA = np.loadtxt(TEST19_FILE, dtype=np.float)
    xTest19 = np.column_stack((np.ones(TEST19_DATA.shape[0]),
                               TEST19_DATA[:, 0:(TEST19_DATA.shape[1] - 1)]))
    yTest19 = TEST19_DATA[:, (TEST19_DATA.shape[1] - 1)]
    lambPowList = []
    eCvList = []
    for lambPower in range(-10, 3):
        eCv = vFoldErr(xTrain19, yTrain19, math.pow(LAMB, lambPower), SPLIT)
        lambPowList.append(lambPower)
        eCvList.append(eCv)
    eCvList = np.array(eCvList)
    minIndex = np.where(eCvList == eCvList.min())
    index = minIndex[0].max()
    plotHist(lambPowList, eCvList, "log(lambda)", "Ecv", "Q19", 1, False)
    t1 = time.time()
    print('=========================================================')
    print('Question 19: log(lambda) is', lambPowList[index], 'Ecv is', eCvList[index])
    print('---------------------------------------------------------')
    print('Q19 costs', t1 - t0, 'seconds')
    print('=========================================================')
def _compute_attenuation(self, rup, dists, imt, C):
    """
    Compute the second term of the equation described on p. 1866:

    " [(c4 + c5 * M) * min{ log10(R), log10(70.) }] +
      [(c4 + c5 * M) * max{ min{ log10(R/70.), log10(140./70.) }, 0.}] +
      [(c8 + c9 * M) * max{ log10(R/140.), 0}] "
    """
    vec = np.ones(len(dists.rrup))

    a1 = (np.log10(np.sqrt(dists.rrup ** 2.0 + C['c11'] ** 2.0)),
          np.log10(70. * vec))
    a = np.column_stack([a1[0], a1[1]])

    b3 = (np.log10(np.sqrt(dists.rrup ** 2.0 + C['c11'] ** 2.0) / (70. * vec)),
          np.log10((140. / 70.) * vec))
    b2 = np.column_stack([b3[0], b3[1]])

    b1 = ([np.min(b2, axis=1), 0. * vec])
    b = np.column_stack([b1[0], b1[1]])

    c1 = (np.log10(np.sqrt(dists.rrup ** 2.0 + C['c11'] ** 2.0) / (140. * vec)),
          0. * vec)
    c = np.column_stack([c1[0], c1[1]])

    return (((C['c4'] + C['c5'] * rup.mag) * np.min(a, axis=1)) +
            ((C['c6'] + C['c7'] * rup.mag) * np.max(b, axis=1)) +
            ((C['c8'] + C['c9'] * rup.mag) * np.max(c, axis=1)))
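# A standalone check of the trilinear distance term above, written with
# elementwise np.minimum/np.maximum instead of column_stack + min/max
# (made-up coefficient values; only the bracketing logic is exercised):
import numpy as np

C = {'c4': -1.0, 'c5': 0.1, 'c6': -0.5, 'c7': 0.05,
     'c8': -0.3, 'c9': 0.03, 'c11': 5.0}  # hypothetical values
mag = 6.0
rrup = np.array([10., 70., 100., 200.])
R = np.sqrt(rrup ** 2.0 + C['c11'] ** 2.0)

term = ((C['c4'] + C['c5'] * mag) * np.minimum(np.log10(R), np.log10(70.))
        + (C['c6'] + C['c7'] * mag)
        * np.maximum(np.minimum(np.log10(R / 70.), np.log10(140. / 70.)), 0.)
        + (C['c8'] + C['c9'] * mag) * np.maximum(np.log10(R / 140.), 0.))
print(term)  # the slope changes at R = 70 km and again at R = 140 km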
def calculate():
    """Build data array"""
    # Expand for multiple timeframes
    data = np.repeat(matches, len(self._timefrm), axis=0)
    timeframe = np.array(self._timefrm * len(matches)).T
    # Add timeframes
    data = np.column_stack((data, timeframe))
    # Add change
    change = [return_range(targt_data, d, d + t)
              for d, t in zip(data[:, 1], data[:, 4])]
    data = np.column_stack((data, change))
    # Add subset volatility
    subsets = [targt_data[start:end]
               for start, end in zip(data[:, 0], data[:, 1])]
    volatility = [historical_volatility(s) for s in subsets]
    data = np.column_stack((data, volatility))
    # Add range
    rng = []
    for d, t in zip(data[:, 1], data[:, 4]):
        subset = targt_data[d:d + t]
        change = [log(r2 / r1) for r2, r1 in zip(subset[1:], subset)]
        rng.append(np.std(change))
    data = np.column_stack((data, rng))
    # Add match length
    length = [d[1] - d[0] for d in data]
    data = np.column_stack((data, length))
    return data
def test_masked_multi_from_sample(self):
    data = numpy.ones((50, 10))
    data[:, 5:] = 2
    mask1 = numpy.ones((50, 10))
    mask1[:, :5] = 0
    mask2 = numpy.ones((50, 10))
    mask2[:, 5:] = 0
    mask3 = numpy.ones((50, 10))
    mask3[:25, :] = 0
    data_multi = numpy.column_stack(
        (data.ravel(), data.ravel(), data.ravel()))
    mask_multi = numpy.column_stack(
        (mask1.ravel(), mask2.ravel(), mask3.ravel()))
    masked_data = numpy.ma.array(data_multi, mask=mask_multi)
    lons = numpy.fromfunction(lambda y, x: 3 + x, (50, 10))
    lats = numpy.fromfunction(lambda y, x: 75 - y, (50, 10))
    swath_def = geometry.SwathDefinition(lons=lons, lats=lats)
    valid_input_index, valid_output_index, index_array, distance_array = \
        kd_tree.get_neighbour_info(swath_def, self.area_def, 50000,
                                   neighbours=1, segments=1)
    res = kd_tree.get_sample_from_neighbour_info('nn', (800, 800),
                                                 masked_data,
                                                 valid_input_index,
                                                 valid_output_index,
                                                 index_array,
                                                 fill_value=None)
    expected_fill_mask = numpy.fromfile(
        os.path.join(os.path.dirname(__file__), 'test_files',
                     'mask_test_full_fill_multi.dat'),
        sep=' ').reshape((800, 800, 3))
    fill_mask = res.mask
    self.assertTrue(numpy.array_equal(fill_mask, expected_fill_mask),
                    msg='Failed to create fill mask on masked data')
def output_wiggle(bins, binsize, norm_factor, by_strand, name, extra_trackline=""):
    """Write all non-empty bins as bedGraph-style strings; always include a
    minimal track line. Output is in 1-based wiggle format."""
    if not by_strand:
        print("track type=wiggle_0 alwaysZero=on visibility=full maxHeightPixels=100:80:50 "
              + ("name='%s'" % name) + extra_trackline)
        for chrom in sorted(bins.keys()):
            print("variableStep chrom=%s span=%d" % (chrom, binsize))
            non_zero_bins = numpy.nonzero(bins[chrom] > 0)
            result = numpy.column_stack((non_zero_bins[0] * binsize + 1,
                                         bins[chrom][non_zero_bins] * norm_factor))
            numpy.savetxt(sys.stdout, result, "%d\t%.8f")
    else:
        for strand in (0, 1):
            if strand == 0:
                nf = norm_factor
            else:
                nf = -norm_factor
            print("track type=wiggle_0 alwaysZero=on visibility=full maxHeightPixels=100:80:50 "
                  + ("name='%s[%s]'" % (name, strand and '-' or '+')) + extra_trackline)
            for chrom in sorted(bins.keys()):
                print("variableStep chrom=%s span=%d" % (chrom, binsize))
                non_zero_bins = numpy.nonzero(bins[chrom][strand] > 0)
                result = numpy.column_stack((non_zero_bins[0] * binsize + 1,
                                             bins[chrom][strand][non_zero_bins] * nf))
                numpy.savetxt(sys.stdout, result, "%d\t%.8f")
def make_rect_poly(width, height, theta, phi, subdivisions=10):
    """Create a Polygon patch representing a rectangle with half-angles
    width and height, rotated from the north pole to (theta, phi)."""

    # Convert width and height to radians, then to Cartesian coordinates.
    w = np.sin(np.deg2rad(width))
    h = np.sin(np.deg2rad(height))

    # Generate vertices of rectangle.
    v = np.asarray([[-w, -h], [w, -h], [w, h], [-w, h]])

    # Subdivide.
    v = subdivide_vertices(v, subdivisions)

    # Project onto sphere by calculating z-coord from normalization condition.
    v = np.hstack((v, np.sqrt(1. - np.expand_dims(np.square(v).sum(1), 1))))

    # Transform vertices.
    v = np.dot(v, hp.rotator.euler_matrix_new(phi, theta, 0, Y=True))

    # Convert to spherical polar coordinates.
    thetas, phis = hp.vec2ang(v)

    # FIXME: Remove this after all Matplotlib monkeypatches are obsolete.
    if mpl_version < '1.2.0':
        # Return list of vertices as longitude, latitude pairs.
        return np.column_stack((reference_angle(phis), 0.5 * np.pi - thetas))
    else:
        # Return list of vertices as longitude, latitude pairs.
        return np.column_stack((wrapped_angle(phis), 0.5 * np.pi - thetas))
def get_peaks(data, threshold, gap_threshold):
    # apply threshold; results are index arrays
    abovethr = np.where(data >= threshold)[0]
    belowthr = np.where(data < threshold)[0]

    #### extract peaks
    # first, find gaps in "above"/"below" labels (differences bigger than 1)
    b1 = np.where(np.diff(abovethr) > 1)[0]
    b2 = np.where(np.diff(belowthr) > 1)[0]

    # second, concatenate peak start and stop indices
    # note the +1 which fixes the diff-offset
    if belowthr[b2][0] > abovethr[b1][0]:
        b1 = b1[1:]
    if len(belowthr[b2]) == len(abovethr[b1]):
        indices = np.column_stack((belowthr[b2], abovethr[b1])) + 1
    else:
        indices = np.column_stack((belowthr[b2],
                                   np.concatenate((abovethr[b1], [abovethr[-1]])))) + 1

    # third, merge peaks if they are very close to each other
    indices_gaps = indices.flatten()[1:-1].reshape((-1, 2))
    gaps_to_preserve = np.where(np.diff(indices_gaps).flatten() > gap_threshold)[0]
    indices_filtered = np.concatenate((
        [indices[0, 0]],
        indices_gaps[gaps_to_preserve].flatten(),
        [indices[-1, 1]]
    )).reshape((-1, 2))

    return indices_filtered
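# A small illustration of get_peaks on a synthetic trace (the trace is
# assumed to start below threshold, as the index pairing above requires):
import numpy as np

data = np.array([0, 0, 5, 5, 0, 0, 6, 6, 6, 0, 0])

print(get_peaks(data, threshold=1, gap_threshold=1))
# -> [[2 4]
#     [6 9]]   two half-open [start, stop) peaks

print(get_peaks(data, threshold=1, gap_threshold=3))
# -> [[2 9]]   peaks separated by gaps <= gap_threshold are merged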
def residuals(self, src, dst):
    """Compute the Sampson distance.

    The Sampson distance is the first approximation to the geometric error.

    Parameters
    ----------
    src : (N, 2) array
        Source coordinates.
    dst : (N, 2) array
        Destination coordinates.

    Returns
    -------
    residuals : (N, ) array
        Sampson distance.
    """
    src_homogeneous = np.column_stack([src, np.ones(src.shape[0])])
    dst_homogeneous = np.column_stack([dst, np.ones(dst.shape[0])])

    F_src = self.params @ src_homogeneous.T
    Ft_dst = self.params.T @ dst_homogeneous.T

    dst_F_src = np.sum(dst_homogeneous * F_src.T, axis=1)

    return np.abs(dst_F_src) / np.sqrt(F_src[0] ** 2 + F_src[1] ** 2
                                       + Ft_dst[0] ** 2 + Ft_dst[1] ** 2)
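# For reference, the residual returned above is, per point i, with
# homogeneous x = (src_i, 1) and x' = (dst_i, 1):
#
#     r_i = |x'^T F x| / sqrt((F x)_1^2 + (F x)_2^2 + (F^T x')_1^2 + (F^T x')_2^2)
#
# i.e. the square root of the usual first-order (Sampson) approximation to
# the squared geometric error.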
def process_recarray(data, endog_idx=0, exog_idx=None, stack=True, dtype=None):
    names = list(data.dtype.names)

    if isinstance(endog_idx, int):
        endog = array(data[names[endog_idx]], dtype=dtype)
        endog_name = names[endog_idx]
        endog_idx = [endog_idx]
    else:
        endog_name = [names[i] for i in endog_idx]

        if stack:
            endog = np.column_stack([data[field] for field in endog_name])
        else:
            endog = data[endog_name]

    if exog_idx is None:
        exog_name = [names[i] for i in range(len(names))
                     if i not in endog_idx]
    else:
        exog_name = [names[i] for i in exog_idx]

    if stack:
        exog = np.column_stack([data[field] for field in exog_name])
    else:
        exog = recarray_select(data, exog_name)

    if dtype:
        endog = endog.astype(dtype)
        exog = exog.astype(dtype)

    dataset = Dataset(data=data, names=names, endog=endog, exog=exog,
                      endog_name=endog_name, exog_name=exog_name)
    return dataset
def listener_func(msg):
    amat = vectorize_func(msg)
    t = np.matrix([msg.header.stamp.to_time()])
    got_lock = False
    if self.channels[topic][0] is None:
        self.channels[topic] = [amat, t, threading.RLock()]
    else:
        lock = self.channels[topic][2]
        lock.acquire()
        got_lock = True
        new_record = [np.column_stack((self.channels[topic][0], amat)),
                      np.column_stack((self.channels[topic][1], t)),
                      lock]
        self.channels[topic] = new_record

    lock = self.channels[topic][2]
    if not got_lock:
        lock.acquire()

    # keep only messages received within the last buffer_length_secs
    n_seconds_ago = t[0, 0] - buffer_length_secs
    records_in_range = (np.where(self.channels[topic][1] >= n_seconds_ago)[1]).A1
    self.channels[topic][0] = self.channels[topic][0][:, records_in_range]
    self.channels[topic][1] = self.channels[topic][1][:, records_in_range]
    lock.release()
def create_colored_3d_points_from_matrices(matrices, index_list):
    points3d_l = []
    colors_ll = []
    mat_l = []
    X_MULTIPLIER = 1 / 15.

    for i, mat in enumerate(matrices):
        X, Y = np.meshgrid(range(mat.shape[0]), range(mat.shape[1]))
        x_size = mat.shape[0] * X_MULTIPLIER
        X = np.matrix(X * X_MULTIPLIER) + x_size * i + (i * x_size / 3.)
        Y = (np.matrix(np.ones((mat.shape[0], 1))) * index_list[i]).T
        Z = np.matrix(np.zeros(mat.shape)).T

        points = np.row_stack((X.reshape(1, X.shape[0] * X.shape[1]),
                               Y.reshape(1, Y.shape[0] * Y.shape[1]),
                               Z.reshape(1, Z.shape[0] * Z.shape[1])))
        colors = np.matrix(np.zeros((4, mat.shape[0] * mat.shape[1])))
        mat_l.append(mat.T.reshape((1, mat.shape[1] * mat.shape[0])))
        points3d_l.append(points)
        colors_ll.append(colors)

    all_mats = np.column_stack(mat_l)
    all_points = np.column_stack(points3d_l)
    all_colors = np.column_stack(colors_ll)
    return all_mats, all_points, all_colors
def write_parameters_outputvalues(self, P):
    Mstar, SFR_opt, _ = model.stellar_info_array(self.chain.flatchain_sorted,
                                                 self.data,
                                                 self.out['realizations2int'])
    column_names = np.transpose(np.array(["P025", "P16", "P50", "P84", "P975"],
                                         dtype='|S4'))
    chain_pars = np.column_stack((self.chain.flatchain_sorted, Mstar, SFR_opt))

    if self.out['calc_intlum']:
        # ['intlum_names'][0] is assumed to always be L_IR(8-100)
        SFR_IR = model.sfr_IR(self.int_lums[0])
        chain_others = np.column_stack((self.int_lums.T, SFR_IR))
        outputvalues = np.column_stack((
            np.transpose(list(map(lambda v: (v[0], v[1], v[2], v[3], v[4]),
                                  zip(*np.percentile(chain_pars,
                                                     [2.5, 16, 50, 84, 97.5],
                                                     axis=0))))),
            np.transpose(list(map(lambda v: (v[0], v[1], v[2], v[3], v[4]),
                                  zip(*np.percentile(chain_others,
                                                     [2.5, 16, 50, 84, 97.5],
                                                     axis=0)))))
        ))
        outputvalues_header = ' '.join(
            [i for i in np.hstack((P.names, 'Mstar', 'SFR_opt',
                                   self.out['intlum_names'], 'SFR_IR'))])
    else:
        outputvalues = np.column_stack((list(map(
            lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
            zip(*np.percentile(chain_pars, [16, 50, 84], axis=0))))))
        outputvalues_header = ' '.join([i for i in P.names])
    return outputvalues, outputvalues_header
def main():
    # clustering and write output
    if len(pep_array) > 1:
        matrix = []
        for i in range(0, len(pep_array)):
            matrix.append(pep_array[i][4].replace('\"', "").split(','))
        dataMatrix = numpy.array(matrix, dtype=float)
        d = sch.distance.pdist(dataMatrix, metric)  # vector of pairwise distances
        if metric == "correlation":
            # when using correlation, all values in the distance matrix
            # should be in the range [0, 2]
            D = numpy.clip(d, 0, 2)
        else:
            D = d
        try:
            cutoff = float(t)
        except ValueError:
            print("please provide a numeric value for --t")
            sys.exit()
        L = sch.linkage(D, method, metric)
        # distance is dissimilarity (1 - correlation)
        ind = sch.fcluster(L, cutoff, 'distance')
        p = numpy.array(pep_array)
        p = numpy.column_stack([p, ind])
        formatoutput(p)
    else:
        p = numpy.array(pep_array)
        p = numpy.column_stack([p, [0]])
        formatoutput(p)
def test_masked_full_multi(self):
    data = numpy.ones((50, 10))
    data[:, 5:] = 2
    mask1 = numpy.ones((50, 10))
    mask1[:, :5] = 0
    mask2 = numpy.ones((50, 10))
    mask2[:, 5:] = 0
    mask3 = numpy.ones((50, 10))
    mask3[:25, :] = 0
    data_multi = numpy.column_stack(
        (data.ravel(), data.ravel(), data.ravel()))
    mask_multi = numpy.column_stack(
        (mask1.ravel(), mask2.ravel(), mask3.ravel()))
    masked_data = numpy.ma.array(data_multi, mask=mask_multi)
    lons = numpy.fromfunction(lambda y, x: 3 + x, (50, 10))
    lats = numpy.fromfunction(lambda y, x: 75 - y, (50, 10))
    swath_def = geometry.SwathDefinition(lons=lons, lats=lats)
    res = kd_tree.resample_nearest(swath_def, masked_data,
                                   self.area_def, 50000,
                                   fill_value=None, segments=1)
    expected_fill_mask = numpy.fromfile(
        os.path.join(os.path.dirname(__file__), 'test_files',
                     'mask_test_full_fill_multi.dat'),
        sep=' ').reshape((800, 800, 3))
    fill_mask = res.mask
    cross_sum = res.sum()
    expected = 357140.0
    self.assertAlmostEqual(cross_sum, expected,
                           msg='Failed to resample masked data')
    self.assertTrue(numpy.array_equal(fill_mask, expected_fill_mask),
                    msg='Failed to create fill mask on masked data')
if i.endswith(".png"): seg_list.append(os.path.join(location, i)) c = 0 pix = [] for i in seg_list: if i.endswith(".png"): img = cv2.imread(i, cv2.IMREAD_UNCHANGED) n_white_pix = np.sum(img == 255) pix.append(n_white_pix) # print('Number of white pixels:', n_white_pix) print(pix) return pix pixs = pixels() mean = sum(pixs) / len(pixs) print(mean) X = np.column_stack((diss, pixs)) print(X) plt.scatter(X[:,0], X[:,1], label='True Position') kmeans = KMeans(n_clusters=2) kmeans.fit(X) print(kmeans.cluster_centers_) print(kmeans.labels_) plt.scatter(X[:,0],X[:,1], c=kmeans.labels_, cmap='rainbow') plt.show()
def generate_positions(N, x0=0.0, x1=300.0, y0=0.0, y1=100.0):
    X = np.random.uniform(x0, x1, N)
    Y = np.random.uniform(y0, y1, N)
    return np.column_stack((X, Y))
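# Usage sketch: each row of the result is one (x, y) position drawn
# uniformly from the default 300 x 100 rectangle.
import numpy as np

positions = generate_positions(5)
print(positions.shape)                       # (5, 2)
print(positions[:, 0].max() <= 300.0,
      positions[:, 1].max() <= 100.0)        # True True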
def test_vehicle_formation(self):
    # References:
    # EE364B Exercises, Chapter 12, Question 12.1 (MPC for output tracking).
    # http://stanford.edu/class/ee364b/364b_exercises.pdf
    # Raffard, Tomlin, Boyd. "Distributed Optimization for Cooperative Agents:
    # Application to Formation Flight." Proceedings IEEE Conference on
    # Decision and Control, 3:2453-2459, Nassau, Bahamas, December 2004.
    # http://stanford.edu/~boyd/papers/form_flight.html
    def plot_control(T, u, Umax, title=None):
        Umax_vec = np.repeat(Umax, T)
        Umax_lines = np.column_stack((Umax_vec, -Umax_vec))
        plt.plot(range(T), u)
        plt.plot(range(T), Umax_lines, color="red", linestyle="dashed")
        plt.xlabel("Time (t)")
        plt.ylabel("Input (u(t))")
        if title is not None:
            plt.title(title)
        # plt.show()

    def plot_output(T, y, ydes, title=None):
        plt.plot(range(T), y)
        plt.plot(range(T), ydes, color="red", linestyle="dashed")
        plt.xlabel("Time (t)")
        plt.ylabel("Output (y(t))")
        if title is not None:
            plt.title(title)
        # plt.show()

    # Problem data.
    T = 100
    Umax = 0.1
    A = np.array([[1, 1, 0],
                  [0, 1, 1],
                  [0, 0, 1]])
    B = np.array([[0], [0.5], [1]])
    C = np.array([[-1, 0, 1]])
    ydes = np.zeros((1, T))
    ydes[0, 30:70] = 10

    # Define leader vehicle.
    x = Variable((3, T + 1))
    y = Variable((1, T))
    u = Variable((1, T))
    J = sum_squares(y - ydes)
    constr = [x[:, 0] == 0, x[:, 1:] == A*x[:, :T] + B*u,
              y == C*x[:, :T], norm(u, "inf") <= Umax]
    prob = Problem(Minimize(J), constr)
    prob.solve()
    print("Single Vehicle Objective:", prob.value)

    # Plot input and output dynamics.
    plot_control(T, u.value.T, Umax, title="Single Vehicle Control Input")
    plot_output(T, y.value.T, ydes.T, title="Single Vehicle Path Dynamics")

    # Define follower vehicles.
    ydlt_l = -1
    x_l = Variable((3, T + 1))
    y_l = Variable((1, T))
    u_l = Variable((1, T))
    J_l = sum_squares(y_l - y - ydlt_l)
    constr_l = [x_l[:, 0] == 0, x_l[:, 1:] == A*x_l[:, :T] + B*u_l,
                y_l == C*x_l[:, :T], norm(u_l, "inf") <= Umax]
    prob_l = Problem(Minimize(J_l), constr_l)

    ydlt_r = 1
    x_r = Variable((3, T + 1))
    y_r = Variable((1, T))
    u_r = Variable((1, T))
    J_r = sum_squares(y_r - y - ydlt_r)
    constr_r = [x_r[:, 0] == 0, x_r[:, 1:] == A*x_r[:, :T] + B*u_r,
                y_r == C*x_r[:, :T], norm(u_r, "inf") <= Umax]
    prob_r = Problem(Minimize(J_r), constr_r)

    # Solve formation consensus problem.
    probs = Problems([prob, prob_l, prob_r])
    probs.solve(method="consensus", rho_init=0.5, solver="ECOS")
    print("Leader-Follower Objective:", probs.value)

    # Plot input and output dynamics.
    u_comb = np.column_stack((u.value.T, u_l.value.T, u_r.value.T))
    y_comb = np.column_stack((y.value.T, y_l.value.T, y_r.value.T))
    plot_control(T, u_comb, Umax, title="Leader-Follower Control Input")
    plot_output(T, y_comb, ydes.T, title="Leader-Follower Path Dynamics")
def interpolate(z, y, x, method='linear'):
    shape = z.shape
    coords = np.column_stack([z.flat, y.flat, x.flat])
    val = f(coords, method=method)
    return Quantity(np.exp(val).reshape(shape), unit)
def from_threshold_to_snr(image, sigma, mask, threshold=2000,
                          neighbor_factor=3):
    """
    Parameters
    ----------
    image : np.ndarray, np.uint
        Image with shape (z, y, x) or (y, x).
    sigma : float or Tuple(float)
        Sigma used for the gaussian filter (one for each dimension). If
        it's a float, the same sigma is applied to every dimension.
    mask : np.ndarray, bool
        Mask with shape (z, y, x) or (y, x) indicating the local peaks.
    threshold : float or int
        A threshold to detect peaks. Considered as a relative threshold if
        float.
    neighbor_factor : int or float
        The ratio between the radius of the neighborhood defining the noise
        and the radius of the signal.

    Returns
    -------
    l_snr : List[float]
        SNR of each detected spot (np.nan for spots too close to the
        image border).
    """
    # remove peaks with a low intensity
    if isinstance(threshold, float):
        threshold *= image.max()
    mask_ = (mask & (image > threshold))

    # no spot detected
    if mask_.sum() == 0:
        return []

    # we get the coordinates of the detected spots
    spot_coordinates = np.nonzero(mask_)
    spot_coordinates = np.column_stack(spot_coordinates)

    # compute radius for the spot and the neighborhood
    s = np.sqrt(image.ndim)
    (z_radius, yx_radius) = (int(s * sigma[0]), int(s * sigma[1]))
    (z_neigh, yx_neigh) = (int(s * sigma[0] * neighbor_factor),
                           int(s * sigma[1] * neighbor_factor))

    # we enlarge our mask to localize the complete signal and not just
    # the peak
    kernel_size_z = 2 * z_radius + 1
    kernel_size_yx = 2 * yx_radius + 1
    kernel_size = (kernel_size_z, kernel_size_yx, kernel_size_yx)
    mask_ = ndi.maximum_filter(mask_, size=kernel_size, mode='constant')

    # we define a matrix of noise (signal pixels are masked out with nan)
    noise = image.astype(np.float64)
    noise[mask_] = np.nan

    l_snr = []
    for i in range(spot_coordinates.shape[0]):
        (z, y, x) = (spot_coordinates[i, 0],
                     spot_coordinates[i, 1],
                     spot_coordinates[i, 2])
        max_z, max_y, max_x = image.shape
        if (z_neigh <= z <= max_z - z_neigh - 1
                and yx_neigh <= y <= max_y - yx_neigh - 1
                and yx_neigh <= x <= max_x - yx_neigh - 1):
            pass
        else:
            # spot too close to the border for a full neighborhood
            l_snr.append(np.nan)
            continue

        # extract local signal
        local_signal = image[z - z_radius: z + z_radius + 1,
                             y - yx_radius: y + yx_radius + 1,
                             x - yx_radius: x + yx_radius + 1].copy()

        # extract local noise
        local_noise = noise[z - z_neigh: z + z_neigh + 1,
                            y - yx_neigh: y + yx_neigh + 1,
                            x - yx_neigh: x + yx_neigh + 1].copy()
        local_noise[z_neigh - z_radius: z_neigh + z_radius + 1,
                    yx_neigh - yx_radius: yx_neigh + yx_radius + 1,
                    yx_neigh - yx_radius: yx_neigh + yx_radius + 1] = np.nan

        # compute snr
        snr = np.nanmean(local_signal) / np.nanstd(local_noise)
        l_snr.append(snr)

    return l_snr
    X[n1, 3] = comp
    X[n1, 4] = powercount
    X[n1, 5] = fp
    X[n1, 6] = tp
    # X[n1, 6] = adj
    # X[n1, 7] = verb
    # X[n1, 8] = adv
    # X[n1, 7] = trefpos
    X[n1, 7] = refpos
    X[n1, 8] = refneg
    X[n1, 9] = pos
    X[n1, 10] = neg
    concount = negcount = comp = powercount = pos = neg = refpos = refneg = fp = tp = adj = verb = adv = 0

print("Feature Shape:", X.shape)
Xword, yword = genDataset()
print("Word Frequency Feature Shape:", Xword.shape)
Xcombine = numpy.column_stack((X, Xword))
print("Combined Feature Shape:", Xcombine.shape)

# y_pred = cross_val_predict(svm.SVC(kernel='linear', C=1), X, y, cv=10)
y_pred = cross_val_predict(KNeighborsClassifier(n_neighbors=1), Xcombine, y, cv=10)
print(classification_report(y, y_pred))
sys.stdout.flush()

# savefile.close()
f.close()
f2.close()
dic.close()
def export_swc(self, save_to='../output/swc/'):
    df_paths = self.df_paths
    path_checked = []

    soma_coord = df_paths.loc[df_paths['connect_to'] == -1].path.iloc[0][0]
    soma_radius = df_paths.loc[df_paths['connect_to'] == -1].radius.iloc[0][0]

    # columns: ['n', 'type', 'x', 'y', 'z', 'radius', 'parent']
    swc_arr = np.array([[1, 1, soma_coord[0], soma_coord[1], soma_coord[2],
                         soma_radius, -1]])

    list_back_to_soma = (df_paths.sort_values(['connect_to']).back_to_soma).tolist()

    for i, back_to_soma in enumerate(list_back_to_soma):
        for path_id in back_to_soma[::-1]:
            if path_id in path_checked:
                continue

            path_data = df_paths.loc[path_id]
            path = path_data['path'][1:]
            path_radius = path_data['radius'][1:]
            path_type = path_data['types'][-1]
            connect_to = path_data['connect_to']
            connect_to_at = path_data['connect_to_at']

            swc_path = np.column_stack(
                [np.ones(len(path)) * path_type, path])  # type
            swc_path = np.column_stack([
                np.arange(len(swc_arr) + 1, len(path) + len(swc_arr) + 1),
                swc_path])  # ID
            swc_path = np.column_stack(
                [swc_path, path_radius * np.ones(len(path))])  # radius
            swc_path = np.column_stack(
                [swc_path, swc_path[:, 0] - 1])  # placeholder for PID

            pid = np.where((swc_arr[:, 2:5] == connect_to_at).all(1))[0] + 1
            if len(pid) > 1:
                swc_path[0][-1] = pid[0]
            else:
                swc_path[0][-1] = pid

            swc_arr = np.vstack([swc_arr, swc_path])
            path_checked.append(path_id)

    df_swc = pd.DataFrame(swc_arr)
    df_swc.index = np.arange(1, len(df_swc) + 1)
    df_swc.columns = ['n', 'type', 'x', 'y', 'z', 'radius', 'parent']
    df_swc[['n', 'type', 'parent']] = df_swc[['n', 'type', 'parent']].astype(int)

    self.df_swc = df_swc
    self.df_swc.to_csv(save_to + 'Cell_{}.swc'.format(self.filename),
                       sep=' ', index=None, header=None)
def label_normalize(sample, sample_labels, ref, ref_labels,
                    out=None, sample_mask=None, ref_mask=None,
                    median=False, order=3, debug=False):
    '''Use label-based intensity normalization'''
    with mincTools() as minc:
        if not mincTools.checkfiles(outputs=[out]):
            return

        ref_stats = {i[0]: i[5]
                     for i in minc.label_stats(ref_labels, volume=ref,
                                               mask=ref_mask, median=median)}
        sample_stats = {i[0]: i[5]
                        for i in minc.label_stats(sample_labels, volume=sample,
                                                  mask=sample_mask, median=median)}
        x = []
        y = []

        for i in ref_stats:
            # use 0-intercept
            if i in sample_stats:
                # x.append([1.0, sample_stats[i], sample_stats[i]*sample_stats[i]])
                x.append(sample_stats[i])
                y.append(ref_stats[i])

        # FIX origin? (HACK)
        x.append(0.0)
        y.append(0.0)

        # run linear regression (list comprehension: column_stack no longer
        # accepts a bare generator in recent numpy)
        clf = linear_model.LinearRegression()
        __x = np.array(x)
        _x = np.column_stack([np.power(__x, i) for i in range(1, order + 1)])
        _y = np.array(y)
        clf.fit(_x, _y)

        if debug:
            import matplotlib.pyplot as plt
            print('Coefficients: \n', clf.coef_)
            plt.scatter(_x[:, 0], _y, color='black')
            prx = np.linspace(0, 100, 20)
            prxp = np.column_stack([np.power(prx, i) for i in range(1, order + 1)])
            plt.plot(prx, clf.predict(prxp), color='red', linewidth=3)
            plt.xticks(np.arange(0, 100, 5))
            plt.yticks(np.arange(0, 100, 5))
            plt.show()

        # create command-line for minccalc
        cmd = ''
        for i in range(order):
            if i == 0:
                cmd += 'A[0]*{}'.format(clf.coef_[i])
            else:
                cmd += '+' + '*'.join(['A[0]'] * (i + 1)) + '*{}'.format(clf.coef_[i])

        if out is not None:
            minc.calc([sample], cmd, out)

        return cmd
def test_dataset_array_init_hm(self):
    "Tests support for arrays (homogeneous)"
    dataset = Dataset(np.column_stack([self.xs, self.xs_2]),
                      kdims=['x'], vdims=['x2'])
    self.assertTrue(isinstance(dataset.data, self.data_instance_type))
print(args.file)
skip_iters = 100
with open(args.file, 'r') as f:
    for line in f:
        if "mrcnn_class_loss" in line:
            iter = iter + 1
            terms = line.split("-")
            mask_loss_term = terms[7]
            mask_loss = mask_loss_term.split(":")[1]
            loss_mask.append(float(mask_loss))
            if "val_mrcnn_mask_loss" in line:
                terms = line.split("-")
                val_mask_loss_term = terms[13]
                val_mask_loss = val_mask_loss_term.split(":")[1]
                val_loss_mask.append(float(val_mask_loss))
            else:
                # carry the last validation value forward for
                # training-only lines
                val_loss_mask.append(val_loss_mask[-1])

loss_mask_arr = np.array(loss_mask)
val_loss_mask_arr = np.array(val_loss_mask)
viz.line(X=np.column_stack((np.arange(0, iter), np.arange(0, iter))),
         Y=np.column_stack((loss_mask_arr, val_loss_mask_arr)))
ts_de_model = ts.linefit(syr, sde)
ts_pmra_masyr = ts_ra_model[1] * 3.6e6 / np.cos(np.radians(ts_de_model[0]))
ts_pmde_masyr = ts_de_model[1] * 3.6e6

# initial RA/Dec guess:
sys.stderr.write("%s\n" % fulldiv)
guess_ra = sra.mean()
guess_de = sde.mean()
sys.stderr.write("guess_ra: %15.7f\n" % guess_ra)
sys.stderr.write("guess_de: %15.7f\n" % guess_de)
afpars = [guess_ra, guess_de, ts_pmra_masyr / 1e3, ts_pmde_masyr / 1e3, 1.0]
appcoo = af.apparent_radec(use_epoch_tdb, afpars, use_eph)

# proper fit:
design_matrix = np.column_stack((np.ones(syr.size), syr))
ra_ols_res = sm.OLS(sra, design_matrix).fit()
de_ols_res = sm.OLS(sde, design_matrix).fit()
ra_rlm_res = sm.RLM(sra, design_matrix).fit()
de_rlm_res = sm.RLM(sde, design_matrix).fit()
rlm_pmde_masyr = de_rlm_res.params[1] * 3.6e6
rlm_pmra_masyr = ra_rlm_res.params[1] * 3.6e6 \
    * np.cos(np.radians(de_rlm_res.params[0]))

sys.stderr.write("\nTheil-Sen intercepts:\n")
sys.stderr.write("RA: %15.7f\n" % ts_ra_model[0])
sys.stderr.write("DE: %15.7f\n" % ts_de_model[0])
sys.stderr.write("\nTheil-Sen proper motions:\n")
def test_dataset_array_ht(self):
    self.assertEqual(self.dataset_ht.array(),
                     np.column_stack([self.xs, self.ys]))
            # Calculate acc and stdev from each run (100 cv mixes)
            accuracy = np.append(accuracy, line[18])
            stdev = np.append(stdev, line[19])
            sterr = np.append(sterr, line[20])
            notes = np.append(notes, 'na')

        elif i == '09_MLP':
            method = 'MLP'
            wkdir3 = wkdir + j + '/' + i + '/'
            mlp = pd.read_table(wkdir3 + 'RESULTS.txt', sep='\t', header=0)
            mlp_mean = mlp.groupby(['Trait', 'Archit', 'ActFun', 'LearnRate',
                                    'Beta']).agg({'Accuracy': ['mean', 'std']}).reset_index()
            mlp_mean.columns = list(map(''.join, mlp_mean.columns.values))
            mlp_mean = mlp_mean.sort_values('Accuracymean',
                                            ascending=False).drop_duplicates(['Trait'])
            for i, row in mlp_mean.iterrows():
                index.append((j, row['Trait'], method))
                accuracy = np.append(accuracy, row['Accuracymean'])
                stdev = np.append(stdev, row['Accuracystd'])
                sterr = np.append(sterr, 'na')
                notes = np.append(notes, row['ActFun'] + '_' + row['Archit'] + '_'
                                  + str(row['LearnRate']) + '_' + str(row['Beta']))

pd_index = pd.MultiIndex.from_tuples(index, names=['ID', 'Trait', 'Method'])
data_array = np.column_stack((np.array(accuracy), np.array(stdev),
                              np.array(sterr), np.array(notes)))
df_acc = pd.DataFrame(data_array, index=pd_index,
                      columns=('Ac_mean', 'Ac_sd', 'Ac_se', 'Notes'))
print(df_acc.head(20))
df_acc.to_csv('RESULTS.csv', sep=',')
def test_dataset_array_hm_alias(self):
    self.assertEqual(self.dataset_hm_alias.array(),
                     np.column_stack([self.xs, self.y_ints]))
if __name__ == '__main__':
    R = np.array([[1, 1, 0, 0, 0], [0, 0, 1, -1, 0]])
    k_constr, k_vars = R.shape

    m = np.eye(k_vars) - R.T.dot(np.linalg.pinv(R).T)
    evals, evecs = np.linalg.eigh(m)
    L = evecs[:, :k_constr]
    T = evecs[:, k_constr:]
    print(T.T.dot(np.eye(k_vars)))

    tr = np.column_stack((T, R.T))

    q = [2, 0]
    tr0 = TransformRestriction(R, q)

    p_reduced = [1, 1, 1]
    # round-trip test
    assert_allclose(tr0.reduce(tr0.expand(p_reduced)), p_reduced, rtol=1e-14)

    p = tr0.expand(p_reduced)
    assert_allclose(R.dot(p), q, rtol=1e-14)

    # inconsistent restrictions
    # Ri = np.array([[1, 1, 0, 0, 0], [0, 0, 1, -1, 0], [0, 0, 1, -2, 0]])
    R = np.array([[1, 1, 0, 0, 0], [0, 0, 1, -1, 0], [0, 0, 1, 0, -1]])
    q = np.zeros(R.shape[0])
if __name__ == "__main__": # 1,import data X, Y = get_data() # 2,split data x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.33) # CSR_matrix to array x_train = x_train.toarray() x_test = x_test.toarray() X = X.toarray() # add 1 for Xi array_with_1 = [[1]]*len(x_train) x_train = np.column_stack((x_train, array_with_1)) array_with_1 = [[1]] * len(x_test) x_test = np.column_stack((x_test, array_with_1)) array_with_1 = [[1]] * len(X) X = np.column_stack((X, array_with_1)) # 3,initialize data w = [[0]]*len(x_train[0]) learning_rate = 0.00005 rounds = 30000 l_train = [None] * rounds l_validation = [None] * rounds # pre process x_train = np.mat(x_train) y_train = np.mat(y_train)
def measure_multi_cv(keith_dict, df, df_i):
    # Measure C-V curve
    keith_dict['measure_iv_now'].setEnabled(False)
    keith_dict['measure_cv_now'].setEnabled(False)
    keith_dict['measure_current_now'].setEnabled(False)
    keith_dict['measure_bias_seq_now'].setEnabled(False)
    keith_dict['set_bias'].setEnabled(False)
    keith_dict['max_bias'].setEnabled(False)
    keith_dict['voltage_steps'].setEnabled(False)
    keith_dict['keith_busy'] = True

    _, cv_biases = get_bias_voltages(float(keith_dict['max_bias'].value()),
                                     int(keith_dict['voltage_steps'].value()))
    keith_dict['output_box'].append('Measuring C-V...')
    iv_time = time.strftime('%Y-%m-%d_%H-%M-%S_')

    # get sweep rates
    rates_list = get_sweep_rates(keith_dict)
    # calculate appropriate delays per point to produce the sweep rates
    delta_v = cv_biases[1] - cv_biases[0]
    delays = [delta_v / rate for rate in rates_list]

    # append new data to C-V dataframe. first create empty cells to fill.
    # this is done so C-V curves with different lengths can be appended
    keith_dict['cv_df']['bias_'+iv_time] = np.repeat('', 1000)
    # now fill empty cells with new data
    keith_dict['cv_df']['bias_'+iv_time].iloc[
        :len(cv_biases)] = cv_biases.astype(str)
    keith_dict['new_data'] = None

    # loop through each sweep rate
    for delay_i, delay0 in enumerate(delays):
        rate0 = rates_list[delay_i]
        save_rate = '_'+str(np.round(rate0, decimals=3))+'V/s_'
        current_list = np.zeros_like(cv_biases)

        # loop through each applied voltage level
        for v_i, v0 in enumerate(cv_biases):
            # apply voltage
            apply_bias(keith_dict['keith_dev'], v0)
            time.sleep(delay0)
            # read current
            current_list[v_i] = get_current(keith_dict['keith_dev'])
            keith_dict['actual_bias'].setText(str(np.round(v0, decimals=8)))
            keith_dict['current_display'].setText(
                str(np.round(current_list[v_i], decimals=11)))
            keith_dict['new_data'] = np.column_stack(
                (cv_biases, current_list))[:v_i]

        # append new data to C-V dataframe. first create empty cells to fill.
        # this is done so C-V curves with different lengths can be appended
        keith_dict['cv_df']['current_'+save_rate+iv_time] = np.repeat('', 1000)
        keith_dict['cv_df']['current_'+save_rate+iv_time].iloc[
            :len(cv_biases)] = current_list.astype(str)

    # save C-V data to file
    keith_dict['cv_df'].to_csv(
        keith_dict['save_file_dir']+'/'+keith_dict['start_date']+'_cv.csv',
        index=False)
    # save capacitance to main df
    capacitance, max_cv_current = get_capacitance(cv_biases, current_list)
    df['cv_area'].iloc[df_i] = str(capacitance)
    df['max_cv_current'].iloc[df_i] = str(max_cv_current)

    keith_dict['output_box'].append('C-V measurement complete.')
    remove_bias(keith_dict)
    keith_dict['actual_bias'].setText('0')
    keith_dict['current_display'].setText('--')
    keith_dict['measure_iv_now'].setEnabled(True)
    keith_dict['measure_cv_now'].setEnabled(True)
    keith_dict['set_bias'].setEnabled(True)
    keith_dict['max_bias'].setEnabled(True)
    keith_dict['voltage_steps'].setEnabled(True)
    keith_dict['measure_current_now'].setEnabled(True)
    keith_dict['measure_bias_seq_now'].setEnabled(True)
    keith_dict['new_data'] = None

    if keith_dict['keith_seq_running'] is True:
        pass
    else:
        keith_dict['keith_busy'] = False
def buildm():
    m_name = np.zeros(inv.M).tolist()
    m_init = np.zeros(inv.M)
    sigmam = np.zeros(inv.M)
    pdist = ['Unif'] * inv.M
    start = inv.fmodel[0].Mker  # 6

    logger.debug('Get parameters for secondary segments')
    for j in range(1, inv.Mseg):
        m_init[start], sigmam[start], pdist[start] = (
            inv.fmodel[j].ss, inv.fmodel[j].sigmass, inv.fmodel[j].distss)
        m_name[start] = '{} Strike Slip'.format(inv.fmodel[j].name)
        m_init[start + 1], sigmam[start + 1], pdist[start + 1] = (
            inv.fmodel[j].D, inv.fmodel[j].sigmaD, inv.fmodel[j].distss)
        m_name[start + 1] = '{} D'.format(inv.fmodel[j].name)
        m_init[start + 2], sigmam[start + 2], pdist[start + 2] = (
            inv.fmodel[j].H, inv.fmodel[j].sigmaH, inv.fmodel[j].distH)
        m_name[start + 2] = '{} H'.format(inv.fmodel[j].name)
        start += 3

    logger.debug('Get parameters for the main segment')
    m_init[0], sigmam[0], pdist[0] = (
        inv.fmodel[0].ss, inv.fmodel[0].sigmass, inv.fmodel[0].distss)
    m_name[0] = '{} Strike Slip'.format(inv.fmodel[0].name)
    m_init[1], sigmam[1], pdist[1] = (
        inv.fmodel[0].vh, inv.fmodel[0].sigmavh, inv.fmodel[0].distvh)
    m_name[1] = '{} Shortening'.format(inv.fmodel[0].name)
    m_init[2], sigmam[2], pdist[2] = (
        inv.fmodel[0].H, inv.fmodel[0].sigmaH, inv.fmodel[0].distH)
    m_name[2] = '{} H'.format(inv.fmodel[0].name)
    m_init[3], sigmam[3], pdist[3] = (
        inv.fmodel[0].D, inv.fmodel[0].sigmaD, inv.fmodel[0].distD)
    m_name[3] = '{} D'.format(inv.fmodel[0].name)
    m_init[4], sigmam[4], pdist[4] = (
        inv.fmodel[0].L, inv.fmodel[0].sigmaL, inv.fmodel[0].distL)
    m_name[4] = '{} L'.format(inv.fmodel[0].name)
    m_init[5], sigmam[5], pdist[5] = (
        inv.fmodel[0].dip, inv.fmodel[0].sigmadip, inv.fmodel[0].distdip)
    m_name[5] = '{} dip'.format(inv.fmodel[0].name)

    logger.debug('Get parameters for volumic deformations')
    for j in range(inv.Mvol):
        m_init[inv.Mdis + j], sigmam[inv.Mdis + j], pdist[inv.Mdis + j] = (
            inv.volum[j].ds, inv.volum[j].sigmads, inv.fmodel[j].distshort)
        m_name[inv.Mdis + j] = '{} DS'.format(inv.volum[j].name)
        m_init[inv.Mdis + j + 1], sigmam[inv.Mdis + j + 1], pdist[inv.Mdis + j + 1] = (
            inv.volum[j].D, inv.volum[j].sigmaD, inv.fmodel[j].distD)
        m_name[inv.Mdis + j + 1] = '{} D'.format(inv.volum[j].name)

    M = 0
    for i in range(len(manifolds)):
        if manifolds[i].dim > 1:
            m_init[inv.Minv + M:inv.Minv + M + manifolds[i].dim] = 0
            sigmam[inv.Minv + M:inv.Minv + M + manifolds[i].dim] = manifolds[i].base
            M += manifolds[i].dim
        else:
            m_init[inv.Minv + M], sigmam[inv.Minv + M] = 0., manifolds[i].base[0]
            m_init[inv.Minv + M + 1], sigmam[inv.Minv + M + 1] = 0, manifolds[i].base[1]
            M += 2

    uu = 1
    for i in range(inv.Minv, inv.M):
        m_name[i] = '{} Baseline {}'.format(profile.name, uu)
        uu += 1

    # Define the bounds
    m_max = (m_init + sigmam)
    m_min = (m_init - sigmam)

    # export m_init and sigmam
    for i in range(len(manifolds)):
        manifolds[i].sigmam = sigmam
        manifolds[i].m_init = m_init

    b = np.column_stack((m_min, m_max))

    return (m_name, m_init.tolist(), sigmam.tolist(), m_min.tolist(),
            m_max.tolist(), pdist)
def main(is_baseline, checkpoint_dir, pos_test_file, neg_test_file,
         prediction_file, performance_file):
    FLAGS = tf.flags.FLAGS

    # CHANGE THIS: Load data. Load your own data here
    if is_baseline:
        x_raw, y_test = data_helpers_nsmc.load_nsmc_test_data_and_labels_baseline(
            pos_test_file, neg_test_file)
    else:
        x_raw, y_test = data_helpers_nsmc.load_nsmc_test_data_and_labels_test(
            pos_test_file, neg_test_file)
    y_test = np.argmax(y_test, axis=1)

    # Map data into vocabulary
    vocab_path = os.path.join(checkpoint_dir, "..", "vocab")
    vocab_processor = learn.preprocessing.VocabularyProcessor.restore(vocab_path)
    x_test = np.array(list(vocab_processor.transform(x_raw)))

    print("\nEvaluating...\n")

    # Evaluation
    # ==================================================
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dir)
    graph = tf.Graph()
    with graph.as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            with tf.device('/device:GPU:0'):
                # Load the saved meta graph and restore variables
                saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
                saver.restore(sess, checkpoint_file)

                # Get the placeholders from the graph by name
                input_x = graph.get_operation_by_name("input_x").outputs[0]
                # input_y = graph.get_operation_by_name("input_y").outputs[0]
                dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

                # Tensors we want to evaluate
                predictions = graph.get_operation_by_name("output/predictions").outputs[0]

                # Generate batches for one epoch
                batches = data_helpers_nsmc.batch_iter(list(x_test), FLAGS.batch_size,
                                                       1, shuffle=False)

                # Collect the predictions here
                all_predictions = []
                for x_test_batch in batches:
                    batch_predictions = sess.run(predictions,
                                                 {input_x: x_test_batch,
                                                  dropout_keep_prob: 1.0})
                    all_predictions = np.concatenate([all_predictions, batch_predictions])

    # Save the evaluation to a csv
    predictions_human_readable = np.column_stack((np.array(x_raw), all_predictions))
    out_path = os.path.join(checkpoint_dir, "..", prediction_file)  # "prediction.csv"
    print("Saving evaluation to {0}".format(out_path))
    with open(out_path, 'w') as f:
        csv.writer(f).writerows(predictions_human_readable)

    # Print accuracy if y_test is defined
    if y_test is not None:
        correct_predictions = float(sum(all_predictions == y_test))
        perf_file_path = os.path.join(checkpoint_dir, "..", performance_file)  # "performance.txt"
        with open(perf_file_path, 'wt') as outf:
            outf.write("Total number of test examples: {}\n".format(len(y_test)))
            outf.write("Accuracy: {:g}\n".format(correct_predictions / float(len(y_test))))
model.cuda()
x_train, y_train, x_test, y_test, x_val, y_val = load_dataset(args.dataset)
n = x_train.shape[0]
x_train = x_train[:n]
y_train = y_train[:n]
optimizer = optim.Adam(model.parameters(), lr=args.lr)

if args.sample_type == 'grad':
    for epoch in np.arange(3):
        uniform_sampling(x_train, y_train)
        test(x_test, y_test)
    weights = gradients(x_train, y_train)
    weights = weights.reshape(weights.shape[0], 1)
    weights = np.column_stack([weights, range(weights.shape[0])])
    prob = args.batch_size * (weights[:, 0] / np.sum(weights[:, 0]))
    for epoch in np.arange(args.epochs - 3):
        gradient_sampling(x_train, y_train, prob)
        test(x_test, y_test)
elif args.sample_type == 'obj':
    loss, losses = train(x_train, y_train, ret_all_losses=True)
    losses = np.mean(losses, axis=1)
    # losses = losses.reshape(losses.shape[0], 1)
    losses = np.column_stack([losses, range(losses.shape[0])])
    for epoch in np.arange(args.epochs - 1):
        objective_epoch, losses = objective_sampling(x_train, y_train, losses)
        test(x_test, y_test)
elif args.sample_type == 'var':
    # x_train_var = np.var(x_train.reshape(x_train.shape[0], reshape_size), axis=1)
    x_train_var = np.var(x_train, axis=1)
def exportPOV(path='/mnt/htw20/Documents/data/retrack/go/1/',
              head='J1_thr0_radMin3.1_radMax0_6min',
              tail='_t000',
              out='/home/mathieu/Documents/Thesis/data/go/1/mrco_ico.pov',
              ico_thr=-0.027,
              zmin=100,
              zmax=175,
              header='go1.inc',
              polydisperse=False):
    if polydisperse:
        positions = np.load(path + head + tail + '.npy')
        radii = positions[:, -2] * np.sqrt(2)
        positions = positions[:, :-2]
    else:
        positions = np.loadtxt(path + head + tail + '.dat', skiprows=2)
    Q6 = np.loadtxt(path + head + '_space' + tail + '.cloud', usecols=[1])
    bonds = np.loadtxt(path + head + tail + '.bonds', dtype=int)
    q6, w6 = np.loadtxt(path + head + tail + '.cloud', usecols=[1, 5],
                        unpack=True)
    u6 = ((2 * 6 + 1) / (4.0 * np.pi))**1.5 * w6 * q6**3
    ico_bonds = np.bitwise_and(
        u6[bonds].min(axis=-1) < ico_thr,
        np.bitwise_and(positions[:, -1][bonds].min(axis=-1) < zmax,
                       positions[:, -1][bonds].max(axis=-1) > zmin))
    ico = np.unique(bonds[ico_bonds])
    mrco = np.unique(bonds[np.bitwise_and(
        Q6[bonds].max(axis=-1) > 0.25,
        np.bitwise_and(positions[:, -1][bonds].min(axis=-1) < zmax,
                       positions[:, -1][bonds].max(axis=-1) > zmin))])
    gr = nx.Graph()
    gr.add_nodes(ico)
    for a, b in bonds[ico_bonds]:
        gr.add_edge((a, b))
    try:
        cc = nx.connected_components(gr)
    except RuntimeError:
        print("Graph is too large for ico_thr=%g, lower the threshold." %
              ico_thr)
        return
    # remove clusters that contain fewer than 10 particles
    ## sizes = np.zeros(max(cc.values()), int)
    ## for p, cl in cc.iteritems():
    ##     sizes[cl-1] += 1
    ## cc2 = dict()
    ## for p, cl in cc.iteritems():
    ##     if sizes[cl-1] > 9:
    ##         cc2[p] = cl
    ## cc = cc2
    if polydisperse:
        pov_mrco = [
            Sphere((x, y, z), r)
            for x, y, z, r in np.column_stack((positions, radii))[np.setdiff1d(mrco, ico)]
        ]
    else:
        pov_mrco = [
            Sphere((x, y, z), 6)
            for x, y, z in positions[np.setdiff1d(mrco, ico)]
        ]
    pov_mrco = Union(*pov_mrco + [Texture(Pigment(color="Green"))])
    if polydisperse:
        pov_ico = [
            Sphere(tuple(positions[p].tolist()), radii[p],
                   Texture(Pigment(color="COLORSCALE(%f)" %
                                   (cl * 120.0 / max(cc.values())))))
            for p, cl in cc.items()
        ]
    else:
        pov_ico = [
            Sphere(tuple(positions[p].tolist()), 6,
                   Texture(Pigment(color="COLORSCALE(%f)" %
                                   (cl * 120.0 / max(cc.values())))))
            for p, cl in cc.items()
        ]
    pov_ico = Union(*pov_ico)
    f = File(out, "colors.inc", header)
    f.write(pov_mrco, pov_ico)
    f.file.flush()
def measure_iv(keith_dict, df, df_i):
    # Measure I-V curve
    keith_dict['measure_iv_now'].setEnabled(False)
    keith_dict['measure_cv_now'].setEnabled(False)
    keith_dict['measure_current_now'].setEnabled(False)
    keith_dict['measure_bias_seq_now'].setEnabled(False)
    keith_dict['set_bias'].setEnabled(False)
    keith_dict['max_bias'].setEnabled(False)
    keith_dict['voltage_steps'].setEnabled(False)
    keith_dict['keith_busy'] = True
    iv_biases, _ = get_bias_voltages(float(keith_dict['max_bias'].value()),
                                     int(keith_dict['voltage_steps'].value()))
    current_list = np.empty_like(iv_biases)
    keith_dict['output_box'].append('Measuring I-V...')
    iv_time = time.strftime('%Y-%m-%d_%H-%M-%S_')
    keith_dict['new_data'] = None
    # loop through each applied voltage level
    for v_i, v0 in enumerate(iv_biases):
        # apply voltage
        apply_bias(keith_dict['keith_dev'], v0)
        time.sleep(0.2)
        # read current
        current_list[v_i] = get_current(keith_dict['keith_dev'])
        # slice up to v_i + 1 so the point just measured is included
        # (a bare [:v_i] would lag one point behind and start empty)
        keith_dict['new_data'] = np.column_stack(
            (iv_biases, current_list))[:v_i + 1]
        keith_dict['actual_bias'].setText(str(np.round(v0, decimals=8)))
        keith_dict['current_display'].setText(
            str(np.round(current_list[v_i], decimals=11)))
    remove_bias(keith_dict)
    keith_dict['actual_bias'].setText('0')
    keith_dict['current_display'].setText('--')
    keith_dict['output_box'].append('I-V measurement complete.')
    keith_dict['measure_iv_now'].setEnabled(True)
    keith_dict['measure_cv_now'].setEnabled(True)
    keith_dict['set_bias'].setEnabled(True)
    keith_dict['max_bias'].setEnabled(True)
    keith_dict['voltage_steps'].setEnabled(True)
    keith_dict['measure_bias_seq_now'].setEnabled(True)
    keith_dict['measure_current_now'].setEnabled(True)
    keith_dict['new_data'] = None
    # append new data to I-V dataframe. first create empty cells to fill.
    # this is done so I-V curves with different lengths can be appended
    keith_dict['iv_df']['bias_' + iv_time] = np.repeat('', 999)
    keith_dict['iv_df']['current_' + iv_time] = np.repeat('', 999)
    # now fill empty cells with new data
    keith_dict['iv_df']['bias_' + iv_time].iloc[
        :len(iv_biases)] = iv_biases.astype(str)
    keith_dict['iv_df']['current_' + iv_time].iloc[
        :len(iv_biases)] = current_list.astype(str)
    # save I-V data to file
    keith_dict['iv_df'].to_csv(
        keith_dict['save_file_dir'] + '/' + keith_dict['start_date'] + '_iv.csv',
        index=False)
    # save max current to main df
    max_current = np.amax(current_list)
    df['max_iv_current'].iloc[df_i] = str(max_current)
    if keith_dict['keith_seq_running'] is not True:
        keith_dict['keith_busy'] = False
def get_pixel_pos_and_cc_from_imgs(input_tiff, emb_img_idx_plot, cc_min_area, cc_min_area2):
    '''
    input:
        input_tiff
        emb_img_idx_plot: image indices to be plotted
        cc_min_area: first minimum-area threshold for a connected component
                     (larger than cc_min_area2)
        cc_min_area2: smaller minimum-area threshold, used in case no
                      connected component (cc) has area > cc_min_area
    output:
        plot_mat_all: dataframe with 9 columns, including pixel positions of
        embolism, embolism index (at image and cc level), and basic shape
        info for each cc. Not yet adjusted for tilt.
    '''
    num_emb = 1     # embolism index at image level, starts at 1
    cc_num_emb = 1  # embolism index at cc level, starts at 1
    # initialize the output dataframe
    plot_mat_all = pd.DataFrame(columns=[
        'row', 'col', 'number_emb', 'cc_num_emb', 'cc_width', 'cc_height',
        'cc_area', 'cc_centroid_row', 'cc_centroid_col'
    ])
    for j in emb_img_idx_plot:
        img_j = input_tiff[j]  # 0: background. 255: embolism
        # connect nearby pixels by morphological closing (removes small holes)
        smooth_img_j = cv2.morphologyEx(img_j.astype(np.uint8),
                                        cv2.MORPH_CLOSE,
                                        np.ones((3, 3), np.uint8))
        # 8-connectivity; centroids has 2 columns: (col, row)
        num_cc, mat_cc, stats, centroids = cv2.connectedComponentsWithStats(
            smooth_img_j.astype(np.uint8), 8)
        cc_width = stats[:, cv2.CC_STAT_WIDTH]
        cc_height = stats[:, cv2.CC_STAT_HEIGHT]
        cc_area = stats[:, cv2.CC_STAT_AREA]
        # label 0 is the background, hence the [1:] slice
        cc_big_enough_labels = np.where(cc_area[1:] > cc_min_area)[0]
        if cc_big_enough_labels.size > 0:  # at least one cc is big enough
            for cc_idx in (cc_big_enough_labels + 1):  # +1 because background was skipped
                # (row, col) of all pixels in the cc:
                # matrix of shape (number of pixels in the cc) x 2
                row_col_cc = np.transpose(np.nonzero(mat_cc == cc_idx))
                num_px_in_cc = row_col_cc.shape[0]
                cc_stat = np.array([
                    num_emb, cc_num_emb, cc_width[cc_idx], cc_height[cc_idx],
                    cc_area[cc_idx], centroids[cc_idx][1], centroids[cc_idx][0]
                ], ndmin=2)
                # repeat the cc-level stats once per pixel in the cc
                cc_stat_rep = np.repeat(cc_stat, num_px_in_cc, axis=0)
                plot_mat_j = pd.DataFrame(np.column_stack((row_col_cc, cc_stat_rep)))
                # same column names as plot_mat_all so concat aligns correctly
                plot_mat_j.columns = plot_mat_all.columns
                plot_mat_all = pd.concat([plot_mat_all, plot_mat_j],
                                         ignore_index=True)
                cc_num_emb = cc_num_emb + 1
        else:  # try the smaller minimum-area threshold
            cc_big_enough_labels2 = np.where(cc_area[1:] > cc_min_area2)[0]
            if cc_big_enough_labels2.size > 0:  # at least one cc is big enough
                for cc_idx in (cc_big_enough_labels2 + 1):
                    row_col_cc = np.transpose(np.nonzero(mat_cc == cc_idx))
                    num_px_in_cc = row_col_cc.shape[0]
                    cc_stat = np.array([
                        num_emb, cc_num_emb, cc_width[cc_idx], cc_height[cc_idx],
                        cc_area[cc_idx], centroids[cc_idx][1], centroids[cc_idx][0]
                    ], ndmin=2)
                    cc_stat_rep = np.repeat(cc_stat, num_px_in_cc, axis=0)
                    plot_mat_j = pd.DataFrame(np.column_stack((row_col_cc, cc_stat_rep)))
                    plot_mat_j.columns = plot_mat_all.columns
                    plot_mat_all = pd.concat([plot_mat_all, plot_mat_j],
                                             ignore_index=True)
                    cc_num_emb = cc_num_emb + 1
                print("No cc in img_idx =", j, "has area >", cc_min_area,
                      ". But some cc area >", cc_min_area2)
            else:
                print("[CAUTION] No cc in img_idx =", j, "has area >", cc_min_area2)
        num_emb = num_emb + 1
    # convert from float64 to int so the column matches the dtype of
    # embolism_table["number_emb"] and the two frames can be merged later on
    plot_mat_all["number_emb"] = plot_mat_all["number_emb"].astype(int)
    return plot_mat_all
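# A minimal usage sketch for the function above, on a tiny synthetic image
# stack; the array values and thresholds here are illustrative, not from the
# original project.
import cv2
import numpy as np
import pandas as pd

fake_tiff = np.zeros((3, 20, 20), dtype=np.uint8)
fake_tiff[1, 5:12, 5:12] = 255  # one embolism blob in the second frame
table = get_pixel_pos_and_cc_from_imgs(fake_tiff,
                                       emb_img_idx_plot=[1],
                                       cc_min_area=20,
                                       cc_min_area2=5)
print(table.head())  # one row per embolism pixel, with cc-level stats attached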
# ------------------- #
#   Loading dataset   #
# ------------------- #
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

print('')
print(' * creating polynomial expansion')
if housing:
    # note: load_boston was removed in scikit-learn 1.2, so this branch
    # requires an older scikit-learn version
    from sklearn import datasets
    A, b = datasets.load_boston(return_X_y=True)
if bodyfat:
    data = np.genfromtxt('./toy_data/bodyfat.csv', delimiter=',', skip_header=1)
    A = np.column_stack((data[:, 0], data[:, 2:]))
    b = data[:, 1]
if traizines:
    data = np.genfromtxt('./toy_data/traizines.csv', delimiter=',', skip_header=1)
    A = data[:, 1:]
    b = data[:, 0]

poly = PolynomialFeatures(poly_degree)
A = poly.fit_transform(A)[:, 1:]  # drop the constant bias column
A = (A - A.mean(axis=0)) / A.std(axis=0)  # standardize features
b -= b.mean(axis=0)  # center targets
m, n = A.shape
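# For reference, a tiny illustration of what the expansion above produces
# (degree 2, with the bias column dropped by the [:, 1:] slice); the demo
# matrix is made up for this example.
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

demo = np.array([[1.0, 2.0], [3.0, 4.0]])
expanded = PolynomialFeatures(2).fit_transform(demo)[:, 1:]
print(expanded)  # columns: x1, x2, x1^2, x1*x2, x2^2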
def extrude_triangulation(vertices, faces, height, **kwargs):
    """
    Turn a 2D triangulation and a height (float) into a
    watertight Trimesh object.

    Parameters
    ----------
    vertices: (n,2) float, 2D vertices
    faces:    (m,3) int, triangle indexes of vertices
    height:   float, distance to extrude triangulation

    Returns
    ---------
    mesh: Trimesh object of result
    """
    vertices = np.asanyarray(vertices, dtype=np.float64)
    faces = np.asanyarray(faces, dtype=np.int64)
    height = float(height)

    if not util.is_shape(vertices, (-1, 2)):
        raise ValueError('Vertices must be (n,2)')
    if not util.is_shape(faces, (-1, 3)):
        raise ValueError('Faces must be (n,3)')
    if np.abs(height) < tol.zero:
        raise ValueError('Height must be nonzero!')

    # make sure triangulation winding is pointing up
    normal_test = normals(
        [util.three_dimensionalize(vertices[faces[0]])[1]])[0]

    # make sure the triangulation is aligned with the sign of
    # the height we've been passed
    if np.dot(normal_test, [0, 0, np.sign(height)]) < 0:
        faces = np.fliplr(faces)

    # stack the (n,3) faces into (3*n, 2) edges
    edges = faces_to_edges(faces)
    edges_sorted = np.sort(edges, axis=1)
    # edges which only occur once are on the boundary of the polygon
    # since the triangulation may have subdivided the boundary of the
    # shapely polygon, we need to find it again
    edges_unique = group_rows(edges_sorted, require_count=1)

    # (n, 2, 2) set of line segments (positions, not references)
    boundary = vertices[edges[edges_unique]]

    # we are creating two vertical triangles for every 2D line segment
    # on the boundary of the 2D triangulation
    vertical = np.tile(boundary.reshape((-1, 2)), 2).reshape((-1, 2))
    vertical = np.column_stack(
        (vertical, np.tile([0, height, 0, height], len(boundary))))
    vertical_faces = np.tile([3, 1, 2, 2, 1, 0], (len(boundary), 1))
    vertical_faces += np.arange(len(boundary)).reshape((-1, 1)) * 4
    vertical_faces = vertical_faces.reshape((-1, 3))

    # stack the (n,2) vertices with zeros to make them (n, 3)
    vertices_3D = util.three_dimensionalize(vertices, return_2D=False)

    # a sequence of zero-indexed faces, which will then be appended
    # with offsets to create the final mesh
    faces_seq = [faces[:, ::-1], faces.copy(), vertical_faces]
    vertices_seq = [vertices_3D,
                    vertices_3D.copy() + [0.0, 0, height],
                    vertical]

    mesh = Trimesh(*util.append_faces(vertices_seq, faces_seq), process=True)
    return mesh
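# A minimal usage sketch for the function above: extrude two triangles
# covering the unit square into a closed box of height 1. This assumes the
# trimesh helpers imported by the function are available; with the trimesh
# package installed, the same call is exposed as
# trimesh.creation.extrude_triangulation.
import numpy as np

square_vertices = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
square_faces = np.array([[0, 1, 2], [0, 2, 3]])
box = extrude_triangulation(square_vertices, square_faces, height=1.0)
print(box.is_watertight)  # a successful extrusion should be watertight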
def deskewImage(filename):
    logging.info(filename)
    try:
        image = cv2.imread(filename)
    except Exception:
        logging.exception("message")

    ret, image = cv2.threshold(image, 170, 255, cv2.THRESH_BINARY)

    # convert the image to grayscale and flip the foreground
    # and background to ensure foreground is now "white" and
    # the background is "black"
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)

    # threshold the image, setting all foreground pixels to 255
    # and all background pixels to 0
    thresh = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # grab the (x, y) coordinates of all pixel values that
    # are greater than zero, then use these coordinates to
    # compute a rotated bounding box that contains all coordinates
    coords = np.column_stack(np.where(thresh > 0))
    angle = cv2.minAreaRect(coords)[-1]
    logging.info("angle: {}".format(angle))

    # the `cv2.minAreaRect` function returns values in the
    # range [-90, 0); as the rectangle rotates clockwise the
    # returned angle trends to 0 -- in this special case we
    # need to add 90 degrees to the angle
    if abs(angle) != 0.0:
        logging.info("Angle needs to be adjusted..")
    if angle < -45:
        angle = -(90 + angle)
    # otherwise, just take the inverse of the angle to make it positive
    else:
        angle = -angle

    # rotate the image to deskew it
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    middle = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, middle, (w, h), flags=cv2.INTER_CUBIC,
                             borderMode=cv2.BORDER_REPLICATE)
    rotated = cv2.cvtColor(rotated, cv2.COLOR_BGR2GRAY)

    # save rotated image to a new file; this was commented out in an earlier
    # draft, but the return statement below needs deskewedFileName to exist
    deskewedFileName = getDeskewedFilename(filename)
    cv2.imwrite(deskewedFileName, rotated)

    height, width = rotated.shape

    # show the output image so the correction can be validated
    logging.info("[INFO] angle: {:.5f}".format(angle))
    cv2.imshow("Deskewed", rotated)
    cv2.waitKey(0)

    # release intermediate arrays (CPython would free these locals on
    # return anyway)
    del thresh
    del gray
    del image

    return deskewedFileName, rotated
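# A one-line usage sketch for the function above; 'scan.png' is a
# hypothetical input file, and getDeskewedFilename is assumed to be defined
# elsewhere in the project.
deskewed_name, deskewed_img = deskewImage('scan.png')
print(deskewed_name, deskewed_img.shape)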
L.append(seq_record.seq.count('L'))
M.append(seq_record.seq.count('M'))
N.append(seq_record.seq.count('N'))
P.append(seq_record.seq.count('P'))
Q.append(seq_record.seq.count('Q'))
R.append(seq_record.seq.count('R'))
S.append(seq_record.seq.count('S'))
T.append(seq_record.seq.count('T'))
U.append(seq_record.seq.count('U'))
V.append(seq_record.seq.count('V'))
W.append(seq_record.seq.count('W'))
X.append(seq_record.seq.count('X'))
Y.append(seq_record.seq.count('Y'))

# creating a dataframe (the U and X counts are deliberately left out of the
# columns below)
bio_df = pd.DataFrame(
    np.column_stack([identifiers, lengths, sequences,
                     A, C, D, E, F, G, H, I, K, L, M, N,
                     P, Q, R, S, T, V, W, Y]),
    columns=['idTitle', 'lenTitle', 'seqTitle',
             'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N',
             'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'])

x = bio_df['seqTitle']
chars = []
for line in x:
    chars.append(list(line))

# collect row indices of sequences containing the nonstandard residues U or X;
# enumerate is used because chars.index(each) returns the index of the first
# identical row, which is wrong when two sequences are the same
id_prune = []
for idx, each in enumerate(chars):
    for i in each:
        if i == "U" or i == "X":
            id_prune.append(idx)
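# The fragment above sits inside a Biopython parsing loop of roughly this
# shape; this is a sketch, and 'proteins.fasta' is a hypothetical placeholder
# for whatever file the original script read.
from Bio import SeqIO

identifiers, lengths, sequences = [], [], []
A = []  # ...plus one list per residue letter, as used above
for seq_record in SeqIO.parse('proteins.fasta', 'fasta'):
    identifiers.append(seq_record.id)
    lengths.append(len(seq_record.seq))
    sequences.append(str(seq_record.seq))
    A.append(seq_record.seq.count('A'))
    # ...and so on for the remaining residue counts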
import numpy as np
import matplotlib.pyplot as plt
from pyprobml_utils import save_fig
from scipy.optimize import minimize

plt.rcParams["figure.figsize"] = (5, 5)  # width x height
np.random.seed(0)

# Generate synthetic data
N = 21
wTrue = np.array([1.45, 0.92])
X = np.random.uniform(-2, 2, N)
X = np.column_stack((np.ones(N), X))  # prepend a bias column of ones
y = wTrue[0] * X[:, 0] + wTrue[1] * X[:, 1] + np.random.normal(0, .1, N)

# Plot the SSE surface over parameter space
v = np.arange(-1, 3, .1)
W0, W1 = np.meshgrid(v, v)
SS = np.array([sum((w0 * X[:, 0] + w1 * X[:, 1] - y) ** 2)
               for w0, w1 in zip(np.ravel(W0), np.ravel(W1))])
SS = SS.reshape(W0.shape)

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
surf = ax.plot_surface(W0, W1, SS)
save_fig('lmsSSE.pdf')
plt.draw()
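# The minimum of the SSE surface above is the ordinary least-squares solution,
# which can be checked in closed form (this sanity check is not part of the
# original script):
w_ols, *_ = np.linalg.lstsq(X, y, rcond=None)
print(w_ols)  # should be close to wTrue = [1.45, 0.92]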
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


def main():
    # Read CSV.
    df = pd.read_csv("mask_with_daily_cases.csv", comment='#')

    # Retrieve all data.
    global state, dates, feature_1_mask, new_cases
    state = df.iloc[:, 0]
    dates = df.iloc[:, 1]
    new_cases = df.iloc[:, 2]
    feature_1_mask = df.iloc[:, 3]

    # Collect only the first state's data; compare against the first row's
    # state name to find where its block of rows ends.
    first_state = state[0]
    counter = 1
    while state[counter] == first_state:
        counter += 1

    days_delay = 7
    feature_1_mask = feature_1_mask[0:counter - days_delay]
    feature_1_mask = feature_1_mask / 100
    new_cases = new_cases[days_delay:counter]
    new_cases = new_cases / new_cases.max()
    dates = dates[days_delay:counter]

    # q-step ahead prediction
    q = 7
    dd = 1
    lag = 14
    stride = 1
    XX = new_cases[0:new_cases.size - q - lag * dd:stride]
    for i in range(1, lag):
        X = new_cases[i * dd:new_cases.size - q - (lag - i) * dd:stride]
        M = feature_1_mask[i * dd:feature_1_mask.size - q - (lag - i) * dd:stride]
        XX = np.column_stack((XX, X, M))
    yy = new_cases[lag * dd + q::stride]
    tt = dates[lag * dd + q::stride]

    model = LinearRegression().fit(XX, yy)

    fig = plt.figure()
    predictions = model.predict(XX)
    ax = fig.add_subplot(3, 1, 1)
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    dots = ax.scatter(dates, new_cases, color='r')
    ax.plot(tt, predictions)
    fake2Dline = mpl.lines.Line2D([0], [0], linestyle="none", c='b', marker='o')
    ax.legend([fake2Dline, dots], ['Predictions', 'Training data'], numpoints=1)
    ax.set_xlabel('Dates (MM/DD/YY)')
    ax.set_ylabel('Target Value - New Cases')
    ax.title.set_text('Daily New Cases in Alabama')
    """
    ax = fig.add_subplot(3, 1, 3)
    ax.xaxis.set_major_locator(plt.MaxNLocator(10))
    ax.scatter(tt, new_cases)
    ax.set_xlabel('Dates (MM/DD/YY)')
    ax.set_ylabel('Masks Usage Percent')
    ax.title.set_text('Mask Usage over Time in Alabama')
    """
    plt.show()

    print(mean_squared_error(yy, predictions))
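# A self-contained illustration of the lag-feature construction inside main()
# above, with toy stand-ins for the two series (dd=1, stride=1 as there):
# each row of XX pairs lagged case counts with the lagged mask signal, and yy
# is the series q steps ahead.
import numpy as np

series = np.arange(10, dtype=float)  # stand-in for new_cases
mask = np.linspace(0, 1, 10)         # stand-in for feature_1_mask
q, dd, lag, stride = 2, 1, 3, 1
XX = series[0:series.size - q - lag * dd:stride]
for i in range(1, lag):
    X = series[i * dd:series.size - q - (lag - i) * dd:stride]
    M = mask[i * dd:mask.size - q - (lag - i) * dd:stride]
    XX = np.column_stack((XX, X, M))
yy = series[lag * dd + q::stride]
print(XX.shape, yy.shape)  # (5, 5) and (5,)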
# (the earlier parameter dicts, and the start of params_gbr, are truncated
# in this excerpt)
params_gbr = {'max_features': 'sqrt',
              'min_samples_leaf': 10,
              'n_estimators': 2000,
              'subsample': 0.9}
params_svr = {'C': 4.684210526315789,
              'epsilon': 0.01,
              'gamma': 'scale',
              'kernel': 'rbf'}

ridge = Ridge(**params_ridge)
Enet = ElasticNet(**params_ElasticNet)
gbr = GradientBoostingRegressor(**params_gbr)
# named lgbm so the model no longer shadows the imported lightgbm module
lgbm = lgb.LGBMRegressor(objective="regression", metric="mse",
                         boosting_type="dart", device_type="cpu",
                         tree_learner="feature", verbosity=-50,
                         **params_lgb)
svr = SVR(**params_svr)

model_list = [ridge, Enet, gbr, lgbm, svr]
for model in model_list:
    print(f'\nFitting model {model.__class__}')
    model.fit(train_X, train_y)

print("\nPrediction")
# equal-weight blend: stack each model's predictions as a column, then average
preds = np.column_stack([model.predict(test_X) for model in model_list]).mean(axis=1)
print(f'Shape of predictions: {preds.shape}')
preds = np.exp(pipeline_y.inverse_transform(preds.reshape(-1, 1)))

submit_frame = pd.DataFrame()
submit_frame['Id'] = testData.Id
submit_frame['SalePrice'] = preds
submit_frame.to_csv('submission_ensemble_avg1.csv', index=False)
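# The averaging step above is plain equal-weight blending; a self-contained
# sketch of the same idea on synthetic data (the two models here are
# illustrative, not the tuned ensemble above):
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from sklearn.svm import SVR

X_demo, y_demo = make_regression(n_samples=100, n_features=5, noise=0.1, random_state=0)
models = [Ridge(alpha=1.0), SVR(kernel='rbf')]
for m in models:
    m.fit(X_demo, y_demo)
blended = np.column_stack([m.predict(X_demo) for m in models]).mean(axis=1)
print(blended.shape)  # (100,)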