def process_binned(self, counts, x, scale, monitor, vanadium_count=None, vanadium_monitor=None, vcorr=None):
    """Bin the data"""
    binWidth = self.getProperty("BinWidth").value
    bins = np.arange(x.min(), x.max() + binWidth, binWidth)  # calculate bin boundaries
    inds = np.digitize(x, bins)  # get bin indices

    if vcorr is not None:
        vcorr = np.tile(vcorr, (counts.shape[1], 1)).T
        vcorr_binned = np.bincount(inds, weights=vcorr.ravel(), minlength=len(bins))
    else:
        vanadium_count = np.tile(vanadium_count, (counts.shape[1], 1)).T
        vanadium_binned = np.bincount(inds, weights=vanadium_count.ravel(), minlength=len(bins))
        vanadium_monitor_binned = np.bincount(inds, minlength=len(bins)) * vanadium_monitor

    monitor = np.tile(monitor, (counts.shape[0], 1))
    counts_binned = np.bincount(inds, weights=counts.ravel(), minlength=len(bins))
    monitor_binned = np.bincount(inds, weights=monitor.ravel(), minlength=len(bins))
    number_binned = np.bincount(inds, minlength=len(bins))

    old_settings = np.seterr(all='ignore')  # otherwise it will complain about divide by zero
    if vcorr is not None:
        y = (counts_binned/vcorr_binned*number_binned/monitor_binned)[1:]
        e = (np.sqrt(1/counts_binned)[1:])*y
    else:
        y = (counts_binned/vanadium_binned*vanadium_monitor_binned/monitor_binned)[1:]
        e = (np.sqrt(1/counts_binned + 1/vanadium_binned + 1/vanadium_monitor + 1/monitor_binned)[1:])*y
    np.seterr(**old_settings)
    x = bins
    return x, np.nan_to_num(y*scale), np.nan_to_num(e*scale)
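# A minimal, self-contained sketch (separate from the routine above, with made-up
# numbers) of the digitize/bincount pattern used for the weighted rebinning, and
# of nan_to_num soaking up the divide-by-zero bins.
import numpy as np

x = np.array([0.1, 0.4, 0.7, 1.1, 1.5])
counts = np.array([10., 20., 30., 40., 50.])
bins = np.arange(x.min(), x.max() + 0.5, 0.5)   # bin boundaries
inds = np.digitize(x, bins)                     # bin index for each point

counts_binned = np.bincount(inds, weights=counts, minlength=len(bins))
number_binned = np.bincount(inds, minlength=len(bins))

with np.errstate(all='ignore'):                 # empty bins divide by zero
    y = np.nan_to_num(counts_binned / number_binned)[1:]
print(y)   # -> [15. 30. 45.]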
def segment_units(self, sfid, verb=False):
    """
    This function takes the list of unit breakpoints, plus the raw metadata, and
    assembles 'cooked' segments in the corpus segtable.
    Note: currently ignores the amplitude scalars (aside from generating stats)...
    """
    segmented = self.get_sorted_units_list(sfid)
    raw_amps, raw_mfccs, raw_chromas = self.get_raw_metadata(sfid)
    amps, reheated = [], []

    if verb: print('raw: ', raw_amps)
    amps_stripped = np.nan_to_num(raw_amps)
    if verb: print('amps_stripped: ', amps_stripped)
    mfccs_stripped = np.nan_to_num(raw_mfccs)
    if verb: print('mfccs_stripped: ', mfccs_stripped)
    chromas_stripped = np.nan_to_num(raw_chromas)
    if verb: print('chromas_stripped: ', chromas_stripped)

    for relid, sfu in enumerate(segmented):
        offset = int(math.floor(sfu.onset / self.HOP_SECS))
        dur = int(math.floor(sfu.dur / self.HOP_SECS))
        if verb: print('[[', offset, '|', dur, ']]')
        # NOTE: the powers proc_func takes (array, offset, dur) while the others take a pre-sliced array
        self.sftree.nodes[sfid].add_metadata_for_relid(relid, powers=self.feat.powers.proc_funcs[0](amps_stripped, offset, dur))
        self.sftree.nodes[sfid].add_metadata_for_relid(relid, mfccs=self.feat.proc_funcs[0](mfccs_stripped[offset:(offset+dur)]))
        if verb: print(self.feat.proc_funcs[1])
        if verb: print(mfccs_stripped[offset:(offset+dur)])
        self.sftree.nodes[sfid].add_metadata_for_relid(relid, mfcc_vars=self.feat.proc_funcs[1](mfccs_stripped[offset:(offset+dur)]))
        self.sftree.nodes[sfid].add_metadata_for_relid(relid, chromas=self.feat.proc_funcs[0](chromas_stripped[offset:(offset+dur)]))
        self.sftree.nodes[sfid].add_metadata_for_relid(relid, chroma_vars=self.feat.proc_funcs[1](chromas_stripped[offset:(offset+dur)]))
def shifted_corr(reference, image, displacement):
    """Calculate the correlation between the reference and the image shifted
    by the given displacement.

    Parameters
    ----------
    reference : np.ndarray
    image : np.ndarray
    displacement : np.ndarray

    Returns
    -------
    correlation : float
    """
    ref_cuts = np.maximum(0, displacement)
    ref = reference[ref_cuts[0]:, ref_cuts[1]:, ref_cuts[2]:]
    im_cuts = np.maximum(0, -displacement)
    im = image[im_cuts[0]:, im_cuts[1]:, im_cuts[2]:]
    s = np.minimum(im.shape, ref.shape)
    ref = ref[:s[0], :s[1], :s[2]]
    im = im[:s[0], :s[1], :s[2]]
    ref -= nanmean(ref.reshape(-1, ref.shape[-1]), axis=0)
    ref = np.nan_to_num(ref)
    im -= nanmean(im.reshape(-1, im.shape[-1]), axis=0)
    im = np.nan_to_num(im)
    assert np.all(np.isfinite(ref)) and np.all(np.isfinite(im))
    corr = nanmean(
        [old_div(np.sum(i * r), np.sqrt(np.sum(i * i) * np.sum(r * r)))
         for i, r in zip(np.rollaxis(im, -1), np.rollaxis(ref, -1))])
    return corr
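# Self-contained toy check (arrays invented here) of the core operation used
# above: mean-subtract, zero out the NaNs with nan_to_num, then take the
# normalized correlation. Not the function itself, just the idea.
import numpy as np

ref = np.array([1.0, 2.0, np.nan, 4.0])
im = np.array([2.0, 4.0, 6.0, 8.0])
ref = np.nan_to_num(ref - np.nanmean(ref))
im = np.nan_to_num(im - np.nanmean(im))
corr = np.sum(ref * im) / np.sqrt(np.sum(ref * ref) * np.sum(im * im))
print(corr)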
def plotheatmaps(data, title=''):
    local = get_local_full()
    glob = get_global_full()
    gden = [('%4.0f' % float(i)).lstrip('0') for i in glob['density']]
    gcnt = [int(i) for i in glob['count']]
    max_gden = max([float(i) for i in glob['density']])
    for tbin in data.keys():
        c = np.array(data[tbin])
        # gcnt = np.sum(c, axis=1)
        # lcnt = np.sum(c, axis=0)
        lcnt = [int(i) for i in local[tbin]['count']]
        lden = [float(i) for i in local[tbin]['density']]
        lden_norm = [i / sum(lden) for i in lden]
        lden_scaled = [i * max_gden for i in lden_norm]
        denlab = [('%3.0f' % i) for i in lden_scaled]
        print(local[tbin]['volume'])
        glabels = ['%4d/%4s' % i for i in zip(gcnt, gden)]
        llabels = ['%4d/%4s' % i for i in zip(lcnt, denlab)]
        norm_c = np.nan_to_num(c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
        P.heatmap(norm_c, glabels, llabels, title + tbin + '_col')
        d = c.T
        norm_r = np.nan_to_num(d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
        P.heatmap(norm_r, glabels, llabels, title + tbin + '_row')
        combined = (norm_c + norm_r) / 2
        P.heatmap(combined, glabels, llabels, title + tbin + '_combined')
        print(combined)
def time_std(self):
    if hasattr(self, '_time_std'):
        return self._time_std
    if self.savedir is not None:
        try:
            with open(join(self.savedir, 'time_std.pkl'), 'rb') as f:
                time_std = pickle.load(f)
        except IOError:
            pass
        else:
            # Same protocol as the averages. Make sure the
            # std is a single 4D (zyxc) array and if not just
            # re-calculate the time std.
            if isinstance(time_std, np.ndarray):
                self._time_std = time_std
                return self._time_std
    sums = np.zeros(self.frame_shape)
    sums_squares = np.zeros(self.frame_shape)
    counts = np.zeros(self.frame_shape)
    for frame in it.chain.from_iterable(self):
        sums += np.nan_to_num(frame)
        sums_squares += np.square(np.nan_to_num(frame))
        counts[np.isfinite(frame)] += 1
    means = old_div(sums, counts)
    mean_of_squares = old_div(sums_squares, counts)
    std = np.sqrt(mean_of_squares - np.square(means))
    if self.savedir is not None and not self._read_only:
        with open(join(self.savedir, 'time_std.pkl'), 'wb') as f:
            pickle.dump(std, f, pickle.HIGHEST_PROTOCOL)
    self._time_std = std
    return self._time_std
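# Toy, standalone sketch (fabricated two-pixel frames) of the accumulation used
# above: NaNs contribute zero to the running sums and are left out of the
# per-pixel counts, giving a per-pixel standard deviation over valid samples.
import numpy as np

frames = [np.array([1.0, np.nan]), np.array([3.0, 4.0])]
sums = np.zeros(2)
sums_squares = np.zeros(2)
counts = np.zeros(2)
for frame in frames:
    sums += np.nan_to_num(frame)
    sums_squares += np.square(np.nan_to_num(frame))
    counts[np.isfinite(frame)] += 1
means = sums / counts
std = np.sqrt(sums_squares / counts - np.square(means))
print(std)   # -> [1. 0.]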
def uncertaintyMap(self, psf, method='convolve', fitParams=None):
    '''
    Return the intensity-based uncertainty, as a standard deviation, due to the
    unsharpness of the image.

    method = ['convolve', 'unsupervised_wiener']
    The latter also returns the reconstructed (deconvolved) image.
    '''
    # ignore background:
    # img[img<0]=0
    ## noise should not influence sharpness uncertainty:
    ## img = median_filter(img, 3)
    # decrease noise in order not to overestimate result:
    img = scaleSignal(self.img, fitParams=fitParams)
    if method == 'convolve':
        blurred = convolve2d(img, psf, 'same')
        m = abs(img - blurred) / abs(img + blurred)
        m = np.nan_to_num(m)
        m *= self.std**2
        m[m > 1] = 1
        self.blur_distortion = m
        np.save('blurred', blurred)
        return m
    else:
        restored = unsupervised_wiener(img, psf)[0]
        m = abs(img - restored) / abs(img + restored)
        m = np.nan_to_num(m)
        m *= self.std**2
        m[m > 1] = 1
        self.blur_distortion = m
        return m, restored
def _to_raw(self, data1, data2):
    from matplotlib import pyplot as plt
    from matplotlib.colors import Normalize
    cmapdir = options.config.get("webgl", "colormaps")
    cmap = plt.imread(os.path.join(cmapdir, "%s.png" % self.cmap))

    norm1 = Normalize(self.vmin, self.vmax)
    norm2 = Normalize(self.vmin2, self.vmax2)

    d1 = np.clip(norm1(data1), 0, 1)
    d2 = np.clip(1 - norm2(data2), 0, 1)
    dim1 = np.round(d1 * (cmap.shape[1] - 1))
    # NaNs in the data seemed to cause a weird interaction with the conversion to uint32
    dim1 = np.nan_to_num(dim1).astype(np.uint32)
    dim2 = np.round(d2 * (cmap.shape[0] - 1))
    dim2 = np.nan_to_num(dim2).astype(np.uint32)

    colored = cmap[dim2.ravel(), dim1.ravel()]
    r, g, b, a = colored.T
    r.shape = dim1.shape
    g.shape = dim1.shape
    b.shape = dim1.shape
    a.shape = dim1.shape

    # Preserve NaN values as alpha = 0
    aidx = np.logical_or(np.isnan(data1), np.isnan(data2))
    a[aidx] = 0

    # Code from master, to handle alpha input; probably better here but not tested.
    # # Possibly move this above setting nans to alpha = 0;
    # # Possibly multiply specified alpha by alpha in colormap??
    # if 'alpha' in self.attrs:
    #     # Over-write alpha from colormap / nans with alpha arg if provided.
    #     # Question: Might it be important to keep alpha as an attr?
    #     a = self.attrs.pop('alpha')
    return r, g, b, a
def crossEntropy(a, t):
    a = np.array(a)
    t = np.array(t)
    num_samples = len(a[:, 0])
    p = np.array([x / np.sum(x) for x in a])  # normalize output to [0, 1]
    return np.sum(-t * np.nan_to_num(np.log(p))
                  - (1 - t) * np.nan_to_num(np.log(1. - p))) / num_samples
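# Hedged usage sketch for the helper above (assumes crossEntropy is in scope);
# the activations and targets are made up. nan_to_num keeps the log terms finite
# when a normalized output is exactly 0 or 1, so the 0*log(0) products come out
# as 0 instead of NaN.
import numpy as np

a = np.array([[2.0, 0.0], [1.0, 3.0]])   # raw outputs (rows = samples)
t = np.array([[1.0, 0.0], [0.0, 1.0]])   # one-hot targets
print(crossEntropy(a, t))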
def __lazy_cost_function__(H, Y):
    result = 0.0
    for i in range(0, Y.shape[0]):
        a = np.nan_to_num(np.log2(H[i]) * Y[i])
        b = np.nan_to_num((1. - Y[i]) * np.log2(1. - H[i]))
        result += a + b
    return result
def compute_parameters(input_data, colors=None, rays=None):
    """
    Having as inputs a Connectivity matrix (required) and two arrays that
    represent the rays and colors of the nodes from the matrix (optional),
    this method will build the required parameter dictionary that will be
    sent to the HTML/JS 3D representation of the connectivity matrix.
    """
    if colors is not None:
        color_list = colors.array_data.tolist()
        color_list = ABCDisplayer.get_one_dimensional_list(color_list, input_data.number_of_regions,
                                                           "Invalid input size for Sphere Colors")
        color_list = numpy.nan_to_num(numpy.array(color_list, dtype=numpy.float64)).tolist()
    else:
        color_list = [1.0] * input_data.number_of_regions

    if rays is not None:
        rays_list = rays.array_data.tolist()
        rays_list = ABCDisplayer.get_one_dimensional_list(rays_list, input_data.number_of_regions,
                                                          "Invalid input size for Sphere Sizes")
        rays_list = numpy.nan_to_num(numpy.array(rays_list, dtype=numpy.float64)).tolist()
    else:
        rays_list = [1.0] * input_data.number_of_regions

    params = dict(raysArray=json.dumps(rays_list), rayMin=min(rays_list), rayMax=max(rays_list),
                  colorsArray=json.dumps(color_list), colorMin=min(color_list), colorMax=max(color_list))
    return params, {}
def compare_derivatives(self, var_in, var_out, rel_error=False):
    model = self.model

    # Numeric
    Jn = model.calc_gradient(var_in, var_out, mode="fd",
                             return_format='array')
    #print 'finite diff', Jn

    # Analytic forward
    Jf = model.calc_gradient(var_in, var_out, mode='fwd',
                             return_format='array')
    #print 'forward', Jf

    if rel_error:
        diff = np.nan_to_num(abs(Jf - Jn) / Jn)
    else:
        diff = abs(Jf - Jn)

    assert_rel_error(self, diff.max(), 0.0, 1e-3)

    # Analytic adjoint
    Ja = model.calc_gradient(var_in, var_out, mode='rev',
                             return_format='array')
    #print Ja

    if rel_error:
        diff = np.nan_to_num(abs(Ja - Jn) / Jn)
    else:
        diff = abs(Ja - Jn)

    assert_rel_error(self, diff.max(), 0.0, 1e-3)
def attractive(self):
    # Potential energy and force for r < self.r_c
    r_low = np.arange(0, self.r_c, 0.02)
    with np.errstate(all='ignore'):
        v_att_low = np.zeros_like(r_low) - 1
        v_rep_low = np.zeros_like(r_low) + 4.0*((self.sigma/r_low)**12 - (self.sigma/r_low)**6 + (1.0/4.0))
        force_low = np.zeros_like(r_low) + 4.0*(12*(self.sigma**12)/(r_low**13) - (6*(self.sigma**6)/(r_low**7)))

    # Potential energy and force for r_c <= r <= r_c + w_c
    r_mid = np.arange(r_low[len(r_low)-1] + 0.02, self.r_c + self.w_c, 0.02)
    v_att_mid = np.zeros_like(r_mid) - (np.cos(np.pi*(r_mid - self.r_c)/(2.0*self.w_c)))**2
    v_rep_mid = np.zeros_like(r_mid)
    force_mid = np.zeros_like(r_mid) - np.cos(np.pi*(r_mid - self.r_c)/(2.0*self.w_c))*np.sin(np.pi*(r_mid - self.r_c)/(2.0*self.w_c))*(np.pi/self.w_c)

    # For r > r_c + w_c
    r_hi = np.arange(r_mid[len(r_mid)-1] + 0.02, 4.02, 0.02)
    v_att_hi = np.zeros_like(r_hi)
    v_rep_hi = np.zeros_like(r_hi)
    force_hi = np.zeros_like(r_hi)

    # Concatenate for full attractive forces
    r = np.append(np.append(r_low, r_mid), r_hi)
    v_attractive = np.append(np.append((v_att_low + v_rep_low), (v_att_mid + v_rep_mid)), v_att_hi + v_rep_hi)
    force_attractive = np.append(np.append(force_low, force_mid), force_hi)
    force_attractive = np.nan_to_num(force_attractive)
    v_attractive = np.nan_to_num(v_attractive)
    r[0] = 1.0e-6
    return r, force_attractive, v_attractive
def costf(self, train_data, train_targets):
    '''train_data should contain the training inputs and train_targets the
    target vectors. Evaluates the cross-entropy cost with the current set of
    data and parameters.'''
    Y = self.Y(train_data)
    J = -sum([dot(t, ly) for t, ly in
              zip(train_targets, np.nan_to_num(np.log(np.nan_to_num(Y))))])
    return J
def data_prepare():
    dataset1 = np.loadtxt('1.txt', dtype=float)
    dataset1 = np.nan_to_num(normalize_cols(dataset1))
    label1 = np.ones((len(dataset1), 1))      # generate label1
    dataset1 = np.concatenate((dataset1, label1), axis=1)

    dataset2 = np.loadtxt('2.txt', dtype=float)
    dataset2 = np.nan_to_num(normalize_cols(dataset2))
    label2 = 2*np.ones((len(dataset2), 1))    # generate label2
    dataset2 = np.concatenate((dataset2, label2), axis=1)

    dataset3 = np.loadtxt('3.txt', dtype=float)
    dataset3 = np.nan_to_num(normalize_cols(dataset3))
    label3 = 3*np.ones((len(dataset3), 1))    # generate label3
    dataset3 = np.concatenate((dataset3, label3), axis=1)

    dataset4 = np.loadtxt('4.txt', dtype=float)
    dataset4 = np.nan_to_num(normalize_cols(dataset4))
    label4 = 4*np.ones((len(dataset4), 1))    # generate label4
    dataset4 = np.concatenate((dataset4, label4), axis=1)

    dataset5 = np.loadtxt('5.txt', dtype=float)
    dataset5 = np.nan_to_num(normalize_cols(dataset5))
    label5 = 5*np.ones((len(dataset5), 1))    # generate label5
    dataset5 = np.concatenate((dataset5, label5), axis=1)

    dataset = np.concatenate((dataset1, dataset2, dataset3, dataset4, dataset5, dataset5, dataset5), axis=0)
    random.shuffle(dataset)  # random shuffle
    return (dataset)
def test_mflist():
    ml = flopy.modflow.Modflow(model_ws=out_dir)
    dis = flopy.modflow.ModflowDis(ml, 10, 10, 10, 10)
    sp_data = {0: [[1, 1, 1, 1.0], [1, 1, 2, 2.0], [1, 1, 3, 3.0]],
               1: [1, 2, 4, 4.0]}
    wel = flopy.modflow.ModflowWel(ml, stress_period_data=sp_data)
    m4ds = ml.wel.stress_period_data.masked_4D_arrays

    sp_data = flopy.utils.MfList.masked4D_arrays_to_stress_period_data(
        flopy.modflow.ModflowWel.get_default_dtype(), m4ds)
    assert np.array_equal(sp_data[0], ml.wel.stress_period_data[0])
    assert np.array_equal(sp_data[1], ml.wel.stress_period_data[1])
    # the last entry in sp_data (kper==9) should equal the last entry
    # with actual data in the well file (kper==1)
    assert np.array_equal(sp_data[9], ml.wel.stress_period_data[1])

    pth = os.path.join('..', 'examples', 'data', 'mf2005_test')
    ml = flopy.modflow.Modflow.load(os.path.join(pth, "swi2ex4sww.nam"), verbose=True)
    m4ds = ml.wel.stress_period_data.masked_4D_arrays

    sp_data = flopy.utils.MfList.masked4D_arrays_to_stress_period_data(
        flopy.modflow.ModflowWel.get_default_dtype(), m4ds)

    # make a new wel file
    wel = flopy.modflow.ModflowWel(ml, stress_period_data=sp_data)
    flx1 = m4ds["flux"]
    flx2 = wel.stress_period_data.masked_4D_arrays["flux"]

    flx1 = np.nan_to_num(flx1)
    flx2 = np.nan_to_num(flx2)

    assert flx1.sum() == flx2.sum()
def transform_row(self, i, df, data_dir):
    """
    Normalizes the data (X, y, w, ...) in a single row.
    """
    row = df.iloc[i]
    if self.transform_X:
        X = load_from_disk(os.path.join(data_dir, row['X-transformed']))
        X = np.nan_to_num((X - self.X_means) / self.X_stds)
        save_to_disk(X, os.path.join(data_dir, row['X-transformed']))

    if self.transform_y:
        y = load_from_disk(os.path.join(data_dir, row['y-transformed']))
        # transform tasks as normal
        y = np.nan_to_num((y - self.y_means) / self.y_stds)
        # add 2nd order correction term to gradients
        grad_var = 1/self.y_stds[0] * (self.ydely_means - self.y_means[0]*self.y_means[1:])
        for i in range(y.shape[0]):
            y[i, 1:] = y[i, 1:] - grad_var*y[i, 0]/self.y_stds[0]
        save_to_disk(y, os.path.join(data_dir, row['y-transformed']))
def AvgQE(x, y, ye, bin, bintype=1, hardlimit=0, binmax=None):
    '''Average values of scatter plot'''
    def HelpMe(kk, BR, ii):
        if len(kk) > 0:
            return np.average(kk)
        elif len(kk) == 0:
            return (BR[ii-1] + BR[ii]) / 2.

    x = array(x)
    y = array(y)
    w = 1 / array(ye)**2.
    BinNo, BinsReturned = ReturnBins(x, bin, bintype=bintype, hardlimit=hardlimit, binmax=binmax)
    #print BinNo, BinsReturned
    #BinSize = np.max(BinNo)+1
    BinSize = len(BinsReturned)

    xavg = [HelpMe(x[BinNo == i], BinsReturned, i) for i in range(1, BinSize)]
    xstd = [np.std(x[BinNo == i]) for i in range(1, BinSize)]
    yavg = [np.average(y[BinNo == i], weights=w[BinNo == i]) for i in range(1, BinSize)]
    ystd = [np.sqrt(1/np.sum(w[BinNo == i])) for i in range(1, BinSize)]
    N = [y[BinNo == i].shape[0] for i in range(1, BinSize)]

    xavg = np.array(xavg)
    xstd = np.array(xstd)
    yavg = np.array(yavg)
    ystd = np.array(ystd)
    N = np.array(N)

    yavg = np.nan_to_num(yavg)
    ystd = np.nan_to_num(ystd)
    xstd = np.nan_to_num(xstd)
    N = np.nan_to_num(N)

    return xavg, xstd, yavg, ystd, N
def list_of_tuples_rm_flat_avg_signal(ret_values, lot_list, average_signal, discr_coefficient):
    oper_list = list()
    if len(ret_values[0, :]) >= len(average_signal):
        v_length = len(average_signal)
    else:
        v_length = len(ret_values[0, :])
    for x in ret_values:
        oper_list.append(np.trapz(np.nan_to_num(np.abs(x[:v_length])), x=np.arange(v_length)))
    i = lot_list[0][1][0]
    lot_rm_index = list()
    lotcopy = list()
    count = 0
    #rmcount = 0
    # rescaling
    average_signal = average_signal[:v_length]
    # integrating average signal
    avg_integral = np.trapz(np.nan_to_num(np.abs(average_signal)), x=np.arange(v_length))
    for a in lot_list:
        j = a[1][1]
        if np.abs(avg_integral - oper_list[j]) / oper_list[j] > discr_coefficient:
            lot_rm_index.append(count)
            #rmcount += 1
            count += 1
            continue
        # i.e. if a row is caught by the coefficient test it is not written to the
        # new list; otherwise the row (whose oper value agrees) is simply copied over
        lotcopy.append(a)
        count += 1
    #print 'Removed a total of ', rmcount, ' signals'
    #print 'Reached count:', count
    return lotcopy
def transform_array(self, X, y, w):
    """Transform the data in a set of (X, y, w) arrays."""
    if self.transform_X:
        X = np.nan_to_num((X - self.X_means) / self.X_stds)
    if self.transform_y:
        y = np.nan_to_num((y - self.y_means) / self.y_stds)
    return (X, y, w)
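# Standalone sketch (toy matrix) of the z-scoring pattern used by the transform
# methods above: a zero-variance feature would otherwise produce NaN, which
# nan_to_num maps back to finite values.
import numpy as np

X = np.array([[1., 5.], [3., 5.], [5., 5.]])
X_means, X_stds = X.mean(axis=0), X.std(axis=0)   # second column has std 0
with np.errstate(invalid='ignore'):
    X_norm = np.nan_to_num((X - X_means) / X_stds)
print(X_norm)   # second column comes out as all zeros instead of NaN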
def linear_regression(folder):
    statsfldr = folder + statsfldrext
    line_fit_log = [f for f in os.listdir(statsfldr) if re.search("linefit.txt", f)]
    if not len(line_fit_log):
        print("Calculating the linear regression\n")
        av, cols, rows = open_as_array(statsfldr + "/average" + ext)
        avg_array = av.ravel()   # ravel converts the 2d array to a 1d array
        avg_array = array(avg_array)
        # Find the linear model for SNR as a function of AVERAGE, y = snr, x = avg
        snr, cols, rows = open_as_array(statsfldr + "/SNR" + ext)
        snr_array = snr.ravel()
        yy = numpy.nan_to_num(snr_array)
        print(len(yy))
        xx = numpy.nan_to_num(avg_array)
        # y = numpy.array([a[:5] for a in yy])
        s = 1   # keep only s/1000 of the data, taken from the middle
        length = len(yy)
        mini = (length - length * s // 1000) // 2   # integer division so the result can be used as a slice index
        maxi = length - mini
        print(mini)
        print(maxi)
        y = yy[mini:maxi]
        print(len(y))
        x = xx[mini:maxi]
        y[y > 4095] = 4095   # clip any absurdly high values to 4095
        x[x > 4095] = 4095   # clip any absurdly high values to 4095
        print('this part takes time')
        popt, pcov = curve_fit(fit_func_line, x, y)   # curve_fit needs a function to call to return the fit
        write_to_log('\t' + str(datetime.datetime.now()) + ' Calculated the linear regression\n')
        with open(statsfldr + "/linefit.txt", "w") as text_file:   # "a" is to append, "w" is to overwrite
            text_file.write(str(popt))
        plotting(x, y, popt)
    return
def costFunctionReg(theta, X, y, lmbda):
    # Initialize some useful values
    m = y.shape[0]  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================
    def h(X, theta):
        return X.dot(theta)

    J = np.float(-y.T * np.nan_to_num(np.log(sigmoid(h(X, theta))).T) -
                 (1 - y).T * np.nan_to_num(np.log(1 - sigmoid(h(X, theta))).T)) / m
    reg_cost = theta.copy()
    reg_cost[0] = 0
    J += (lmbda * reg_cost.T.dot(reg_cost)) / (2 * m)

    grad = (sigmoid(h(X, theta)) - y.T).dot(X) / m
    reg_grad = theta * (float(lmbda) / m)
    reg_grad[0] = 0
    grad = grad.A1 + reg_grad
    # =============================================================
    return (J, grad)
def test_unity_3x3_withnan(self, boundary):
    '''
    Test that a 3x3 unit kernel returns the same array (except when
    boundary is None). This version includes a NaN value in the original
    array.
    '''
    x = np.array([[1., 2., 3.],
                  [4., np.nan, 6.],
                  [7., 8., 9.]], dtype='>f8')

    y = np.array([[0., 0., 0.],
                  [0., 1., 0.],
                  [0., 0., 0.]], dtype='>f8')

    z = convolve(x, y, boundary=boundary, nan_treatment='fill',
                 preserve_nan=True)

    assert np.isnan(z[1, 1])
    x = np.nan_to_num(z)
    z = np.nan_to_num(z)

    if boundary is None:
        assert np.all(z == np.array([[0., 0., 0.],
                                     [0., 0., 0.],
                                     [0., 0., 0.]], dtype='>f8'))
    else:
        assert np.all(z == x)
def write_data_array(self, output_file_name, times):
    # find distance to next nearest time
    f = h5py.File(output_file_name, 'w')
    names = self.keys()
    lon = [self[n].meta['longitude'] for n in names]
    lat = [self[n].meta['latitude'] for n in names]
    positions = np.array([lon, lat]).transpose()
    f['position'] = positions
    f['name'] = names
    f['time'] = times
    f.create_dataset('mean', shape=(len(times), len(names), 3), dtype=float)
    f.create_dataset('mask', shape=(len(times), len(names)), dtype=bool)
    f.create_dataset('covariance', shape=(len(times), len(names), 3, 3), dtype=float)
    f.create_dataset('variance', shape=(len(times), len(names), 3), dtype=float)
    f.create_dataset('sigma', shape=(len(times), len(names), 3), dtype=float)
    for i, n in enumerate(names):
        logger.info('writing displacement data for station %s' % n)
        mean, sigma = self[n](times)
        f['mean'][:, i, :] = mean
        f['mask'][:, i] = np.any(np.isinf(sigma), axis=1)
        f['covariance'][:, i, :, :] = np.array([np.diag(v) for v in np.nan_to_num(sigma**2)])
        f['variance'][:, i, :] = np.nan_to_num(sigma**2)
        f['sigma'][:, i, :] = np.nan_to_num(sigma)
    f.close()
def decayCoefObjectiveFn(x, Y, EX2):
    """
    Computes the objective function for terms involving lambda in the M-step.
    Checked.
    Input:
        x: value of lambda
        Y: the matrix of observed values
        EX2: the matrix of values of EX2 estimated in the E-step.
    Returns:
        obj: value of objective function
        grad: gradient
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y_squared = Y ** 2
        Y_is_zero = np.abs(Y) < 1e-6
        exp_Y_squared = np.exp(-x * y_squared)
        log_exp_Y = np.nan_to_num(np.log(1 - exp_Y_squared))
        exp_ratio = np.nan_to_num(exp_Y_squared / (1 - exp_Y_squared))
        obj = sum(sum(Y_is_zero * (-EX2 * x) + (1 - Y_is_zero) * log_exp_Y))
        grad = sum(sum(Y_is_zero * (-EX2) + (1 - Y_is_zero) * y_squared * exp_ratio))
        if type(obj) is np.float64:
            obj = -np.array([obj])
        if type(grad) is np.float64:
            grad = -np.array([grad])
        return obj, grad
def compare_csv(csv1, csv2, column_headers=True, eps=1e-3):
    """
    Compare two CSV files column by column; float columns are compared with
    an absolute tolerance of ``eps``.
    """
    column_types = DTYPE_MAP[os.path.basename(csv1)]
    da1 = read_csv(csv1, column_headers, column_types)
    da2 = read_csv(csv2, column_headers, column_types)

    # compare column names
    ret = da1.dtype.names == da2.dtype.names

    # compare all string columns
    fields = [field for field in da1.dtype.fields if da1.dtype[field].kind == 'S']
    if fields:
        ret = ret and np.array_equal(da1[fields], da2[fields])

    # compare all integer columns
    fields = [field for field in da1.dtype.fields if da1.dtype[field].kind == 'i']
    if fields:
        ret = ret and np.array_equal(da1[fields], da2[fields])

    # compare all float type columns (with epsilon)
    fields = [field for field in da1.dtype.fields if da1.dtype[field].kind == 'f']
    # make copy of float view, so that we can safely replace nan's
    if fields:
        fa1 = np.nan_to_num(da1[fields].view((float, len(fields))))
        fa2 = np.nan_to_num(da2[fields].view((float, len(fields))))
        ret = ret and np.allclose(fa1, fa2, rtol=0, atol=eps)

    return ret
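# Self-contained sketch (made-up arrays) of the float-column comparison above:
# zeroing NaNs on both sides makes matching NaNs compare as equal under allclose.
import numpy as np

fa1 = np.array([[1.0, np.nan], [2.0, 3.0]])
fa2 = np.array([[1.0, np.nan], [2.0, 3.0005]])
print(np.allclose(np.nan_to_num(fa1), np.nan_to_num(fa2), rtol=0, atol=1e-3))   # True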
def compute_homogeneous_statistics(unit_statistic, unit_statistic_permutation, p_value_threshold,
                                   homogeneous_statistic='normalized MMD2u', verbose=True):
    """Compute p_values from permutations and create homogeneous statistics.
    """
    # Compute p-values for each unit
    print("Homogeneous statistic: %s" % homogeneous_statistic)
    print("Computing MMD2u thresholds for each unit with p-value=%f" % p_value_threshold)
    mmd2us_threshold = compute_statistic_threshold(unit_statistic_permutation, p_value_threshold)
    print("Computing actual p-values at each unit on the original (unpermuted) data")
    p_value = compute_pvalues_from_permutations(unit_statistic, unit_statistic_permutation)
    print("Computing the p-value of each permutation of each unit.")
    p_value_permutation = compute_pvalues_of_permutations(unit_statistic_permutation)

    # Here we try to massage the unit statistic so that it becomes homogeneous
    # across different units, to compute the cluster statistic later on.
    if homogeneous_statistic == '1-p_value':
        # Use (1 - p_value) instead of the MMD2u statistic: this is perfectly homogeneous
        # across units because the p_value is uniformly distributed, by definition.
        unit_statistic_permutation_homogeneous = 1.0 - p_value_permutation
        unit_statistic_homogeneous = 1.0 - p_value
    elif homogeneous_statistic == 'normalized MMD2u':
        # Use a z-score of MMD2u, which is good if its distribution is normal or approximately normal.
        mmd2us_mean = unit_statistic_permutation.mean(1)
        mmd2us_std = unit_statistic_permutation.std(1)
        unit_statistic_permutation_homogeneous = np.nan_to_num((unit_statistic_permutation - mmd2us_mean[:, None]) / mmd2us_std[:, None])
        unit_statistic_homogeneous = np.nan_to_num((unit_statistic - mmd2us_mean) / mmd2us_std)
    elif homogeneous_statistic == 'unit_statistic':
        # Use the raw unit statistic, assuming it is homogeneous across units (this is not quite true).
        unit_statistic_permutation_homogeneous = unit_statistic_permutation
        unit_statistic_homogeneous = unit_statistic
    elif homogeneous_statistic == 'p_value':
        # Use the p_value instead of the MMD2u statistic: this is perfectly homogeneous
        # across units because the p_value is uniformly distributed, by definition.
        unit_statistic_permutation_homogeneous = p_value_permutation
        unit_statistic_homogeneous = p_value
    else:
        raise Exception

    return p_value, p_value_permutation, unit_statistic_homogeneous, unit_statistic_permutation_homogeneous
def setup_measureCrosstalk(self, isTrimmed=False, nSources=8):
    """Generate a simulated set of exposures and test the measured
    crosstalk matrix.

    Parameters
    ----------
    isTrimmed : `bool`, optional
        Should the simulation use trimmed or untrimmed raw exposures?
    nSources : `int`, optional
        Number of random simulated sources to generate in the simulated
        exposures.

    Returns
    -------
    coeffErr : `np.ndarray`
        Array of booleans indicating if the measured and expected
        crosstalk ratios are smaller than the measured uncertainty in the
        crosstalk ratio.
    """
    config = isrMock.IsrMockConfig()
    config.rngSeed = 12345
    config.doAddCrosstalk = True
    config.doAddSky = True
    config.doAddSource = True
    config.skyLevel = 0.0
    config.readNoise = 0.0
    mcConfig = MeasureCrosstalkConfig()
    mcConfig.threshold = 4000
    mct = MeasureCrosstalkTask(config=mcConfig)
    fullResult = []
    config.isTrimmed = isTrimmed

    # Generate simulated set of exposures.
    for idx in range(0, 10):
        config.rngSeed = 12345 + idx * 1000
        # Allow each simulated exposure to have nSources random
        # bright sources.
        config.sourceAmp = (np.random.randint(8, size=nSources)).tolist()
        config.sourceFlux = ((np.random.random(size=nSources) * 25000.0 + 20000.0).tolist())
        config.sourceX = ((np.random.random(size=nSources) * 100.0).tolist())
        config.sourceY = ((np.random.random(size=nSources) * 50.0).tolist())
        exposure = isrMock.CalibratedRawMock(config=config).run()

        result = mct.run(exposure, dataId=None)
        fullResult.append(result)

    # Generate the final measured CT ratios, uncertainties, pixel counts.
    coeff, coeffSig, coeffNum = mct.reduce(fullResult)

    # Needed because measureCrosstalk cannot find coefficients equal to 0.0
    coeff = np.nan_to_num(coeff)
    coeffSig = np.nan_to_num(coeffSig)

    # Compare result against expectation used to create the simulation.
    expectation = isrMock.CrosstalkCoeffMock().run()
    coeffErr = abs(coeff - expectation) <= coeffSig

    return coeffErr
def test_unity_3_withnan(self, boundary, nan_treatment,
                         normalize_kernel, preserve_nan):
    '''
    Test that a unit kernel with three elements returns the same array
    (except when boundary is None). This version includes a NaN value in
    the original array.
    '''
    x = np.array([1., np.nan, 3.], dtype='>f8')
    y = np.array([0., 1., 0.], dtype='>f8')

    z = convolve(x, y, boundary=boundary, nan_treatment=nan_treatment,
                 normalize_kernel=normalize_kernel,
                 preserve_nan=preserve_nan)

    if preserve_nan:
        assert np.isnan(z[1])

    x = np.nan_to_num(z)
    z = np.nan_to_num(z)

    if boundary is None:
        assert np.all(z == np.array([0., 0., 0.], dtype='>f8'))
    else:
        assert np.all(z == x)
def forward_procedure(A, B, PI, O, wx, pubmsg):
    T = len(O)
    N = len(B)
    alpha = numpy.zeros((N, T))
    C = numpy.zeros(T)

    alpha[:, 0] = PI * [B[i](O[0]) for i in range(N)]

    C[0] = 1.0 / numpy.sum(alpha[:, 0])
    alpha[:, 0] = C[0] * alpha[:, 0]

    ITERATIONS = T * 4
    count = 2 * T
    for t in xrange(1, T):
        # B[i](O[:,t])  =>  numpy.prod(B[i](O[:,t]))
        # b_o = numpy.array([numpy.prod(B[i](O[:,t])) for i in range(N)])
        b_o = [B[i](O[t]) for i in range(N)]

        alpha[:, t] = numpy.dot(alpha[:, t-1], A) * b_o

        C[t] = numpy.nan_to_num(1.0 / numpy.sum(alpha[:, t]))
        alpha[:, t] = numpy.nan_to_num(alpha[:, t] * C[t])

        if numpy.sum(alpha[:, t]) == 0:
            alpha[:, t] = 0.0000000000001

        if wx: wx.CallAfter(pubmsg, "hmm", msg="Running HMM Method... %2.0f%%" % (100.0*(count-1)/(ITERATIONS)))
        count += 1
        #print t, O[:,t], alpha[:,t]

    log_Prob_Obs = - (numpy.sum(numpy.log(C)))
    return ((log_Prob_Obs, alpha, C))
def test_unity_3x3x3_withnan(self, boundary, nan_treatment):
    '''
    Test that a 3x3x3 unit kernel returns the same array (except when
    boundary is None). This version includes a NaN value in the original
    array.
    '''
    x = np.array([[[1., 2., 1.], [2., 3., 1.], [3., 2., 5.]],
                  [[4., 3., 1.], [5., np.nan, 2.], [6., 1., 1.]],
                  [[7., 0., 2.], [8., 2., 3.], [9., 2., 2.]]], dtype='>f8')

    y = np.zeros((3, 3, 3), dtype='>f8')
    y[1, 1, 1] = 1.

    z = convolve(x, y, boundary=boundary, nan_treatment=nan_treatment,
                 preserve_nan=True)

    assert np.isnan(z[1, 1, 1])
    x = np.nan_to_num(z)
    z = np.nan_to_num(z)

    if boundary is None:
        assert np.all(z == np.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                     [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                     [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]],
                                    dtype='>f8'))
    else:
        assert np.all(z == x)
# preprocess data
feature_unscaled = feature.values.reshape(feature.shape[0], -1)
#feature2_unscaled = feature2.values.reshape(feature.shape[0], -1)
#feature_unscaled = np.concatenate((feature_unscaled, feature2_unscaled), axis=1)
label_unscaled = label.values.reshape(label.shape[0], -1)

scaler_f = StandardScaler()
Xorg = scaler_f.fit_transform(feature_unscaled)

scaler_l = StandardScaler()
yorg = scaler_l.fit_transform(label_unscaled)

Xall = np.nan_to_num(Xorg)
yall = np.nan_to_num(yorg)

# shift
shift = 3
for lead in [3, 6, 9, 12, 15, 0]:
    print_header(f'Lead time: {lead} month')

    y = yall[lead+shift:]
    X = Xall[:-lead-shift]
    timey = oni.index[lead+shift:]

    for decade in [60, 70, 80, 90, 100, 110]:
        print_header(f'Test period: {1902+decade}-01-01 till {1911+decade}-12-01')

        K.clear_session()
def automatic_community_detector(G, method = 'un_louvain', scale = 'log', figsize = (15,15), dpi = 100., ylabel = 'Presynaptic Neuron ID', xlabel = 'Postsynaptic Neuron ID', xticklabels = None, yticklabels = None, title = 'Community-Ordered Connectivity Matrix (Log Scale)', vmax = None, export_format = None, figname = 'example_automatic_community'): """ Automatically detects communities for a large networkx graph. # Arguments: G (network.Graph): A networkx graph or subclass object, including flybrainlab.graph.NeuronGraph. method (str): Method to use. One of un_louvain, label_propagation, leiden, louvain, walktrap or infomap. scale (str): 'linear', 'log' or 'scaledlog'. Use linear or log scale to cluster. 'scaledlog' uses 50th percentile of nonzero entries as vmax. figsize (tuple): size of the figure. dpi (float): dpi of the figure. xlabel (str): Name of the x label. ylabel (str): Name of the y label. xticklabels (list): x tick labels to have. yticklabels (list): y tick labels to have. title (str): Title for the diagram. vmax (float): Maximum value for the diagram. export_format (str): if specified, file format to export the diagram. figname (str): Name for the diagram. # Returns: np.ndarray: community-ordered connectivity matrix in linear scale list: a list of list of node ids for each group member """ Gun, all_list_nodes, all_nodes = community_detection(G, method = method) all_pre_nodes = [i for i in all_list_nodes] all_post_nodes = [i for i in all_list_nodes] B = nx.adjacency_matrix(G).todense()[np.ix_(all_pre_nodes,all_post_nodes)].copy() if xticklabels is None: if isinstance(G, NeuronGraph): xticklabels = sum(nodes_to_unames(G, all_nodes),[]) else: xticklabels = sum(all_nodes, []) if yticklabels is None: if isinstance(G, NeuronGraph): yticklabels = sum(nodes_to_unames(G, all_nodes),[]) else: yticklabels = sum(all_nodes, []) if scale == 'log': Bd = np.log10(1.+B) # Bd[Bd>np.percentile(Bd, 90) = np.percentile(Bd, 90) # Bd[Bd<np.percentile(Bd, 10) = np.percentile(Bd, 10) elif scale == 'scaledlog': print('Min B:', np.min(B)) Bd = np.log10(1.+B) # Bd[Bd>np.percentile(Bd, 90)] = np.percentile(Bd, 90) else: Bd = B if title == 'Community-Ordered Connectivity Matrix (Log Scale)': title = 'Community-Ordered Connectivity Matrix' if scale == 'scaledlog': Bd_s = np.array(Bd) Bd_s = Bd_s[Bd_s>0.] vmax = np.percentile(Bd_s, 50) Bd = np.nan_to_num(Bd) print('Scaled vmax:', vmax) sizes = np.cumsum([len(i) for i in all_nodes]) gen_heatmap(Bd, figsize = figsize, dpi = dpi, xlabel = xlabel, ylabel = ylabel, xticklabels = xticklabels, yticklabels = yticklabels, hlines = sizes, vlines = sizes, title = title, vmax = vmax, vmin=0., export_format = export_format, figname = figname) return B, all_nodes
def highly_variable_genes(adata, min_disp=None, max_disp=None, min_mean=None, max_mean=None, n_top_genes=None, n_bins=20, flavor='seurat', binning_method='equal_width', subset=False, inplace=True): """Annotate highly variable genes [Satija15]_ [Zheng17]_. Expects logarithmized data. Depending on `flavor`, this reproduces the R-implementations of Seurat [Satija15]_ and Cell Ranger [Zheng17]_. The normalized dispersion is obtained by scaling with the mean and standard deviation of the dispersions for genes falling into a given bin for mean expression of genes. This means that for each bin of mean expression, highly variable genes are selected. Parameters ---------- adata : :class:`~anndata.AnnData` The annotated data matrix of shape `n_obs` × `n_vars`. Rows correspond to cells and columns to genes. min_mean : `float`, optional (default: 0.0125) If `n_top_genes` unequals `None`, this and all other cutoffs for the means and the normalized dispersions are ignored. max_mean : `float`, optional (default: 3) If `n_top_genes` unequals `None`, this and all other cutoffs for the means and the normalized dispersions are ignored. min_disp : `float`, optional (default: 0.5) If `n_top_genes` unequals `None`, this and all other cutoffs for the means and the normalized dispersions are ignored. max_disp : `float`, optional (default: `None`) If `n_top_genes` unequals `None`, this and all other cutoffs for the means and the normalized dispersions are ignored. n_top_genes : `int` or `None`, optional (default: `None`) Number of highly-variable genes to keep. n_bins : `int`, optional (default: 20) Number of bins for binning the mean gene expression. Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1. You'll be informed about this if you set `settings.verbosity = 4`. flavor : `{'seurat', 'cell_ranger'}`, optional (default: 'seurat') Choose the flavor for computing normalized dispersion. In their default workflows, Seurat passes the cutoffs whereas Cell Ranger passes `n_top_genes`. binning_method : `{'equal_width', 'equal_frequency'}`, optional (default: 'equal_width') Choose the binning method for the means. In `equal_width`, each bin covers the same width. For `equal_frequency`, each bin has an equal number of genes. subset : `bool`, optional (default: `False`) Inplace subset to highly-variable genes if `True` otherwise merely indicate highly variable genes. inplace : `bool`, optional (default: `True`) Whether to place calculated metrics in `.var` or return them. Returns ------- :class:`~numpy.recarray`, `None` Depending on `inplace` returns calculated metrics (:class:`~numpy.recarray`) or updates `.var` with the following fields * `highly_variable` - boolean indicator of highly-variable genes * `means` - means per gene * `dispersions` - dispersions per gene * `dispersions_norm` - normalized dispersions per gene Notes ----- This function replaces :func:`~scanpy.pp.filter_genes_dispersion`. 
""" logg.msg('extracting highly variable genes', r=True, v=4) if not isinstance(adata, AnnData): raise ValueError( '`pp.highly_variable_genes` expects an `AnnData` argument, ' 'pass `inplace=False` if you want to return a `np.recarray`.') if n_top_genes is not None and not all([ min_disp is None, max_disp is None, min_mean is None, max_mean is None ]): logg.info('If you pass `n_top_genes`, all cutoffs are ignored.') if min_disp is None: min_disp = 0.5 if min_mean is None: min_mean = 0.0125 if max_mean is None: max_mean = 3 X = np.expm1(adata.X) if flavor == 'seurat' else adata.X mean, var = materialize_as_ndarray(_get_mean_var(X)) # now actually compute the dispersion mean[mean == 0] = 1e-12 # set entries equal to zero to small value dispersion = var / mean if flavor == 'seurat': # logarithmized mean as in Seurat dispersion[dispersion == 0] = np.nan dispersion = np.log(dispersion) mean = np.log1p(mean) # all of the following quantities are "per-gene" here df = pd.DataFrame() df['mean'] = mean df['dispersion'] = dispersion if flavor == 'seurat': if binning_method == 'equal_width': df['mean_bin'] = pd.cut(df['mean'], bins=n_bins) elif binning_method == 'equal_frequency': df['mean_bin'] = pd.qcut(df['mean'], q=n_bins, duplicates='drop') else: raise ValueError( '`binning_method` needs to be "equal_width" or "equal_frequency"' ) disp_grouped = df.groupby('mean_bin')['dispersion'] disp_mean_bin = disp_grouped.mean() disp_std_bin = disp_grouped.std(ddof=1) # retrieve those genes that have nan std, these are the ones where # only a single gene fell in the bin and implicitly set them to have # a normalized disperion of 1 one_gene_per_bin = disp_std_bin.isnull() gen_indices = np.where( one_gene_per_bin[df['mean_bin'].values])[0].tolist() if len(gen_indices) > 0: logg.msg( 'Gene indices {} fell into a single bin: their ' 'normalized dispersion was set to 1.\n ' 'Decreasing `n_bins` will likely avoid this effect.'.format( gen_indices), v=4) # Circumvent pandas 0.23 bug. Both sides of the assignment have dtype==float32, # but there’s still a dtype error without “.value”. disp_std_bin[one_gene_per_bin.values] = disp_mean_bin[ one_gene_per_bin.values].values disp_mean_bin[one_gene_per_bin.values] = 0 # actually do the normalization df['dispersion_norm'] = (( df['dispersion'].values # use values here as index differs - disp_mean_bin[df['mean_bin'].values].values) / disp_std_bin[df['mean_bin'].values].values) elif flavor == 'cell_ranger': from statsmodels import robust df['mean_bin'] = pd.cut( df['mean'], np.r_[-np.inf, np.percentile(df['mean'], np.linspace(10, 100, n_bins - 1)), np.inf]) disp_grouped = df.groupby('mean_bin')['dispersion'] disp_median_bin = disp_grouped.median() # the next line raises the warning: "Mean of empty slice" with warnings.catch_warnings(): warnings.simplefilter('ignore') disp_mad_bin = disp_grouped.apply(robust.mad) df['dispersion_norm'] = ( np.abs(df['dispersion'].values - disp_median_bin[df['mean_bin'].values].values) / disp_mad_bin[df['mean_bin'].values].values) else: raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"') dispersion_norm = df['dispersion_norm'].values.astype('float32') if n_top_genes is not None: dispersion_norm = dispersion_norm[~np.isnan(dispersion_norm)] dispersion_norm[::-1].sort( ) # interestingly, np.argpartition is slightly slower disp_cut_off = dispersion_norm[n_top_genes - 1] gene_subset = np.nan_to_num( df['dispersion_norm'].values) >= disp_cut_off logg.msg( 'the {} top genes correspond to a normalized dispersion cutoff of'. 
format(n_top_genes, disp_cut_off), v=5, ) else: max_disp = np.inf if max_disp is None else max_disp dispersion_norm[np.isnan(dispersion_norm)] = 0 # similar to Seurat gene_subset = np.logical_and.reduce(( mean > min_mean, mean < max_mean, dispersion_norm > min_disp, dispersion_norm < max_disp, )) logg.msg(' finished', time=True, v=4) if inplace or subset: logg.hint('added\n' ' \'highly_variable\', boolean vector (adata.var)\n' ' \'means\', float vector (adata.var)\n' ' \'dispersions\', float vector (adata.var)\n' ' \'dispersions_norm\', float vector (adata.var)') adata.var['highly_variable'] = gene_subset adata.var['means'] = df['mean'].values adata.var['dispersions'] = df['dispersion'].values adata.var['dispersions_norm'] = df['dispersion_norm'].values.astype( 'float32', copy=False) if subset: adata._inplace_subset_var(gene_subset) else: arrays = (gene_subset, df['mean'].values, df['dispersion'].values, df['dispersion_norm'].values.astype('float32', copy=False)) dtypes = [ ('highly_variable', np.bool_), ('means', 'float32'), ('dispersions', 'float32'), ('dispersions_norm', 'float32'), ] return np.rec.fromarrays(arrays, dtype=dtypes)
for ii in hypR:
    # compute policies corresponding to each R in hypR
    # P is assumed to be known
    Q = VI(P, ii[:, np.newaxis], ns, na, l)
    hypQ[:, :, ic] = Q2OptAct(Q)
    ic += 1

D = []
# initial likelihood of each hyp
likhyp = np.ones((ns,)) / ns

# active phase
for ii in range(0, ns):
    # weight votes
    p = np.sum((hypQ * likhyp), axis=2)
    e = np.nan_to_num(-p * np.log(p), 0)
    we = np.sum(e, axis=1)
    #print(np.nonzero(we == np.max(we))[0])
    ss = random.choice(np.nonzero(we == np.max(we))[0])
    a = polO[ss]
    # add element (ss, a) to the demonstration
    D.append((ss, a))
    likhyp *= np.exp(5 * hypQ[ss, a, :] - 1)
    likhyp = likhyp / np.sum(likhyp)
    #print(ss, a, likhyp)
    # if the likelihood of a given hypothesis is bigger than 50%,
    # it is probably the best one
    if np.max(likhyp > 0.5):
        print("Final Dataset", D)
        break
def get_mean(X_raw):
    # use a masked array to ignore -1
    means = np.apply_along_axis(
        lambda x: [np.mean(np.ma.array(x, mask=(x == -1)))], 1, np.array(X_raw))
    return np.nan_to_num(means, nan=0.0)
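# Standalone illustration (toy row) of the masked-mean idea above: -1 acts as a
# missing-value sentinel and is excluded from the mean; nan_to_num(..., nan=0.0)
# then covers rows that are entirely missing.
import numpy as np

row = np.ma.array([1.0, -1.0, 3.0], mask=[False, True, False])
print(row.mean())   # 2.0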
arr_sl[arr_sl < int(0)] = np.nan         # treat negative values as NAs
arr_dem[arr_dem == int(65536)] = np.nan  # treat 65536 as NA
arr_thp[arr_thp == int(65535)] = np.nan  # treat 65535 as NA

print("mean slope", round(np.nanmean(arr_sl), 2))
print("max slope", round(np.nanmax(arr_sl), 2))
print("min slope", round(np.nanmin(arr_sl), 2))
print("mean dem", round(np.nanmean(arr_dem), 2))
print("max dem", round(np.nanmax(arr_dem), 2))
print("min dem", round(np.nanmin(arr_dem), 2))

# task 2
# binary raster where elevation < 1000 m and slope < 30 deg

# slope < 30 deg
arr_sl_bin = np.nan_to_num(arr_sl)
arr_sl_bin[arr_sl_bin < float(30.00)] = 1
arr_sl_bin[arr_sl_bin >= float(30.00)] = 0
np.unique(arr_sl_bin)

# elevation < 1000 m
arr_dem_bin = np.nan_to_num(arr_dem)
arr_dem_bin[arr_dem_bin < float(1000.00)] = 1
arr_dem_bin[arr_dem_bin >= float(1000.00)] = 0
np.unique(arr_dem_bin)

# combine the two arrays
arr_comb = arr_dem_bin + arr_sl_bin
def step(self, dataloader, mode): print('Start {}'.format(mode)) # self.model = self.prev_model if mode == 'train': self.model.train() elif mode == 'val' or mode == 'test': self.model.eval() loss_sum = 0 confidence_loss_sum = 0 depth_loss_sum = 0 rotation_loss_sum = 0 rotation_loss_count = 0 for index, (hp_data, depth_image, camera_info_path, hp_data_gt, annotation_data) in tqdm.tqdm(enumerate(dataloader), total=len(dataloader), desc='{} epoch={}'.format( mode, self.epo), leave=False): # if index == 0: # self.model = self.prev_model self.cameramodel\ = cameramodels.PinholeCameraModel.from_yaml_file( camera_info_path[0]) self.cameramodel.target_size = self.target_size depth_image = hp_data.numpy().copy()[0, 0, ...] depth_image = np.nan_to_num(depth_image) depth_image = unnormalize_depth(depth_image, self.depth_range[0], self.depth_range[1]) hp_data = hp_data.to(self.device) depth_image_bgr = colorize_depth(depth_image, ignore_value=self.depth_range[0]) if mode == 'train': confidence, depth, rotation = self.model(hp_data) elif mode == 'val' or mode == 'test': with torch.no_grad(): confidence, depth, rotation = self.model(hp_data) confidence_np = confidence[0, ...].cpu().detach().numpy().copy() confidence_np[confidence_np >= 1] = 1. confidence_np[confidence_np <= 0] = 0. confidence_vis = cv2.cvtColor(confidence_np[0, ...] * 255, cv2.COLOR_GRAY2BGR) if mode != 'test': pos_weight = hp_data_gt.detach().numpy().copy() pos_weight = pos_weight[:, 0, ...] zeroidx = np.where(pos_weight < 0.5) nonzeroidx = np.where(pos_weight >= 0.5) pos_weight[zeroidx] = 0.5 pos_weight[nonzeroidx] = 1.0 pos_weight = torch.from_numpy(pos_weight) pos_weight = pos_weight.to(self.device) hp_data_gt = hp_data_gt.to(self.device) confidence_gt = hp_data_gt[:, 0:1, ...] rois_list_gt, rois_center_list_gt = find_rois(confidence_gt) criterion = HPNETLoss(self.use_coords).to(self.device) if self.model.rois_list is None or rois_list_gt is None: return None, None annotated_rois = annotate_rois(self.model.rois_list, rois_list_gt, annotation_data) confidence_loss, depth_loss, rotation_loss = criterion( confidence, hp_data_gt, pos_weight, depth, rotation, annotated_rois) if self.train_depth: loss = confidence_loss + rotation_loss + depth_loss else: loss = confidence_loss + rotation_loss if torch.isnan(loss): print('loss is nan!!') self.model = self.prev_model self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.999), eps=1e-10, weight_decay=0, amsgrad=False) self.optimizer.load_state_dict( self.prev_optimizer.state_dict()) continue else: self.prev_model = copy.deepcopy(self.model) self.prev_optimizer = copy.deepcopy(self.optimizer) if mode == 'train': self.optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5) self.optimizer.step() axis_gt = depth_image_bgr.copy() confidence_gt_vis = cv2.cvtColor( confidence_gt[0, 0, ...].cpu().detach().numpy().copy() * 255, cv2.COLOR_GRAY2BGR) # Visualize gt axis and roi for roi, roi_c in zip(rois_list_gt[0], rois_center_list_gt[0]): if roi.tolist() == [0, 0, 0, 0]: continue roi = roi.cpu().detach().numpy().copy() cx = roi_c[0] cy = roi_c[1] depth_and_rotation_gt = get_value_gt([cx, cy], annotation_data[0]) rotation_gt = depth_and_rotation_gt[1:] depth_gt_val = depth_and_rotation_gt[0] unnormalized_depth_gt_val = unnormalize_depth( depth_gt_val, self.depth_range[0], self.depth_range[1]) hanging_point_pose = np.array( self.cameramodel.project_pixel_to_3d_ray( [int(cx), int(cy)])) \ * unnormalized_depth_gt_val * 0.001 if 
self.use_coords: rot = quaternion2matrix(rotation_gt), else: v = np.matmul(quaternion2matrix(rotation_gt), [1, 0, 0]) rot = rotation_matrix_from_axis(v, [0, 1, 0], 'xy') try: draw_axis(axis_gt, rot, hanging_point_pose, self.cameramodel.K) except Exception: print('Fail to draw axis') confidence_gt_vis = draw_roi(confidence_gt_vis, roi, val=depth_gt_val, gt=True) axis_gt = draw_roi(axis_gt, roi, val=depth_gt_val, gt=True) # Visualize pred axis and roi axis_pred = depth_image_bgr.copy() for i, (roi, roi_c) in enumerate( zip(self.model.rois_list[0], self.model.rois_center_list[0])): if roi.tolist() == [0, 0, 0, 0]: continue roi = roi.cpu().detach().numpy().copy() cx = roi_c[0] cy = roi_c[1] dep = depth[i].cpu().detach().numpy().copy() normalized_dep_pred = float(dep) dep = unnormalize_depth(dep, self.depth_range[0], self.depth_range[1]) confidence_vis = draw_roi(confidence_vis, roi, val=normalized_dep_pred) axis_pred = draw_roi(axis_pred, roi, val=normalized_dep_pred) if mode != 'test': if annotated_rois[i][2]: confidence_vis = draw_roi(confidence_vis, annotated_rois[i][0], val=annotated_rois[i][1][0], gt=True) axis_pred = draw_roi(axis_pred, annotated_rois[i][0], val=annotated_rois[i][1][0], gt=True) hanging_point_pose = np.array( self.cameramodel.project_pixel_to_3d_ray( [int(cx), int(cy)])) * float(dep * 0.001) if self.use_coords: # have not check this yet q = rotation[i].cpu().detach().numpy().copy() q /= np.linalg.norm(q) rot = quaternion2matrix(q) else: v = rotation[i].cpu().detach().numpy() v /= np.linalg.norm(v) rot = rotation_matrix_from_axis(v, [0, 1, 0], 'xy') try: draw_axis(axis_pred, rot, hanging_point_pose, self.cameramodel.K) except Exception: print('Fail to draw axis') axis_pred = cv2.cvtColor(axis_pred, cv2.COLOR_BGR2RGB) confidence_vis = cv2.cvtColor(confidence_vis, cv2.COLOR_BGR2RGB) if self.config['use_bgr']: if self.config['use_bgr2gray']: in_gray = hp_data.cpu().detach().numpy().copy()[0, 1:2, ...] 
* 255 in_gray = in_gray.transpose(1, 2, 0).astype(np.uint8) in_gray = cv2.cvtColor(in_gray, cv2.COLOR_GRAY2RGB) in_gray = in_gray.transpose(2, 0, 1) in_img = in_gray else: in_bgr = hp_data.cpu().detach().numpy().copy()[ 0, 3:, ...].transpose(1, 2, 0) in_rgb = cv2.cvtColor(in_bgr, cv2.COLOR_BGR2RGB).transpose( 2, 0, 1) in_img = in_rgb if mode != 'test': confidence_loss_sum += confidence_loss.item() axis_gt = cv2.cvtColor(axis_gt, cv2.COLOR_BGR2RGB) confidence_gt_vis = cv2.cvtColor(confidence_gt_vis, cv2.COLOR_BGR2RGB) if rotation_loss.item() > 0: depth_loss_sum += depth_loss.item() rotation_loss_sum += rotation_loss.item() loss_sum = loss_sum \ + confidence_loss.item() \ + rotation_loss.item() rotation_loss_count += 1 if np.mod(index, 1) == 0: print( 'epoch {}, {}/{},{} loss is confidence:{} rotation:{} depth:{}' .format( # noqa self.epo, index, len(dataloader), mode, confidence_loss.item(), rotation_loss.item(), depth_loss.item())) self.vis.images( [axis_gt.transpose(2, 0, 1), axis_pred.transpose(2, 0, 1)], win='{} axis'.format(mode), opts=dict(title='{} axis'.format(mode))) self.vis.images( [ confidence_gt_vis.transpose(2, 0, 1), confidence_vis.transpose(2, 0, 1) ], win='{}_confidence_roi'.format(mode), opts=dict(title='{} confidence(GT, Pred)'.format(mode))) if self.config['use_bgr']: self.vis.images([in_img], win='{} in_gray'.format(mode), opts=dict(title='{} in_gray'.format(mode))) else: if self.config['use_bgr']: self.vis.images( [ in_img, confidence_vis.transpose(2, 0, 1), axis_pred.transpose(2, 0, 1) ], win='{}-{}'.format(mode, index), opts=dict( title='{}-{} hanging_point_depth (pred)'.format( mode, index))) else: self.vis.images( [ confidence_vis.transpose(2, 0, 1), axis_pred.transpose(2, 0, 1) ], win='{}-{}'.format(mode, index), opts=dict( title='{}-{} hanging_point_depth (pred)'.format( mode, index))) if np.mod(index, 1000) == 0: save_file = osp.join( self.save_dir, 'hpnet_latestmodel_' + self.time_now + '.pt') print('save {}'.format(save_file)) torch.save(self.model.state_dict(), save_file, _use_new_zipfile_serialization=False) if mode != 'test': if len(dataloader) > 0: avg_confidence_loss\ = confidence_loss_sum / len(dataloader) if rotation_loss_count > 0: avg_rotation_loss\ = rotation_loss_sum / rotation_loss_count avg_depth_loss\ = depth_loss_sum / rotation_loss_count avg_loss\ = loss_sum / rotation_loss_count else: avg_rotation_loss = 1e10 avg_depth_loss = 1e10 avg_loss = 1e10 else: avg_loss = loss_sum avg_confidence_loss = confidence_loss_sum avg_rotation_loss = rotation_loss_sum avg_depth_loss = rotation_loss_sum self.vis.line(X=np.array([self.epo]), Y=np.array([avg_confidence_loss]), opts={'title': 'confidence'}, win='confidence loss', name='{}_confidence_loss'.format(mode), update='append') if rotation_loss_count > 0: self.vis.line(X=np.array([self.epo]), Y=np.array([avg_rotation_loss]), opts={'title': 'rotation loss'}, win='rotation loss', name='{}_rotation_loss'.format(mode), update='append') self.vis.line(X=np.array([self.epo]), Y=np.array([avg_depth_loss]), opts={'title': 'depth loss'}, win='depth loss', name='{}_depth_loss'.format(mode), update='append') self.vis.line(X=np.array([self.epo]), Y=np.array([avg_loss]), opts={'title': 'loss'}, win='loss', name='{}_loss'.format(mode), update='append') if mode == 'val': if np.mod(self.epo, self.save_model_interval) == 0: save_file = osp.join( self.save_dir, 'hpnet_latestmodel_' + self.time_now + '.pt') print('save {}'.format(save_file)) torch.save(self.model.state_dict(), save_file, _use_new_zipfile_serialization=False) 
if self.best_loss > avg_loss: print('update best model {} -> {}'.format( self.best_loss, avg_loss)) self.best_loss = avg_loss save_file = osp.join( self.save_dir, 'hpnet_bestmodel_' + self.time_now + '.pt') print('save {}'.format(save_file)) # For ros(python 2, torch 1.4) torch.save(self.model.state_dict(), save_file, _use_new_zipfile_serialization=False)
def step(self, action):
    import numpy as np
    action = np.nan_to_num(action)
    action = np.clip(action, self.action_space.low, self.action_space.high)
    return self.env.step(action)
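# Minimal standalone sketch (hypothetical action bounds) of the sanitising done
# in the wrapper above: NaNs in the action are zeroed, then the result is
# clipped to the action-space limits before being passed on.
import numpy as np

low, high = np.array([-1.0, -1.0]), np.array([1.0, 1.0])
action = np.array([np.nan, 2.5])
action = np.clip(np.nan_to_num(action), low, high)
print(action)   # -> [0. 1.]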
def sentence_similarity(self, wnsimilarity, sentence1, sentence2, icneed=False):
    """ compute the sentence similarity using Wordnet """
    # Tokenize and tag
    sentence1 = pos_tag(word_tokenize(sentence1))
    sentence2 = pos_tag(word_tokenize(sentence2))

    # Get the synsets for the tagged words
    synsets1 = [self.tagged_to_synset(*tagged_word) for tagged_word in sentence1]
    synsets2 = [self.tagged_to_synset(*tagged_word) for tagged_word in sentence2]

    # Filter out the Nones
    synsets1 = [ss for ss in synsets1 if ss]
    synsets2 = [ss for ss in synsets2 if ss]

    score, count = 0.0, 0

    # For each word in the first sentence
    for synset in synsets1:
        # Get the similarity value of the most similar word in the other sentence
        score_list = []
        if icneed == True:
            for ss in synsets2:
                try:
                    temp = wnsimilarity(synset, ss, self.brown_ic)
                    score_list.append(temp)
                except:
                    continue
        else:
            for ss in synsets2:
                try:
                    temp = wnsimilarity(synset, ss)
                    score_list.append(temp)
                except:
                    continue
        score_list = np.array(score_list, dtype=np.float64)
        score_list = np.nan_to_num(score_list)
        # print(score_list)
        if len(score_list) > 0:
            best_score = np.nanmax(score_list)
        else:
            best_score = 0.0
        # print(best_score)
        # print(type(best_score))

        # Check that the similarity could have been computed
        if best_score is not None:
            score = score + best_score
            # print(score)
            count = count + 1
            # print("one sentence over")

    # Average the values
    score /= count
    return score
def tSNEgen(MF, CDs, tol_fact, filter, metric='chebyshev', fetch_ann='online', p=30, ea=12): """Performs tSNE analysis on the molecular data collected using spaceM. The documentation page of the sklearn implementation of tSNE: http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html Args: MF (str): path to the Main Folder. CDs (list): correlation distance tresholds used for filtering background annotation images, only used when filter is 'correlation'. Default value is 0.75. tol_fact (float): tolerance factor to use for the filter 'mean'. filter (str): filter strategy to select background and on-sample annotation images: 'mean': compare the mean ion intensity from off and on-sample pixels. Consider annotation as coming from the sample if mean on-sample intensity > tol_fact * mean off-sample intensity. 'correlation': compute the correlation distance between the intensity thresholded annotation image and the cell distribution binary mask. The annotation is considered as coming from the sample if the correlation distance is inferior to CDs[i]. The cell distribution mask has pixel equal to 1 if its corresponding ablation mark is touching a cell and 0 if not. The treshold value to binarize the annotation image is found using an optimizer which minimzes the correlation distance with the cell distribution mask. This removes the negative effect that an uneven ion intensity distribution will have on the correlation distance with the cell distribution mask. metric (str): The metric to use when calculating distance between instances in a feature array. Metric value must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS. fetch_ann (str): method for fetching annotations: 'online': (default) queries metaspace using the name of the .imzml data present in the MALDI input folder as dataset name, 'offline': reads annotation images from a provided dataframe. p (float): perplexity value to use for the tSNE algorithm. The perplexity is related to the number of nearest neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE is quite insensitive to this parameter. ea (float): early exaggeration value to use for the tSNE algorithm. Controls how tight natural clusters in the original space are in the embedded space and how much space will be between them. For larger values, the space between natural clusters will be larger in the embedded space. Again, the choice of this parameter is not very critical. If the cost function increases during initial optimization, the early exaggeration factor or the learning rate might be too high. 
""" if fetch_ann == 'online' and filter == 'correlation': MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/CD={}/MOLonlyData.csv'.format( CDs[0]) MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/CD={}/MOLallData.csv'.format( CDs[0]) elif fetch_ann == 'online' and filter == 'mean': MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/tol_fact={}/MOLonlyData.csv'.format( tol_fact) MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/tol_fact={}/MOLallData.csv'.format( tol_fact) if fetch_ann == 'offline': MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/offline/MOLonlyData.csv' MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/offline/MOLallData.csv' MOLdf = pd.read_csv(MOLcsv_p) fluo_data = pd.read_csv(MOLallcsv_p).fluoMarksMean_lu.as_matrix() tsne_input = np.nan_to_num(np.log10(MOLdf.iloc[:, 2:].as_matrix())) # perp = [5,10,15,20,25,30,40,50,75,100] # for p in perp: # p = 30 dist = pairwise_distances(tsne_input, metric=metric) tsne = manifold.TSNE(n_components=2, metric='precomputed', early_exaggeration=ea, perplexity=p) X_tsne = tsne.fit_transform(np.nan_to_num(dist)) x_min, x_max = np.min(X_tsne, 0), np.max(X_tsne, 0) X2_tsne = (X_tsne - x_min) / (x_max - x_min) tsne_colors_i = fluo_data contrast_cut = 5 pc_top = np.percentile(tsne_colors_i, 100 - contrast_cut) pc_down = np.percentile(tsne_colors_i, contrast_cut) tsne_colors_f = [] for i in tsne_colors_i: if i >= pc_top: val = pc_top elif i <= pc_down: val = pc_down else: val = i tsne_colors_f = np.append(tsne_colors_f, val) plt.figure() plt.scatter(X2_tsne[:, 0], X2_tsne[:, 1], 50, np.log10(tsne_colors_f), cmap='viridis', edgecolors='none') plt.xlabel('tSNE dim 1', fontsize=20) plt.ylabel('tSNE dim 2', fontsize=20) plt.axis('equal') coords = pd.DataFrame({ 'tSNE1': X2_tsne[:, 0], 'tSNE2': X2_tsne[:, 1], 'ObjectNumber': MOLdf['ObjectNumber_lu'] }) coords.to_csv(MF + 'Analysis/tSNE/metric={}_perp={}_KLD='.format(metric, p) + str(tsne.kl_divergence_)[:5] + '_' + fetch_ann + '.csv', index=False) plt.savefig(MF + 'Analysis/tSNE/metric={}_perp={}_KLD='.format(metric, p) + str(tsne.kl_divergence_)[:5] + '_' + fetch_ann + '.png', dpi=200) plt.close('all')
def register2Fragments(id1, id2, keyptspath, descpath, resultpath, logpath, gtLog, desc_name, inlier_ratio, distance_threshold): """ Register point cloud {id1} and {id2} using the keypts location and descriptors. """ cloud_bin_s = f'cloud_bin_{id1}' cloud_bin_t = f'cloud_bin_{id2}' write_file = f'{cloud_bin_s}_{cloud_bin_t}.rt.txt' if os.path.exists(os.path.join(resultpath, write_file)): return 0, 0, 0 source_keypts = get_keypts(keyptspath, cloud_bin_s) target_keypts = get_keypts(keyptspath, cloud_bin_t) source_desc = get_desc(descpath, cloud_bin_s, desc_name) target_desc = get_desc(descpath, cloud_bin_t, desc_name) source_desc = np.nan_to_num(source_desc) target_desc = np.nan_to_num(target_desc) # Select {num_keypts} points based on the scores. The descriptors and keypts are already sorted based on the detection score. num_keypts = 250 source_keypts = source_keypts[-num_keypts:, :] source_desc = source_desc[-num_keypts:, :] target_keypts = target_keypts[-num_keypts:, :] target_desc = target_desc[-num_keypts:, :] # Select {num_keypts} points randomly. # num_keypts = 250 # source_indices = np.random.choice(range(source_keypts.shape[0]), num_keypts) # target_indices = np.random.choice(range(target_keypts.shape[0]), num_keypts) # source_keypts = source_keypts[source_indices, :] # source_desc = source_desc[source_indices, :] # target_keypts = target_keypts[target_indices, :] # target_desc = target_desc[target_indices, :] key = f'{cloud_bin_s.split("_")[-1]}_{cloud_bin_t.split("_")[-1]}' if key not in gtLog.keys(): # skip the pairs that have less than 30% overlap. num_inliers = 0 inlier_ratio = 0 gt_flag = 0 else: # build correspondence set in feature space. corr = build_correspondence(source_desc, target_desc) # calculate the inlier ratio, this is for Feature Matching Recall. gt_trans = gtLog[key] frag1 = source_keypts[corr[:, 0]] frag2_pc = open3d.PointCloud() frag2_pc.points = open3d.utility.Vector3dVector(target_keypts[corr[:, 1]]) frag2_pc.transform(gt_trans) frag2 = np.asarray(frag2_pc.points) distance = np.sqrt(np.sum(np.power(frag1 - frag2, 2), axis=1)) num_inliers = np.sum(distance < distance_threshold) if num_inliers / len(distance) < inlier_ratio: print(key) print("num_corr:", len(corr), "inlier_ratio:", num_inliers / len(distance)) inlier_ratio = num_inliers / len(distance) gt_flag = 1 # calculate the transformation matrix using RANSAC, this is for Registration Recall. source_pcd = open3d.PointCloud() source_pcd.points = open3d.utility.Vector3dVector(source_keypts) target_pcd = open3d.PointCloud() target_pcd.points = open3d.utility.Vector3dVector(target_keypts) s_desc = open3d.registration.Feature() s_desc.data = source_desc.T t_desc = open3d.registration.Feature() t_desc.data = target_desc.T result = open3d.registration_ransac_based_on_feature_matching( source_pcd, target_pcd, s_desc, t_desc, 0.05, open3d.TransformationEstimationPointToPoint(False), 3, [ open3d.CorrespondenceCheckerBasedOnEdgeLength(0.9), open3d.CorrespondenceCheckerBasedOnDistance(0.05) ], open3d.RANSACConvergenceCriteria(50000, 1000)) # write the transformation matrix into .log file for evaluation. 
with open(os.path.join(logpath, f'{desc_name}_{timestr}.log'), 'a+') as f: trans = result.transformation trans = np.linalg.inv(trans) s1 = f'{id1}\t {id2}\t 37\n' f.write(s1) f.write( f"{trans[0,0]}\t {trans[0,1]}\t {trans[0,2]}\t {trans[0,3]}\t \n" ) f.write( f"{trans[1,0]}\t {trans[1,1]}\t {trans[1,2]}\t {trans[1,3]}\t \n" ) f.write( f"{trans[2,0]}\t {trans[2,1]}\t {trans[2,2]}\t {trans[2,3]}\t \n" ) f.write( f"{trans[3,0]}\t {trans[3,1]}\t {trans[3,2]}\t {trans[3,3]}\t \n" ) # write the result into resultpath so that it can be re-shown. s = f"{cloud_bin_s}\t{cloud_bin_t}\t{num_inliers}\t{inlier_ratio:.8f}\t{gt_flag}" with open(os.path.join(resultpath, f'{cloud_bin_s}_{cloud_bin_t}.rt.txt'), 'w+') as f: f.write(s) return num_inliers, inlier_ratio, gt_flag
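# Toy illustration (synthetic points, hypothetical threshold) of the inlier-ratio
# computation used above for Feature Matching Recall: transform the matched target
# keypoints by the ground-truth pose and count correspondences closer than the
# distance threshold.
import numpy as np

rng = np.random.default_rng(0)
frag1 = rng.random((250, 3))
gt_trans = np.eye(4)                                   # identity pose for the toy case
frag2 = frag1 + 0.02 * rng.standard_normal((250, 3))   # noisy "matched" points
frag2_h = np.c_[frag2, np.ones(len(frag2))] @ gt_trans.T
distance = np.sqrt(np.sum((frag1 - frag2_h[:, :3]) ** 2, axis=1))
distance_threshold = 0.10
print('inlier ratio:', np.mean(distance < distance_threshold))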
def optimization(generate_opt_data=True, read_opt_data=False, beta_fin=4, x_max=5, potential=harmonic_potential, potential_string='harmonic_potential', nx_min=50, nx_max=1000, nx_sampling=50, N_iter_min=1, N_iter_max=20, save_opt_data=False, opt_data_file_name=None, plot=True, show_plot=True, save_plot=True, opt_plot_file_name=None): """ Usage: computes several error values using calc_error() to find the dx and beta_ini values that are optimal for running the algorithm (optimal = error-minimizing). Receives: generate_opt_data: bool -> decides whether to generate data for the optimization. read_opt_data: bool -> decides whether to read data for the optimization. Note: generate_opt_data and read_opt_data are mutually exclusive. The former is evaluated first. nx_min: int nx_max: int -> related to dx = 2*x_max/(nx-1). nx_sampling: int -> nx values are generated via range(nx_max,nx_min,-1*nx_sampling). N_iter_min: int N_iter_max: int -> related to beta_ini = beta_fin * 2**(-N_iter). N_iter values are generated with range(N_iter_max,N_iter_min-1,-1). save_opt_data: bool -> decides whether to save the optimization data to a CSV file. opt_data_file_name: str -> file name for the optimization data. plot: bool -> decides whether to plot the optimization. show_plot: bool -> decides whether to show the plot. save_plot: bool -> decides whether to save the plot. opt_plot_file_name: str -> file name for the optimization plot. If None, it is saved under a convenient name built from the relevant parameters. Returns: error: list, shape=(nb,ndx) -> calc_error values for different dx and beta_ini values. dx increases from left to right in the list and beta_ini increases from top to bottom. dx_grid: list, shape=(ndx,) -> dx values for which the error is computed. beta_ini_grid: list, shape=(nb,) -> beta_ini values for which the error is computed. """ t_0 = time() # Decide whether to generate or read data. if generate_opt_data: N_iter_min = int(N_iter_min) N_iter_max = int(N_iter_max) nx_min = int(nx_min) nx_max = int(nx_max) if nx_min%2==1: nx_min -= 1 if nx_max%2==0: nx_max += 1 # Create nx and N_iter values (equivalent to generating dx and beta_ini values) nx_values = range(nx_max,nx_min,-1*nx_sampling) N_iter_values = range(N_iter_max,N_iter_min-1,-1) dx_grid = [2*x_max/(nx-1) for nx in nx_values] beta_ini_grid = [beta_fin * 2**(-N_iter) for N_iter in N_iter_values] error = [] # Compute the error for each specified value of nx and N_iter # (equivalently dx and beta_ini). for N_iter in N_iter_values: row = [] for nx in nx_values: rho,trace_rho,grid_x = run_pi_x_sq_trotter(x_max, nx, N_iter, beta_fin, potential, potential_string, False, False, None, None, False, False, False) grid_x = np.array(grid_x) dx = grid_x[1]-grid_x[0] rho_normalized = np.copy(rho)/trace_rho pi_x = np.diag(rho_normalized) theoretical_pi_x = QHO_canonical_ensemble(grid_x,beta_fin) error_comp_theo = calc_error(pi_x,theoretical_pi_x,dx) row.append(error_comp_theo) error.append(row) #error = np.array(error) elif read_opt_data: error = pd.read_csv(opt_data_file_name, index_col=0, comment='#') dx_grid = error.columns.to_numpy() beta_ini_grid = error.index.to_numpy() error = error.to_numpy() else: raise Exception('Choose whether to generate or to read data in optimization(.)') #print(error) error = np.array(error) # Take the error values coming from the computation of Z (nan and inf) and replace them # with the largest error value in the plot. 
try: error = np.where(np.isinf(error),0,error) error = np.where(np.isnan(error),0,error) nan_value = 1.3*np.max(error) error = np.where(error==0, float('nan'), error) except: nan_value = 0 error = np.nan_to_num(error, nan=nan_value, posinf=nan_value, neginf=nan_value) script_dir = os.path.dirname(os.path.abspath(__file__)) # Save the data (only if it was generated and saving was requested) if generate_opt_data and save_opt_data: if opt_data_file_name is None: opt_data_file_name = ('pi_x-ms-opt-%s-beta_fin_%.3f'%(potential_string, beta_fin) + '-x_max_%.3f-nx_min_%d-nx_max_%d'%(x_max, nx_min, nx_max) + '-nx_sampling_%d-N_iter_min_%d'%(nx_sampling, N_iter_min) + '-N_iter_max_%d.csv'%(N_iter_max)) opt_data_file_name = script_dir + '/' + opt_data_file_name relevant_info = ['Optimization of parameters dx and beta_ini of matrix squaring' + ' algorithm', '%s beta_fin = %.3f '%(potential_string, beta_fin) + 'x_max = %.3f nx_min = %d nx_max = %d '%(x_max, nx_min, nx_max) + 'nx_sampling = %d N_iter_min = %d '%(nx_sampling, N_iter_min) + 'N_iter_max = %d'%(N_iter_max)] save_csv(error, dx_grid, beta_ini_grid, opt_data_file_name, relevant_info) t_1 = time() # Plot if plot: fig, ax = plt.subplots(1, 1) DX, BETA_INI = np.meshgrid(dx_grid, beta_ini_grid) cp = plt.contourf(DX,BETA_INI,error) plt.colorbar(cp) ax.set_ylabel(u'$\\beta_{ini}$') ax.set_xlabel('$dx$') plt.tight_layout() if save_plot: if opt_plot_file_name is None: opt_plot_file_name = \ ('pi_x-ms-opt-plot-%s-beta_fin_%.3f'%(potential_string, beta_fin) + '-x_max_%.3f-nx_min_%d-nx_max_%d'%(x_max, nx_min, nx_max) + '-nx_sampling_%d-N_iter_min_%d'%(nx_sampling, N_iter_min) + '-N_iter_max_%d.eps'%(N_iter_max)) opt_plot_file_name = script_dir + '/' + opt_plot_file_name plt.savefig(opt_plot_file_name) if show_plot: plt.show() plt.close() comp_time = t_1 - t_0 return error, dx_grid, beta_ini_grid, comp_time
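# Short stand-alone illustration (toy array) of the NaN/Inf handling above, in a
# simplified form that skips the zero-handling of the try/except block: divergent
# error values are capped at 1.3 times the largest finite error so the contour plot
# keeps a readable colour scale.
import numpy as np

error = np.array([[0.10, np.inf], [np.nan, 0.40]])
finite = error[np.isfinite(error)]
cap = 1.3 * finite.max() if finite.size else 0.0
error = np.nan_to_num(error, nan=cap, posinf=cap, neginf=cap)
print(error)   # [[0.1  0.52] [0.52 0.4 ]]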
#sns.distplot(GCP, bins=100) #sns.distplot(MET, bins=100) #plt.legend(['GCP', 'Metabolomics']) """## 1d. Train on GCP to predict metabolism First, let's split the data into training and test sets. """ from sklearn.model_selection import train_test_split """Convert to Numpy array.""" GCP = GCP.to_numpy() MET = MET.to_numpy() """Let's ensure all NaNs are 0.""" # Ensure all values are finite GCP = np.nan_to_num(GCP, nan=0) MET = np.nan_to_num(MET, nan=0) """Split the data into validation (30%) and training (70%) data.""" # Split the CCLE data into a validation set Xtrain, Xval, Ytrain, Yval = train_test_split(GCP, MET, test_size=0.3, random_state=0) """Print $X_{train}$ and $Y_{train}$.""" import sys np.set_printoptions(threshold=sys.maxsize) #print(Xtrain) #print(Ytrain) """Print shape of $X_{train}$ and $Y_{train}$."""
def ElasticNet_OptimalAlpha_KFold(Training_Data, Training_Score, Fold_Quantity, Alpha_Range, L1_ratio_Range, ResultantFolder, Parallel_Quantity): # # Select optimal regularization parameter using nested cross-validation # # Training_Data: # n*m matrix, n is subjects quantity, m is features quantity # Training_Score: # n*1 vector, n is subjects quantity # Fold_Quantity: # Fold quantity for the cross-validation # 5 or 10 is recommended generally, the small the better accepted by community, but the results may be worse as traning samples are fewer # Alpha_Range: # Range of alpha, the regularization parameter balancing the training error and L2 penalty # Our previous paper used (2^(-10), 2^(-9), ..., 2^4, 2^5), see Cui and Gong (2018), NeuroImage # L1_ratio_Range: # Range of l1 ratio, the parameter balancing l1 and l2 penalty # Our previous paper 10 values in the range [0.2,1], see Cui et al., (2018), Cerebral Cortex # ResultantFolder: # Path of the folder storing the results # Parallel_Quantity: # Parallel multi-cores on one single computer, at least 1 # Subjects_Quantity = len(Training_Score) # Sort the subjects score Sorted_Index = np.argsort(Training_Score) Training_Data = Training_Data[Sorted_Index, :] Training_Score = Training_Score[Sorted_Index] Inner_EachFold_Size = np.int( np.fix(np.divide(Subjects_Quantity, Fold_Quantity))) MaxSize = Inner_EachFold_Size * Fold_Quantity EachFold_Max = np.ones(Fold_Quantity, np.int) * MaxSize tmp = np.arange(Fold_Quantity - 1, -1, -1) EachFold_Max = EachFold_Max - tmp Remain = np.mod(Subjects_Quantity, Fold_Quantity) for j in np.arange(Remain): EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity Parameter_Combination_Quantity = len(Alpha_Range) * len(L1_ratio_Range) Inner_Corr = np.zeros((Fold_Quantity, Parameter_Combination_Quantity)) Inner_MAE_inv = np.zeros((Fold_Quantity, Parameter_Combination_Quantity)) for k in np.arange(Fold_Quantity): Inner_Fold_K_Index = np.arange(k, EachFold_Max[k], Fold_Quantity) Inner_Fold_K_Data_test = Training_Data[Inner_Fold_K_Index, :] Inner_Fold_K_Score_test = Training_Score[Inner_Fold_K_Index] Inner_Fold_K_Data_train = np.delete(Training_Data, Inner_Fold_K_Index, axis=0) Inner_Fold_K_Score_train = np.delete(Training_Score, Inner_Fold_K_Index) Scale = preprocessing.MinMaxScaler() Inner_Fold_K_Data_train = Scale.fit_transform(Inner_Fold_K_Data_train) Inner_Fold_K_Data_test = Scale.transform(Inner_Fold_K_Data_test) Parallel(n_jobs=Parallel_Quantity, backend="threading")( delayed(ElasticNet_SubAlpha) (Inner_Fold_K_Data_train, Inner_Fold_K_Score_train, Inner_Fold_K_Data_test, Inner_Fold_K_Score_test, Alpha_Range, L1_ratio_Range, l, ResultantFolder) for l in np.arange(Parameter_Combination_Quantity)) for l in np.arange(Parameter_Combination_Quantity): print(l) Fold_l_Mat_Path = ResultantFolder + '/Alpha_' + str(l) + '.mat' Fold_l_Mat = sio.loadmat(Fold_l_Mat_Path) Inner_Corr[k, l] = Fold_l_Mat['Corr'][0][0] Inner_MAE_inv[k, l] = Fold_l_Mat['MAE_inv'] os.remove(Fold_l_Mat_Path) Inner_Corr = np.nan_to_num(Inner_Corr) Inner_Corr_Mean = np.mean(Inner_Corr, axis=0) Inner_Corr_Mean = (Inner_Corr_Mean - np.mean(Inner_Corr_Mean)) / np.std(Inner_Corr_Mean) Inner_MAE_inv_Mean = np.mean(Inner_MAE_inv, axis=0) Inner_MAE_inv_Mean = (Inner_MAE_inv_Mean - np.mean(Inner_MAE_inv_Mean) ) / np.std(Inner_MAE_inv_Mean) Inner_Evaluation = Inner_Corr_Mean + Inner_MAE_inv_Mean Inner_Evaluation_Mat = { 'Inner_Corr': Inner_Corr, 'Inner_MAE_inv': Inner_MAE_inv, 'Inner_Evaluation': Inner_Evaluation } sio.savemat(ResultantFolder + 
'/Inner_Evaluation.mat', Inner_Evaluation_Mat) Optimal_Combination_Index = np.argmax(Inner_Evaluation) Optimal_Alpha_Index = np.int64( np.ceil((Optimal_Combination_Index + 1) / len(L1_ratio_Range))) - 1 Optimal_Alpha = Alpha_Range[Optimal_Alpha_Index] Optimal_L1_ratio_Index = np.mod(Optimal_Combination_Index, len(L1_ratio_Range)) Optimal_L1_ratio = L1_ratio_Range[Optimal_L1_ratio_Index] return (Optimal_Alpha, Optimal_L1_ratio)
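# Hedged sketch (hypothetical ranges) of how the flat parameter-combination index maps
# back to (alpha, l1_ratio): the ceil/mod arithmetic above corresponds to an alpha-major
# enumeration of the combinations, so np.unravel_index over a
# (len(Alpha_Range), len(L1_ratio_Range)) grid returns the same pair of indices.
import numpy as np

Alpha_Range = 2.0 ** np.arange(-10, 6)          # assumption, as in Cui and Gong (2018)
L1_ratio_Range = np.linspace(0.2, 1.0, 10)      # assumption, 10 values in [0.2, 1]
Optimal_Combination_Index = 23                  # e.g. np.argmax(Inner_Evaluation)
alpha_idx, l1_idx = np.unravel_index(Optimal_Combination_Index,
                                     (len(Alpha_Range), len(L1_ratio_Range)))
print(Alpha_Range[alpha_idx], L1_ratio_Range[l1_idx])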
def fn(a, y): return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
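# Worked example of why nan_to_num is needed in the cross-entropy cost above: when
# a == y at 0 or 1, one of the terms becomes 0 * log(0) = 0 * (-inf) = NaN, which
# nan_to_num maps back to 0 (np.log still emits a harmless runtime warning).
import numpy as np

a = np.array([1.0, 0.5, 0.0])   # activations
y = np.array([1.0, 0.0, 0.0])   # targets
cost = np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
print(cost)   # ~0.693; only the middle (a=0.5, y=0) term contributes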
def create_features(seg_id, seg, X, st, end): """ create features including fft features, statistical features and time series features :param seg_id: the ID for a sample :param seg: s signal segment :param X: train set features before creating these features :param st: the start index of the signal segment :param end: the end index of the signal segment :return: train set features after creating these features """ try: # test set won't create these features because its seg_id is string X.loc[seg_id, 'seg_id'] = np.int32(seg_id) X.loc[seg_id, 'seg_start'] = np.int32(st) X.loc[seg_id, 'seg_end'] = np.int32(end) except ValueError: pass xc = pd.Series(seg['acoustic_data'].values) xcdm = xc - np.mean(xc) b, a = des_bw_filter_lp(cutoff=18000) xcz = sg.lfilter(b, a, xcdm) zc = np.fft.fft(xcz) zc = zc[:MAX_FREQ] # FFT transform values realFFT = np.real(zc) imagFFT = np.imag(zc) freq_bands = [x for x in range(0, MAX_FREQ, FREQ_BAND)] magFFT = np.sqrt(realFFT ** 2 + imagFFT ** 2) phzFFT = np.arctan(imagFFT / realFFT) phzFFT[phzFFT == -np.inf] = -np.pi / 2.0 phzFFT[phzFFT == np.inf] = np.pi / 2.0 phzFFT = np.nan_to_num(phzFFT) for freq in freq_bands: X.loc[seg_id, 'FFT_Mag_01q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.01) X.loc[seg_id, 'FFT_Mag_10q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.1) X.loc[seg_id, 'FFT_Mag_90q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.9) X.loc[seg_id, 'FFT_Mag_99q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.99) X.loc[seg_id, 'FFT_Mag_mean%d' % freq] = np.mean(magFFT[freq: freq + FREQ_BAND]) X.loc[seg_id, 'FFT_Mag_std%d' % freq] = np.std(magFFT[freq: freq + FREQ_BAND]) X.loc[seg_id, 'FFT_Mag_max%d' % freq] = np.max(magFFT[freq: freq + FREQ_BAND]) X.loc[seg_id, 'FFT_Phz_mean%d' % freq] = np.mean(phzFFT[freq: freq + FREQ_BAND]) X.loc[seg_id, 'FFT_Phz_std%d' % freq] = np.std(phzFFT[freq: freq + FREQ_BAND]) X.loc[seg_id, 'FFT_Rmean'] = realFFT.mean() X.loc[seg_id, 'FFT_Rstd'] = realFFT.std() X.loc[seg_id, 'FFT_Rmax'] = realFFT.max() X.loc[seg_id, 'FFT_Rmin'] = realFFT.min() X.loc[seg_id, 'FFT_Imean'] = imagFFT.mean() X.loc[seg_id, 'FFT_Istd'] = imagFFT.std() X.loc[seg_id, 'FFT_Imax'] = imagFFT.max() X.loc[seg_id, 'FFT_Imin'] = imagFFT.min() X.loc[seg_id, 'FFT_Rmean_first_6000'] = realFFT[:6000].mean() X.loc[seg_id, 'FFT_Rstd__first_6000'] = realFFT[:6000].std() X.loc[seg_id, 'FFT_Rmax_first_6000'] = realFFT[:6000].max() X.loc[seg_id, 'FFT_Rmin_first_6000'] = realFFT[:6000].min() X.loc[seg_id, 'FFT_Rmean_first_18000'] = realFFT[:18000].mean() X.loc[seg_id, 'FFT_Rstd_first_18000'] = realFFT[:18000].std() X.loc[seg_id, 'FFT_Rmax_first_18000'] = realFFT[:18000].max() X.loc[seg_id, 'FFT_Rmin_first_18000'] = realFFT[:18000].min() del xcz del zc b, a = des_bw_filter_lp(cutoff=2500) xc0 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=2500, high=5000) xc1 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=5000, high=7500) xc2 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=7500, high=10000) xc3 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=10000, high=12500) xc4 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=12500, high=15000) xc5 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=15000, high=17500) xc6 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_bp(low=17500, high=20000) xc7 = sg.lfilter(b, a, xcdm) b, a = des_bw_filter_hp(cutoff=20000) xc8 = sg.lfilter(b, a, xcdm) sigs = [xc, pd.Series(xc0), pd.Series(xc1), pd.Series(xc2), pd.Series(xc3), pd.Series(xc4), pd.Series(xc5), pd.Series(xc6), 
pd.Series(xc7), pd.Series(xc8)] for i, sig in enumerate(sigs): X.loc[seg_id, 'mean_%d' % i] = sig.mean() X.loc[seg_id, 'std_%d' % i] = sig.std() X.loc[seg_id, 'max_%d' % i] = sig.max() X.loc[seg_id, 'min_%d' % i] = sig.min() X.loc[seg_id, 'mean_change_abs_%d' % i] = np.mean(np.diff(sig)) X.loc[seg_id, 'mean_change_rate_%d' % i] = calc_mean_change_rate(sig) X.loc[seg_id, 'abs_max_%d' % i] = np.abs(sig).max() X.loc[seg_id, 'std_first_50000_%d' % i] = sig[:50000].std() X.loc[seg_id, 'std_last_50000_%d' % i] = sig[-50000:].std() X.loc[seg_id, 'std_first_10000_%d' % i] = sig[:10000].std() X.loc[seg_id, 'std_last_10000_%d' % i] = sig[-10000:].std() X.loc[seg_id, 'avg_first_50000_%d' % i] = sig[:50000].mean() X.loc[seg_id, 'avg_last_50000_%d' % i] = sig[-50000:].mean() X.loc[seg_id, 'avg_first_10000_%d' % i] = sig[:10000].mean() X.loc[seg_id, 'avg_last_10000_%d' % i] = sig[-10000:].mean() X.loc[seg_id, 'min_first_50000_%d' % i] = sig[:50000].min() X.loc[seg_id, 'min_last_50000_%d' % i] = sig[-50000:].min() X.loc[seg_id, 'min_first_10000_%d' % i] = sig[:10000].min() X.loc[seg_id, 'min_last_10000_%d' % i] = sig[-10000:].min() X.loc[seg_id, 'max_first_50000_%d' % i] = sig[:50000].max() X.loc[seg_id, 'max_last_50000_%d' % i] = sig[-50000:].max() X.loc[seg_id, 'max_first_10000_%d' % i] = sig[:10000].max() X.loc[seg_id, 'max_last_10000_%d' % i] = sig[-10000:].max() X.loc[seg_id, 'max_to_min_%d' % i] = sig.max() / np.abs(sig.min()) X.loc[seg_id, 'max_to_min_diff_%d' % i] = sig.max() - np.abs(sig.min()) X.loc[seg_id, 'count_big_%d' % i] = len(sig[np.abs(sig) > 500]) X.loc[seg_id, 'mean_change_rate_first_50000_%d' % i] = calc_mean_change_rate(sig[:50000]) X.loc[seg_id, 'mean_change_rate_last_50000_%d' % i] = calc_mean_change_rate(sig[-50000:]) X.loc[seg_id, 'mean_change_rate_first_10000_%d' % i] = calc_mean_change_rate(sig[:10000]) X.loc[seg_id, 'mean_change_rate_last_10000_%d' % i] = calc_mean_change_rate(sig[-10000:]) X.loc[seg_id, 'q95_%d' % i] = np.quantile(sig, 0.95) X.loc[seg_id, 'q99_%d' % i] = np.quantile(sig, 0.99) X.loc[seg_id, 'q05_%d' % i] = np.quantile(sig, 0.05) X.loc[seg_id, 'q01_%d' % i] = np.quantile(sig, 0.01) X.loc[seg_id, 'abs_q95_%d' % i] = np.quantile(np.abs(sig), 0.95) X.loc[seg_id, 'abs_q99_%d' % i] = np.quantile(np.abs(sig), 0.99) X.loc[seg_id, 'abs_q05_%d' % i] = np.quantile(np.abs(sig), 0.05) X.loc[seg_id, 'abs_q01_%d' % i] = np.quantile(np.abs(sig), 0.01) X.loc[seg_id, 'trend_%d' % i] = add_trend_feature(sig) X.loc[seg_id, 'abs_trend_%d' % i] = add_trend_feature(sig, abs_values=True) X.loc[seg_id, 'abs_mean_%d' % i] = np.abs(sig).mean() X.loc[seg_id, 'abs_std_%d' % i] = np.abs(sig).std() X.loc[seg_id, 'mad_%d' % i] = sig.mad() X.loc[seg_id, 'kurt_%d' % i] = sig.kurtosis() X.loc[seg_id, 'skew_%d' % i] = sig.skew() X.loc[seg_id, 'med_%d' % i] = sig.median() X.loc[seg_id, 'Hilbert_mean_%d' % i] = np.abs(hilbert(sig)).mean() X.loc[seg_id, 'Hann_window_mean'] = (convolve(xc, hann(150), mode='same') / sum(hann(150))).mean() X.loc[seg_id, 'classic_sta_lta1_mean_%d' % i] = classic_sta_lta(sig, 500, 10000).mean() X.loc[seg_id, 'classic_sta_lta2_mean_%d' % i] = classic_sta_lta(sig, 5000, 100000).mean() X.loc[seg_id, 'classic_sta_lta3_mean_%d' % i] = classic_sta_lta(sig, 3333, 6666).mean() X.loc[seg_id, 'classic_sta_lta4_mean_%d' % i] = classic_sta_lta(sig, 10000, 25000).mean() X.loc[seg_id, 'Moving_average_700_mean_%d' % i] = sig.rolling(window=700).mean().mean(skipna=True) X.loc[seg_id, 'Moving_average_1500_mean_%d' % i] = sig.rolling(window=1500).mean().mean(skipna=True) X.loc[seg_id, 
'Moving_average_3000_mean_%d' % i] = sig.rolling(window=3000).mean().mean(skipna=True) X.loc[seg_id, 'Moving_average_6000_mean_%d' % i] = sig.rolling(window=6000).mean().mean(skipna=True) ewma = pd.Series.ewm X.loc[seg_id, 'exp_Moving_average_300_mean_%d' % i] = ewma(sig, span=300).mean().mean(skipna=True) X.loc[seg_id, 'exp_Moving_average_3000_mean_%d' % i] = ewma(sig, span=3000).mean().mean(skipna=True) X.loc[seg_id, 'exp_Moving_average_30000_mean_%d' % i] = ewma(sig, span=30000).mean().mean(skipna=True) no_of_std = 3 X.loc[seg_id, 'MA_700MA_std_mean_%d' % i] = sig.rolling(window=700).std().mean() X.loc[seg_id, 'MA_700MA_BB_high_mean_%d' % i] = ( X.loc[seg_id, 'Moving_average_700_mean_%d' % i] + no_of_std * X.loc[ seg_id, 'MA_700MA_std_mean_%d' % i]).mean() X.loc[seg_id, 'MA_700MA_BB_low_mean_%d' % i] = ( X.loc[seg_id, 'Moving_average_700_mean_%d' % i] - no_of_std * X.loc[ seg_id, 'MA_700MA_std_mean_%d' % i]).mean() X.loc[seg_id, 'MA_400MA_std_mean_%d' % i] = sig.rolling(window=400).std().mean() X.loc[seg_id, 'MA_400MA_BB_high_mean_%d' % i] = ( X.loc[seg_id, 'Moving_average_700_mean_%d' % i] + no_of_std * X.loc[ seg_id, 'MA_400MA_std_mean_%d' % i]).mean() X.loc[seg_id, 'MA_400MA_BB_low_mean_%d' % i] = ( X.loc[seg_id, 'Moving_average_700_mean_%d' % i] - no_of_std * X.loc[ seg_id, 'MA_400MA_std_mean_%d' % i]).mean() X.loc[seg_id, 'MA_1000MA_std_mean_%d' % i] = sig.rolling(window=1000).std().mean() X.loc[seg_id, 'iqr_%d' % i] = np.subtract(*np.percentile(sig, [75, 25])) X.loc[seg_id, 'q999_%d' % i] = np.quantile(sig, 0.999) X.loc[seg_id, 'q001_%d' % i] = np.quantile(sig, 0.001) X.loc[seg_id, 'ave10_%d' % i] = stats.trim_mean(sig, 0.1) X.loc[seg_id, 'num_peaks_10_%d' % i] = feature_calculators.number_peaks(sig, 10) X.loc[seg_id, 'cid_ce_1_%d' % i] = feature_calculators.cid_ce(sig, 1) # time series complexity X.loc[seg_id, 'count_1000_0_%d' % i] = feature_calculators.range_count(sig, -1000, 0) X.loc[seg_id, 'binned_entropy_5_%d' % i] = feature_calculators.binned_entropy(sig, 5) X.loc[seg_id, 'binned_entropy_15_%d' % i] = feature_calculators.binned_entropy(sig, 15) # sliding window is a kind of filter, so this code is out of the cycle of band pass for windows in [10, 100, 1000]: x_roll_std = xc.rolling(windows).std().dropna() x_roll_mean = xc.rolling(windows).mean().dropna() X.loc[seg_id, 'ave_roll_std_' + str(windows)] = x_roll_std.mean() X.loc[seg_id, 'std_roll_std_' + str(windows)] = x_roll_std.std() X.loc[seg_id, 'max_roll_std_' + str(windows)] = x_roll_std.max() X.loc[seg_id, 'min_roll_std_' + str(windows)] = x_roll_std.min() X.loc[seg_id, 'q01_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.01) X.loc[seg_id, 'q05_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.05) X.loc[seg_id, 'q95_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.95) X.loc[seg_id, 'q99_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.99) X.loc[seg_id, 'av_change_abs_roll_std_' + str(windows)] = np.mean(np.diff(x_roll_std)) X.loc[seg_id, 'av_change_rate_roll_std_' + str(windows)] = calc_mean_change_rate(x_roll_std) X.loc[seg_id, 'abs_max_roll_std_' + str(windows)] = np.abs(x_roll_std).max() X.loc[seg_id, 'ave_roll_mean_' + str(windows)] = x_roll_mean.mean() X.loc[seg_id, 'std_roll_mean_' + str(windows)] = x_roll_mean.std() X.loc[seg_id, 'max_roll_mean_' + str(windows)] = x_roll_mean.max() X.loc[seg_id, 'min_roll_mean_' + str(windows)] = x_roll_mean.min() X.loc[seg_id, 'q01_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.01) X.loc[seg_id, 'q05_roll_mean_' + str(windows)] = 
np.quantile(x_roll_mean, 0.05) X.loc[seg_id, 'q95_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.95) X.loc[seg_id, 'q99_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.99) X.loc[seg_id, 'av_change_abs_roll_mean_' + str(windows)] = np.mean(np.diff(x_roll_mean)) X.loc[seg_id, 'av_change_rate_roll_mean_' + str(windows)] = calc_mean_change_rate(x_roll_mean) X.loc[seg_id, 'abs_max_roll_mean_' + str(windows)] = np.abs(x_roll_mean).max() return X
def make_gpwg(Mgg, reference_point, xyz_cid0, grid_cps, coords, log): """ Calculates the Grid Point Weight Generator (GPWG) table. Parameters ---------- reference_point : (3, ) float ndarray the reference point grid_point : int 0->origin, x>0, that grid point Mgg : (N, N) matrix the mass matrix xyz_cid0 : (ngrids, 3) float ndarray the xyz coordinates of the grids grid_cps : (ngrids, ) int ndarray array of cp values corresponding to xyz_cid0 coords : dict[cp] : Coord() dict of cp values corresponding to the Cp coordinate systems log : logger() logging object Returns ------- Mo : (6, 6) float ndarray the rigid body mass matrix in the basic coordinate system S : (3, 3) float ndarray the scalar partition matrix (also known as the principal mass axes) mass : (3, ) float ndarray the mass in the 3 pricincipal (basic) axes cg : (3, 3) float ndarray the cg in the 3 principal (basic) axes II : (3, 3) float ndarray inertias relative to the CG also called I(S) IQ : (3, ) float ndarray principal moments of inertia about the CG also called I(Q) Q : (3, 3) float ndarray the coordinate transformation between the S axes and the Q axes .. todo:: doesn't consider SPOINTs/EPOINTs .. todo:: hasn't been tested """ nnodes = xyz_cid0.shape[0] D = np.zeros((nnodes * 6, 6), dtype='float32') # we subtract ref point so as to not change xyz_cid0 for i, node in enumerate(xyz_cid0 - reference_point): r1, r2, r3 = node j = i * 6 Tr = np.array([[0., r3, -r2], [-r3, 0., r1], [r2, -r1, 0.]], dtype='float32') #print('Tr[%i]=\n%s\n' % (i+1, Tr)) cp = grid_cps[i] Ti = coords[cp].beta() if not np.array_equal(Ti, np.eye(3)): log.info('Ti[%i]=\n%s\n' % (i + 1, Ti)) TiT = Ti.T d = np.zeros((6, 6), dtype='float32') d[:3, :3] = TiT d[3:, 3:] = TiT d[:3, 3:] = TiT @ Tr D[j:j + 6, :] = d Mo = np.zeros((6, 6), dtype='float32') #print('D=\n%s\n' % D) # translati Mo = triple(D, Mgg) log.info('Mgg=\n%s\n' % Mgg) log.info('Mo=\n%s\n' % Mo) # t-translation; r-rotation Mt_bar = Mo[:3, :3] Mtr_bar = Mo[:3, 3:] #Mrt_bar = Mo[3:, :3] Mr_bar = Mo[3:, 3:] #print('dinner =', diag(Mt_bar)) delta = np.linalg.norm(np.diag(Mt_bar)) #print('einner =', Mt_bar - diag(Mt_bar)) epsilon = np.linalg.norm([ Mt_bar[0, 1], Mt_bar[0, 2], Mt_bar[1, 2], ]) if epsilon / delta > 0.001: # user warning 3042 pass log.info('Mt_bar (correct) =\n%s\n' % Mt_bar) log.info('delta=%s' % delta) log.info('epsilon=%s' % epsilon) log.info('e/d=%s\n' % (epsilon / delta)) # hermitian eigenvectors omega, S = np.linalg.eigh(Mt_bar) log.info('omega=%s' % omega) log.info('S (right, but not correct order) =\n%s\n' % S) Mt = triple(S, Mt_bar) Mtr = triple(S, Mtr_bar) Mr = triple(S, Mr_bar) # 4. 
determine the principal axis & cg in the principal mass axis system # eq G-18 Mx = Mt[0, 0] My = Mt[1, 1] Mz = Mt[2, 2] mass = np.diag(Mt) log.info('mass = %s' % mass) #if min(mass) == 0.: #raise RuntimeError('mass = %s' % mass) cg = np.array([ [Mtr[0, 0], -Mtr[0, 2], Mtr[0, 1]], [Mtr[1, 2], Mtr[1, 1], -Mtr[1, 0]], [-Mtr[2, 1], Mtr[2, 0], Mtr[2, 2]], ], dtype='float32') if mass[0] != 0.: cg[0, :] /= Mx if mass[1] != 0.: cg[1, :] /= My if mass[2] != 0.: cg[2, :] /= Mz #cg = nan_to_num(cg) log.info('cg=\n%s\n' % cg) #xx = cg[0, 0] yx = cg[0, 1] zx = cg[0, 2] xy = cg[1, 0] #yy = cg[1, 1] zy = cg[1, 2] xz = cg[2, 0] yz = cg[2, 1] #zz = cg[2, 2] I11 = Mr[0, 0] - My * zy**2 - Mz * yz**2 I21 = I12 = -Mr[0, 1] - Mz * xz * yz I13 = I31 = -Mr[0, 2] - My * xy * zy I22 = Mr[1, 1] - Mz * xz**2 - Mx * zx**2 I23 = -Mr[1, 2] - Mx * yx * zx I32 = I23 I33 = Mr[2, 2] - Mx * yx**2 - My * xy**2 II = np.array([ [I11, I12, I13], [I21, I22, I23], [I31, I32, I33], ], dtype='float32') II = np.nan_to_num(II) log.info('I(S)=\n%s\n' % II) # 6. Reverse the sign of the off diagonal terms II = -II np.fill_diagonal(II, -np.diag(II)) #print('I~=\n%s\n' % II) if np.isnan(II).any(): omegaQ = np.zeros(3, dtype='float32') Q = np.zeros((3, 3), dtype='float32') else: omegaQ, Q = np.linalg.eig(II) #i = argsort(omegaQ) log.info('omegaQ = %s' % omegaQ) log.info('Q -> wrong =\n%s\n' % Q) IQ = triple(Q, II) #print('I(Q) -> wrong =\n%s\n' % IQ) return Mo, S, mass, cg, II, IQ, Q
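# Toy sanity check (assumption: triple(A, B) == A.T @ B @ A, which is how the D matrix
# is used above): a unit point mass at r = (1, 0, 0) reduced to the reference point
# gives the familiar 6x6 rigid-body mass matrix with mass/offset coupling terms.
import numpy as np

def triple(A, B):
    return A.T @ B @ A

r1, r2, r3 = 1.0, 0.0, 0.0
Tr = np.array([[0., r3, -r2], [-r3, 0., r1], [r2, -r1, 0.]])
d = np.eye(6)
d[:3, 3:] = Tr
Mgg = np.diag([1., 1., 1., 0., 0., 0.])   # translational point mass, no rotary inertia
print(triple(d, Mgg))                     # rotational block equals Tr.T @ Tr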
def save_tensor_stats(self, tensor, tag, id, tensors_q={}, force_global_min_max=False): # ignore FC or 1x1 case if len(tensor.shape) < 3 or (tensor.shape[2] == 1 and tensor.shape[3] == 1): return # Assume activation dimentions [N,C,H,W] t = tensor.transpose(0, 1).contiguous() # [C, N, H, W] t = t.view(t.shape[0], -1) # [C, NxHxW] mean_ = t.mean(-1) std_ = torch.std(t, dim=-1, unbiased=True) for sn in self.stats_names: if sn == 'kurtosis': st = torch.mean(((t - mean_.unsqueeze(-1)) / std_.unsqueeze(-1))**4, dim=-1) - 3 elif sn == 'b': st = torch.mean(torch.abs(t - mean_.unsqueeze(-1)), dim=-1) elif sn == 'std': st = std_ elif sn == 'std_pos': t_relu = torch.nn.functional.relu(t) st = torch.std(t_relu, dim=-1, unbiased=True) elif sn == 'mean': st = mean_ elif sn == 'max': if force_global_min_max: st = t.max(-1)[0] else: st = torch.mean(tensor.view(tensor.shape[0], tensor.shape[1], -1).max(dim=-1)[0], dim=0) \ if self.batch_avg else t.max(-1)[0] elif sn == 'min': if force_global_min_max: st = t.min(-1)[0] else: st = torch.mean(tensor.view(tensor.shape[0], tensor.shape[1], -1).min(dim=-1)[0], dim=0)if self.batch_avg else \ torch.min(tensor.view(tensor.shape[0], tensor.shape[1], -1).min(dim=-1)[0], dim=0)[0] elif 'mse' in sn: if len(tensors_q) > 0: t_orig = tensors_q['orig'] t_q = tensors_q[sn.split('_')[1]] st = torch.mean(torch.mean(((t_orig - t_q)**2).view(t_orig.shape[0], t_orig.shape[1], -1), dim=-1), dim=0) else: continue # elif 'mae' in sn: # if len(tensors_q) > 0: # t = tensors_q['orig'].view(t.shape) # t_q = tensors_q[sn.split('_')[1]].view(t.shape) # st = torch.mean(torch.abs(t - t_q), dim=-1) # else: # continue elif 'cos' in sn: if len(tensors_q) > 0: t_orig = tensors_q['orig'].view(tensor.shape[0], tensor.shape[1], -1) t_q = tensors_q[sn.split('_')[1]].view(tensor.shape[0], tensor.shape[1], -1) st = cos_sim(t_orig, t_q, dims=[-1, 0]) else: continue # elif 'ang' in sn: # if len(tensors_q) > 0: # t = tensors_q['orig'].view(t.shape) # t_q = tensors_q[sn.split('_')[1]].view(t.shape) # cos = cos_sim(t, t_q) # st = torch.acos(cos) # else: # continue else: pass st = st.cpu().numpy() if 'cos' in sn: st = np.nan_to_num(st) st[st == 0] = 1. if id not in self.stats: self.stats[id] = {} if sn not in self.stats[id]: self.stats[id][sn] = st else: # if len(st.shape) > 1: self.stats[id][sn] = np.vstack([self.stats[id][sn], st])
def plot_community_conn_mat(conn_matrix, labels, out_path_fig_comm, community_aff, cmap, dpi_resolution=300): """ Plot a community-parcellated connectivity matrix. Parameters ---------- conn_matrix : array NxN matrix. labels : list List of string labels corresponding to ROI nodes. out_path_fig_comm : str File path to save the community-parcellated connectivity matrix image as a .png figure. community_aff : array Community-affiliation vector. """ import warnings warnings.filterwarnings("ignore") import matplotlib import mplcyberpunk from matplotlib import pyplot as plt matplotlib.use("agg") plt.style.use("cyberpunk") import matplotlib.patches as patches import matplotlib.ticker as mticker from nilearn.plotting import plot_matrix from pynets.core import thresholding plt.style.use("cyberpunk") conn_matrix_bin = thresholding.binarize(conn_matrix) conn_matrix = thresholding.standardize(conn_matrix) conn_matrix_plt = np.nan_to_num(np.multiply(conn_matrix, conn_matrix_bin)) sorting_array = sorted(range(len(community_aff)), key=lambda k: community_aff[k]) sorted_conn_matrix = conn_matrix[sorting_array, :] sorted_conn_matrix = sorted_conn_matrix[:, sorting_array] rois_num = sorted_conn_matrix.shape[0] if rois_num < 100: try: plot_matrix( conn_matrix_plt, figure=(10, 10), labels=labels, vmax=np.percentile(conn_matrix_plt[conn_matrix_plt > 0], 95), vmin=0, reorder=False, auto_fit=True, grid=False, colorbar=False, cmap=cmap, ) except RuntimeWarning: print("Connectivity matrix too sparse for plotting...") else: try: plot_matrix( conn_matrix_plt, figure=(10, 10), vmax=np.abs(np.max(conn_matrix_plt)), vmin=0, auto_fit=True, grid=False, colorbar=False, cmap=cmap, ) except RuntimeWarning: print("Connectivity matrix too sparse for plotting...") ax = plt.gca() total_size = 0 for community in np.unique(community_aff): size = sum(sorted(community_aff) == community) ax.add_patch( patches.Rectangle( (total_size, total_size), size, size, fill=False, edgecolor="white", alpha=None, linewidth=1, )) total_size += size if len(labels) > 500: tick_interval = 5 elif len(labels) > 100: tick_interval = 4 elif len(labels) > 50: tick_interval = 2 else: tick_interval = 1 plt.axes().yaxis.set_major_locator(mticker.MultipleLocator(tick_interval)) plt.axes().xaxis.set_major_locator(mticker.MultipleLocator(tick_interval)) for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']: plt.rcParams[param] = '#000000' plt.savefig(out_path_fig_comm, dpi=dpi_resolution) plt.close() return
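# Small stand-alone sketch (toy matrix) of the community reordering used above: nodes
# are sorted by community affiliation and the matrix is permuted on both axes, so each
# community appears as a contiguous block along the diagonal; the block sizes drive the
# rectangles drawn over the plotted matrix.
import numpy as np

community_aff = np.array([2, 0, 1, 0, 2, 1])
conn_matrix = np.random.rand(6, 6)
order = sorted(range(len(community_aff)), key=lambda k: community_aff[k])
sorted_conn_matrix = conn_matrix[order, :][:, order]
block_sizes = [int(np.sum(community_aff == c)) for c in np.unique(community_aff)]
print(block_sizes)   # [2, 2, 2]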
def train_ctax_path(self, stepsize, number_of_weights): """ Here the weights for each ctax step is calculated Load values: linear paths and random paths including the reductions from TIMER output: b values (weigths) for given ctax levels """ self.stepsize = stepsize self.number_of_weights = number_of_weights # for some reason, some values are not exactly rounded self.train_path[:, -1] = np.round(self.train_path[:, -1]) # get lin paths based on final ctax self.lin_train = [ self.lin_path[self.lin_path[:, -1] == path[-1]] for path in self.train_path ] self.lin_train = np.vstack(self.lin_train) self.lin_train = self.lin_train[self.lin_train[:, -1].argsort()] self.train_path = self.train_path[self.train_path[:, -1].argsort()] delta_cs = self.lin_train - self.train_path final_ctax = self.lin_train[:, -1] delta_c_norm = delta_cs / final_ctax[:, None] delta_c_norm = np.nan_to_num(delta_c_norm) # for first two rows delta_c_dict = [{ 'delta_c': delta_c_norm[i], 'final ctax': final_ctax[i] } for i in range(len(delta_c_norm))] # stepsize is number of paths used to calculate the weights 20 is the dollar step used in TIMER for index in range(0, 200 + stepsize, stepsize): stepsize_ctax = index * 20 delta_c_step = [ ctax['delta_c'] for ctax in delta_c_dict if ctax['final ctax'] <= stepsize_ctax and ctax['final ctax'] >= stepsize_ctax - (stepsize * 20) ] # get number of weights wanted self.count_weights = int(len(delta_c_step[0]) / number_of_weights) delta_c_slice = [ np.mean(delta_c[1:].reshape(-1, self.count_weights), axis=1) for delta_c in delta_c_step ] lin_reduction_step = self.df_combined_lin[ (self.df_combined_lin[self.year] <= stepsize_ctax) & (self.df_combined_lin[self.year] >= stepsize_ctax - (stepsize * 20))] train_reduction_step = self.df_combined_train[ (self.df_combined_train[self.year] <= stepsize_ctax) & (self.df_combined_train[self.year] >= stepsize_ctax - (stepsize * 20))] train_reduction_step[self.year] = train_reduction_step[ self.year].round(0) lin_reduction_step[self.year] = lin_reduction_step[ self.year].round(0) lin_reduction_step = pd.merge(lin_reduction_step, train_reduction_step, on=self.year) lin_reduction_step = lin_reduction_step[['reduction_x']].values train_reduction_step = train_reduction_step[['reduction']].values # print(len(delta_c_slice), len(lin_reduction_step), len(train_reduction_step)) # set initial values to 0 x0 = [i * 0 for i in delta_c_slice[0]] res = minimize(self.objective, x0, args=(delta_c_slice, lin_reduction_step, train_reduction_step)) weights = pd.DataFrame([[x for x in res.x] + [index * 20]]) weights.columns = [*weights.columns[:-1], 'ctax'] self.weights = pd.concat([self.weights, weights]) self.weights = self.weights.reset_index(drop=True) print('weights dataframe:', '\n', self.weights) # quick vis of paths found weights_columns = weights.columns.values weights_columns = weights_columns[:-1] fig1, ax1 = plt.subplots() for column in weights_columns: ax1.plot(self.weights['ctax'], self.weights[column], label=column) ax1.set_xlabel('final ctax') ax1.set_ylabel('weight') ax1.legend()
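# Hedged sketch of the weight-fitting step: self.objective is not shown above, so this
# stand-in assumes a least-squares objective that makes the weighted, normalized path
# deviations reproduce the reduction difference between linear and trained paths.
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(1)
delta_c_slice = rng.random((30, 5))           # 30 paths, 5 averaged path segments
true_w = np.array([0.5, 0.2, 0.1, 0.1, 0.1])  # hypothetical "true" weights
reduction_gap = delta_c_slice @ true_w        # toy lin-vs-train reduction difference

def objective(w, X, y):
    return np.sum((X @ w - y) ** 2)

res = minimize(objective, np.zeros(5), args=(delta_c_slice, reduction_gap))
print(np.round(res.x, 2))                     # recovers the weights on this toy problem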
def backward_G(self, epoch, seg_criterion=None, A_gt=False): # self.loss_G_A = torch.zeros(1).cuda() if not self.multi_D: pred_fake = self.netD_A.forward(self.fake_B) if self.opt.use_wgan: self.loss_G_A = -pred_fake.mean() elif self.opt.use_ragan: pred_real = self.netD_A.forward(self.real_B) self.loss_G_A = ( self.criterionGAN(pred_real - torch.mean(pred_fake), False) + self.criterionGAN(pred_fake - torch.mean(pred_real), True)) / 2 else: self.loss_G_A = self.criterionGAN(pred_fake, True) else: self.loss_G_A = 0 for c in range(5): # select by category; if empty: tensor([]) if (self.category == c).nonzero().size(0) == 0: continue pred_fake = self.netD_As[c].forward( torch.index_select( self.fake_B, 0, (self.category == c).nonzero().view(-1).type( torch.cuda.LongTensor))) if self.opt.use_wgan: self.loss_G_A += -pred_fake.mean() elif self.opt.use_ragan: pred_real = self.netD_As[c].forward( torch.index_select( self.real_B, 0, (self.category == c).nonzero().view(-1).type( torch.cuda.LongTensor))) self.loss_G_A += (self.criterionGAN( pred_real - torch.mean(pred_fake), False) + self.criterionGAN( pred_fake - torch.mean(pred_real), True)) / 2 else: self.loss_G_A += self.criterionGAN(pred_fake, True) loss_G_A = 0 if self.opt.patchD: pred_fake_patch = self.netD_P.forward(self.fake_patch) if self.opt.hybrid_loss: loss_G_A += self.criterionGAN(pred_fake_patch, True) else: pred_real_patch = self.netD_P.forward(self.real_patch) loss_G_A += (self.criterionGAN( pred_real_patch - torch.mean(pred_fake_patch), False) + self.criterionGAN( pred_fake_patch - torch.mean(pred_real_patch), True)) / 2 if self.opt.patchD_3 > 0: for i in range(self.opt.patchD_3): pred_fake_patch_1 = self.netD_P.forward(self.fake_patch_1[i]) if self.opt.hybrid_loss: loss_G_A += self.criterionGAN(pred_fake_patch_1, True) else: pred_real_patch_1 = self.netD_P.forward( self.real_patch_1[i]) loss_G_A += (self.criterionGAN( pred_real_patch_1 - torch.mean(pred_fake_patch_1), False) + self.criterionGAN( pred_fake_patch_1 - torch.mean(pred_real_patch_1), True)) / 2 if not self.opt.D_P_times2: self.loss_G_A += loss_G_A / float(self.opt.patchD_3 + 1) else: self.loss_G_A += loss_G_A / float(self.opt.patchD_3 + 1) * 2 else: if not self.opt.D_P_times2: self.loss_G_A += loss_G_A else: self.loss_G_A += loss_G_A * 2 self.loss_G = self.loss_G_A if epoch < 0: vgg_w = 0 else: if seg_criterion is None: vgg_w = 1 else: vgg_w = 0 if vgg_w > 0: if self.opt.vgg > 0: self.loss_vgg_b = self.vgg_loss.compute_vgg_loss( self.vgg, self.fake_B, self.real_A) * self.opt.vgg if self.opt.vgg > 0 else 0 if self.opt.patch_vgg: if not self.opt.IN_vgg: loss_vgg_patch = self.vgg_loss.compute_vgg_loss( self.vgg, self.fake_patch, self.input_patch) * self.opt.vgg else: loss_vgg_patch = self.vgg_patch_loss.compute_vgg_loss( self.vgg, self.fake_patch, self.input_patch) * self.opt.vgg if self.opt.patchD_3 > 0: for i in range(self.opt.patchD_3): if not self.opt.IN_vgg: loss_vgg_patch += self.vgg_loss.compute_vgg_loss( self.vgg, self.fake_patch_1[i], self.input_patch_1[i]) * self.opt.vgg else: loss_vgg_patch += self.vgg_patch_loss.compute_vgg_loss( self.vgg, self.fake_patch_1[i], self.input_patch_1[i]) * self.opt.vgg self.loss_vgg_b += loss_vgg_patch / float( self.opt.patchD_3 + 1) else: self.loss_vgg_b += loss_vgg_patch self.loss_G = self.loss_G_A + self.loss_vgg_b * vgg_w elif self.opt.fcn > 0: self.loss_fcn_b = self.fcn_loss.compute_fcn_loss( self.fcn, self.fake_B, self.real_A) * self.opt.fcn if self.opt.fcn > 0 else 0 if self.opt.patchD: loss_fcn_patch = 
self.fcn_loss.compute_vgg_loss( self.fcn, self.fake_patch, self.input_patch) * self.opt.fcn if self.opt.patchD_3 > 0: for i in range(self.opt.patchD_3): loss_fcn_patch += self.fcn_loss.compute_vgg_loss( self.fcn, self.fake_patch_1[i], self.input_patch_1[i]) * self.opt.fcn self.loss_fcn_b += loss_fcn_patch / float( self.opt.patchD_3 + 1) else: self.loss_fcn_b += loss_fcn_patch self.loss_G = self.loss_G_A + self.loss_fcn_b * vgg_w # self.loss_G = self.L1_AB + self.L1_BA ## Seg Loss ################################ if seg_criterion is not None: # mIoU of enhanced image inter, union = utils_seg.batch_intersection_union( self.fake_B_Seg.data, self.mask, 19) idx = union > 0 IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx]) self.mIoU = np.nan_to_num(IoU.mean()) with torch.no_grad(): # mIoU of origin image by pretrained Seg Model inter, union = utils_seg.batch_intersection_union( self.real_A_Seg.data, self.mask, 19) idx = union > 0 IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx]) self.mIoU_ori = np.nan_to_num(IoU.mean()) self.mIoU_delta_mean = 0.8 * self.mIoU_delta_mean + 0.2 * np.round( self.mIoU - self.mIoU_ori, 3) # mIoU of origin image by Generator inter, union = utils_seg.batch_intersection_union( self.seg_real_A.data, self.mask, 19) idx = union > 0 IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx]) print("mIoU_generator", np.round(np.nan_to_num(IoU.mean()), 3)) print("G:", self.loss_G.data[0], "mIoU gain:", np.round(self.mIoU - self.mIoU_ori, 3), "mean:", np.round(self.mIoU_delta_mean, 3), "lum:", 255 * (1 - self.input_A_gray).mean(), "epoch:", epoch) lambd = 3 self.loss_Seg = seg_criterion( self.fake_B_Seg, self.mask) + lambd * seg_criterion(self.seg_real_A, self.mask) self.loss_G += self.loss_Seg ############################################ ## GAN_GT Loss ################################ if A_gt: # msssim = msssim_loss((self.fake_B.clamp(-1, 1)+1)/2*255, (self.A_gt+1)/2*255, weight_map=self.A_boundary) l1 = (F.l1_loss((self.fake_B + 1) / 2 * 255, (self.A_gt + 1) / 2 * 255, reduction='none') * self.A_boundary).mean() # self.loss_gt = 3 * msssim + 0.16 * l1 self.loss_gt = 0.1 * l1 print("loss_gt", self.loss_gt.data[0]) self.loss_G += self.loss_gt ############################################ self.loss_G.backward(retain_graph=True)
def main(): usage = ''' Usage: ------------------------------------------------ Run the iterated MAD algorithm on two multispectral images python %s [OPTIONS] filename1 filename2 Options: -h this help -i <int> maximum iterations (default 50) -d <list> spatial subset list e.g. -d [0,0,500,500] -p <list> band positions list e.g. -p [1,2,3] -l <float> regularization (default 0) -n suppress graphics -c append canonical variates to output The output MAD variate file is has the same format as filename1 and is named path/MAD_filebasename1-filebasename2.ext1 where filename1 = path/filebasename1.ext1 filename2 = path/filebasename2.ext2 For ENVI files, ext1 or ext2 is the empty string. -----------------------------------------------------''' % sys.argv[0] options, args = getopt.getopt(sys.argv[1:], 'hncl:p:i:d:') pos = None dims = None niter = 50 graphics = True cvs = False lam = 0.0 for option, value in options: if option == '-h': print(usage) return elif option == '-n': graphics = False elif option == '-c': cvs = True elif option == '-p': pos = eval(value) elif option == '-d': dims = eval(value) elif option == '-i': niter = eval(value) elif option == '-l': lam = eval(value) if len(args) != 2: print('Incorrect number of arguments') print(usage) return gdal.AllRegister() fn1 = args[0] fn2 = args[1] path = os.path.dirname(fn1) basename1 = os.path.basename(fn1) root1, ext1 = os.path.splitext(basename1) basename2 = os.path.basename(fn2) root2, _ = os.path.splitext(basename2) outfn = path + '/' + 'MAD_%s-%s%s' % (root1, root2, ext1) inDataset1 = gdal.Open(fn1, GA_ReadOnly) inDataset2 = gdal.Open(fn2, GA_ReadOnly) try: cols = inDataset1.RasterXSize rows = inDataset1.RasterYSize bands = inDataset1.RasterCount cols2 = inDataset2.RasterXSize rows2 = inDataset2.RasterYSize bands2 = inDataset2.RasterCount except Exception as e: print('Error: %s --Images could not be read.' 
% e) sys.exit(1) if (bands != bands2) or (cols != cols2) or (rows != rows2): sys.stderr.write("Size mismatch") sys.exit(1) if pos is None: pos = range(1, bands + 1) else: bands = len(pos) if dims is None: x0 = 0 y0 = 0 else: x0, y0, cols, rows = dims # if second image is warped, assume it has same dimensions as dims if root2.find('_warp') != -1: x2 = 0 y2 = 0 else: x2 = x0 y2 = y0 print('------------IRMAD -------------') print(time.asctime()) print('first scene: ' + fn1) print('second scene: ' + fn2) start = time.time() # iteration of MAD cpm = auxil.Cpm(2 * bands) delta = 1.0 oldrho = np.zeros(bands) itr = 0 tile = np.zeros((cols, 2 * bands)) sigMADs = 0 means1 = 0 means2 = 0 A = 0 B = 0 rasterBands1 = [] rasterBands2 = [] rhos = np.zeros((niter, bands)) for b in pos: rasterBands1.append(inDataset1.GetRasterBand(b)) for b in pos: rasterBands2.append(inDataset2.GetRasterBand(b)) while (delta > 0.001) and (itr < niter): # spectral tiling for statistics for row in range(rows): for k in range(bands): tile[:, k] = rasterBands1[k].ReadAsArray(x0, y0 + row, cols, 1) tile[:, bands + k] = rasterBands2[k].ReadAsArray( x2, y2 + row, cols, 1) # eliminate no-data pixels tile = np.nan_to_num(tile) tst1 = np.sum(tile[:, 0:bands], axis=1) tst2 = np.sum(tile[:, bands::], axis=1) idx1 = set(np.where((tst1 != 0))[0]) idx2 = set(np.where((tst2 != 0))[0]) idx = list(idx1.intersection(idx2)) if itr > 0: mads = np.asarray((tile[:, 0:bands] - means1) * A - (tile[:, bands::] - means2) * B) chisqr = np.sum((mads / sigMADs)**2, axis=1) wts = 1 - stats.chi2.cdf(chisqr, [bands]) cpm.update(tile[idx, :], wts[idx]) else: cpm.update(tile[idx, :]) # weighted covariance matrices and means S = cpm.covariance() means = cpm.means() # reset prov means object cpm.__init__(2 * bands) s11 = S[0:bands, 0:bands] s11 = (1 - lam) * s11 + lam * np.identity(bands) s22 = S[bands:, bands:] s22 = (1 - lam) * s22 + lam * np.identity(bands) s12 = S[0:bands, bands:] s21 = S[bands:, 0:bands] c1 = s12 * linalg.inv(s22) * s21 b1 = s11 c2 = s21 * linalg.inv(s11) * s12 b2 = s22 # solution of generalized eigenproblems if bands > 1: mu2a, A = auxil.geneiv(c1, b1) mu2b, B = auxil.geneiv(c2, b2) # sort a idx = np.argsort(mu2a) A = (A[:, idx])[:, ::-1] # sort b idx = np.argsort(mu2b) B = (B[:, idx])[:, ::-1] mu2 = (mu2b[idx])[::-1] else: mu2 = c1 / b1 A = 1 / np.sqrt(b1) B = 1 / np.sqrt(b2) # canonical correlations mu = np.sqrt(mu2) a2 = np.diag(A.T * A) b2 = np.diag(B.T * B) sigma = np.sqrt((2 - lam * (a2 + b2)) / (1 - lam) - 2 * mu) rho = mu * (1 - lam) / np.sqrt((1 - lam * a2) * (1 - lam * b2)) # stopping criterion delta = max(abs(rho - oldrho)) rhos[itr, :] = rho oldrho = rho # tile the sigmas and means sigMADs = np.tile(sigma, (cols, 1)) means1 = np.tile(means[0:bands], (cols, 1)) means2 = np.tile(means[bands::], (cols, 1)) # ensure sum of positive correlations between X and U is positive D = np.diag(1 / np.sqrt(np.diag(s11))) s = np.ravel(np.sum(D * s11 * A, axis=0)) A = A * np.diag(s / np.abs(s)) # ensure positive correlation between each pair of canonical variates cov = np.diag(A.T * s12 * B) B = B * np.diag(cov / np.abs(cov)) itr += 1 # canonical correlations print('rho: %s' % str(rho)) # write results to disk driver = inDataset1.GetDriver() outBands = [] if cvs: outDataset = driver.Create(outfn, cols, rows, 3 * bands + 1, GDT_Float32) for k in range(3 * bands + 1): outBands.append(outDataset.GetRasterBand(k + 1)) else: outDataset = driver.Create(outfn, cols, rows, bands + 1, GDT_Float32) for k in range(bands + 1): 
outBands.append(outDataset.GetRasterBand(k + 1)) projection = inDataset1.GetProjection() geotransform = inDataset1.GetGeoTransform() if geotransform is not None: gt = list(geotransform) gt[0] = gt[0] + x0 * gt[1] gt[3] = gt[3] + y0 * gt[5] outDataset.SetGeoTransform(tuple(gt)) if projection is not None: outDataset.SetProjection(projection) for row in range(rows): for k in range(bands): tile[:, k] = rasterBands1[k].ReadAsArray(x0, y0 + row, cols, 1) tile[:, bands + k] = rasterBands2[k].ReadAsArray( x2, y2 + row, cols, 1) cv1 = (tile[:, 0:bands] - means1) * A cv2 = (tile[:, bands::] - means2) * B mads = np.asarray(cv1 - cv2) chisqr = np.sum((mads / (sigMADs))**2, axis=1) for k in range(bands): outBands[k].WriteArray(np.reshape(mads[:, k], (1, cols)), 0, row) outBands[bands].WriteArray(np.reshape(chisqr, (1, cols)), 0, row) if cvs: for k in range(bands + 1, 2 * bands + 1): outBands[k].WriteArray( np.reshape(cv1[:, k - bands - 1], (1, cols)), 0, row) for k in range(2 * bands + 1, 3 * bands + 1): outBands[k].WriteArray( np.reshape(cv2[:, k - 2 * bands - 1], (1, cols)), 0, row) for outBand in outBands: outBand.FlushCache() outDataset = None inDataset1 = None inDataset2 = None print('result written to: ' + outfn) print('elapsed time: %s' % str(time.time() - start)) x = np.array(range(itr - 1)) if graphics: plt.plot(x, rhos[0:itr - 1, :]) plt.title('Canonical correlations') plt.xlabel('Iteration') plt.show() cm1 = (s11 * A - s12 * B) * D * np.diag(1 / sigma) ax = plt.subplot(111) for i in range(bands): ax.plot(range(1, bands + 1), cm1[:, i], label='MAD' + str(i + 1)) plt.title('iMAD correlations with first scene') plt.xlabel('Band') ax.legend() plt.show()
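# Brief sketch of the no-change weighting inside the IR-MAD loop above: under the
# no-change hypothesis the standardized MAD variates are chi-square distributed with
# `bands` degrees of freedom, so 1 - chi2.cdf gives a per-pixel no-change probability
# that weights the covariance update on the next iteration (toy data below).
import numpy as np
from scipy import stats

bands = 4
mads = np.random.randn(1000, bands)    # standardized MAD variates, toy values
chisqr = np.sum(mads ** 2, axis=1)
wts = 1 - stats.chi2.cdf(chisqr, bands)
print(wts.mean())                      # ~0.5 for pure no-change noise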
def makekeyspec(samplekeydata, targetpercent): def makekeyaverage(data): b = np.array(data[0]) * 0 for i in data: j = np.array(i) b = b + j average = b // len(data) return average havengsample = 1 ngfileadr = [] while havengsample == 1: print('-' * 19 + 'screening good key samples' + '-' * 19) print('count:', len(samplekeydata)) samplekeyaverage = makekeyaverage(samplekeydata) percentarray = [] diffvaluearray = [] for data in samplekeydata: specvalue = abs((((np.array(data)) / samplekeyaverage) - 1)) percentarray.append(specvalue) diffvalue = abs((np.array(data)) - samplekeyaverage) diffvaluearray.append(diffvalue) testsamplenumber = 0 samplenumber = 0 ngsamplenumber = [] havengsample = 0 percentarray = np.nan_to_num(percentarray) diffvaluearray = np.nan_to_num(diffvaluearray) for samplepercent in percentarray: maxpercent = np.max(samplepercent) if maxpercent >= targetpercent: maxlocation = np.where( samplepercent == np.max(samplepercent)) maxdatanumbers = len(maxlocation) diffarray = [] while (maxdatanumbers >= 1): x = 0 row = maxlocation[x] diff = diffvaluearray[testsamplenumber][row] diffarray.append(diff) maxdatanumbers -= 1 x += 1 maxdiff = np.max(diffarray) if (maxdiff <= 5): samplenumber += 1 break else: havengsample = 1 ngsamplenumber.append(testsamplenumber) del samplekeydata[samplenumber] testsamplenumber += 1 else: samplenumber += 1 testsamplenumber += 1 if havengsample == 1: for ng in ngsamplenumber: ngfileadr.append(L[ng]) print('*' * 19 + 'defective key samples' + '*' * 19) print(ngfileadr) print('\n') return samplekeyaverage
def plot_conn_mat(conn_matrix, labels, out_path_fig, cmap, binarized=False, dpi_resolution=300): """ Plot a connectivity matrix. Parameters ---------- conn_matrix : array NxN matrix. labels : list List of string labels corresponding to ROI nodes. out_path_fig : str File path to save the connectivity matrix image as a .png figure. """ import warnings warnings.filterwarnings("ignore") import matplotlib import mplcyberpunk matplotlib.use('Agg') from matplotlib import pyplot as plt plt.style.use("cyberpunk") from matplotlib import pyplot as plt from nilearn.plotting import plot_matrix from pynets.core import thresholding import matplotlib.ticker as mticker conn_matrix = thresholding.standardize(conn_matrix) conn_matrix_bin = thresholding.binarize(conn_matrix) conn_matrix_plt = np.nan_to_num(np.multiply(conn_matrix, conn_matrix_bin)) try: plot_matrix( conn_matrix_plt, figure=(10, 10), labels=labels, vmax=np.percentile(conn_matrix_plt[conn_matrix_plt > 0], 95), vmin=0, reorder="average", auto_fit=True, grid=False, colorbar=True, cmap=cmap, ) except RuntimeWarning: print("Connectivity matrix too sparse for plotting...") if len(labels) > 500: tick_interval = 5 elif len(labels) > 100: tick_interval = 4 elif len(labels) > 50: tick_interval = 2 else: tick_interval = 1 plt.axes().yaxis.set_major_locator(mticker.MultipleLocator(tick_interval)) plt.axes().xaxis.set_major_locator(mticker.MultipleLocator(tick_interval)) for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']: plt.rcParams[param] = '#000000' plt.savefig(out_path_fig, dpi=dpi_resolution) plt.close() return
import numpy as np max = 7000000 resample_factor = 25 # get data url = 'https://mapcore-bucket1.s3-us-west-2.amazonaws.com/ISAN/csv-data/use-case-2/Sample_1_18907001_channel_1.csv' bigfile = "C:\\Users\jkho021\Downloads\\bigdata.csv" data = np.loadtxt(bigfile, skiprows=1, usecols=tuple(range(0, 3)), delimiter=",", max_rows=max) data_array = data.view() data_array = data_array.transpose() data_array = np.nan_to_num(data_array) data_resampled = signal.resample(data_array[1], int(max / resample_factor)) with open('resampled.json', 'w') as f: json.dump({'data': data_resampled.tolist()}, f) # Create figure fig = go.Figure() fig.add_trace( go.Scattergl( x=np.linspace(0, len(data_array[1]), len(data_resampled)), y=data_resampled, mode='lines', name='resampled',
def apply_telcal(st, data, threshold=1 / 10., onlycomplete=True, sign=+1, savesols=False, returnsoltime=False): """ Wrap all telcal functions to parse telcal file and apply it to data sign defines if calibration is applied (+1) or backed out (-1). assumes dual pol and that each spw has same nch and chansize. Threshold is minimum ratio of gain amp to median gain amp. If no solution found, it will blank the data to zeros. """ assert sign in [-1, +1], 'sign must be +1 or -1' if st.gainfile is None: return data else: if (not os.path.exists(st.gainfile)) or (not os.path.isfile( st.gainfile)): logger.warning( '{0} is not a valid gain file. No calibration applied.'.format( st.gainfile)) return data else: sols = getsols(st, threshold=threshold, onlycomplete=onlycomplete, savesols=savesols) reffreq, nchan, chansize = st.metadata.spw_sorted_properties skyfreqs = np.around([ reffreq[i] + (chansize[i] * nchan[i] // 2) for i in range(len(nchan)) ], -6) / 1e6 # GN skyfreq is band center if len(sols): pols = np.array([0, 1]) solskyfreqs = np.unique(sols['skyfreq']) logger.info( "Applying solutions from frequencies {0} to data frequencies {1}" .format(solskyfreqs, np.unique(skyfreqs))) gaindelay = np.nan_to_num(calcgaindelay(sols, st.blarr, skyfreqs, pols, chansize[0] / 1e6, nchan[0], sign=sign), copy=False).take(st.chans, axis=1) else: logger.info( "No calibration solutions found for data freqs {0}".format( np.unique(skyfreqs))) gaindelay = np.zeros_like(data) # check for repeats or bad values repeats = [(item, count) for item, count in Counter( gaindelay[:, ::nchan[0]].flatten()).items() if count > 1] if len(repeats): for item, count in repeats: if item == 0j: logger.info( "{0} of {1} telcal solutions zeroed or flagged".format( count, gaindelay[:, ::nchan[0]].size)) if gaindelay.any(): blinds, chans, pols = np.where( gaindelay[:, ::nchan[0]] == 0) if len(blinds): counts = list( zip(*np.histogram( st.blarr[np.unique(blinds)].flatten(), bins=np.arange( 1, 1 + max(st.blarr[np.unique( blinds)].flatten()))))) logger.info('Flagged solutions for: {0}'.format( ', '.join([ 'Ant {1}: {0}'.format(a, b) for (a, b) in counts ]))) else: logger.warn( "Repeated telcal solutions ({0}: {1}) found. Likely a parsing error!" .format(item, count)) if returnsoltime: soltime = np.unique(sols['mjd']) return data * gaindelay, soltime else: return data * gaindelay
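# Hedged illustration of the `sign` convention above using a single complex gain
# factor: applying a calibration with sign=+1 and then backing it out with sign=-1
# recovers the original visibilities. The real calcgaindelay builds a full
# per-baseline/channel/polarization array, which is not reproduced here.
import numpy as np

rng = np.random.default_rng(0)
data = rng.standard_normal(8) + 1j * rng.standard_normal(8)   # toy visibilities
gain = 1.2 * np.exp(1j * 0.3)                                  # toy complex gain
applied = data * gain ** (+1)
restored = applied * gain ** (-1)
assert np.allclose(restored, data)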
def _entropy(self, values):
    # Binary (Shannon) entropy, assuming `values` is a 0/1 array: p is the fraction of ones.
    # nan_to_num maps the 0 * log2(0) = nan case to 0, so pure splits give 0 bits.
    p = values.sum() / values.shape[0]
    q = 1.0 - p
    return - np.nan_to_num(p * np.log2(p)) - np.nan_to_num(q * np.log2(q))
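# Standalone restatement of the binary-entropy helper above, just to show why the
# np.nan_to_num guards matter: without them, a pure split (p = 0 or p = 1) yields
# 0 * log2(0) = nan instead of 0 bits. Names here are illustrative only.
import numpy as np

def binary_entropy(values):
    p = values.sum() / values.shape[0]
    q = 1.0 - p
    old = np.seterr(all='ignore')              # silence the log2(0) warnings
    h = -np.nan_to_num(p * np.log2(p)) - np.nan_to_num(q * np.log2(q))
    np.seterr(**old)
    return h

print(binary_entropy(np.array([0, 0, 1, 1])))  # 1.0 bit, maximally mixed
print(binary_entropy(np.array([1, 1, 1, 1])))  # 0.0 bits (may print as -0.0), pure split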
def makespec(testsampledata, targetpercent):
    def makeaverage(sampledata2):
        b = (np.array(sampledata2[0])) * 0
        for i in sampledata2:
            j = np.array(i)
            b = b + j
        average = b // (len(sampledata2))
        return average

    havengsample = 1
    ngfileadr = []
    while havengsample == 1:
        print('-' * 19 + 'Screening good samples' + '-' * 19)
        print('Sample count:', len(testsampledata))
        print('\n')
        sampleaverage = makeaverage(testsampledata)
        percentarray = []
        diffvaluearray = []
        for data in testsampledata:
            specvalue = abs(((np.array(data)) / sampleaverage) - 1)
            percentarray.append(specvalue)
            diffvalue = abs((np.array(data) - sampleaverage))
            diffvaluearray.append(diffvalue)
        testsamplenumber = 0
        samplenumber = 0
        ngsamplenumber = []
        havengsample = 0
        percentarray = np.nan_to_num(percentarray)
        diffvaluearray = np.nan_to_num(diffvaluearray)
        for samplepercent in percentarray:
            maxpercent = np.max(samplepercent)
            if maxpercent >= targetpercent:
                singellinepercent = samplepercent.flatten()  # flatten the sample data from 2-D to 1-D for easier comparison
                singellinediff = (diffvaluearray[testsamplenumber]).flatten()  # flatten the sample-minus-average differences from 2-D to 1-D for easier comparison
                b = np.arange(len(singellinepercent))
                c = b[singellinepercent >= targetpercent]  # c holds the indices where this sample exceeds targetpercent
                for i in range(len(c)):
                    if singellinediff[c[i]] > 5:
                        havengsample = 1
                        ngsamplenumber.append(testsamplenumber)
                        del testsampledata[samplenumber]
                        samplenumber -= 1
                        break
            testsamplenumber += 1
            samplenumber += 1
        if havengsample == 1:
            for ng in ngsamplenumber:
                ngfileadr.append(L[ng])  # L is assumed to be a module-level list of sample file paths

    print('*' * 19 + 'Defective samples in the VA area' + '*' * 19)
    print(ngfileadr)
    print('Total defective samples in the VA area:', len(ngfileadr))
    print('\n')
    # print(sampleaverage)
    return sampleaverage
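# Simplified, hypothetical sketch of the rejection loop in makespec above, using
# 1-D samples and a float average instead of the 2-D panels and integer-divided
# average in the original: a sample is dropped when its relative deviation from the
# running average reaches target_percent AND its absolute deviation exceeds 5, and
# the average is recomputed until no more samples are rejected.
import numpy as np

def reject_outliers(samples, target_percent):
    samples = [np.asarray(s, dtype=float) for s in samples]
    while True:
        average = np.mean(samples, axis=0)
        keep = []
        for s in samples:
            rel = np.nan_to_num(np.abs(s / average - 1))
            diff = np.abs(s - average)
            if np.any((rel >= target_percent) & (diff > 5)):
                continue                        # reject this sample
            keep.append(s)
        if len(keep) == len(samples):
            return average                      # nothing rejected: converged
        samples = keep

print(reject_outliers([[10.0, 10.2], [10.1, 9.9], [10.0, 10.1], [17.0, 10.0]], 0.2))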
def main():
    parser = _build_args_parser()
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)

    # make sure the input files exist
    if not isfile(args.time_series):
        parser.error('The file "{0}" must exist.'.format(args.time_series))

    if not isfile(args.mesh):
        parser.error('The file "{0}" must exist.'.format(args.mesh))

    # make sure files are not accidentally overwritten
    if isfile(args.output):
        if args.overwrite:
            logging.info('Overwriting "{0}".'.format(args.output))
        else:
            parser.error('The file "{0}" already exists. Use -f to overwrite it.'.format(args.output))

    # load mapping
    mesh = np.load(args.mesh, allow_pickle=True)
    mapping = mesh['mapping']
    shape = mesh['shape']

    logging.info('Loading timeseries data.')

    # load time series for left and right hemispheres
    full_time_series_data = np.load(args.time_series)
    time_series_data = np.concatenate((full_time_series_data['lh_time_series'],
                                       full_time_series_data['rh_time_series']))

    logging.info('TS length: ' + str(time_series_data.shape))
    logging.info('Calculating mean signal for ' + str(shape[0]) + ' vertices.')

    # initialise an array to fill in the loop
    mean_time_series = np.empty([shape[0], time_series_data.shape[1]], dtype=np.float64)

    # calculate the mean signal at each vertex given the current mapping
    for i in range(shape[0]):
        vertex = mapping[i]
        mean_time_series[i, :] = np.mean(time_series_data[vertex, :], axis=0)

    logging.info('Calculating FC.')

    n = shape[0]
    fc = np.ones((n * (n + 1)) // 2)
    index = 0

    # calculate the FC using an upper triangular indexing scheme
    for i in range(n - 1):
        index += 1
        offset = n - i
        # calculate the correlation between the current vertex and all the vertices that come after it
        fc[index:(index + offset - 1)] = corr2(mean_time_series[i:(i + 1), :],
                                               mean_time_series[(i + 1):, :]).ravel()
        # used for upper triangular indices
        index += offset - 1

    result = np.zeros((n, n))
    result[np.triu_indices(n)] = fc

    # replace all nans with 0s
    result = np.nan_to_num(result)

    sub_sub_fc = None
    sub_surf_fc = None

    # calculate subcortical FC
    if args.sub_rois is not None:
        logging.info('Calculating FC for subcortical regions.')

        labels = ['label_' + lbl for lbl in map(str, args.sub_rois)]
        n = len(labels)
        sub_mean_time_series = np.empty([n, time_series_data.shape[1]], dtype=np.float64)
        for i in range(n):
            sub_mean_time_series[i, :] = np.mean(full_time_series_data[labels[i]], axis=0)

        fc = np.ones((n * (n + 1)) // 2)
        index = 0

        # calculate the subcortico-subcortical FC using an upper triangular indexing scheme
        for i in range(n - 1):
            index += 1
            offset = n - i
            fc[index:(index + offset - 1)] = corr2(sub_mean_time_series[i:(i + 1), :],
                                                   sub_mean_time_series[(i + 1):, :]).ravel()
            # used for upper triangular indices
            index += offset - 1

        sub_sub_fc = np.zeros((n, n))
        sub_sub_fc[np.triu_indices(n)] = fc

        # replace all nans with 0s
        sub_sub_fc = np.nan_to_num(sub_sub_fc)

        # calculate the cortico-subcortical FC, one subcortical region per row
        sub_surf_fc = np.ones((n, shape[0]))
        for i in range(n):
            sub_surf_fc[i, :] = corr2(sub_mean_time_series[i:(i + 1), :], mean_time_series).ravel()

        # replace all nans with 0s
        sub_surf_fc = np.nan_to_num(sub_surf_fc)

    # save the results
    if args.sub_rois is not None:
        scio.savemat(args.output, {'fc': result, 'sub_sub_fc': sub_sub_fc, 'sub_surf_fc': sub_surf_fc})
    else:
        scio.savemat(args.output, {'fc': result})
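# Self-contained sketch of the upper-triangular FC indexing used above, checked
# against np.corrcoef on a tiny synthetic time-series matrix. corr2_rows is a
# hypothetical stand-in for the corr2 helper imported elsewhere in the script; it
# is assumed to return Pearson correlations between rows of its two arguments.
import numpy as np

def corr2_rows(a, b):
    # Pearson correlation of each row of `a` with each row of `b`
    a = a - a.mean(axis=1, keepdims=True)
    b = b - b.mean(axis=1, keepdims=True)
    num = a @ b.T
    den = np.sqrt((a * a).sum(axis=1)[:, None] * (b * b).sum(axis=1)[None, :])
    return num / den

rng = np.random.default_rng(0)
ts = rng.normal(size=(5, 100))              # 5 "vertices", 100 time points
n = ts.shape[0]

fc = np.ones((n * (n + 1)) // 2)            # diagonal entries stay at 1
index = 0
for i in range(n - 1):
    index += 1
    offset = n - i
    fc[index:(index + offset - 1)] = corr2_rows(ts[i:(i + 1), :], ts[(i + 1):, :]).ravel()
    index += offset - 1

result = np.zeros((n, n))
result[np.triu_indices(n)] = fc
assert np.allclose(result + result.T - np.eye(n), np.corrcoef(ts))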
# trim the first 7 and last 14 hourly NOx samples and mask negative readings as missing
noxh = noxh[7:(len(noxh) - 14)]
noxh[noxh < 0] = np.nan

co = []
hc = []
ben = []
nox = []
no2 = []

# collapse the hourly series into daily means (each slice covers 23 of the 24 hours)
for i in range(23, len(no2h) + 23, 24):
    co.append(np.nanmean(coh[(i - 23):i]))
    hc.append(np.nanmean(hch[(i - 23):i]))
    ben.append(np.nanmean(bh[(i - 23):i]))
    nox.append(np.nanmean(noxh[(i - 23):i]))
    no2.append(np.nanmean(no2h[(i - 23):i]))

# replace remaining NaNs with zeros, then fill zero days with a random fraction of the series mean
co = np.nan_to_num(co)
co[co == 0] = np.random.random() * np.nanmean(co)
hc = np.nan_to_num(hc)
ben = np.nan_to_num(ben)
ben[ben == 0] = np.random.random() * np.nanmean(ben)
nox = np.nan_to_num(nox)
nox[nox == 0] = np.random.random() * np.nanmean(nox)
no2 = np.nan_to_num(no2)
no2[no2 == 0] = np.random.random() * np.nanmean(no2)

dias = range(1, len(co) + 1)  # day index, 1-based
fig = plt.figure()
plt.plot(dias, no2)
plt.show()
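# Compact alternative to the 24-hour averaging loop above (a sketch, not the
# original approach): trim the hourly series to whole days, reshape to (days, 24)
# and take the nanmean per row. Note that the loop above averages slices of 23
# hourly samples, whereas this version uses the full 24.
import numpy as np

def daily_means(hourly):
    hourly = np.asarray(hourly, dtype=float)
    n_days = len(hourly) // 24
    return np.nanmean(hourly[:n_days * 24].reshape(n_days, 24), axis=1)

hourly = np.random.default_rng(0).normal(loc=10.0, size=24 * 30)  # 30 synthetic days
print(daily_means(hourly).shape)                                  # (30,)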