def calc_distance_map(pipeline, ap_name, ca_name, ca_type, plotFlag=True, histIdx=False, fontsize=10):
    """
    Calculates pairwise distances/similarities between pipeline runs

    Optionally visualizes the result as a seaborn clustermap.

    For the 'PCA' and 'MogiVector' comparisons the distance is the square
    root of the summed (over stations) squared Euclidean differences between
    the per-station vectors of the two runs. For 'MogiSource' it is the
    Euclidean distance between the (lat, lon, depth) triplets of the two runs.

    @param pipeline: Pipeline to analyze. Must provide getMetadataHistory()
                     and RA_results (one result dictionary per run)
    @param ap_name: Name of the pipeline item that is being perturbed
    @param ca_name: Name of the pipeline item used as the comparison metric
                    for calculating the distance
    @param ca_type: Type of comparison metric ['PCA' for PCA, 'MogiSource'
                    for Mogi source, 'MogiVector' for Mogi vectors]
    @param plotFlag: Boolean flag for plotting the clustermap of distances
    @param histIdx: If True, rows/columns are labelled 'Configuration NN'
                    and the perturbed parameter history is returned as well
    @param fontsize: Fontsize of the clustermap tick labels and title

    @return: Depends on the flags:
             plotFlag and histIdx:  (cg, history)
             plotFlag only:         cg
             histIdx only:          (dist_mat, history)
             neither:               dist_mat
             where cg is the seaborn clustermap, dist_mat is a pandas
             DataFrame of the distances and history is the list of
             perturbed parameter strings
    @raise ValueError: If ca_type is not one of the supported types
    """
    titles = {
        'PCA': 'PCA Vector Distance Similarity',
        'MogiSource': 'Mogi Source Distance [deg+km]',
        'MogiVector': 'Summed Mogi Vector Distance [mm]',
    }
    # Fail early with a clear message instead of an unbound-variable error
    # deep inside the pairwise loop.
    if ca_type not in titles:
        raise ValueError("Unknown ca_type '" + str(ca_type) +
                         "', expected one of " + str(sorted(titles)))
    ctitle = titles[ca_type]

    # a history of the perturbed pipeline item
    history = []
    for runInfo in pipeline.getMetadataHistory():
        for stageItem in runInfo:
            if ap_name in stageItem:
                history.append(stageItem.rsplit(ap_name)[1].strip('[]'))

    # number of runs
    num_results = len(pipeline.RA_results)
    dist_mat = np.zeros([num_results, num_results])

    # compute distances between all pairs of runs (upper triangle only)
    for i in range(num_results):
        run_i = pipeline.RA_results[i][ca_name]

        for j in range(i, num_results):
            run_j = pipeline.RA_results[j][ca_name]

            if ca_type == 'PCA':
                summation = 0
                rstation_list = run_i['labels']
                num_stations = len(rstation_list)

                # some redundancy because of the way the modules/functions
                # were structured
                coord_list = pbo_util.getStationCoords(
                    pipeline.data_fetcher.meta_data, rstation_list)

                # eigenvectors (lat and lon) for the one run
                _, _, EV1_lat, EV1_lon, _ = pbo_tools.dirEigenvectors(
                    coord_list, run_i['CA'].components_[0])
                # and for the other run
                _, _, EV2_lat, EV2_lon, _ = pbo_tools.dirEigenvectors(
                    coord_list, run_j['CA'].components_[0])

                for k in range(num_stations):
                    ev1 = np.hstack((EV1_lat[k], EV1_lon[k]))
                    ev2 = np.hstack((EV2_lat[k], EV2_lon[k]))
                    # squared euclidean distance at each station
                    summation += sp.spatial.distance.euclidean(ev1, ev2)**2

            elif ca_type == 'MogiSource':
                # for now, just uses lat, lon, and depth for Mogi comparison
                ev1 = np.array([run_i['lat'], run_i['lon'], run_i['depth']])
                ev2 = np.array([run_j['lat'], run_j['lon'], run_j['depth']])
                summation = sp.spatial.distance.euclidean(ev1, ev2)**2

            else:  # 'MogiVector'
                # same comparison as the eigenvectors, but for the Mogi
                # modeled vectors
                summation = 0
                rstation_list = run_i['labels']
                num_stations = len(rstation_list)

                coord_list = np.array(
                    pbo_util.getStationCoords(pipeline.data_fetcher.meta_data,
                                              rstation_list))
                mogi_x_1, mogi_y_1 = MogiVectors(
                    run_i, coord_list[:, 0], coord_list[:, 1])
                mogi_x_2, mogi_y_2 = MogiVectors(
                    run_j, coord_list[:, 0], coord_list[:, 1])

                for k in range(num_stations):
                    ev1 = np.hstack((mogi_x_1[k], mogi_y_1[k]))
                    ev2 = np.hstack((mogi_x_2[k], mogi_y_2[k]))
                    # squared euclidean distance at each station
                    summation += sp.spatial.distance.euclidean(ev1, ev2)**2

                # Scale the Mogi difference to mm. Uses the amplitude of
                # run 0 for every pair, i.e. assumes all runs share the same
                # PCA amplitude (per the original author's note).
                summation *= (
                    pipeline.RA_results[0][ca_name]['pca_amplitude'])**2

            dist_mat[i][j] = np.sqrt(summation)

    # Mirror the upper triangle; the diagonal is the distance of a run to
    # itself and is therefore zero, so adding the transpose does not double it.
    dist_mat = dist_mat + dist_mat.transpose()

    if histIdx:
        labels = ['Configuration ' + str(ii).zfill(2)
                  for ii in range(len(history))]
        dist_mat = pd.DataFrame(dist_mat, index=labels, columns=list(labels))
    else:
        dist_mat = pd.DataFrame(dist_mat, index=history, columns=history)

    if plotFlag:
        cg = sns.clustermap(dist_mat)
        plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(),
                 rotation=0, fontsize=fontsize)
        plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(),
                 rotation=90, fontsize=fontsize)
        cg.cax.set_title(ctitle, fontsize=fontsize)
        if histIdx:
            return cg, history
        return cg

    if histIdx:
        return dist_mat, history
    return dist_mat
def process(self, obj_data):
    '''
    Plot the General Component Analysis results stored in obj_data.

    Draws a Basemap of the station area, the stations, the selected
    eigenvector at each station and, optionally, the Mogi source with its
    modeled vectors and error ellipses for the PCA.

    Side effects: sets self.dir_sign, multiplies the stored 'Projection'
    of the component analysis results in place by that sign, and saves the
    basemap in the obj_data results under self.str_description.

    @param obj_data: Data Wrapper that holds component analysis HPCA
    '''
    HPCA_name = self.comp_name
    Mogi_name = self.mogi_name
    pca_comp = self.pca_comp

    plt.figure()

    meta_data = obj_data.info()
    try:
        station_list = obj_data.get().minor_axis
    except AttributeError:
        # Data without a minor_axis attribute: fall back to its keys
        station_list = list(obj_data.get().keys())

    lat_range, lon_range = pbo_utils.getLatLonRange(meta_data, station_list)
    coord_list = pbo_utils.getStationCoords(meta_data, station_list)

    # Create a map projection of area
    offset = self.offset
    bmap = Basemap(llcrnrlat=lat_range[0] - offset,
                   urcrnrlat=lat_range[1] + offset,
                   llcrnrlon=lon_range[0] - offset,
                   urcrnrlon=lon_range[1] + offset,
                   projection='gnom',
                   lon_0=np.mean(lon_range),
                   lat_0=np.mean(lat_range),
                   resolution=self._bmap_res)

    bmap.drawmapboundary(fill_color='white')

    # Draw just coastlines, no lakes
    for i, cp in enumerate(bmap.coastpolygons):
        if bmap.coastpolygontypes[i] < 2:
            bmap.plot(cp[0], cp[1], 'k-')

    parallels = np.arange(np.round(lat_range[0] - offset, decimals=1),
                          np.round(lat_range[1] + offset, decimals=1), .1)
    meridians = np.arange(np.round(lon_range[0] - offset, decimals=1),
                          np.round(lon_range[1] + offset, decimals=1), .1)

    bmap.drawmeridians(meridians, labels=[0, 0, 0, 1], fontsize=14)
    bmap.drawparallels(parallels, labels=[1, 0, 0, 0], fontsize=14)

    pca_results = obj_data.getResults()[HPCA_name]
    pca = pca_results['CA']
    lonscale = 1
    latscale = 1
    scaleFactor = self.scaleFactor

    # The vertical ('V') and default cases differ only in the pdir argument
    # and in the vertical step used to place the station labels.
    if self.pca_dir == 'V':
        eigen_info = pbo_tools.dirEigenvectors(
            coord_list, pca.components_[pca_comp], pdir='V')
        label_y_step = 100
    else:
        eigen_info = pbo_tools.dirEigenvectors(
            coord_list, pca.components_[pca_comp])
        label_y_step = 800

    (station_lat_list, station_lon_list,
     ev_lat_list, ev_lon_list, dir_sign) = eigen_info

    self.dir_sign = dir_sign
    # Keep the stored projection consistent with the sign chosen for the
    # plotted eigenvectors (modifies the results in place).
    pca_results['Projection'] *= dir_sign
    ev_lat_list *= latscale
    ev_lon_list *= lonscale

    # Plot station coords. enumerate (rather than coord_list.index) keeps
    # the label and eigenvector of each station correct even when two
    # stations share the same coordinates.
    for idx, coord in enumerate(coord_list):
        bmap.plot(coord[1], coord[0], 'bo', markersize=8, latlon=True)
        x, y = bmap(coord[1], coord[0])
        # Shift the label depending on the arrow direction
        plt.text(x - (1 + np.sign(ev_lon_list[idx])) * 900 + 250,
                 y - (1 + np.sign(ev_lat_list[idx])) * label_y_step + 450,
                 station_list[idx], fontsize=14)

    bmap.quiver(station_lon_list, station_lat_list, ev_lon_list, ev_lat_list,
                latlon=True, scale=scaleFactor)

    # Reference arrow of length .2 (labelled '20%') placed 10% in from the
    # lower-left corner of the axes
    ax_x = plt.gca().get_xlim()
    ax_y = plt.gca().get_ylim()
    ref_x = ax_x[0] + .1 * (ax_x[1] - ax_x[0])
    ref_y = ax_y[0] + .1 * (ax_y[1] - ax_y[0])
    x, y = bmap(ref_x, ref_y, inverse=True)
    bmap.quiver(x, y, 0, .2, latlon=True, scale=scaleFactor,
                headwidth=3, headlength=3)
    plt.text(ref_x - 650, ref_y - 1000, '20%', fontsize=14)

    # Plotting Mogi source
    if Mogi_name is not None:
        mogi_res = obj_data.getResults()[Mogi_name]
        bmap.plot(mogi_res['lon'], mogi_res['lat'], "g^", markersize=10,
                  latlon=True)
        mogi_x_disp, mogi_y_disp = mogi.MogiVectors(
            mogi_res, station_lat_list, station_lon_list)
        bmap.quiver(station_lon_list, station_lat_list,
                    mogi_x_disp * dir_sign, mogi_y_disp * dir_sign,
                    latlon=True, scale=scaleFactor, color='red')

    # Plot error ellipses for the PCA
    if self.errorE:
        from matplotlib.patches import Ellipse

        ax = plt.gca()
        yScale = (bmap.urcrnrlat - bmap.llcrnrlat) / scaleFactor
        xScale = (bmap.urcrnrlon - bmap.llcrnrlon) / scaleFactor
        midY = (bmap.urcrnrlat + bmap.llcrnrlat) / 2
        midX = (bmap.urcrnrlon + bmap.llcrnrlon) / 2

        # neff scales the sample count n by a sum of lag weights that decay
        # exponentially with self.KF_tau
        # NOTE(review): presumably an effective sample size for an
        # autocorrelated series - confirm
        n = len(pca_results['Projection'][:, 0])
        tau = self.KF_tau
        delta_t = np.arange(-(n - 1), n + 1)
        mseq = (1 - np.abs(delta_t) / n)
        rdelt = np.exp(-np.abs(delta_t) / tau)
        neff = n / np.sum(mseq * rdelt)

        # Per-component matrices built from the explained variances and the
        # outer products of the other components
        # NOTE(review): looks like an eigenvector covariance estimate - confirm
        eigval = pca.explained_variance_
        n_comp = pca.components_.shape[0]
        aaTs = [np.outer(pca.components_[ii, :], pca.components_[ii, :].T)
                for ii in range(n_comp)]
        VVs = [eigval[ii] / neff * np.sum(
                   [eigval[k] / (eigval[k] - eigval[ii])**2 * aaTs[k]
                    for k in range(n_comp) if k != ii], axis=0)
               for ii in range(n_comp)]
        # NOTE(review): always uses component 0, not pca_comp - confirm intent
        sigmas = np.diag(VVs[0])**(1/2)

        for kk in range(len(station_lon_list)):
            vlon = ev_lon_list[kk]
            vlat = ev_lat_list[kk]
            slon = station_lon_list[kk]
            slat = station_lat_list[kk]
            # sigmas is read with two consecutive entries per station
            Elat = sigmas[2 * kk]
            Elon = sigmas[2 * kk + 1]
            cir_w, cir_h = (
                np.array(bmap(midX + Elon / scaleFactor,
                              midY + Elat / scaleFactor * .85))
                - np.array(bmap(midX, midY)))
            x, y = bmap(slon + vlon * xScale * .95, slat + vlat * yScale * .85)
            # if need to rotate ellipse, np.arctan2(vlat,vlon)*180/np.pi
            etest = Ellipse(xy=(x, y), width=cir_w, height=cir_h, angle=0,
                            edgecolor='k', fc='w', lw=1, zorder=-1)
            ax.add_artist(etest)

    obj_data.addResult(self.str_description, bmap)
def process(self, obj_data):
    '''
    Finds the magma source (default-mogi) from PBO GPS data.

    Assumes time series columns are named ('dN', 'dE', 'dU'). Predicts the
    location of the magma source using scipy.optimize.curve_fit.

    The fit is stored in the data wrapper (under self.str_description) as
    an ordered dictionary with the keys

        'lat'           = fitted latitude
        'lon'           = fitted longitude
        'depth'         = fitted source depth
        'amplitude'     = fitted source amplitude
        'ex_params'     = extra fitted parameters (depends on the source
                          type), or nan when there are none
        'pca_amplitude' = amplitude returned by self.FitPCA times pi
        'source_type'   = lower-case name of the source model actually used

    @param obj_data: Data object containing the results from the PCA stage
    '''
    h_pca_name = self.ap_paramList[0]()

    # Default source model; replaced below if a valid one is requested
    mag_source = pbo_tools.mogi
    ExScParams = ()
    source_type = 'mogi'

    if len(self.ap_paramList) >= 2:
        # Number of extra fit parameters required by each source model
        exN = {'mogi': 0, 'finite_sphere': 1, 'closed_pipe': 1,
               'constant_open_pipe': 1, 'rising_open_pipe': 2, 'sill': 0}
        requested = self.ap_paramList[1]()
        try:
            requested_source = getattr(pbo_tools, requested.lower())
            requested_params = tuple(np.ones((exN[requested.lower()],)))
        except (AttributeError, KeyError):
            # Unknown model name (or not a string): keep the Mogi default
            print('No source type called ' + requested +
                  ', defaulting to a Mogi source.')
        else:
            mag_source = requested_source
            ExScParams = requested_params
            source_type = requested.lower()

    pca_info = obj_data.getResults()[h_pca_name]
    projection = pca_info['Projection']
    start_date = pca_info['start_date']
    end_date = pca_info['end_date']

    ct, pca_amp = self.FitPCA(projection)
    pca_amp *= np.pi

    tp_directions = ('dN', 'dE', 'dU')
    xvs = []
    yvs = []
    zvs = []
    # 'dE' values feed x, 'dN' feed y and 'dU' feed z
    buckets = {tp_directions[1]: xvs,
               tp_directions[0]: yvs,
               tp_directions[2]: zvs}

    for label, data, _ in obj_data.getIterator():
        if label in buckets:
            distance, _ = self.FitTimeSeries(data.loc[start_date:end_date], ct)
            buckets[label].append(distance)
        else:
            print('Ignoring column: ', label)

    # Rescale the fitted values by 1e-6 before the inversion
    xvs = np.array(xvs) * 1e-6
    yvs = np.array(yvs) * 1e-6
    zvs = np.array(zvs) * 1e-6

    ydata = np.hstack((xvs, yvs, zvs)).T

    station_list = obj_data.get().minor_axis
    meta_data = obj_data.info()
    station_coords = pbo_utils.getStationCoords(meta_data, station_list)

    # One (dimension, lat, lon) entry per station and dimension, in the
    # same x, y, z block order as ydata
    xdata = [(dim, coord[0], coord[1])
             for dim in ('x', 'y', 'z')
             for coord in station_coords]

    # Start the fit from the middle of the station area
    coord_range = np.array(pbo_utils.getLatLonRange(meta_data, station_list))
    lat_guess = np.mean(coord_range[0, :])
    lon_guess = np.mean(coord_range[1, :])

    fit = optimize.curve_fit(mag_source, xdata, ydata,
                             (lat_guess, lon_guess, 5, 1e-4) + ExScParams)

    res = collections.OrderedDict()
    res['lat'] = fit[0][0]
    res['lon'] = fit[0][1]
    res['depth'] = fit[0][2]
    res['amplitude'] = fit[0][3]
    if len(fit[0]) > 4:
        res['ex_params'] = fit[0][4:]
    else:
        res['ex_params'] = np.nan
    res['pca_amplitude'] = pca_amp
    # Record the model that was actually fitted, so a fallback to Mogi is
    # not mislabelled with the unknown requested name
    res['source_type'] = source_type

    obj_data.addResult(self.str_description, res)
def multiCaPlot(pipeline, mogiFlag=False, offset=.15, direction='H', pca_comp=0, scaleFactor=2.5, map_res='i'):
    '''
    The multiCaPlot function generates a geographic eigenvector plot of several pipeline runs

    This function plots multiple pipeline runs over perturbed pipeline
    parameters. The individual runs are plotted transparently (alpha=.25),
    while the mean eigenvector and the mean Mogi inversion are plotted in
    solid blue and red.

    Reads the component analysis of each run from the 'GPCA' result and,
    when mogiFlag is set, the Mogi source from the 'Mogi' result.

    @param pipeline: The pipeline object with multiple runs
    @param mogiFlag: Flag to indicate plotting the Mogi source as well as the PCA
    @param offset: Offset for padding the corners of the generated map
    @param direction: Indicates the eigenvectors to plot. Only Horizontal
                      component is currently supported ('H'); the value is
                      currently not used by the function
    @param pca_comp: Choose the PCA component to use (integer)
    @param scaleFactor: Size of the arrow scaling factor
    @param map_res: Map data resolution for Basemap ('c', 'i', 'h', 'f', or None)

    @raise ValueError: If the pipeline holds no run results
    '''
    num_runs = len(pipeline.RA_results)
    if num_runs == 0:
        # Nothing to average or plot
        raise ValueError('multiCaPlot requires at least one pipeline run')

    # as this is a multi_ca_plot function, assumes GPCA
    plt.figure()

    meta_data = pipeline.data_generator.meta_data
    station_list = pipeline.data_generator.station_list

    lat_range, lon_range = pbo_tools.getLatLonRange(meta_data, station_list)
    coord_list = pbo_tools.getStationCoords(meta_data, station_list)

    # Create a map projection of area
    bmap = Basemap(llcrnrlat=lat_range[0] - offset,
                   urcrnrlat=lat_range[1] + offset,
                   llcrnrlon=lon_range[0] - offset,
                   urcrnrlon=lon_range[1] + offset,
                   projection='gnom',
                   lon_0=np.mean(lon_range),
                   lat_0=np.mean(lat_range),
                   resolution=map_res)

    bmap.drawmapboundary(fill_color='#41BBEC')
    bmap.fillcontinents(color='white')

    # Draw just coastlines, no lakes
    for i, cp in enumerate(bmap.coastpolygons):
        if bmap.coastpolygontypes[i] < 2:
            bmap.plot(cp[0], cp[1], 'k-')

    parallels = np.arange(np.round(lat_range[0] - offset, decimals=1),
                          np.round(lat_range[1] + offset, decimals=1), .1)
    meridians = np.arange(np.round(lon_range[0] - offset, decimals=1),
                          np.round(lon_range[1] + offset, decimals=1), .1)

    bmap.drawmeridians(meridians, labels=[0, 0, 0, 1])
    bmap.drawparallels(parallels, labels=[1, 0, 0, 0])

    # Plot station coords. enumerate keeps the label of each station correct
    # even when two stations share the same coordinates.
    for idx, coord in enumerate(coord_list):
        bmap.plot(coord[1], coord[0], 'ko', markersize=6, latlon=True,
                  zorder=12)
        x, y = bmap(coord[1], coord[0])
        plt.text(x + 250, y - 450, station_list[idx], zorder=12)

    # accumulators for the mean eigenvector over all runs
    elatmean = np.zeros(len(station_list))
    elonmean = np.zeros_like(elatmean)

    # check if want to plot Mogi as well
    if mogiFlag:
        avg_mogi = np.array([0., 0.])
        mlatmean = np.zeros_like(elatmean)
        mlonmean = np.zeros_like(elatmean)

    # loop over each pipeline run
    for nrun in range(num_runs):
        pca = pipeline.RA_results[nrun]['GPCA']['CA']
        (station_lat_list, station_lon_list,
         ev_lat_list, ev_lon_list, dir_sign) = pbo_tools.dirEigenvectors(
            coord_list, pca.components_[pca_comp])

        elatmean += ev_lat_list
        elonmean += ev_lon_list

        # plot each run in light blue
        bmap.quiver(station_lon_list, station_lat_list,
                    ev_lon_list, ev_lat_list,
                    latlon=True, scale=scaleFactor, alpha=.25,
                    color='blue', zorder=11)

        if mogiFlag:
            mogi_res = pipeline.RA_results[nrun]['Mogi']
            avg_mogi += np.array([mogi_res['lon'], mogi_res['lat']])
            mogi_x_disp, mogi_y_disp = mogi.MogiVectors(
                mogi_res, station_lat_list, station_lon_list)

            # Apply this run's sign before averaging so that the mean
            # arrows agree with the per-run arrows even when the sign
            # differs between runs.
            signed_x_disp = mogi_x_disp * dir_sign
            signed_y_disp = mogi_y_disp * dir_sign
            mlatmean += signed_y_disp
            mlonmean += signed_x_disp

            bmap.plot(mogi_res['lon'], mogi_res['lat'], "g^",
                      markersize=10, latlon=True, alpha=.25, zorder=12)
            bmap.quiver(station_lon_list, station_lat_list,
                        signed_x_disp, signed_y_disp,
                        latlon=True, scale=scaleFactor, color='red',
                        alpha=.25, zorder=11)

    # plot the mean ev in blue
    elatmean = elatmean / num_runs
    elonmean = elonmean / num_runs
    bmap.quiver(station_lon_list, station_lat_list, elonmean, elatmean,
                latlon=True, scale=scaleFactor, color='blue', alpha=1,
                zorder=11)

    if mogiFlag:
        # plot mean mogi results (vectors are already sign-corrected)
        avg_mogi = avg_mogi / num_runs
        mlatmean = mlatmean / num_runs
        mlonmean = mlonmean / num_runs
        bmap.plot(avg_mogi[0], avg_mogi[1], "g^", markersize=10,
                  latlon=True, alpha=1, zorder=12)
        bmap.quiver(station_lon_list, station_lat_list, mlonmean, mlatmean,
                    latlon=True, scale=scaleFactor, color='red', alpha=1,
                    zorder=11)

    # Reference arrow of length .2 (labelled '20%') placed 10% in from the
    # lower-left corner of the axes
    ax_x = plt.gca().get_xlim()
    ax_y = plt.gca().get_ylim()
    ref_x = ax_x[0] + .1 * (ax_x[1] - ax_x[0])
    ref_y = ax_y[0] + .1 * (ax_y[1] - ax_y[0])
    x, y = bmap(ref_x, ref_y, inverse=True)
    bmap.quiver(x, y, 0, .2, latlon=True, scale=scaleFactor,
                headwidth=3, headlength=3, zorder=11)
    plt.text(ref_x - 650, ref_y - 1000, '20%', zorder=11)