def LoadFilteredLOF(filterSpace=None):
    """
    Load the shale database, optionally screened with the LOF filter.

    Cached bootstraps are discarded first, since they were derived from
    whichever filter was in force previously.

    filterSpace -- filter system passed to MultiCompose and FilterLOF.Apply.
                   When None, no filter is applied.

    Returns a FilterResult built from the unfiltered shale database, the
    (possibly filtered) shale database, and the string describing the filter
    that was applied (empty when unfiltered).
    """
    #Bootstraps computed under the previous filter are no longer valid
    Bootstrap.NukeCache()

    #Read the igneous database (done whether or not a filter is requested)
    HL38.LoadModule("CommonDBs")
    igneousCsv = HL38.DatabaseAsCSV("keller_unfiltered")
    dbIgn = pandas.read_csv(StringIO(igneousCsv), na_values=['', 'x'])

    dbShale = ReadFile("all")
    filterApplied = ""

    #No filter requested: the "filtered" database is simply the full one
    if filterSpace is None:
        return FilterResult(dbShale, dbShale, filterApplied)

    #Build the LOF mask against the igneous data and apply it to the shales
    print("Applying filter for system: " + str(filterSpace))
    filterApplied += MultiCompose(filterSpace)
    lofFilter = FilterLOF.Apply(dbShale, dbIgn, filterSpace)
    passrate = np.sum(lofFilter) / len(dbShale.index)
    print("Filter applied: pass rate is " + str(100 * passrate) + "%")
    return FilterResult(dbShale, dbShale[lofFilter], filterApplied)
def Show3DMixSpace(self, t):
    """
    Display the current reconstruction's mixing space in 3D using mayavi.
    Only works if there are four endmembers.

    t -- time passed to the single-time MCMC test.
    """
    states = HL38.MCMC_SingleTimeTest_WRB(self.RM, t)

    #Collect the endmember names in index order
    names = []
    for idx in range(self.RM.GetEndmemberCount()):
        names.append(self.RM.GetEndmemberName(idx))

    MixingSpace.Show3D(states, names, self.config.reconSystems)
def SubplotMCMCStates(self, t, plotModern, plotArcha):
    """
    Draw one set of MCMC states onto the current pyplot axes.

    The first two entries of the single-time MCMC result supply the x and y
    axes respectively.

    t          -- time passed to the single-time MCMC test.
    plotModern -- when true, overlay modern estimates: the KMF forward-model
                  point (only for the KMF endmember script) and the
                  Taylor & McLennan and Rudnick & Gao ratios.
    plotArcha  -- when true, overlay the Archaean KMF forward-model point
                  (only for the KMF endmember script).
    """
    def forwardRatio(state, wK, wM, wF):
        #Weighted sum of the cA entries over the same weighted sum of cB
        numer = wK*state.cA[0] + wM*state.cA[1] + wF*state.cA[2]
        denom = wK*state.cB[0] + wM*state.cB[1] + wF*state.cB[2]
        return numer / denom

    #Run the C++ engine
    aS = HL38.MCMC_SingleTimeTest_WRB(self.RM, t)
    endNames = []
    for idx in range(self.RM.GetEndmemberCount()):
        endNames.append(self.RM.GetEndmemberName(idx))

    plt.gca().set_facecolor('k')

    #Keep only the samples where both ratios are finite
    X = np.array(aS[0].mcmcR)
    Y = np.array(aS[1].mcmcR)
    finite = np.isfinite(X) & np.isfinite(Y)
    plt.hist2d(X[finite], Y[finite], bins=80, cmin=1)

    MixingSpace.Plot(plt, aS[0].cA, aS[0].cB, aS[1].cA, aS[1].cB, endNames)
    #Best-fitting reconstruction
    plt.plot(aS[0].bestR, aS[1].bestR, "+", label="BEST")
    #Modern-day value given by bootstrap
    plt.plot(aS[0].bootR, aS[1].bootR, "x", label="BOOT")

    #Plot modern estimates (if requested)
    if plotModern:
        if self.config.endScript == EndmemberConfig.KMF:
            #Modern-day value given by forward model, using KMF proportions
            #from the Science paper
            bestR0 = forwardRatio(aS[0], SciConstants.bestfitK,
                                  SciConstants.bestfitM, SciConstants.bestfitF)
            bestR1 = forwardRatio(aS[1], SciConstants.bestfitK,
                                  SciConstants.bestfitM, SciConstants.bestfitF)
            plt.plot(bestR0, bestR1, "*", label="TiIso")

        r1A, r1B = DecomposeR(self.config.reconSystems[0])
        r2A, r2B = DecomposeR(self.config.reconSystems[1])
        TM_R0 = SciConstants.TaylorMcLennan[r1A] / SciConstants.TaylorMcLennan[r1B]
        TM_R1 = SciConstants.TaylorMcLennan[r2A] / SciConstants.TaylorMcLennan[r2B]
        RG_R0 = SciConstants.RudnickGao[r1A] / SciConstants.RudnickGao[r1B]
        RG_R1 = SciConstants.RudnickGao[r2A] / SciConstants.RudnickGao[r2B]
        #Taylor & McLennan estimate for modern-day upper continental crust
        plt.plot(TM_R0, TM_R1, "8", label="T&M'85")
        #Rudnick & Gao estimate for modern-day upper continental crust
        plt.plot(RG_R0, RG_R1, "8", label="R&G'03")

    #Plot Archaean estimates (if requested)
    if plotArcha:
        if self.config.endScript == EndmemberConfig.KMF:
            bestR0 = forwardRatio(aS[0], SciConstants.bestfitKArch,
                                  SciConstants.bestfitMArch, SciConstants.bestfitFArch)
            bestR1 = forwardRatio(aS[1], SciConstants.bestfitKArch,
                                  SciConstants.bestfitMArch, SciConstants.bestfitFArch)
            plt.plot(bestR0, bestR1, "*", label="TiIso")

    plt.xlabel(self.config.reconSystems[0])
    plt.ylabel(self.config.reconSystems[1])
    plt.legend()
def GenerateSynthetic(suffix=""):
    """
    Build a fresh synthetic shale database, with no filter applied.

    suffix -- string appended to the descriptive label stored in the result.

    Returns a FilterResult in which the same synthetic database serves as
    both the unfiltered and the filtered entry.
    """
    #Cached bootstraps belong to whatever database was loaded before
    Bootstrap.NukeCache()

    stepTime = 1
    syntheticCsv = HL38.GenerateSyntheticDatabase_Dual(stepTime)
    dbSynth = pandas.read_csv(StringIO(syntheticCsv))

    label = "GEN_SYNTH_DATA_STEPTIME-" + str(stepTime)
    label = label + "-" + suffix
    return FilterResult(dbSynth, dbSynth, label)
def InitReconManager(self, prepareBootstraps = True):
    """
    Set up the C++ reconstruction manager object, caching bootstrap results
    for future use.

    prepareBootstraps -- when true, a bootstrap is added to the manager for
                         every entry of self.reconSystems (taken from the
                         Bootstrap cache via Bootstrap.GetCached).

    Returns the reconstruction manager, or None if a bootstrap that is not
    yet cached would have to be built from fewer than 10 data points.
    """
    RM = HL38.ReconManager(self.ToDict(), self.DBstr)
    if not prepareBootstraps:
        return RM

    for r in self.reconSystems:
        #Only uncached bootstraps need the data-count check
        if not Bootstrap.IsCached(r):
            A, B = DecomposeR(r)
            if RM.DataCountForBootstrap(A, B) < 10:
                print("INSUFFICIENT DATA TO CONSTRUCT ALL BOOTSTRAPS")
                #Honour the documented contract instead of carrying on and
                #building a bootstrap from too little data
                return None
        RM.AddBootstrap(Bootstrap.GetCached(r, RM))
    return RM
def InverseModelCalc(RM, rVals, rErrors, t):
    """
    Find the best-fitting mixture for a given list of ratio values.

    HL38's single-timestep MCMC is used, but rather than bootstrapping the
    shale data, hand-built bootstraps carrying the supplied values are loaded
    into the reconstruction manager.

    RM      -- reconstruction manager; its existing bootstraps are removed.
    rVals   -- ratio values, one per bootstrap to create.
    rErrors -- standard errors paired with rVals.
    t       -- time at which the reconstruction is run.

    Returns the result of HL38.MCMC_SingleTimeTest_WRB(RM, t).
    """
    #Delete previous bootstraps from the RM
    RM.ResetAllBootstraps()

    #Each bootstrap holds a constant value and error at t-50, t and t+50
    for value, error in zip(rVals, rErrors):
        sampleTimes = (t - 50.0, t, t + 50.0)
        flat = HL38.WRB_Result()
        for when in sampleTimes:
            flat.bestFit.AddNewPoint(when, value)
        for when in sampleTimes:
            flat.stdError.AddNewPoint(when, error)
        flat.bestFit.Finalise()
        flat.stdError.Finalise()
        RM.AddBootstrap(flat)

    #Run the single time-step reconstruction
    return HL38.MCMC_SingleTimeTest_WRB(RM, t)
def LoadHL38(dbName):
    """
    Read the named HL38 database into a pandas dataframe.

    dbName -- database name passed to HL38.DatabaseAsCSV.

    Empty fields and 'x' are treated as missing values.
    """
    csvText = HL38.DatabaseAsCSV(dbName)
    csvStream = StringIO(csvText)
    return pandas.read_csv(csvStream, na_values=['', 'x'])
def EvalMixSpace(fR, b_cache, ratio_list):
    """
    Evaluates the quality of a given mixing space, returns a rating string.

    fR         -- passed through to PrepRM to build the reconstruction manager.
    b_cache    -- mapping from ratio name to its cached bootstrap.
    ratio_list -- ratio names making up the mixing space; None yields "".

    Returns one comma-separated line ending in ",\\n": the ratio names joined
    by "-", then for each time period (3500.0, then 0.0) three maximum
    misfits followed by three maximum 2.5%-97.5% endmember ranges.
    """
    if ratio_list is None:
        return ""

    #Define our target points as the proportions from the Science paper,
    #and also Tang et al's proportions.
    #Endmember type: DUAL, Endmember script: KMF
    CENTRAL_MIX = [
        [[0.15, 0.27, 0.58], [0.15, 0.70, 0.15]],  #Archaean proportions
        [[0.0, 0.28, 0.72]],                       #Modern proportions
    ]

    #Header field. join() keeps every separator even when a ratio equal to
    #the last one also appears earlier in the list.
    fields = ["-".join(ratio_list)]

    #Load cached bootstraps into a separate list, outside the RM
    bList = [b_cache[rName] for rName in ratio_list]

    #Run for both Archaean and modern time periods
    HL38.LoadModule("CommonDBs")
    RM = PrepRM(fR, ratio_list)
    for t, targetList in zip([3500.0, 0.0], CENTRAL_MIX):
        #Load error value
        errVals = [bootstrap.stdError(t) for bootstrap in bList]

        misfitMax = None
        errorMax = None

        #Scan error points
        for targetPoint in targetList:
            #Calculate values of central point
            cPoint = list(RM.ForwardModelCalc(t, targetPoint))
            #Run the MCMC reconstruction at this central point
            result = InverseModelCalc(RM, cPoint, errVals, t)
            #Get the endmember misfit & variances
            misfit = [result.best[i] - targetPoint[i] for i in range(3)]
            spread = [result.endmemberP975[i] - result.endmemberP025[i]
                      for i in range(3)]

            #Track the per-endmember maximum over all target points.
            #NOTE(review): the maximum is taken over signed misfits, so a
            #large negative misfit never registers - confirm this is intended.
            if misfitMax is None:
                misfitMax = misfit
                errorMax = spread
            else:
                misfitMax = [max(new, cur) for new, cur in zip(misfit, misfitMax)]
                errorMax = [max(new, cur) for new, cur in zip(spread, errorMax)]

        #Log results
        fields.extend(str(misfit) for misfit in misfitMax)
        fields.extend(str(error) for error in errorMax)

    #Plot mixing spaces too, if asked for
    if PLOT_SPACES:
        Recon.Visualiser(PrepC()).CompareArchaModern()

    #Every field, including the last, is followed by a comma
    return ",".join(fields) + ",\n"
def Find(recon_system, good_elements_list):
    """
    Returns the optimal filter space for a given system.

    recon_system       -- reconstruction ratio names; each is split by
                          Util.DecomposeR into two keys used to index the
                          igneous database.
    good_elements_list -- elements from which candidate ratios are generated;
                          each element is used by at most one added candidate.

    The reconstruction ratios themselves are always placed in the filter
    first. Candidates are then taken, for each reconstruction ratio in turn,
    in order of decreasing mutual information with that ratio, skipping any
    candidate that reuses an element already consumed. The search stops when
    no elements remain, the filter grows past MAX_LEN, or every candidate
    list is exhausted.

    Returns the list of ratio names forming the filter space.
    """
    print("OPTIMISING FILTER SPACE FOR: " + str(recon_system))

    #Get the igneous database
    HL38.LoadModule("CommonDBs")
    ign = Database.LoadHL38("keller_unfiltered")

    #Generate all possible ratios
    candidate_list = Rombinatorics.Gen_NoRepeats(good_elements_list, 1)

    #Calculate the MI of every reconstructor-candidate ratio pair
    n_r = len(recon_system)
    n_c = len(candidate_list)
    mi = {}
    print("COMPUTING MUTUAL INFORMATION")
    p_bar = progressbar.ProgressBar(max_value=n_r * n_c)
    for r, r_name in enumerate(recon_system):
        mi[r_name] = []
        rA, rB = Util.DecomposeR(r_name)
        reconstructor_val = ign[rA] / ign[rB]
        filter_r = np.isfinite(reconstructor_val)
        for c, candidate in enumerate(candidate_list):
            c_name = candidate[0]
            cA, cB = Util.DecomposeR(c_name)
            candidate_val = ign[cA] / ign[cB]
            filter_c = np.isfinite(candidate_val)
            #Only rows where both ratios are finite enter the MI estimate
            filter_rc = np.bitwise_and(filter_r, filter_c)
            #NOTE(review): the trailing 20 is presumably a bin count - confirm
            calc_mi = MetaStat.MutualInformation(reconstructor_val[filter_rc],
                                                 candidate_val[filter_rc], 20)
            mi[r_name].append((calc_mi, c_name))
            p_bar.update(r * n_c + c)
    p_bar.finish()

    #Sort by MI for each reconstruction ratio separately
    for mi_list in mi.values():
        mi_list.sort(key=lambda x: x[0], reverse=True)
    if DEBUG_LOG:
        for r_sys in recon_system:
            print(mi[r_sys])

    #Pick the best ratios for each reconstruction system, in turn, such that
    #elements aren't repeated
    r_list = []
    elements_remaining = set(good_elements_list)

    def RatioGood(r):
        #A ratio is usable only if neither of its elements has been consumed
        A, B = Util.DecomposeR(r)
        return A in elements_remaining and B in elements_remaining

    def AddRatio2FilterSpace(r):
        #Record the ratio and mark both of its elements as consumed
        r_list.append(r)
        A, B = Util.DecomposeR(r)
        elements_remaining.discard(A)
        elements_remaining.discard(B)

    #Include the original reconstruction ratios in the filter
    for ratio in recon_system:
        AddRatio2FilterSpace(ratio)

    while elements_remaining:
        #Stop if filter is getting too long
        if len(r_list) > MAX_LEN:
            print("EXCEEDED MAX FILTER SIZE")
            break

        #Stop if we've exhausted all lists
        if not any(mi[recon_r] for recon_r in recon_system):
            print("EXHAUSTED VALID SYSTEMS LIST")
            break

        #Add top candidate from each reconstruction ratio to the filter list
        #Make sure elements are not repeated
        for recon_r in recon_system:
            ranked = mi[recon_r]
            #This particular list may already be empty while others are not
            #(e.g. duplicate entries in recon_system share one list);
            #indexing it would raise IndexError
            if not ranked:
                continue
            top_pick = ranked.pop(0)
            if RatioGood(top_pick[1]):
                AddRatio2FilterSpace(top_pick[1])

    if not elements_remaining:
        print("EXHAUSTED ALL ELEMENTS!")

    print("FOUND OPTIMAL SPACE: " + str(r_list))
    return r_list