def __init__(self, pop1, pop2, objFunc, areLargeValuesMoreDifferent,
             minAge=0.0, maxAge=4500.0, dt=1.0, nIters=5000):
    '''
    Build a permutation test comparing two detrital populations.

    The ages and errors of both populations are pooled, the pool is randomly
    re-split nIters times into two groups with the original group sizes, and
    objFunc is evaluated on every re-split. The value for the true labelling
    and the resulting p-value are stored on the instance.

    :param pop1: an instance of detrital population
    :param pop2: a second detrital population to compare against pop1
    :param objFunc: a comparison function of two populations, called as
        objFunc(popA, popB), that returns a single value (e.g. one of the
        functions in populationMetrics.py, or a user defined function)
    :param areLargeValuesMoreDifferent: boolean, True if large values of
        objFunc indicate poorer matches between datasets; stored as given
    :param minAge: lower bound of the age axis (tmin) passed to each
        permuted population
    :param maxAge: upper bound of the age axis (tmax) passed to each
        permuted population
    :param dt: spacing of the age axis (delt) passed to each permuted
        population
    :param nIters: the number of permutations to perform

    NOTE: the true-labelling value is computed from pop1 and pop2 exactly as
    supplied; only the permuted populations are built with
    minAge/maxAge/dt.
    '''
    self.nIters = nIters
    self.areLargeValuesMoreDifferent = areLargeValuesMoreDifferent

    # Pool the observations from both populations
    pooledAges = np.hstack((pop1.ages, pop2.ages))
    pooledErrors = np.hstack((pop1.errors, pop2.errors))

    # Group label per pooled observation: 0 -> came from pop1, 1 -> from pop2
    groupLabels = np.hstack((np.zeros_like(pop1.ages),
                             np.ones_like(pop2.ages)))
    nPooled = len(groupLabels)

    # One slot per permutation result
    self._permVals = np.zeros(nIters)

    # Objective function value for the real (unshuffled) labelling
    self._thisVal = objFunc(pop1, pop2)

    # Age-axis settings shared by every permuted population
    axisArgs = dict(tmin=minAge, tmax=maxAge, delt=dt)

    for iteration in range(nIters):
        # Randomly reassign the group labels, preserving the group sizes
        relabelled = groupLabels[np.random.permutation(nPooled)]
        inFirst = relabelled == 0
        inSecond = relabelled == 1

        # Rebuild two populations from the shuffled labelling
        firstPop = population(ages=pooledAges[inFirst],
                              errors=pooledErrors[inFirst], **axisArgs)
        secondPop = population(ages=pooledAges[inSecond],
                               errors=pooledErrors[inSecond], **axisArgs)

        self._permVals[iteration] = objFunc(firstPop, secondPop)

    # Probability associated with the true-labelling value
    self.pVal = self.getPforVal(self._thisVal)
def mixture_Dmax(daughterPop, parentPops, mixingCoefficients):
    '''Dmax (the maximum difference between CDFs) of a daughter population
    and a mixture of parent populations.

    The parents are first combined into a single mixed population using the
    supplied coefficients; Dmax is then evaluated between the daughter and
    that mixture.

    :param daughterPop: an instance of the population class describing a daughter population
    :param parentPops: a list of instances of the population class describing the parents
    :param mixingCoefficients: a list, or array, of fractional mixing coefficients that must sum to one
    :return: the value returned by Dmax(daughterPop, mixture)
    '''
    return Dmax(
        daughterPop,
        population(parentPopulations=parentPops,
                   mixingCoefficients=mixingCoefficients),
    )
def mixture_Vmax(daughterPop, parentPops, mixingCoefficients):
    '''Vmax value of the Kuiper statistic (as summarized by Saylor and
    Sundell, 2016, Geosphere) between a daughter population and a mixture of
    parent populations.

    The parents are first combined into a single mixed population using the
    supplied coefficients; Vmax is then evaluated between the daughter and
    that mixture.

    :param daughterPop: an instance of the population class describing a daughter population
    :param parentPops: a list of instances of the population class describing the parents
    :param mixingCoefficients: a list, or array, of fractional mixing coefficients that must sum to one
    :return: the value returned by Vmax(daughterPop, mixture)
    '''
    return Vmax(
        daughterPop,
        population(parentPopulations=parentPops,
                   mixingCoefficients=mixingCoefficients),
    )
def mixture_correlationCoeff(daughterPop, parentPops, mixingCoefficients):
    '''Correlation coefficient (e.g. r^2) between the pdfs of a daughter
    population and a mixture of parent populations, a method proposed by
    Saylor and Sundell, 2016, Geosphere.

    The parents are first combined into a single mixed population using the
    supplied coefficients; correlationCoeff is then evaluated between the
    daughter and that mixture.

    :param daughterPop: an instance of the population class describing a daughter population
    :param parentPops: a list of instances of the population class describing the parents
    :param mixingCoefficients: a list, or array, of fractional mixing coefficients that must sum to one
    :return: the value returned by correlationCoeff(daughterPop, mixture)
    '''
    return correlationCoeff(
        daughterPop,
        population(parentPopulations=parentPops,
                   mixingCoefficients=mixingCoefficients),
    )
def mixture_Likeness(daughterPop, parentPops, mixingCoefficients):
    '''"Likeness" of a daughter population and a mixture of parent
    populations, where likeness is defined following Satkoski et al., 2013
    as summarized in Saylor and Sundell, 2016.

    The parents are first combined into a single mixed population using the
    supplied coefficients; likeness is then evaluated between the daughter
    and that mixture.

    :param daughterPop: an instance of the population class describing a daughter population
    :param parentPops: a list of instances of the population class describing the parents
    :param mixingCoefficients: a list, or array, of fractional mixing coefficients that must sum to one
    :return: the value returned by likeness(daughterPop, mixture)
    '''
    return likeness(
        daughterPop,
        population(parentPopulations=parentPops,
                   mixingCoefficients=mixingCoefficients),
    )
def mixture_Similarity(daughterPop, parentPops, mixingCoefficients):
    '''"Similarity" (Gehrels, 2000 method) of a daughter DZ population and
    a mixture of parent populations.

    This is a thin wrapper: the parents are first combined into a single
    mixed population using the supplied coefficients, and similarity() is
    then called on the daughter and that mixture.

    :param daughterPop: an instance of the population class describing a daughter population
    :param parentPops: a list of instances of the population class describing the parents
    :param mixingCoefficients: a list, or array, of fractional mixing coefficients that must sum to one
    :return: the value returned by similarity(daughterPop, mixture)
    '''
    return similarity(
        daughterPop,
        population(parentPopulations=parentPops,
                   mixingCoefficients=mixingCoefficients),
    )
def getMixedPopulation(self, mixtureNumber, parentPops, **kwargs):
    '''Return the population produced by mixing the parents with the
    mixtureNumber-th set of mixing coefficients stored on this object.

    :param mixtureNumber: index into self.mixingCoeffs selecting which set of
        coefficients to use
    :param parentPops: the parent populations to mix
    :param kwargs: any extra keyword arguments, forwarded unchanged to the
        population constructor
    :return: the mixed population
    '''
    coefficients = self.mixingCoeffs[mixtureNumber]
    return population(parentPopulations=parentPops,
                      mixingCoefficients=coefficients,
                      **kwargs)
#The name of the sheets where data is stored in that file #Name of the rows in the exceel sheet that contain information ageHeader = 'BestAge' #For ages errorHeader = 'BestAge_err_1s_Ma' #For 1-sigma errors sampleID = 'Sample_ID' #For sample names #How do we want to scale the PDFs? minAge = 0.0 maxAge = 300.0 dt = 1.0 #Read in a few different samples, 'DR584' DR584 = detPop.population(excelFileName=pathToExcelFile, excelSheetName=excelSheet, ageHeader=ageHeader, errorHeader=errorHeader, sampleIDfield=sampleID, sampleID='DR584') #Sample T693, which mixture modelling of Sharman and Johnstone, 2017 suggests is pretty similar to DR584 T693 = detPop.population(excelFileName=pathToExcelFile, excelSheetName=excelSheet, ageHeader=ageHeader, errorHeader=errorHeader, sampleIDfield=sampleID, sampleID='T693') #Sample T978, which mixture modelling of Sharman and Johnstone, 2017 suggests is pretty different to DR584 T978 = detPop.population(excelFileName=pathToExcelFile, excelSheetName=excelSheet, ageHeader=ageHeader,
#mixOrder = 'reverse' #high values correspond to good mixtures (e.g. cross correlation)

# Step size between candidate mixing proportions.
# Use a coarse value while testing (e.g. 0.1, i.e. 10%) to improve efficiency.
mixSpacing = 0.01

# %%
#==============================================================================
# Load in the parents and daughters
#==============================================================================

# Keyword arguments common to every sample read from the Excel sheet; only
# the sample identifier changes from one population to the next.
_excelReadArgs = dict(excelFileName=filePath,
                      excelSheetName=excelSheet,
                      ageHeader=ageHeader,
                      errorHeader=errorHeader,
                      sampleIDfield=sampleID)

# One population per parent sample name
parentSet = []
for parent in ParentList:
    parentSet.append(dp.population(sampleID=parent, **_excelReadArgs))

# One population per daughter sample name
daughterSet = []
for daughter in DaughterList:
    daughterSet.append(dp.population(sampleID=daughter, **_excelReadArgs))

# %%
#==============================================================================
# Age axis on which each sample's density function is recomputed; these are
# passed to calcDF below as tmin, tmax and delt respectively.
minAge = 0.0
maxAge = 3501.0
tSpacing = 2.0

# Sample IDs to export, processed in this order
sampleList = [
    'MAN',
    'MLB',
    'DR585',
    'DR591',
    'DR584',
    'T755',
    'MDP',
    'T1135',
    'GSB',
    'T693',
    'T978',
    'T958'
    ]

# Open the output file for writing (an existing file is overwritten)
with open(outfile, 'w') as f:

    # Iterate through all the samples
    for i, sample in enumerate(sampleList):
        # Echo the sample name to the console as a progress indicator
        print(sample)

        # Load this sample's ages and errors from the Excel sheet
        pop = dp.population(excelFileName=filePath,
                            excelSheetName=excelSheet,
                            ageHeader=ageHeader,
                            errorHeader=errorHeader,
                            sampleIDfield=sampleID,
                            sampleID=sample)

        # Recompute the density function on the common age axis with the
        # configured method and bandwidth
        # (forceCalc=True presumably bypasses any cached result - TODO confirm)
        pop.calcDF(forceCalc=True, tmin=minAge, tmax=maxAge, delt=tSpacing,
                   method=densityFunctionMethod, bandwidth=kdeBandwidth)

        # Before the first sample, write the header row: 'Age' followed by
        # every value of the age axis, separated by outDelimiter
        if i == 0:
            # NOTE(review): _tAxisDF_ looks like the age axis produced by
            # calcDF - confirm against the population class
            ts = pop._tAxisDF_
            # Each axis value is formatted with ageFmt and given a leading space
            tStrList = [' ' + ageFmt % t for t in ts]
            header = 'Age' + outDelimiter + outDelimiter.join(tStrList) + '\n'
            f.write(header)
def genRandomAges(n, mean, spread):
    '''
    Return n random ages drawn from a normal distribution.

    Standard-normal samples are scaled by spread and shifted by mean.

    :param n: number of ages to draw (converted with int())
    :param mean: centre of the distribution
    :param spread: scale applied to the standard-normal samples
    :return: a numpy array of int(n) random ages
    '''
    standardDraws = np.random.randn(int(n))
    return standardDraws * spread + mean


# Build a synthetic population of ages with two components by drawing each
# component separately and concatenating the two arrays.
n = 100  # number of ages drawn for each component
youngAges = genRandomAges(n, 30.0, 5.0)
oldAges = genRandomAges(n, 200.0, 3.0)
randAges = np.hstack((youngAges, oldAges))
randErrors = randAges * 0.10  # constant relative error (10% of each age)

# Create a population directly from these age and error arrays
popArray = population(ages=randAges, errors=randErrors)

# Next we will load a population from a file.
# The file must be a delimited text file with a header row; the column names
# holding the ages and errors are given here.
ageHdrName = 'ages'
errorHdrName = 'errors'
pathToFile = 'testData.csv'

# We will just create a text file to load in. Feel free to provide your own
# instead by commenting out the next few lines and updating the path above.

##### Starting file creation - comment out if providing your own.

# Again, we will just create some random distributions
n = 100  # number of ages