def bootstrapMCMCprobabilityOfDeath(EmpFrame, AICFrame, N=10, MCMConPartitions=None, Delta=2, Punishment='Death'):
    """Estimate per-block punishment percentages from MCMC runs on partitions.

    NOTE: Unfinished WIP. The per-row, per-partition percentage estimates are
    computed inside the loop but are not yet aggregated into the high and low
    confidence intervals this function is meant to produce; for now only the
    offence estimate and the MCMC frame are returned.

    Parameters
    ----------
    EmpFrame : pandas DataFrame
        Empirical counts, one row per period. Each row is grouped with a
        label list of length (number of blocks entries) * 7, so the columns
        are presumably 7 punishment columns per offence, offence-major
        -- TODO confirm against the loader.
    AICFrame : pandas DataFrame
        Forwarded to mcmcOnPartitions() when the MCMC frame has to be
        generated.
    N : int
        Forwarded as ``N`` to mcmcOnPartitions().
    MCMConPartitions : pandas DataFrame, None or list
        Frame of partition numbers; row ``i`` is used together with row ``i``
        of EmpFrame. When None (or any list, e.g. the former ``[]`` default)
        the frame is generated with mcmcOnPartitions().
    Delta : int
        Forwarded to oboM.generateDependentModelLaplace().
    Punishment : str
        Punishment category; its position is looked up in
        c2n.puncatFullUp.

    Returns
    -------
    tuple
        ``(OffenceEstimate, MCMConPartitions)``.
    """
    # Example dummy data for trying this function out by hand:
    # EmpFrame = oboM.changePeriod(
    #     oboM.loadCatEmp('Data/Raw/OBOextractNoGender1yr.csv'),
    #     Delta=2).iloc[[30, 31, 116, 117]]
    # AICFrame = oboM.loadMod(
    #     'Data/Partitions/AICtableSortedValuesProperly2yr.csv'
    # ).iloc[[30, 31, 116, 117], 0:4]

    puns = 7
    PunIndex = c2n.puncatFullUp.index(Punishment)
    DFlon = EmpFrame.shape[0]

    OffenceEstimate = oboM.generateDependentModelLaplace(
        oboM.combineOnOffence(EmpFrame), Delta=Delta)

    # Generate MCMC data if it does not exist. Lists are still accepted as
    # "not supplied" because the previous default value was [].
    if MCMConPartitions is None or isinstance(MCMConPartitions, list):
        print('Generating MCMC on {} partitions'.format(N))
        MCMConPartitions = mcmcOnPartitions(AICFrame, N=N, Verbose=True, Sort=False)

    for row in range(DFlon):
        print('Calculating probabilites for row {} of {}'.format(row, DFlon))
        print('Looping through every MCMC partitioning')

        # Empirical counts and sampled partition numbers for this row.
        EmpRow = EmpFrame.iloc[row]
        Row = MCMConPartitions.iloc[row]

        # Grouping labels for the counts per partition: every entry of the
        # unranked partition (minus its first element) is repeated once per
        # punishment column.
        RowGrouping = Row.apply(
            lambda partId: [
                x
                for x in oboP.partitioN.unrank_rgf(partId)[1:]
                for y in range(puns)
            ])

        # Same labels, but only in the column of the requested punishment;
        # every other column gets label 0.
        PunishmentGrouping = Row.apply(
            lambda partId: [
                x if y == PunIndex else 0
                for x in oboP.partitioN.unrank_rgf(partId)[1:]
                for y in range(puns)
            ])

        # Counts per block, for all punishments and for the requested one.
        RowCountsPerBlock = RowGrouping.apply(
            lambda group: EmpRow.groupby(group).sum())
        # [1:] drops the first group, intended to be the 0 "other punishment"
        # group -- assumes real block labels are > 0, TODO confirm.
        PunishmentCountPerBlock = PunishmentGrouping.apply(
            lambda group: EmpRow.groupby(group).sum()[1:])

        # Percentage probability estimates for the punishment, per block.
        PunishmentEstimatesPerBlock = (PunishmentCountPerBlock / RowCountsPerBlock) * 100
        # Get rid of the ill-defined infinities (division by an empty block).
        PunishmentEstimatesPerBlock[np.isinf(PunishmentEstimatesPerBlock)] = 0
        # TODO: aggregate PunishmentEstimatesPerBlock into confidence
        # intervals; it is currently discarded at the end of each iteration.

    return OffenceEstimate, MCMConPartitions
def validateAICModelSelection(DummyEmp):
    """Create a DataFrame containing AIC scores of all standard models.

    Only NoGender at this stage. For every Delta in [1, 2, 3, 4, 5, 10] a
    dependent (Laplace) and an independent model are generated from the
    dummy data, and each is scored against the data re-binned with
    oboM.changePeriod() for the same Delta.

    Parameters
    ----------
    DummyEmp : pandas DataFrame,
        Dummy empirical data frame as generated by generateDummyDataFrame()

    Returns
    -------
    pandas DataFrame,
        One row per model with columns 'Delta' (a label such as
        'Dependent Delta = 2'), 'k', 'll' and 'AIC', sorted by 'AIC'.
    """
    AICFrame = pd.DataFrame(columns=['Delta', 'k', 'll', 'AIC'])
    # Work on a copy so the caller's frame is never touched.
    CatEmp = DummyEmp.mul(1)
    Deltas = [1, 2, 3, 4, 5, 10]

    # Generate all the refactorings; *2 as one set for Dep and one for Indep.
    CatEmps = [oboM.changePeriod(CatEmp, Delta) for Delta in Deltas] * 2
    CatModsDep = [oboM.generateDependentModelLaplace(CatEmp, Delta=Delta) for Delta in Deltas]
    CatModsIndep = [oboM.generateIndependentModel(CatEmp, Delta=Delta) for Delta in Deltas]
    CatMods = CatModsDep + CatModsIndep

    # For all generated model types on all deltas. The first len(Deltas)
    # entries of CatMods are the dependent models, the rest the independent.
    for c, CatMod in enumerate(CatMods):
        if c < len(Deltas):
            ModType = 'Dependent'
            k = (9 - 1) * (7 - 1) * CatMod.shape[0]
        else:
            ModType = 'Independent'
            k = (9 + 7 - 2) * CatMod.shape[0]

        ll = oboM.loglikilyhood(CatEmps[c], CatMod)
        AIC = 2 * k - 2 * ll
        AICFrame.loc[c] = [ModType + ' Delta = ' + str(Deltas[c % len(Deltas)]), k, ll, AIC]

    # DataFrame.sort() was removed from pandas; prefer sort_values() and only
    # fall back to sort() on very old pandas versions that lack it.
    if hasattr(AICFrame, 'sort_values'):
        return AICFrame.sort_values('AIC')
    return AICFrame.sort('AIC')