def run_method(self):
    """Build one 'combine' command per combination of the substitution
    variables (mass values and/or scan-point ranges) and submit them
    through the job queue.
    """
    # Put the method back in because we always take it out
    self.put_back_arg('method', '-M')
    # Maps a tuple of placeholder names -> list of value tuples; the
    # cartesian product over all values yields one command per combination.
    subbed_vars = {}
    if self.args.mass is not None:
        mass_vals = utils.split_vals(self.args.mass)
        subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
        self.passthru.extend(['-m', '%(MASS)s'])
    if self.args.points is not None:
        self.passthru.extend(['--points', self.args.points])
    if (self.args.split_points is not None and
            self.args.split_points > 0 and
            self.args.points is not None):
        # Split the scan into jobs of at most 'split' points each, expressed
        # as inclusive [P_START, P_END] index ranges over the 0..points-1 grid.
        points = int(self.args.points)
        split = self.args.split_points
        start = 0
        ranges = []
        # FIX: was '(start + (split - 1)) <= points', which emitted a range
        # ending exactly at index 'points' (one past the last valid point)
        # whenever start + split - 1 == points. '<' matches the corrected
        # logic used by the newer variant of this method in this file.
        while (start + (split - 1)) < points:
            ranges.append((start, start + (split - 1)))
            start += split
        if start < points:
            # Remainder range covering the final, shorter chunk
            ranges.append((start, points - 1))
        subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
        self.passthru.extend(
            ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
        self.args.name += '.POINTS.%(P_START)s.%(P_END)s'
    # can only put the name option back now because we might have modified
    # it from what the user specified
    self.put_back_arg('name', '-n')
    # Command template; the %(KEY)s placeholders are filled per combination.
    proto = 'combine ' + (' '.join(self.passthru))
    for it in itertools.product(*subbed_vars.values()):
        keys = subbed_vars.keys()
        sub_values = {}
        for i, k in enumerate(keys):
            for tuple_i, tuple_ele in enumerate(k):
                sub_values[tuple_ele] = it[i][tuple_i]
        self.job_queue.append(proto % sub_values)
    self.flush_queue()
def run_method(self):
    """Build and enqueue 'combine' commands for every combination of the
    requested substitution variables (mass, single points, legacy model
    bounds, and split point ranges).
    """
    # Put the method back in because we always take it out
    self.put_back_arg('method', '-M')
    cmd_queue = []  # NOTE(review): appears unused after this point
    # Maps a tuple of placeholder names -> list of value tuples; the
    # cartesian product over all values yields one command per combination.
    subbed_vars = {}
    if self.args.mass is not None:
        mass_vals = utils.split_vals(self.args.mass)
        subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
        self.passthru.extend(['-m', '%(MASS)s'])
    if self.args.singlePoint is not None:
        single_points = utils.split_vals(self.args.singlePoint)
        subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
        self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
        # Each point gets its own output name suffix
        self.args.name += '.POINT.%(SINGLEPOINT)s'
    if self.args.boundlist is not None:
        # NOTE(review): this reset discards any MASS/SINGLEPOINT entries
        # registered above; if --singlePoint was also given, the name template
        # still contains %(SINGLEPOINT)s which is no longer substituted.
        subbed_vars = {}
        with open(self.args.boundlist) as json_file:
            bnd = json.load(json_file)
        # One ':'-separated "par=0,upper" range string per mass value.
        # NOTE(review): mass_vals is only bound when --mass was given;
        # --boundlist without --mass would raise a NameError here — confirm.
        command1 = ['' for i in mass_vals]
        #command2=['' for i in mass_vals]
        i = 0
        for mval in mass_vals:
            for model in bnd:
                if not (command1[i] == ''):
                    command1[i] = command1[i] + ':'
                #if not (command2[i]==''): command2[i]=command2[i]+','
                command1[i] = command1[i] + model + '=0,' + str(bnd[model][mval])
                # command2[i]=command2[i]+model+'=0' #'='+str(float(bnd[model][mval])/2.0)
            i += 1
        #subbed_vars[('MASS', 'MODELBOUNDONE', 'MODELBOUNDTWO')] = [(mass_vals[i], command1[i], command2[i]) for i in range(len(mass_vals))]
        subbed_vars[('MASS', 'MODELBOUNDONE')] = [(mass_vals[i], command1[i]) for i in range(len(mass_vals))]
        self.passthru.extend(['--setPhysicsModelParameterRanges', '%(MODELBOUNDONE)s'])
        #self.passthru.extend(['--setPhysicsModelParameters', '%(MODELBOUNDTWO)s'])
    if self.args.points is not None:
        self.passthru.extend(['--points', self.args.points])
    if (self.args.split_points is not None and
            self.args.split_points > 0 and
            self.args.points is not None):
        # Split the scan into jobs of at most 'split' points each, expressed
        # as inclusive [P_START, P_END] index ranges.
        points = int(self.args.points)
        split = self.args.split_points
        start = 0
        ranges = []
        while (start + (split - 1)) <= points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            #     # Send job, if the file it's supposed to create doesn't exist yet
            #     # or if the file is empty because the previous job didn't finish
            ranges.append((start, start + (split - 1)))
            start += split
        if start < points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            ranges.append((start, points - 1))
        #if (ranges == []):
        #    print "No jobs were created; All files already exist"
        #    exit()
        subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
        self.passthru.extend(
            ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
        self.args.name += '.POINTS.%(P_START)s.%(P_END)s'
    # can only put the name option back now because we might have modified
    # it from what the user specified
    self.put_back_arg('name', '-n')
    # Command template; %(KEY)s placeholders are filled per combination below.
    proto = 'combine ' + (' '.join(self.passthru))
    for it in itertools.product(*subbed_vars.values()):
        keys = subbed_vars.keys()
        dict = {}  # NOTE(review): shadows the builtin 'dict'
        for i, k in enumerate(keys):
            for tuple_i, tuple_ele in enumerate(k):
                dict[tuple_ele] = it[i][tuple_i]
        self.job_queue.append(proto % dict)
    self.flush_queue()
def run_method(self): ROOT.PyConfig.IgnoreCommandLineOptions = True ROOT.gROOT.SetBatch(ROOT.kTRUE) # Open the json config file with open(self.args.config) as json_file: cfg = json.load(json_file) # Set all the parameter values locally using defaults if necessary grids = cfg['grids'] POIs = cfg['POIs'] opts = cfg['opts'] toys_per_cycle = cfg['toys_per_cycle'] zipname = cfg.get('zipfile', None) contours = cfg.get('contours', ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2']) min_toys = cfg.get('min_toys', 500) max_toys = cfg.get('max_toys', 5000) signif = cfg.get('signif', 3.0) cl = cfg.get('CL', 0.95) verbose = cfg.get('verbose', False) make_plots = cfg.get('make_plots', False) # Write CLs values into the output even if current toys do not pass validation incomplete = cfg.get('output_incomplete', False) outfile = cfg.get('output', 'hybrid_grid.root') # NB: blacklisting not yet implemented for this method # Have to merge some arguments from both the command line and the "opts" in the json file to_freeze = [] to_set = [] set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts) if set_opt is not None: to_set.append(set_opt) freeze_opt, opts = self.extract_arg('--freezeNuisances', opts) if freeze_opt is not None: to_freeze.append(freeze_opt) if hasattr(self.args, 'setPhysicsModelParameters' ) and self.args.setPhysicsModelParameters is not None: to_set.append(self.args.setPhysicsModelParameters) if hasattr( self.args, 'freezeNuisances') and self.args.freezeNuisances is not None: to_freeze.append(self.args.freezeNuisances) points = [] blacklisted_points = [] for igrid in grids: assert (len(igrid) == 3) if igrid[2] == '': points.extend( itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) else: blacklisted_points.extend( itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2]))) # This dictionary will keep track of the combine output files for each model point file_dict = {} for p in points: 
file_dict[p] = {} # The regex we will use to identify output files and extract POI values rgx = re.compile( 'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1])) # Can optionally copy output root files into a zip archive # If the user has specified a zipfile we will first # look for output files in this archive before scanning the # current directory if zipname: # Open the zip file in append mode, this should also # create it if it doesn't exist zipf = zipfile.ZipFile(zipname, 'a') for f in zipf.namelist(): matches = rgx.search(f) p = (matches.group('p1'), matches.group('p2')) seed = int(matches.group('toy')) if p in file_dict: if seed not in file_dict[p]: # For each model point have a dictionary keyed on the seed number # with a value pointing to the file in the archive in the format # ROOT expects: "zipfile.zip#higgsCombine.blah.root" file_dict[p][seed] = zipname + '#' + f # Now look for files in the local directory for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])): matches = rgx.search(f) p = (matches.group('p1'), matches.group('p2')) seed = int(matches.group('toy')) if p in file_dict: # Don't add this file to the list if its seed number is already # a value in the dict. 
if seed not in file_dict[p]: # If we're using the zipfile we'll add this now and # then delete it from the local directory # But: only in the file is good, we don't want to pollute the zip # file with incomplete or failed jobs if zipname and plot.TFileIsGood(f): zipf.write(f) # assume this throws if it fails print 'Adding %s to %s' % (f, zipname) file_dict[p][seed] = zipname + '#' + f os.remove(f) else: # otherwise just add the file to the dict in the normal way file_dict[p][seed] = f if zipname: zipf.close() # These lists will keep track of the CLs values which we will use # to create the output TGraph2Ds output_x = [] output_y = [] output_data = {} output_ntoys = [] output_clserr = {} output_signif = {} # One list of Z-values per contour for contour in contours: output_data[contour] = [] output_clserr[contour] = [] output_signif[contour] = [] # Also keep track of the number of model points which have met the # CLs criteria total_points = 0 complete_points = 0 for key, val in file_dict.iteritems(): total_points += 1 name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1]) files = [x for x in val.values() if plot.TFileIsGood(x)] # Merge the HypoTestResult objects from each file into one res = self.GetCombinedHypoTest(files) # Do the validation of this model point # ok, point_res = self.ValidateHypoTest( res, min_toys=min_toys, max_toys=max_toys, contours=contours, signif=signif, cl=cl, output=self.args.output, verbose=verbose) if res is not None else (False, { "ntoys": 0 }) print '>> Point %s [%i toys, %s]' % ( name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE') if ok: complete_points += 1 # Make plots of the test statistic distributions if requested if res is not None and make_plots: self.PlotTestStat(res, 'plot_' + name, opts=cfg['plot_settings'], poi_vals=(float(key[0]), float(key[1]))) # Add the resulting CLs values to the output arrays. 
Normally just # for the model points that passed the validation criteria, but if "output_incomplete" # has been set to true then we'll write all model points where at least one HypoTestResult # is present if res is not None and (ok or incomplete) and self.args.output: output_x.append(float(key[0])) output_y.append(float(key[1])) output_ntoys.append(point_res['ntoys']) for contour in contours: output_data[contour].append(point_res[contour][0]) output_clserr[contour].append(point_res[contour][1]) output_signif[contour].append(point_res[contour][2]) # Do the job cycle generation if requested if not ok and self.args.cycles > 0: print '>>> Going to generate %i job(s) for point %s' % ( self.args.cycles, key) # Figure out the next seed numbers we need to run by finding the maximum seed number # so far done_cycles = val.keys() new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1 new_cycles = range(new_idx, new_idx + self.args.cycles) print '>>> Done cycles: ' + ','.join( str(x) for x in done_cycles) print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles) # Build to combine command. 
Here we'll take responsibility for setting the name and the # model parameters, making sure the latter are frozen set_arg = ','.join( ['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set) freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze) point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % ( name, set_arg, freeze_arg) # Build a command for each job cycle setting the number of toys and random seed and passing through any other # user options from the config file or the command line for idx in new_cycles: cmd = ' '.join([ 'combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx ] + self.passthru) self.job_queue.append(cmd) print ">> %i/%i points have completed and require no further toys" % ( complete_points, total_points) self.flush_queue() # Create and write output CLs TGraph2Ds here # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes if self.args.output: fout = ROOT.TFile(outfile, 'RECREATE') for c in contours: graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y), array('d', output_data[c])) graph.SetName(c) fout.WriteTObject(graph, c) # Also write a Graph with the CLsErr graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c])) graph.SetName('clsErr_' + c) fout.WriteTObject(graph, 'clsErr_' + c) # And a Graph with the significance graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c])) graph.SetName('signif_' + c) fout.WriteTObject(graph, 'signif_' + c) graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys)) graph.SetName('ntoys' + c) fout.WriteTObject(graph, 'ntoys') fout.Close()
def run_method(self): ROOT.PyConfig.IgnoreCommandLineOptions = True ROOT.gROOT.SetBatch(ROOT.kTRUE) # This is what the logic should be: # - get the list of model points # - figure out which files are: # - completely missing # - there but corrupt, missing tree # - ok # - If we have anything in the third category proceed to produce output files # - Anything in the first two gets added to the queue only if --doFits is specified # so that the # Step 1 - open the json config file with open(self.args.config) as json_file: cfg = json.load(json_file) # to do - have to handle the case where it doesn't exist points = [] blacklisted_points = [] for igrid in cfg['grids']: assert (len(igrid) == 3) if igrid[2] == '': points.extend( itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) else: blacklisted_points.extend( itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2]))) POIs = cfg['POIs'] opts = cfg['opts'] # Have to merge some arguments from both the command line and the "opts" in the json file to_freeze = [] to_set = [] set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts) if set_opt is not None: to_set.append(set_opt) freeze_opt, opts = self.extract_arg('--freezeNuisances', opts) if freeze_opt is not None: to_freeze.append(freeze_opt) if hasattr(self.args, 'setPhysicsModelParameters' ) and self.args.setPhysicsModelParameters is not None: to_set.append(self.args.setPhysicsModelParameters) if hasattr( self.args, 'freezeNuisances') and self.args.freezeNuisances is not None: to_freeze.append(self.args.freezeNuisances) file_dict = {} for p in points: file_dict[p] = [] for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' % (POIs[0], POIs[1])): # print f rgx = re.compile( 'higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root' % (POIs[0], POIs[1])) matches = rgx.search(f) p = (matches.group('p1'), matches.group('p2')) if p in file_dict: file_dict[p].append(f) for key, val 
in file_dict.iteritems(): name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1]) print '>> Point %s' % name if len(val) == 0: print 'Going to run limit for point %s' % (key, ) set_arg = ','.join( ['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set) freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze) point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % ( name, set_arg, freeze_arg) cmd = ' '.join(['combine -M Asymptotic', opts, point_args] + self.passthru) self.job_queue.append(cmd) bail_out = len(self.job_queue) > 0 self.flush_queue() if bail_out: print '>> New jobs were created / run in this cycle, run the script again to collect the output' sys.exit(0) xvals = [] yvals = [] zvals_m2s = [] zvals_m1s = [] zvals_exp = [] zvals_p1s = [] zvals_p2s = [] zvals_obs = [] for key, val in file_dict.iteritems(): for filename in val: fin = ROOT.TFile(filename) if fin.IsZombie(): continue tree = fin.Get('limit') for evt in tree: if abs(evt.quantileExpected + 1) < 0.01: xvals.append(float(key[0])) yvals.append(float(key[1])) #print 'At point %s have observed CLs = %f' % (key, evt.limit) zvals_obs.append(float(evt.limit)) if abs(evt.quantileExpected - 0.025) < 0.01: #print 'At point %s have -2sigma CLs = %f' % (key, evt.limit) zvals_m2s.append(float(evt.limit)) if abs(evt.quantileExpected - 0.16) < 0.01: #print 'At point %s have -1sigma CLs = %f' % (key, evt.limit) zvals_m1s.append(float(evt.limit)) if abs(evt.quantileExpected - 0.5) < 0.01: #print 'At point %s have expected CLs = %f' % (key, evt.limit) zvals_exp.append(float(evt.limit)) if abs(evt.quantileExpected - 0.84) < 0.01: #print 'At point %s have +1sigma CLs = %f' % (key, evt.limit) zvals_p1s.append(float(evt.limit)) if abs(evt.quantileExpected - 0.975) < 0.01: #print 'At point %s have +2sigma CLs = %f' % (key, evt.limit) zvals_p2s.append(float(evt.limit)) for POI1, POI2, CLs in blacklisted_points: xvals.append(float(POI1)) yvals.append(float(POI2)) 
zvals_m2s.append(float(CLs)) zvals_m1s.append(float(CLs)) zvals_exp.append(float(CLs)) zvals_p1s.append(float(CLs)) zvals_p2s.append(float(CLs)) zvals_obs.append(float(CLs)) graph_m2s = ROOT.TGraph2D(len(zvals_m2s), array('d', xvals), array('d', yvals), array('d', zvals_m2s)) graph_m1s = ROOT.TGraph2D(len(zvals_m1s), array('d', xvals), array('d', yvals), array('d', zvals_m1s)) graph_exp = ROOT.TGraph2D(len(zvals_exp), array('d', xvals), array('d', yvals), array('d', zvals_exp)) graph_p1s = ROOT.TGraph2D(len(zvals_p1s), array('d', xvals), array('d', yvals), array('d', zvals_p1s)) graph_p2s = ROOT.TGraph2D(len(zvals_p2s), array('d', xvals), array('d', yvals), array('d', zvals_p2s)) graph_obs = ROOT.TGraph2D(len(zvals_obs), array('d', xvals), array('d', yvals), array('d', zvals_obs)) #h_bins = cfg['hist_binning'] #hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5]) #for i in xrange(1, hist.GetNbinsX()+1): # for j in xrange(1, hist.GetNbinsY()+1): # hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j))) fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE') fout.WriteTObject(graph_m2s, 'exp-2') fout.WriteTObject(graph_m1s, 'exp-1') fout.WriteTObject(graph_exp, 'exp0') fout.WriteTObject(graph_p1s, 'exp+1') fout.WriteTObject(graph_p2s, 'exp+2') fout.WriteTObject(graph_obs, 'obs') #fout.WriteTObject(hist) fout.Close()
def run_method(self):
    """Build and enqueue 'combine' commands for every combination of the
    substitution variables: masses, single points, seeds, user-defined
    --generate axes, datacards (with optional mass inference from the
    directory name), mass-dependent parameter bounds, and split scan-point
    ranges.
    """
    # Put the method back in because we always take it out
    self.put_back_arg('method', '-M')
    # cmd_queue = []
    # Maps a tuple of placeholder names -> list of value tuples; the
    # cartesian product over all values yields one command per combination.
    subbed_vars = {}
    # pre_cmd = ''
    if self.args.mass is not None:
        mass_vals = utils.split_vals(self.args.mass)
        subbed_vars[('MASS', )] = [(mval, ) for mval in mass_vals]
        self.passthru.extend(['-m', '%(MASS)s'])
    if self.args.singlePoint is not None:
        single_points = utils.split_vals(self.args.singlePoint)
        subbed_vars[('SINGLEPOINT', )] = [(pval, ) for pval in single_points]
        self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
        self.args.name += '.POINT.%(SINGLEPOINT)s'
    if self.args.seed is not None:
        seed_vals = utils.split_vals(self.args.seed)
        subbed_vars[('SEED', )] = [(sval, ) for sval in seed_vals]
        self.passthru.extend(['-s', '%(SEED)s'])
    # Each --generate argument declares extra substitution axes of the form
    # "HEADERS;;ENTRY;ENTRY;..." (or "HEADERS::ENTRY:..." when '::' is used,
    # so that ':' can appear inside values).
    for i, generate in enumerate(self.args.generate):
        split_char = ':' if '::' in generate else ';'
        gen_header, gen_content = generate.split(split_char * 2)
        print gen_header
        print gen_content
        gen_headers = gen_header.split(split_char)
        gen_entries = gen_content.split(split_char)
        key = tuple()
        arglist = []
        for header in gen_headers:
            if header == 'n' or header == 'name':
                # 'n'/'name' entries go into the task name, not the command
                self.args.name += '.%(GENNAME' + str(i) + ')s'
                key += ('GENNAME' + str(i), )
            else:
                self.passthru.extend(['%(' + header + ')s'])
                key += (header, )
        for entry in gen_entries:
            # ',,' allows literal commas inside individual values
            if ',,' in entry:
                split_entry = entry.split(',,')
            else:
                split_entry = entry.split(',')
            final_arg = []
            for header, e in zip(gen_headers, split_entry):
                # Single-letter headers become short options, others long
                argname = '-%s' % header if len(header) == 1 else '--%s' % header
                if header == 'n' or header == 'name':
                    final_arg.append(e)
                elif len(e) and e != '!':
                    final_arg.append('%s %s' % (argname, e))
                else:
                    # Empty value or '!' means "omit this option"
                    final_arg.append('')
            arglist.append(tuple(final_arg))
        subbed_vars[key] = arglist
    if len(self.args.datacard) >= 1:
        # Two lists of tuples, one which does specify the mass, and one
        # which doesn't
        dc_mass = []
        dc_no_mass = []
        for dc in self.args.datacard:
            # Split workspace into path and filename
            path, file = os.path.split(dc)
            # If the wsp is in the current directory should call it '.'
            if path == '':
                path = '.'
            # If we're not using the --there option then leave the
            # workspace argument as the full path
            if not self.args.there:
                file = dc
            # Figure out if the enclosing directory is a mass value
            dirs = path.split('/')
            if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
                print 'Assuming card %s uses mass value %s' % (dc, dirs[-1])
                dc_mass.append((path, file, dirs[-1]))
            dc_no_mass.append((path, file))
        # If at least one mass value was inferred assume all of them are like this
        if len(dc_mass) > 0:
            subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
            self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
        else:
            subbed_vars[(
                'DIR',
                'DATACARD', )] = dc_no_mass
            self.passthru.extend(['-d', '%(DATACARD)s'])
    # elif len(self.args.datacard) == 1:
    #     self.passthru.extend(['-d', self.args.datacard[0]])
    # Remember any ranges the user passed directly so we can either merge
    # them with the boundlist-derived ranges or put them back untouched.
    current_ranges = self.args.setPhysicsModelParameterRanges
    put_back_ranges = current_ranges is not None
    if self.args.boundlist is not None:
        # We definitely don't need to put the parameter ranges back
        # into the args because they're going in via the boundlist
        # option instead
        put_back_ranges = False
        with open(self.args.boundlist) as json_file:
            bnd = json.load(json_file)
        bound_pars = list(bnd.keys())
        print 'Found bounds for parameters %s' % ','.join(bound_pars)
        # Fill a dictionaries of the bound info of the form:
        #   { 'PAR1' : [(MASS, LOWER, UPER), ...], ...}
        bound_vals = {}
        for par in bound_pars:
            bound_vals[par] = list()
            for mass, bounds in bnd[par].iteritems():
                bound_vals[par].append((float(mass), bounds[0], bounds[1]))
            # Sorted by mass so bisect can locate the applicable bound below
            bound_vals[par].sort(key=lambda x: x[0])
        # find the subbed_vars entry containing the mass
        # We will extend it to also specify the ranges
        dict_key = None
        mass_idx = None
        for key in subbed_vars.keys():
            if 'MASS' in key:
                dict_key = key
                mass_idx = dict_key.index('MASS')
        new_key = dict_key + ('MODELBOUND', )
        new_list = []
        for entry in subbed_vars[dict_key]:
            command = []
            if current_ranges is not None:
                command.append(current_ranges)
            mval = entry[mass_idx]
            for par in bound_pars:
                # The (mass, None, None) is just a trick to make bisect_left do the comparison
                # with the list of tuples in bound_var[par]. The +1E-5 is to avoid float rounding
                # issues
                lower_bound = bisect.bisect_left(
                    bound_vals[par], (float(mval) + 1E-5, None, None))
                # If lower_bound == 0 this means we are at or below the lowest mass point,
                # in which case we should increase by one to take the bounds from this lowest
                # point
                if lower_bound == 0:
                    lower_bound += 1
                command.append('%s=%g,%g' % (par,
                                             bound_vals[par][lower_bound - 1][1],
                                             bound_vals[par][lower_bound - 1][2]))
            new_list.append(entry + (str(':'.join(command)), ))
        # now remove the current mass information from subbed_vars
        # and replace it with the updated one
        del subbed_vars[dict_key]
        subbed_vars[new_key] = new_list
        self.passthru.extend(
            ['--setPhysicsModelParameterRanges', '%(MODELBOUND)s'])
    # We might need to put the intercepted --setPhysicsModelParameterRanges arg back in
    if put_back_ranges:
        self.put_back_arg('setPhysicsModelParameterRanges',
                          '--setPhysicsModelParameterRanges')
    if self.args.points is not None:
        self.passthru.extend(['--points', self.args.points])
    if (self.args.split_points is not None and
            self.args.split_points > 0 and
            self.args.points is not None):
        # Split the scan into jobs of at most 'split' points each, expressed
        # as inclusive [P_START, P_END] index ranges over 0..points-1.
        points = int(self.args.points)
        split = self.args.split_points
        start = 0
        ranges = []
        while (start + (split - 1)) < points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            #     # Send job, if the file it's supposed to create doesn't exist yet
            #     # or if the file is empty because the previous job didn't finish
            ranges.append((start, start + (split - 1)))
            start += split
        if start < points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            ranges.append((start, points - 1))
        #if (ranges == []):
        #    print "No jobs were created; All files already exist"
        #    exit()
        subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
        self.passthru.extend(
            ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
        self.args.name += '.POINTS.%(P_START)s.%(P_END)s'
    # can only put the name option back now because we might have modified
    # it from what the user specified
    self.put_back_arg('name', '-n')
    # Command template; with --there we cd into the card's directory first
    proto = 'combine ' + (' '.join(self.passthru))
    if self.args.there:
        proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru)) + '; popd'
    for it in itertools.product(*subbed_vars.values()):
        keys = subbed_vars.keys()
        dict = {}  # NOTE(review): shadows the builtin 'dict'
        for i, k in enumerate(keys):
            for tuple_i, tuple_ele in enumerate(k):
                dict[tuple_ele] = it[i][tuple_i]
        self.job_queue.append(proto % dict)
    self.flush_queue()
def run_method(self): # Put the method back in because we always take it out self.put_back_arg('method', '-M') # cmd_queue = [] subbed_vars = {} # pre_cmd = '' if self.args.mass is not None: mass_vals = utils.split_vals(self.args.mass) subbed_vars[('MASS', )] = [(mval, ) for mval in mass_vals] self.passthru.extend(['-m', '%(MASS)s']) if self.args.singlePoint is not None: single_points = utils.split_vals(self.args.singlePoint) subbed_vars[('SINGLEPOINT', )] = [(pval, ) for pval in single_points] self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s']) self.args.name += '.POINT.%(SINGLEPOINT)s' if self.args.seed is not None: seed_vals = utils.split_vals(self.args.seed) subbed_vars[('SEED', )] = [(sval, ) for sval in seed_vals] self.passthru.extend(['-s', '%(SEED)s']) if len(self.args.datacard) >= 1: # Two lists of tuples, one which does specify the mass, and one # which doesn't dc_mass = [] dc_no_mass = [] for dc in self.args.datacard: # Split workspace into path and filename path, file = os.path.split(dc) # If the wsp is in the current directory should call it '.' if path == '': path = '.' 
# If we're not using the --there option then leave the # workspace argument as the full path if not self.args.there: file = dc # Figure out if the enclosing directory is a mass value dirs = path.split('/') if self.args.mass is None and len(dirs) >= 1 and isfloat( dirs[-1]): print 'Assuming card %s uses mass value %s' % (dc, dirs[-1]) dc_mass.append((path, file, dirs[-1])) dc_no_mass.append((path, file)) # If at least one mass value was inferred assume all of them are like this if len(dc_mass) > 0: subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s']) else: subbed_vars[( 'DIR', 'DATACARD', )] = dc_no_mass self.passthru.extend(['-d', '%(DATACARD)s']) # elif len(self.args.datacard) == 1: # self.passthru.extend(['-d', self.args.datacard[0]]) if self.args.boundlist is not None: with open(self.args.boundlist) as json_file: bnd = json.load(json_file) # find the subbed_vars entry containing the mass # We will extend it to also specify the ranges dict_key = None mass_idx = None for key in subbed_vars.keys(): if 'MASS' in key: dict_key = key mass_idx = dict_key.index('MASS') new_key = dict_key + ('MODELBOUND', ) new_list = [] for entry in subbed_vars[dict_key]: command = [] mval = entry[mass_idx] for model in bnd: command.append(model + '=0,' + str(bnd[model][mval])) new_list.append(entry + (':'.join(command), )) # now remove the current mass information from subbed_vars # and replace it with the updated one del subbed_vars[dict_key] subbed_vars[new_key] = new_list self.passthru.extend( ['--setPhysicsModelParameterRanges', '%(MODELBOUND)s']) if self.args.points is not None: self.passthru.extend(['--points', self.args.points]) if (self.args.split_points is not None and self.args.split_points > 0 and self.args.points is not None): points = int(self.args.points) split = self.args.split_points start = 0 ranges = [] while (start + (split - 1)) <= points: # filename = 
"higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root" # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): # # Send job, if the file it's supposed to create doesn't exist yet # # or if the file is empty because the previous job didn't finish ranges.append((start, start + (split - 1))) start += split if start < points: # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root" # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): ranges.append((start, points - 1)) #if (ranges == []): # print "No jobs were created; All files already exist" # exit() subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges] self.passthru.extend( ['--firstPoint %(P_START)s --lastPoint %(P_END)s']) self.args.name += '.POINTS.%(P_START)s.%(P_END)s' # can only put the name option back now because we might have modified # it from what the user specified self.put_back_arg('name', '-n') proto = 'combine ' + (' '.join(self.passthru)) if self.args.there: proto = 'pushd %(DIR)s; combine ' + (' '.join( self.passthru)) + '; popd' for it in itertools.product(*subbed_vars.values()): keys = subbed_vars.keys() dict = {} for i, k in enumerate(keys): for tuple_i, tuple_ele in enumerate(k): dict[tuple_ele] = it[i][tuple_i] self.job_queue.append(proto % dict) self.flush_queue()
def run_method(self):
    """Manage a grid of HybridNew toy jobs and harvest their CLs results.

    Driven by the JSON config in ``self.args.config``: builds the list of
    (POI1, POI2) model points, locates existing combine output files (on disk
    and optionally inside a zip archive), validates the accumulated toys for
    each point, queues new toy-generation cycles where needed, and finally
    writes the CLs values as TGraph2Ds (or, in ``--from-asymptotic`` mode,
    queues hadd/combine jobs per mass point).  Progress is cached in an
    optional "statusfile" JSON so unchanged points are not re-validated.
    """
    # Stop ROOT from swallowing our argv and from opening GUI windows.
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)
    # Open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)
    # Set all the parameter values locally using defaults if necessary
    grids = cfg['grids']
    grids_to_remove = cfg.get('grids_to_remove', None)
    POIs = cfg['POIs']
    opts = cfg['opts']
    toys_per_cycle = cfg['toys_per_cycle']
    zipname = cfg.get('zipfile', None)
    statfile = cfg.get('statusfile', None)
    contours = cfg.get('contours', ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
    min_toys = cfg.get('min_toys', 500)
    max_toys = cfg.get('max_toys', 5000)
    signif = cfg.get('signif', 3.0)
    cl = cfg.get('CL', 0.95)
    verbose = cfg.get('verbose', False)
    make_plots = cfg.get('make_plots', False)
    # Write CLs values into the output even if current toys do not pass validation
    incomplete = cfg.get('output_incomplete', False)
    outfile = cfg.get('output', 'hybrid_grid.root')
    from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
    # NB: blacklisting not yet implemented for this method

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    # extract_arg strips the option (and its value) out of the opts string
    # and returns it separately, so we can re-apply it ourselves below.
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None: to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None: to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    points = []
    blacklisted_points = []
    # For the automatic grid for the "from_asymptotic option" we should fix the format specifier for
    # the grid points, as the numerical precision of a given point may change once the grid spacing is
    # modified. By default we let split_vals do it's thing however
    fmt_spec = None

    # In this mode we're doing a classic limit search vs MH instead of a 2D grid.
    # Most of the same code can be used however. First we'll use the json file containing the
    # asymptotic limits to create a new grid from scratch.
    if self.args.from_asymptotic is not None:
        grids = []
        bound_vals = None
        bound_pars = []
        fmt_spec = '%.5g'
        with open(self.args.from_asymptotic) as limit_json:
            limits = json.load(limit_json)
        for m in limits.keys():
            # Take the spread of the asymptotic limit bands and pad it by 30%
            # on each side to define the toy scan range for this mass.
            limit_vals = [x for x in limits[m].values()]
            max_limit = max(limit_vals)
            min_limit = min(limit_vals)
            # print (min_limit, max_limit)
            width = max_limit - min_limit
            max_limit += width * 0.3
            min_limit = max(0.0, min_limit - width * 0.3)
            nsteps = from_asymptotic_settings.get('points', 100)
            step_width = (max_limit - min_limit) / nsteps
            grids.append([m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
        boundlist_file = from_asymptotic_settings.get('boundlist', '')
        if boundlist_file:
            # boundlist maps parameter -> {mass: [lo, hi]}; store as sorted
            # (mass, lo, hi) tuples for the bisect lookup further down.
            with open(boundlist_file) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print 'Found bounds for parameters %s' % ','.join(bound_pars)
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in bnd[par].iteritems():
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])
        # print (min_limit, max_limit)
        # sys.exit(0)

    # Expand each grid spec into explicit (POI1, POI2) points; a non-empty
    # third entry marks the points as blacklisted instead.
    for igrid in grids:
        assert (len(igrid) == 3)
        if igrid[2] == '':
            points.extend(itertools.product(utils.split_vals(igrid[0], fmt_spec=fmt_spec), utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
        else:
            blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))

    #In between cycles of toys we may find there's something wrong with some of the points in the grid and therefore want to remove them:
    points_to_remove = []
    if grids_to_remove is not None:
        for igrid in grids_to_remove:
            assert (len(igrid) == 2)
            points_to_remove.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))
    for p in points_to_remove:
        points.remove(p)

    # This dictionary will keep track of the combine output files for each model point
    file_dict = {}
    for p in points:
        file_dict[p] = {}

    # The regex we will use to identify output files and extract POI values
    rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1]))

    # Load the cached per-point status from a previous run, if available.
    stats = {}
    if statfile and os.path.isfile(statfile):
        with open(statfile) as stat_json:
            stats = json.load(stat_json)

    # Can optionally copy output root files into a zip archive
    # If the user has specified a zipfile we will first
    # look for output files in this archive before scanning the
    # current directory
    if zipname:
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a')
        for f in zipf.namelist():
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                if seed not in file_dict[p]:
                    # For each model point have a dictionary keyed on the seed number
                    # with a value pointing to the file in the archive in the format
                    # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                    file_dict[p][seed] = zipname + '#' + f

    # Now look for files in the local directory
    for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])):
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        if p in file_dict:
            # Don't add this file to the list if its seed number is already
            # a value in the dict.
            if seed not in file_dict[p]:
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory
                # But: only in the file is good, we don't want to pollute the zip
                # file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print 'Adding %s to %s' % (f, zipname)
                    file_dict[p][seed] = zipname + '#' + f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal way
                    file_dict[p][seed] = f

    if zipname:
        zipf.close()

    # These lists will keep track of the CLs values which we will use
    # to create the output TGraph2Ds
    output_x = []
    output_y = []
    output_data = {}
    output_ntoys = []
    output_clserr = {}
    output_signif = {}
    # One list of Z-values per contour
    for contour in contours:
        output_data[contour] = []
        output_clserr[contour] = []
        output_signif[contour] = []

    # Also keep track of the number of model points which have met the
    # CLs criteria
    total_points = 0
    complete_points = 0

    for key, val in file_dict.iteritems():
        status_changed = True
        total_points += 1
        status_key = ':'.join(key)
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

        # First check if we use the status json: if the file list for this
        # point is unchanged we can skip the (expensive) TFileIsGood checks.
        all_files = val.values()
        status_files = []
        files = []

        if status_key in stats:
            status_files = stats[status_key]['files']
            if set(all_files) == set(status_files):
                print 'For point %s, no files have been updated' % name
                status_changed = False
                files = all_files
            else:
                files = [x for x in val.values() if plot.TFileIsGood(x)]
                if set(files) == set(status_files) and len(files) < len(all_files):
                    print 'For point %s, new files exist but they are not declared good' % name
                    status_changed = False
        else:
            files = [x for x in val.values() if plot.TFileIsGood(x)]

        # Merge the HypoTestResult objects from each file into one.
        # If nothing changed since the last run, reuse the cached summary
        # instead of re-reading the ROOT files.
        res = None
        precomputed = None
        if status_key in stats and not status_changed and stats[status_key]["ntoys"] > 0:
            precomputed = stats[status_key]
        else:
            res = self.GetCombinedHypoTest(files)

        # Do the validation of this model point
        #
        ok, point_res = self.ValidateHypoTest(res, min_toys=min_toys, max_toys=max_toys, contours=contours, signif=signif, cl=cl, output=self.args.output, verbose=verbose, precomputed=precomputed)

        print '>> Point %s [%i toys, %s]' % (name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

        # Refresh the status cache entry for this point.
        stats[status_key] = {'files': files, 'ntoys': point_res['ntoys']}
        for cont in contours:
            if cont in point_res:
                stats[status_key][cont] = point_res[cont]

        if ok:
            complete_points += 1

        # Make plots of the test statistic distributions if requested
        if res is not None and make_plots:
            self.PlotTestStat(res, 'plot_' + name, opts=cfg['plot_settings'], poi_vals=(float(key[0]), float(key[1])), point_info=point_res)

        # Add the resulting CLs values to the output arrays. Normally just
        # for the model points that passed the validation criteria, but if "output_incomplete"
        # has been set to true then we'll write all model points where at least one HypoTestResult
        # is present
        if (res is not None or precomputed is not None) and (ok or incomplete) and self.args.output:
            output_x.append(float(key[0]))
            output_y.append(float(key[1]))
            output_ntoys.append(point_res['ntoys'])
            for contour in contours:
                # point_res[contour] is unpacked as (CLs, CLsErr, signif) —
                # established by the three indexed appends below.
                output_data[contour].append(point_res[contour][0])
                output_clserr[contour].append(point_res[contour][1])
                output_signif[contour].append(point_res[contour][2])

        # Do the job cycle generation if requested
        if not ok and self.args.cycles > 0:
            print '>>> Going to generate %i job(s) for point %s' % (self.args.cycles, key)
            # Figure out the next seed numbers we need to run by finding the maximum seed number
            # so far
            done_cycles = val.keys()
            new_idx = max(done_cycles) + 1 if len(done_cycles) > 0 else 1
            new_cycles = range(new_idx, new_idx + self.args.cycles)
            print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
            print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

            # Build to combine command. Here we'll take responsibility for setting the name and the
            # model parameters, making sure the latter are frozen
            set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
            if self.args.from_asymptotic:
                mval = key[0]
                command = []
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left do the comparison
                    # with the list of tuples in bound_var[par]. The +1E-5 is to avoid float rounding
                    # issues
                    lower_bound = bisect.bisect_left(bound_vals[par], (float(mval) + 1E-5, None, None))
                    # If lower_bound == 0 this means we are at or below the lowest mass point,
                    # in which case we should increase by one to take the bounds from this lowest
                    # point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound - 1][1], bound_vals[par][lower_bound - 1][2]))
                if len(command) > 0:
                    point_args += (' --setPhysicsModelParameterRanges %s' % (':'.join(command)))
                # print per_mass_point_args
                point_args += ' --singlePoint %s' % key[1]
                point_args += ' -m %s' % mval
            # Build a command for each job cycle setting the number of toys and random seed and passing through any other
            # user options from the config file or the command line
            for idx in new_cycles:
                cmd = ' '.join(['combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx] + self.passthru)
                self.job_queue.append(cmd)

    print ">> %i/%i points have completed and require no further toys" % (complete_points, total_points)
    self.flush_queue()

    # Create and write output CLs TGraph2Ds here
    # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
    if self.args.output and not self.args.from_asymptotic:
        fout = ROOT.TFile(outfile, 'RECREATE')
        for c in contours:
            graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y), array('d', output_data[c]))
            graph.SetName(c)
            fout.WriteTObject(graph, c)
            # Also write a Graph with the CLsErr
            graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c]))
            graph.SetName('clsErr_' + c)
            fout.WriteTObject(graph, 'clsErr_' + c)
            # And a Graph with the significance
            graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c]))
            graph.SetName('signif_' + c)
            fout.WriteTObject(graph, 'signif_' + c)
            # NOTE(review): the ntoys graph is named 'ntoys'+c but written
            # under the key 'ntoys' once per contour iteration — looks like
            # an oversight; confirm intended behaviour before changing.
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys))
            graph.SetName('ntoys' + c)
            fout.WriteTObject(graph, 'ntoys')
        fout.Close()

    if self.args.output and self.args.from_asymptotic:
        # Need to collect all the files for each mass point and hadd them:
        files_by_mass = {}
        for key, val in file_dict.iteritems():
            if key[0] not in files_by_mass:
                files_by_mass[key[0]] = list()
            files_by_mass[key[0]].extend(val.values())
        for m, files in files_by_mass.iteritems():
            gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m, POIs[1])
            self.job_queue.append('hadd -f %s %s' % (gridfile, ' '.join(files)))
            # One final combine job per quantile ('' = observed) reading the
            # merged toy grid back in.
            for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                self.job_queue.append(' '.join([
                    'combine -M HybridNew --rAbsAcc 0', opts,
                    '--grid %s' % gridfile,
                    '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                    '-m %s' % (m),
                    ('--expectedFromGrid %s' % exp) if exp else '--noUpdateGrid'
                ] + self.passthru))
        self.flush_queue()

    # Persist the per-point status cache for the next invocation.
    if statfile:
        with open(statfile, 'w') as stat_out:
            stat_json = json.dumps(stats, sort_keys=True, indent=2, separators=(',', ': '))
            stat_out.write(stat_json)
def run_method(self):
    """Run/collect an Asymptotic-limit grid of (POI1, POI2) model points.

    For every configured grid point with no output file yet, queues a
    ``combine -M Asymptotic`` job and exits so the user can re-run the
    script once jobs finish.  When all files exist, reads the CLs values
    from each 'limit' tree and writes one TGraph2D per quantile band
    (plus observed) into 'asymptotic_grid.root'.
    """
    # Stop ROOT from swallowing our argv and from opening GUI windows.
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)

    # This is what the logic should be:
    #  - get the list of model points
    #  - figure out which files are:
    #    - completely missing
    #    - there but corrupt, missing tree
    #    - ok
    #  - If we have anything in the third category proceed to produce output files
    #  - Anything in the first two gets added to the queue only if --doFits is specified

    # Step 1 - open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)
    # to do - have to handle the case where it doesn't exist
    # Expand each grid spec into explicit points; a non-empty third entry
    # marks the points as blacklisted with a fixed CLs value instead.
    points = []; blacklisted_points = []
    for igrid in cfg['grids']:
        assert(len(igrid) == 3)
        if igrid[2]=='' : points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))
        else : blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))
    POIs = cfg['POIs']
    opts = cfg['opts']

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None: to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None: to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    # Map each model point to the list of output files found for it.
    file_dict = { }
    for p in points:
        file_dict[p] = []

    for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' % (POIs[0], POIs[1])):
        # print f
        # NOTE(review): the pattern is loop-invariant — this could be
        # compiled once before the loop.
        rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root' % (POIs[0], POIs[1]))
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        if p in file_dict:
            file_dict[p].append(f)

    # Queue a combine job for every point that has no output file yet.
    for key,val in file_dict.iteritems():
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
        print '>> Point %s' % name
        if len(val) == 0:
            print 'Going to run limit for point %s' % (key,)
            # Fix both POIs to this grid point and keep them frozen in the fit.
            set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
            cmd = ' '.join(['combine -M Asymptotic', opts, point_args] + self.passthru)
            self.job_queue.append(cmd)

    # If anything was queued we cannot build the output yet: submit/run the
    # jobs and ask the user to invoke the script again afterwards.
    bail_out = len(self.job_queue) > 0
    self.flush_queue()

    if bail_out:
        print '>> New jobs were created / run in this cycle, run the script again to collect the output'
        sys.exit(0)

    # Harvest CLs values: combine stores one tree entry per quantile,
    # identified by the 'quantileExpected' branch (-1 = observed).
    xvals = []
    yvals = []
    zvals_m2s = []; zvals_m1s = []; zvals_exp = []; zvals_p1s = []; zvals_p2s = []; zvals_obs = []
    for key,val in file_dict.iteritems():
        for filename in val:
            # NOTE(review): fin is never Close()d — relies on ROOT/process
            # teardown; confirm whether an explicit Close is wanted here.
            fin = ROOT.TFile(filename)
            if fin.IsZombie(): continue
            tree = fin.Get('limit')
            for evt in tree:
                if abs(evt.quantileExpected+1)<0.01:
                    xvals.append(float(key[0]))
                    yvals.append(float(key[1]))
                    #print 'At point %s have observed CLs = %f' % (key, evt.limit)
                    zvals_obs.append(float(evt.limit))
                if abs(evt.quantileExpected-0.025)<0.01:
                    #print 'At point %s have -2sigma CLs = %f' % (key, evt.limit)
                    zvals_m2s.append(float(evt.limit))
                if abs(evt.quantileExpected-0.16)<0.01:
                    #print 'At point %s have -1sigma CLs = %f' % (key, evt.limit)
                    zvals_m1s.append(float(evt.limit))
                if abs(evt.quantileExpected-0.5)<0.01:
                    #print 'At point %s have expected CLs = %f' % (key, evt.limit)
                    zvals_exp.append(float(evt.limit))
                if abs(evt.quantileExpected-0.84)<0.01:
                    #print 'At point %s have +1sigma CLs = %f' % (key, evt.limit)
                    zvals_p1s.append(float(evt.limit))
                if abs(evt.quantileExpected-0.975)<0.01:
                    #print 'At point %s have +2sigma CLs = %f' % (key, evt.limit)
                    zvals_p2s.append(float(evt.limit))
    # Blacklisted points enter every band with the same fixed CLs value.
    for POI1, POI2, CLs in blacklisted_points:
        xvals.append(float(POI1))
        yvals.append(float(POI2))
        zvals_m2s.append(float(CLs))
        zvals_m1s.append(float(CLs))
        zvals_exp.append(float(CLs))
        zvals_p1s.append(float(CLs))
        zvals_p2s.append(float(CLs))
        zvals_obs.append(float(CLs))
    # Build one TGraph2D per band and write them all to the output file.
    graph_m2s = ROOT.TGraph2D(len(zvals_m2s), array('d', xvals), array('d', yvals), array('d', zvals_m2s))
    graph_m1s = ROOT.TGraph2D(len(zvals_m1s), array('d', xvals), array('d', yvals), array('d', zvals_m1s))
    graph_exp = ROOT.TGraph2D(len(zvals_exp), array('d', xvals), array('d', yvals), array('d', zvals_exp))
    graph_p1s = ROOT.TGraph2D(len(zvals_p1s), array('d', xvals), array('d', yvals), array('d', zvals_p1s))
    graph_p2s = ROOT.TGraph2D(len(zvals_p2s), array('d', xvals), array('d', yvals), array('d', zvals_p2s))
    graph_obs = ROOT.TGraph2D(len(zvals_obs), array('d', xvals), array('d', yvals), array('d', zvals_obs))
    #h_bins = cfg['hist_binning']
    #hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5])
    #for i in xrange(1, hist.GetNbinsX()+1):
    #    for j in xrange(1, hist.GetNbinsY()+1):
    #        hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j)))
    fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE')
    fout.WriteTObject(graph_m2s, 'exp-2')
    fout.WriteTObject(graph_m1s, 'exp-1')
    fout.WriteTObject(graph_exp, 'exp0')
    fout.WriteTObject(graph_p1s, 'exp+1')
    fout.WriteTObject(graph_p2s, 'exp+2')
    fout.WriteTObject(graph_obs, 'obs')
    #fout.WriteTObject(hist)
    fout.Close()
def run_method(self):
    """Manage a grid of HybridNew toy jobs and harvest their CLs results.

    Reads the JSON config from ``self.args.config``, builds the (POI1, POI2)
    model points, gathers existing combine output (local files and an
    optional zip archive), validates the toys accumulated for each point,
    queues further toy cycles where validation fails, and finally writes
    the CLs values as TGraph2Ds (or, in ``--from-asymptotic`` mode, queues
    hadd/combine jobs per mass point).  An optional "statusfile" JSON caches
    per-point status between invocations.
    """
    # Stop ROOT from swallowing our argv and from opening GUI windows.
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)
    # Open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)
    # Set all the parameter values locally using defaults if necessary
    grids = cfg['grids']
    grids_to_remove = cfg.get('grids_to_remove', None)
    POIs = cfg['POIs']
    opts = cfg['opts']
    toys_per_cycle = cfg['toys_per_cycle']
    zipname = cfg.get('zipfile', None)
    statfile = cfg.get('statusfile', None)
    contours = cfg.get('contours', ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
    min_toys = cfg.get('min_toys', 500)
    max_toys = cfg.get('max_toys', 5000)
    signif = cfg.get('signif', 3.0)
    cl = cfg.get('CL', 0.95)
    verbose = cfg.get('verbose', False)
    make_plots = cfg.get('make_plots', False)
    # Write CLs values into the output even if current toys do not pass validation
    incomplete = cfg.get('output_incomplete', False)
    outfile = cfg.get('output','hybrid_grid.root')
    from_asymptotic_settings = cfg.get('from_asymptotic_settings', dict())
    # NB: blacklisting not yet implemented for this method

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    # extract_arg strips the option out of the opts string so we can
    # re-apply it ourselves per point.
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None: to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None: to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    points = []
    blacklisted_points = []
    # For the automatic grid for the "from_asymptotic option" we should fix the format specifier for
    # the grid points, as the numerical precision of a given point may change once the grid spacing is
    # modified. By default we let split_vals do it's thing however
    fmt_spec = None

    # In this mode we're doing a classic limit search vs MH instead of a 2D grid.
    # Most of the same code can be used however. First we'll use the json file containing the
    # asymptotic limits to create a new grid from scratch.
    if self.args.from_asymptotic is not None:
        grids = []
        bound_vals = None
        bound_pars = []
        fmt_spec = '%.5g'
        with open(self.args.from_asymptotic) as limit_json:
            limits = json.load(limit_json)
        for m in limits.keys():
            # Pad the asymptotic band spread by 30% on each side to define
            # the toy scan range for this mass.
            limit_vals = [x for x in limits[m].values()]
            max_limit = max(limit_vals)
            min_limit = min(limit_vals)
            # print (min_limit, max_limit)
            width = max_limit - min_limit
            max_limit += width * 0.3
            min_limit = max(0.0, min_limit - width * 0.3)
            nsteps = from_asymptotic_settings.get('points', 100)
            step_width = (max_limit - min_limit) / nsteps
            grids.append([m, '%g:%g|%g' % (min_limit, max_limit, step_width), ''])
        boundlist_file = from_asymptotic_settings.get('boundlist', '')
        if boundlist_file:
            # boundlist maps parameter -> {mass: [lo, hi]}; keep as sorted
            # (mass, lo, hi) tuples for the bisect lookup further down.
            with open(boundlist_file) as json_file:
                bnd = json.load(json_file)
            bound_pars = list(bnd.keys())
            print 'Found bounds for parameters %s' % ','.join(bound_pars)
            bound_vals = {}
            for par in bound_pars:
                bound_vals[par] = list()
                for mass, bounds in bnd[par].iteritems():
                    bound_vals[par].append((float(mass), bounds[0], bounds[1]))
                bound_vals[par].sort(key=lambda x: x[0])
        # print (min_limit, max_limit)
        # sys.exit(0)
    # Expand each grid spec into explicit (POI1, POI2) points; a non-empty
    # third entry marks the points as blacklisted instead.
    for igrid in grids:
        assert(len(igrid) == 3)
        if igrid[2] == '':
            points.extend(itertools.product(utils.split_vals(igrid[0], fmt_spec=fmt_spec), utils.split_vals(igrid[1], fmt_spec=fmt_spec)))
        else:
            blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))

    #In between cycles of toys we may find there's something wrong with some of the points in the grid and therefore want to remove them:
    points_to_remove = [];
    if grids_to_remove is not None :
        for igrid in grids_to_remove:
            assert(len(igrid) == 2)
            points_to_remove.extend(itertools.product(utils.split_vals(igrid[0]),utils.split_vals(igrid[1])))
    for p in points_to_remove:
        points.remove(p)

    # This dictionary will keep track of the combine output files for each model point
    file_dict = { }
    for p in points:
        file_dict[p] = {}

    # The regex we will use to identify output files and extract POI values
    rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1]))

    # Load the cached per-point status from a previous run, if available.
    stats = {}
    if statfile and os.path.isfile(statfile):
        with open(statfile) as stat_json:
            stats = json.load(stat_json)

    # Can optionally copy output root files into a zip archive
    # If the user has specified a zipfile we will first
    # look for output files in this archive before scanning the
    # current directory
    if zipname:
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a')
        for f in zipf.namelist():
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                if seed not in file_dict[p]:
                    # For each model point have a dictionary keyed on the seed number
                    # with a value pointing to the file in the archive in the format
                    # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                    file_dict[p][seed] = zipname+'#'+f

    # Now look for files in the local directory
    for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])):
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        if p in file_dict:
            # Don't add this file to the list if its seed number is already
            # a value in the dict.
            if seed not in file_dict[p]:
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory
                # But: only in the file is good, we don't want to pollute the zip
                # file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print 'Adding %s to %s' % (f, zipname)
                    file_dict[p][seed] = zipname+'#'+f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal way
                    file_dict[p][seed] = f

    if zipname:
        zipf.close()

    # These lists will keep track of the CLs values which we will use
    # to create the output TGraph2Ds
    output_x = []
    output_y = []
    output_data = {}
    output_ntoys = []
    output_clserr = {}
    output_signif = {}
    # One list of Z-values per contour
    for contour in contours:
        output_data[contour] = []
        output_clserr[contour] = []
        output_signif[contour] = []

    # Also keep track of the number of model points which have met the
    # CLs criteria
    total_points = 0
    complete_points = 0

    for key,val in file_dict.iteritems():
        status_changed = True
        total_points += 1
        status_key = ':'.join(key)
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])

        # First check if we use the status json.
        # NOTE(review): unlike the sibling implementation of this method in
        # this file, TFileIsGood is run on every file here even when the
        # status entry says nothing changed — confirm which is intended.
        all_files = val.values()
        status_files = []
        files = [x for x in val.values() if plot.TFileIsGood(x)]

        if status_key in stats:
            status_files = stats[status_key]['files']
            if set(all_files) == set(status_files):
                print 'For point %s, no files have been updated' % name
                status_changed = False
            if set(files) == set(status_files) and len(files) < len(all_files):
                print 'For point %s, new files exist but they are not declared good' % name
                status_changed = False

        # Merge the HypoTestResult objects from each file into one.
        # If nothing changed since the last run, reuse the cached summary.
        res = None
        precomputed = None
        if status_key in stats and not status_changed:
            precomputed = stats[status_key]
        else:
            res = self.GetCombinedHypoTest(files)

        # Do the validation of this model point
        #
        ok, point_res = self.ValidateHypoTest(res, min_toys = min_toys, max_toys = max_toys, contours = contours, signif = signif, cl = cl, output = self.args.output, verbose = verbose, precomputed = precomputed)

        print '>> Point %s [%i toys, %s]' % (name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

        # Refresh the status cache entry for this point.
        stats[status_key] = { 'files': files, 'ntoys': point_res['ntoys'] }
        for cont in contours:
            if cont in point_res:
                stats[status_key][cont] = point_res[cont]

        if ok:
            complete_points += 1

        # Make plots of the test statistic distributions if requested
        if res is not None and make_plots:
            self.PlotTestStat(res, 'plot_'+name, opts = cfg['plot_settings'], poi_vals = (float(key[0]), float(key[1])), point_info=point_res)

        # Add the resulting CLs values to the output arrays. Normally just
        # for the model points that passed the validation criteria, but if "output_incomplete"
        # has been set to true then we'll write all model points where at least one HypoTestResult
        # is present
        if (res is not None or precomputed is not None) and (ok or incomplete) and self.args.output:
            output_x.append(float(key[0]))
            output_y.append(float(key[1]))
            output_ntoys.append(point_res['ntoys'])
            for contour in contours:
                # point_res[contour] is unpacked as (CLs, CLsErr, signif) —
                # established by the three indexed appends below.
                output_data[contour].append(point_res[contour][0])
                output_clserr[contour].append(point_res[contour][1])
                output_signif[contour].append(point_res[contour][2])

        # Do the job cycle generation if requested
        if not ok and self.args.cycles > 0:
            print '>>> Going to generate %i job(s) for point %s' % (self.args.cycles, key)
            # Figure out the next seed numbers we need to run by finding the maximum seed number
            # so far
            done_cycles = val.keys()
            new_idx = max(done_cycles)+1 if len(done_cycles) > 0 else 1
            new_cycles = range(new_idx, new_idx+self.args.cycles)
            print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
            print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)

            # Build to combine command. Here we'll take responsibility for setting the name and the
            # model parameters, making sure the latter are frozen
            set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
            if self.args.from_asymptotic:
                mval = key[0]
                command = []
                for par in bound_pars:
                    # The (mass, None, None) is just a trick to make bisect_left do the comparison
                    # with the list of tuples in bound_var[par]. The +1E-5 is to avoid float rounding
                    # issues
                    lower_bound = bisect.bisect_left(bound_vals[par], (float(mval)+1E-5, None, None))
                    # If lower_bound == 0 this means we are at or below the lowest mass point,
                    # in which case we should increase by one to take the bounds from this lowest
                    # point
                    if lower_bound == 0:
                        lower_bound += 1
                    command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound-1][1], bound_vals[par][lower_bound-1][2]))
                if len(command) > 0:
                    point_args += (' --setPhysicsModelParameterRanges %s' % (':'.join(command)))
                # print per_mass_point_args
                point_args += ' --singlePoint %s' % key[1]
                point_args += ' -m %s' % mval
            # Build a command for each job cycle setting the number of toys and random seed and passing through any other
            # user options from the config file or the command line
            for idx in new_cycles:
                cmd = ' '.join(['combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx] + self.passthru)
                self.job_queue.append(cmd)

    print ">> %i/%i points have completed and require no further toys" % (complete_points, total_points)
    self.flush_queue()

    # Create and write output CLs TGraph2Ds here
    # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
    if self.args.output and not self.args.from_asymptotic:
        fout = ROOT.TFile(outfile, 'RECREATE')
        for c in contours:
            graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y),
                                  array('d', output_data[c]))
            graph.SetName(c)
            fout.WriteTObject(graph, c)
            # Also write a Graph with the CLsErr
            graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c]))
            graph.SetName('clsErr_'+c)
            fout.WriteTObject(graph, 'clsErr_'+c)
            # And a Graph with the significance
            graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c]))
            graph.SetName('signif_'+c)
            fout.WriteTObject(graph, 'signif_'+c)
            # NOTE(review): the ntoys graph is named 'ntoys'+c but written
            # under the key 'ntoys' once per contour iteration — looks like
            # an oversight; confirm intended behaviour before changing.
            graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys))
            graph.SetName('ntoys'+c)
            fout.WriteTObject(graph, 'ntoys')
        fout.Close()

    if self.args.output and self.args.from_asymptotic:
        # Need to collect all the files for each mass point and hadd them:
        files_by_mass = {}
        for key,val in file_dict.iteritems():
            if key[0] not in files_by_mass:
                files_by_mass[key[0]] = list()
            files_by_mass[key[0]].extend(val.values())
        for m, files in files_by_mass.iteritems():
            gridfile = 'higgsCombine.gridfile.%s.%s.%s.root' % (POIs[0], m, POIs[1])
            self.job_queue.append('hadd -f %s %s' % (gridfile, ' '.join(files)))
            # One final combine job per quantile ('' = observed) reading the
            # merged toy grid back in.
            for exp in ['', '0.025', '0.160', '0.500', '0.840', '0.975']:
                self.job_queue.append(' '.join([
                    'combine -M HybridNew --rAbsAcc 0', opts,
                    '--grid %s' % gridfile,
                    '-n .final.%s.%s.%s' % (POIs[0], m, POIs[1]),
                    '-m %s' % (m),
                    ('--expectedFromGrid %s' % exp) if exp else '--noUpdateGrid'
                ] + self.passthru))
        self.flush_queue()

    # Persist the per-point status cache for the next invocation.
    if statfile:
        with open(statfile, 'w') as stat_out:
            stat_json = json.dumps(stats, sort_keys=True, indent=2, separators=(',', ': '))
            stat_out.write(stat_json)
def run_method(self):
    """Collect HybridNew toy outputs for a 2D grid of model points,
    validate each point's CLs values and queue further toy jobs where
    needed; optionally write the CLs surfaces as TGraph2Ds.

    Driven entirely by the JSON config file named in self.args.config.
    NOTE: Python 2 code (print statements, dict.iteritems).
    """
    # Keep ROOT away from our command line and from opening X windows
    ROOT.PyConfig.IgnoreCommandLineOptions = True
    ROOT.gROOT.SetBatch(ROOT.kTRUE)
    # Open the json config file
    with open(self.args.config) as json_file:
        cfg = json.load(json_file)
    # Set all the parameter values locally using defaults if necessary
    grids = cfg['grids']
    POIs = cfg['POIs']
    opts = cfg['opts']
    toys_per_cycle = cfg['toys_per_cycle']
    zipname = cfg.get('zipfile', None)
    contours = cfg.get('contours', ['obs', 'exp-2', 'exp-1', 'exp0', 'exp+1', 'exp+2'])
    min_toys = cfg.get('min_toys', 500)
    max_toys = cfg.get('max_toys', 5000)
    signif = cfg.get('signif', 3.0)
    cl = cfg.get('CL', 0.95)
    verbose = cfg.get('verbose', False)
    make_plots = cfg.get('make_plots', False)
    # Write CLs values into the output even if current toys do not pass validation
    incomplete = cfg.get('output_incomplete', False)
    outfile = cfg.get('output','hybrid_grid.root')
    # NB: blacklisting not yet implemented for this method

    # Have to merge some arguments from both the command line and the "opts" in the json file
    to_freeze = []
    to_set = []
    set_opt, opts = self.extract_arg('--setPhysicsModelParameters', opts)
    if set_opt is not None:
        to_set.append(set_opt)
    freeze_opt, opts = self.extract_arg('--freezeNuisances', opts)
    if freeze_opt is not None:
        to_freeze.append(freeze_opt)
    if hasattr(self.args, 'setPhysicsModelParameters') and self.args.setPhysicsModelParameters is not None:
        to_set.append(self.args.setPhysicsModelParameters)
    if hasattr(self.args, 'freezeNuisances') and self.args.freezeNuisances is not None:
        to_freeze.append(self.args.freezeNuisances)

    # Expand each grid spec into (POI1, POI2) model points; a non-empty
    # third entry marks blacklisted sub-ranges instead
    points = []; blacklisted_points = []
    for igrid in grids:
        assert(len(igrid) == 3)
        if igrid[2] == '':
            points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1])))
        else:
            blacklisted_points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]), utils.split_vals(igrid[2])))

    # This dictionary will keep track of the combine output files for each model point
    file_dict = { }
    for p in points:
        file_dict[p] = {}

    # The regex we will use to identify output files and extract POI values
    rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.HybridNew\.mH.*\.(?P<toy>.*)\.root' % (POIs[0], POIs[1]))

    # Can optionally copy output root files into a zip archive
    # If the user has specified a zipfile we will first
    # look for output files in this archive before scanning the
    # current directory
    if zipname:
        # Open the zip file in append mode, this should also
        # create it if it doesn't exist
        zipf = zipfile.ZipFile(zipname, 'a')
        for f in zipf.namelist():
            matches = rgx.search(f)
            p = (matches.group('p1'), matches.group('p2'))
            seed = int(matches.group('toy'))
            if p in file_dict:
                if seed not in file_dict[p]:
                    # For each model point have a dictionary keyed on the seed number
                    # with a value pointing to the file in the archive in the format
                    # ROOT expects: "zipfile.zip#higgsCombine.blah.root"
                    file_dict[p][seed] = zipname+'#'+f

    # Now look for files in the local directory
    for f in glob.glob('higgsCombine.%s.*.%s.*.HybridNew.mH*.root' % (POIs[0], POIs[1])):
        matches = rgx.search(f)
        p = (matches.group('p1'), matches.group('p2'))
        seed = int(matches.group('toy'))
        if p in file_dict:
            # Don't add this file to the list if its seed number is already
            # a value in the dict.
            if seed not in file_dict[p]:
                # If we're using the zipfile we'll add this now and
                # then delete it from the local directory
                # But: only if the file is good, we don't want to pollute the zip
                # file with incomplete or failed jobs
                if zipname and plot.TFileIsGood(f):
                    zipf.write(f)  # assume this throws if it fails
                    print 'Adding %s to %s' % (f, zipname)
                    file_dict[p][seed] = zipname+'#'+f
                    os.remove(f)
                else:
                    # otherwise just add the file to the dict in the normal way
                    file_dict[p][seed] = f

    if zipname:
        zipf.close()

    # These lists will keep track of the CLs values which we will use
    # to create the output TGraph2Ds
    output_x = []
    output_y = []
    output_data = {}
    output_ntoys = []
    output_clserr = {}
    output_signif = {}
    # One list of Z-values per contour
    for contour in contours:
        output_data[contour] = []
        output_clserr[contour] = []
        output_signif[contour] = []

    # Also keep track of the number of model points which have met the
    # CLs criteria
    total_points = 0
    complete_points = 0

    for key,val in file_dict.iteritems():
        total_points += 1
        name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1])
        files = [x for x in val.values() if plot.TFileIsGood(x)]
        # Merge the HypoTestResult objects from each file into one
        res = self.GetCombinedHypoTest(files)

        # Do the validation of this model point
        ok, point_res = self.ValidateHypoTest(res, min_toys = min_toys, max_toys = max_toys, contours = contours, signif = signif, cl = cl, output = self.args.output, verbose = verbose) if res is not None else (False, {"ntoys" : 0})

        print '>> Point %s [%i toys, %s]' % (name, point_res['ntoys'], 'DONE' if ok else 'INCOMPLETE')

        if ok:
            complete_points += 1

        # Make plots of the test statistic distributions if requested
        if res is not None and make_plots:
            self.PlotTestStat(res, 'plot_'+name, opts = cfg['plot_settings'], poi_vals = (float(key[0]), float(key[1])))

        # Add the resulting CLs values to the output arrays. Normally just
        # for the model points that passed the validation criteria, but if "output_incomplete"
        # has been set to true then we'll write all model points where at least one HypoTestResult
        # is present
        if res is not None and (ok or incomplete) and self.args.output:
            output_x.append(float(key[0]))
            output_y.append(float(key[1]))
            output_ntoys.append(point_res['ntoys'])
            for contour in contours:
                output_data[contour].append(point_res[contour][0])
                output_clserr[contour].append(point_res[contour][1])
                output_signif[contour].append(point_res[contour][2])

        # Do the job cycle generation if requested
        if not ok and self.args.cycles > 0:
            print '>>> Going to generate %i job(s) for point %s' % (self.args.cycles, key)
            # Figure out the next seed numbers we need to run by finding the maximum seed number
            # so far
            done_cycles = val.keys()
            new_idx = max(done_cycles)+1 if len(done_cycles) > 0 else 1
            new_cycles = range(new_idx, new_idx+self.args.cycles)
            print '>>> Done cycles: ' + ','.join(str(x) for x in done_cycles)
            print '>>> New cycles: ' + ','.join(str(x) for x in new_cycles)
            # Build the combine command. Here we'll take responsibility for setting the name and the
            # model parameters, making sure the latter are frozen
            set_arg = ','.join(['%s=%s,%s=%s' % (POIs[0], key[0], POIs[1], key[1])] + to_set)
            freeze_arg = ','.join(['%s,%s' % (POIs[0], POIs[1])] + to_freeze)
            point_args = '-n .%s --setPhysicsModelParameters %s --freezeNuisances %s' % (name, set_arg, freeze_arg)
            # Build a command for each job cycle setting the number of toys and random seed and passing through any other
            # user options from the config file or the command line
            for idx in new_cycles:
                cmd = ' '.join(['combine -M HybridNew', opts, point_args, '-T %i' % toys_per_cycle, '-s %i' % idx] + self.passthru)
                self.job_queue.append(cmd)

    print ">> %i/%i points have completed and require no further toys" % (complete_points, total_points)
    self.flush_queue()

    # Create and write output CLs TGraph2Ds here
    # TODO: add graphs with the CLs errors, the numbers of toys and whether or not the point passes
    if self.args.output:
        fout = ROOT.TFile(outfile, 'RECREATE')
        for c in contours:
            graph = ROOT.TGraph2D(len(output_data[c]), array('d', output_x), array('d', output_y), array('d', output_data[c]))
            graph.SetName(c)
            fout.WriteTObject(graph, c)
            # Also write a Graph with the CLsErr
            graph = ROOT.TGraph2D(len(output_clserr[c]), array('d', output_x), array('d', output_y), array('d', output_clserr[c]))
            graph.SetName('clsErr_'+c)
            fout.WriteTObject(graph, 'clsErr_'+c)
            # And a Graph with the significance
            graph = ROOT.TGraph2D(len(output_signif[c]), array('d', output_x), array('d', output_y), array('d', output_signif[c]))
            graph.SetName('signif_'+c)
            fout.WriteTObject(graph, 'signif_'+c)
        # Toy-count graph, written once after the contour loop.
        # NOTE(review): 'ntoys'+c reuses the leaked loop variable, so the
        # in-memory object name carries the last contour's suffix while the
        # key written to the file is plain 'ntoys' -- looks unintentional,
        # confirm before relying on the object name.
        graph = ROOT.TGraph2D(len(output_ntoys), array('d', output_x), array('d', output_y), array('d', output_ntoys))
        graph.SetName('ntoys'+c)
        fout.WriteTObject(graph, 'ntoys')
        fout.Close()
def run_method(self): # Put the method back in because we always take it out self.put_back_arg('method', '-M') # cmd_queue = [] subbed_vars = {} # pre_cmd = '' if self.args.mass is not None: mass_vals = utils.split_vals(self.args.mass) subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals] self.passthru.extend(['-m', '%(MASS)s']) if self.args.singlePoint is not None: single_points = utils.split_vals(self.args.singlePoint) subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points] self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s']) self.args.name += '.POINT.%(SINGLEPOINT)s' if self.args.seed is not None: seed_vals = utils.split_vals(self.args.seed) subbed_vars[('SEED',)] = [(sval,) for sval in seed_vals] self.passthru.extend(['-s', '%(SEED)s']) if len(self.args.datacard) >= 1: # Two lists of tuples, one which does specify the mass, and one # which doesn't dc_mass = [] dc_no_mass = [] for dc in self.args.datacard: # Split workspace into path and filename path, file = os.path.split(dc) # If the wsp is in the current directory should call it '.' if path == '': path = '.' 
# If we're not using the --there option then leave the # workspace argument as the full path if not self.args.there: file = dc # Figure out if the enclosing directory is a mass value dirs = path.split('/') if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]): print 'Assuming card %s uses mass value %s' % (dc, dirs[-1]) dc_mass.append((path, file, dirs[-1])) dc_no_mass.append((path, file)) # If at least one mass value was inferred assume all of them are like this if len(dc_mass) > 0: subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s']) else: subbed_vars[('DIR', 'DATACARD',)] = dc_no_mass self.passthru.extend(['-d', '%(DATACARD)s']) # elif len(self.args.datacard) == 1: # self.passthru.extend(['-d', self.args.datacard[0]]) if self.args.boundlist is not None: with open(self.args.boundlist) as json_file: bnd = json.load(json_file) # find the subbed_vars entry containing the mass # We will extend it to also specify the ranges dict_key = None mass_idx = None for key in subbed_vars.keys(): if 'MASS' in key: dict_key = key mass_idx = dict_key.index('MASS') new_key = dict_key + ('MODELBOUND',) new_list = [] for entry in subbed_vars[dict_key]: command = [] mval = entry[mass_idx] for model in bnd: command.append(model+'=0,'+str(bnd[model][mval])) new_list.append(entry + (':'.join(command),)) # now remove the current mass information from subbed_vars # and replace it with the updated one del subbed_vars[dict_key] subbed_vars[new_key] = new_list self.passthru.extend(['--setPhysicsModelParameterRanges', '%(MODELBOUND)s']) if self.args.points is not None: self.passthru.extend(['--points', self.args.points]) if (self.args.split_points is not None and self.args.split_points > 0 and self.args.points is not None): points = int(self.args.points) split = self.args.split_points start = 0 ranges = [] while (start + (split - 1)) <= points: # filename = 
"higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root" # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): # # Send job, if the file it's supposed to create doesn't exist yet # # or if the file is empty because the previous job didn't finish ranges.append((start, start + (split - 1))) start += split if start < points: # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root" # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024): ranges.append((start, points - 1)) #if (ranges == []): # print "No jobs were created; All files already exist" # exit() subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges] self.passthru.extend( ['--firstPoint %(P_START)s --lastPoint %(P_END)s']) self.args.name += '.POINTS.%(P_START)s.%(P_END)s' # can only put the name option back now because we might have modified # it from what the user specified self.put_back_arg('name', '-n') proto = 'combine ' + (' '.join(self.passthru)) if self.args.there: proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru))+'; popd' for it in itertools.product(*subbed_vars.values()): keys = subbed_vars.keys() dict = {} for i, k in enumerate(keys): for tuple_i, tuple_ele in enumerate(k): dict[tuple_ele] = it[i][tuple_i] self.job_queue.append(proto % dict) self.flush_queue()
def run_method(self): # This is what the logic should be: # - get the list of model points # - figure out which files are: # - completely missing # - there but corrupt, missing tree # - ok # - If we have anything in the third category proceed to produce output files # - Anything in the first two gets added to the queue only if --doFits is specified # so that the # Step 1 - open the json config file with open(self.args.config) as json_file: cfg = json.load(json_file) # to do - have to handle the case where it doesn't exist points = [] for igrid in cfg['grids']: assert(len(igrid) == 2) points.extend(itertools.product(utils.split_vals(igrid[0]), utils.split_vals(igrid[1]))) POIs = cfg['POIs'] file_dict = { } for p in points: file_dict[p] = [] for f in glob.glob('higgsCombine.%s.*.%s.*.Asymptotic.mH*.root' % (POIs[0], POIs[1])): # print f rgx = re.compile('higgsCombine\.%s\.(?P<p1>.*)\.%s\.(?P<p2>.*)\.Asymptotic\.mH.*\.root' % (POIs[0], POIs[1])) matches = rgx.search(f) p = (matches.group('p1'), matches.group('p2')) if p in file_dict: file_dict[p].append(f) for key,val in file_dict.iteritems(): name = '%s.%s.%s.%s' % (POIs[0], key[0], POIs[1], key[1]) print '>> Point %s' % name if len(val) == 0: print 'Going to run limit for point %s' % (key,) point_args = '-n .%s --setPhysicsModelParameters %s=%s,%s=%s --freezeNuisances %s,%s' % (name, POIs[0], key[0], POIs[1], key[1], POIs[0], POIs[1]) cmd = ' '.join(['combine -M Asymptotic', cfg['opts'], point_args] + self.passthru) self.job_queue.append(cmd) bail_out = len(self.job_queue) > 0 self.flush_queue() if bail_out: print ">> New jobs were created / run in this cycle, run the script again to collect the output" sys.exit(0) xvals = [] yvals = [] zvals = [] for key,val in file_dict.iteritems(): for filename in val: fin = ROOT.TFile(filename) if fin.IsZombie(): continue tree = fin.Get('limit') for evt in tree: if evt.quantileExpected == -1: print 'At point %s have observed CLs = %f' % (key, evt.limit) 
xvals.append(float(key[0])) yvals.append(float(key[1])) zvals.append(float(evt.limit)) graph = ROOT.TGraph2D(len(zvals), array('d', xvals), array('d', yvals), array('d', zvals)) h_bins = cfg['hist_binning'] hist = ROOT.TH2F('h_observed', '', h_bins[0], h_bins[1], h_bins[2], h_bins[3], h_bins[4], h_bins[5]) for i in xrange(1, hist.GetNbinsX()+1): for j in xrange(1, hist.GetNbinsY()+1): hist.SetBinContent(i, j, graph.Interpolate(hist.GetXaxis().GetBinCenter(i), hist.GetYaxis().GetBinCenter(j))) fout = ROOT.TFile('asymptotic_grid.root', 'RECREATE') fout.WriteTObject(graph, 'observed') fout.WriteTObject(hist) fout.Close()
def run_method(self):
    """Build and enqueue 'combine' command lines from the parsed arguments.

    Expands multi-valued options (mass, single points, seeds, --generate
    groups, datacards, parameter bounds, point ranges) into printf-style
    '%(KEY)s' substitution templates, then emits one job per element of
    the cartesian product of all the value lists.
    NOTE: Python 2 code (print statements, dict.iteritems).
    """
    # Put the method back in because we always take it out
    self.put_back_arg('method', '-M')
    # cmd_queue = []
    subbed_vars = {}
    # pre_cmd = ''
    # Each subbed_vars entry maps a tuple of template keys to a list of
    # value tuples; '%(KEY)s' placeholders in the command prototype are
    # filled from these at the end
    if self.args.mass is not None:
        mass_vals = utils.split_vals(self.args.mass)
        subbed_vars[('MASS',)] = [(mval,) for mval in mass_vals]
        self.passthru.extend(['-m', '%(MASS)s'])
    if self.args.singlePoint is not None:
        single_points = utils.split_vals(self.args.singlePoint)
        subbed_vars[('SINGLEPOINT',)] = [(pval,) for pval in single_points]
        self.passthru.extend(['--singlePoint', '%(SINGLEPOINT)s'])
        self.args.name += '.POINT.%(SINGLEPOINT)s'
    if self.args.seed is not None:
        seed_vals = utils.split_vals(self.args.seed)
        subbed_vars[('SEED',)] = [(sval,) for sval in seed_vals]
        self.passthru.extend(['-s', '%(SEED)s'])
    # Each --generate argument declares a group of options that vary
    # together: "HEADERS;;ENTRY;ENTRY;..." ('::'/',,' variants are used
    # when the values themselves contain ';' or ',')
    for i, generate in enumerate(self.args.generate):
        split_char = ':' if '::' in generate else ';'
        gen_header, gen_content = generate.split(split_char*2)
        print gen_header
        print gen_content
        gen_headers = gen_header.split(split_char)
        gen_entries = gen_content.split(split_char)
        key = tuple()
        arglist = []
        for header in gen_headers:
            if header == 'n' or header == 'name':
                # 'name' entries are appended to the task name instead of
                # becoming a command-line option
                self.args.name += '.%(GENNAME' + str(i) + ')s'
                key += ('GENNAME' + str(i),)
            else:
                self.passthru.extend(['%(' + header + ')s'])
                key += (header,)
        for entry in gen_entries:
            if ',,' in entry:
                split_entry = entry.split(',,')
            else:
                split_entry = entry.split(',')
            final_arg = []
            for header, e in zip(gen_headers, split_entry):
                # Single-character headers become short options, longer
                # ones become long options
                argname = '-%s' % header if len(header) == 1 else '--%s' % header
                if header == 'n' or header == 'name':
                    final_arg.append(e)
                elif len(e) and e != '!':
                    final_arg.append('%s %s' % (argname, e))
                else:
                    # empty value or '!' means "omit this option here"
                    final_arg.append('')
            arglist.append(tuple(final_arg))
        subbed_vars[key] = arglist
    if len(self.args.datacard) >= 1:
        # Two lists of tuples, one which does specify the mass, and one
        # which doesn't
        dc_mass = []
        dc_no_mass = []
        for dc in self.args.datacard:
            # Split workspace into path and filename
            path, file = os.path.split(dc)
            # If the wsp is in the current directory should call it '.'
            if path == '':
                path = '.'
            # If we're not using the --there option then leave the
            # workspace argument as the full path
            if not self.args.there:
                file = dc
            # Figure out if the enclosing directory is a mass value
            dirs = path.split('/')
            if self.args.mass is None and len(dirs) >= 1 and isfloat(dirs[-1]):
                print 'Assuming card %s uses mass value %s' % (dc, dirs[-1])
                dc_mass.append((path, file, dirs[-1]))
            dc_no_mass.append((path, file))
        # If at least one mass value was inferred assume all of them are like this
        if len(dc_mass) > 0:
            subbed_vars[('DIR', 'DATACARD', 'MASS')] = dc_mass
            self.passthru.extend(['-d', '%(DATACARD)s', '-m', '%(MASS)s'])
        else:
            subbed_vars[('DIR', 'DATACARD',)] = dc_no_mass
            self.passthru.extend(['-d', '%(DATACARD)s'])
    # elif len(self.args.datacard) == 1:
    #     self.passthru.extend(['-d', self.args.datacard[0]])
    current_ranges = self.args.setPhysicsModelParameterRanges
    put_back_ranges = current_ranges is not None
    if self.args.boundlist is not None:
        # We definitely don't need to put the parameter ranges back
        # into the args because they're going in via the boundlist
        # option instead
        put_back_ranges = False
        with open(self.args.boundlist) as json_file:
            bnd = json.load(json_file)
        bound_pars = list(bnd.keys())
        print 'Found bounds for parameters %s' % ','.join(bound_pars)
        # Fill a dictionary of the bound info of the form:
        #   { 'PAR1' : [(MASS, LOWER, UPPER), ...], ... }
        bound_vals = {}
        for par in bound_pars:
            bound_vals[par] = list()
            for mass, bounds in bnd[par].iteritems():
                bound_vals[par].append((float(mass), bounds[0], bounds[1]))
            bound_vals[par].sort(key=lambda x: x[0])
        # find the subbed_vars entry containing the mass
        # We will extend it to also specify the ranges
        dict_key = None
        mass_idx = None
        for key in subbed_vars.keys():
            if 'MASS' in key:
                dict_key = key
                mass_idx = dict_key.index('MASS')
        new_key = dict_key + ('MODELBOUND',)
        new_list = []
        for entry in subbed_vars[dict_key]:
            command = []
            if current_ranges is not None:
                command.append(current_ranges)
            mval = entry[mass_idx]
            for par in bound_pars:
                # The (mass, None, None) is just a trick to make bisect_left do the comparison
                # with the list of tuples in bound_vals[par]. The +1E-5 is to avoid float rounding
                # issues
                lower_bound = bisect.bisect_left(bound_vals[par], (float(mval)+1E-5, None, None))
                # If lower_bound == 0 this means we are at or below the lowest mass point,
                # in which case we should increase by one to take the bounds from this lowest
                # point
                if lower_bound == 0:
                    lower_bound += 1
                command.append('%s=%g,%g' % (par, bound_vals[par][lower_bound-1][1], bound_vals[par][lower_bound-1][2]))
            new_list.append(entry + (str(':'.join(command)),))
        # now remove the current mass information from subbed_vars
        # and replace it with the updated one
        del subbed_vars[dict_key]
        subbed_vars[new_key] = new_list
        self.passthru.extend(['--setPhysicsModelParameterRanges', '%(MODELBOUND)s'])
    # We might need to put the intercepted --setPhysicsModelParameterRanges arg back in
    if put_back_ranges:
        self.put_back_arg('setPhysicsModelParameterRanges', '--setPhysicsModelParameterRanges')
    if self.args.points is not None:
        self.passthru.extend(['--points', self.args.points])
    # Optionally split a --points scan into jobs of --split-points points each
    if (self.args.split_points is not None and
            self.args.split_points > 0 and
            self.args.points is not None):
        points = int(self.args.points)
        split = self.args.split_points
        start = 0
        ranges = []
        while (start + (split - 1)) < points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(start+(split-1))+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            #     # Send job, if the file it's supposed to create doesn't exist yet
            #     # or if the file is empty because the previous job didn't finish
            ranges.append((start, start + (split - 1)))
            start += split
        if start < points:
            # filename = "higgsCombine"+self.args.name+".POINTS."+str(start)+"."+str(points - 1)+".MultiDimFit.mH"+str(self.args.mass)+".root"
            # if (not os.path.isfile(filename)) or (os.path.getsize(filename)<1024):
            ranges.append((start, points - 1))
        #if (ranges == []):
        #    print "No jobs were created; All files already exist"
        #    exit()
        subbed_vars[('P_START', 'P_END')] = [(r[0], r[1]) for r in ranges]
        self.passthru.extend(
            ['--firstPoint %(P_START)s --lastPoint %(P_END)s'])
        self.args.name += '.POINTS.%(P_START)s.%(P_END)s'
    # can only put the name option back now because we might have modified
    # it from what the user specified
    self.put_back_arg('name', '-n')
    proto = 'combine ' + (' '.join(self.passthru))
    if self.args.there:
        # Run from inside the datacard's directory instead of using paths
        proto = 'pushd %(DIR)s; combine ' + (' '.join(self.passthru))+'; popd'
    # Emit one command per element of the cartesian product of all the
    # substitution value lists
    for it in itertools.product(*subbed_vars.values()):
        keys = subbed_vars.keys()
        dict = {}
        for i, k in enumerate(keys):
            for tuple_i, tuple_ele in enumerate(k):
                dict[tuple_ele] = it[i][tuple_i]
        self.job_queue.append(proto % dict)
    self.flush_queue()