def convert_btag_csv_file(csvFilePath):
    """Build dense evaluated lookup tables from a btag-style CSV file.

    The first line of the file must look like ``<name>; <col0>, <col1>, ...``:
    the text before the ``;`` becomes the name prefix of every output key and
    the comma-separated remainder supplies the column names. All following
    lines are parsed with ``numpy.genfromtxt``.

    Rows are grouped by each unique combination of the first four columns.
    Within a group, columns 4/5, 6/7 and 8/9 are used as the lower/upper edges
    of three binning axes (called eta, pt and discr in this function) and
    column 10 holds the value stored for each bin.

    Parameters
    ----------
    csvFilePath : str
        Path of the CSV file to convert.

    Returns
    -------
    dict
        Maps ``(label, 'dense_evaluated_lookup')`` to
        ``(vals, (etaBins, ptBins, discrBins), feval_dim)`` where ``label`` is
        the file name prefix joined by ``'_'`` with the four label columns,
        ``vals`` has shape ``(n_discr_bins, n_pt_bins, n_eta_bins)`` and
        ``feval_dim`` is ``tuple(btag_feval_dims[label[0]])``.

    Raises
    ------
    IndexError
        If the header line has no ``;`` separator, or if some
        (eta, pt, discr) lower-edge combination of a label has no row.
    """
    # Only the header line is needed here; the context manager releases the
    # handle even if reading fails.
    with open(csvFilePath) as btag_f:
        nameandcols = btag_f.readline().split(';')
    name = nameandcols[0].strip()
    columns = [column.strip() for column in nameandcols[1].strip().split(',')]

    # NOTE: unpack=True is intentionally not passed. Since NumPy 1.20 it turns
    # a structured result into a plain list of per-column arrays, which breaks
    # the field-name indexing below; without it the structured array that
    # older NumPy versions returned is preserved.
    corrections = np.genfromtxt(csvFilePath,
                                dtype=None,
                                names=tuple(columns),
                                converters={1: lambda s: s.strip(),
                                            2: lambda s: s.strip(),
                                            10: lambda s: s.strip(' "')},
                                delimiter=',',
                                skip_header=1,
                                encoding='ascii')

    # The first four columns together identify one correction set.
    all_names = corrections[[columns[i] for i in range(4)]]
    labels = np.unique(all_names)

    # Lower-edge columns are compared against in the innermost loop; look them
    # up once instead of once per bin.
    eta_low = corrections[columns[4]]
    pt_low = corrections[columns[6]]
    discr_low = corrections[columns[8]]

    wrapped_up = {}
    for label in labels:
        label_mask = all_names == label
        label_rows = corrections[np.where(label_mask)]

        etaBins = np.union1d(np.unique(label_rows[columns[4]]),
                             np.unique(label_rows[columns[5]]))
        ptBins = np.union1d(np.unique(label_rows[columns[6]]),
                            np.unique(label_rows[columns[7]]))
        discrBins = np.union1d(np.unique(label_rows[columns[8]]),
                               np.unique(label_rows[columns[9]]))

        vals = np.zeros(shape=(len(discrBins) - 1,
                               len(ptBins) - 1,
                               len(etaBins) - 1),
                        dtype=corrections.dtype[10])
        for i, eta_bin in enumerate(etaBins[:-1]):
            for j, pt_bin in enumerate(ptBins[:-1]):
                for k, discr_bin in enumerate(discrBins[:-1]):
                    this_bin = np.where(label_mask &
                                        (eta_low == eta_bin) &
                                        (pt_low == pt_bin) &
                                        (discr_low == discr_bin))
                    # The first matching row wins if several rows share the
                    # same lower edges.
                    vals[k, j, i] = corrections[this_bin][columns[10]][0]

        # Label fields may be bytes or str depending on how they were parsed;
        # normalise everything to str before joining.
        label_decode = []
        for idx in range(len(label)):
            part = label[idx]
            if isinstance(part, bytes):
                label_decode.append(part.decode())
            else:
                label_decode.append(str(part))
        str_label = '_'.join([name] + label_decode)

        feval_dim = btag_feval_dims[label[0]]
        wrapped_up[(str_label, 'dense_evaluated_lookup')] = (
            vals, (etaBins, ptBins, discrBins), tuple(feval_dim))
    return wrapped_up
def _build_standard_jme_lookup(name, layout, pars, nBinnedVars, nBinColumns,
                               nEvalVars, formula, nParms, columns, dtypes,
                               interpolatedFunc=False):
    """Assemble the bins, clamps and parameters of a 'jme_standard_function'.

    Parameters
    ----------
    name : str
        Name used in the key of the returned dictionary.
    layout : sequence of str
        Header tokens. ``layout[1 : 1 + nBinnedVars]`` are read as the names
        of the binning variables and ``layout[nBinnedVars + 2 :]`` as the
        names of the evaluation variables.
    pars : numpy structured array
        Parsed table, indexed by the field names given in ``columns``.
    nBinnedVars : int
        Number of binning variables (at least one is required).
    nBinColumns : int
        Accepted for interface compatibility; not used by this function.
    nEvalVars : int
        Number of evaluation variables.
    formula : object
        Passed through unchanged into the result.
    nParms : int
        Number of parameter columns to collect.
    columns : sequence of str
        Field names of ``pars``. Columns ``2 * i`` and ``2 * i + 1`` hold the
        min/max edges of binning variable ``i``; clamp and parameter columns
        start at index ``2 * nBinnedVars + 1``.
    dtypes : sequence
        Accepted for interface compatibility; not used by this function.
    interpolatedFunc : bool, optional
        When true no clamp columns are read, and parameters start right after
        the binning columns instead of after the clamp columns.

    Returns
    -------
    dict
        A single entry ``{(name, 'jme_standard_function'): (formula,
        (bins, bin_order), (clamp_mins, clamp_maxs, var_order),
        (parms, parm_order))}``.
    """
    # the first bin is always usual for JECs
    # the next bins may vary in number, so they're jagged arrays... yay
    bins = {}
    bin_order = []
    offset_name = 1
    for i in range(nBinnedVars):
        bin_name = layout[i + offset_name]
        if i == 0:
            bins[bin_name] = np.union1d(np.unique(pars[columns[0]]),
                                        np.unique(pars[columns[1]]))
        else:
            min_col = columns[2 * i]
            max_col = columns[2 * i + 1]
            # np.int was only an alias of the builtin int and no longer exists
            # in NumPy >= 1.24, so the builtin is used directly.
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                # Rows belonging to this bin of the first binning variable.
                rows = pars[np.where(pars[columns[0]] == binMin)]
                theBins = np.union1d(np.unique(rows[min_col]),
                                     np.unique(rows[max_col]))
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[bin_name] = awkward.JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)

    # skip nvars to the variable columns
    # the columns here define clamps for the variables defined in columns[]
    # ----> clamps can be different from bins
    # ----> if there is more than one binning variable this array is jagged
    # ----> just make it jagged all the time
    clamp_mins = {}
    clamp_maxs = {}
    var_order = []
    offset_col = 2 * nBinnedVars + 1
    offset_name = nBinnedVars + 2
    jagged_counts = np.ones(bins[bin_order[0]].size - 1, dtype=int)
    if len(bin_order) > 1:
        # need counts-1 since we only care about Nbins
        jagged_counts = np.maximum(bins[bin_order[1]].counts - 1, 0)
    for i in range(nEvalVars):
        var_name = layout[i + offset_name]
        var_order.append(var_name)
        if not interpolatedFunc:
            clamp_mins[var_name] = awkward.JaggedArray.fromcounts(
                jagged_counts, np.atleast_1d(pars[columns[i + offset_col]]))
            clamp_maxs[var_name] = awkward.JaggedArray.fromcounts(
                jagged_counts,
                np.atleast_1d(pars[columns[i + offset_col + 1]]))
            # Each variable owns two columns (min, max); together with the
            # loop index this extra step advances two columns per variable.
            offset_col += 1

    # now get the parameters, which we will look up with the clamped values
    offset_col = 2 * nBinnedVars + 1
    if not interpolatedFunc:
        # Same truthiness test as the clamp branch above, so the columns
        # skipped here are exactly the clamp columns that were consumed.
        offset_col += 2 * nEvalVars
    parms = [awkward.JaggedArray.fromcounts(jagged_counts,
                                            pars[columns[i + offset_col]])
             for i in range(nParms)]
    parm_order = ['p%i' % (i) for i in range(nParms)]

    wrapped_up = {}
    wrapped_up[(name, 'jme_standard_function')] = (
        formula,
        (bins, bin_order),
        (clamp_mins, clamp_maxs, var_order),
        (parms, parm_order))
    return wrapped_up
def convert_effective_area_file(eaFilePath):
    """Convert an effective-area text file into dense lookup tables.

    The first line is a header, optionally wrapped in ``{}``, made of
    whitespace-separated tokens: the number of binned variables, their names,
    the number of evaluated variables and then their names. Every following
    line is a row of floats holding a min/max pair per binned variable
    followed by one value per evaluated variable.

    Parameters
    ----------
    eaFilePath : str
        Path of the file. The part of the last ``/``-separated component
        before its first ``.`` is used as the name prefix of the output keys.

    Returns
    -------
    dict
        Maps ``('<prefix>_<column>', 'dense_lookup')`` to ``(values, dims)``
        where ``values`` is the column of the evaluated variable and ``dims``
        holds the bin edges of the first binned variable.

    Raises
    ------
    Exception
        If the header is empty or its first token is not a digit string.
    """
    # Only the header line is needed here; the context manager releases the
    # handle even if reading fails.
    with open(eaFilePath, 'r') as ea_f:
        layoutstr = ea_f.readline().strip().strip('{}')

    name = eaFilePath.split('/')[-1].split('.')[0]

    layout = layoutstr.split()
    # An empty header is reported with the same error as a malformed one
    # instead of surfacing as a bare IndexError.
    if not layout or not layout[0].isdigit():
        raise Exception(
            'First column of Effective Area File Header must be a digit!')

    # setup the file format
    nBinnedVars = int(layout[0])
    nEvalVars = int(layout[nBinnedVars + 1])

    minMax = ['Min', 'Max']
    columns = []
    dtypes = []
    offset = 1
    for i in range(nBinnedVars):
        columns.extend(['%s%s' % (layout[i + offset], mm) for mm in minMax])
        dtypes.extend(['<f8', '<f8'])
    # Skip past the binned-variable names and the evaluated-variable count.
    offset += nBinnedVars + 1
    for i in range(nEvalVars):
        columns.append('%s' % (layout[i + offset]))
        dtypes.append('<f8')

    # NOTE: unpack=True is intentionally not passed. Since NumPy 1.20 it turns
    # a structured result into a plain list of per-column arrays, which breaks
    # the field-name indexing below; without it the structured array that
    # older NumPy versions returned is preserved.
    pars = np.genfromtxt(eaFilePath,
                         dtype=tuple(dtypes),
                         names=tuple(columns),
                         skip_header=1,
                         encoding='ascii')

    bins = {}
    bin_order = []
    offset_name = 1
    for i in range(nBinnedVars):
        bin_name = layout[i + offset_name]
        if i == 0:
            bins[bin_name] = np.union1d(np.unique(pars[columns[0]]),
                                        np.unique(pars[columns[1]]))
        else:
            min_col = columns[2 * i]
            max_col = columns[2 * i + 1]
            # np.int was only an alias of the builtin int and no longer exists
            # in NumPy >= 1.24, so the builtin is used directly.
            counts = np.zeros(0, dtype=int)
            allBins = np.zeros(0, dtype=np.double)
            for binMin in bins[bin_order[0]][:-1]:
                # Rows belonging to this bin of the first binned variable.
                rows = pars[np.where(pars[columns[0]] == binMin)]
                theBins = np.union1d(np.unique(rows[min_col]),
                                     np.unique(rows[max_col]))
                allBins = np.append(allBins, theBins)
                counts = np.append(counts, theBins.size)
            bins[bin_name] = awkward.JaggedArray.fromcounts(counts, allBins)
        bin_order.append(bin_name)

    # again this is only for one dimension of binning, fight me
    # we can figure out a 2D EA when we get there
    offset_name += 1
    wrapped_up = {}
    lookup_type = 'dense_lookup'
    dims = bins[layout[1]]
    for i in range(nEvalVars):
        column = columns[offset_name + i]
        ea_name = '_'.join([name, column])
        wrapped_up[(ea_name, lookup_type)] = (pars[column], dims)

    return wrapped_up