def bin_values(lis, bins, relative=False):
    """Count how many values of *lis* fall into each bin defined by *bins*.

    The sorted edges in *bins* define consecutive bins ``(left, right]``.
    The first regular bin additionally contains its own left edge, so a
    value equal to the lowest edge is never reported as "below range".
    Values below the lowest edge are collected in an underflow entry
    labelled ``'<lowest'`` and values above the highest edge in an overflow
    entry labelled ``'>highest'``; both entries only appear when non-empty.
    If a single edge is given there are no regular bins and values equal to
    that edge are counted in the underflow entry.

    A summary (total count and the percentage per bin) is printed to stdout.

    Args:
        lis: iterable of numbers to bin.  It is not modified.
        bins: iterable of bin edges, in any order.
        relative: if true, return percentages of the total count instead of
            absolute counts.

    Returns:
        List of ``(label, count_or_percent)`` tuples ordered from the lowest
        to the highest bin.  Regular bins are labelled ``'left-right'``.
        For empty *lis* every regular bin is reported with ``0``.

    Raises:
        ValueError: if *bins* is empty.
        NameError: if some value could not be assigned to any bin (for
            example NaN, which compares false against every edge).
    """
    from bisect import bisect_left

    edges = sorted(bins)
    if not edges:
        raise ValueError('bin_values() needs at least one bin edge')
    # Work on a copy: the caller's sequence must not be reordered.
    values = list(lis)

    lowest, highest = edges[0], edges[-1]
    regular = list(zip(edges, edges[1:]))
    counts = [0] * len(regular)
    below = 0
    above = 0
    for num in values:
        if num > highest:
            above += 1
        elif num < lowest:
            below += 1
        elif num == lowest:
            # The lowest edge belongs to the first regular bin (if any).
            if regular:
                counts[0] += 1
            else:
                below += 1
        elif num <= highest:
            # bisect_left gives i with edges[i - 1] < num <= edges[i].
            counts[bisect_left(edges, num) - 1] += 1
        # Anything else (e.g. NaN) stays uncounted and trips the check below.

    result = [
        (str(left) + '-' + str(right), count)
        for (left, right), count in zip(regular, counts)
    ]
    if below:
        result.insert(0, ('<' + str(lowest), below))
    if above:
        result.append(('>' + str(highest), above))

    total = len(values)
    check = sum(count for _, count in result)
    if check != total:
        # Every part is converted to str so the diagnostic itself cannot fail.
        print('\t'.join(str(part) for part in (
            'input:', values, '\n', 'bins:', edges, '\n',
            'binned', result, '\n', total, check)))
        raise NameError('MISSING OR TO MANY NUMBERS IN BINCOUNT')

    # Guard against division by zero for empty input.
    scale = 100.0 / total if total else 0.0
    percents = [(label, scale * count) for label, count in result]

    # The report is always derived from the raw counts so that the
    # "total count" line shows the number of values, also in relative mode.
    print('\n'.join(
        ['total count (100%): ' + str(total)]
        + [label + ' percent: ' + str(pct) for label, pct in percents]))

    return percents if relative else result
def propagatetable(datatable, groupcolumn, measurecolumn, errorcolumn):
    """
    Primitive aggregate (i.e. averaging) function for a table with error values.

    Rows are grouped by the values found in *groupcolumn*; for every group
    the measurements are combined with ``ufmeanstd(..., asufloat=True)``.
    Where both a measurement and an error are present they are wrapped in a
    ``ufloat`` so that the error is propagated through the ufloat mechanism.

    NA handling (no control, always applied):
      * rows whose measurement is ``'NA'`` are skipped,
      * rows whose error is ``'NA'`` contribute the bare measurement.

    Args:
        datatable: table object providing ``iterrows()``; each row is
            indexed by column name.
        groupcolumn: a single column name or a sequence of column names
            used to build the group key.
        measurecolumn: name of the column holding the measured values.
        errorcolumn: name of the column holding the error of each value.

    Returns:
        A new ``fhutils.Table`` named ``'propagate'`` whose columns are the
        group columns followed by *measurecolumn* and *errorcolumn*, one row
        per group.  ``sortrow()`` is called on it before it is returned.
    """
    # ``unicode`` only exists on Python 2.  Referencing it unconditionally
    # raised NameError on Python 3 for every call, so resolve it defensively.
    try:
        string_types = (str, unicode)  # noqa: F821 - Python 2 only
    except NameError:
        string_types = (str,)
    if isinstance(groupcolumn, string_types):
        groupcolumn = (groupcolumn,)

    keyvalue = []
    for row in datatable.iterrows():
        key = tuple(row[col] for col in groupcolumn)
        if row[measurecolumn] in ('NA',):
            # No measurement at all: nothing to aggregate for this row.
            continue
        elif row[errorcolumn] in ('NA',):
            # Measurement without a known error: use the plain value.
            value = row[measurecolumn]
        else:
            value = ufloat(row[measurecolumn], row[errorcolumn])
        keyvalue.append((key, value))
    # Presumably maps each key to the list of its values - see fhutils.
    keyvalue = fhutils.keylis2dic(keyvalue)

    resultable = fhutils.Table('propagate')
    resultable.columnames = list(groupcolumn) + [measurecolumn, errorcolumn]
    for k, valuelist in keyvalue.items():
        valuelist = [ele for ele in valuelist if ele != 'NA']  # NA filtered out
        ufavg = ufmeanstd(valuelist, asufloat=True)
        row = list(k)
        if isinstance(ufavg, uncertainties.UFloat):
            row += [ufavg.nominal_value, ufavg.std_dev]
        else:
            # Plain number returned: report it with an error of 0.
            row += [ufavg, 0]
        resultable.append(row)
    resultable.sortrow()
    return resultable