def dye_by_bin(plate_objects, analyzed_wells, eventful_wells, analyzed_sample_names, eventful_sample_names, bin_func, channel='FAM'):
    """
    @deprecated -- use metrics

    Compute the mean and standard deviation of droplet amplitudes per bin
    for each sample group.

    :param plate_objects: Mapping keyed by (dirname, basename) of a well's
                          plate file; each value exposes ``wells`` keyed by
                          well name.
    :param analyzed_wells: Wells filtered by the analyzed sample names.  These
                           wells alone determine the keys of the result.
    :param eventful_wells: Wells filtered by the eventful sample names.
    :param analyzed_sample_names: Sample names looked up in analyzed_wells.
    :param eventful_sample_names: Sample names looked up in eventful_wells.
    :param bin_func: Maps a well to its bin key.
    :param channel: 'VIC' reads VIC amplitudes; any other value reads FAM.
    :return: dict of bin key -> list of [dye concentration, stats] rows.
             Row i belongs to the i-th sample group (analyzed names first,
             then eventful names); stats is (mean, std) or None when the
             group had no wells in that bin.
    """
    # assumes standard dye concentrations, one row per sample group in order
    dye_levels = (3, 10, 30, 100, 300, 500)

    bin_plots = {}
    for key in set(bin_func(w) for w in analyzed_wells):
        # fresh rows per bin so bins never share mutable state
        bin_plots[key] = [[level, None] for level in dye_levels]

    groups = []
    sources = ((analyzed_wells, analyzed_sample_names),
               (eventful_wells, eventful_sample_names))
    for pool, names in sources:
        for sample_name in names:
            matching = sample_name_filter(pool, sample_name)
            groups.append((sample_name, groupinto(matching, bin_func)))

    use_vic = channel == 'VIC'
    for row, (_sample, binned) in enumerate(groups):
        for key, members in binned:
            chunks = []
            for member in members:
                plate_key = (member.plate.file.dirname, member.plate.file.basename)
                well = plate_objects[plate_key].wells[member.well_name]
                extract = vic_amplitudes if use_vic else fam_amplitudes
                chunks.append(extract(well.peaks))

            if not chunks:
                continue

            # a single well's amplitudes are used as-is; several are pooled
            if len(chunks) == 1:
                amp_array = chunks[0]
            else:
                amp_array = np.concatenate(chunks)
            bin_plots[key][row][1] = (np.mean(amp_array), np.std(amp_array))

    return bin_plots
def _fpfn_pooled_mean(qplate, channels, default):
    """
    Return the mean VIC amplitude pooled over the accepted peaks of every
    well behind `channels`, or `default` when there are no such peaks.
    """
    total = 0.0
    count = 0
    for channel in channels:
        amps = vic_amplitudes(accepted_peaks(qplate.wells[channel.well.well_name]))
        # sum directly rather than mean * count: the mean of an empty well is
        # nan, and nan * 0 would poison the pooled value for the whole plate
        total += float(np.sum(amps, dtype=np.float64))
        count += len(amps)
    if count == 0:
        return default
    return total / count


def _fpfn_rate(hits, total):
    """
    Return `hits` per 10000 events, or 0.0 when `total` is zero.
    """
    if total == 0:
        return 0.0
    return 10000*(float(hits)/float(total))


def fpfn_by_bin(plate_objects, vic_channels, sample_names, bin_func):
    """
    Count false positives and false negatives per bin and plate.

    For every plate in a bin, a VIC amplitude threshold is placed a quarter of
    the way from the pooled mean of the negative control wells to the pooled
    mean of the positive control wells.  The wells named in
    fpfn_fp_well_names / fpfn_fn_well_names are then clustered against that
    threshold.

    :param plate_objects: Mapping keyed by (dirname, basename) of a plate
                          file; each value exposes ``wells`` keyed by well name.
    :param vic_channels: Channels to analyze; each exposes ``well``.
    :param sample_names: Unused; kept so existing callers keep working.
    :param bin_func: Maps a channel to its bin key.
    :return: defaultdict(list) of bin key -> list with one
             (fp_counts, fn_counts, threshold) tuple per plate.
             fp_counts rows are (well id, positive count, positives per
             10000, plate well object); fn_counts rows are
             (well id, negative count, negatives per 10000).
             A well without accepted peaks reports a rate of 0.0.
    """
    def plate_key(channel):
        plate_file = channel.well.plate.file
        return (plate_file.dirname, plate_file.basename)

    bin_wells = defaultdict(list)

    # divide into plates
    for bin_key, group in groupinto(vic_channels, bin_func):
        # plates are keyed the same way as plate_objects so the parsed plate
        # can be looked up directly
        for plate_id, channels in groupinto(group, plate_key):
            qplate = plate_objects[plate_id]

            positives = [ch for ch in channels if ch.well.well_name in fpfn_positive_well_names]
            negatives = [ch for ch in channels if ch.well.well_name in fpfn_negative_well_names]

            # without usable controls fall back to the extremes 32767 / 0
            positive_mean = _fpfn_pooled_mean(qplate, positives, 32767)
            negative_mean = _fpfn_pooled_mean(qplate, negatives, 0)

            # threshold sits 1/4 of the way from the negative to the positive
            # mean; 4.0 keeps the division exact when both defaults are ints
            threshold = ((3*negative_mean)+positive_mean)/4.0

            fps = [ch for ch in channels if ch.well.well_name in fpfn_fp_well_names]
            fns = [ch for ch in channels if ch.well.well_name in fpfn_fn_well_names]

            fp_counts = []
            for ch in fps:
                well = qplate.wells[ch.well.well_name]
                # NOTE(review): 1 presumably selects the VIC channel -- confirm
                pos, neg = cluster_1d(accepted_peaks(well), 1, threshold)
                rate = _fpfn_rate(len(pos), len(pos)+len(neg))
                fp_counts.append((ch.well.id, len(pos), rate, well))

            fn_counts = []
            for ch in fns:
                well = qplate.wells[ch.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(well), 1, threshold)
                rate = _fpfn_rate(len(neg), len(pos)+len(neg))
                fn_counts.append((ch.well.id, len(neg), rate))

            bin_wells[bin_key].append((fp_counts, fn_counts, threshold))

    return bin_wells
def cnv_by_bin(wells, sample_names, bin_func):
    """
    Gather copy number estimates with their bounds per bin and sample.

    :param wells: Wells to analyze; each exposes ``id`` and ``channels``,
                  where channels[0] is read as FAM and channels[1] as VIC.
    :param sample_names: Sample names, one result row per name in this order.
    :param bin_func: Maps a well to its bin key.
    :return: dict of bin key -> list of [sample_name, entries] rows, where
             entries is a list of (well id, (cnv, low, high)).  NaN values
             from cnv_interval are reported as 0.
    """
    bin_plots = {}
    for key in set(bin_func(w) for w in wells):
        bin_plots[key] = [[name, []] for name in sample_names]

    groups = []
    for name in sample_names:
        groups.append((name, groupinto(sample_name_filter(wells, name), bin_func)))

    for row, (_name, binned) in enumerate(groups):
        for key, members in binned:
            estimates = []
            for member in members:
                fam, vic = member.channels[0], member.channels[1]
                cnv, low, high = cnv_interval(fam.positive_peaks,
                                              fam.negative_peaks,
                                              vic.positive_peaks,
                                              vic.negative_peaks)
                # undefined estimates are reported as 0 rather than NaN
                cnv = 0 if math.isnan(cnv) else cnv
                low = 0 if math.isnan(low) else low
                high = 0 if math.isnan(high) else high
                estimates.append((member.id, (cnv, low, high)))
            bin_plots[key][row][1] = estimates

    return bin_plots
def dnr_by_bin(plate_objects, fam_channels, sample_names, bin_func):
    """
    @deprecated - use metrics

    Compute concentration estimates per bin for each sample.

    :param plate_objects: Mapping keyed by (dirname, basename) of a plate
                          file; each value exposes ``wells`` keyed by well name.
    :param fam_channels: Channels to analyze; each exposes ``well``.
    :param sample_names: Sample names; the i-th name fills the i-th row.
    :param bin_func: Maps a channel to its bin key.
    :return: dict of bin key -> list of [level, stats] rows with the fixed
             levels 0.001, 0.01, 0.1, 1, 5.  stats is None, or
             (mean concentration, [(conc, low, high), ...]) with every value
             floored at 0.0001.
    """
    floor = 0.0001
    # TODO: do sample names here
    levels = (0.001, 0.01, 0.1, 1, 5)

    bin_plots = {}
    for key in set(bin_func(c) for c in fam_channels):
        bin_plots[key] = [[level, None] for level in levels]

    groups = []
    for name in sample_names:
        matching = sample_name_channel_filter(fam_channels, name)
        groups.append((name, groupinto(matching, bin_func)))

    for row, (_name, binned) in enumerate(groups):
        for key, members in binned:
            conc_array = []
            for member in members:
                plate_key = (member.well.plate.file.dirname, member.well.plate.file.basename)
                well = plate_objects[plate_key].wells[member.well.well_name]
                # TODO: use dynamic threshold or keep 4000?
                pos, neg = cluster_1d(accepted_peaks(well), 0, 4000)
                conc, clow, chigh = concentration_interval(len(pos), len(neg), droplet_vol=well.droplet_volume)
                conc_array.append((max(conc, floor), max(clow, floor), max(chigh, floor)))

            if conc_array:
                conc_mean = np.mean([entry[0] for entry in conc_array])
                bin_plots[key][row][1] = (conc_mean, conc_array)

    return bin_plots
def execute_built_query(base_q, group_by_plate, objects_expected=False, derived_metric=None):
    """
    Runs a query produced by one of the build_*_query functions and shapes
    the rows into 4-tuples:

    [statistic, timepoint, object id, object display string]

    :param base_q: The query to execute.
    :param group_by_plate: Whether to group the results by plate.
    :param objects_expected: Whether the first member of each row is expected
                             to be a model object.
    :param derived_metric: If the metric is a property derived from database
                           columns, the name of this property.
    :return: A result set of (stat, run_time, object id, identifying information)
    """
    grouped_in_sql = group_by_plate and not objects_expected
    if grouped_in_sql:
        query = base_q.group_by(Plate.id).order_by(Plate.run_time)
    else:
        query = base_q.order_by(Plate.run_time, WellMetric.well_name)

    rows = query.all()
    if grouped_in_sql:
        # the database already aggregated per plate; rows are returned as-is
        return rows

    if group_by_plate:
        labelled = [(obj, when, obj_id, plate_name)
                    for obj, when, obj_id, plate_name, _well_name in rows]
    else:
        labelled = [(obj, when, obj_id, "%s - %s" % (plate_name, well_name))
                    for obj, when, obj_id, plate_name, well_name in rows]

    if not objects_expected:
        return labelled

    import numpy as np
    # a derived property cannot be aggregated in SQL, so read it off each
    # object and, when grouping by plate, average it here
    # TODO: could there be a nicer way to define virtual props in SQLAlchemy
    derived = [(getattr(obj, derived_metric), when, obj_id, label)
               for obj, when, obj_id, label in labelled]
    if not group_by_plate:
        return derived

    averaged = []
    for group_id, members in groupinto(derived, lambda tup: tup[2]):
        present = [member[0] for member in members if member[0] is not None]
        first = members[0]
        averaged.append((np.mean(present), first[1], group_id, first[3]))

    # a group without any usable value averages to NaN and is left out
    return [record for record in averaged if not math.isnan(record[0])]
def runs(self, id=None):
    """
    Show the recorded runs of one droplet generator together with the
    metrics of the wells attributed to each run.

    Sets ``c.dg`` to the droplet generator and ``c.runs`` to a list of
    (run number, run, well rows) tuples ordered by run number.  Each well
    row is (WellMetric, well id, sample name, dg run number, consumable
    channel number), ordered by consumable channel number.  Run numbers that
    have wells but no DropletGeneratorRun record are left out.

    The optional ``rp_id`` request parameter selects the reprocess config
    whose plate metrics are used; when absent or empty, metrics without a
    reprocess config are used.

    :param id: Droplet generator id taken from the request.
    :return: The rendered '/product/dg/runs.html' template.

    Aborts with 404 when id is missing, not an integer, or unknown, and with
    400 when rp_id is present but not an integer.
    """
    try:
        dg_id = int(id)
    except (TypeError, ValueError):
        # a missing or malformed id cannot name a droplet generator
        abort(404)

    c.dg = Session.query(DropletGenerator).get(dg_id)
    if not c.dg:
        abort(404)

    reprocess_config_id = request.params.get('rp_id', None)
    if reprocess_config_id:
        try:
            reprocess_config_id = int(reprocess_config_id)
        except ValueError:
            abort(400)
    else:
        # treat an empty rp_id like a missing one so the filter below
        # compares against NULL instead of an empty string
        reprocess_config_id = None

    # "!= None" is deliberate: SQLAlchemy renders it as IS NOT NULL
    dg_runs = Session.query(DropletGeneratorRun).filter(
        and_(DropletGeneratorRun.droplet_generator_id == c.dg.id,
             DropletGeneratorRun.run_number != None)).all()
    run_dict = dict([(run.run_number, run) for run in dg_runs])

    # filter on the loaded generator's integer id, not the raw request value
    wells = Session.query(WellMetric,
                          QLBWell.id,
                          QLBWell.sample_name,
                          QLBWell.dg_run_number,
                          QLBWell.consumable_channel_num)\
                   .join(QLBWell)\
                   .join(PlateMetric)\
                   .filter(and_(QLBWell.droplet_generator_id == c.dg.id,
                                QLBWell.dg_run_number != None,
                                PlateMetric.reprocess_config_id == reprocess_config_id))\
                   .order_by(QLBWell.dg_run_number, QLBWell.consumable_channel_num).all()

    # index 3 of each row is the dg run number, index 4 the channel number
    sorted_runs = sorted(groupinto(wells, lambda tup: tup[3]), key=operator.itemgetter(0))
    c.runs = [(run_id, run_dict[run_id], sorted(info, key=operator.itemgetter(4)))
              for run_id, info in sorted_runs if run_dict.get(run_id, None)]
    return render('/product/dg/runs.html')
def events_by_reader_dg(wells, quant_func):
    """
    Group wells by reader and then by droplet generator, pairing each well's
    position index with a quantity computed from the well.

    The position index is
    (setup id * 96 + consumable chip number * 12 + consumable channel number) / 96.

    :param wells: Wells to group; the incoming order is kept.
    :param quant_func: Maps a well to the quantity to report.
    :return: list of (box2 id, [(droplet generator id, [(index, quantity), ...]), ...])
    """
    def position(well):
        return (well.plate.plate.setup.id*96.0+well.consumable_chip_num*12.0+well.consumable_channel_num)/96.0

    def reader_of(pair):
        return pair[1].plate.plate.box2_id

    def generator_of(pair):
        return pair[1].droplet_generator_id

    # assume sorted by host_datetime incoming
    indexed = [(position(well), well) for well in wells]

    reader_groups = []
    for reader_num, reader_wells in groupinto(indexed, reader_of):
        per_generator = []
        for dg_id, dg_wells in groupinto(reader_wells, generator_of):
            points = [(idx, quant_func(well)) for idx, well in dg_wells]
            per_generator.append((dg_id, points))
        reader_groups.append((reader_num, per_generator))

    return reader_groups
def singleplex_by_bin(fam_channels, bin_func):
    """
    Collect the stored concentration and confidence bounds per bin.

    Channels are ordered by (setup id, consumable chip number, consumable
    channel number) before they are grouped.

    :param fam_channels: Channels to report; each exposes ``well``,
                         ``concentration``, ``conf_lower_bound`` and
                         ``conf_upper_bound``.
    :param bin_func: Maps a channel to its bin key.
    :return: dict of bin key -> list of
             (well id, (concentration, lower bound, upper bound))
    """
    def run_order(channel):
        well = channel.well
        return (well.plate.plate.setup.id,
                well.consumable_chip_num,
                well.consumable_channel_num)

    groups = groupinto(sorted(fam_channels, key=run_order), bin_func)

    # register every bin first, then fill it from the stored values (no
    # re-clustering of raw peaks is done here)
    plots = {}
    for box2, _members in groups:
        plots[box2] = []

    for box2, members in groups:
        plots[box2] = [(ch.well.id, (ch.concentration, ch.conf_lower_bound, ch.conf_upper_bound))
                       for ch in members]

    return plots
def red_by_bin(fam_channels, sample_names, bin_func):
    """
    Collect positive peak counts per bin, keyed by the two cpd values
    attached to each sample.

    :param fam_channels: Channels to report; each exposes ``well`` and
                         ``positive_peaks``.
    :param sample_names: Sequence of (sample name, (sample_cpd, bg_cpd)) pairs.
    :param bin_func: Maps a channel to its bin key.
    :return: dict of bin key -> defaultdict(dict) such that
             result[bin][sample_cpd][bg_cpd] is a list of
             (well id, positive peak count); a missing count is reported as 0.
    """
    bin_plots = {}
    for key in set(bin_func(c) for c in fam_channels):
        bin_plots[key] = defaultdict(dict)

    sample_cpd_map = dict(sample_names)

    groups = []
    for sample_name, (_sample_cpd, _bg_cpd) in sample_names:
        matching = sample_name_channel_filter(fam_channels, sample_name)
        if not matching:
            continue
        groups.append((sample_name, groupinto(matching, bin_func)))

    for sample, binned in groups:
        for key, members in binned:
            counts = [(ch.well.id, ch.positive_peaks or 0) for ch in members]
            cpds = sample_cpd_map[sample]
            bin_plots[key][cpds[0]][cpds[1]] = counts

    return bin_plots
def duplex_by_bin(fam_channels, sample_names, bin_func):
    """
    Collect stored concentrations per bin for each sample.

    Channels are ordered by (setup id, consumable chip number, consumable
    channel number) before they are filtered and grouped.

    :param fam_channels: Channels to report; each exposes ``well``,
                         ``well_id``, ``concentration``, ``conf_lower_bound``
                         and ``conf_upper_bound``.
    :param sample_names: Sample names; the i-th name fills the i-th row.
    :param bin_func: Maps a channel to its bin key.
    :return: dict of bin key -> list of [level, stats] rows with the fixed
             levels 0.125, 0.25, 0.5, 1.02, 2.  stats is None, or
             (mean concentration, [(conc, low, high, well id), ...]) with the
             three values floored at 0.0001.
    """
    def run_order(channel):
        return (channel.well.plate.plate.setup.id,
                channel.well.consumable_chip_num,
                channel.well.consumable_channel_num)

    floor = 0.0001
    levels = (0.125, 0.25, 0.5, 1.02, 2)
    ordered = sorted(fam_channels, key=run_order)

    bin_plots = {}
    for key in set(bin_func(ch) for ch in ordered):
        bin_plots[key] = [[level, None] for level in levels]

    groups = []
    for name in sample_names:
        matching = sample_name_channel_filter(ordered, name)
        groups.append((name, groupinto(matching, bin_func)))

    # this is just dnr with different coefficients -- can that be patternized?
    for row, (_name, binned) in enumerate(groups):
        for key, members in binned:
            conc_array = []
            for ch in members:
                conc = max(ch.concentration, floor)
                clow = max(ch.conf_lower_bound, floor)
                chigh = max(ch.conf_upper_bound, floor)
                conc_array.append((conc, clow, chigh, ch.well_id))

            if conc_array:
                conc_mean = np.mean([entry[0] for entry in conc_array])
                bin_plots[key][row][1] = (conc_mean, conc_array)

    return bin_plots