def well_statistics(qlwell, override_thresholds=None):
    """
    Build a WellStatisticsUI holding one WellChannelStatisticsUI per channel
    of the supplied well.

    For each channel, if a truthy override threshold is supplied the accepted
    peaks are re-clustered against that threshold and the concentration and
    its interval are recomputed from the resulting positive/negative counts.
    Otherwise the threshold, concentration and bounds already stored on the
    channel's statistics are reported as-is.

    :param qlwell: The well to summarize; its `channels` are enumerated.
    :param override_thresholds: Optional sequence indexed by channel number.
                                Falsy entries (None, 0) mean "no override".
                                Defaults to no override on any channel.
    :return: The populated WellStatisticsUI.
    """
    from pyqlb.nstats import concentration, concentration_interval
    from pyqlb.nstats.peaks import cluster_1d
    from pyqlb.nstats.well import well_observed_positives_negatives, accepted_peaks

    if override_thresholds is None:
        override_thresholds = [None] * len(qlwell.channels)

    summary = WellStatisticsUI(channels=[])
    for chan_idx, chan in enumerate(qlwell.channels):
        manual_threshold = override_thresholds[chan_idx]
        if manual_threshold:
            # manual threshold: re-cluster and recompute concentration
            pos_peaks, neg_peaks = cluster_1d(accepted_peaks(qlwell),
                                              chan_idx,
                                              manual_threshold)
            chan_threshold = manual_threshold
            chan_conc = concentration(len(pos_peaks),
                                      len(neg_peaks),
                                      qlwell.droplet_volume)
            chan_interval = concentration_interval(len(pos_peaks),
                                                   len(neg_peaks),
                                                   qlwell.droplet_volume)
        else:
            # no override: report what is already stored on the channel
            pos_peaks, neg_peaks, _unclassified = \
                well_observed_positives_negatives(qlwell, chan_idx)
            stored = chan.statistics
            chan_threshold = stored.threshold
            chan_conc = stored.concentration
            chan_interval = (stored.concentration,
                             stored.concentration_lower_bound,
                             stored.concentration_upper_bound)

        summary.channels.append(
            WellChannelStatisticsUI(positives=len(pos_peaks),
                                    negatives=len(neg_peaks),
                                    threshold=chan_threshold,
                                    concentration=chan_conc,
                                    concentration_interval=chan_interval))

    return summary
def _cluster_metric_value_and_pct(result):
    """
    Split a cluster-metric result into a (value, percent) pair.

    Element 1 of the result is returned unchanged as the value; element 2 is
    multiplied by 100 (presumably a fraction -- confirm against the pyqlb
    helpers that produce it).  A None result yields (None, None).
    """
    if result is None:
        return None, None
    return result[1], result[2] * 100


def stats_for_qlp_well(well, compute_clusters=False, override_thresholds=None):
    """
    Return statistics about a QLWell object read from a QLP file.
    The QLWell object should have a populated `peaks` attribute
    (reading from QLBs won't work).

    :param well: The QLWell to analyze.  Channels 0 and 1 are processed.
    :param compute_clusters: If true, the second return value comes from
                             well_cluster_peaks(); otherwise it is a
                             placeholder dict with empty peak lists.
    :param override_thresholds: Optional two-element sequence of thresholds
                                indexed by channel number.  A falsy value (or
                                falsy element) means "use the stored
                                threshold" for that channel.
    :return: A (statistics, clusters) tuple, where statistics is the object
             returned by well_statistics() with additional per-channel and
             well-level attributes set on it.
    """
    from pyqlb.nstats.peaks import cluster_1d, channel_amplitudes
    from pyqlb.nstats.well import accepted_peaks, above_min_amplitude_peaks, well_channel_sp_values, well_cluster_peaks
    from pyqlb.nstats.well import well_observed_positives_negatives, well_s2d_values, getClusters
    from pyqlb.nstats.well import high_flier_droplets, low_flier_droplets, singleRain_droplets, doubleRain_droplets, diagonal_scatter
    from numpy import mean as np_mean, std as np_std

    if not override_thresholds:
        override_thresholds = (None, None)

    statistics = well_statistics(well, override_thresholds=override_thresholds)
    accepted = len(accepted_peaks(well))
    num_above_min = len(above_min_amplitude_peaks(well))

    # polydispersity and s/p values, one channel at a time
    for chan in (0, 1):
        chan_stats = statistics[chan]
        chan_threshold = override_thresholds[chan]

        if num_above_min > 0 and accepted > 0:
            # both calculators return a 3-tuple whose first element is the
            # list of polydisperse peak sets; only that element is used here
            if well.sum_amplitude_bins:
                peaksets, _boundaries, _amps = revb_polydisperse_peaks(well, chan, threshold=chan_threshold)
            else:
                peaksets, _boundaries, _width_gates = polydisperse_peaks(well, chan, threshold=chan_threshold)
            poly_peaks = sum(len(peakset) for peakset in peaksets)
            chan_stats.revb_polydispersity_pct = 100*float(poly_peaks)/num_above_min
        else:
            chan_stats.revb_polydispersity_pct = 0

        s, p_plus, p, p_minus = well_channel_sp_values(well, chan, override_threshold=chan_threshold)
        chan_stats.s_value = s
        chan_stats.p_plus = p_plus
        chan_stats.p_plus_drops = int(p_plus*accepted) if p_plus is not None else None
        chan_stats.p = p
        chan_stats.p_drops = int(p*accepted) if p is not None else None
        chan_stats.p_minus = p_minus
        chan_stats.p_minus_drops = int(p_minus*accepted) if p_minus is not None else None

    ## compute s2d plots
    s2d_vals = well_s2d_values(well, thresholds=override_thresholds)
    statistics[0].s2d_value = s2d_vals[0] if s2d_vals is not None else None
    statistics[1].s2d_value = s2d_vals[1] if s2d_vals is not None else None

    ## compute extra cluster metrics
    droplet_clusters = getClusters(well, override_thresholds)
    (statistics.diagonal_scatter,
     statistics.diagonal_scatter_pct) = _cluster_metric_value_and_pct(diagonal_scatter(droplet_clusters))

    for chan in (0, 1):
        chan_stats = statistics[chan]

        (chan_stats.high_flier_value,
         chan_stats.high_flier_pct) = _cluster_metric_value_and_pct(high_flier_droplets(droplet_clusters, chan))

        (chan_stats.low_flier_value,
         chan_stats.low_flier_pct) = _cluster_metric_value_and_pct(low_flier_droplets(droplet_clusters, chan))

        (chan_stats.single_rain_value,
         chan_stats.single_rain_pct) = _cluster_metric_value_and_pct(singleRain_droplets(droplet_clusters, chan))

        (chan_stats.double_rain_value,
         chan_stats.double_rain_pct) = _cluster_metric_value_and_pct(doubleRain_droplets(droplet_clusters, chan))

    if compute_clusters:
        clusters = well_cluster_peaks(well, override_thresholds)
    else:
        clusters = {'positive_peaks': {'positive_peaks': [], 'negative_peaks': []},
                    'negative_peaks': {'positive_peaks': [], 'negative_peaks': []}}

    # cheap hack
    statistics.alg_version = "%s.%s/%s.%s" % (well.statistics.peak_alg_major_version,
                                              well.statistics.peak_alg_minor_version,
                                              well.statistics.quant_alg_major_version,
                                              well.statistics.quant_alg_minor_version)
    statistics.ref_copy_num = well.ref_copy_num
    statistics[0].decision_tree = well.channels[0].decision_tree_verbose
    statistics[1].decision_tree = well.channels[1].decision_tree_verbose
    # end cheap hack

    # SNR
    for chan in (0, 1):
        if override_thresholds[chan]:
            # TODO add this to pyqlb.nstats.well instead
            pos, neg = cluster_1d(accepted_peaks(well), chan, override_thresholds[chan])
        else:
            pos, neg, _unknown = well_observed_positives_negatives(well, chan)

        for attr, coll in (('positive_snr', pos), ('negative_snr', neg)):
            # Test the collection actually being measured.  Testing `pos`
            # for both attributes zeroed negative_snr whenever there were no
            # positives, and took the mean/std of an empty array (NaN) when
            # there were positives but no negatives.
            if len(coll) > 0:
                amps = channel_amplitudes(coll, chan)
                amp_mean = np_mean(amps)
                amp_std = np_std(amps)
                if amp_std > 0:
                    setattr(statistics[chan], attr, amp_mean/amp_std)
                else:
                    # zero spread: report a large sentinel instead of dividing by zero
                    setattr(statistics[chan], attr, 10000)
            else:
                setattr(statistics[chan], attr, 0)

    for chan in (0, 1):
        means, stds = total_events_amplitude_vals(well, chan)
        statistics[chan].total_events_amplitude_mean = means
        statistics[chan].total_events_amplitude_stdev = stds

    return statistics, clusters
def _compute_well_channel_metrics(qlwell, well_channel_metric, channel_num):
    """
    Internal method for populating a WellChannelMetric object from statistics
    determined by analyzing the QLWell's channel object.

    This will populate the well_channel_metric object with information such
    as positives and negatives, concentration, polydispersity, and other
    metrics that have separate values for FAM and VIC channels.

    :param qlwell: The QLWell object created by reading a QLP file.
    :param well_channel_metric: The WellChannelMetric object to populate.
    :param channel_num: Which channel to analyze (0 uses fam_amplitudes,
                        1 uses vic_amplitudes).
    :return: The same well_channel_metric object, populated in place.
    :raises ValueError: If channel_num is neither 0 nor 1.  Note that the
                        gating/threshold/concentration attributes have
                        already been written by the time this is raised.
    """
    # for convenience
    wcm = well_channel_metric
    channel = qlwell.channels[channel_num]
    stats = channel.statistics

    wcm.min_quality_gating = stats.min_quality_gate
    wcm.min_quality_gating_conf = stats.min_quality_gate_conf
    wcm.min_width_gate = stats.min_width_gate
    wcm.min_width_gate_conf = stats.min_width_gate_conf
    wcm.max_width_gate = stats.max_width_gate
    wcm.max_width_gate_conf = stats.max_width_gate_conf
    wcm.width_gating_sigma = stats.width_gating_sigma

    # TODO: try to guarantee automatic threshold here?
    wcm.threshold = stats.threshold
    wcm.threshold_conf = stats.threshold_conf
    wcm.auto_threshold_expected = False
    wcm.concentration = stats.concentration
    wcm.conc_lower_bound = stats.concentration_lower_bound
    wcm.conc_upper_bound = stats.concentration_upper_bound
    wcm.conc_calc_mode = stats.concentration_calc_mode
    wcm.clusters_automatic = well_channel_automatic_classification(qlwell, channel_num)

    wcm.decision_tree_flags = channel.decision_tree_flags

    ap = accepted_peaks(qlwell)

    if channel_num == 0:
        ampfunc = fam_amplitudes
    elif channel_num == 1:
        ampfunc = vic_amplitudes
    else:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError("Incompatible channel number: %s" % channel_num)

    # extract each amplitude array once and reuse it for mean and stdev
    accepted_amps = ampfunc(ap)
    wcm.amplitude_mean = np.mean(accepted_amps)
    wcm.amplitude_stdev = np.std(accepted_amps)

    all_amps = ampfunc(qlwell.peaks)
    wcm.total_events_amplitude_mean = np.mean(all_amps)
    wcm.total_events_amplitude_stdev = np.std(all_amps)

    above_min_peaks = above_min_amplitude_peaks(qlwell)
    quality_gated_peaks = quality_rejected(above_min_peaks, wcm.min_quality_gating, channel_num, include_vertical_streak_flagged=False)
    wcm.quality_gated_peaks = len(quality_gated_peaks)
    width_gated_peaks = width_rejected(above_min_peaks, wcm.min_width_gate, wcm.max_width_gate, channel_num)
    wcm.width_gated_peaks = len(width_gated_peaks)
    # TODO add min amplitude, vertical streak

    wcm.s_value = separation_value(ap, channel_num, wcm.threshold)
    p_plus, p, p_minus = rain_pvalues(ap, channel_num, wcm.threshold)
    wcm.rain_p_plus = p_plus
    wcm.rain_p = p
    wcm.rain_p_minus = p_minus

    ## extra cluster events...
    NEW_DROPLET_CLUSTER_METRICS_CALCULATOR.compute(qlwell, channel, wcm)

    # NOTE: DEFAULT_TEAVALS.compute(...) is intentionally not invoked here;
    # the total-events amplitude mean/stdev are computed inline above.
    DEFAULT_POLYD_CALC.compute(qlwell, channel, wcm)
    DEFAULT_EXTRAC_CALC.compute(qlwell, channel, wcm)
    DEFAULT_NTC_POSITIVE_CALCULATOR.compute(qlwell, channel, wcm)

    # re-read statistics from the channel in case the calculators above
    # replaced it (the original code re-fetched it here as well)
    wcm.baseline_mean = qlwell.channels[channel_num].statistics.baseline_mean
    wcm.baseline_stdev = qlwell.channels[channel_num].statistics.baseline_stdev
    wcm.cluster_conf = qlwell.channels[channel_num].statistics.cluster_conf

    positive_peaks, negative_peaks, unclassified_peaks = well_observed_positives_negatives(qlwell, channel_num)
    if len(positive_peaks) > 0 or len(negative_peaks) > 0:
        wcm.positive_peaks = len(positive_peaks)
        wcm.negative_peaks = len(negative_peaks)

        positive_amps = ampfunc(positive_peaks)
        wcm.positive_mean = np.mean(positive_amps)
        wcm.positive_stdev = np.std(positive_amps)

        negative_amps = ampfunc(negative_peaks)
        wcm.negative_mean = np.mean(negative_amps)
        wcm.negative_stdev = np.std(negative_amps)

        wcm.positive_skew = skew(positive_peaks, channel_num)
        wcm.positive_kurtosis = kurtosis(positive_peaks, channel_num)
        wcm.nonpositive_skew = skew(negative_peaks, channel_num)
        wcm.nonpositive_kurtosis = kurtosis(negative_peaks, channel_num)

        # CLUSTER-TODO: this is not going to be right for clusters.  Consider
        # changing this.
        wcm.concentration_rise_ratio = quartile_concentration_ratio(qlwell, peaks=ap,
                                                                    channel_num=channel_num, threshold=wcm.threshold,
                                                                    min_events=4000)
    else:
        # nothing classified: describe the shape of all accepted peaks instead
        wcm.nonpositive_skew = skew(ap, channel_num)
        wcm.nonpositive_kurtosis = kurtosis(ap, channel_num)

    # daisy chain just in case
    return wcm