示例#1
0
def well_statistics(qlwell, override_thresholds=None):
    """
    Build a WellStatisticsUI summarizing every channel of a well.

    For each channel, one WellChannelStatisticsUI is appended carrying the
    positive/negative counts, the threshold, the concentration and the
    concentration interval.

    :param qlwell: The well object; its `channels` are iterated in order.
    :param override_thresholds: Optional sequence indexed by channel number.
                                A falsy entry (including None) means "use the
                                values already stored on the channel"; a truthy
                                entry is used to re-cluster the accepted peaks
                                and recompute the concentration.  Defaults to
                                no override on any channel.
    :return: The populated WellStatisticsUI.
    """
    from pyqlb.nstats import concentration, concentration_interval
    from pyqlb.nstats.peaks import cluster_1d
    from pyqlb.nstats.well import well_observed_positives_negatives, accepted_peaks

    thresholds = override_thresholds
    if thresholds is None:
        thresholds = [None] * len(qlwell.channels)

    summary = WellStatisticsUI(channels=[])
    for channel_num, channel in enumerate(qlwell.channels):
        manual_threshold = thresholds[channel_num]
        if manual_threshold:
            # Manual threshold: re-split the accepted peaks and recompute
            # the concentration figures from the resulting counts.
            pos, neg = cluster_1d(accepted_peaks(qlwell), channel_num, manual_threshold)
            used_threshold = manual_threshold
            conc_value = concentration(len(pos), len(neg), qlwell.droplet_volume)
            conc_bounds = concentration_interval(len(pos), len(neg), qlwell.droplet_volume)
        else:
            # No override: trust the calls and statistics stored on the well.
            pos, neg, _ = well_observed_positives_negatives(qlwell, channel_num)
            used_threshold = channel.statistics.threshold
            conc_value = channel.statistics.concentration
            conc_bounds = (channel.statistics.concentration,
                           channel.statistics.concentration_lower_bound,
                           channel.statistics.concentration_upper_bound)

        summary.channels.append(
            WellChannelStatisticsUI(positives=len(pos),
                                    negatives=len(neg),
                                    threshold=used_threshold,
                                    concentration=conc_value,
                                    concentration_interval=conc_bounds))

    return summary
示例#2
0
def stats_for_qlp_well(well, compute_clusters=False, override_thresholds=None):
    """
    Return statistics about a QLWell object read from a QLP file.
    The QLWell object should have a populated `peaks` attribute (reading from QLBs won't work)

    :param well: The QLWell to analyze.  Channels 0 and 1 are processed.
    :param compute_clusters: If true, the second return value is the result of
                             well_cluster_peaks(well, override_thresholds);
                             otherwise it is a placeholder dict whose peak
                             lists are all empty.
    :param override_thresholds: Optional pair of per-channel thresholds
                                (index 0 and index 1).  A falsy value is
                                replaced with (None, None), i.e. no override.
    :return: A (statistics, clusters) tuple, where `statistics` is the object
             returned by well_statistics() with additional per-channel and
             well-level attributes set on it.
    """
    from pyqlb.nstats.peaks import cluster_1d, channel_amplitudes
    from pyqlb.nstats.well import accepted_peaks, above_min_amplitude_peaks, well_channel_sp_values, well_cluster_peaks
    from pyqlb.nstats.well import well_observed_positives_negatives, well_s2d_values, getClusters
    from pyqlb.nstats.well import high_flier_droplets, low_flier_droplets, singleRain_droplets, doubleRain_droplets, diagonal_scatter
    from numpy import mean as np_mean, std as np_std

    channel_nums = (0, 1)

    if not override_thresholds:
        override_thresholds = (None, None)

    statistics = well_statistics(well, override_thresholds=override_thresholds)
    accepted = len(accepted_peaks(well))
    num_above_min = len(above_min_amplitude_peaks(well))

    def _drops(fraction):
        # Convert a fraction of accepted droplets into a droplet count.
        return int(fraction*accepted) if fraction is not None else None

    def _value_and_pct(result):
        # Cluster metric helpers return None or a sequence whose items 1 and 2
        # are the value and a fraction; the fraction is reported in percent.
        if result is None:
            return None, None
        return result[1], result[2] * 100

    # polydispersity and separation/rain values, per channel
    for chan in channel_nums:
        chan_stats = statistics[chan]
        threshold = override_thresholds[chan]

        if num_above_min > 0 and accepted > 0:
            if well.sum_amplitude_bins:
                peaksets, _, _ = revb_polydisperse_peaks(well, chan, threshold=threshold)
            else:
                peaksets, _, _ = polydisperse_peaks(well, chan, threshold=threshold)
            poly_peaks = sum(len(peakset) for peakset in peaksets)
            # Both calculators report into the same attribute.
            chan_stats.revb_polydispersity_pct = 100*float(poly_peaks)/num_above_min
        else:
            chan_stats.revb_polydispersity_pct = 0

        s, p_plus, p, p_minus = well_channel_sp_values(well, chan, override_threshold=threshold)
        chan_stats.s_value = s
        chan_stats.p_plus = p_plus
        chan_stats.p_plus_drops = _drops(p_plus)
        chan_stats.p = p
        chan_stats.p_drops = _drops(p)
        chan_stats.p_minus = p_minus
        chan_stats.p_minus_drops = _drops(p_minus)

    ## compute s2d plots
    s2d_vals = well_s2d_values(well, thresholds=override_thresholds)
    for chan in channel_nums:
        statistics[chan].s2d_value = s2d_vals[chan] if s2d_vals is not None else None

    ## compute extra cluster metrics
    droplet_clusters = getClusters(well, override_thresholds)
    statistics.diagonal_scatter, statistics.diagonal_scatter_pct = \
        _value_and_pct(diagonal_scatter(droplet_clusters))
    for chan in channel_nums:
        chan_stats = statistics[chan]
        chan_stats.high_flier_value, chan_stats.high_flier_pct = \
            _value_and_pct(high_flier_droplets(droplet_clusters, chan))
        chan_stats.low_flier_value, chan_stats.low_flier_pct = \
            _value_and_pct(low_flier_droplets(droplet_clusters, chan))
        chan_stats.single_rain_value, chan_stats.single_rain_pct = \
            _value_and_pct(singleRain_droplets(droplet_clusters, chan))
        chan_stats.double_rain_value, chan_stats.double_rain_pct = \
            _value_and_pct(doubleRain_droplets(droplet_clusters, chan))

    if compute_clusters:
        clusters = well_cluster_peaks(well, override_thresholds)
    else:
        clusters = {'positive_peaks': {'positive_peaks': [], 'negative_peaks': []},
                    'negative_peaks': {'positive_peaks': [], 'negative_peaks': []}}

    # cheap hack
    statistics.alg_version = "%s.%s/%s.%s" % (well.statistics.peak_alg_major_version,
                                              well.statistics.peak_alg_minor_version,
                                              well.statistics.quant_alg_major_version,
                                              well.statistics.quant_alg_minor_version)
    statistics.ref_copy_num = well.ref_copy_num
    for chan in channel_nums:
        statistics[chan].decision_tree = well.channels[chan].decision_tree_verbose
    # end cheap hack

    # SNR
    for chan in channel_nums:
        if override_thresholds[chan]:
            # TODO add this to pyqlb.nstats.well instead
            pos, neg = cluster_1d(accepted_peaks(well), chan, override_thresholds[chan])
        else:
            pos, neg, _ = well_observed_positives_negatives(well, chan)

        for attr, coll in (('positive_snr', pos), ('negative_snr', neg)):
            # Gate on the collection actually being measured: checking `pos`
            # for both attributes took the mean of an empty negative set and
            # zeroed negative_snr whenever there were no positives.
            if len(coll) > 0:
                amps = channel_amplitudes(coll, chan)
                amp_mean = np_mean(amps)
                amp_std = np_std(amps)
                if amp_std > 0:
                    setattr(statistics[chan], attr, amp_mean/amp_std)
                else:
                    # zero spread: report a fixed large value instead of dividing by zero
                    setattr(statistics[chan], attr, 10000)
            else:
                setattr(statistics[chan], attr, 0)

    for chan in channel_nums:
        means, stds = total_events_amplitude_vals(well, chan)
        statistics[chan].total_events_amplitude_mean = means
        statistics[chan].total_events_amplitude_stdev = stds

    return statistics, clusters
示例#3
0
def _compute_well_channel_metrics(qlwell, well_channel_metric, channel_num):
    """
    Internal method for populating a WellChannelMetric object from statistics
    determined by analyzing the QLWell's channel object.

    This will populate the well_channel_metric object with information such
    as positives and negatives, concentration, polydispersity, and other
    metrics that have separate values for FAM and VIC channels.

    :param qlwell: The QLWell object created by reading a QLP file.
    :param well_channel_metric: The WellChannelMetric object to populate.
    :param channel_num: Which channel to analyze (0 uses the FAM amplitudes,
                        1 uses the VIC amplitudes).
    :return: The same well_channel_metric object, populated in place.
    :raises ValueError: If channel_num is neither 0 nor 1.  Note that the
                        gating/threshold/concentration attributes have
                        already been copied onto the metric by then.
    """
    # for convenience
    wcm = well_channel_metric
    channel = qlwell.channels[channel_num]
    stats = channel.statistics
    wcm.min_quality_gating = stats.min_quality_gate
    wcm.min_quality_gating_conf = stats.min_quality_gate_conf
    wcm.min_width_gate = stats.min_width_gate
    wcm.min_width_gate_conf = stats.min_width_gate_conf
    wcm.max_width_gate = stats.max_width_gate
    wcm.max_width_gate_conf = stats.max_width_gate_conf
    wcm.width_gating_sigma = stats.width_gating_sigma
    # TODO: try to guarantee automatic threshold here?
    wcm.threshold = stats.threshold
    wcm.threshold_conf = stats.threshold_conf
    wcm.auto_threshold_expected = False
    wcm.concentration = stats.concentration
    wcm.conc_lower_bound = stats.concentration_lower_bound
    wcm.conc_upper_bound = stats.concentration_upper_bound
    wcm.conc_calc_mode = stats.concentration_calc_mode
    wcm.clusters_automatic = well_channel_automatic_classification(qlwell, channel_num)

    wcm.decision_tree_flags = channel.decision_tree_flags

    ap = accepted_peaks(qlwell)

    if channel_num == 0:
        ampfunc = fam_amplitudes
    elif channel_num == 1:
        ampfunc = vic_amplitudes
    else:
        # call form of raise: valid on both Python 2 and Python 3
        raise ValueError("Incompatible channel number: %s" % channel_num)

    # extract each amplitude array once and reuse it for mean and stdev
    accepted_amps = ampfunc(ap)
    wcm.amplitude_mean = np.mean(accepted_amps)
    wcm.amplitude_stdev = np.std(accepted_amps)

    all_amps = ampfunc(qlwell.peaks)
    wcm.total_events_amplitude_mean = np.mean(all_amps)
    wcm.total_events_amplitude_stdev = np.std(all_amps)

    above_min_peaks = above_min_amplitude_peaks(qlwell)
    quality_gated_peaks = quality_rejected(above_min_peaks, wcm.min_quality_gating, channel_num, include_vertical_streak_flagged=False)
    wcm.quality_gated_peaks = len(quality_gated_peaks)
    width_gated_peaks = width_rejected(above_min_peaks, wcm.min_width_gate, wcm.max_width_gate, channel_num)
    wcm.width_gated_peaks = len(width_gated_peaks)
    # TODO add min amplitude, vertical streak

    wcm.s_value = separation_value(ap, channel_num, wcm.threshold)
    p_plus, p, p_minus = rain_pvalues(ap, channel_num, wcm.threshold)

    wcm.rain_p_plus = p_plus
    wcm.rain_p = p
    wcm.rain_p_minus = p_minus

    ## extra cluster events...
    NEW_DROPLET_CLUSTER_METRICS_CALCULATOR.compute(qlwell, channel, wcm)

    DEFAULT_POLYD_CALC.compute(qlwell, channel, wcm)
    DEFAULT_EXTRAC_CALC.compute(qlwell, channel, wcm)
    DEFAULT_NTC_POSITIVE_CALCULATOR.compute(qlwell, channel, wcm)

    # Re-read the statistics object from the channel here rather than reusing
    # `stats`, in case a calculator above replaced it.
    wcm.baseline_mean = channel.statistics.baseline_mean
    wcm.baseline_stdev = channel.statistics.baseline_stdev
    wcm.cluster_conf = channel.statistics.cluster_conf

    positive_peaks, negative_peaks, unclassified_peaks = well_observed_positives_negatives(qlwell, channel_num)
    if len(positive_peaks) > 0 or len(negative_peaks) > 0:
        wcm.positive_peaks = len(positive_peaks)
        wcm.negative_peaks = len(negative_peaks)

        # NOTE(review): if only one of the two sets is empty, the mean/stdev
        # for that set is taken over an empty collection -- confirm that the
        # resulting value is acceptable downstream.
        positive_amps = ampfunc(positive_peaks)
        wcm.positive_mean = np.mean(positive_amps)
        wcm.positive_stdev = np.std(positive_amps)
        negative_amps = ampfunc(negative_peaks)
        wcm.negative_mean = np.mean(negative_amps)
        wcm.negative_stdev = np.std(negative_amps)

        wcm.positive_skew = skew(positive_peaks, channel_num)
        wcm.positive_kurtosis = kurtosis(positive_peaks, channel_num)
        wcm.nonpositive_skew = skew(negative_peaks, channel_num)
        wcm.nonpositive_kurtosis = kurtosis(negative_peaks, channel_num)

        # CLUSTER-TODO: this is not going to be right for clusters.  Consider
        # changing this.
        wcm.concentration_rise_ratio = quartile_concentration_ratio(qlwell, peaks=ap,
                                                                    channel_num=channel_num, threshold=wcm.threshold,
                                                                    min_events=4000)

        # drop references to the per-call peak sets as soon as they are unused
        del positive_amps
        del negative_amps
        del positive_peaks
        del negative_peaks
        del unclassified_peaks
    else:
        # no classified droplets at all: describe the accepted peaks as a whole
        wcm.nonpositive_skew = skew(ap, channel_num)
        wcm.nonpositive_kurtosis = kurtosis(ap, channel_num)

    del ap
    # daisy chain just in case
    return wcm