Python groupinto示例，qtools.lib.collection.groupinto Python示例

示例#1

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def dye_by_bin(plate_objects, analyzed_wells, eventful_wells,
               analyzed_sample_names, eventful_sample_names, bin_func, channel='FAM'):
    """
    @deprecated -- use metrics

    Summarize droplet amplitudes per bin and per sample.

    :param plate_objects: Mapping of (dirname, basename) of a plate file to
                          the loaded plate object (must expose ``wells``).
    :param analyzed_wells: Wells used to determine the set of bins and to
                           build the groups for ``analyzed_sample_names``.
    :param eventful_wells: Wells used to build the groups for
                           ``eventful_sample_names``.
    :param analyzed_sample_names: Sample names filtered out of ``analyzed_wells``.
    :param eventful_sample_names: Sample names filtered out of ``eventful_wells``.
    :param bin_func: Callable mapping a well to its bin key.
    :param channel: ``'VIC'`` selects VIC amplitudes; anything else selects FAM.
    :return: dict of bin key -> list of ``[concentration, stats]`` slots, where
             ``stats`` is ``(mean, sigma)`` of the pooled amplitudes or ``None``
             when the slot received no wells.  The slot index is the position
             of the sample in analyzed names followed by eventful names.
    """
    # assumes standard dye concentrations (see bin_plots)
    bin_plots = {}
    for key in set(bin_func(w) for w in analyzed_wells):
        # every bin gets its own, independent list of slots
        bin_plots[key] = [[conc, None] for conc in (3, 10, 30, 100, 300, 500)]

    groups = [(name, groupinto(sample_name_filter(analyzed_wells, name), bin_func))
              for name in analyzed_sample_names]
    groups += [(name, groupinto(sample_name_filter(eventful_wells, name), bin_func))
               for name in eventful_sample_names]

    use_vic = channel == 'VIC'
    for slot, (_sample, grouped) in enumerate(groups):
        for key, members in grouped:
            # collect the amplitudes of each well, then pool them in one go
            chunks = []
            for w in members:
                qplate = plate_objects[(w.plate.file.dirname, w.plate.file.basename)]
                peaks = qplate.wells[w.well_name].peaks
                chunks.append(vic_amplitudes(peaks) if use_vic else fam_amplitudes(peaks))

            if not chunks:
                continue

            pooled = chunks[0] if len(chunks) == 1 else np.concatenate(chunks)
            bin_plots[key][slot][1] = (np.mean(pooled), np.std(pooled))

    return bin_plots

示例#2

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def fpfn_by_bin(plate_objects, vic_channels, sample_names, bin_func):
    """
    Compute false-positive / false-negative droplet counts per bin and plate.

    Channels are grouped by ``bin_func`` and then by plate.  For every plate a
    VIC threshold is placed a quarter of the way from the pooled mean of the
    negative control wells to the pooled mean of the positive control wells,
    and the FP / FN wells are clustered against that threshold.

    :param plate_objects: Mapping of (dirname, basename) of a plate file to
                          the loaded plate object (must expose ``wells``).
    :param vic_channels: Channel records to evaluate.
    :param sample_names: Unused; kept so existing callers keep working.
    :param bin_func: Callable mapping a channel to its bin key.
    :return: defaultdict of bin key -> list with one
             ``(fp_counts, fn_counts, threshold)`` entry per plate, where
             ``fp_counts`` holds ``(well id, positives, positives per 10000
             events, plate well)`` and ``fn_counts`` holds ``(well id,
             negatives, negatives per 10000 events)``.
    """
    bin_wells = defaultdict(list)
    # divide into plates
    for bin_key, group in groupinto(vic_channels, bin_func):
        # this is a wacky grouping, but for reuse in plate_objects (why did I not pick plate ids again?)
        plate_groups = groupinto(group, lambda c: (c.well.plate.file.dirname, c.well.plate.file.basename))
        for plate_id, channels in plate_groups:
            qplate = plate_objects[plate_id]
            positives = [c for c in channels if c.well.well_name in fpfn_positive_well_names]
            negatives = [c for c in channels if c.well.well_name in fpfn_negative_well_names]

            # compute a threshold which is 1/4 between the positive and negative means for the plate;
            # 32767 / 0 are the fallbacks when a plate has no usable control amplitudes
            positive_mean = _pooled_vic_mean(qplate, positives, 32767)
            negative_mean = _pooled_vic_mean(qplate, negatives, 0)
            threshold = ((3*negative_mean)+positive_mean)/4

            fp_counts = []
            fn_counts = []

            for f in channels:
                if f.well.well_name not in fpfn_fp_well_names:
                    continue
                well = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(well), 1, threshold)
                fp_counts.append((f.well.id, len(pos),
                                  _rate_per_10k(len(pos), len(pos) + len(neg)),
                                  well))

            for f in channels:
                if f.well.well_name not in fpfn_fn_well_names:
                    continue
                well = qplate.wells[f.well.well_name]
                pos, neg = cluster_1d(accepted_peaks(well), 1, threshold)
                fn_counts.append((f.well.id, len(neg),
                                  _rate_per_10k(len(neg), len(pos) + len(neg))))

            bin_wells[bin_key].append((fp_counts, fn_counts, threshold))

    return bin_wells


def _pooled_vic_mean(qplate, channels, default):
    """
    Return the mean VIC amplitude pooled over the accepted peaks of all wells
    behind ``channels``.

    Wells without accepted peaks contribute nothing (previously a single empty
    well turned the pooled mean into NaN).  ``default`` is returned when there
    are no amplitudes at all.
    """
    total = 0.0
    count = 0
    for ch in channels:
        amps = vic_amplitudes(accepted_peaks(qplate.wells[ch.well.well_name]))
        if len(amps) > 0:
            total += float(np.sum(amps))
            count += len(amps)
    if count == 0:
        return default
    return total / count


def _rate_per_10k(hits, total):
    """Return ``hits`` per 10000 ``total`` events; 0.0 when there are no events."""
    if total == 0:
        return 0.0
    return 10000*(float(hits)/float(total))

示例#3

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def cnv_by_bin(wells, sample_names, bin_func):
    """
    Collect CNV estimates per bin and per sample.

    :param wells: Wells to evaluate; each must expose ``channels`` (index 0 is
                  read as FAM, index 1 as VIC) and ``id``.
    :param sample_names: Sample names; one slot per name is created in every bin.
    :param bin_func: Callable mapping a well to its bin key.
    :return: dict of bin key -> list of ``[sample_name, entries]`` where
             ``entries`` is a list of ``(well id, (cnv, low, high))``.  NaN
             values coming out of ``cnv_interval`` are reported as 0.
    """
    bin_plots = {}
    for key in set(bin_func(w) for w in wells):
        bin_plots[key] = [[name, []] for name in sample_names]

    groups = [(name, groupinto(sample_name_filter(wells, name), bin_func))
              for name in sample_names]

    for slot, (_name, grouped) in enumerate(groups):
        for key, members in grouped:
            entries = []
            for w in members:
                fam, vic = w.channels[0], w.channels[1]
                interval = cnv_interval(fam.positive_peaks, fam.negative_peaks,
                                        vic.positive_peaks, vic.negative_peaks)
                # NaN cannot be plotted; flatten it to zero
                cnv, low, high = [0 if math.isnan(value) else value for value in interval]
                entries.append((w.id, (cnv, low, high)))

            bin_plots[key][slot][1] = entries

    return bin_plots

示例#4

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def dnr_by_bin(plate_objects, fam_channels, sample_names, bin_func):
    """
    @deprecated - use metrics

    Collect concentration estimates per bin and per sample.

    :param plate_objects: Mapping of (dirname, basename) of a plate file to
                          the loaded plate object (must expose ``wells``).
    :param fam_channels: Channel records used for binning and grouping.
    :param sample_names: Sample names; the position of a name selects the slot
                         that is filled in each bin.
    :param bin_func: Callable mapping a channel to its bin key.
    :return: dict of bin key -> list of ``[level, data]`` slots where ``data``
             is ``(mean concentration, [(conc, low, high), ...])`` or ``None``
             when the slot received no channels.  All concentrations are
             floored at 0.0001.
    """
    floor = 0.0001

    # TODO: do sample names here
    bin_plots = {}
    for key in set(bin_func(ch) for ch in fam_channels):
        bin_plots[key] = [[level, None] for level in (0.001, 0.01, 0.1, 1, 5)]

    groups = [(name, groupinto(sample_name_channel_filter(fam_channels, name), bin_func))
              for name in sample_names]

    for slot, (_name, grouped) in enumerate(groups):
        for key, members in grouped:
            intervals = []
            for ch in members:
                plate_key = (ch.well.plate.file.dirname, ch.well.plate.file.basename)
                well = plate_objects[plate_key].wells[ch.well.well_name]
                # TODO: use dynamic threshold or keep 4000?
                pos, neg = cluster_1d(accepted_peaks(well), 0, 4000)
                conc, clow, chigh = concentration_interval(len(pos), len(neg),
                                                           droplet_vol=well.droplet_volume)
                intervals.append((max(conc, floor), max(clow, floor), max(chigh, floor)))

            if intervals:
                mean_conc = np.mean([entry[0] for entry in intervals])
                bin_plots[key][slot][1] = (mean_conc, intervals)

    return bin_plots

示例#5

0

显示文件

文件： trend.py 项目： v-makarenko/vtoolsmq

def execute_built_query(base_q, group_by_plate, objects_expected=False, derived_metric=None):
    """
    Executes the query built by one of the build_*_query functions.  Except
    when grouping by plate without model objects (where the query rows are
    returned as they come back), each result is a 4-tuple:
    [statistic, timepoint, object id, object display string]

    :param base_q: The query to execute.
    :param group_by_plate: Whether to group the results by plate.
    :param objects_expected: Whether the first member of the result set is expected to be a model object.
    :param derived_metric: If the metric is a property derived from database columns, the name of this property.
    :return: A result set of (stat, run_time, object id, identifying information)
    """
    if not group_by_plate or objects_expected:
        base_q = base_q.order_by(Plate.run_time, WellMetric.well_name)
    else:
        base_q = base_q.group_by(Plate.id).order_by(Plate.run_time)

    records = base_q.all()

    if not group_by_plate:
        # per-well rows: label each one with plate and well name
        records = [(obj, dt, rec_id, "%s - %s" % (plate_name, well_name))
                   for obj, dt, rec_id, plate_name, well_name in records]
    elif objects_expected:
        # per-plate rows built from objects: the well name is dropped
        records = [(obj, dt, rec_id, plate_name)
                   for obj, dt, rec_id, plate_name, well_name in records]

    if not objects_expected:
        return records

    import numpy as np
    # have to decorate & unwind groups if property derived
    # TODO: could there be a nicer way to define virtual props in SQLAlchemy
    records = [(getattr(obj, derived_metric), dt, rec_id, name)
               for obj, dt, rec_id, name in records]
    if not group_by_plate:
        return records

    averaged = []
    for group_id, recs in groupinto(records, lambda tup: tup[2]):
        # average the non-missing values; a group without any yields NaN and is skipped
        avg = np.mean([r[0] for r in recs if r[0] is not None])
        averaged.append((avg, recs[0][1], group_id, recs[0][3]))

    return [row for row in averaged if not math.isnan(row[0])]

示例#6

0

显示文件

文件： dg.py 项目： v-makarenko/vtoolsmq

    def runs(self, id=None):
        """
        Render the runs page for one droplet generator.

        Looks up the droplet generator by ``id`` (404 if it does not exist),
        loads its numbered runs and the well metrics recorded against it for
        the reprocess config given by the optional ``rp_id`` request
        parameter, and stores them on ``c.runs`` as
        ``(run number, run, rows sorted by consumable channel)`` tuples in
        run-number order.  Run numbers without a matching run are left out.
        """
        c.dg = Session.query(DropletGenerator).get(int(id))
        if not c.dg:
            abort(404)

        rp_param = request.params.get('rp_id', None)
        reprocess_config_id = int(rp_param) if rp_param else rp_param

        dg_runs = Session.query(DropletGeneratorRun).filter(
                    and_(DropletGeneratorRun.droplet_generator_id == c.dg.id,
                         DropletGeneratorRun.run_number != None)).all()
        run_dict = {}
        for dg_run in dg_runs:
            run_dict[dg_run.run_number] = dg_run

        # row layout: (metric, well id, sample name, dg run number, channel num)
        well_q = Session.query(WellMetric,
                               QLBWell.id,
                               QLBWell.sample_name,
                               QLBWell.dg_run_number,
                               QLBWell.consumable_channel_num)
        well_q = well_q.join(QLBWell).join(PlateMetric)
        well_q = well_q.filter(and_(QLBWell.droplet_generator_id == id,
                                    QLBWell.dg_run_number != None,
                                    PlateMetric.reprocess_config_id == reprocess_config_id))
        wells = well_q.order_by(QLBWell.dg_run_number, QLBWell.consumable_channel_num).all()

        by_run = groupinto(wells, lambda row: row[3])
        listing = []
        for run_id, info in sorted(by_run, key=operator.itemgetter(0)):
            run = run_dict.get(run_id, None)
            if run:
                listing.append((run_id, run, sorted(info, key=operator.itemgetter(4))))
        c.runs = listing
        return render('/product/dg/runs.html')

示例#7

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def events_by_reader_dg(wells, quant_func):
    """
    Group a quantity computed per well by reader and droplet generator.

    :param wells: Wells to evaluate (assumed sorted by host_datetime incoming).
    :param quant_func: Callable applied to every well to produce the quantity.
    :return: list of ``(box2 id, [(droplet generator id, [(position, quantity), ...]), ...])``
             where ``position`` is
             ``(setup id * 96 + chip num * 12 + channel num) / 96``.
    """
    def position(well):
        return (well.plate.plate.setup.id*96.0
                + well.consumable_chip_num*12.0
                + well.consumable_channel_num)/96.0

    placed = [(position(well), well) for well in wells]

    reader_groups = []
    for reader_num, reader_members in groupinto(placed, lambda pair: pair[1].plate.plate.box2_id):
        per_dg = []
        for dg_id, dg_members in groupinto(reader_members, lambda pair: pair[1].droplet_generator_id):
            per_dg.append((dg_id, [(pos, quant_func(w)) for pos, w in dg_members]))
        reader_groups.append((reader_num, per_dg))

    return reader_groups

示例#8

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def singleplex_by_bin(fam_channels, bin_func):
    """
    Collect the stored concentration estimates of each channel, grouped by bin.

    Channels are first ordered by (setup id, consumable chip number,
    consumable channel number) and then grouped with ``bin_func``.  The
    grouping is walked exactly once, so the result no longer depends on
    ``groupinto`` returning something that can be iterated twice.

    :param fam_channels: Channel records exposing ``well``, ``concentration``,
                         ``conf_lower_bound`` and ``conf_upper_bound``.
    :param bin_func: Callable mapping a channel to its bin key.
    :return: dict of bin key -> list of
             ``(well id, (concentration, lower bound, upper bound))``.
    """
    ordered = sorted(fam_channels,
                     key=lambda c: (c.well.plate.plate.setup.id,
                                    c.well.consumable_chip_num,
                                    c.well.consumable_channel_num))

    plots = {}
    for bin_key, well_channels in groupinto(ordered, bin_func):
        plots[bin_key] = [(w.well.id, (w.concentration, w.conf_lower_bound, w.conf_upper_bound))
                          for w in well_channels]

    return plots

示例#9

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def red_by_bin(fam_channels, sample_names, bin_func):
    """
    Collect positive-peak counts per bin, keyed by the two values attached to
    each sample name.

    :param fam_channels: Channel records used for binning and grouping.
    :param sample_names: Sequence of ``(sample_name, (sample_cpd, bg_cpd))`` pairs.
    :param bin_func: Callable mapping a channel to its bin key.
    :return: dict of bin key -> ``defaultdict(dict)`` such that
             ``result[bin][sample_cpd][bg_cpd]`` is a list of
             ``(well id, positive peaks)``; a falsy peak count is reported as 0.
    """
    bin_plots = {}
    for key in set(bin_func(ch) for ch in fam_channels):
        bin_plots[key] = defaultdict(dict)

    sample_cpd_map = dict(sample_names)

    groups = []
    for sample_name, (_sample_cpd, _bg_cpd) in sample_names:
        matching = sample_name_channel_filter(fam_channels, sample_name)
        if not matching:
            # samples without any channel do not contribute a group
            continue
        groups.append((sample_name, groupinto(matching, bin_func)))

    for sample, grouped in groups:
        for key, members in grouped:
            counts = [(ch.well.id, ch.positive_peaks or 0) for ch in members]
            cpd_pair = sample_cpd_map[sample]
            bin_plots[key][cpd_pair[0]][cpd_pair[1]] = counts

    return bin_plots

示例#10

0

显示文件

文件： beta.py 项目： v-makarenko/vtoolsmq

def duplex_by_bin(fam_channels, sample_names, bin_func):
    """
    Collect the stored concentration estimates per bin and per sample.

    :param fam_channels: Channel records; they are ordered by (setup id,
                         consumable chip number, consumable channel number)
                         before being binned and grouped.
    :param sample_names: Sample names; the position of a name selects the slot
                         that is filled in each bin.
    :param bin_func: Callable mapping a channel to its bin key.
    :return: dict of bin key -> list of ``[level, data]`` slots where ``data``
             is ``(mean concentration, [(conc, low, high, well id), ...])`` or
             ``None`` when the slot received no channels.  All concentrations
             are floored at 0.0001.
    """
    floor = 0.0001

    def run_order(ch):
        well = ch.well
        return (well.plate.plate.setup.id, well.consumable_chip_num, well.consumable_channel_num)

    ordered = sorted(fam_channels, key=run_order)

    bin_plots = {}
    for key in set(bin_func(ch) for ch in ordered):
        bin_plots[key] = [[level, None] for level in (0.125, 0.25, 0.5, 1.02, 2)]

    groups = [(name, groupinto(sample_name_channel_filter(ordered, name), bin_func))
              for name in sample_names]

    # this is just dnr with different coefficients -- can that be patternized?
    for slot, (_name, grouped) in enumerate(groups):
        for key, members in grouped:
            intervals = [(max(ch.concentration, floor),
                          max(ch.conf_lower_bound, floor),
                          max(ch.conf_upper_bound, floor),
                          ch.well_id)
                         for ch in members]

            if intervals:
                mean_conc = np.mean([entry[0] for entry in intervals])
                bin_plots[key][slot][1] = (mean_conc, intervals)

    return bin_plots