Пример #1
0
def get_adapterstype(flowcell,lane,index,table='DB_library_data'):
    """Return the single adapterstype recorded for a flowcell/lane(/index).

    Rows are fetched with ``preprocess_radtag_lane.get_table_as_dict``; the
    selection is passed as its ``sq`` keyword, built from ``flowcell`` and
    ``lane``, plus ``index`` when ``index`` is truthy.

    Returns the one distinct 'adapterstype' value found among the rows (rows
    lacking that key contribute '').

    Raises ValueError when the rows yield zero or more than one distinct
    value.
    """
    if index:
        query = 'flowcell="%s" and lane="%s" and index="%s"' % (flowcell,lane,index)
    else:
        query = 'flowcell="%s" and lane="%s"' % (flowcell,lane)
    td = preprocess_radtag_lane.get_table_as_dict(table,sq=query)

    adapterstypes = list(set([d.get('adapterstype','') for d in td]))
    if len(adapterstypes) != 1:
        errstr = 'invalid number of matches: %s' % adapterstypes
        # Call form of raise: valid on both Python 2 and Python 3
        # (the "raise Class, value" form is a syntax error on Python 3).
        raise ValueError(errstr)
    return adapterstypes[0]
Пример #2
0
def get_adapterstype(flowcell,lane,index,table='DB_library_data'):
    """Return the single adapterstype recorded for a flowcell/lane(/index).

    All rows of ``table`` are loaded with
    ``preprocess_radtag_lane.get_table_as_dict`` and filtered here: a row is
    kept when its 'flowcell' and 'lane' values equal the arguments and, if
    ``index`` is truthy, its 'index' value equals ``index`` as well.

    Returns the one distinct 'adapterstype' value found among the kept rows
    (rows lacking that key contribute '').

    Raises ValueError when the kept rows yield zero or more than one distinct
    value.
    """
    td = [d for d in preprocess_radtag_lane.get_table_as_dict(table)
          if d.get('flowcell','') == flowcell
          and d.get('lane','') == lane
          # the index column only constrains the match when an index is given
          and (not index or d.get('index','') == index)]

    adapterstypes = list(set([d.get('adapterstype','') for d in td]))
    if len(adapterstypes) != 1:
        errstr = 'invalid number of matches: %s' % adapterstypes
        # Call form of raise: valid on both Python 2 and Python 3
        # (the "raise Class, value" form is a syntax error on Python 3).
        raise ValueError(errstr)
    return adapterstypes[0]
Пример #3
0
def get_adapterstype(flowcell, lane, index, table='DB_library_data'):
    """Return the single adapterstype recorded for a flowcell/lane(/index).

    All rows of ``table`` are loaded with
    ``preprocess_radtag_lane.get_table_as_dict`` and filtered here: a row is
    kept when its 'flowcell' and 'lane' values equal the arguments and, if
    ``index`` is truthy, its 'index' value equals ``index`` as well.

    Returns the one distinct 'adapterstype' value found among the kept rows
    (rows lacking that key contribute '').

    Raises ValueError when the kept rows yield zero or more than one distinct
    value.
    """
    td = [
        d for d in preprocess_radtag_lane.get_table_as_dict(table)
        if d.get('flowcell', '') == flowcell and d.get('lane', '') == lane
        # the index column only constrains the match when an index is given
        and (not index or d.get('index', '') == index)
    ]

    adapterstypes = list(set([d.get('adapterstype', '') for d in td]))
    if len(adapterstypes) != 1:
        errstr = 'invalid number of matches: %s' % adapterstypes
        # Call form of raise: valid on both Python 2 and Python 3
        # (the "raise Class, value" form is a syntax error on Python 3).
        raise ValueError(errstr)
    return adapterstypes[0]
Пример #4
0
                counts_by_pool[pool][ind] += ct

    return counts_by_pool
            
def get_uniqued_info(uniqued):
    """Split a uniqued file name into a list of four fields.

    The directory and extension of ``uniqued`` are dropped and the remaining
    name is split on '_' from the right: into four fields when the name
    contains 'index', otherwise into three fields with None appended as the
    fourth.

    In the returned list the second field has its first 4 characters removed
    (presumably a 'lane' prefix -- confirm against the naming scheme) and,
    when present, the fourth field has its first 5 characters removed (the
    'index' prefix).
    """
    stem = os.path.splitext(os.path.basename(uniqued))[0]

    # Test the file name only: checking the whole path misfires when a
    # directory component happens to contain 'index'.
    if 'index' in stem:
        ufields = stem.rsplit('_',3)
        ufields[3] = ufields[3][5:]
    else:
        ufields = stem.rsplit('_',2)
        ufields.append(None)

    ufields[1] = ufields[1][4:]

    return ufields


if __name__ == "__main__":
    # Script entry point: takes the uniqued file path as the first
    # command-line argument and prints one tab-separated line per pool.

    db = preprocess_radtag_lane.get_table_as_dict('DB_library_data',suppress_fc_check=True)
    uniqued = sys.argv[1]

    ufields = get_uniqued_info(uniqued)

    counts_by_pool = get_counts_by_pool(uniqued,db)

    for k,v in counts_by_pool.items():
        # Materialize the counts once: on Python 3 .values() is a view, not
        # the list numpy.mean / numpy.median expect.
        counts = list(v.values())
        # Columns: the four file-name fields, pool key, total count, number
        # of entries, mean count, median count.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%0.1f\t%d' % (ufields[0],ufields[1],ufields[2],ufields[3],k,sum(counts),len(v),numpy.mean(counts),numpy.median(counts)))
    
    
Пример #5
0
def get_idxseq(table='DB_multiplex_indices'):
    """Return a dict mapping each row's 'idx' value to its 'seq' value.

    Rows are read with ``preprocess_radtag_lane.get_table_as_dict`` (called
    with ``suppress_fc_check=True``); when several rows share an 'idx', the
    last one wins.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(table,suppress_fc_check=True)
    idxseq = {}
    for row in rows:
        idx, seq = row['idx'], row['seq']
        idxseq[idx] = seq
    return idxseq
Пример #6
0
def get_adaptseq(table='DB_adapt_trim_seqs'):
    """Return a dict mapping each row's 'adapterstype' to its r1/r2 values.

    Rows are read with ``preprocess_radtag_lane.get_table_as_dict`` (called
    with ``suppress_fc_check=True``). Each value is a dict with keys 'r1' and
    'r2' copied from the row; when several rows share an 'adapterstype', the
    last one wins.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(table,suppress_fc_check=True)
    adaptseq = {}
    for row in rows:
        kind = row['adapterstype']
        adaptseq[kind] = {'r1':row['r1'],'r2':row['r2']}
    return adaptseq
Пример #7
0
    return counts_by_pool


def get_uniqued_info(uniqued):
    """Split a uniqued file name into a list of four fields.

    The directory and extension of ``uniqued`` are dropped and the remaining
    name is split on '_' from the right: into four fields when the name
    contains 'index', otherwise into three fields with None appended as the
    fourth.

    In the returned list the second field has its first 4 characters removed
    (presumably a 'lane' prefix -- confirm against the naming scheme) and,
    when present, the fourth field has its first 5 characters removed (the
    'index' prefix).
    """
    stem = os.path.splitext(os.path.basename(uniqued))[0]

    # Test the file name only: checking the whole path misfires when a
    # directory component happens to contain 'index'.
    if 'index' in stem:
        ufields = stem.rsplit('_', 3)
        ufields[3] = ufields[3][5:]
    else:
        ufields = stem.rsplit('_', 2)
        ufields.append(None)

    ufields[1] = ufields[1][4:]

    return ufields


if __name__ == "__main__":
    # Script entry point: takes the uniqued file path as the first
    # command-line argument and prints one tab-separated line per pool.

    db = preprocess_radtag_lane.get_table_as_dict('DB_library_data',
                                                  suppress_fc_check=True)
    uniqued = sys.argv[1]

    ufields = get_uniqued_info(uniqued)

    counts_by_pool = get_counts_by_pool(uniqued, db)

    for k, v in counts_by_pool.items():
        # Materialize the counts once: on Python 3 .values() is a view, not
        # the list numpy.mean / numpy.median expect.
        counts = list(v.values())
        # Columns: the four file-name fields, pool key, total count, number
        # of entries, mean count, median count.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('%s\t%s\t%s\t%s\t%s\t%s\t%s\t%0.1f\t%d' % (
            ufields[0], ufields[1], ufields[2], ufields[3], k, sum(counts),
            len(v), numpy.mean(counts), numpy.median(counts)))
Пример #8
0
def get_adaptseq(table='DB_adapt_trim_seqs'):
    """Return a dict mapping each row's 'adapterstype' to its r1/r2 values.

    Rows are read with ``preprocess_radtag_lane.get_table_as_dict`` (called
    with ``suppress_fc_check=True``). Each value is a dict with keys 'r1' and
    'r2' copied from the row; when several rows share an 'adapterstype', the
    last one wins.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(
        table, suppress_fc_check=True)
    adaptseq = {}
    for row in rows:
        kind = row['adapterstype']
        adaptseq[kind] = {'r1': row['r1'], 'r2': row['r2']}
    return adaptseq
Пример #9
0
def get_idxseq(table='DB_multiplex_indices'):
    """Return a dict mapping each row's 'idx' value to its 'seq' value.

    Rows are read with ``preprocess_radtag_lane.get_table_as_dict`` (called
    with ``suppress_fc_check=True``); when several rows share an 'idx', the
    last one wins.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(
        table, suppress_fc_check=True)
    idxseq = {}
    for row in rows:
        idx, seq = row['idx'], row['seq']
        idxseq[idx] = seq
    return idxseq