def get_adapterstype(flowcell, lane, index, table='DB_library_data'):
    """Return the one adapterstype shared by all rows matching a lane.

    Rows are selected from ``table`` by ``flowcell`` and ``lane``; when
    ``index`` is truthy the selection is additionally restricted to that
    index.  A row without an 'adapterstype' key contributes ''.

    Args:
        flowcell: value substituted into the ``flowcell="..."`` clause.
        lane: value substituted into the ``lane="..."`` clause.
        index: value substituted into the ``index="..."`` clause; any
            falsy value (None, '', 0) disables the index restriction.
        table: name of the table to read.

    Returns:
        The single distinct adapterstype found among the selected rows.

    Raises:
        ValueError: if the selected rows carry zero, or more than one,
            distinct adapterstype values.
    """
    # NOTE(review): the selection string is built by plain %-interpolation.
    # If these values can ever come from untrusted input they need escaping;
    # confirm how get_table_as_dict consumes `sq`.
    if index:
        selection = 'flowcell="%s" and lane="%s" and index="%s"' % (flowcell, lane, index)
    else:
        selection = 'flowcell="%s" and lane="%s"' % (flowcell, lane)
    td = preprocess_radtag_lane.get_table_as_dict(table, sq=selection)

    adapterstypes = list(set(d.get('adapterstype', '') for d in td))
    if len(adapterstypes) != 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('invalid number of matches: %s' % adapterstypes)
    return adapterstypes[0]
def get_adapterstype(flowcell, lane, index, table='DB_library_data'):
    """Return the one adapterstype shared by all rows matching a lane.

    Every row of ``table`` is read and filtered in Python: a row matches
    when its 'flowcell' and 'lane' values equal the arguments and, if
    ``index`` is truthy, its 'index' value equals ``index`` as well.
    Missing keys are treated as ''.

    Args:
        flowcell: value compared against each row's 'flowcell'.
        lane: value compared against each row's 'lane'.
        index: value compared against each row's 'index'; any falsy
            value (None, '', 0) disables the index comparison.
        table: name of the table to read.

    Returns:
        The single distinct adapterstype found among the matching rows.

    Raises:
        ValueError: if the matching rows carry zero, or more than one,
            distinct adapterstype values.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(table)
    td = [
        d for d in rows
        if d.get('flowcell', '') == flowcell
        and d.get('lane', '') == lane
        # The index comparison only applies when an index was requested.
        and (not index or d.get('index', '') == index)
    ]

    adapterstypes = list(set(d.get('adapterstype', '') for d in td))
    if len(adapterstypes) != 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('invalid number of matches: %s' % adapterstypes)
    return adapterstypes[0]
def get_adapterstype(flowcell, lane, index, table='DB_library_data'):
    """Look up the unique adapterstype for a flowcell/lane (and index).

    All rows of ``table`` are fetched and filtered here: 'flowcell' and
    'lane' must equal the arguments, and 'index' must equal ``index``
    whenever ``index`` is truthy.  Keys absent from a row compare as ''.

    Args:
        flowcell: value compared against each row's 'flowcell'.
        lane: value compared against each row's 'lane'.
        index: value compared against each row's 'index'; a falsy value
            (None, '', 0) means "do not filter on index".
        table: name of the table to read.

    Returns:
        The single distinct adapterstype among the matching rows.

    Raises:
        ValueError: if there is not exactly one distinct adapterstype
            among the matching rows (including when nothing matches).
    """
    def _matches(row):
        # Shared flowcell/lane test, plus the optional index test.
        if row.get('flowcell', '') != flowcell:
            return False
        if row.get('lane', '') != lane:
            return False
        return not index or row.get('index', '') == index

    td = [d for d in preprocess_radtag_lane.get_table_as_dict(table) if _matches(d)]

    adapterstypes = list(set(d.get('adapterstype', '') for d in td))
    if len(adapterstypes) != 1:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError('invalid number of matches: %s' % adapterstypes)
    return adapterstypes[0]
counts_by_pool[pool][ind] += ct return counts_by_pool def get_uniqued_info(uniqued): if 'index' in uniqued: ufields = os.path.splitext(os.path.basename(uniqued))[0].rsplit('_',3) ufields[3] = ufields[3][5:] else: ufields = os.path.splitext(os.path.basename(uniqued))[0].rsplit('_',2) ufields.append(None) ufields[1] = ufields[1][4:] return ufields if __name__ == "__main__": db = preprocess_radtag_lane.get_table_as_dict('DB_library_data',suppress_fc_check=True) uniqued = sys.argv[1] ufields = get_uniqued_info(uniqued) counts_by_pool = get_counts_by_pool(uniqued,db) for k,v in counts_by_pool.items(): print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%0.1f\t%d' % (ufields[0],ufields[1],ufields[2],ufields[3],k,sum(v.values()),len(v),numpy.mean(v.values()),numpy.median(v.values()))
def get_idxseq(table='DB_multiplex_indices'):
    """Build a dict mapping each row's 'idx' value to its 'seq' value.

    Rows are read from ``table`` with the flowcell check suppressed.  If
    several rows share an 'idx', the last one read wins.

    Args:
        table: name of the table to read.

    Returns:
        dict of ``row['idx'] -> row['seq']``.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(table, suppress_fc_check=True)
    seq_by_idx = {}
    for row in rows:
        idx, seq = row['idx'], row['seq']
        seq_by_idx[idx] = seq
    return seq_by_idx
def get_adaptseq(table='DB_adapt_trim_seqs'):
    """Build a dict mapping each adapterstype to its 'r1'/'r2' values.

    Rows are read from ``table`` with the flowcell check suppressed.  If
    several rows share an 'adapterstype', the last one read wins.

    Args:
        table: name of the table to read.

    Returns:
        dict of ``row['adapterstype'] -> {'r1': row['r1'], 'r2': row['r2']}``.
    """
    rows = preprocess_radtag_lane.get_table_as_dict(table, suppress_fc_check=True)
    seqs_by_type = {}
    for row in rows:
        adapterstype = row['adapterstype']
        seqs_by_type[adapterstype] = {'r1': row['r1'], 'r2': row['r2']}
    return seqs_by_type
return counts_by_pool def get_uniqued_info(uniqued): if 'index' in uniqued: ufields = os.path.splitext(os.path.basename(uniqued))[0].rsplit('_', 3) ufields[3] = ufields[3][5:] else: ufields = os.path.splitext(os.path.basename(uniqued))[0].rsplit('_', 2) ufields.append(None) ufields[1] = ufields[1][4:] return ufields if __name__ == "__main__": db = preprocess_radtag_lane.get_table_as_dict('DB_library_data', suppress_fc_check=True) uniqued = sys.argv[1] ufields = get_uniqued_info(uniqued) counts_by_pool = get_counts_by_pool(uniqued, db) for k, v in counts_by_pool.items(): print '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%0.1f\t%d' % ( ufields[0], ufields[1], ufields[2], ufields[3], k, sum(v.values()), len(v), numpy.mean(v.values()), numpy.median(v.values()))
def get_adaptseq(table='DB_adapt_trim_seqs'):
    """Return the 'r1' and 'r2' values of ``table`` keyed by adapterstype.

    The table is read with the flowcell check suppressed.  A later row
    with the same 'adapterstype' replaces an earlier one.

    Args:
        table: name of the table to read.

    Returns:
        dict of ``row['adapterstype'] -> {'r1': row['r1'], 'r2': row['r2']}``.
    """
    records = preprocess_radtag_lane.get_table_as_dict(table, suppress_fc_check=True)
    pairs = (
        (rec['adapterstype'], {'r1': rec['r1'], 'r2': rec['r2']})
        for rec in records
    )
    return dict(pairs)
def get_idxseq(table='DB_multiplex_indices'):
    """Return the 'seq' values of ``table`` keyed by their 'idx' values.

    The table is read with the flowcell check suppressed.  A later row
    with the same 'idx' replaces an earlier one.

    Args:
        table: name of the table to read.

    Returns:
        dict of ``row['idx'] -> row['seq']``.
    """
    records = preprocess_radtag_lane.get_table_as_dict(table, suppress_fc_check=True)
    pairs = ((rec['idx'], rec['seq']) for rec in records)
    return dict(pairs)