def sample_data_from_DB(sampleids, mousedb="Hoekstra lab mouse database"):
    """Build pedigree information for a set of samples from the mouse table.

    The table named by ``mousedb`` is loaded through
    ``preprocess_radtag_lane.no_net_get_table_as_dict`` and is treated here
    as an iterable of per-record dicts.

    Args:
        sampleids: container of sample ids; only tested with ``in``.
        mousedb: table name handed to ``no_net_get_table_as_dict``.

    Returns:
        A 4-tuple ``(ped, recombinants, parents, parents_spp)``:

        * ``ped`` -- dict mapping id -> ``(damid, sireid)`` for every
          requested sample that has both fields, plus the same mapping for
          each of those samples' dams and sires.
        * ``recombinants`` -- ids present in ``ped`` whose ``damstrain`` and
          ``sirestrain`` both contain a comma (presumably marking
          mixed-strain parents -- confirm against the database convention).
        * ``parents`` -- de-duplicated list of the grandparents of the
          recombinants (order is not defined).
        * ``parents_spp`` -- dict mapping each id in ``parents`` that has a
          record in the table to that record's ``damstrain``.

    Raises:
        KeyError: if a parent of a recombinant has no entry in ``ped``, or
            if a record for one of ``parents`` lacks ``damstrain``.
    """
    td = preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)

    def _parent_pairs(wanted_ids):
        # id -> (dam, sire) for wanted records that carry both parent fields.
        return {
            d["id"]: (d["damid"], d["sireid"])
            for d in td
            if d.get("id", "") in wanted_ids and "sireid" in d and "damid" in d
        }

    ped = _parent_pairs(sampleids)

    # Flatten the (dam, sire) pairs into a set.  The previous
    # reduce(lambda x, y: x + y, ...) had no initial value and raised
    # TypeError whenever no sample matched; a set also gives constant-time
    # membership tests below.
    ped_parents = {pid for pair in ped.values() for pid in pair}
    ped.update(_parent_pairs(ped_parents))

    # Records with missing (or null) strain fields cannot be classified as
    # recombinant, so they are skipped rather than raising.
    recombinants = [
        d["id"]
        for d in td
        if d.get("id", "") in ped
        and "," in (d.get("damstrain") or "")
        and "," in (d.get("sirestrain") or "")
    ]

    # Walk two generations up from each recombinant: its parents (f1), then
    # their parents (g0).
    grandparents = set()
    for f2 in recombinants:
        for f1 in ped[f2]:
            grandparents.update(ped[f1])
    parents = list(grandparents)

    parents_spp = {
        d["id"]: d["damstrain"] for d in td if d.get("id", "") in grandparents
    }
    return ped, recombinants, parents, parents_spp
def sample_data_from_DB(sampleids, mousedb='Hoekstra lab mouse database'):
    """Build pedigree information for a set of samples from the mouse table.

    The table named by ``mousedb`` is loaded through
    ``preprocess_radtag_lane.no_net_get_table_as_dict`` and is treated here
    as an iterable of per-record dicts.

    Args:
        sampleids: container of sample ids; only tested with ``in``.
        mousedb: table name handed to ``no_net_get_table_as_dict``.

    Returns:
        A 4-tuple ``(ped, recombinants, parents, parents_spp)``:

        * ``ped`` -- dict mapping id -> ``(damid, sireid)`` for every
          requested sample that has both fields, plus the same mapping for
          each of those samples' dams and sires.
        * ``recombinants`` -- ids present in ``ped`` whose ``damstrain`` and
          ``sirestrain`` both contain a comma (presumably marking
          mixed-strain parents -- confirm against the database convention).
        * ``parents`` -- de-duplicated list of the grandparents of the
          recombinants (order is not defined).
        * ``parents_spp`` -- dict mapping each id in ``parents`` that has a
          record in the table to that record's ``damstrain``.

    Raises:
        KeyError: if a parent of a recombinant has no entry in ``ped``, or
            if a record for one of ``parents`` lacks ``damstrain``.
    """
    td = preprocess_radtag_lane.no_net_get_table_as_dict(mousedb)

    def _parent_pairs(wanted_ids):
        # id -> (dam, sire) for wanted records that carry both parent fields.
        return {
            d['id']: (d['damid'], d['sireid'])
            for d in td
            if d.get('id', '') in wanted_ids and 'sireid' in d and 'damid' in d
        }

    ped = _parent_pairs(sampleids)

    # Flatten the (dam, sire) pairs into a set.  The previous
    # reduce(lambda x, y: x + y, ...) had no initial value and raised
    # TypeError whenever no sample matched; a set also gives constant-time
    # membership tests below.
    ped_parents = {pid for pair in ped.values() for pid in pair}
    ped.update(_parent_pairs(ped_parents))

    # Records with missing (or null) strain fields cannot be classified as
    # recombinant, so they are skipped rather than raising.
    recombinants = [
        d['id']
        for d in td
        if d.get('id', '') in ped
        and ',' in (d.get('damstrain') or '')
        and ',' in (d.get('sirestrain') or '')
    ]

    # Walk two generations up from each recombinant: its parents (f1), then
    # their parents (g0).
    grandparents = set()
    for f2 in recombinants:
        for f1 in ped[f2]:
            grandparents.update(ped[f1])
    parents = list(grandparents)

    parents_spp = {
        d['id']: d['damstrain'] for d in td if d.get('id', '') in grandparents
    }
    return ped, recombinants, parents, parents_spp
parser.add_argument('-mr','--mapreads_argstr',default="''",type=eval, \ help='additional arguments for map_reads_by_indiv-stampy.py. \nMust be single AND double quoted for spaces, e.g. "\'--cleanup --fast_merge --reduce_reads\'"'+ds) parser.add_argument('reference_fasta',help='reference for stampy') parser.add_argument('outroot',help='directory for logfile and vcf creation') parser.add_argument('projects',nargs='+',help='project names from DB_library_data to include in run') opts = parser.parse_args() if opts.vcfname is None: vcfname = '-'.join(opts.projects) else: vcfname = opts.vcfname index_lookup = preprocess_radtag_lane.no_net_get_table_as_dict(multiplex_idx_db,tcp_host) td = preprocess_radtag_lane.no_net_get_table_as_dict(config.LIBRARY_DATA,tcp_host) td = [d for d in td if d.get('project',None) in opts.projects and d.has_key('datapath')] print >> sys.stderr, '%s individual records found for projects %s' % (len(td),opts.projects) preprocess_targets = [] expected_fq_d = {} if opts.force_db_id: transtable,failures = preprocess_radtag_lane.get_legacy_to_DB_lookup(td) for d in td: #UPDATE FOR DB ID LOOKUP if opts.force_db_id: if d['sampleid'] in transtable: