Python sanitizePatientName示例，broad.sanitizePatientName Python示例

示例#1

0

显示文件

文件： importer.py 项目： cincinnatusc123/gleeson

def populatePatients( conn, patient_names ) :
    """Insert every patient that is not yet in the Patients table.

    conn          -- database wrapper; its getNextID, queryScalar and insert
                     methods are used here.
    patient_names -- iterable of patient names as they appear in the input.

    Each name is passed through broad.sanitizePatientName *before* the
    existence check, because the sanitized form is what gets stored in the
    name column.  Checking the raw name instead would never find the stored
    row for any name that sanitizing changes, so the patient would be
    inserted a second time under a new id.

    Newly inserted patients are also recorded in patients_dbix (defined
    outside this function) as sanitized name -> id.
    """
    patient_dbix = conn.getNextID("Patients")
    for raw_name in patient_names :
        patient = broad.sanitizePatientName( raw_name )
        # NOTE(review): the name is interpolated directly into the SQL text,
        # so a name containing a quote breaks the statement.  Use bound
        # parameters if the conn wrapper offers them.
        query = "select id from Patients where name='%s'" % patient
        pid = conn.queryScalar(query,int)
        if not pid :
            conn.insert( "Patients", [patient_dbix,patient], ["id","name"] )
            patients_dbix[patient] = patient_dbix

        # the candidate id advances for every name, inserted or not,
        # so the assigned ids may contain gaps
        patient_dbix += 1

示例#2

0

显示文件

文件： importer.py 项目： cincinnatusc123/gleeson_old

def populatePatients( conn, patient_names ) :
    """Insert unknown patients and tag known ones with the current plate.

    conn          -- database wrapper; its getNextID, queryScalar, insert
                     methods and its cur cursor are used here.
    patient_names -- iterable of patient names as they appear in the input.

    Each name is passed through broad.sanitizePatientName *before* the
    existence check, because the sanitized form is what gets stored in the
    name column.  Checking the raw name instead would never find the stored
    row for any name that sanitizing changes, so the patient would be
    inserted a second time and the plate update below would never run for it.

    plate_id, plate_contrib, presentIn and patients_dbix are defined outside
    this function.  Newly inserted patients are recorded in patients_dbix as
    sanitized name -> id.
    """
    patient_dbix = conn.getNextID("Patients")
    for raw_name in patient_names :
        patient = broad.sanitizePatientName( raw_name )
        # NOTE(review): the name is interpolated directly into the SQL text,
        # so a name containing a quote breaks the statement.  Use bound
        # parameters if the conn wrapper offers them.
        query = "select id from Patients where name='%s'" % patient
        pid = conn.queryScalar(query,int)
        if not pid :
            conn.insert( "Patients", [patient_dbix,patient,plate_id], ["id","name","plate"] )
            patients_dbix[patient] = patient_dbix
        else :
            src = conn.queryScalar("select plate from Patients where id=%d" % pid, int)
            if not presentIn( src, plate_id ) :
                #update the plate column if this patient is seen on a novel plate
                update = "update Patients set plate = plate+%d where id=%d" \
                          % ( plate_contrib, pid)
                conn.cur.execute( update )

        # the candidate id advances for every name, inserted or not,
        # so the assigned ids may contain gaps
        patient_dbix += 1

示例#3

0

显示文件

文件： queries.py 项目： cincinnatusc123/gleeson

def familyReports() :
    """Write a "hets" and a "homs" TSV report for every patient.

    For each name in the Patients table two files are created in
    globes.OUT_DIR, <patient>_hets.tsv and <patient>_homs.tsv, each starting
    with the column_headers row.  The variant query is then walked once; for
    every variant the carriers are fetched with getPatients and one row is
    queued per carrier.  Each row is the formatted variant followed by the
    carrier's call, the number and names of the *other* patients sharing the
    same genotype class, and the number and names of the patients in the
    opposite class.

    Rows are accumulated in memory and appended to the files every 10000
    variants (and once more at the end), so that only one file is open at a
    time.  Files are opened with "with" so a failed write cannot leak the
    handle.

    Relies on names defined outside this function: globes, db, broad, csv,
    column_headers, vcols_string, icols_string, gcols_string, gvs, COVERAGE,
    getPatients and formatQueryRow.
    """
    outdir = globes.OUT_DIR
    conn = db.Conn("localhost")

    #second connection, used for the per-variant patient lookups
    conn2 = db.Conn("localhost")
    print("connetions made")

    def make_writer( handle ) :
        #every report file uses the same tab-separated dialect
        return csv.writer( handle, \
                           delimiter='\t', \
                           quoting=csv.QUOTE_MINIMAL )

    #patient -> genotype class -> path of the report file
    fouts = {}
    #patient -> genotype class -> rows waiting to be written
    fbuffers = {}

    query = '''select name from Patients'''

    #create both files for each patient, write the column headers,
    #and initialize the buffer space
    for r in conn.iterateQuery( query ) :
        patient = broad.sanitizePatientName( r[0] )
        fouts[patient] = {}
        fbuffers[patient] = {}
        for gt in ["hets","homs"] :
            filename = '%s/%s_%s.tsv' % (outdir,patient,gt)
            fouts[patient][gt] = filename

            with open(filename, 'wb') as f :
                make_writer( f ).writerow( column_headers )

            fbuffers[patient][gt] = []

    #what are the interesting variants
    query = '''select %s, %s, %s
           from Variants as v inner join Isoforms as i on v.id = i.var_id
                              inner join Genes as g on g.id = i.gene_id
           where (%s)  and v.AF < 0.1
           order by AF''' % (vcols_string, icols_string, gcols_string, gvs)

    print(query)

    #write the buffers out to the respective files and empty them
    def flush() :
        for pat in fbuffers :
            for gt in ["homs","hets"] :
                with open( fouts[pat][gt], 'a') as f :
                    make_writer( f ).writerows( fbuffers[pat][gt] )
                fbuffers[pat][gt] = []

    for varix,r in enumerate(conn.query( query )) :
        #do a buffer flush
        if varix % 10000 == 0 :
            flush()
            print(varix)

        var_id = r[0]
        #NOTE(review): column 6 is presumably the variant type with 2 meaning
        #indel; the column order comes from vcols_string, defined elsewhere
        isIndel = int(r[6]) == 2
        if isIndel :
            where = ""
        else :
            #only look at patients meeting these call reqs
            where = " and c.DP >= %d" % COVERAGE
        lookup = getPatients( conn2, var_id, where_clause=where )
        (noinfs,hets,homs) = [lookup[gt] for gt in [0,1,2]]

        if len(hets) == len(homs) == 0 : continue

        hom_pats = [p[1] for p in homs]
        num_homs = len(hom_pats)
        hom_string = '; '.join(hom_pats)

        het_pats = [p[1] for p in hets]
        het_string = '; '.join(het_pats)
        num_hets = len(het_pats)

        output_row = formatQueryRow( r )

        for ix,(pat_id,pat,call) in enumerate(homs) :
            pat = broad.sanitizePatientName( pat )
            #everyone homozygous for this variant except the patient itself
            hom_shares = hom_pats[:ix] + hom_pats[ix+1:]
            new_hom_string = '; '.join(hom_shares)
            fbuffers[pat]["homs"].append( output_row + \
                                         [call, num_homs-1, new_hom_string, \
                                          num_hets, het_string] )

        for ix,(pat_id,pat,call) in enumerate(hets) :
            pat = broad.sanitizePatientName( pat )
            #everyone heterozygous for this variant except the patient itself
            het_shares = het_pats[:ix] + het_pats[ix+1:]
            new_het_string = '; '.join(het_shares)
            fbuffers[pat]["hets"].append( output_row + \
                                         [call, num_homs, hom_string, \
                                          num_hets-1, new_het_string] )

    #write whatever is still buffered after the last variant
    flush()

示例#4

0

显示文件

文件： query.py 项目： cincinnatusc123/gleeson_old

def familyReports() :
    """Write a "hets" and a "homs" TSV report for every patient.

    For each name in the Patients table two files are created in
    globes.OUT_DIR, <patient>_hets.tsv and <patient>_homs.tsv.  The variant
    query (no dbSNP entry, AF < 0.1, uninteresting GVS function classes
    excluded) is walked once; for every variant the carriers with depth >= 8
    are fetched with getPatients and one row is written per carrier:

        call fields, geneSymbol, variant + isoform columns,
        #HomShares, Hom Shares, #HetShares, Het Shares

    The "shares" of a row in the carrier's own genotype class exclude the
    carrier itself; the columns for the opposite class list every carrier.
    The per-row share strings are kept in their own variables so that they
    never overwrite the full hom/het lists (previously the homs loop
    clobbered hom_string, so het rows reported a hom list with the last hom
    patient missing).

    All report files stay open while the variants are processed and are
    closed in a finally clause, even when a query or write fails.

    Relies on names defined outside this function: globes, db, broad, csv
    and getPatients.
    """
    outdir = globes.OUT_DIR
    conn = db.Conn("localhost")
    conn2 = db.Conn("localhost")
    print("connetions made")
    #NOTE(review): this result is overwritten by the explicit list right
    #below; the call is kept only so database access is unchanged
    vcols = conn2.getColumns('Variants')
    vcols = ["id", "chrom","pos","ref","mut","type","ref_aa","mut_aa","qual",
             "filter","AF","granthamScore","scorePhastCons",
             "consScoreGERP","distanceToSplice","AfricanHapMapFreq",
             "EuropeanHapMapFreq", "AsianHapMapFreq","clinicalAssociation"]

    icols = conn2.getColumns('Isoforms')

    #header of every per-patient file; vcols[0] (the variant id) is not reported
    header = ["GT","DP","GQ","geneSymbol"] + vcols[1:] + icols + [ "#HomShares", "Hom Shares", "#HetShares", "Het Shares"]

    #patient selection; '1 = 1' selects everybody.  To restrict the reports
    #to one plate, replace it with an "id=.. or id=.." list of the pat_ids
    #that have Calls on that plate
    string = '1 = 1'
    patient_query = '''select name from Patients where %s''' % string

    #what are the interesting variants
    vcols_string = ', '.join(["v.%s" % c for c in vcols])
    dont_want = ["intron","near-gene-5","intergenic","near-gene-3","coding-synonymous","coding-notMod3"]
    gvs = ["ss_functionGVS <> '%s'" % dw for dw in dont_want]
    gvs = ' and '.join(gvs)
    query = '''select %s, i.*, g.geneSymbol
           from Variants as v inner join Isoforms as i on v.id = i.var_id inner join Genes as g on i.gene_id = g.id
           where v.dbSNP is NULL and (%s)  and v.AF < 0.1
           order by AF''' % (vcols_string, gvs)
    #(ss_polyPhen = 'probably-damaging' or ss_polyPhen = 'possibly-damaging')

    #the per family reports: patient -> genotype class -> csv writer
    fouts = {}
    #every file opened below, so that all of them can be closed at the end
    handles = []

    try :
        for r in conn.iterateQuery( patient_query ) :
            patient = broad.sanitizePatientName( r[0] )
            fouts[patient] = {}
            for gt in ["hets","homs"] :
                filename = '%s/%s_%s.tsv' % (outdir,patient,gt)
                f = open(filename, 'wb')
                handles.append( f )
                fouts[patient][gt] = csv.writer( f,\
                                                 delimiter='\t', \
                                                 quoting=csv.QUOTE_MINIMAL )
                fouts[patient][gt].writerow( header )

        print(query)

        for varix,r in enumerate(conn.query( query )) :
            if varix % 5000 == 0 : print(varix)
            var_id = r[0]
            #only look at patients meeting these call reqs
            where = " and c.DP >= 8"
            (noinfs,hets,homs) = getPatients( conn, var_id, where )

            if len(hets) == len(homs) == 0 : continue

            hom_pats = [p[1] for p in homs]
            num_homs = len(hom_pats)
            hom_string = '; '.join(hom_pats)

            het_pats = [p[1] for p in hets]
            het_string = '; '.join(het_pats)
            num_hets = len(het_pats)

            #r[-1] is g.geneSymbol, r[0] the variant id, everything in
            #between the reported variant and isoform columns
            shared_cols = [r[-1]] + list(r[1:-1])

            for ix,(pat_id,pat,call) in enumerate(homs) :
                pat = broad.sanitizePatientName( pat )
                #everyone homozygous for this variant except the patient itself
                hom_shares = hom_pats[:ix] + hom_pats[ix+1:]
                new_hom_string = '; '.join(hom_shares)
                fouts[pat]["homs"].writerow( call.split(':') + \
                                     shared_cols + \
                                     [num_homs-1, new_hom_string, num_hets, het_string] )

            for ix,(pat_id,pat,call) in enumerate(hets) :
                pat = broad.sanitizePatientName( pat )
                #everyone heterozygous for this variant except the patient itself
                het_shares = het_pats[:ix] + het_pats[ix+1:]
                new_het_string = '; '.join(het_shares)
                fouts[pat]["hets"].writerow( call.split(':') + \
                                     shared_cols + \
                                     [num_homs,hom_string,num_hets-1,new_het_string] )
    finally :
        #flush and release every report file, also after an error
        for f in handles :
            f.close()