示例#1
0
def makeReport() :
#SELECT jc.var_id, COUNT( DISTINCT jc.pat_id ) 
#FROM  `JointCalls` AS jc
#INNER JOIN Variants AS v ON v.id = jc.var_id
#INNER JOIN Isoforms AS i ON i.var_id = v.id
#WHERE AF =0
#AND (
#functionGVS <>  'intron'
#AND functionGVS <>  'near-gene-5'
#AND functionGVS <>  'intergenic'
#AND functionGVS <>  'near-gene-3'
#AND functionGVS <>  'coding-synonymous'
#AND functionGVS <>  'coding-notMod3'
#)
#AND (
#p_AA_AB > .9
#OR p_AA_AB > .9
#OR p_AB_BB > .9
#)
#GROUP BY jc.var_id
#HAVING COUNT( DISTINCT jc.pat_id ) >1
#ORDER BY COUNT( DISTINCT jc.pat_id ) DESC /*inner join Isoforms as i on i.var_id = jc.var_id WHERE p_AA_BB > .90 or p_AA_AB > .9*/
#LIMIT 0 , 30

    fout = open("../output/joint.tsv",'w')
    csvout = csv.writer( fout, \
                    delimiter='\t', \
                    quoting=csv.QUOTE_MINIMAL )

    extend = ["patient"] + headers[4:]
    reuse = queries.column_headers[:-5] 
    csvout.writerow( reuse + extend )

    conn = db.Conn("localhost")
    conn2 = db.Conn("localhost")

    call_where = "and (p_AA_AB > .9 or p_AA_AB > .9 or p_AB_BB > .9)"
    call_where = ''' and (normal_counts_a / (normal_counts_a + normal_counts_b)
                          > .95 ) and normal_counts_a + normal_counts_b > 10'''
    query = '''
SELECT i.id, %s,%s,%s
FROM `JointCalls` as jc inner join Variants as v on v.id = jc.var_id
                        inner join Isoforms as i on i.var_id = v.id
                        inner join Genes as g on i.gene_id = g.id
where AF <= .0005 and
  (%s)
  %s
  and dbSNP is null
  and scorePhastCons >= .5
ORDER BY AF, jc.var_id, i.id''' % \
    (queries.vcols_string, queries.icols_string, queries.gcols_string, \
     queries.gvs, call_where)

    rows = conn.query( query )
    last_iid = -1
    for i, row in enumerate(rows) :
        print row
        outrow = queries.formatQueryRow(row,offset=1)

        iso_id = row[0]
        var_id = row[1]
        gt_lists = queries.getPatients( conn2, var_id, table='JointCalls', where_clause = call_where )
        #print " ----------- "
        #print gt_lists
        if last_iid != iso_id :
            for gt,calls in gt_lists :
                for call in calls :
                    pat_name = call[1]
                    rest = call[2].split(',')[6:]
                    together = outrow+[pat_name]+rest
                    together_string = "".join([str(t) for t in together])
                    csvout.writerow( together )
                    last = together_string

            last_iid = iso_id
        else :
            print "dupe!"
        #if i > 20 : break
        #hets_string = ", ".join([t[1] for t in hets])
        #homs_string = ", ".join([t[1] for t in homs])
        #if patient in hets_string or patient in homs_string :
            #already_called += 1
        #outrow.extend( ["-",len(homs),homs_string,len(hets),hets_string] )

    fout.close()
示例#2
0
from Variants as v inner join Isoforms as i on i.var_id = v.id
                   inner join Genes as g on g.id = i.gene_id
where chrom = %s and pos = %s and AF < .1 and (%s)''' % \
    (queries.vcols_string, queries.icols_string, queries.gcols_string, \
     chrom, pos, queries.gvs)


    rows = conn.query(query)
    num_rows = len(rows)
    if num_rows > 0 :
        hits += 1

    total += 1

    for row in rows :
        outrow = queries.formatQueryRow(row)
        (noinfs, hets, homs) = queries.getPatients( conn2, row[0] )
        hets_string = ", ".join([t[1] for t in hets])
        homs_string = ", ".join([t[1] for t in homs])
        if patient in hets_string or patient in homs_string :
            already_called += 1
        outrow.extend( ["-",len(homs),homs_string,len(hets),hets_string] )
        csvout.writerow( outrow )
        # hom and het shares

print patient
print "%d/%d hits/total" % (hits,total)
print "%d/%d already_called/hits" % (already_called,hits)
fin.close()
fout.close()