def outputCliqueInLatexTable(curs, region, clique_id, table_label, fig_label, stock_db, clique2ecotype_table, outf):
    """
    Write one haplotype clique as a LaTeX table to outf and return its ecotype ids.

    2007-10-18
    2007-10-21
        shorten some label names
            ecotypeid -> id
            stockparent -> stkparent
        add one more column, collector

    Arguments:
        curs: database cursor; only execute() and fetchall() are used.
        region: region description, interpolated into the table caption.
        clique_id: id of the clique whose ecotypes are listed.
        table_label: LaTeX label given to the generated table.
        fig_label: LaTeX label of the matching figure, referenced in the caption.
        stock_db: name of the database holding the ecotype, address, site,
            country and person tables.
        clique2ecotype_table: table linking clique_id to ecotypeid.
        outf: writable file-like object that receives the LaTeX code.

    Returns:
        list of ecotype ids (first query column), in query order.
    """
    from pymodule.latex import outputMatrixInLatexTable

    # Database/table names and clique_id are interpolated straight into the
    # statement, so they must come from trusted configuration, not user input.
    query = (
        "select e.id, e.name, e.nativename, e.stockparent, e.latitude, e.longitude, "
        "c.abbr, s.name, p.firstname, p.surname "
        "from %s.ecotype e, %s.address a, %s.site s, %s.country c, %s.person p, %s ce "
        "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id "
        "and e.id=ce.ecotypeid and p.id=e.collectorid and ce.clique_id=%s "
        "order by nativename, stockparent"
    ) % (stock_db, stock_db, stock_db, stock_db, stock_db, clique2ecotype_table, clique_id)
    curs.execute(query)
    rows = curs.fetchall()

    ecotype_id_ls = []
    data_matrix = []
    for row in rows:
        ecotype_id_ls.append(row[0])
        # The query ends with (country abbr, site name, firstname, surname);
        # the table shows them as collector, country, site instead.
        country, site, firstname, surname = row[-4:]
        new_row = list(row[:-4])
        new_row.append('%s %s' % (firstname, surname))
        new_row.append(country)
        new_row.append(site)
        data_matrix.append(new_row)

    caption = 'haplotype clique %s has %s ecotypes from %s. check Figure~\\ref{%s}.' % (
        clique_id, len(ecotype_id_ls), region, fig_label)
    header_ls = ['id', 'name', 'nativename', 'stkparent', 'lat', 'lon', 'collector', 'country', 'site']
    outf.write(outputMatrixInLatexTable(data_matrix, caption, table_label, header_ls))
    return ecotype_id_ls
def outputCliqueInLatexTable(curs, region, clique_id, table_label, fig_label, stock_db, clique2ecotype_table, outf):
    """
    Write one haplotype clique as a LaTeX table to outf and return its ecotype ids.

    2007-10-18
    2007-10-21
        shorten some label names
            ecotypeid -> id
            stockparent -> stkparent
        add one more column, collector

    Arguments:
        curs: database cursor; only execute() and fetchall() are used.
        region: region description, interpolated into the table caption.
        clique_id: id of the clique whose ecotypes are listed.
        table_label: LaTeX label given to the generated table.
        fig_label: LaTeX label of the matching figure, referenced in the caption.
        stock_db: name of the database holding the ecotype, address, site,
            country and person tables.
        clique2ecotype_table: table linking clique_id to ecotypeid.
        outf: writable file-like object that receives the LaTeX code.

    Returns:
        list of ecotype ids (first query column), in query order.
    """
    from pymodule.latex import outputMatrixInLatexTable

    # Database/table names and clique_id are interpolated straight into the
    # statement, so they must come from trusted configuration, not user input.
    query = (
        "select e.id, e.name, e.nativename, e.stockparent, e.latitude, e.longitude, "
        "c.abbr, s.name, p.firstname, p.surname "
        "from %s.ecotype e, %s.address a, %s.site s, %s.country c, %s.person p, %s ce "
        "where e.siteid=s.id and s.addressid=a.id and a.countryid=c.id "
        "and e.id=ce.ecotypeid and p.id=e.collectorid and ce.clique_id=%s "
        "order by nativename, stockparent"
    ) % (stock_db, stock_db, stock_db, stock_db, stock_db, clique2ecotype_table, clique_id)
    curs.execute(query)
    rows = curs.fetchall()

    ecotype_id_ls = []
    data_matrix = []
    for row in rows:
        ecotype_id_ls.append(row[0])
        # The query ends with (country abbr, site name, firstname, surname);
        # the table shows them as collector, country, site instead.
        country, site, firstname, surname = row[-4:]
        new_row = list(row[:-4])
        new_row.append('%s %s' % (firstname, surname))
        new_row.append(country)
        new_row.append(site)
        data_matrix.append(new_row)

    caption = 'haplotype clique %s has %s ecotypes from %s. check Figure~\\ref{%s}.' % (
        clique_id, len(ecotype_id_ls), region, fig_label)
    header_ls = ['id', 'name', 'nativename', 'stkparent', 'lat', 'lon', 'collector', 'country', 'site']
    outf.write(outputMatrixInLatexTable(data_matrix, caption, table_label, header_ls))
    return ecotype_id_ls
def OutputCliquesGroupedByRegion(curs, region2clique_id_ls, clique_id2size, clique2ecotype_table, component2clique_table, output_fname, fig_output_dir, stock_db='stock20071008', min_clique_size=3, need_to_draw_figures=0):
    """
    Write a LaTeX report of haplotype cliques, grouped by region, to output_fname.

    2007-10-18

    For every region a subsection is written with a table of the cliques whose
    size is at least min_clique_size (ascending size), followed by one ecotype
    table and one figure reference per clique. A final subsection maps every
    reported clique to the component (cc_id) it comes from.

    Arguments:
        curs: database cursor; execute() and fetchall() are used.
        region2clique_id_ls: dict mapping a region (tuple of strings, joined
            with ' and ' for display) to the clique ids found in it.
        clique_id2size: mapping from clique id to clique size.
        clique2ecotype_table: table linking clique_id to ecotypeid.
        component2clique_table: table queried for the cc_id of each clique.
        output_fname: path of the LaTeX file to (over)write.
        fig_output_dir: directory prefix for the drawn map files.
        stock_db: database holding the ecotype table.
        min_clique_size: cliques smaller than this are not reported.
        need_to_draw_figures: when true, the maps are actually drawn; the
            LaTeX figure references are written either way.
    """
    from variation.src.common import draw_graph_on_map, get_ecotypeid2pos
    from variation.src.common import get_pic_area
    # outputMatrixInLatexTable is used below, so import it here like the
    # sibling outputCliqueInLatexTable does rather than rely on a module global.
    from pymodule.latex import outputFigureInLatex, outputMatrixInLatexTable
    import sys, os

    ecotypeid2pos = get_ecotypeid2pos(curs, '%s.ecotype' % stock_db)
    no_of_regions = 0
    clique_id2region = {}  # for the cc2clique latex table
    # The with-block guarantees the report is flushed and closed, also on errors.
    with open(output_fname, 'w') as outf:
        for region, clique_id_ls in region2clique_id_ls.items():
            region = ' and '.join(region)  # region is tuple, convert it to string
            sys.stderr.write("%s:\n" % region)
            outf.write("\\subsection{%s}\n" % region)
            size_clique_id_ls = [
                [clique_id2size[clique_id], clique_id]
                for clique_id in clique_id_ls
                if clique_id2size[clique_id] >= min_clique_size
            ]
            caption = 'region %s has %s cliques in total but %s cliques above min size %s.' % (
                region, len(clique_id_ls), len(size_clique_id_ls), min_clique_size)
            outf.write(caption + '\n')
            if not size_clique_id_ls:
                continue
            size_clique_id_ls.sort()
            no_of_regions += 1
            table_label = 't_region_%s' % no_of_regions
            clique_id_size_ls = [[row[1], row[0]] for row in size_clique_id_ls]
            header_ls = ['clique_id', 'size']
            outf.write(outputMatrixInLatexTable(clique_id_size_ls, caption, table_label, header_ls))
            for size, clique_id in size_clique_id_ls:
                sys.stderr.write("clique %s, size %s\n" % (clique_id, size))
                # output the table and get ecotype_id_ls
                fig_label = 'fclique%s' % clique_id
                table_label = 'tclique%s' % clique_id
                ecotype_id_ls = outputCliqueInLatexTable(
                    curs, region, clique_id, table_label, fig_label, stock_db, clique2ecotype_table, outf)
                # draw the clique
                site_g, site2weight, site2pos = construct_site_graph_out_of_ecotype_id_ls(
                    ecotype_id_ls, ecotypeid2pos)
                # just the name without filetype extension, like png, eps etc.
                fig_fname = 'haplotype_%s_size%s_%ssites_map' % (
                    clique_id, len(ecotype_id_ls), len(site2pos))
                if need_to_draw_figures:
                    draw_graph_on_map(
                        site_g, site2weight, site2pos,
                        'clique %s, size %s, %s sites of %s' % (
                            clique_id, len(ecotype_id_ls), len(site2pos), region),
                        pic_area=get_pic_area(list(site2pos.values()), 30),
                        output_fname_prefix=os.path.join(fig_output_dir, fig_fname))
                # output the figure in latex
                # relative path of the figure to the latex file
                fig_fname = os.path.join('figures/', '%s.png' % fig_fname)
                caption = 'map of haplotype clique %s has %s ecotypes from %s sites of %s. check Table~\\ref{%s}.' % (
                    clique_id, len(ecotype_id_ls), len(site2pos), region, table_label)
                outf.write(outputFigureInLatex(fig_fname, caption, fig_label))
                clique_id2region[clique_id] = region

        outf.write("\\subsection{which component the clique is from}\n")
        cc_id_clique_id_region_ls = []
        cc_id2clique_id_ls = {}
        for clique_id, region in clique_id2region.items():
            curs.execute("select cc_id from %s where clique_id=%s" % (component2clique_table, clique_id))
            rows = curs.fetchall()
            cc_id = rows[0][0]
            cc_id_clique_id_region_ls.append([cc_id, clique_id, region])
            cc_id2clique_id_ls.setdefault(cc_id, []).append(clique_id)
        cc_id_clique_id_region_ls.sort()
        caption = '%s components versus %s cliques' % (
            len(cc_id2clique_id_ls), len(cc_id_clique_id_region_ls))
        table_label = 't_cc2clique_1'
        header_ls = ['component id', 'clique id', 'region']
        outf.write(outputMatrixInLatexTable(cc_id_clique_id_region_ls, caption, table_label, header_ls))
def run(self):
    """
    Compare PCR (accession) calls against sequenom (ecotype) calls and report.

    Steps:
        1. Connect to the MySQL database self.dbname on self.hostname.
        2. Build the SNP, accession and ecotype lookup structures and the two
           call matrices (ecotype X snp and accession X snp).
        3. Optionally dump the matrices to self.sub_justin_output_fname and
           self.output_fname.
        4. Print the four summary difference matrices to stdout.
        5. If self.latex_output_fname is set, write a LaTeX report with the
           summary tables, the table of all real mismatches, and strain-wise
           and SNP-wise comparisons.

    Every LaTeX table gets a unique label 'table_dm<N>' numbered consecutively.
    Ecotype ids are indexed as (ecotype id, duplicate) pairs in the report.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()
    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
        curs, self.snp_locus_table)
    (ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls,
     accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls) = self.setup_accession_ecotype_dstruc(
        curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
        curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            ecotype_X_snp_matrix, self.sub_justin_output_fname, header, ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(
        curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = self.get_accession_X_snp_matrix(
        curs, accession_id2row_index, SNPpos2col_index, self.sequence_table, self.alignment_table,
        alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            accession_X_snp_matrix, self.output_fname, header, accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index,
        accession_id2row_index, diff_details_ls_type=2)
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("diff_matrix_touched_accession_vs_touched_ecotype")
    print(summary_diff_matrix_ls[0])
    print("diff_matrix_touched_accession_vs_untouched_ecotype")
    print(summary_diff_matrix_ls[1])
    print("diff_matrix_untouched_accession_vs_touched_ecotype")
    print(summary_diff_matrix_ls[2])
    print("diff_matrix_untouched_accession_vs_untouched_ecotype")
    print(summary_diff_matrix_ls[3])

    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]

    if self.latex_output_fname:
        from pymodule.latex import outputMatrixInLatexTable
        # The with-block guarantees the report is flushed and closed, also on errors.
        with open(self.latex_output_fname, 'w') as outf:
            outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
            for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(summary_diff_matrix)
                table_label = 'table_dm%s' % i
                outf.write(outputMatrixInLatexTable(
                    wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))
            # First unused table number. Starting from the last loop index here
            # would give the mismatch table the same label as the last summary
            # table, which LaTeX reports as a multiply-defined label.
            table_no = len(summary_diff_matrix_ls)

            # output the whole diff_details_ls
            outf.write('\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n')
            diff_details_ls = self.beautify_snp_diff_details_ls(
                diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
                alignment_id2start, snp_index2alignment_id)
            table_label = 'table_dm%s' % table_no
            caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
            outf.write(outputMatrixInLatexTable(
                diff_details_ls, caption, table_label,
                header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'SNP',
                           'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call',
                           'sequenom_call']))

            # Strain-wise comparison
            outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
            accession_id_ls.sort()
            for accession_id in accession_id_ls:
                ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
                outf.write('\\subsection{strain %s(accession id=%s)}\n' % (
                    ecotype_id2info_ls[ecotype_id_ls[0]][0], accession_id))
                for ecotype_id in ecotype_id_ls:
                    outf.write('\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n' % (
                        ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1],
                        ecotype_id[0], ecotype_id[1]))
                    e_row_index = ecotype_id2row_index[ecotype_id]
                    a_row_index = accession_id2row_index[accession_id]
                    diff_matrix_ls, diff_details_ls = self.cmp_two_lists(
                        accession_X_snp_matrix[a_row_index, :],
                        accession_X_snp_matrix_touched[a_row_index, :],
                        ecotype_X_snp_matrix[e_row_index, :],
                        ecotype_X_snp_matrix_touched[e_row_index, :],
                        nt_number2diff_matrix_index)
                    wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                        accession_id, ecotype_id[0], ecotype_id[1],
                        ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1])
                    outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

                    if diff_details_ls:
                        diff_details_ls = self.beautify_diff_details_ls(
                            diff_details_ls, snp_index2snp_info_ls, alignment_id2start,
                            snp_index2alignment_id)
                        table_no += 1
                        table_label = 'table_dm%s' % table_no
                        caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                            accession_id, ecotype_id[0], ecotype_id[1])
                        outf.write(outputMatrixInLatexTable(
                            diff_details_ls, caption, table_label,
                            header_ls=['snp', 'chromosome', 'position', 'alignment_id',
                                       'alignment_start', 'pcr_call', 'sequenom_call']))

            # SNP-wise comparison
            outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
            for snp_column in range(accession_X_snp_matrix.shape[1]):
                snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
                alignment_id = snp_index2alignment_id[snp_column]
                alignment_start = alignment_id2start[alignment_id]
                outf.write('\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start))
                diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
                    accession_X_snp_matrix, accession_X_snp_matrix_touched,
                    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                    nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index,
                    accession_id2row_index, snp_column=snp_column, diff_details_ls_type=1)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start)
                outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

                if diff_details_ls:
                    diff_details_ls = self.beautify_snp_diff_details_ls(diff_details_ls, ecotype_id2info_ls)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for SNP %s' % (snp_acc)
                    header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id',
                                 'pcr_call', 'sequenom_call']
                    outf.write(outputMatrixInLatexTable(diff_details_ls, caption, table_label, header_ls))
def OutputCliquesGroupedByRegion(curs, region2clique_id_ls, clique_id2size, clique2ecotype_table, component2clique_table, output_fname, fig_output_dir, stock_db='stock20071008', min_clique_size=3, need_to_draw_figures=0):
    """
    Write a LaTeX report of haplotype cliques, grouped by region, to output_fname.

    2007-10-18

    For every region a subsection is written with a table of the cliques whose
    size is at least min_clique_size (ascending size), followed by one ecotype
    table and one figure reference per clique. A final subsection maps every
    reported clique to the component (cc_id) it comes from.

    Arguments:
        curs: database cursor; execute() and fetchall() are used.
        region2clique_id_ls: dict mapping a region (tuple of strings, joined
            with ' and ' for display) to the clique ids found in it.
        clique_id2size: mapping from clique id to clique size.
        clique2ecotype_table: table linking clique_id to ecotypeid.
        component2clique_table: table queried for the cc_id of each clique.
        output_fname: path of the LaTeX file to (over)write.
        fig_output_dir: directory prefix for the drawn map files.
        stock_db: database holding the ecotype table.
        min_clique_size: cliques smaller than this are not reported.
        need_to_draw_figures: when true, the maps are actually drawn; the
            LaTeX figure references are written either way.
    """
    from variation.src.common import draw_graph_on_map, get_ecotypeid2pos
    from variation.src.common import get_pic_area
    # outputMatrixInLatexTable is used below, so import it here like the
    # sibling outputCliqueInLatexTable does rather than rely on a module global.
    from pymodule.latex import outputFigureInLatex, outputMatrixInLatexTable
    import sys, os

    ecotypeid2pos = get_ecotypeid2pos(curs, '%s.ecotype' % stock_db)
    no_of_regions = 0
    clique_id2region = {}  # for the cc2clique latex table
    # The with-block guarantees the report is flushed and closed, also on errors.
    with open(output_fname, 'w') as outf:
        for region, clique_id_ls in region2clique_id_ls.items():
            region = ' and '.join(region)  # region is tuple, convert it to string
            sys.stderr.write("%s:\n" % region)
            outf.write("\\subsection{%s}\n" % region)
            size_clique_id_ls = [
                [clique_id2size[clique_id], clique_id]
                for clique_id in clique_id_ls
                if clique_id2size[clique_id] >= min_clique_size
            ]
            caption = 'region %s has %s cliques in total but %s cliques above min size %s.' % (
                region, len(clique_id_ls), len(size_clique_id_ls), min_clique_size)
            outf.write(caption + '\n')
            if not size_clique_id_ls:
                continue
            size_clique_id_ls.sort()
            no_of_regions += 1
            table_label = 't_region_%s' % no_of_regions
            clique_id_size_ls = [[row[1], row[0]] for row in size_clique_id_ls]
            header_ls = ['clique_id', 'size']
            outf.write(outputMatrixInLatexTable(clique_id_size_ls, caption, table_label, header_ls))
            for size, clique_id in size_clique_id_ls:
                sys.stderr.write("clique %s, size %s\n" % (clique_id, size))
                # output the table and get ecotype_id_ls
                fig_label = 'fclique%s' % clique_id
                table_label = 'tclique%s' % clique_id
                ecotype_id_ls = outputCliqueInLatexTable(
                    curs, region, clique_id, table_label, fig_label, stock_db, clique2ecotype_table, outf)
                # draw the clique
                site_g, site2weight, site2pos = construct_site_graph_out_of_ecotype_id_ls(
                    ecotype_id_ls, ecotypeid2pos)
                # just the name without filetype extension, like png, eps etc.
                fig_fname = 'haplotype_%s_size%s_%ssites_map' % (
                    clique_id, len(ecotype_id_ls), len(site2pos))
                if need_to_draw_figures:
                    draw_graph_on_map(
                        site_g, site2weight, site2pos,
                        'clique %s, size %s, %s sites of %s' % (
                            clique_id, len(ecotype_id_ls), len(site2pos), region),
                        pic_area=get_pic_area(list(site2pos.values()), 30),
                        output_fname_prefix=os.path.join(fig_output_dir, fig_fname))
                # output the figure in latex
                # relative path of the figure to the latex file
                fig_fname = os.path.join('figures/', '%s.png' % fig_fname)
                caption = 'map of haplotype clique %s has %s ecotypes from %s sites of %s. check Table~\\ref{%s}.' % (
                    clique_id, len(ecotype_id_ls), len(site2pos), region, table_label)
                outf.write(outputFigureInLatex(fig_fname, caption, fig_label))
                clique_id2region[clique_id] = region

        outf.write("\\subsection{which component the clique is from}\n")
        cc_id_clique_id_region_ls = []
        cc_id2clique_id_ls = {}
        for clique_id, region in clique_id2region.items():
            curs.execute("select cc_id from %s where clique_id=%s" % (component2clique_table, clique_id))
            rows = curs.fetchall()
            cc_id = rows[0][0]
            cc_id_clique_id_region_ls.append([cc_id, clique_id, region])
            cc_id2clique_id_ls.setdefault(cc_id, []).append(clique_id)
        cc_id_clique_id_region_ls.sort()
        caption = '%s components versus %s cliques' % (
            len(cc_id2clique_id_ls), len(cc_id_clique_id_region_ls))
        table_label = 't_cc2clique_1'
        header_ls = ['component id', 'clique id', 'region']
        outf.write(outputMatrixInLatexTable(cc_id_clique_id_region_ls, caption, table_label, header_ls))
def run(self):
    """
    Compare PCR (accession) calls against sequenom (ecotype) calls and report.

    Steps:
        1. Connect to the MySQL database self.dbname on self.hostname.
        2. Build the SNP, accession and ecotype lookup structures and the two
           call matrices (ecotype X snp and accession X snp).
        3. Optionally dump the matrices to self.sub_justin_output_fname and
           self.output_fname.
        4. Print the four summary difference matrices to stdout.
        5. If self.latex_output_fname is set, write a LaTeX report with the
           summary tables, the table of all real mismatches, and strain-wise
           and SNP-wise comparisons.

    Every LaTeX table gets a unique label 'table_dm<N>' numbered consecutively.
    Ecotype ids are indexed as (ecotype id, duplicate) pairs in the report.
    """
    from FilterStrainSNPMatrix import FilterStrainSNPMatrix
    FilterStrainSNPMatrix_instance = FilterStrainSNPMatrix()
    import MySQLdb
    conn = MySQLdb.connect(db=self.dbname, host=self.hostname)
    curs = conn.cursor()
    if self.debug:
        import pdb
        pdb.set_trace()
    nt_number2diff_matrix_index = self.get_nt_number2diff_matrix_index(nt2number)
    SNPpos2col_index, snpid2col_index, snp_acc_ls, snp_index2snp_info_ls = self.setup_SNP_dstruc(
        curs, self.snp_locus_table)
    (ecotype_id2accession_id, ecotype_id2row_index, ecotype_id2info_ls, ecotype_id_ls,
     accession_id2row_index, accession_id_ls, accession_id2ecotype_id_ls) = self.setup_accession_ecotype_dstruc(
        curs, self.accession2ecotype_table, self.ecotype_table, self.calls_table)

    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched = self.get_ecotype_X_snp_matrix(
        curs, ecotype_id2row_index, snpid2col_index, self.calls_table)
    if self.sub_justin_output_fname:
        header = ['ecotype_id', 'ecotype_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            ecotype_X_snp_matrix, self.sub_justin_output_fname, header, ecotype_id_ls, ecotype_id_ls)

    alignment_id2positions_to_be_checked_ls, alignment_id2start = self.get_alignment_id2positions_to_be_checked_ls(
        curs, self.alignment_table)
    accession_X_snp_matrix, accession_X_snp_matrix_touched, snp_index2alignment_id = self.get_accession_X_snp_matrix(
        curs, accession_id2row_index, SNPpos2col_index, self.sequence_table, self.alignment_table,
        alignment_id2positions_to_be_checked_ls)
    if self.output_fname:
        header = ['accession_id', 'accession_id'] + snp_acc_ls
        FilterStrainSNPMatrix_instance.write_data_matrix(
            accession_X_snp_matrix, self.output_fname, header, accession_id_ls, accession_id_ls)

    summary_diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
        accession_X_snp_matrix, accession_X_snp_matrix_touched,
        ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
        nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index,
        accession_id2row_index, diff_details_ls_type=2)
    # Single-argument print(...) behaves identically under Python 2 and 3.
    print("diff_matrix_touched_accession_vs_touched_ecotype")
    print(summary_diff_matrix_ls[0])
    print("diff_matrix_touched_accession_vs_untouched_ecotype")
    print(summary_diff_matrix_ls[1])
    print("diff_matrix_untouched_accession_vs_touched_ecotype")
    print(summary_diff_matrix_ls[2])
    print("diff_matrix_untouched_accession_vs_untouched_ecotype")
    print(summary_diff_matrix_ls[3])

    summary_diff_matrix_caption_ls = [
        'PCR-tried vs sequenom-tried',
        'PCR-tried vs sequenom-untried',
        'PCR-untried vs sequenom-tried',
        'PCR-untried vs sequenom-untried',
    ]

    if self.latex_output_fname:
        from pymodule.latex import outputMatrixInLatexTable
        # The with-block guarantees the report is flushed and closed, also on errors.
        with open(self.latex_output_fname, 'w') as outf:
            outf.write('\\section{2010 PCR versus sequenom. summary} \\label{section_summary}\n')
            for i, summary_diff_matrix in enumerate(summary_diff_matrix_ls):
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(summary_diff_matrix)
                table_label = 'table_dm%s' % i
                outf.write(outputMatrixInLatexTable(
                    wrapped_diff_matrix, summary_diff_matrix_caption_ls[i], table_label))
            # First unused table number. Starting from the last loop index here
            # would give the mismatch table the same label as the last summary
            # table, which LaTeX reports as a multiply-defined label.
            table_no = len(summary_diff_matrix_ls)

            # output the whole diff_details_ls
            outf.write('\\section{Real Mismatches between pcr and sequenom (deletion/NA excluded)} \\label{section_real_mismatch}\n')
            diff_details_ls = self.beautify_snp_diff_details_ls(
                diff_details_ls, ecotype_id2info_ls, snp_index2snp_info_ls,
                alignment_id2start, snp_index2alignment_id)
            table_label = 'table_dm%s' % table_no
            caption = 'mismatches between pcr and sequenom data (deletion/NA excluded, sorted by accession id)'
            outf.write(outputMatrixInLatexTable(
                diff_details_ls, caption, table_label,
                header_ls=['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id', 'SNP',
                           'chromosome', 'position', 'alignment_id', 'alignment_start', 'pcr_call',
                           'sequenom_call']))

            # Strain-wise comparison
            outf.write('\\section{2010 PCR versus sequenom for each strain} \\label{section_strain_wise}\n')
            accession_id_ls.sort()
            for accession_id in accession_id_ls:
                ecotype_id_ls = accession_id2ecotype_id_ls[accession_id]
                outf.write('\\subsection{strain %s(accession id=%s)}\n' % (
                    ecotype_id2info_ls[ecotype_id_ls[0]][0], accession_id))
                for ecotype_id in ecotype_id_ls:
                    outf.write('\\subsubsection{corresponding ecotype %s(stkparent=%s, ecotype id=%s, duplicate=%s)}\n' % (
                        ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1],
                        ecotype_id[0], ecotype_id[1]))
                    e_row_index = ecotype_id2row_index[ecotype_id]
                    a_row_index = accession_id2row_index[accession_id]
                    diff_matrix_ls, diff_details_ls = self.cmp_two_lists(
                        accession_X_snp_matrix[a_row_index, :],
                        accession_X_snp_matrix_touched[a_row_index, :],
                        ecotype_X_snp_matrix[e_row_index, :],
                        ecotype_X_snp_matrix_touched[e_row_index, :],
                        nt_number2diff_matrix_index)
                    wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'accession id=%s vs ecotype id=%s, duplicate=%s(nativename=%s, stockparent=%s)' % (
                        accession_id, ecotype_id[0], ecotype_id[1],
                        ecotype_id2info_ls[ecotype_id][0], ecotype_id2info_ls[ecotype_id][1])
                    outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

                    if diff_details_ls:
                        diff_details_ls = self.beautify_diff_details_ls(
                            diff_details_ls, snp_index2snp_info_ls, alignment_id2start,
                            snp_index2alignment_id)
                        table_no += 1
                        table_label = 'table_dm%s' % table_no
                        caption = 'detailed difference for accession id=%s vs ecotype id=%s, duplicate=%s' % (
                            accession_id, ecotype_id[0], ecotype_id[1])
                        outf.write(outputMatrixInLatexTable(
                            diff_details_ls, caption, table_label,
                            header_ls=['snp', 'chromosome', 'position', 'alignment_id',
                                       'alignment_start', 'pcr_call', 'sequenom_call']))

            # SNP-wise comparison
            outf.write('\\section{2010 PCR versus sequenom for each SNP} \\label{section_snp_wise}\n')
            for snp_column in range(accession_X_snp_matrix.shape[1]):
                snp_acc, chromosome, position = snp_index2snp_info_ls[snp_column]
                alignment_id = snp_index2alignment_id[snp_column]
                alignment_start = alignment_id2start[alignment_id]
                outf.write('\\subsection{SNP %s(chrom=%s, pos=%s, alignment id=%s, alignment start=%s)}\n' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start))
                diff_matrix_ls, diff_details_ls = self.cmp_two_matricies(
                    accession_X_snp_matrix, accession_X_snp_matrix_touched,
                    ecotype_X_snp_matrix, ecotype_X_snp_matrix_touched,
                    nt_number2diff_matrix_index, ecotype_id2accession_id, ecotype_id2row_index,
                    accession_id2row_index, snp_column=snp_column, diff_details_ls_type=1)
                wrapped_diff_matrix = self.wrap_diff_matrix_with_row_col_names(diff_matrix_ls[0])
                table_no += 1
                table_label = 'table_dm%s' % table_no
                caption = 'SNP %s(chromosome=%s, position=%s, alignment id=%s, alignment start=%s)' % (
                    snp_acc, chromosome, position, alignment_id, alignment_start)
                outf.write(outputMatrixInLatexTable(wrapped_diff_matrix, caption, table_label))

                if diff_details_ls:
                    diff_details_ls = self.beautify_snp_diff_details_ls(diff_details_ls, ecotype_id2info_ls)
                    table_no += 1
                    table_label = 'table_dm%s' % table_no
                    caption = 'detailed difference for SNP %s' % (snp_acc)
                    header_ls = ['nativename', 'stkparent', 'ecotype_id', 'duplicate', 'accession_id',
                                 'pcr_call', 'sequenom_call']
                    outf.write(outputMatrixInLatexTable(diff_details_ls, caption, table_label, header_ls))