def per_hit_header(self, hit):
    """Return a text summary of how the simulated reaction ended for *hit*.

    When ``self._nonzero`` is false a fixed "no products" message is
    returned. Otherwise the summary lists the number of cycles, the
    remaining dNTP concentration, what was depleted (if the reaction
    ended before ``self._num_cycles``) and the cycle ranges stored under
    the ``'poly'`` key of ``self._reaction_ends[hit]``.
    """
    if not self._nonzero:
        return '\nNo PCR products have been found.\n'
    reaction_end = self._reaction_ends[hit]
    parts = [
        'Reaction ended in: %d cycles.\n' % reaction_end['cycles'],
        'C(dNTP) after reaction: ~%s\n'
        % tdf.format_concentration(reaction_end['dNTP']),
    ]
    # the reaction stopped early: report which reagent ran out
    if reaction_end['cycles'] < self._num_cycles:
        parts.append('dNTP have been depleted.\n'
                     if reaction_end['dNTP'] == 0
                     else 'Primers have been depleted.\n')
    shortage_periods = reaction_end['poly']
    if shortage_periods:
        parts.append(wrap_text('Polymerase activity was '
                               'insufficient in these cycles:\n'))
        # a period whose bounds coincide is printed as a single number
        formatted_periods = [
            str(period[0]) if period[0] == period[1]
            else '%d-%d' % tuple(period)
            for period in shortage_periods
        ]
        parts.append(wrap_text(', '.join(formatted_periods) + '\n'))
    parts.append('\n')
    return ''.join(parts)
def _format_header(self):
    """Return the report header for an optimization run.

    The header starts with the optimization outcome (success, or failure
    together with ``self._exit_status``), followed by the header produced
    by ``iPCR_Base._format_header`` and the report header of
    ``self._PCR_Simulation``.
    """
    if self._optimized:
        outcome = 'Optimization was successful.\n'
        explanation = wrap_text(
            'Results of PCR simulation with optimized values of the parameters '
            'are given below.\n\n')
    else:
        outcome = 'Optimization was unsuccessful: %s\n' % self._exit_status
        explanation = wrap_text(
            'Results of PCR simulation with the values of the parameters '
            'from the last iteration of the optimization are given below. These '
            'values, nevertheless, usually give better simulation results than '
            'the initial ones.\n\n')
    base_header = iPCR_Base._format_header(self)
    simulation_header = self._PCR_Simulation.format_report_header()
    return outcome + explanation + base_header + simulation_header
def format_products_report(self):
    """Return the full text of the PCR products report.

    The report consists of an explanatory legend followed by one section
    per key of ``self._products``. Sections are built by a local worker
    passed to ``self.parallelize_work``. If that call yields nothing, a
    short failure message is returned instead of the report.
    """
    legend = wrap_text('For each target sequence a list of possible '
                       'PCR products which were predicted by the '
                       'simulation is given. '
                       'Information about each product includes:\n'
                       '-start and end positions on a target sequence.\n'
                       ' *the first nucleotide of a sequence is at the position 1\n'
                       ' *primers are not included, so their 3\'-ends are located at '
                       'start-1 and end+1 positions.\n'
                       '-length in base pairs.\n'
                       ' *the length too do not include primers.\n'
                       '-concentration which was calculated during PCR simulation.\n'
                       '-number of PCR cycles during which the product has been generated.\n'
                       ' *if this number is lower than the number of simulated reaction cycles '
                       'it means that primers producing the product were depleted.\n'
                       '-lists of forward and reverse primers which produced the product'
                       '\n\n\n')

    def worker(hit):
        # format the section of a single hit: its products ordered by start
        hit_products = self._products[hit]
        chunks = [
            hr(' %s ' % hit, '*'),
            hr(' %d products have been found ' % len(hit_products), '*'),
        ]
        ordered = sorted(hit_products.values(),
                         key=lambda product: product.start)
        for number, product in enumerate(ordered, 1):
            chunks.append(hr(' product %d ' % number, '='))
            chunks.append(product.pretty_print(
                with_name=False,
                include_fwd_3_mismatch=self._with_exonuclease))
            chunks.append(hr('', '='))
        chunks.append(hr('', '*'))
        return ''.join(chunks)

    sections = self.parallelize_work(1, worker, self._products.keys())
    if not sections:
        return 'Failed to compile products report. See the log for errors.'
    return legend + ''.join(sections) + '\n'
#end class
def _format_primers_report_header(self):
    """Return the header of the primers report.

    The header contains a time rule, an explanation of the report, the
    PCR conditions formatted by ``TD_Functions.format_PCR_conditions``
    and the ``repr`` of every primer in ``self._primers``. When there is
    more than one primer and the ``Tm_min`` values of the first two
    differ by 5 or more, a warning is appended.
    """
    primers = self._primers
    sections = [
        time_hr(),
        wrap_text('For each degenerate primer provided, a set '
                  'of unambiguous primers is generated. '
                  'For each such set the minimum, maximum and '
                  'mean melting temperatures are calculated. '
                  'For each primer in each set stable self-'
                  'dimers and hairpins are predicted. '
                  'For every possible combination of two '
                  'unambiguous primers cross-dimers are also '
                  'predicted. If an unambiguous primer is '
                  'provided, it is treated as a set with a '
                  'single element.\n\n'),
        hr(' PCR conditions '),
        TD_Functions.format_PCR_conditions(primers) + '\n',
        hr(' primers and their melting temperatures '),
    ]
    sections.extend(repr(primer) + '\n' for primer in primers)
    # warning about a large gap between the lowest melting temperatures
    if len(primers) > 1 and abs(primers[0].Tm_min - primers[1].Tm_min) >= 5:
        sections.append('\nWarning: lowest melting temperatures of sense and antisense primes \n')
        sections.append(' differ more then by 5C\n')
    sections.append('\n')
    return ''.join(sections)
def _format_primers_report_header(self):
    """Build and return the introductory part of the primers report.

    Parts, in order: time rule, explanatory text, PCR conditions (via
    ``TD_Functions.format_PCR_conditions``), one ``repr`` line per primer
    of ``self._primers``, and an optional warning that is added when the
    ``Tm_min`` of the first two primers differ by at least 5.
    """
    primers = self._primers
    intro = time_hr()
    intro += wrap_text(
        'For each degenerate primer provided, a set '
        'of unambiguous primers is generated. '
        'For each such set the minimum, maximum and '
        'mean melting temperatures are calculated. '
        'For each primer in each set stable self-'
        'dimers and hairpins are predicted. '
        'For every possible combination of two '
        'unambiguous primers cross-dimers are also '
        'predicted. If an unambiguous primer is '
        'provided, it is treated as a set with a '
        'single element.\n\n')
    conditions = hr(' PCR conditions ')
    conditions += TD_Functions.format_PCR_conditions(primers) + '\n'
    primer_list = hr(' primers and their melting temperatures ')
    primer_list += ''.join(repr(primer) + '\n' for primer in primers)
    warning = ''
    # only the first two primers are compared
    if len(primers) > 1:
        tm_gap = abs(primers[0].Tm_min - primers[1].Tm_min)
        if tm_gap >= 5:
            warning = ('\nWarning: lowest melting temperatures of sense and antisense primes \n'
                       ' differ more then by 5C\n')
    return intro + conditions + primer_list + warning + '\n'
def _format_header(self):
    """Build the header of the optimization report.

    Starts with a line telling whether the optimization succeeded (on
    failure ``self._exit_status`` is included), adds a wrapped note about
    which parameter values the results correspond to, then appends
    ``iPCR_Base._format_header(self)`` and the report header of
    ``self._PCR_Simulation``.
    """
    if self._optimized:
        status_line = 'Optimization was successful.\n'
        note = ('Results of PCR simulation with optimized values of the parameters '
                'are given below.\n\n')
    else:
        status_line = 'Optimization was unsuccessful: %s\n' % self._exit_status
        note = ('Results of PCR simulation with the values of the parameters '
                'from the last iteration of the optimization are given below. These '
                'values, nevertheless, usually give better simulation results than '
                'the initial ones.\n\n')
    pieces = [status_line, wrap_text(note)]
    pieces.append(iPCR_Base._format_header(self))
    pieces.append(self._PCR_Simulation.format_report_header())
    return ''.join(pieces)
def format_quantity_explanation(self):
    """Return the text explaining how product concentrations were estimated.

    Reports ``self._max_objective_value`` and the display threshold
    derived from ``self._min_quantity_factor`` (shown as a percentage).
    """
    # only the second literal carries a format specifier
    objective_value_line = ('(the lower the better):\n %e\n'
                            % self._max_objective_value)
    threshold_percent = self._min_quantity_factor * 100
    parts = [
        hr(' estimation of PCR products concentrations '),
        'The value of the objective function of at the solution '
        + objective_value_line,
        wrap_text('This value shows "distance" to the solution of '
                  'the system of equilibrium equations which were used '
                  'to calculate concentrations of PCR products.\n\n'),
        wrap_text(('Products with concentration less than %.2f%% '
                   'of the concentration of the most abundant '
                   'product or less than initial DNA concentration '
                   'are not shown.'
                   '\n\n') % threshold_percent),
        wrap_text("Boundaries of a product and it's length do "
                  'not include primers.\n\n'),
        '\n',
    ]
    return ''.join(parts)
def format_quantity_explanation(self):
    """Compose the explanation of the product concentration estimates.

    The text states the objective function value
    (``self._max_objective_value``), what it means, which products are
    hidden (threshold is ``self._min_quantity_factor`` as a percentage)
    and that product boundaries exclude primers.
    """
    title = hr(' estimation of PCR products concentrations ')
    # the value is substituted into the second literal only
    formatted_value = '(the lower the better):\n %e\n' % self._max_objective_value
    objective = ('The value of the objective function of at the solution '
                 + formatted_value)
    meaning = wrap_text(
        'This value shows "distance" to the solution of '
        'the system of equilibrium equations which were used '
        'to calculate concentrations of PCR products.\n\n')
    hidden_products_template = ('Products with concentration less than %.2f%% '
                                'of the concentration of the most abundant '
                                'product or less than initial DNA concentration '
                                'are not shown.'
                                '\n\n')
    hidden_products = wrap_text(
        hidden_products_template % (self._min_quantity_factor * 100))
    boundaries = wrap_text('Boundaries of a product and it\'s length do '
                           'not include primers.\n\n')
    return title + objective + meaning + hidden_products + boundaries + '\n'
def _format_header(self):
    """Return the report header: description, base header, simulation header.

    The description is wrapped with ``wrap_text``. It is followed by
    ``iPCR_Base._format_header(self)`` and the report header of
    ``self._PCR_Simulation``.
    """
    description = wrap_text('All possible PCR products are '
                            'filtered by amplicon size.\n'
                            'If --no-exonuclease option was '
                            'provided, products which may be formed by '
                            'primers with '
                            "mismatches on 3'-end are ignored.\n"
                            'Quantities of the remaining products '
                            'are estimated using equilibrium equations '
                            'and current PCR parameters.\n\n')
    base_header = iPCR_Base._format_header(self)
    simulation_header = self._PCR_Simulation.format_report_header()
    return description + base_header + simulation_header
def _format_header(self):
    """Return the report header built from the first stored simulation.

    A wrapped description is followed by the report header and the
    quantity explanation of the first item of
    ``self._PCR_Simulations.values()``.
    """
    description = wrap_text('For each hit all alignments are sorted '
                            'into "forward" and "reverse" groups. '
                            'Pairs of forward and reverse '
                            'alignments comprise possible PCR products.\n'
                            'If --no-exonuclease option was '
                            'provided, alignments with mismatches on '
                            "3'-end are ignored.\n"
                            'Quantities of products are estimated '
                            'using equilibrium equations and '
                            'current PCR parameters.\n')
    # NOTE(review): indexing values() presumes it returns a list
    # (Python 2 dict semantics) - confirm before porting
    first_simulation = self._PCR_Simulations.values()[0]
    report_header = first_simulation.format_report_header()
    quantity_explanation = first_simulation.format_quantity_explanation()
    return description + report_header + quantity_explanation
def format_products_report(self):
    """Compose the report listing the predicted PCR products of every hit.

    A wrapped legend comes first. The per-hit sections are produced by a
    local worker that is handed to ``self.parallelize_work`` together
    with the keys of ``self._products``. A failure message is returned
    when no sections come back.
    """
    legend = wrap_text(
        'For each target sequence a list of possible '
        'PCR products which were predicted by the '
        'simulation is given. '
        'Information about each product includes:\n'
        '-start and end positions on a target sequence.\n'
        ' *the first nucleotide of a sequence is at the position 1\n'
        ' *primers are not included, so their 3\'-ends are located at '
        'start-1 and end+1 positions.\n'
        '-length in base pairs.\n'
        ' *the length too do not include primers.\n'
        '-concentration which was calculated during PCR simulation.\n'
        '-number of PCR cycles during which the product has been generated.\n'
        ' *if this number is lower than the number of simulated reaction cycles '
        'it means that primers producing the product were depleted.\n'
        '-lists of forward and reverse primers which produced the product'
        '\n\n\n')

    def worker(hit):
        # one section per hit; products are listed in order of their start
        found = self._products[hit]
        report = hr(' %s ' % hit, '*')
        report += hr(' %d products have been found ' % len(found), '*')
        by_start = sorted(found.values(), key=lambda prod: prod.start)
        for index, product in enumerate(by_start):
            title = hr(' product %d ' % (index + 1), '=')
            details = product.pretty_print(
                with_name=False,
                include_fwd_3_mismatch=self._with_exonuclease)
            report += title + details + hr('', '=')
        report += hr('', '*')
        return report

    hit_reports = self.parallelize_work(1, worker, self._products.keys())
    if hit_reports:
        return legend + ''.join(hit_reports) + '\n'
    return 'Failed to compile products report. See the log for errors.'
#end class
def formatted_desc(self):
    """Return ``self.desc`` with ``%%`` replaced by ``%``, wrapped and stripped."""
    unescaped = self.desc.replace('%%', '%')
    wrapped = wrap_text(unescaped)
    return wrapped.strip()
'''
Created on 2016-01-14

@author: Allis Tauri <*****@*****.**>

Manual check of wrap_text and line_by_line followed by a profiling run
of wrap_text.
'''

import cProfile

from BioUtils.Tools.Text import wrap_text, line_by_line

if __name__ == '__main__':
    # ``txt`` has to remain a module-level name: the statement profiled
    # at the end of the script refers to it by name
    txt = '''If true, TextWrapper attempts to detect sentence endings and ensure that sentences are always separated by exactly two spaces. This is generally desired for text in a monospaced font. However, the sentence detection algorithm is imperfect: it assumes that a sentence ending consists of a lowercase letter followed by one of '.', '!', or '?', possibly followed by one of '"' or "'", followed by a space. One problem with this is algorithm is that it is unable to detect the difference between “Dr.” in'''
    wrapped = wrap_text(txt)
    print(wrapped)
    print('=' * 80)
    # three columns of text with their widths
    columns = [
        'asdfasd sreydstnsr mywyy eratg AG RADFG SDFGA lkjoiuguivuasdfhpwoiefjahgaiohghouygmuimjiuh',
        'qasf; a[r uq[ewjrfasdhfuiah [WERJ AOSF BA;We werqt',
        'wreyqeuqy a poiertqprohg aspoei toeaprt a',
    ]
    column_widths = [20, 30, 30]
    print(line_by_line(columns, column_widths))
    # profile repeated wrapping; stats are saved to 'word_wrap.profile'
    cProfile.run('''for i in xrange(10000): wrap_text(txt)''',
                 'word_wrap.profile')
    print('Done')
'''
Created on 2016-01-14

@author: Allis Tauri <*****@*****.**>

Prints the output of wrap_text and line_by_line for sample input, then
profiles wrap_text.
'''

import cProfile

from BioUtils.Tools.Text import wrap_text, line_by_line

if __name__ == '__main__':
    # the profiled statement below looks up ``txt`` by name, so this
    # variable must keep its name and stay at module level
    txt = '''If true, TextWrapper attempts to detect sentence endings and ensure that sentences are always separated by exactly two spaces. This is generally desired for text in a monospaced font. However, the sentence detection algorithm is imperfect: it assumes that a sentence ending consists of a lowercase letter followed by one of '.', '!', or '?', possibly followed by one of '"' or "'", followed by a space. One problem with this is algorithm is that it is unable to detect the difference between “Dr.” in'''
    separator = '=' * 80
    sample_texts = [
        'asdfasd sreydstnsr mywyy eratg AG RADFG SDFGA lkjoiuguivuasdfhpwoiefjahgaiohghouygmuimjiuh',
        'qasf; a[r uq[ewjrfasdhfuiah [WERJ AOSF BA;We werqt',
        'wreyqeuqy a poiertqprohg aspoei toeaprt a',
    ]
    sample_widths = [20, 30, 30]
    profile_statement = '''for i in xrange(10000): wrap_text(txt)'''

    print(wrap_text(txt))
    print(separator)
    print(line_by_line(sample_texts, sample_widths))
    # the collected statistics go to the 'word_wrap.profile' file
    cProfile.run(profile_statement, 'word_wrap.profile')
    print('Done')
def write_hits_report(self):
    """Write the report on filtered BLAST hits to the hits report file.

    Returns ``None`` without doing anything when
    ``self._have_blast_results`` is false or when ``self._open_report``
    yields no file object. Otherwise the report is written to the opened
    file, the file is closed (also when writing fails), the file name is
    printed and the report is registered with ``self._add_report``.

    For each record of ``self._blast_results`` an HSP is reported only if
    ``self._duplex_from_hsp`` yields a duplex for it and, unless
    ``self._with_exonuclease`` is set, that duplex has 3' matches. Hits
    left without HSPs are omitted. HSPs within a hit and the hits
    themselves are sorted by dG in ascending order.
    """
    if not self._have_blast_results:
        return
    blast_report = self._open_report('blast hits report',
                                     self._hits_report_filename)
    if not blast_report:
        return
    # make sure the file is closed even if formatting or writing fails
    try:
        # header
        blast_report.write(time_hr())
        blast_report.write(wrap_text('All hits are filtered by dG of the annealing '
                                     'of alignments and, if --no-exonuclease '
                                     'option was provided, hits with mismatches on '
                                     "3'-end are ignored.\n"))
        blast_report.write('\n')
        # filter parameters
        blast_report.write(hr(' filtration parameters '))
        if self._with_exonuclease:
            blast_report.write("DNA polymerase HAS 3'-5'-exonuclease activity\n")
        else:
            blast_report.write("DNA polymerase doesn't have 3'-5'-exonuclease activity\n")
        blast_report.write('Maximum dG of an alignment: %.2f kcal/mol\n' % max_dimer_dG)
        blast_report.write('\n')
        blast_report.write(hr(''))
        blast_report.write('\n\n')
        # print records
        for r, blast_record in enumerate(self._blast_results):
            blast_report.write(hr(' query ID: %s ' % blast_record.query, symbol='#'))
            # filter hits by alignments and format report text for each hit
            hits = []
            for h, hit in enumerate(blast_record.alignments):
                desc = blast_record.descriptions[h]
                # check and format hsps
                hsps = []
                for hsp in hit.hsps:
                    hsp_duplex = self._duplex_from_hsp(hsp)
                    # check if any stable dimers are formed by this duplex
                    if not hsp_duplex:
                        continue
                    # check 3' mismatch
                    if not self._with_exonuclease \
                    and not hsp_duplex.have_3_matches:
                        continue
                    # get primer concentration; reset it for every HSP so
                    # that a value left over from a previous HSP is never
                    # reused when no primer matches this duplex
                    hsp_primer_concentration = None
                    for primer in self._primers:
                        if primer.has_subsequence(hsp_duplex.fwd_seq):
                            hsp_primer_concentration = primer.concentration
                            break
                    # format hsps representation
                    hsp_text = ('score: %d; bits: %d; E-value: %.2e;\n\n'
                                % (hsp.score, hsp.bits, hsp.expect))
                    hsp_text += hsp_duplex.print_most_stable(
                        include_fwd_3_mismatch=self._with_exonuclease)
                    # the conversion degree cannot be computed without a
                    # matching primer, so the line is omitted in that case
                    if hsp_primer_concentration is not None:
                        hsp_text += 'Conversion degree = %.2f%%\n\n' \
                            % (tdf.primer_DNA_conversion_degree(
                                hsp_primer_concentration, hsp_duplex.K)*100)
                    hsp_text += 'Template strand: '
                    if hsp.frame[1] == 1:
                        hsp_text += 'antisense\n'
                        hsp_text += 'Position on template: %d ==> %d\n' \
                            % (hsp.sbjct_start, hsp.sbjct_end)
                    elif hsp.frame[1] == -1:
                        hsp_text += 'sense\n'
                        hsp_text += 'Position on template: %d <== %d\n' \
                            % (hsp.sbjct_start, hsp.sbjct_end)
                    hsp_text += hr('', '.')
                    hsps.append((hsp_duplex.dG, hsp_text))
                # no need to include weak hits to the report
                if not hsps:
                    continue
                # sort hsps by minimum dG
                hsps.sort(key=lambda item: item[0])
                # format hit
                hit_str = wrap_text(hit.title)+'\n'
                hit_str += 'Length: %d\n' % hit.length
                hit_str += 'Max bits: %d\n' % desc.bits
                hit_str += 'Alignments: %d\n' % len(hit.hsps)
                hit_str += '\n'
                hit_str += hr(' %d alignments after filtration ' % len(hsps))
                # the last character of the joined HSP texts is dropped
                hit_str += ''.join(_hsp[1] for _hsp in hsps)[:-1]
                hits.append((hsps[0][0], desc.title, desc.score, desc.bits,
                             desc.e, len(hsps), hit_str))
            if not hits:
                blast_report.write(wrap_text('All hits were filtered out.\n'))
            # write report to the file
            else:
                # print the short list of all hits
                num_hits = len(hits)
                num_hits_len = len(str(num_hits))
                # sort hits by minimum dG
                hits.sort(key=lambda item: item[0])
                # print header
                blast_report.write(hr(' %d hits ' % num_hits, symbol='#'))
                for number, hit in enumerate(hits, 1):
                    # pad by the width of the number that is actually
                    # printed (1-based), so that titles stay aligned
                    spacer = ' '*(num_hits_len-len(str(number)))
                    blast_report.write(wrap_text('%d.%s %s\n'
                                                 % (number, spacer,
                                                    (hit[1].split('|')[-1]).strip())))
                    blast_report.write(('%s min dG: %.2f; score: %d; bits: %d; '
                                        'E-value: %.2e; alignments: %d\n\n')
                                       % (' '*num_hits_len, hit[0], hit[2],
                                          hit[3], hit[4], hit[5]))
                blast_report.write('\n')
                # print each formatted hit
                for number, hit in enumerate(hits, 1):
                    blast_report.write(hr(' Hit #%d ' % number, symbol='='))
                    blast_report.write(hit[6])
                    blast_report.write(hr('', symbol='='))
                    if number < num_hits:
                        blast_report.write('\n\n')
            blast_report.write(hr('', symbol='#'))
            if r < len(self._blast_results)-1:
                blast_report.write('\n\n')
    finally:
        blast_report.close()
    print('\nTop hits with top HSPs from BLAST results were written to:\n ' +
          self._hits_report_filename)
    self._add_report('BLAST hits', self._hits_report_filename)