def test_fileBased(self):
    """Compare ``bindings`` against ``simulatedbindings`` on the input files.

    Each file ``LOCAL_DIR/input_files/<root>_input`` is parsed with
    ``self._convertBoulderInput`` into ``(global_args, seq_args, p3_args)``
    records.  Global arguments accumulate across the records of one file.
    Every record is designed with both implementations and the results are
    compared with ``self._compareResults``.

    Raises:
        RuntimeError: if any record produced a disagreement or an
            unexpected ``IOError``; the message lists every failure.
    """
    test_file_roots = [
        'primer_must_use_th',
        'primer_task_th',
        'primer_thal_args',
        'primer_thal_max_seq_error',
        'primer_first_base_index',
        'test_compl_error',
        'test_left_to_right_of_right',
        'dv_conc_vs_dntp_conc',
        'primer_internal',
        'primer_tm_lc_masking',
        'primer_ok_regions',
        'primer_start_codon',
        'primer_task',
        'primer_renewed_tasks',
        'primer_must_overlap_point',
        'primer_overlap_junction',
        'primer_all_settingsfiles',
        'primer_high_tm_load_set',
        'primer_high_gc_load_set',
        'primer_gc_end',
        'primer_num_best',
        'primer_check',
        'primer_end_pathology',
        'long_seq',
        'p3-tmpl-mispriming'
    ]
    print()
    failures = []
    for fn_root in test_file_roots:
        base_fp = os.path.join(LOCAL_DIR, 'input_files', fn_root)
        input_fp = base_fp + '_input'
        with open(input_fp) as input_fd:
            input_raw = input_fd.read()
        input_dicts = self._convertBoulderInput(input_raw)
        sys.stdout.write('->Testing file {:<40}\r'.format(fn_root))
        sys.stdout.flush()
        current_global_args = {}
        for global_args, seq_args, p3_args in input_dicts:
            test_id = str(seq_args.get('SEQUENCE_ID', ''))
            current_global_args.update(global_args)
            simulated_binding_res = simulatedbindings.designPrimers(
                seq_args, current_global_args)
            wrapper_error = simulated_binding_res.get('PRIMER_ERROR')
            if wrapper_error is not None:
                # The simulated run reported an error, so the real bindings
                # are expected to raise as well; nothing to compare.
                with self.assertRaises(IOError):
                    bindings.designPrimers(seq_args, current_global_args)
                continue
            try:
                binding_res = bindings.designPrimers(
                    seq_args, current_global_args)
            except IOError as exc:
                # A failure is acceptable only when the record's comment
                # announces it.  Either way there is no fresh result, so the
                # comparison below must not run on a stale/unbound value.
                comment = p3_args.get('P3_COMMENT', '')
                if not any(word in comment for word in ('complain', 'fail')):
                    failures.append(
                        (fn_root, test_id,
                         'unexpected IOError from bindings: {}'.format(exc)))
                continue
            disagreements = self._compareResults(
                binding_res, simulated_binding_res)
            if disagreements is not None:
                failures.append((fn_root, test_id, disagreements))
    # Blank out the progress line written with '\r' above.
    print(' ' * 60, end='\r')
    if failures:
        err_msg = ('Failures occured during file testing:\n' +
                   '\n'.join(['->{}\t{}\n{}'.format(*f) for f in failures]))
        raise RuntimeError(err_msg)
def test_memoryLeaks(self):
    """Check that repeated ``bindings.designPrimers`` calls do not leak.

    The value reported by ``_getMemUsage`` is sampled before and after a
    fixed number of identical design runs.  Both samples and their
    difference are printed.

    Raises:
        AssertionError: if the reported usage grew by more than 1000
            between the two samples.
    """
    # Single source of truth for the loop and for the reported run count
    # (the messages previously claimed "1k runs" while the loop ran 100).
    num_runs = 100
    sm = _getMemUsage()
    for _ in range(num_runs):
        bindings.designPrimers(
            {
                "SEQUENCE_ID": "MH1000",
                "SEQUENCE_TEMPLATE": "GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG",
                "SEQUENCE_INCLUDED_REGION": [36, 342],
            },
            {
                "PRIMER_OPT_SIZE": 20,
                "PRIMER_PICK_INTERNAL_OLIGO": 1,
                "PRIMER_INTERNAL_MAX_SELF_END": 8,
                "PRIMER_MIN_SIZE": 18,
                "PRIMER_MAX_SIZE": 25,
                "PRIMER_OPT_TM": 60.0,
                "PRIMER_MIN_TM": 57.0,
                "PRIMER_MAX_TM": 63.0,
                "PRIMER_MIN_GC": 20.0,
                "PRIMER_MAX_GC": 80.0,
                "PRIMER_MAX_POLY_X": 100,
                "PRIMER_INTERNAL_MAX_POLY_X": 100,
                "PRIMER_SALT_MONOVALENT": 50.0,
                "PRIMER_DNA_CONC": 50.0,
                "PRIMER_MAX_NS_ACCEPTED": 0,
                "PRIMER_MAX_SELF_ANY": 12,
                "PRIMER_MAX_SELF_END": 8,
                "PRIMER_PAIR_MAX_COMPL_ANY": 12,
                "PRIMER_PAIR_MAX_COMPL_END": 8,
                "PRIMER_PRODUCT_SIZE_RANGE": [
                    [75, 100],
                    [100, 125],
                    [125, 150],
                    [150, 175],
                    [175, 200],
                    [200, 225],
                ],
            },
        )
    sleep(0.1)  # Pause for any GC
    em = _getMemUsage()
    print("\n\tMemory usage before {} runs of designPrimers: ".format(num_runs), sm)
    print("\tMemory usage after {} runs of designPrimers: ".format(num_runs), em)
    print("\t\t\t\t\tDifference: \t", em - sm)
    if em - sm > 1000:
        raise AssertionError(
            "Memory usage increase after {} runs of \n\t"
            "designPrimers > 1000 bytes -- potential \n\t"
            "memory leak (mem increase: {})".format(num_runs, em - sm)
        )
def makeprimers(name, template, minsize, maxsize):
    """Design primers for *template* and collect the primer sequences.

    Args:
        name: identifier passed to primer3 as ``SEQUENCE_ID`` and used in
            the warning message.
        template: template sequence passed as ``SEQUENCE_TEMPLATE``.
        minsize: lower bound of the single product size range.
        maxsize: upper bound of the single product size range.

    Returns:
        A ``(leftprimers, rightprimers)`` pair of dicts.  Each maps the
        integer primer index (third ``_``-separated field of the result key)
        to the primer sequence.  Both dicts are empty when design fails.
    """
    try:
        binding_res = bindings.designPrimers(
            {
                'SEQUENCE_ID': name,
                'SEQUENCE_TEMPLATE': template,
            },
            {
                'PRIMER_OPT_SIZE': 24,
                'PRIMER_MIN_SIZE': 21,
                'PRIMER_MAX_SIZE': 27,
                'PRIMER_OPT_TM': 58.0,
                'PRIMER_MIN_TM': 56.0,
                'PRIMER_MAX_TM': 63.0,
                'PRIMER_GC_CLAMP': 1,
                'PRIMER_PRODUCT_SIZE_RANGE': [[minsize, maxsize]],
            }
        )
        leftprimers = {}
        rightprimers = {}
        for k, v in binding_res.items():
            if not k.endswith('SEQUENCE'):
                continue
            # Keys look like PRIMER_LEFT_<n>_SEQUENCE / PRIMER_RIGHT_<n>_SEQUENCE.
            if k.startswith('PRIMER_LEFT'):
                leftprimers[int(k.split('_')[2])] = v
            elif k.startswith('PRIMER_RIGHT'):
                rightprimers[int(k.split('_')[2])] = v
        return leftprimers, rightprimers
    except Exception:
        # Best effort by design: report and carry on.  ``Exception`` (rather
        # than a bare ``except``) lets KeyboardInterrupt/SystemExit through.
        sys.stderr.write("warning: primer design failed for " + name + "\n")
        return {}, {}
def test_compareSim(self):
    """Design one sequence with both implementations and compare them.

    A random per-base quality list (values 20..90) is attached to the
    template.  The same arguments go to ``simulatedbindings.designPrimers``
    and ``bindings.designPrimers``, and the results are compared with
    ``self._compareResults``.

    NOTE(review): the quality list is unseeded, so inputs differ per run.
    """
    sequence_template = 'GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG'
    # One quality value per template base.
    quality_list = [random.randint(20, 90) for _ in sequence_template]
    seq_args = {
        'SEQUENCE_ID': 'MH1000',
        'SEQUENCE_TEMPLATE': sequence_template,
        'SEQUENCE_QUALITY': quality_list,
        'SEQUENCE_INCLUDED_REGION': [36, 342]
    }
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 1,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        'PRIMER_PRODUCT_SIZE_RANGE': [[75, 100], [100, 125], [125, 150],
                                      [150, 175], [175, 200], [200, 225]],
    }
    simulated_binding_res = simulatedbindings.designPrimers(seq_args, global_args)
    binding_res = bindings.designPrimers(seq_args, global_args)
    # _compareResults reports disagreements through its return value (None
    # means agreement); discarding it would let the test pass regardless.
    disagreements = self._compareResults(binding_res, simulated_binding_res)
    self.assertIsNone(disagreements, disagreements)
def test_CompareSim(self):
    """Check that ``bindings`` and ``simulatedbindings`` agree on one design.

    The template gets a random quality score (20..90) per base.  Both
    implementations receive identical sequence and global arguments, and
    ``self._compareResults`` must report no disagreement.

    NOTE(review): the quality scores are unseeded, so inputs vary per run.
    """
    sequence_template = 'GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG'
    quality_list = [random.randint(20, 90) for _ in sequence_template]
    seq_args = {
        'SEQUENCE_ID': 'MH1000',
        'SEQUENCE_TEMPLATE': sequence_template,
        'SEQUENCE_QUALITY': quality_list,
        'SEQUENCE_INCLUDED_REGION': [36, 342]
    }
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 1,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        'PRIMER_PRODUCT_SIZE_RANGE': [[75, 100], [100, 125], [125, 150],
                                      [150, 175], [175, 200], [200, 225]],
    }
    simulated_binding_res = simulatedbindings.designPrimers(seq_args, global_args)
    binding_res = bindings.designPrimers(seq_args, global_args)
    # The comparison helper returns None on agreement and a description of
    # the disagreements otherwise; the result has to be asserted, not dropped.
    disagreements = self._compareResults(binding_res, simulated_binding_res)
    self.assertIsNone(disagreements, disagreements)
def for_record(cls, record: SeqRecord) -> "Primers":
    """Build the Primers that amplify a whole SeqRecord.

    primer3 is asked for a single cloning primer pair whose product spans
    the full length of the record.

    Args:
        record: the sequence to amplify via primers

    Returns:
        The Primers to PCR the record, as built by ``cls.from_p3`` from the
        primer3 output
    """
    template = str(record.seq)
    seq_args = {
        "SEQUENCE_TEMPLATE": template,
        "SEQUENCE_INCLUDED_REGION": [0, len(record.seq)],
    }
    global_args = {
        # Product must be exactly as long as the record itself.
        "PRIMER_PRODUCT_SIZE_RANGE": [len(record.seq), len(record.seq)],
        "PRIMER_TASK": "pick_cloning_primers",
        "PRIMER_NUM_RETURN": 1,
        "PRIMER_PICK_ANYWAY": 1,
        "PRIMER_MIN_SIZE": MIN_PRIMER_LEN,
    }
    return cls.from_p3(designPrimers(seq_args, global_args))
def findPrimers(inputData, resultFormat="better"):
    """
    Run primer3 on the given input and return the result in the given format.

    Parameters
    ----------
    inputData :
        Raw input; it is passed through ``transformInput``, whose result
        must provide ``'seq_args'`` and ``'global_args'`` entries.
    resultFormat : str, optional
        ``"better"`` (default) returns ``createBetterResult(result)``;
        any other value returns the raw ``designPrimers`` result.

    Returns
    -------
    result :
        The raw or post-processed primer3 result, depending on
        ``resultFormat``.

    Raises
    ------
    Exception
        With the message ``'input data is broken'`` when ``designPrimers``
        fails on the transformed input.
    """
    inputData = transformInput(inputData)
    try:
        result = designPrimers(inputData['seq_args'], inputData['global_args'])
    except Exception:
        # Only real errors are converted; a bare ``except`` would also turn
        # KeyboardInterrupt/SystemExit into "input data is broken".
        raise Exception('input data is broken')
    if resultFormat == "better":
        return createBetterResult(result)
    return result
def make_primers(region):
    """Design one primer pair around a region named ``contig[start, stop)``.

    Args:
        region: string of the form ``<contig>[<start>, <stop>)``; the contig
            name is looked up in the module-level ``contigs_dict``.

    Returns:
        A ``pd.Series`` indexed by ``PRIMER_LEFT_0_SEQUENCE``,
        ``PRIMER_RIGHT_0_SEQUENCE``, ``PRIMER_PAIR_0_PENALTY``,
        ``PRIMER_PAIR_0_PRODUCT_SIZE``, ``amplicon_start`` and
        ``amplicon_end``.  All six values are NaN when primer design fails
        or returns no pair.

    Raises:
        AttributeError: if *region* does not match the expected format.
        KeyError: if the contig is not present in ``contigs_dict``.
    """
    # Designs primers to span a 500 bp region
    # Raw string: '\[', '\d' and '\)' are invalid escapes in a plain literal.
    contig, start, _ = re.search(r'(^.+?)\[(\d+), (\d+)\)', region).groups()
    start = int(start)
    seq = contigs_dict[contig]
    index = [
        'PRIMER_LEFT_0_SEQUENCE', 'PRIMER_RIGHT_0_SEQUENCE',
        'PRIMER_PAIR_0_PENALTY', 'PRIMER_PAIR_0_PRODUCT_SIZE',
        'amplicon_start', 'amplicon_end'
    ]
    try:
        primers = bindings.designPrimers(
            {
                'SEQUENCE_ID': region,
                'SEQUENCE_TEMPLATE': str(seq.seq),
                'SEQUENCE_INCLUDED_REGION': [start - 500, 2100],
                'SEQUENCE_TARGET': [[start - 200, 400]],
            }, {
                'PRIMER_PRODUCT_SIZE_RANGE': [[75, 2100]],
                'PRIMER_EXPLAIN_FLAG': 1,
                'PRIMER_MAX_TM': 68,
                'PRIMER_MIN_TM': 52,
                'PRIMER_PICK_INTERNAL_OLIGO': 0
            })
        return pd.Series([
            primers['PRIMER_LEFT_0_SEQUENCE'],
            primers['PRIMER_RIGHT_0_SEQUENCE'],
            primers['PRIMER_PAIR_0_PENALTY'],
            primers['PRIMER_PAIR_0_PRODUCT_SIZE'],
            primers['PRIMER_LEFT_0'][0],
            primers['PRIMER_RIGHT_0'][0] + primers['PRIMER_RIGHT_0'][1]
        ], index=index)
    except Exception:
        # Best effort: a failed design (or a result without pair 0) becomes
        # a row of NaNs.  KeyboardInterrupt/SystemExit are not swallowed.
        return pd.Series([np.nan] * 6, index=index)
def _primer3py(
    self,
    seq_args: dict = None,
    global_args: dict = None,
) -> dict:
    """Run primer3-py on this part's basic slice and return its result dict.

    The template is ``self.basic_slice().seq``; per the original
    documentation this is the sequence flanked by iP and iS sequences.

    Notes:
        Refer to the primer3-py documentation for further information.

    Args:
        seq_args: Primer3 sequence arguments. Entries given here override
            the default ``SEQUENCE_TEMPLATE``; if left blank only the
            default is used.
        global_args: Primer3 global arguments. Entries given here override
            the defaults (generic task, pick left and right primer, product
            size equal to the template length).
    """
    template = str(self.basic_slice().seq)

    merged_seq_args = {"SEQUENCE_TEMPLATE": template}
    merged_seq_args.update(seq_args or {})

    merged_global_args = {
        "PRIMER_TASK": "generic",
        "PRIMER_PICK_LEFT_PRIMER": 1,
        "PRIMER_PICK_RIGHT_PRIMER": 1,
        # Force the product to cover the whole template.
        "PRIMER_PRODUCT_SIZE_RANGE": [[len(template), len(template)]],
    }
    merged_global_args.update(global_args or {})

    return designPrimers(merged_seq_args, merged_global_args)
def primer_main(args):
    """Design PCR primers for every record of a result file.

    Reads ``args.result_file`` line by line.  The header line (starting with
    ``Chr_1<TAB>Pos_1``) is parsed with ``header_info.read`` and echoed with
    five extra ``Primer1..Primer5`` columns.  For each remaining record the
    ref/alt contigs are written to ``<args.output>.contig.tmp.fa`` via
    ``realignmentFunction.getRefAltForSV``.  Primers are then designed on
    the sequence following each header that ends in ``alt``, and the record
    is written to ``args.output`` with up to five ``left;right`` primer
    pairs (``---`` where no pair was returned).

    Args:
        args: parsed command-line namespace; ``output``, ``result_file`` and
            ``reference`` are read.
    """
    from genomonsv import realignmentFunction
    from primer3 import bindings

    # make directory for output if necessary
    output_dir = os.path.dirname(args.output)
    if output_dir != "" and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # yaml input
    param = {"reference_genome": args.reference, "split_refernece_thres": 1000, "validate_sequence_length": 250}

    contig_path = args.output + ".contig.tmp.fa"

    # Both handles are context-managed so the output file is closed even
    # when a record fails part-way through.
    with open(args.output, 'w') as hout, open(args.result_file, 'r') as hin:
        for line in hin:

            if line.startswith("Chr_1" + '\t' + "Pos_1"):
                line = line.rstrip('\n')
                header_info.read(line)
                hout.write(line + '\t' + "Primer1" + '\t' + "Primer2" + '\t' + "Primer3" + '\t' + "Primer4" + '\t' + "Primer5" + '\n')
                continue

            F = line.rstrip('\n').split('\t')
            chr1, pos1, dir1, chr2, pos2, dir2, junc_seq = F[header_info.chr_1], F[header_info.pos_1], F[header_info.dir_1], \
                                                           F[header_info.chr_2], F[header_info.pos_2], F[header_info.dir_2], F[header_info.inserted_seq]

            # "---" marks a record without inserted sequence.
            junc_seq_len = 0 if junc_seq == "---" else len(junc_seq)

            realignmentFunction.getRefAltForSV(contig_path, param, chr1, pos1, dir1, chr2, pos2, dir2, junc_seq)

            with open(contig_path) as hin2:
                lines2 = [row.rstrip('\n') for row in hin2]

            for idx, header in enumerate(lines2):
                if not (header.startswith('>') and header.endswith("alt")):
                    continue

                # The sequence is the line right after its FASTA header.
                seq = lines2[idx + 1]

                primer = bindings.designPrimers(
                    {
                        'SEQUENCE_ID': 'MH1000',
                        'SEQUENCE_TEMPLATE': seq,
                        'SEQUENCE_TARGET': [225, 50 + junc_seq_len],
                        'SEQUENCE_INCLUDED_REGION': [10, len(seq) - 20]
                    },
                    {
                        'PRIMER_PRODUCT_SIZE_RANGE': [[150, 250], [100, 300], [301, 400], [401, 500]],
                    })

                primer_left_right = ["---"] * 5
                for rank in range(5):
                    left_key = "PRIMER_LEFT_" + str(rank) + "_SEQUENCE"
                    right_key = "PRIMER_RIGHT_" + str(rank) + "_SEQUENCE"
                    if left_key in primer and right_key in primer:
                        primer_left_right[rank] = primer[left_key] + ";" + primer[right_key]

                hout.write('\t'.join(F) + '\t' + '\t'.join(primer_left_right) + '\n')

    # Remove the scratch FASTA; like ``rm -rf`` it is fine if it is absent.
    if os.path.exists(contig_path):
        os.remove(contig_path)
def CheckingPrimersWidth_(self, crFasta, start, end, width):
    """Design checking primers on the flanks of ``[start, end)``.

    The template is the ``width`` bases before ``start`` joined to the
    ``width`` bases after ``end`` of ``crFasta.sequence``.  A 160-base
    window starting at ``width - 80`` (i.e. around the junction of the two
    flanks) is excluded from primer placement.

    Returns:
        A list with one dict per alternative, ``self._numAlternativeCheckings``
        in total.  Each dict holds the left/right primer sequence, Tm and GC
        percent, the pair product size, and ``'negative_result'``: the
        product size plus ``end - start``.
    """
    upstream = crFasta.sequence[start - width:start]
    downstream = crFasta.sequence[end:end + width]

    # build dictionaries
    seq_args = {
        'SEQUENCE_ID': 'MH1000',
        'SEQUENCE_TEMPLATE': upstream + downstream,
        'SEQUENCE_INCLUDED_REGION': [0, 2 * width],
        'SEQUENCE_EXCLUDED_REGION': [width - 80, 160]
    }
    global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 1,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        'PRIMER_PRODUCT_SIZE_RANGE': [[width - 75, 2 * width]],
    }
    design = primer3.designPrimers(seq_args, global_args)

    # Result-key templates; '%s' is filled with the alternative's index.
    # Some entries repeat, which is harmless: they map to the same dict key.
    key_templates = [
        'PRIMER_LEFT_%s_SEQUENCE', 'PRIMER_LEFT_%s_SEQUENCE',
        'PRIMER_RIGHT_%s_SEQUENCE', 'PRIMER_LEFT_%s_TM',
        'PRIMER_RIGHT_%s_TM', 'PRIMER_LEFT_%s_GC_PERCENT',
        'PRIMER_RIGHT_%s_GC_PERCENT', 'PRIMER_PAIR_%s_PRODUCT_SIZE',
        'PRIMER_LEFT_%s_TM', 'PRIMER_RIGHT_%s_TM'
    ]

    checks = []
    for alt in range(self._numAlternativeCheckings):
        entry = {}
        for template in key_templates:
            entry[template % alt] = design[template % alt]
        product_size = design['PRIMER_PAIR_%s_PRODUCT_SIZE' % alt]
        entry['negative_result'] = product_size + (end - start)
        checks.append(entry)
    return checks
def test_memoryLeaks(self):
    """Check that repeated ``bindings.designPrimers`` calls do not leak.

    Samples ``_getMemUsage`` before and after a fixed number of identical
    design runs, and prints both samples and their difference.

    Raises:
        AssertionError: if the reported usage grew by more than 1000
            between the two samples.
    """
    # The loop and every message share this count; the messages used to
    # say "1k runs" although only 100 runs were performed.
    num_runs = 100
    sm = _getMemUsage()
    for _ in range(num_runs):
        bindings.designPrimers(
            {
                'SEQUENCE_ID': 'MH1000',
                'SEQUENCE_TEMPLATE': 'GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG',
                'SEQUENCE_INCLUDED_REGION': [36, 342]
            },
            {
                'PRIMER_OPT_SIZE': 20,
                'PRIMER_PICK_INTERNAL_OLIGO': 1,
                'PRIMER_INTERNAL_MAX_SELF_END': 8,
                'PRIMER_MIN_SIZE': 18,
                'PRIMER_MAX_SIZE': 25,
                'PRIMER_OPT_TM': 60.0,
                'PRIMER_MIN_TM': 57.0,
                'PRIMER_MAX_TM': 63.0,
                'PRIMER_MIN_GC': 20.0,
                'PRIMER_MAX_GC': 80.0,
                'PRIMER_MAX_POLY_X': 100,
                'PRIMER_INTERNAL_MAX_POLY_X': 100,
                'PRIMER_SALT_MONOVALENT': 50.0,
                'PRIMER_DNA_CONC': 50.0,
                'PRIMER_MAX_NS_ACCEPTED': 0,
                'PRIMER_MAX_SELF_ANY': 12,
                'PRIMER_MAX_SELF_END': 8,
                'PRIMER_PAIR_MAX_COMPL_ANY': 12,
                'PRIMER_PAIR_MAX_COMPL_END': 8,
                'PRIMER_PRODUCT_SIZE_RANGE': [[75, 100], [100, 125], [125, 150],
                                              [150, 175], [175, 200], [200, 225]],
            })
    sleep(0.1)  # Pause for any GC
    em = _getMemUsage()
    print('\n\tMemory usage before {} runs of designPrimers: '.format(num_runs), sm)
    print('\tMemory usage after {} runs of designPrimers: '.format(num_runs), em)
    print('\t\t\t\t\tDifference: \t', em - sm)
    if em - sm > 1000:
        raise AssertionError('Memory usage increase after {} runs of \n\t'
                             'designPrimers > 1000 bytes -- potential \n\t'
                             'memory leak (mem increase: {})'.format(
                                 num_runs, em - sm))
def test_memoryLeaks(self):
    """Guard against memory growth across repeated ``designPrimers`` calls.

    ``_getMemUsage`` is read once before and once after a fixed number of
    identical design runs.  Both readings and their difference are printed.

    Raises:
        AssertionError: if the second reading exceeds the first by more
            than 1000.
    """
    # Drives both the loop and the reported run count, so the messages
    # cannot drift from what was executed (they used to claim "1k runs").
    num_runs = 100
    sm = _getMemUsage()
    for _ in range(num_runs):
        bindings.designPrimers(
            {
                'SEQUENCE_ID': 'MH1000',
                'SEQUENCE_TEMPLATE': 'GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG',
                'SEQUENCE_INCLUDED_REGION': [36, 342]
            },
            {
                'PRIMER_OPT_SIZE': 20,
                'PRIMER_PICK_INTERNAL_OLIGO': 1,
                'PRIMER_INTERNAL_MAX_SELF_END': 8,
                'PRIMER_MIN_SIZE': 18,
                'PRIMER_MAX_SIZE': 25,
                'PRIMER_OPT_TM': 60.0,
                'PRIMER_MIN_TM': 57.0,
                'PRIMER_MAX_TM': 63.0,
                'PRIMER_MIN_GC': 20.0,
                'PRIMER_MAX_GC': 80.0,
                'PRIMER_MAX_POLY_X': 100,
                'PRIMER_INTERNAL_MAX_POLY_X': 100,
                'PRIMER_SALT_MONOVALENT': 50.0,
                'PRIMER_DNA_CONC': 50.0,
                'PRIMER_MAX_NS_ACCEPTED': 0,
                'PRIMER_MAX_SELF_ANY': 12,
                'PRIMER_MAX_SELF_END': 8,
                'PRIMER_PAIR_MAX_COMPL_ANY': 12,
                'PRIMER_PAIR_MAX_COMPL_END': 8,
                'PRIMER_PRODUCT_SIZE_RANGE': [[75, 100], [100, 125], [125, 150],
                                              [150, 175], [175, 200], [200, 225]],
            })
    sleep(0.1)  # Pause for any GC
    em = _getMemUsage()
    print('\n\tMemory usage before {} runs of designPrimers: '.format(num_runs), sm)
    print('\tMemory usage after {} runs of designPrimers: '.format(num_runs), em)
    print('\t\t\t\t\tDifference: \t', em - sm)
    if em - sm > 1000:
        raise AssertionError('Memory usage increase after {} runs of \n\t'
                             'designPrimers > 1000 bytes -- potential \n\t'
                             'memory leak (mem increase: {})'.format(
                                 num_runs, em - sm))
def make_primers(sequence):
    '''Design primers for *sequence* with the module-level primer3 settings.

    The sequence is stored as ``SEQUENCE_TEMPLATE`` in the shared
    ``primer3_seq_args`` dict (which is therefore modified in place), and
    the ``designPrimers`` result is returned unchanged.

    default iSECT values, for reference:
        size: optimal=21 min=18 max=28
        Tm: opt=61 Min=53 Max=71
        %GC: min=20 max=80
        clamp=1
        maxN=300
        ext5=300
        ext3=300
        primer_zone=200
        BPos=110 (distance from LB primer to insertion site)
    '''
    primer3_seq_args.update(SEQUENCE_TEMPLATE=sequence)
    return designPrimers(primer3_seq_args, primer3_primer_args)
# Ask primer3 for a single primer pair (PRIMER_NUM_RETURN = 1, no internal
# oligo) on `seq`, identified as `seq_id`.
# NOTE(review): per the primer3 manual, SEQUENCE_FORCE_LEFT_END fixes the 3'
# end of the left primer at the given position -- confirm 249 matches the
# layout of `seq` built by the caller.
primer = bindings.designPrimers(
    {
        'SEQUENCE_ID': seq_id,
        'SEQUENCE_TEMPLATE': seq,
        'SEQUENCE_FORCE_LEFT_END': 249
    },
    {
        'PRIMER_OPT_SIZE': 22,
        'PRIMER_NUM_RETURN': 1,
        'PRIMER_PICK_INTERNAL_OLIGO': 0,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        # NOTE(review): the ranges jump from [150, 175] to [200, 225]; there
        # is no [175, 200] entry -- confirm the gap is intentional.
        'PRIMER_PRODUCT_SIZE_RANGE': [[100, 125], [125, 150], [
            150, 175
        ], [200, 225], [225, 250], [250, 275], [275, 300]],
    })
def primer3(request):
    """Design primers for a gene or a raw sequence from a JSON request.

    The JSON body must provide exactly one of ``gene_name`` or ``sequence``.
    For a gene name the locus is looked up by gene or systematic name and
    its ``1KB`` DNA sequence annotation for the reference strain is used as
    the template.  Target coordinates are then offset by 1000, on the
    assumption that this template carries 1000 flanking bases.  For a raw
    sequence, line breaks are removed and the coordinates are used as given.

    Further request fields: ``input_start``/``input_end`` (target), the
    Tm/GC/length minimum, optimum and maximum, the four complementarity
    limits, ``maximum_product_size`` (optional) and ``end_point``
    (``'YES'`` forces the primer start positions onto the target ends,
    ``'NO'`` leaves them free).

    Args:
        request: request object exposing the parsed body as ``json_body``.

    Returns:
        On success a dict with ``result`` (from ``primer3_parser``),
        ``gene_name``, ``seq`` and ``input`` (``'seq'`` or ``'name'``).
        On invalid input or a primer3 failure an ``HTTPBadRequest`` whose
        body is a JSON object with an ``error`` message.
    """
    params = request.json_body

    # ``dict.get`` yields None for absent keys.  Binding each name only when
    # its key is present left it undefined otherwise, and the checks below
    # then failed with UnboundLocalError instead of a clean 400.
    gene_name = params.get('gene_name')
    sequence = params.get('sequence')

    if gene_name is not None and sequence is not None:
        return HTTPBadRequest(
            body=json.dumps({'error': 'Both gene name AND sequence provided'}))
    if gene_name is None and sequence is None:
        return HTTPBadRequest(
            body=json.dumps({'error': 'No gene name OR sequence provided'}))

    if gene_name is None:
        decodeseq = sequence
        sequence = str(sequence.replace('\r', '').replace('\n', ''))
        input_type = 'seq'
    else:
        gene_name = gene_name.upper()
        locus = DBSession.query(Locusdbentity).filter(
            or_(Locusdbentity.gene_name == gene_name,
                Locusdbentity.systematic_name == gene_name)).one_or_none()
        if locus is None:
            return HTTPBadRequest(body=json.dumps({
                'error':
                'Gene name provided does not exist in the database:  ' +
                gene_name
            }))
        tax_id = DBSession.query(Straindbentity.taxonomy_id).filter(
            Straindbentity.strain_type == 'Reference').one_or_none()
        dna = DBSession.query(Dnasequenceannotation.residues).filter(
            and_(Dnasequenceannotation.taxonomy_id == tax_id,
                 Dnasequenceannotation.dbentity_id == locus.dbentity_id,
                 Dnasequenceannotation.dna_type == '1KB')).one_or_none()
        if dna is None:
            return HTTPBadRequest(body=json.dumps({
                'error':
                'Sequence for provided gene name does not exist in the database:  '
                + gene_name
            }))
        decodeseq = dna
        # NOTE(review): this strips 3 characters from each end of the
        # string form of the query row -- presumably the "(u'" / "',)"
        # wrapper of a Python 2 repr.  Confirm the offsets for the runtime
        # in use; behaviour is deliberately left unchanged here.
        sequence = str(dna)
        sequence = sequence[3:-3]
        input_type = 'name'

    maximum_tm = params.get('maximum_tm')
    minimum_tm = params.get('minimum_tm')
    optimum_tm = params.get('optimum_tm')
    maximum_gc = params.get('maximum_gc')
    minimum_gc = params.get('minimum_gc')
    optimum_gc = params.get('optimum_gc')
    maximum_length = params.get('maximum_length')
    minimum_length = params.get('minimum_length')
    optimum_primer_length = params.get('optimum_primer_length')
    max_three_prime_pair_complementarity = params.get(
        'max_three_prime_pair_complementarity')
    max_pair_complementarity = params.get('max_pair_complementarity')
    max_three_prime_self_complementarity = params.get(
        'max_three_prime_self_complementarity')
    max_self_complementarity = params.get('max_self_complementarity')
    input_end = params.get('input_end')
    input_start = params.get('input_start')
    maximum_product_size = params.get('maximum_product_size')
    end_point = params.get('end_point')

    # The target arithmetic below needs both coordinates.
    if input_start is None or input_end is None:
        return HTTPBadRequest(body=json.dumps(
            {'error': 'input_start and input_end are required'}))

    if gene_name is None:
        target_start = input_start
        target_extend_by = input_end - input_start
    else:
        # Gene coordinates are shifted by 1000 to index into the template.
        if input_start < 0:
            target_start = 1000 - abs(input_start)
            target_extend_by = (1000 + input_end) - target_start
        else:
            target_start = 1000 + input_start
            target_extend_by = input_end - input_start

    interval_range = [[100, 150], [150, 250], [100, 300], [301, 400],
                      [401, 500], [501, 600], [601, 700], [701, 850],
                      [851, 1000]]
    if maximum_product_size:
        range_start = target_extend_by
        range_stop = maximum_product_size
        if maximum_product_size < target_extend_by:
            return HTTPBadRequest(body=json.dumps({
                'error':
                'Maximum product size cannot be less than target size.'
            }))
        interval_range = [[range_start, range_stop]]
    elif (target_extend_by > 800):
        interval_range = [[target_extend_by, target_extend_by + 300]]

    sequence_target = [[target_start, target_extend_by]]
    if end_point == 'YES':
        # Primers must start exactly at the target ends; no target region.
        force_left_start = target_start
        force_right_start = target_start + target_extend_by
        sequence_target = []
    elif end_point == 'NO':
        force_left_start = -1000000
        force_right_start = -1000000
    else:
        # Previously this fell through and surfaced as an opaque
        # "referenced before assignment" error from the handler below.
        return HTTPBadRequest(body=json.dumps(
            {'error': 'end_point must be YES or NO'}))

    try:
        result = bindings.designPrimers(
            {
                'SEQUENCE_ID': str(gene_name),
                'SEQUENCE_TEMPLATE': sequence,
                'SEQUENCE_TARGET': sequence_target,
                'SEQUENCE_FORCE_LEFT_START': force_left_start,
                'SEQUENCE_FORCE_RIGHT_START': force_right_start
            }, {
                'PRIMER_FIRST_BASE_INDEX': 1,
                'PRIMER_THERMODYNAMIC_OLIGO_ALIGNMENT': 1,
                'PRIMER_THERMODYNAMIC_TEMPLATE_ALIGNMENT': 0,
                'PRIMER_PICK_LEFT_PRIMER': 1,
                'PRIMER_PICK_INTERNAL_OLIGO': 0,
                'PRIMER_PICK_RIGHT_PRIMER': 1,
                'PRIMER_LIBERAL_BASE': 1,
                'PRIMER_LIB_AMBIGUITY_CODES_CONSENSUS': 0,
                'PRIMER_LOWERCASE_MASKING': 0,
                'PRIMER_PICK_ANYWAY': 0,
                'PRIMER_EXPLAIN_FLAG': 1,
                'PRIMER_MASK_TEMPLATE': 0,
                'PRIMER_TASK': 'generic',
                'PRIMER_MASK_FAILURE_RATE': 0.1,
                'PRIMER_MASK_5P_DIRECTION': 1,
                'PRIMER_MASK_3P_DIRECTION': 0,
                'PRIMER_MIN_QUALITY': 0,
                'PRIMER_MIN_END_QUALITY': 0,
                'PRIMER_QUALITY_RANGE_MIN': 0,
                'PRIMER_QUALITY_RANGE_MAX': 100,
                'PRIMER_MIN_SIZE': minimum_length,
                'PRIMER_OPT_SIZE': optimum_primer_length,
                'PRIMER_MAX_SIZE': maximum_length,
                'PRIMER_MIN_TM': minimum_tm,
                'PRIMER_OPT_TM': optimum_tm,
                'PRIMER_MAX_TM': maximum_tm,
                'PRIMER_PAIR_MAX_DIFF_TM': 5.0,
                'PRIMER_TM_FORMULA': 1,
                'PRIMER_PRODUCT_MIN_TM': -1000000.0,
                'PRIMER_PRODUCT_OPT_TM': 0.0,
                'PRIMER_PRODUCT_MAX_TM': 1000000.0,
                'PRIMER_MIN_GC': minimum_gc,
                'PRIMER_OPT_GC_PERCENT': optimum_gc,
                'PRIMER_MAX_GC': maximum_gc,
                'PRIMER_PRODUCT_SIZE_RANGE': interval_range,
                'PRIMER_NUM_RETURN': 5,
                'PRIMER_MAX_END_STABILITY': 9.0,
                'PRIMER_MAX_LIBRARY_MISPRIMING': 12.00,
                'PRIMER_PAIR_MAX_LIBRARY_MISPRIMING': 20.00,
                'PRIMER_MAX_SELF_ANY_TH': 45.0,
                'PRIMER_MAX_SELF_END_TH': 35.0,
                'PRIMER_PAIR_MAX_COMPL_ANY_TH': 45.0,
                'PRIMER_PAIR_MAX_COMPL_END_TH': 35.0,
                'PRIMER_MAX_HAIRPIN_TH': 24.0,
                'PRIMER_MAX_SELF_ANY': max_self_complementarity,
                'PRIMER_MAX_SELF_END': max_three_prime_self_complementarity,
                'PRIMER_PAIR_MAX_COMPL_ANY': max_pair_complementarity,
                'PRIMER_PAIR_MAX_COMPL_END': max_three_prime_pair_complementarity,
                'PRIMER_MAX_TEMPLATE_MISPRIMING_TH': 40.00,
                'PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING_TH': 70.00,
                'PRIMER_MAX_TEMPLATE_MISPRIMING': 12.00,
                'PRIMER_PAIR_MAX_TEMPLATE_MISPRIMING': 24.00,
                'PRIMER_MAX_NS_ACCEPTED': 0,
                'PRIMER_MAX_POLY_X': 4,
                'PRIMER_INSIDE_PENALTY': -1.0,
                'PRIMER_OUTSIDE_PENALTY': 0,
                'PRIMER_GC_CLAMP': 0,
                'PRIMER_MAX_END_GC': 5,
                'PRIMER_MIN_LEFT_THREE_PRIME_DISTANCE': 3,
                'PRIMER_MIN_RIGHT_THREE_PRIME_DISTANCE': 3,
                'PRIMER_MIN_5_PRIME_OVERLAP_OF_JUNCTION': 7,
                'PRIMER_MIN_3_PRIME_OVERLAP_OF_JUNCTION': 4,
                'PRIMER_SALT_MONOVALENT': 50.0,
                'PRIMER_SALT_CORRECTIONS': 1,
                'PRIMER_SALT_DIVALENT': 1.5,
                'PRIMER_DNTP_CONC': 0.6,
                'PRIMER_DNA_CONC': 50.0,
                'PRIMER_SEQUENCING_SPACING': 500,
                'PRIMER_SEQUENCING_INTERVAL': 250,
                'PRIMER_SEQUENCING_LEAD': 50,
                'PRIMER_SEQUENCING_ACCURACY': 20,
                'PRIMER_WT_SIZE_LT': 1.0,
                'PRIMER_WT_SIZE_GT': 1.0,
                'PRIMER_WT_TM_LT': 1.0,
                'PRIMER_WT_TM_GT': 1.0,
                'PRIMER_WT_GC_PERCENT_LT': 0.0,
                'PRIMER_WT_GC_PERCENT_GT': 0.0,
                'PRIMER_WT_SELF_ANY_TH': 0.0,
                'PRIMER_WT_SELF_END_TH': 0.0,
                'PRIMER_WT_HAIRPIN_TH': 0.0,
                'PRIMER_WT_TEMPLATE_MISPRIMING_TH': 0.0,
                'PRIMER_WT_SELF_ANY': 0.0,
                'PRIMER_WT_SELF_END': 0.0,
                'PRIMER_WT_TEMPLATE_MISPRIMING': 0.0,
                'PRIMER_WT_NUM_NS': 0.0,
                'PRIMER_WT_LIBRARY_MISPRIMING': 0.0,
                'PRIMER_WT_SEQ_QUAL': 0.0,
                'PRIMER_WT_END_QUAL': 0.0,
                'PRIMER_WT_POS_PENALTY': 0.0,
                'PRIMER_WT_END_STABILITY': 0.0,
                'PRIMER_WT_MASK_FAILURE_RATE': 0.0,
                'PRIMER_PAIR_WT_PRODUCT_SIZE_LT': 0.0,
                'PRIMER_PAIR_WT_PRODUCT_SIZE_GT': 0.0,
                'PRIMER_PAIR_WT_PRODUCT_TM_LT': 0.0,
                'PRIMER_PAIR_WT_PRODUCT_TM_GT': 0.0,
                'PRIMER_PAIR_WT_COMPL_ANY_TH': 0.0,
                'PRIMER_PAIR_WT_COMPL_END_TH': 0.0,
                'PRIMER_PAIR_WT_TEMPLATE_MISPRIMING_TH': 0.0,
                'PRIMER_PAIR_WT_COMPL_ANY': 0.0,
                'PRIMER_PAIR_WT_COMPL_END': 0.0,
                'PRIMER_PAIR_WT_TEMPLATE_MISPRIMING': 0.0,
                'PRIMER_PAIR_WT_DIFF_TM': 0.0,
                'PRIMER_PAIR_WT_LIBRARY_MISPRIMING': 0.0,
                'PRIMER_PAIR_WT_PR_PENALTY': 1.0,
                'PRIMER_PAIR_WT_IO_PENALTY': 0.0,
                'PRIMER_INTERNAL_MIN_SIZE': 18,
                'PRIMER_INTERNAL_OPT_SIZE': 20,
                'PRIMER_INTERNAL_MAX_SIZE': 27,
                'PRIMER_INTERNAL_MIN_TM': 57.0,
                'PRIMER_INTERNAL_OPT_TM': 60.0,
                'PRIMER_INTERNAL_MAX_TM': 63.0,
                'PRIMER_INTERNAL_MIN_GC': 20.0,
                'PRIMER_INTERNAL_OPT_GC_PERCENT': 50.0,
                'PRIMER_INTERNAL_MAX_GC': 80.0,
                'PRIMER_INTERNAL_MAX_SELF_ANY_TH': 47.00,
                'PRIMER_INTERNAL_MAX_SELF_END_TH': 47.00,
                'PRIMER_INTERNAL_MAX_HAIRPIN_TH': 47.00,
                'PRIMER_INTERNAL_MAX_SELF_ANY': 12.00,
                'PRIMER_INTERNAL_MAX_SELF_END': 12.00,
                'PRIMER_INTERNAL_MIN_QUALITY': 0,
                'PRIMER_INTERNAL_MAX_NS_ACCEPTED': 0,
                'PRIMER_INTERNAL_MAX_POLY_X': 5,
                'PRIMER_INTERNAL_MAX_LIBRARY_MISHYB': 12.00,
                'PRIMER_INTERNAL_SALT_MONOVALENT': 50.0,
                'PRIMER_INTERNAL_DNA_CONC': 50.0,
                'PRIMER_INTERNAL_SALT_DIVALENT': 1.5,
                'PRIMER_INTERNAL_DNTP_CONC': 0.0,
                'PRIMER_INTERNAL_WT_SIZE_LT': 1.0,
                'PRIMER_INTERNAL_WT_SIZE_GT': 1.0,
                'PRIMER_INTERNAL_WT_TM_LT': 1.0,
                'PRIMER_INTERNAL_WT_TM_GT': 1.0,
                'PRIMER_INTERNAL_WT_GC_PERCENT_LT': 0.0,
                'PRIMER_INTERNAL_WT_GC_PERCENT_GT': 0.0,
                'PRIMER_INTERNAL_WT_SELF_ANY_TH': 0.0,
                'PRIMER_INTERNAL_WT_SELF_END_TH': 0.0,
                'PRIMER_INTERNAL_WT_HAIRPIN_TH': 0.0,
                'PRIMER_INTERNAL_WT_SELF_ANY': 0.0,
                'PRIMER_INTERNAL_WT_SELF_END': 0.0,
                'PRIMER_INTERNAL_WT_NUM_NS': 0.0,
                'PRIMER_INTERNAL_WT_LIBRARY_MISHYB': 0.0,
                'PRIMER_INTERNAL_WT_SEQ_QUAL': 0.0,
                'PRIMER_INTERNAL_WT_END_QUAL': 0.0
            },
            debug=False)
        presult, notes = primer3_parser(result)
        obj = {
            'result': presult,
            'gene_name': gene_name,
            'seq': decodeseq,
            'input': input_type
        }
        return obj
    except Exception as e:
        # Endpoint boundary: any design/parse failure becomes a 400.
        return HTTPBadRequest(body=json.dumps({'error': str(e)}))
def testHuman(self):
    """Print the binding and wrapper results for one design side by side.

    The same human-readable example is run through
    ``bindings.designPrimers`` (separate sequence/global dicts, list-valued
    ranges) and ``wrappers.designPrimers`` (one flat dict, string-valued
    ranges).  A table with one row per key of the binding result is
    printed; nothing is asserted.
    """
    template = 'GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG'

    binding_seq_args = {
        'SEQUENCE_ID': 'MH1000',
        'SEQUENCE_TEMPLATE': template,
        'SEQUENCE_INCLUDED_REGION': [36, 342]
    }
    binding_global_args = {
        'PRIMER_OPT_SIZE': 20,
        'PRIMER_PICK_INTERNAL_OLIGO': 1,
        'PRIMER_INTERNAL_MAX_SELF_END': 8,
        'PRIMER_MIN_SIZE': 18,
        'PRIMER_MAX_SIZE': 25,
        'PRIMER_OPT_TM': 60.0,
        'PRIMER_MIN_TM': 57.0,
        'PRIMER_MAX_TM': 63.0,
        'PRIMER_MIN_GC': 20.0,
        'PRIMER_MAX_GC': 80.0,
        'PRIMER_MAX_POLY_X': 100,
        'PRIMER_INTERNAL_MAX_POLY_X': 100,
        'PRIMER_SALT_MONOVALENT': 50.0,
        'PRIMER_DNA_CONC': 50.0,
        'PRIMER_MAX_NS_ACCEPTED': 0,
        'PRIMER_MAX_SELF_ANY': 12,
        'PRIMER_MAX_SELF_END': 8,
        'PRIMER_PAIR_MAX_COMPL_ANY': 12,
        'PRIMER_PAIR_MAX_COMPL_END': 8,
        'PRIMER_PRODUCT_SIZE_RANGE': [[75, 100], [100, 125], [125, 150],
                                      [150, 175], [175, 200], [200, 225]],
    }

    # The wrapper takes one flat dict: the same global settings (the size
    # ranges spelled as a string) followed by the sequence arguments, with
    # the included region as a string too.
    wrapper_args = dict(binding_global_args)
    wrapper_args['PRIMER_PRODUCT_SIZE_RANGE'] = '75-100 100-125 125-150 150-175 175-200 200-225'
    wrapper_args['SEQUENCE_ID'] = 'MH1000'
    wrapper_args['SEQUENCE_TEMPLATE'] = template
    wrapper_args['SEQUENCE_INCLUDED_REGION'] = '36,342'

    binding_res = bindings.designPrimers(binding_seq_args, binding_global_args)
    wrapper_res = wrappers.designPrimers(wrapper_args)

    row_format = '{:<30} {:<25} {:<25}'
    print('\n\n\n' + row_format.format('Output Key', 'Wrapper Result',
                                       'Binding Result'))
    print('-' * 80)
    for key, binding_value in binding_res.items():
        print(row_format.format(key, repr(wrapper_res.get(key)),
                                repr(binding_value)))
forward_primer, reverse_primer = 'SO:0000121', 'SO:0000132' genes_with_errors = [] with open(genome_name+'_KO_primers_pyrG_selection.csv', 'w') as pyrg_f, open(genome_name+'_KO_primers_ptrA_selection.csv', 'w') as ptra_f, \ open('genes_with_errors', 'w') as genes_with_errors_f, open('primers.gff', 'w') as gff_f: files = (pyrg_f, ptra_f) for f in files: f.write(','.join(['primer name', 'primer sequence', 'penalty', 'product size'])+'\n') for gene in coord: try: print(gene.name) if gene.min_coord - 1400 > 0 and gene.max_coord+1400 < len(fasta_dict[gene.chrom]) and gene.max_coord - gene.min_coord > 200: left = bindings.designPrimers({ 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': str(fasta_dict[gene.chrom].seq), 'SEQUENCE_INCLUDED_REGION': [gene.min_coord-1400, 1500], 'SEQUENCE_PRIMER_PAIR_OK_REGION_LIST': [-1, -1, gene.min_coord, 100], }, {'PRIMER_PRODUCT_SIZE_RANGE': [[1100, 1300]]}) right = bindings.designPrimers({ 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': str(fasta_dict[gene.chrom].seq), 'SEQUENCE_INCLUDED_REGION': [gene.max_coord-100, 1500], 'SEQUENCE_PRIMER_PAIR_OK_REGION_LIST': [gene.max_coord-100, 100, -1, -1], }, {'PRIMER_PRODUCT_SIZE_RANGE': [[1100, 1300]]}) left_to_right_dist = (right['PRIMER_RIGHT_0'][0] - right['PRIMER_RIGHT_0'][1]) - (left['PRIMER_LEFT_0'][0] + left['PRIMER_LEFT_0'][1]) nested_range = [left_to_right_dist-200, left_to_right_dist] nest = bindings.designPrimers({ 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': str(fasta_dict[gene.chrom].seq),
def test_fileBased(self):
    """Compare ``bindings.designPrimers`` with the simulated bindings per record.

    Each ``<root>_input`` file under ``LOCAL_DIR/input_files`` is parsed with
    ``self._convertBoulderInput`` and every record is run through both
    ``simulatedbindings.designPrimers`` and ``bindings.designPrimers``:

    * If the simulated result carries ``PRIMER_ERROR``, the bindings call is
      required to raise ``IOError``.
    * Otherwise the two results are compared with ``self._compareResults``.
      An ``IOError`` from the bindings is tolerated only when the record's
      ``P3_COMMENT`` mentions ``complain`` or ``fail``; any other ``IOError``
      is recorded as a failure. In both cases the comparison is skipped,
      because there is no bindings result for this record to compare.

    Raises:
        RuntimeError: if any record was recorded as a failure; the message
            lists the file root, the sequence id and the details.
    """
    test_file_roots = [
        'primer_must_use_th',
        'primer_task_th',
        'primer_thal_args',
        'primer_thal_max_seq_error',
        'primer_first_base_index',
        'test_compl_error',
        'test_left_to_right_of_right',
        'dv_conc_vs_dntp_conc',
        'primer_internal',
        'primer_tm_lc_masking',
        'primer_ok_regions',
        'primer_start_codon',
        'primer_task',
        'primer_renewed_tasks',
        'primer_must_overlap_point',
        'primer_overlap_junction',
        'primer_all_settingsfiles',
        'primer_high_tm_load_set',
        'primer_high_gc_load_set',
        'primer_gc_end',
        'primer_num_best',
        'primer_check',
        'primer_end_pathology',
        'long_seq',
        'p3-tmpl-mispriming',
    ]
    print()
    failures = []
    for fn_root in test_file_roots:
        input_fp = os.path.join(LOCAL_DIR, 'input_files', fn_root) + '_input'
        with open(input_fp) as input_fd:
            input_raw = input_fd.read()
        input_dicts = self._convertBoulderInput(input_raw)

        sys.stdout.write('->Testing file {:<40}\r'.format(fn_root))
        sys.stdout.flush()

        # Global arguments accumulate across the records of a single file.
        current_global_args = {}
        for global_args, seq_args, p3_args in input_dicts:
            test_id = str(seq_args.get('SEQUENCE_ID', ''))
            current_global_args.update(global_args)
            simulated_binding_res = simulatedbindings.designPrimers(
                seq_args, current_global_args)

            if simulated_binding_res.get('PRIMER_ERROR') is not None:
                # The simulated run reported an error, so the bindings
                # must reject the same input.
                with self.assertRaises(IOError):
                    bindings.designPrimers(seq_args, current_global_args)
                continue

            try:
                binding_res = bindings.designPrimers(
                    seq_args, current_global_args)
            except IOError as exc:
                comment = p3_args.get('P3_COMMENT', '')
                if not any(x in comment for x in ('complain', 'fail')):
                    # Not an announced failure case: report it instead of
                    # silently swallowing the error.
                    failures.append((
                        fn_root, test_id,
                        'unexpected IOError from bindings.designPrimers: '
                        '{}'.format(exc)))
                # No result exists for this record, so do not compare.
                continue

            disagreements = self._compareResults(
                binding_res, simulated_binding_res)
            if disagreements is not None:
                failures.append((fn_root, test_id, disagreements))

    # Blank out the carriage-return progress line.
    print(' ' * 60, end='\r')
    if failures:
        err_msg = ('Failures occured during file testing:\n' +
                   '\n'.join(['->{}\t{}\n{}'.format(*f) for f in failures]))
        raise RuntimeError(err_msg)
r_primer_map = {'+': 'R', '-': 'F'} with open('Af3357_qPCR_primers.csv','w') as f, \ open('genes_with_errors','w') as genes_with_errors_f, open('primers.gff','w') as gff_f: temp = [] for idx, gene in enumerate(coord, 1): if len(gene.seq) < 100: continue if len(gene.exons) > 2: print(gene.name) goi = bindings.designPrimers( { 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': gene.cdna, 'SEQUENCE_OVERLAP_JUNCTION_LIST': gene.exon_juncs, }, { 'PRIMER_PRODUCT_SIZE_RANGE': [[75, 150]], 'PRIMER_EXPLAIN_FLAG': 1, 'PRIMER_MAX_TM': 68, 'PRIMER_MIN_TM': 52, 'PRIMER_PICK_INTERNAL_OLIGO': 0, overlap_map[gene.strand]: 9 }) else: print(gene.name) goi = bindings.designPrimers( { 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': gene.cdna }, { 'PRIMER_PRODUCT_SIZE_RANGE': [[75, 150]], 'PRIMER_EXPLAIN_FLAG': 1,
for f in files: f.write(','.join( ['primer name', 'primer sequence', 'penalty', 'product size']) + '\n') for gene in coord: try: print(gene.name) if gene.min_coord - 1400 > 0 and gene.max_coord + 1400 < len( fasta_dict[ gene.chrom]) and gene.max_coord - gene.min_coord > 200: left = bindings.designPrimers( { 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': str(fasta_dict[gene.chrom].seq), 'SEQUENCE_INCLUDED_REGION': [gene.min_coord - 1400, 1500], 'SEQUENCE_PRIMER_PAIR_OK_REGION_LIST': [-1, -1, gene.min_coord, 100], }, {'PRIMER_PRODUCT_SIZE_RANGE': [[1100, 1300]]}) right = bindings.designPrimers( { 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': str(fasta_dict[gene.chrom].seq), 'SEQUENCE_INCLUDED_REGION': [gene.max_coord - 100, 1500], 'SEQUENCE_PRIMER_PAIR_OK_REGION_LIST': [gene.max_coord - 100, 100, -1, -1],
def _summarize_primer_pairs(primer, max_pairs=5):
    """Format the first ``max_pairs`` primer pairs of a designPrimers result.

    Returns a list of ``max_pairs`` strings. Slot ``i`` is
    ``left_seq;right_seq;left_tm;right_tm;product_size`` (melting temperatures
    rounded to 3 decimals) when all five result keys for pair ``i`` are
    present in ``primer``, and ``"---"`` otherwise.
    """
    summary = ["---"] * max_pairs
    for idx in range(max_pairs):
        left_seq_key = "PRIMER_LEFT_{}_SEQUENCE".format(idx)
        right_seq_key = "PRIMER_RIGHT_{}_SEQUENCE".format(idx)
        left_tm_key = "PRIMER_LEFT_{}_TM".format(idx)
        right_tm_key = "PRIMER_RIGHT_{}_TM".format(idx)
        size_key = "PRIMER_PAIR_{}_PRODUCT_SIZE".format(idx)

        required = (left_seq_key, right_seq_key, left_tm_key, right_tm_key, size_key)
        if not all(key in primer for key in required):
            continue

        summary[idx] = ";".join([
            primer[left_seq_key],
            primer[right_seq_key],
            str(round(primer[left_tm_key], 3)),
            str(round(primer[right_tm_key], 3)),
            str(primer[size_key]),
        ])
    return summary


def primer_main(args):
    """Append up to five designed primer pairs to every row of a result file.

    Reads ``args.result_file`` line by line and writes to ``args.output``:

    * lines starting with ``#`` are dropped;
    * the header line (as detected by ``utils.header_check``) is echoed with
      the extra columns ``Primer1`` .. ``Primer5``;
    * rows for which ``utils.check_atypical_chromosomes`` is true are
      reported on stderr and dropped;
    * for every other row, ``realignmentFunction.getRefAltForSV`` writes a
      temporary FASTA file, primers are designed on the sequence following
      each ``>...alt`` header, and the row is written with five extra columns
      (``---`` for every pair that is unavailable).

    Args:
        args: namespace providing ``output``, ``result_file`` and
            ``reference``.
    """
    from genomon_sv import realignmentFunction
    from primer3 import bindings

    # make directory for output if necessary
    out_dir = os.path.dirname(args.output)
    if out_dir != "" and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    contig_fa = args.output + ".contig.tmp.fa"

    # Context managers close both files even if a row raises part-way.
    with open(args.output, 'w') as hout, open(args.result_file, 'r') as hin:
        for line in hin:
            if line.startswith("#"):
                continue

            line = line.rstrip('\n')
            if utils.header_check(line):
                header_info.read(line)
                print('\t'.join([line, "Primer1", "Primer2", "Primer3", "Primer4", "Primer5"]),
                      file=hout)
                continue

            F = line.split('\t')
            chr1, pos1, dir1 = F[header_info.chr_1], F[header_info.pos_1], F[header_info.dir_1]
            chr2, pos2, dir2 = F[header_info.chr_2], F[header_info.pos_2], F[header_info.dir_2]
            junc_seq = F[header_info.inserted_seq]

            if utils.check_atypical_chromosomes(chr1, chr2):
                print("Skip a SV incolving atypical chromosomes: %s,%s,%s,%s,%s,%s" %
                      (chr1, pos1, dir1, chr2, pos2, dir2), file=sys.stderr)
                continue

            # "---" in the inserted-sequence column counts as length 0.
            junc_seq_len = 0 if junc_seq == "---" else len(junc_seq)

            # NOTE(review): 1000 and 250 presumably are the split-reference
            # threshold and the validation sequence length (the values held
            # by the previously unused ``param`` dict) -- confirm against
            # getRefAltForSV.
            realignmentFunction.getRefAltForSV(contig_fa, chr1, pos1, dir1, chr2, pos2, dir2,
                                               junc_seq, args.reference, 1000, 250)

            with open(contig_fa) as hin2:
                contig_lines = [contig_line.rstrip('\n') for contig_line in hin2]

            # Start from placeholders so a contig file without an "alt"
            # record yields "---" columns instead of an unbound or stale value.
            primer_left_right = ["---"] * 5

            # Pair every line with its successor; an "alt" header on the very
            # last line has no sequence line and is therefore skipped.
            for header, seq in zip(contig_lines, contig_lines[1:]):
                if not (header.startswith('>') and header.endswith("alt")):
                    continue

                primer = bindings.designPrimers(
                    {
                        'SEQUENCE_ID': 'MH1000',
                        'SEQUENCE_TEMPLATE': seq,
                        # Target length grows with the inserted sequence.
                        'SEQUENCE_TARGET': [225, 50 + junc_seq_len],
                        'SEQUENCE_INCLUDED_REGION': [10, len(seq) - 20]
                    },
                    {
                        'PRIMER_PRODUCT_SIZE_RANGE': [[150, 250], [100, 300], [301, 400], [401, 500]],
                    })
                # If several "alt" records exist, the last one wins.
                primer_left_right = _summarize_primer_pairs(primer)

            print('\t'.join(F) + '\t' + '\t'.join(primer_left_right), file=hout)

    # Remove the temporary contig FASTA; it is absent when no row was processed.
    if os.path.exists(contig_fa):
        os.remove(contig_fa)
f_primer_map = {'+': 'F', '-': 'R'} r_primer_map = {'+': 'R', '-': 'F'} with open('Af3357_qPCR_primers.csv','w') as f, \ open('genes_with_errors','w') as genes_with_errors_f, open('primers.gff','w') as gff_f: temp = [] for idx, gene in enumerate(coord,1): if len(gene.seq) < 100: continue if len(gene.exons) > 2: print(gene.name) goi = bindings.designPrimers({ 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': gene.cdna, 'SEQUENCE_OVERLAP_JUNCTION_LIST': gene.exon_juncs, }, {'PRIMER_PRODUCT_SIZE_RANGE': [[75, 150]], 'PRIMER_EXPLAIN_FLAG':1, 'PRIMER_MAX_TM':68, 'PRIMER_MIN_TM':52, 'PRIMER_PICK_INTERNAL_OLIGO':0, overlap_map[gene.strand]: 9}) else: print(gene.name) goi = bindings.designPrimers({ 'SEQUENCE_ID': gene.name, 'SEQUENCE_TEMPLATE': gene.cdna }, {'PRIMER_PRODUCT_SIZE_RANGE': [[75, 150]], 'PRIMER_EXPLAIN_FLAG':1, 'PRIMER_MAX_TM':68, 'PRIMER_MIN_TM':52,
def testHuman(self):
    """Print bindings and wrapper design results for one template side by side.

    The same design parameters are passed to ``bindings.designPrimers``
    (sequence and global arguments as two dicts, with list-valued region and
    product size range) and to ``wrappers.designPrimers`` (a single dict with
    those two values given as strings). Every key of the bindings result is
    then printed next to the wrapper's value for that key. Nothing is
    asserted; the table is for manual inspection.
    """
    template = "GCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCCTACATTTTAGCATCAGTGAGTACAGCATGCTTACTGGAAGAGAGGGTCATGCAACAGATTAGGAGGTAAGTTTGCAAAGGCAGGCTAAGGAGGAGACGCACTGAATGCCATGGTAAGAACTCTGGACATAAAAATATTGGAAGTTGTTGAGCAAGTNAAAAAAATGTTTGGAAGTGTTACTTTAGCAATGGCAAGAATGATAGTATGGAATAGATTGGCAGAATGAAGGCAAAATGATTAGACATATTGCATTAAGGTAAAAAATGATAACTGAAGAATTATGTGCCACACTTATTAATAAGAAAGAATATGTGAACCTTGCAGATGTTTCCCTCTAGTAG"

    binding_seq_args = {
        "SEQUENCE_ID": "MH1000",
        "SEQUENCE_TEMPLATE": template,
        "SEQUENCE_INCLUDED_REGION": [36, 342],
    }
    binding_global_args = {
        "PRIMER_OPT_SIZE": 20,
        "PRIMER_PICK_INTERNAL_OLIGO": 1,
        "PRIMER_INTERNAL_MAX_SELF_END": 8,
        "PRIMER_MIN_SIZE": 18,
        "PRIMER_MAX_SIZE": 25,
        "PRIMER_OPT_TM": 60.0,
        "PRIMER_MIN_TM": 57.0,
        "PRIMER_MAX_TM": 63.0,
        "PRIMER_MIN_GC": 20.0,
        "PRIMER_MAX_GC": 80.0,
        "PRIMER_MAX_POLY_X": 100,
        "PRIMER_INTERNAL_MAX_POLY_X": 100,
        "PRIMER_SALT_MONOVALENT": 50.0,
        "PRIMER_DNA_CONC": 50.0,
        "PRIMER_MAX_NS_ACCEPTED": 0,
        "PRIMER_MAX_SELF_ANY": 12,
        "PRIMER_MAX_SELF_END": 8,
        "PRIMER_PAIR_MAX_COMPL_ANY": 12,
        "PRIMER_PAIR_MAX_COMPL_END": 8,
        "PRIMER_PRODUCT_SIZE_RANGE": [
            [75, 100],
            [100, 125],
            [125, 150],
            [150, 175],
            [175, 200],
            [200, 225],
        ],
    }

    # The wrapper takes one flat dict: the same global settings (size range
    # as a string) followed by the sequence keys (included region as a
    # string). Built as an independent copy before either call is made.
    wrapper_args = dict(binding_global_args)
    wrapper_args["PRIMER_PRODUCT_SIZE_RANGE"] = "75-100 100-125 125-150 150-175 175-200 200-225"
    wrapper_args["SEQUENCE_ID"] = "MH1000"
    wrapper_args["SEQUENCE_TEMPLATE"] = template
    wrapper_args["SEQUENCE_INCLUDED_REGION"] = "36,342"

    binding_res = bindings.designPrimers(binding_seq_args, binding_global_args)
    wrapper_res = wrappers.designPrimers(wrapper_args)

    row_fmt = "{:<30} {:<25} {:<25}"
    print("\n\n\n" + row_fmt.format("Output Key", "Wrapper Result", "Binding Result"))
    print("-" * 80)
    for key, binding_value in binding_res.items():
        wrapper_value = wrapper_res.get(key)
        print(row_fmt.format(key, repr(wrapper_value), repr(binding_value)))