try: file_handle = open(file_path, 'r') except IOError: print "The genome is not available in the current directory" sys.exit(1) # This step reads the genome data into a string for lines in file_handle: if lines.startswith('>'): continue else: sequence += lines.rstrip().lower() # Exon and its upstream and downstream sequences are extracted from the complete genome sequence print "EXTRACTING UPSTREAM AND DOWNSTREAM REGIONS...." upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(sequence, strand, exon_start, exon_end, base_count) sequence = "" if exon_sequence == '' or upstream_seq == '' or downstream_seq == '': print "The specified coordinates are unable to extract any sequence" sys.exit(1) # Output is written into a file print "WRITING OUTPUT TO " + outfile_name + "...." write_seq(upstream_seq, exon_sequence, downstream_seq, outfile_name, genome, seq_start, seq_end, strand) write_fasta(upstream_seq, exon_sequence, downstream_seq, outfile_name, genome, seq_start, seq_end, strand) elif os.path.exists(location): if mode == 'single': mode_response = raw_input('USER HAS PROVIDED SINGLE MODE WITH MULTIPLE LOCATIONS...PRESS ENTER TO CONTINUE PROGRAM OR PRESS 2 TO CHANGE MODE : ') if mode_response == '2': mode = 'batch'
def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Extract an exon and its flanking regions for a mapped query.

    The chromosome FASTA file ``data/<genome>/<chr_num>.fa`` is read into a
    single lower-case string (header lines starting with ``>`` are skipped),
    the exon boundaries are looked up with ``PredictExon.extract_exon`` and
    the sequences are cut out with ``extractSequence.extract_single``.

    Args:
        mode: ``'batch'`` derives the strand purely from the coordinate
            order; any other value keeps the caller's strand unless it
            contradicts the coordinate order, in which case it is flipped
            and a notice is printed.
        query_start: Start coordinate of the query, as a string.
        query_end: End coordinate of the query, as a string.
        chr_num: Chromosome name; used as the FASTA file stem and as the
            prefix of the location passed to ``PredictExon.extract_exon``.
        genome: Name of the genome directory under ``data/``.
        base_count: Passed through unchanged to
            ``extractSequence.extract_single`` (presumably the number of
            flanking bases -- confirm against that helper).
        strand: ``'+'`` or ``'-'`` as supplied by the caller.

    Returns:
        A tuple ``(upstream_seq, exon_sequence, downstream_seq, strand)``
        where ``strand`` is the strand that was actually used.

    Raises:
        SystemExit: With status 1 when a coordinate or the chromosome is
            empty, when the FASTA file cannot be opened, or when any of the
            three extracted sequences is empty.
    """
    file_path = 'data/' + genome + '/' + chr_num + '.fa'

    if query_start == '' or query_end == '' or chr_num == '':
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    # The coordinates are strings, so a direct ``>`` would be lexicographic
    # ('9' > '10' is True). Compare them numerically; if they are not
    # numeric, fall back to the raw comparison used historically.
    try:
        coords_reversed = int(query_start) > int(query_end)
    except (TypeError, ValueError):
        coords_reversed = query_start > query_end

    implied_strand = '-' if coords_reversed else '+'
    if mode == 'batch':
        strand = implied_strand
    else:
        # Only flip (and tell the user) when the supplied strand is the
        # exact opposite of what the coordinate order implies.
        contradicted_strand = '+' if coords_reversed else '-'
        if strand == contradicted_strand:
            print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (%s)."
                  % implied_strand)
            print("Changing strand information.....")
            strand = implied_strand

    # Order the coordinates for the location string; on the minus strand the
    # query start and end are swapped.
    alt_query_start = ""
    alt_query_end = ""
    if strand == '+':
        alt_query_start = query_start
        alt_query_end = query_end
    elif strand == '-':
        alt_query_start = query_end
        alt_query_end = query_start

    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # ``with`` guarantees the handle is closed even if reading fails; join
    # avoids repeatedly re-allocating a chromosome-sized string.
    with file_handle:
        sequence = "".join(
            line.rstrip().lower()
            for line in file_handle
            if not line.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)

    # A zero boundary means no exon was reported for this location, so the
    # query coordinates themselves are used.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)

    # Drop the reference to the whole-chromosome string now that it is no
    # longer needed.
    sequence = ""

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Return the exon and its upstream/downstream sequences for a query.

    Reads ``data/<genome>/<chr_num>.fa`` into one lower-case string
    (skipping ``>`` header lines), asks ``PredictExon.extract_exon`` for the
    exon boundaries at the query location and slices the sequences with
    ``extractSequence.extract_single``.

    Args:
        mode: With ``'batch'`` the strand is taken from the coordinate order
            alone. With any other value the supplied strand is kept unless
            the coordinate order contradicts it; then it is flipped and a
            notice is printed.
        query_start: Query start coordinate (string).
        query_end: Query end coordinate (string).
        chr_num: Chromosome name, used for the FASTA file name and the
            location string.
        genome: Genome directory name below ``data/``.
        base_count: Forwarded as-is to ``extractSequence.extract_single``
            (presumably the flank length -- confirm against that helper).
        strand: Caller-supplied strand, ``'+'`` or ``'-'``.

    Returns:
        ``(upstream_seq, exon_sequence, downstream_seq, strand)``; the last
        element is the strand that was finally used.

    Raises:
        SystemExit: Status 1 if a coordinate or the chromosome name is
            empty, if the FASTA file cannot be opened, or if any extracted
            sequence is empty.
    """
    file_path = 'data/' + genome + '/' + chr_num + '.fa'

    if query_start == '' or query_end == '' or chr_num == '':
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    # Coordinates are strings; comparing them directly is lexicographic and
    # mis-orders values with different digit counts ('100' > '99' is False).
    # Compare as integers, keeping the raw comparison only as a fallback for
    # non-numeric input.
    try:
        start_after_end = int(query_start) > int(query_end)
    except (TypeError, ValueError):
        start_after_end = query_start > query_end

    if mode == 'batch':
        strand = '-' if start_after_end else '+'
    elif start_after_end:
        if strand == '+':
            print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (-).")
            print("Changing strand information.....")
            strand = '-'
    elif strand == '-':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (+).")
        print("Changing strand information.....")
        strand = '+'

    # On the minus strand the query start and end are swapped before the
    # location string is built.
    alt_query_start = ""
    alt_query_end = ""
    if strand == '+':
        alt_query_start, alt_query_end = query_start, query_end
    elif strand == '-':
        alt_query_start, alt_query_end = query_end, query_start

    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # The context manager closes the file even when iteration raises, and a
    # single join replaces repeated concatenation of a very large string.
    with file_handle:
        sequence = "".join(
            row.rstrip().lower()
            for row in file_handle
            if not row.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)

    # Fall back to the query coordinates when either exon boundary is 0.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)

    # Release the chromosome-sized string before returning.
    sequence = ""

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
try: file_handle = open(file_path, 'r') except IOError: print "The genome is not available in the current directory" sys.exit(1) # This step reads the genome data into a string for lines in file_handle: if lines.startswith('>'): continue else: sequence += lines.rstrip().lower() # Exon and its upstream and downstream sequences are extracted from the complete genome sequence print "EXTRACTING UPSTREAM AND DOWNSTREAM REGIONS...." upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single( sequence, strand, exon_start, exon_end, base_count) sequence = "" if exon_sequence == '' or upstream_seq == '' or downstream_seq == '': print "The specified coordinates are unable to extract any sequence" sys.exit(1) # Output is written into a file print "WRITING OUTPUT TO " + outfile_name + "...." write_seq(upstream_seq, exon_sequence, downstream_seq, outfile_name, genome, seq_start, seq_end, strand) write_fasta(upstream_seq, exon_sequence, downstream_seq, outfile_name, genome, seq_start, seq_end, strand) elif os.path.exists(location): if mode == 'single': mode_response = raw_input(