os.makedirs("Results") outfile_name = "Results/" + 'exon_sequence_in_' + genome # The steps below are taken if the user provides genomic location input in correct format. # Using the location input, exon sequence, upstream and downstream regions # are extracted from the whole genome information if parsed_data['type'] == '2': if re.match('chr\d+\:\d+\.\.\d+', location): print "User has provided genomic location as input data." if mode == 'batch': mode_response = raw_input('USER HAS PROVIDED BATCH MODE WITH A SINGLE LOCATION...PRESS ENTER TO CONTINUE PROGRAM OR PRESS 2 TO CHANGE MODE : ') if mode_response == '2': mode = 'single' exon_start, exon_end = PredictExon.extract_exon(location, strand) seq_start = location.split(':')[1].split('..')[0] seq_end = location.split(':')[1].split('..')[1] chr_num = location.split(':')[0] file_path = 'data/' + genome + '/' + chr_num + '.fa' if exon_start == 0 or exon_end == 0: exon_start = int(seq_start) exon_end = int(seq_end) # This checks to make sure the complete genome file exists in the current directory. # If not present, program will terminate with a message try: file_handle = open(file_path, 'r') except IOError:
def _coords_descending(query_start, query_end):
    """Return True when ``query_start`` is numerically greater than ``query_end``.

    ``parse`` receives its coordinates as strings (it concatenates them into
    a location string and later calls ``int()`` on them).  Comparing the raw
    strings is lexicographic, so ``'1000' > '999'`` is False; converting to
    ``int`` first gives the intended ordering.  If a value is not an integer
    literal, the original raw comparison is used as a fallback.
    """
    try:
        return int(query_start) > int(query_end)
    except (TypeError, ValueError):
        return query_start > query_end


def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Extract the exon and its flanking sequences for a mapped query.

    Reads ``data/<genome>/<chr_num>.fa``, asks ``PredictExon.extract_exon``
    for exon boundaries at the query location and hands the chromosome text
    to ``extractSequence.extract_single``.

    Parameters:
        mode: ``'batch'`` derives the strand purely from the coordinate
            order; any other value keeps the caller's strand unless the
            coordinate order contradicts it (a notice is printed then).
        query_start, query_end: coordinates as strings.
        chr_num: chromosome name; used as the FASTA file stem and as the
            prefix of the location string.
        genome: name of the sub-directory of ``data/`` holding the FASTA.
        base_count: forwarded unchanged to ``extract_single``
            (presumably the flank length -- confirm against that helper).
        strand: ``'+'`` or ``'-'`` as supplied by the caller.

    Returns:
        ``(upstream_seq, exon_sequence, downstream_seq, strand)`` where
        ``strand`` is the possibly corrected strand.

    Raises:
        SystemExit: with status 1 (after printing a message) when an input
            is empty, the FASTA file cannot be opened, or any of the three
            extracted sequences is empty.
    """
    # NOTE: single-argument print(...) emits the same text under Python 2's
    # print statement and Python 3's print function.
    alt_query_start = ""
    alt_query_end = ""
    file_path = 'data/' + genome + '/' + chr_num + '.fa'

    if query_start == '' or query_end == '' or chr_num == '':
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    # Numeric (not lexicographic) ordering of the two coordinates.
    descending = _coords_descending(query_start, query_end)

    if mode == 'batch':
        strand = '-' if descending else '+'
    elif descending:
        if strand == '+':
            print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (-).")
            print("Changing strand information.....")
            strand = '-'
    elif strand == '-':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (+).")
        print("Changing strand information.....")
        strand = '+'

    # On the minus strand the coordinates are swapped before building the
    # location string.  Any other strand value leaves both parts empty,
    # exactly as before.
    if strand == '+':
        alt_query_start = query_start
        alt_query_end = query_end
    elif strand == '-':
        alt_query_start = query_end
        alt_query_end = query_start

    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # 'with' closes the handle even if reading fails; join avoids repeated
    # string concatenation.  Header lines (starting with '>') are skipped.
    with file_handle:
        sequence = ''.join(
            line.rstrip().lower()
            for line in file_handle
            if not line.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)
    # A zero boundary from extract_exon means: use the query coordinates.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)
    sequence = ""  # drop the reference to the chromosome text

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
def _coords_descending(query_start, query_end):
    """Return True when ``query_start`` is numerically greater than ``query_end``.

    ``parse`` receives its coordinates as strings (it concatenates them into
    a location string and later calls ``int()`` on them).  Comparing the raw
    strings is lexicographic, so ``'1000' > '999'`` is False; converting to
    ``int`` first gives the intended ordering.  If a value is not an integer
    literal, the original raw comparison is used as a fallback.
    """
    try:
        return int(query_start) > int(query_end)
    except (TypeError, ValueError):
        return query_start > query_end


def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Extract the exon and its flanking sequences for a mapped query.

    Reads ``data/<genome>/<chr_num>.fa``, asks ``PredictExon.extract_exon``
    for exon boundaries at the query location and hands the chromosome text
    to ``extractSequence.extract_single``.

    Parameters:
        mode: ``'batch'`` derives the strand purely from the coordinate
            order; any other value keeps the caller's strand unless the
            coordinate order contradicts it (a notice is printed then).
        query_start, query_end: coordinates as strings.
        chr_num: chromosome name; used as the FASTA file stem and as the
            prefix of the location string.
        genome: name of the sub-directory of ``data/`` holding the FASTA.
        base_count: forwarded unchanged to ``extract_single``
            (presumably the flank length -- confirm against that helper).
        strand: ``'+'`` or ``'-'`` as supplied by the caller.

    Returns:
        ``(upstream_seq, exon_sequence, downstream_seq, strand)`` where
        ``strand`` is the possibly corrected strand.

    Raises:
        SystemExit: with status 1 (after printing a message) when an input
            is empty, the FASTA file cannot be opened, or any of the three
            extracted sequences is empty.
    """
    # NOTE: single-argument print(...) emits the same text under Python 2's
    # print statement and Python 3's print function.
    alt_query_start = ""
    alt_query_end = ""
    file_path = 'data/' + genome + '/' + chr_num + '.fa'

    if query_start == '' or query_end == '' or chr_num == '':
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    # Numeric (not lexicographic) ordering of the two coordinates.
    descending = _coords_descending(query_start, query_end)

    if mode == 'batch':
        strand = '-' if descending else '+'
    elif descending:
        if strand == '+':
            print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (-).")
            print("Changing strand information.....")
            strand = '-'
    elif strand == '-':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (+).")
        print("Changing strand information.....")
        strand = '+'

    # On the minus strand the coordinates are swapped before building the
    # location string.  Any other strand value leaves both parts empty,
    # exactly as before.
    if strand == '+':
        alt_query_start = query_start
        alt_query_end = query_end
    elif strand == '-':
        alt_query_start = query_end
        alt_query_end = query_start

    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # 'with' closes the handle even if reading fails; join avoids repeated
    # string concatenation.  Header lines (starting with '>') are skipped.
    with file_handle:
        sequence = ''.join(
            line.rstrip().lower()
            for line in file_handle
            if not line.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)
    # A zero boundary from extract_exon means: use the query coordinates.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)
    sequence = ""  # drop the reference to the chromosome text

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
outfile_name = "Results/" + 'exon_sequence_in_' + genome # The steps below are taken if the user provides genomic location input in correct format. # Using the location input, exon sequence, upstream and downstream regions # are extracted from the whole genome information if parsed_data['type'] == '2': if re.match('chr\d+\:\d+\.\.\d+', location): print "User has provided genomic location as input data." if mode == 'batch': mode_response = raw_input( 'USER HAS PROVIDED BATCH MODE WITH A SINGLE LOCATION...PRESS ENTER TO CONTINUE PROGRAM OR PRESS 2 TO CHANGE MODE : ' ) if mode_response == '2': mode = 'single' exon_start, exon_end = PredictExon.extract_exon(location, strand) seq_start = location.split(':')[1].split('..')[0] seq_end = location.split(':')[1].split('..')[1] chr_num = location.split(':')[0] file_path = 'data/' + genome + '/' + chr_num + '.fa' if exon_start == 0 or exon_end == 0: exon_start = int(seq_start) exon_end = int(seq_end) # This checks to make sure the complete genome file exists in the current directory. # If not present, program will terminate with a message try: file_handle = open(file_path, 'r') except IOError: