os.makedirs("Results")
    outfile_name = "Results/" + 'exon_sequence_in_' + genome


# The steps below run when the user provides a genomic location in the correct
# format (e.g. chr1:100..200). Using that location, the exon sequence and its
# upstream and downstream regions are extracted from the whole-genome data.
if parsed_data['type'] == '2':
    if re.match('chr\d+\:\d+\.\.\d+', location):
        print "User has provided genomic location as input data."
        if mode == 'batch':
            mode_response = raw_input('USER HAS PROVIDED BATCH MODE WITH A SINGLE LOCATION...PRESS ENTER TO CONTINUE PROGRAM OR PRESS 2 TO CHANGE MODE : ')
            if mode_response == '2':
                mode = 'single'

        exon_start, exon_end = PredictExon.extract_exon(location, strand)
            
        seq_start = location.split(':')[1].split('..')[0]
        seq_end = location.split(':')[1].split('..')[1]    
        chr_num = location.split(':')[0]
        file_path = 'data/' + genome + '/' + chr_num + '.fa'

        if exon_start == 0 or exon_end == 0:
            exon_start = int(seq_start)
            exon_end = int(seq_end)

        # This checks to make sure the complete genome file exists in the current directory.
        # If not present, program will terminate with a message
        try:
            file_handle = open(file_path, 'r')
        except IOError:
示例#2
0
def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Extract the exon and its flanking sequences for a mapped query.

    The chromosome FASTA file ``data/<genome>/<chr_num>.fa`` is read in full,
    the strand is reconciled with the order of the query coordinates, and the
    exon boundaries reported by ``PredictExon.extract_exon`` are used to cut
    the sequences. When that call returns 0 for either boundary, the query
    coordinates themselves are used instead.

    Args:
        mode: 'batch' makes the strand depend only on the coordinate order;
            any other value keeps the supplied strand unless it contradicts
            the coordinate order, in which case it is flipped (with a notice).
        query_start: Start coordinate as a numeric string.
        query_end: End coordinate as a numeric string.
        chr_num: Chromosome name; also the FASTA file stem.
        genome: Name of the genome sub-directory under ``data/``.
        base_count: Passed through to ``extractSequence.extract_single``
            (presumably the number of flanking bases -- confirm there).
        strand: '+' or '-' as supplied by the caller.

    Returns:
        Tuple ``(upstream_seq, exon_sequence, downstream_seq, strand)`` where
        ``strand`` is the possibly corrected strand.

    Exits the process with status 1 (``sys.exit``) when a coordinate or the
    chromosome is empty or not numeric, when the genome file cannot be opened,
    or when any of the three extracted sequences is empty.
    """
    if query_start == '' or query_end == '' or chr_num == '':
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    # The coordinates arrive as strings; comparing them directly would be
    # lexicographic ('9' > '10'), so decide the orientation numerically.
    try:
        is_reversed = int(query_start) > int(query_end)
    except ValueError:
        print("The input sequence could not be mapped to any genomic sequence data")
        sys.exit(1)

    if mode == 'batch':
        # Batch mode ignores the supplied strand entirely.
        strand = '-' if is_reversed else '+'
    elif is_reversed and strand == '+':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (-).")
        print("Changing strand information.....")
        strand = '-'
    elif not is_reversed and strand == '-':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (+).")
        print("Changing strand information.....")
        strand = '+'

    # On the minus strand the two query coordinates are swapped before use.
    # Any strand value other than '+'/'-' leaves both coordinates empty,
    # exactly as before.
    alt_query_start = ""
    alt_query_end = ""
    if strand == '+':
        alt_query_start = query_start
        alt_query_end = query_end
    elif strand == '-':
        alt_query_start = query_end
        alt_query_end = query_start

    file_path = 'data/' + genome + '/' + chr_num + '.fa'
    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # Concatenate every non-header line, lower-cased, into one string. The
    # join avoids repeated string re-allocation on chromosome-sized input and
    # the with-block closes the file even if reading fails.
    with file_handle:
        sequence = "".join(
            line.rstrip().lower()
            for line in file_handle
            if not line.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)

    # A zero boundary means no exon prediction is available; fall back to the
    # raw query coordinates.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
示例#3
0
def parse(mode, query_start, query_end, chr_num, genome, base_count, strand):
    """Return the upstream, exon and downstream sequences for a mapped query.

    Reads ``data/<genome>/<chr_num>.fa`` completely, settles which strand the
    query lies on from the order of its coordinates, asks
    ``PredictExon.extract_exon`` for the exon boundaries (falling back to the
    query coordinates when either boundary comes back as 0) and hands the
    result to ``extractSequence.extract_single``.

    Args:
        mode: With 'batch' the strand is derived from the coordinate order
            alone. With any other value the supplied strand is kept unless the
            coordinate order contradicts it; then it is flipped and a notice
            is printed.
        query_start: Start coordinate, a numeric string.
        query_end: End coordinate, a numeric string.
        chr_num: Chromosome name, used as the FASTA file stem.
        genome: Genome directory name below ``data/``.
        base_count: Forwarded unchanged to ``extractSequence.extract_single``
            (presumably the flank length -- verify against that helper).
        strand: '+' or '-' as given by the caller.

    Returns:
        ``(upstream_seq, exon_sequence, downstream_seq, strand)``; ``strand``
        reflects any correction made here.

    Calls ``sys.exit(1)`` when a coordinate or the chromosome name is empty or
    a coordinate is not numeric, when the genome file cannot be opened, or
    when any extracted sequence is empty.
    """
    unmapped_message = "The input sequence could not be mapped to any genomic sequence data"

    if query_start == '' or query_end == '' or chr_num == '':
        print(unmapped_message)
        sys.exit(1)

    # Coordinates are strings, and plain string comparison is lexicographic
    # ('9' > '10' is True). Convert before comparing so the strand is right.
    try:
        start_after_end = int(query_start) > int(query_end)
    except ValueError:
        print(unmapped_message)
        sys.exit(1)

    if mode == 'batch':
        # The caller-supplied strand is not consulted in batch mode.
        strand = '-' if start_after_end else '+'
    elif start_after_end and strand == '+':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (-).")
        print("Changing strand information.....")
        strand = '-'
    elif not start_after_end and strand == '-':
        print("USER SUPPLIED SEQUENCE IS PRESENT ON THE OPPOSITE STRAND (+).")
        print("Changing strand information.....")
        strand = '+'

    # Minus-strand queries have their coordinates swapped before use; an
    # unrecognised strand value leaves both empty, as it did originally.
    alt_query_start = ""
    alt_query_end = ""
    if strand == '+':
        alt_query_start, alt_query_end = query_start, query_end
    elif strand == '-':
        alt_query_start, alt_query_end = query_end, query_start

    file_path = 'data/' + genome + '/' + chr_num + '.fa'
    try:
        file_handle = open(file_path, 'r')
    except IOError:
        print("The genome is not available in the current directory.")
        sys.exit(1)

    # Build the chromosome string in one join (skipping '>' header lines)
    # instead of growing it line by line; the with-block guarantees the file
    # is closed.
    with file_handle:
        sequence = "".join(
            line.rstrip().lower()
            for line in file_handle
            if not line.startswith('>')
        )

    location = chr_num + ':' + alt_query_start + '..' + alt_query_end
    exon_start, exon_end = PredictExon.extract_exon(location, strand)

    # Zero for either boundary signals "no prediction"; use the query span.
    if exon_start == 0 or exon_end == 0:
        exon_start = int(alt_query_start)
        exon_end = int(alt_query_end)

    upstream_seq, exon_sequence, downstream_seq = extractSequence.extract_single(
        sequence, strand, exon_start, exon_end, base_count)

    if exon_sequence == '' or upstream_seq == '' or downstream_seq == '':
        print("The specified coordinates are unable to extract any sequence.")
        sys.exit(1)

    return upstream_seq, exon_sequence, downstream_seq, strand
    outfile_name = "Results/" + 'exon_sequence_in_' + genome

# The steps below run when the user provides a genomic location in the correct
# format (e.g. chr1:100..200). Using that location, the exon sequence and its
# upstream and downstream regions are extracted from the whole-genome data.
if parsed_data['type'] == '2':
    if re.match('chr\d+\:\d+\.\.\d+', location):
        print "User has provided genomic location as input data."
        if mode == 'batch':
            mode_response = raw_input(
                'USER HAS PROVIDED BATCH MODE WITH A SINGLE LOCATION...PRESS ENTER TO CONTINUE PROGRAM OR PRESS 2 TO CHANGE MODE : '
            )
            if mode_response == '2':
                mode = 'single'

        exon_start, exon_end = PredictExon.extract_exon(location, strand)

        seq_start = location.split(':')[1].split('..')[0]
        seq_end = location.split(':')[1].split('..')[1]
        chr_num = location.split(':')[0]
        file_path = 'data/' + genome + '/' + chr_num + '.fa'

        if exon_start == 0 or exon_end == 0:
            exon_start = int(seq_start)
            exon_end = int(seq_end)

        # This checks to make sure the complete genome file exists in the current directory.
        # If not present, program will terminate with a message
        try:
            file_handle = open(file_path, 'r')
        except IOError: