Пример #1
0
def build_ivals(fp, genome_db, reads_db):
    """Yield one ``(source sequence, read sequence)`` pair per mapping record.

    Records come from ``bowtie_parser.read(fp)``.  For each record the
    source sequence is looked up with ``get_src_sequence(genome_db, record)``
    and the read sequence with ``get_read_sequence(reads_db, record)``.

    This is a generator: nothing is read from ``fp`` until iteration starts.
    """
    for record in bowtie_parser.read(fp):
        # Source lookup first, then the read lookup (left-to-right).
        yield (
            get_src_sequence(genome_db, record),
            get_read_sequence(reads_db, record),
        )
import bowtie_parser
import sys

# Command line, as implied by the sys.argv indexing below:
#   <script> OUTPUT_FILE MAPPING_FILE [MAPPING_FILE ...]
files = sys.argv[2:]
dict1 = {}
# Opened for writing here and not closed within this section; presumably
# written to by code further down the script -- TODO confirm.
fp = open(sys.argv[1], 'w')

for file in files:
    # Per-file table: contig id -> [contig length, per-base coverage flags].
    # After the summary loop below each entry becomes
    # [contig length, mapped base count, mapped fraction].
    # NOTE: `dict` and `file` shadow builtins; the names are kept because
    # they are module-level and other parts of the script may refer to them.
    dict = {}

    # Close each mapping file as soon as it has been scanned.
    with open(file) as mapping_fp:
        for n, line in enumerate(bowtie_parser.read(mapping_fp)):
            contig_id = line.seqid
            start = line.start
            read = line.read

            # `in` works on both Python 2 and 3 (dict.has_key is 2-only).
            if contig_id not in dict:
                # The fourth '_'-separated field of the contig id is read as
                # a length and 33 - 1 is added to it (presumably converting
                # a k-mer count to bases with k = 33 -- TODO confirm).
                length = int(contig_id.split('_')[3]) + 33 - 1
                dict[contig_id] = [length, [0] * length]

            # Flag every base covered by this read.  This must also happen
            # for the first read of a contig; otherwise that read would only
            # create the entry and its bases would never be counted.
            coverage = dict[contig_id][1]
            for index in range(start, int(start) + len(read)):
                coverage[index] = 1

    # Collapse each coverage list into a count and a fraction of the length.
    for key in dict:
        entry = dict[key]
        mapped_bases = entry[1].count(1)
        entry[1] = mapped_bases
        # Despite the name this is a fraction in [0, 1], not a percentage.
        mapped_percent = mapped_bases / float(entry[0])
        entry.append(mapped_percent)

    # Position of 'group' in the file name (-1 when absent).
    group_index = file.find('group')
Пример #3
0
###

# Command-line interface: exactly one positional argument (the bowtie
# mapping file) plus an optional -M/--max-reads limit that defaults to 0.
parser = optparse.OptionParser()
parser.add_option(
    '-M', '--max-reads',
    dest='max_reads',
    type=int,
    default=0,
    help='only use first M reads, then exit')

options, args = parser.parse_args()

# Unpacking into a one-element target raises ValueError unless exactly one
# positional argument was supplied.
[bowtie_mapping_file] = args
bowtie_fp = open(bowtie_mapping_file)

###

# iterate over the bowtie mapping file (output from bowtie)
# NOTE(review): `print>>sys.stderr, ...` is Python 2 print-statement syntax,
# so this loop targets a Python 2 interpreter.
for n, line in enumerate(bowtie_parser.read(bowtie_fp)):
    # print out status/progress.
    if n % 10000 == 0:
        print>>sys.stderr, 'scanning reads', n

        # The -M limit is tested only inside the progress branch, i.e. once
        # every 10000 records, so the loop stops at the first multiple of
        # 10000 that exceeds max_reads rather than at exactly max_reads.
        # A falsy max_reads (0) disables the limit.
        if options.max_reads and n > options.max_reads:
            print>>sys.stderr, 'EXITING EARLY; -M specified as %d' % \
                               options.max_reads
            break
            
    # retrieve mismatches from the bowtie mapping
    mismatches = line.mismatches.strip()

    # record the mismatch positions
    if mismatches:
        # split on commas and drop any entry that contains 'N'
        mismatches = [ x for x in mismatches.split(',') if 'N' not in x ]