Exemplo n.º 1
0
def isIndexRevComp(indexfile, indexes, n=500000):
    """Determine if the indexes are reverse complemented or not

    Samples up to ``n`` reads from the index file, truncates each read
    sequence to the length of the first index, and tallies how many reads
    lie within a Levenshtein distance of 1 of an index as given versus of
    its reverse complement. A line reporting the decision is printed.

    :param indexfile: filename of the Fastq index file
    :param indexes: list or tuple of index strings
    :param n: integer number of reads to sample
    :returns: True if more sampled reads match the reverse-complemented
        indexes than the indexes as given, otherwise False
    :raises IndexError: if ``indexes`` is empty
    """
    ifile = Fastq(indexfile)
    ilength = len(indexes[0])

    # Tally each distinct index-length prefix so the distance comparison
    # below runs once per distinct sequence rather than once per read.
    indexreads = collections.defaultdict(int)
    for _ in range(n):
        try:
            read = ifile.next()
        except StopIteration:
            # File holds fewer than n reads: work with what was sampled.
            # NOTE(review): assumes Fastq signals end of file with
            # StopIteration -- confirm against the Fastq class.
            break
        indexreads[read.sequence[:ilength]] += 1

    # The reverse complement of an index does not depend on the read, so
    # compute it once per index instead of once per (read, index) pair.
    # NOTE(review): assumes revcomp has no side effects -- confirm.
    candidates = [(index, revcomp(index)) for index in indexes]

    counts = {'normal': 0, 'revcomp': 0}
    for seq, seen in indexreads.items():
        for index, index_rc in candidates:
            # The forward orientation takes precedence for a given index;
            # the reverse complement is only tried when forward misses.
            if pylev.levenshtein(seq, index) <= 1:
                counts['normal'] += seen
            elif pylev.levenshtein(seq, index_rc) <= 1:
                counts['revcomp'] += seen

    is_revcomp = counts['revcomp'] > counts['normal']
    if is_revcomp:
        print('using revcomp')
    else:
        print('NOT revcomp')

    return is_revcomp
Exemplo n.º 2
0
def isIndexRevComp(indexfile, indexes, n=500000):
    """Determine if the indexes are reverse complemented or not

    Samples up to ``n`` reads from the index file, truncates each read
    sequence to the length of the first index, and tallies how many reads
    lie within a Levenshtein distance of 1 of an index as given versus of
    its reverse complement. A line reporting the decision is printed.

    :param indexfile: filename of the Fastq index file
    :param indexes: list or tuple of index strings
    :param n: integer number of reads to sample
    :returns: True if more sampled reads match the reverse-complemented
        indexes than the indexes as given, otherwise False
    :raises IndexError: if ``indexes`` is empty
    """
    ifile = Fastq(indexfile)
    ilength = len(indexes[0])

    # Tally each distinct index-length prefix so the distance comparison
    # below runs once per distinct sequence rather than once per read.
    indexreads = collections.defaultdict(int)
    for _ in range(n):
        try:
            read = ifile.next()
        except StopIteration:
            # File holds fewer than n reads: work with what was sampled.
            # NOTE(review): assumes Fastq signals end of file with
            # StopIteration -- confirm against the Fastq class.
            break
        indexreads[read.sequence[:ilength]] += 1

    # The reverse complement of an index does not depend on the read, so
    # compute it once per index instead of once per (read, index) pair.
    # NOTE(review): assumes revcomp has no side effects -- confirm.
    candidates = [(index, revcomp(index)) for index in indexes]

    counts = {'normal': 0, 'revcomp': 0}
    for seq, seen in indexreads.items():
        for index, index_rc in candidates:
            # The forward orientation takes precedence for a given index;
            # the reverse complement is only tried when forward misses.
            if pylev.levenshtein(seq, index) <= 1:
                counts['normal'] += seen
            elif pylev.levenshtein(seq, index_rc) <= 1:
                counts['revcomp'] += seen

    is_revcomp = counts['revcomp'] > counts['normal']
    if is_revcomp:
        print('using revcomp')
    else:
        print('NOT revcomp')

    return is_revcomp