示例#1
0
def find_nocov_variants(covlist, chrom='', caller='', min_cov=5):
    """Report each run of positions whose coverage is below ``min_cov``.

    Args:
        covlist: Per-position coverage values. Index 0 is a placeholder
            (not a real position); real positions start at index 1.
        chrom: Value stored under ``'chrom'`` in every reported variant.
        caller: Value stored under ``'caller'`` in every reported variant.
        min_cov: A position counts as uncovered when its coverage is
            strictly below this threshold.

    Returns:
        ``None`` when every real position is uncovered. Otherwise a list
        (possibly empty) with one ``Variant.from_dict(...)`` result per
        interval yielded by ``intervals``; each carries ``'pos'``,
        ``'length'``, ``'mean_cov'`` and ``'type': 'no_cov'``.

    Raises:
        AssertionError: if any real position has negative coverage.
    """
    # scipy.mean was only a deprecated re-export of numpy.mean; call NumPy
    # directly. Imported locally so this function does not depend on the
    # module's import block.
    import numpy as np

    assert min(covlist[1:]) >= 0

    # Start enumerating at 1 so the placeholder at index 0 is skipped no
    # matter what value it holds (removing index 0 after the fact fails
    # with ValueError when the placeholder is not below min_cov).
    nocov = [pos for pos, cov in enumerate(covlist[1:], start=1) if cov < min_cov]

    if len(nocov) == len(covlist) - 1:
        return None  # entire sequence has no coverage

    variants = []
    # intervals() is assumed to yield inclusive (first, last) index pairs;
    # the length and slice arithmetic below relies on that.
    for iv in intervals(nocov):
        first, last = iv[0], iv[1]
        data = {
            'chrom': chrom,
            'caller': caller,
            'pos': first,
            'type': 'no_cov',
            'length': last - first + 1,
            'mean_cov': np.mean(covlist[first:last + 1]),
        }
        variants.append(Variant.from_dict(data))

    return variants
示例#2
0
def find_variants(covlist,
                  seq,
                  chrom,
                  min_cov=5,
                  min_score=30,
                  exclude_edges=False,
                  exclude_overlaps=False):
    """Identify coverage variants (uncovered runs and coverage dips).

    Args:
        covlist: Per-position coverage values. Index 0 is a placeholder;
            real positions are 1-based, so ``len(covlist) - 1`` must equal
            ``len(seq)``.
        seq: Reference record for ``chrom``. It is sliced and its ``.seq``
            attribute is read; assumed to be 0-based (e.g. a Biopython
            SeqRecord) -- TODO confirm against callers.
        chrom: Value stored under ``'chrom'`` in every reported variant.
        min_cov: Positions with coverage strictly below this are uncovered.
        min_score: Positions whose score from ``adjusted_coverage_score``
            is at least this value are coverage-dip candidates.
        exclude_edges: Drop dip intervals that start at position 1 or end
            at the last position.
        exclude_overlaps: Pass dip intervals through ``remove_overlap``
            against the uncovered intervals.

    Returns:
        dict with ``'mean_cov'`` and ``'pct_cov'``; ``'variants'`` (a list
        of ``Variant.from_dict`` results) is added only when at least one
        variant was found. When every position is uncovered only the two
        summary keys are returned.

    Raises:
        AssertionError: on negative coverage or a coverage/sequence length
            mismatch.
    """
    # scipy.mean was only a deprecated re-export of numpy.mean; call NumPy
    # directly. Imported locally so this function does not depend on the
    # module's import block.
    import numpy as np

    assert min(covlist[1:]) >= 0
    assert len(covlist) - 1 == len(seq), (
        "Number of coverage values (%d) is not equal to sequence length (%d)"
        % (len(covlist) - 1, len(seq)))

    # Start enumerating at 1 so the placeholder at index 0 is skipped no
    # matter what value it holds (removing index 0 after the fact fails
    # with ValueError when the placeholder is not below min_cov).
    nocov = [i for i, v in enumerate(covlist[1:], start=1) if v < min_cov]

    retval = {}
    retval['mean_cov'] = np.mean(covlist[1:])
    retval['pct_cov'] = 1 - (float(len(nocov)) / (len(covlist) - 1))
    if len(nocov) == len(seq):
        # Nothing is covered: report the summary only.
        return retval

    nocov_intervals = list(intervals(nocov))
    covscores, localmeans = adjusted_coverage_score(covlist)
    covdip = [i for i, v in enumerate(covscores) if v >= min_score]
    covdip_intervals = list(intervals(covdip))

    # refine intervals
    if exclude_edges:
        # ignore intervals that touch the beginning or end of the reference
        last_pos = len(covlist) - 1
        covdip_intervals = [
            iv for iv in covdip_intervals
            if iv[0] != 1 and iv[1] != last_pos
        ]
    if exclude_overlaps:
        # ignore covdip intervals that overlap with nocov intervals
        covdip_intervals = remove_overlap(covdip_intervals, nocov_intervals)
    # NOTE: dips are filtered per interval only; individual uncovered
    # positions inside a surviving dip interval are not trimmed out.

    variants = []
    for iv in nocov_intervals:
        data = {'chrom': chrom, 'pos': iv[0], 'type': 'no_cov'}
        data['length'] = iv[1] - iv[0] + 1
        data['mean_cov'] = np.mean(covlist[iv[0]:(iv[1] + 1)])
        variants.append(Variant.from_dict(data))

    for iv in covdip_intervals:
        data = {'chrom': chrom, 'pos': iv[0], 'type': 'cov_dip'}
        data['length'] = iv[1] - iv[0] + 1
        data['mean_cov'] = np.mean(covlist[iv[0]:(iv[1] + 1)])
        intscores = covscores[iv[0]:(iv[1] + 1)]
        intmeans = localmeans[iv[0]:(iv[1] + 1)]
        data['quality'] = max(intscores)
        data['info'] = {
            'CovScores': ','.join('%d' % int(round(v)) for v in intscores),
            'LocalMeans': ','.join('%d' % int(round(v)) for v in intmeans),
        }
        # Interval bounds are 1-based covlist indices, while seq is one
        # element shorter and 0-based: shift by one so 'ref' holds the
        # bases at positions iv[0]..iv[1]. Clamp so an interval that
        # starts at the placeholder index cannot wrap to a negative start.
        ref_start = max(iv[0] - 1, 0)
        data['ref'] = str(seq[ref_start:iv[1]].seq).upper()
        variants.append(Variant.from_dict(data))

    if variants:
        retval['variants'] = variants
    return retval
示例#3
0
def find_variants(covlist, seq, chrom, min_cov=5, min_score=30, exclude_edges=False, exclude_overlaps=False):
    """Identify coverage variants (uncovered runs and coverage dips).

    Args:
        covlist: Per-position coverage values. Index 0 is a placeholder;
            real positions are 1-based, so ``len(covlist) - 1`` must equal
            ``len(seq)``.
        seq: Reference record for ``chrom``. It is sliced and its ``.seq``
            attribute is read; assumed to be 0-based (e.g. a Biopython
            SeqRecord) -- TODO confirm against callers.
        chrom: Value stored under ``'chrom'`` in every reported variant.
        min_cov: Positions with coverage strictly below this are uncovered.
        min_score: Positions whose score from ``adjusted_coverage_score``
            is at least this value are coverage-dip candidates.
        exclude_edges: Drop dip intervals that start at position 1 or end
            at the last position.
        exclude_overlaps: Pass dip intervals through ``remove_overlap``
            against the uncovered intervals.

    Returns:
        dict with ``'mean_cov'`` and ``'pct_cov'``; ``'variants'`` (a list
        of ``Variant.from_dict`` results) is added only when at least one
        variant was found. When every position is uncovered only the two
        summary keys are returned.

    Raises:
        AssertionError: on negative coverage or a coverage/sequence length
            mismatch.
    """
    # scipy.mean was only a deprecated re-export of numpy.mean; call NumPy
    # directly. Imported locally so this function does not depend on the
    # module's import block.
    import numpy as np

    n_positions = len(covlist) - 1
    assert min(covlist[1:]) >= 0
    assert n_positions == len(seq), "Number of coverage values (%d) is not equal to sequence length (%d)" % (n_positions, len(seq))

    # Enumerate real positions only (1-based). Skipping the placeholder up
    # front avoids the ValueError that removing index 0 afterwards raises
    # when the placeholder is not below min_cov.
    nocov = [pos for pos, cov in enumerate(covlist[1:], start=1) if cov < min_cov]

    retval = {
        'mean_cov': np.mean(covlist[1:]),
        'pct_cov': 1 - (float(len(nocov)) / n_positions),
    }

    if len(nocov) == len(seq):
        # Nothing is covered: report the summary only.
        return retval

    nocov_intervals = list(intervals(nocov))
    covscores, localmeans = adjusted_coverage_score(covlist)

    covdip = [pos for pos, score in enumerate(covscores) if score >= min_score]
    covdip_intervals = list(intervals(covdip))

    # refine intervals
    if exclude_edges:
        # ignore intervals that touch the beginning or end of the reference
        covdip_intervals = [iv for iv in covdip_intervals
                            if iv[0] != 1 and iv[1] != n_positions]

    if exclude_overlaps:
        # ignore covdip intervals that overlap with nocov intervals
        covdip_intervals = remove_overlap(covdip_intervals, nocov_intervals)

    # NOTE: dips are filtered per interval only; individual uncovered
    # positions inside a surviving dip interval are not trimmed out.

    variants = []
    for iv in nocov_intervals:
        first, last = iv[0], iv[1]
        variants.append(Variant.from_dict({
            'chrom': chrom,
            'pos': first,
            'type': 'no_cov',
            'length': last - first + 1,
            'mean_cov': np.mean(covlist[first:last + 1]),
        }))

    for iv in covdip_intervals:
        first, last = iv[0], iv[1]
        intscores = covscores[first:last + 1]
        intmeans = localmeans[first:last + 1]
        # Interval bounds are 1-based covlist indices, while seq is one
        # element shorter and 0-based: shift by one so 'ref' holds the
        # bases at positions first..last. Clamp so an interval that starts
        # at the placeholder index cannot wrap to a negative start.
        ref_start = max(first - 1, 0)
        variants.append(Variant.from_dict({
            'chrom': chrom,
            'pos': first,
            'type': 'cov_dip',
            'length': last - first + 1,
            'mean_cov': np.mean(covlist[first:last + 1]),
            'quality': max(intscores),
            'info': {
                'CovScores': ','.join('%d' % int(round(v)) for v in intscores),
                'LocalMeans': ','.join('%d' % int(round(v)) for v in intmeans),
            },
            'ref': str(seq[ref_start:last].seq).upper(),
        }))

    if variants:
        retval['variants'] = variants

    return retval