def get_repeating_lines(start, increment, lines, maxPercentDif=.34):
    """Find the run of lines, spaced `increment` apart, that resemble lines[start].

    Starting at ``start + increment`` and stepping by ``increment``, each
    line is compared to ``lines[start]`` with ``percent_dif``.  The scan
    stops at the first line whose difference is not below
    ``maxPercentDif``.

    Returns None when at most one following line matched.  Otherwise
    returns a ``(start, stop, step)`` tuple suitable as xrange arguments;
    the range covers the line at ``start`` plus every matched line.
    """
    reference = lines[start]
    matches = 0
    for candidate in islice(lines, start + increment, None, increment):
        # Deliberately "not (<)" rather than ">=": the two differ if
        # percent_dif ever yields NaN, and the scan must stop in that case.
        if not percent_dif(reference, candidate) < maxPercentDif:
            break
        matches += 1

    if matches > 1:
        # +1 so the exclusive stop lands just past the last matched line.
        return (start, start + increment * (matches + 1), increment)
    return None
def is_probable_lle(group, lines, outlierFactor=1):
    """Report whether a repeating group probably contains an error.

    ``group`` is a ``(start, end, inc)`` triple of xrange arguments that
    selects line indices.  ``percent_dif`` is computed for every pair of
    selected lines (each earlier line against each later one).

    Returns True if any of those pairwise differences lies more than
    ``outlierFactor`` times ``std`` away from their ``mean``; otherwise
    returns False.
    """
    first, stop, step = group

    # Pairwise differences, in the order (i, j) with i before j in the group.
    pair_difs = [
        percent_dif(lines[a], lines[b])
        for a in xrange(first, stop, step)
        for b in xrange(a + step, stop, step)
    ]

    # NOTE(review): mean/std come from the enclosing module, presumably
    # NumPy's -- confirm.
    center = mean(pair_difs)
    spread = std(pair_difs)

    return any(abs(d - center) > outlierFactor * spread for d in pair_difs)