def setUp(self):
    """Prepare a deterministic Hamming-distance LSH fixture.

    Seeds the global ``random`` generator and stores on the test case the
    LSH family (``self.family``), the distance threshold
    (``self.dist_thres``) and a reference Hamming distance function
    (``self.dist_fn``).
    """
    # Set a random seed so hash functions are always the same
    random.seed(0)

    # NOTE(review): 20 is presumably the length of the hashed sequences;
    # confirm against lsh.HammingDistanceFamily
    self.family = lsh.HammingDistanceFamily(20)
    self.dist_thres = 5

    def f(a, b):
        """Return the number of positions at which a and b differ."""
        assert len(a) == len(b)
        # Lengths are asserted equal above, so zip() drops no elements
        return sum(1 for x, y in zip(a, b) if x != y)

    self.dist_fn = f
def __init__(self, dist_thres, probe_length):
    """Configure near-duplicate detection based on Hamming distance.

    Args:
        dist_thres: two probes are called near-duplicates only if their
            Hamming distance is within this value; it should be equal
            to or commensurate with (but not greater than) the number
            of mismatches at/below which a probe is considered to
            hybridize to a target sequence, so that candidate probes
            further apart than this value are not collapsed as
            near-duplicates
        probe_length: length of probes
    """
    # NOTE(review): the meaning of k=20 is defined by the parent
    # initializer, which is not visible here
    super().__init__(k=20)

    def hamming_dist(a, b):
        # Both arguments are probe.Probe objects
        mismatch_count = a.mismatches(b)
        return mismatch_count

    self.lsh_family = lsh.HammingDistanceFamily(probe_length)
    self.dist_thres = dist_thres
    self.dist_fn = hamming_dist
def setUp(self):
    """Create the Hamming-distance LSH family under a fixed random seed."""
    # Fix the random seed so hash functions are always the same
    seed = 0
    random.seed(seed)

    # NOTE(review): 20 is presumably the length of the hashed sequences;
    # confirm against lsh.HammingDistanceFamily
    family_arg = 20
    self.family = lsh.HammingDistanceFamily(family_arg)