def __init__(self, sul: SUL, m=None, horizon=None, stop_on=None,
             stop_on_startswith=None, order_type='shortest first'):
    """Configure the equivalence checker.

    Args:
        sul: Forwarded unchanged to the base-class constructor.
        m: Stored as ``self.m``. Exactly one of ``m`` and ``horizon``
            must be given.
        horizon: Stored as ``self.horizon``. Exactly one of ``m`` and
            ``horizon`` must be given.
        stop_on: Collection of outputs on which the testing tree is cut
            short. Defaults to a fresh empty set per instance.
        stop_on_startswith: Collection of output prefixes on which the
            testing tree is cut short. Defaults to a fresh empty set per
            instance.
        order_type: Either ``'longest first'`` or ``'shortest first'``;
            selects how access sequences are ordered by length.

    Raises:
        AssertionError: If both or neither of ``m`` / ``horizon`` are set,
            or if ``order_type`` is not a known ordering.
    """
    super().__init__(sul)

    self.m = m
    self.horizon = horizon
    # Exactly one of the two bounds has to be provided.
    assert (m is None) != (horizon is None), "Set either m or horizon"

    # These are the outputs we want to cut our testing tree short on.
    # A new set is created per instance so that instances built with the
    # defaults never share (and accidentally mutate) one set object.
    self.stop_on = set() if stop_on is None else stop_on
    self.stop_on_startswith = (
        set() if stop_on_startswith is None else stop_on_startswith)

    # This prefix set keeps track of what paths lead to the outputs we
    # want to stop early on
    self.stopping_set = PrefixSet()

    # Figure out how to order the access sequences
    order_types = {
        'longest first': lambda P: sorted(P, key=len, reverse=True),
        'shortest first': lambda P: sorted(P, key=len, reverse=False),
    }
    assert order_type in order_types, "Unknown access sequence ordering"
    self.order_type = order_type
    self.acc_seq_order = order_types[order_type]
def create_trie():
    """Build, pickle and return a ``PrefixSet`` of lower-cased first names.

    Downloads the female and male first-name distribution files listed in
    ``tsvs``, takes the first space-separated field of every non-empty row
    as a name, lower-cases it, drops the names listed in ``exceptions`` and
    stores the remainder in a ``PrefixSet``. The set is pickled to
    ``proper_names.pickle`` (relative to the current working directory).

    Returns:
        The ``PrefixSet`` that was written to disk.
    """
    tsvs = [
        "https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first",
        "https://www2.census.gov/topics/genealogy/1990surnames/dist.male.first",
    ]
    # Not used: "https://www2.census.gov/topics/genealogy/1990surnames/dist.all.last"

    # A hard-coded list of exceptions (names that are more often seen as
    # common nouns at the front of sentences). A frozenset gives O(1)
    # membership tests and makes the duplicated "young" entry harmless.
    exceptions = frozenset([
        "winter", "grant", "van", "son", "young", "royal", "long", "june",
        "august", "joy", "young", "aura", "ray", "ok", "harmony", "ha",
        "sun", "in", "many", "see", "so", "my", "may", "an", "les", "will",
        "love", "man",
    ])

    names = []
    for tsv_url in tsvs:
        tsv_file = urllib2.urlopen(tsv_url)
        try:
            # Read the first column directly instead of transposing with
            # zip(): indexing a zip object fails on Python 3, and a single
            # blank row made the transposed result empty (IndexError).
            names.extend(
                row[0] for row in csv.reader(tsv_file, delimiter=' ') if row)
        finally:
            # Release the connection even if parsing fails.
            tsv_file.close()

    names_lower = {name.lower() for name in names} - exceptions

    trie = PrefixSet(names_lower)
    # pickle produces binary data, so the file must be opened in binary mode.
    with open('proper_names.pickle', 'wb') as outfile:
        pickle.dump(trie, outfile)
    return trie
def __init__(
        self,
        sul: RERSConnectorV4 = None,
        separator=" ",
        storagepath=None,
        saveinterval=15):
    """Set up the caches and register them with the wrapped SUL.

    Args:
        sul: Forwarded to the base-class constructor together with
            ``storagepath`` and ``saveinterval``.
        separator: Key separator used for both ``StringTrie`` caches.
        storagepath: Forwarded to the base-class constructor.
        saveinterval: Forwarded to the base-class constructor.

    NOTE(review): ``hookup_cache`` is called on ``self.sul``; with the
    default ``sul=None`` this presumably fails unless the base class
    supplies a SUL - confirm against the base class.
    """
    super().__init__(sul, storagepath, saveinterval)
    self.separator = separator

    # Regular and error results are kept in separate tries that share the
    # same separator; invalid inputs are tracked in a prefix set.
    self.cache = StringTrie(separator=separator)
    self.error_cache = StringTrie(separator=separator)
    self.invalid_cache = PrefixSet()

    # hookup rers cache
    self.sul.hookup_cache(
        self.cache,
        self.error_cache,
        self.invalid_cache,
    )

    self.passthrough = False
class SmartWmethodEquivalenceCheckerV4(EquivalenceChecker):
    """W-method style equivalence checker with early stopping.

    For every access sequence of the hypothesis, a tree of input extensions
    is explored breadth-first. Before the distinguishing sequences are
    tried, the SUL is queried once with ``access_sequence + extension`` to
    decide whether the branch should be cut (its output is a "stop" output)
    or grown further. Counterexamples are credited to the longest access
    sequence that is a prefix of the failing query.
    """

    def __init__(self, sul: SUL, m=None, horizon=None, stop_on=None,
                 stop_on_startswith=None, order_type='shortest first'):
        """Configure the checker.

        Args:
            sul: Forwarded unchanged to the base-class constructor.
            m: Bound on the number of states; the exploration depth is
                ``m`` minus the number of hypothesis states. Exactly one of
                ``m`` and ``horizon`` must be given.
            horizon: Exploration depth used directly. Exactly one of ``m``
                and ``horizon`` must be given.
            stop_on: Collection of outputs on which a branch is cut short.
                Defaults to a fresh empty set per instance.
            stop_on_startswith: Collection of output prefixes on which a
                branch is cut short. Defaults to a fresh empty set per
                instance.
            order_type: ``'longest first'``, ``'shortest first'`` or
                ``'ce count'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)

        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A new set is created per instance so that instances built with
        # the defaults never share one set object.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (
            set() if stop_on_startswith is None else stop_on_startswith)

        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Keep track of how many times each access sequence has been part
        # of a counterexample
        self.acc_seq_ce_counter = {}

        # Figure out how to order the access sequences
        order_types = {
            'longest first': lambda P: sorted(P, key=len, reverse=True),
            'shortest first': lambda P: sorted(P, key=len, reverse=False),
            # Highest counter first; among equal counters, shorter first.
            'ce count': lambda P: sorted(
                P,
                key=lambda x: (self.acc_seq_ce_counter[x], -len(x)),
                reverse=True),
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self,
            fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            The ``(equivalent, counterexample)`` pair produced by the last
            ``self._are_equivalent`` call, or ``(True, None)`` when no
            query was made. The method returns as soon as a query reports
            a difference.

        Raises:
            AssertionError: If ``m`` was given and the hypothesis has more
                than ``m`` states.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Ensure all access sequences have a counter
        for p in P:
            self.acc_seq_ce_counter.setdefault(p, 0)

        # Single-symbol extensions, as 1-tuples so they can be concatenated.
        A = sorted((x,) for x in fsm.get_alphabet())

        equivalent = True
        counterexample = None

        for access_sequence in self.acc_seq_order(P):
            print("[info] Trying access sequence:", access_sequence)

            to_visit = deque(A)

            while to_visit:
                cur = to_visit.popleft()
                prefix = access_sequence + cur

                # Grow the testing tree where possible
                self.sul.reset()
                sul_output_pre = self.sul.process_input(prefix)
                if sul_output_pre in self.stop_on or any(
                        sul_output_pre.startswith(x)
                        for x in self.stop_on_startswith):
                    # Remember the stop path; the branch is not extended,
                    # but the W tests below are still run for it.
                    self.stopping_set.add(prefix)
                elif len(cur) <= depth:
                    for a in A:
                        extended = prefix + a
                        # Skip known stop paths and anything that is itself
                        # an access sequence.
                        if extended not in self.stopping_set \
                                and extended not in P:
                            to_visit.append(cur + a)

                # Perform the standard W-method tests
                for w in W:
                    cur_query = prefix + w
                    equivalent, counterexample = self._are_equivalent(
                        fsm, cur_query)
                    if not equivalent:
                        # find longest access sequence which overlaps with
                        # the current query
                        longest_acc_seq = max(
                            (acc_seq for acc_seq in P
                             if cur_query[0:len(acc_seq)] == acc_seq),
                            key=len,
                            default=None)
                        print("Counterexample:", counterexample)
                        print("Longest acc seq:", longest_acc_seq)
                        self.acc_seq_ce_counter[longest_acc_seq] += 1
                        return equivalent, counterexample

            # Nothing found for this access sequence: forget any positive
            # score and push it one step further down the 'ce count' order.
            self.acc_seq_ce_counter[access_sequence] = min(
                0, self.acc_seq_ce_counter[access_sequence])
            self.acc_seq_ce_counter[access_sequence] -= 1

        return equivalent, counterexample
class SmartWmethodEquivalenceCheckerV2(EquivalenceChecker):
    """W-method style equivalence checker that interleaves access sequences.

    Each access sequence owns a queue of input extensions. The checker
    handles one extension of one access sequence at a time and then moves
    that access sequence to the back of the task queue, so all access
    sequences are explored round-robin. Branches whose SUL output is a
    "stop" output are recorded in ``stopping_set`` and not explored further.
    """

    def __init__(self, sul: SUL, m=None, horizon=None, stop_on=None,
                 stop_on_startswith=None, order_type='shortest first'):
        """Configure the checker.

        Args:
            sul: Forwarded unchanged to the base-class constructor.
            m: Bound on the number of states; the exploration depth is
                ``m`` minus the number of hypothesis states. Exactly one of
                ``m`` and ``horizon`` must be given.
            horizon: Exploration depth used directly. Exactly one of ``m``
                and ``horizon`` must be given.
            stop_on: Collection of outputs on which a branch is cut short.
                Defaults to a fresh empty set per instance.
            stop_on_startswith: Collection of output prefixes on which a
                branch is cut short. Defaults to a fresh empty set per
                instance.
            order_type: ``'longest first'`` or ``'shortest first'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)

        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A new set is created per instance so that instances built with
        # the defaults never share one set object.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (
            set() if stop_on_startswith is None else stop_on_startswith)

        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Figure out how to order the access sequences
        order_types = {
            'longest first': lambda P: sorted(P, key=len, reverse=True),
            'shortest first': lambda P: sorted(P, key=len, reverse=False),
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self,
            fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            ``(False, query)`` for the first query on which the outputs
            differ. Otherwise ``(True, last_query)``, where ``last_query``
            is the last input that was tested, or ``(True, None)`` when no
            query was made.

        Raises:
            AssertionError: If ``m`` was given and the hypothesis has more
                than ``m`` states, or if a task with an empty work list is
                encountered.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Single-symbol extensions, as 1-tuples so they can be concatenated.
        A = sorted((x,) for x in fsm.get_alphabet())

        equivalent = True
        counterexample = None

        ordered_access_sequences = self.acc_seq_order(P)
        # The initial work list is the same for every access sequence, so
        # it is computed once and copied into a private deque per task.
        # NOTE(review): if every symbol is already in the stopping set this
        # list is empty and the assertion in the loop below fires.
        initial_extensions = [a for a in A if a not in self.stopping_set]
        acc_seq_tasks = deque(
            (access_sequence, deque(initial_extensions))
            for access_sequence in ordered_access_sequences)

        while acc_seq_tasks:
            access_sequence, to_visit = acc_seq_tasks.popleft()
            assert len(to_visit) > 0

            cur = to_visit.popleft()
            prefix = access_sequence + cur

            # Test without distinguishing sequence, important for early
            # stopping
            equivalent, counterexample = self._are_equivalent(fsm, prefix)
            if not equivalent:
                return equivalent, counterexample

            if prefix not in self.stopping_set:
                # Basically the usual W-method tests:
                for w in W:
                    equivalent, counterexample = self._are_equivalent(
                        fsm, prefix + w)
                    if not equivalent:
                        return equivalent, counterexample

                # If not, keep building
                if len(cur) <= depth:
                    for a in A:
                        if prefix + a not in self.stopping_set:
                            to_visit.append(cur + a)

            # Re-queue this access sequence behind the others while it
            # still has extensions left to try.
            if to_visit:
                acc_seq_tasks.append((access_sequence, to_visit))

        return equivalent, counterexample

    def _are_equivalent(self, fsm, input):
        """Run ``input`` on both the hypothesis and the SUL and compare.

        Both machines are reset before the query. The query is counted via
        ``stats`` and, when a teacher is attached, on the teacher. If the
        SUL output is a stop output, ``input`` is added to
        ``self.stopping_set``. On a mismatch ``self._onCounterexample`` is
        called with ``input``.

        Returns:
            ``(equivalent, input)`` where ``equivalent`` tells whether the
            two outputs were equal.
        """
        fsm.reset()
        hyp_output = fsm.process_input(input)
        self.sul.reset()
        sul_output = self.sul.process_input(input)

        stats.increment('test_query')
        if self._teacher is not None:
            self._teacher.test_query_counter += 1

        if sul_output in self.stop_on or any(
                sul_output.startswith(x) for x in self.stop_on_startswith):
            self.stopping_set.add(input)

        equivalent = hyp_output == sul_output
        if not equivalent:
            print("EQ CHECKER", input, "HYP", hyp_output, "SUL", sul_output)
            self._onCounterexample(input)

        return equivalent, input
class SmartWmethodEquivalenceChecker(EquivalenceChecker):
    """W-method style equivalence checker with early stopping.

    For every access sequence of the hypothesis, a tree of input extensions
    is explored breadth-first. Each extension is tested with every
    distinguishing sequence and once on its own; branches whose SUL output
    is a "stop" output are recorded in ``stopping_set`` and not extended.
    A per-access-sequence counter records which sequences produced
    counterexamples and can be used to order later runs (``'ce count'``).
    """

    def __init__(self, sul: SUL, m=None, horizon=None, stop_on=None,
                 stop_on_startswith=None, order_type='shortest first'):
        """Configure the checker.

        Args:
            sul: Forwarded unchanged to the base-class constructor.
            m: Bound on the number of states; the exploration depth is
                ``m`` minus the number of hypothesis states. Exactly one of
                ``m`` and ``horizon`` must be given.
            horizon: Exploration depth used directly. Exactly one of ``m``
                and ``horizon`` must be given.
            stop_on: Collection of outputs on which a branch is cut short.
                Defaults to a fresh empty set per instance.
            stop_on_startswith: Collection of output prefixes on which a
                branch is cut short. Defaults to a fresh empty set per
                instance.
            order_type: ``'longest first'``, ``'shortest first'`` or
                ``'ce count'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)

        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A new set is created per instance so that instances built with
        # the defaults never share one set object.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (
            set() if stop_on_startswith is None else stop_on_startswith)

        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Keep track of how many times each access sequence has been part
        # of a counterexample
        self.acc_seq_ce_counter = {}

        # Figure out how to order the access sequences
        order_types = {
            'longest first': lambda P: sorted(P, key=len, reverse=True),
            'shortest first': lambda P: sorted(P, key=len, reverse=False),
            # Highest counter first; among equal counters, shorter first.
            'ce count': lambda P: sorted(
                P,
                key=lambda x: (self.acc_seq_ce_counter[x], -len(x)),
                reverse=True),
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self,
            fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            ``(False, query)`` for the first query on which the outputs
            differ. Otherwise ``(True, last_query)``, where ``last_query``
            is the last input that was tested, or ``(True, None)`` when no
            query was made.

        Raises:
            AssertionError: If ``m`` was given and the hypothesis has more
                than ``m`` states.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Ensure all access sequences have a counter
        for p in P:
            self.acc_seq_ce_counter.setdefault(p, 0)

        # Single-symbol extensions, as 1-tuples so they can be concatenated.
        A = sorted((x,) for x in fsm.get_alphabet())

        equivalent = True
        counterexample = None

        for access_sequence in self.acc_seq_order(P):
            print("[info] Trying access sequence:", access_sequence)

            to_visit = deque(A)

            while to_visit:
                cur = to_visit.popleft()
                prefix = access_sequence + cur

                # Basically the usual W-method tests:
                for w in W:
                    equivalent, counterexample = self._are_equivalent(
                        fsm, prefix + w)
                    if not equivalent:
                        self.acc_seq_ce_counter[access_sequence] += 1
                        return equivalent, counterexample

                # Also test without distinguishing sequence, important for
                # early stopping
                equivalent, counterexample = self._are_equivalent(
                    fsm, prefix)
                if not equivalent:
                    self.acc_seq_ce_counter[access_sequence] += 1
                    return equivalent, counterexample

                # Cut this branch short?
                if prefix in self.stopping_set:
                    continue

                # If not, keep building
                if len(cur) <= depth:
                    for a in A:
                        if prefix + a not in self.stopping_set:
                            to_visit.append(cur + a)

            # Nothing found for this access sequence: forget any positive
            # score and push it one step further down the 'ce count' order.
            self.acc_seq_ce_counter[access_sequence] = min(
                0, self.acc_seq_ce_counter[access_sequence])
            self.acc_seq_ce_counter[access_sequence] -= 1

        return equivalent, counterexample

    def _are_equivalent(self, fsm, input):
        """Run ``input`` on both the hypothesis and the SUL and compare.

        Both machines are reset before the query. When a teacher is
        attached, its ``test_query_counter`` is incremented. If the SUL
        output is a stop output, ``input`` is added to
        ``self.stopping_set``. On a mismatch ``self._onCounterexample`` is
        called with ``input``.

        Returns:
            ``(equivalent, input)`` where ``equivalent`` tells whether the
            two outputs were equal.
        """
        fsm.reset()
        hyp_output = fsm.process_input(input)
        self.sul.reset()
        sul_output = self.sul.process_input(input)

        if self._teacher is not None:
            self._teacher.test_query_counter += 1

        if sul_output in self.stop_on or any(
                sul_output.startswith(x) for x in self.stop_on_startswith):
            self.stopping_set.add(input)

        equivalent = hyp_output == sul_output
        if not equivalent:
            print("EQ CHECKER", input, "HYP", hyp_output, "SUL", sul_output)
            self._onCounterexample(input)

        return equivalent, input