Пример #1
0
    def __init__(self,
                 sul: SUL,
                 m=None,
                 horizon=None,
                 stop_on=None,
                 stop_on_startswith=None,
                 order_type='shortest first'):
        """Configure the equivalence checker.

        Args:
            sul: System under learning; forwarded to the base-class
                constructor.
            m: Stored as ``self.m``. Exactly one of ``m`` and ``horizon``
                must be given.
            horizon: Stored as ``self.horizon``. Exactly one of ``m`` and
                ``horizon`` must be given.
            stop_on: Collection of outputs on which the testing tree is cut
                short. ``None`` (the default) means a fresh empty set.
            stop_on_startswith: Collection of output prefixes on which the
                testing tree is cut short. ``None`` (the default) means a
                fresh empty set.
            order_type: How access sequences are ordered; either
                ``'longest first'`` or ``'shortest first'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is not a known ordering.
        """
        super().__init__(sul)
        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A fresh set is created per instance: a mutable default argument
        # would be shared by every checker constructed without these
        # arguments.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (set() if stop_on_startswith is None
                                   else stop_on_startswith)
        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Figure out how to order the access sequences
        order_types = {
            'longest first': lambda P: sorted(P, key=len, reverse=True),
            'shortest first': lambda P: sorted(P, key=len, reverse=False),
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]
Пример #2
0
def create_trie():
	"""Build a PrefixSet of lower-cased first names and pickle it to disk.

	Downloads the 1990 US Census female and male first-name distribution
	files, takes the first space-delimited field of every non-empty row as
	the name, lower-cases it and drops every name listed in ``exceptions``.
	The remaining names are stored in a ``PrefixSet`` which is written to
	``proper_names.pickle`` in the current working directory.

	Returns:
		The ``PrefixSet`` holding the collected names.
	"""
	# The surname list (dist.all.last) is currently not part of the sources.
	tsvs = ["https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first",
			"https://www2.census.gov/topics/genealogy/1990surnames/dist.male.first"]

	# A hard-coded list of exceptions (names that are more often seen as
	# common nouns at the front of sentences). Kept as a frozenset so the
	# membership test below does not scan a list for every name.
	exceptions = frozenset([
		"winter", "grant", "van", "son", "young", "royal", "long", "june",
		"august", "joy", "aura", "ray", "ok", "harmony", "ha", "sun", "in",
		"many", "see", "so", "my", "may", "an", "les", "will", "love", "man",
	])

	names_lower = set()
	for tsv_url in tsvs:
		tsv_file = urllib2.urlopen(tsv_url)
		try:
			for row in csv.reader(tsv_file, delimiter=' '):
				# Blank lines yield empty rows; there is no name to take.
				if not row:
					continue
				name = row[0].lower()
				if name not in exceptions:
					names_lower.add(name)
		finally:
			# Always release the connection, even if parsing fails.
			tsv_file.close()

	trie = PrefixSet(names_lower)

	# Pickle data is binary, so the file must be opened in binary mode.
	with open('proper_names.pickle', 'wb') as outfile:
		pickle.dump(trie, outfile)

	return trie
Пример #3
0
    def __init__(self, sul: RERSConnectorV4 = None, separator=" ", storagepath=None, saveinterval=15):
        """Create the caches and register them with the RERS connector.

        Args:
            sul: RERS connector; forwarded to the base-class constructor and
                afterwards accessed through ``self.sul``.
            separator: Separator handed to both ``StringTrie`` caches and
                kept as ``self.separator``.
            storagepath: Forwarded unchanged to the base-class constructor.
            saveinterval: Forwarded unchanged to the base-class constructor.
        """
        super().__init__(sul, storagepath, saveinterval)
        self.separator = separator

        # Build the three stores first, then publish them as attributes.
        regular_trie = StringTrie(separator=separator)
        error_trie = StringTrie(separator=separator)
        invalid_prefixes = PrefixSet()

        self.cache = regular_trie
        self.error_cache = error_trie
        self.invalid_cache = invalid_prefixes

        # Hook up the RERS cache: the connector receives the very same
        # objects that this instance keeps.
        self.sul.hookup_cache(regular_trie, error_trie, invalid_prefixes)

        self.passthrough = False
Пример #4
0
class SmartWmethodEquivalenceCheckerV4(EquivalenceChecker):
    """W-method style equivalence checker with early stopping.

    For every access sequence of the hypothesis the checker grows a tree of
    input words breadth-first, appends every distinguishing sequence and
    compares hypothesis and SUL via ``self._are_equivalent`` (not defined in
    this class body; presumably inherited). Words whose SUL output is listed
    in ``stop_on`` or starts with an entry of ``stop_on_startswith`` are
    recorded in ``stopping_set`` and not extended any further.
    """

    def __init__(self,
                 sul: SUL,
                 m=None,
                 horizon=None,
                 stop_on=None,
                 stop_on_startswith=None,
                 order_type='shortest first'):
        """Configure the equivalence checker.

        Args:
            sul: System under learning; forwarded to the base class.
            m: W-method bound on the number of states. The exploration
                depth becomes ``m`` minus the number of hypothesis states.
                Mutually exclusive with ``horizon``.
            horizon: Exploration depth used directly. Mutually exclusive
                with ``m``.
            stop_on: Collection of outputs on which the testing tree is cut
                short. ``None`` (the default) means a fresh empty set.
            stop_on_startswith: Collection of output prefixes on which the
                testing tree is cut short. ``None`` (the default) means a
                fresh empty set.
            order_type: ``'longest first'``, ``'shortest first'`` or
                ``'ce count'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)
        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A fresh set is created per instance: a mutable default argument
        # would be shared by every checker constructed without these
        # arguments.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (set() if stop_on_startswith is None
                                   else stop_on_startswith)
        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Keep track of how many times each access sequence has been part
        # of a counterexample
        self.acc_seq_ce_counter = {}

        # Figure out how to order the access sequences
        order_types = {
            'longest first':
            lambda P: sorted(P, key=len, reverse=True),
            'shortest first':
            lambda P: sorted(P, key=len, reverse=False),
            # Highest counter first; among equal counters the shorter
            # access sequence comes first (hence the negated length).
            'ce count':
            lambda P: sorted(P,
                             key=lambda x:
                             (self.acc_seq_ce_counter[x], -len(x)),
                             reverse=True)
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self, fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            The pair produced by the last ``self._are_equivalent`` call, or
            ``(True, None)`` if no query was made. The search returns as
            soon as a query reports non-equivalence.

        Raises:
            AssertionError: If ``m`` is set and smaller than the number of
                hypothesis states.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Ensure all access sequences have a counter
        for p in P:
            self.acc_seq_ce_counter.setdefault(p, 0)

        # Alphabet as sorted one-symbol tuples so that symbols can be
        # concatenated onto access sequences.
        A = sorted([(x, ) for x in fsm.get_alphabet()])

        equivalent = True
        counterexample = None

        for access_sequence in self.acc_seq_order(P):
            print("[info] Trying access sequence:", access_sequence)
            to_visit = deque(A)

            while to_visit:
                cur = to_visit.popleft()

                # Grow the testing tree where possible
                self.sul.reset()
                sul_output_pre = self.sul.process_input(access_sequence + cur)
                if sul_output_pre in self.stop_on or any(
                        sul_output_pre.startswith(x)
                        for x in self.stop_on_startswith):
                    # Remember the stopping path and do not extend it; the
                    # W tests below are still executed for this word.
                    self.stopping_set.add(access_sequence + cur)
                elif len(cur) <= depth:
                    for a in A:
                        extended = access_sequence + cur + a
                        # Skip known stopping paths and words that are
                        # themselves access sequences.
                        if extended not in self.stopping_set \
                                and extended not in P:
                            to_visit.append(cur + a)

                # Perform the standard W-method tests
                for w in W:
                    cur_query = access_sequence + cur + w
                    equivalent, counterexample = self._are_equivalent(
                        fsm, cur_query)
                    if not equivalent:
                        # Find the longest access sequence that is a prefix
                        # of the failing query. access_sequence itself
                        # always qualifies, so a match exists.
                        longest_acc_seq = max(
                            (acc_seq for acc_seq in P
                             if cur_query[0:len(acc_seq)] == acc_seq),
                            key=len,
                            default=None)

                        print("Counterexample:", counterexample)
                        print("Longest acc seq:", longest_acc_seq)

                        self.acc_seq_ce_counter[longest_acc_seq] += 1
                        return equivalent, counterexample

            # Nothing found for this access sequence: drop any positive
            # score back to zero, then penalise it by one.
            self.acc_seq_ce_counter[access_sequence] = min(
                0, self.acc_seq_ce_counter[access_sequence]) - 1

        return equivalent, counterexample
Пример #5
0
class SmartWmethodEquivalenceCheckerV2(EquivalenceChecker):
    """W-method style equivalence checker with round-robin scheduling.

    Every access sequence of the hypothesis owns a queue of words still to
    be explored. The checker handles one word of one access sequence at a
    time and then moves on to the next access sequence. Words whose SUL
    output is listed in ``stop_on`` or starts with an entry of
    ``stop_on_startswith`` are recorded in ``stopping_set`` and are neither
    combined with distinguishing sequences nor extended.
    """

    def __init__(self,
                 sul: SUL,
                 m=None,
                 horizon=None,
                 stop_on=None,
                 stop_on_startswith=None,
                 order_type='shortest first'):
        """Configure the equivalence checker.

        Args:
            sul: System under learning; forwarded to the base class.
            m: W-method bound on the number of states. The exploration
                depth becomes ``m`` minus the number of hypothesis states.
                Mutually exclusive with ``horizon``.
            horizon: Exploration depth used directly. Mutually exclusive
                with ``m``.
            stop_on: Collection of outputs on which the testing tree is cut
                short. ``None`` (the default) means a fresh empty set.
            stop_on_startswith: Collection of output prefixes on which the
                testing tree is cut short. ``None`` (the default) means a
                fresh empty set.
            order_type: ``'longest first'`` or ``'shortest first'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)
        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A fresh set is created per instance: a mutable default argument
        # would be shared by every checker constructed without these
        # arguments.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (set() if stop_on_startswith is None
                                   else stop_on_startswith)
        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Figure out how to order the access sequences
        order_types = {
            'longest first': lambda P: sorted(P, key=len, reverse=True),
            'shortest first': lambda P: sorted(P, key=len, reverse=False),
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self, fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            The pair produced by the last ``_are_equivalent`` call, or
            ``(True, None)`` if no query was made. The search returns as
            soon as a query reports non-equivalence.

        Raises:
            AssertionError: If ``m`` is set and smaller than the number of
                hypothesis states.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Alphabet as sorted one-symbol tuples so that symbols can be
        # concatenated onto access sequences.
        A = sorted([(x, ) for x in fsm.get_alphabet()])

        equivalent = True
        counterexample = None

        # NOTE(review): the initial filter looks up the bare symbol, not
        # access_sequence + symbol - confirm this is intended.
        first_steps = [a for a in A if a not in self.stopping_set]

        # One (access sequence, private work queue) task per access
        # sequence. With no initial word left there is nothing to schedule;
        # creating tasks with empty queues would make the loop below pop
        # from an empty deque.
        acc_seq_tasks = deque()
        if first_steps:
            for access_sequence in self.acc_seq_order(P):
                acc_seq_tasks.append((access_sequence, deque(first_steps)))

        # Invariant: every queued task has at least one word to visit.
        while acc_seq_tasks:
            access_sequence, to_visit = acc_seq_tasks.popleft()
            cur = to_visit.popleft()

            # Test without distinguishing sequence, important for early
            # stopping
            equivalent, counterexample = self._are_equivalent(
                fsm, access_sequence + cur)
            if not equivalent:
                return equivalent, counterexample

            if access_sequence + cur not in self.stopping_set:
                # Basically the usual W-method tests:
                for w in W:
                    equivalent, counterexample = self._are_equivalent(
                        fsm, access_sequence + cur + w)
                    if not equivalent:
                        return equivalent, counterexample

                # If not, keep building
                if len(cur) <= depth:
                    for a in A:
                        if access_sequence + cur + a not in self.stopping_set:
                            to_visit.append(cur + a)

            # Re-queue at the back so access sequences take turns.
            if to_visit:
                acc_seq_tasks.append((access_sequence, to_visit))

        return equivalent, counterexample

    def _are_equivalent(self, fsm, input):
        """Run ``input`` on hypothesis and SUL and compare the outputs.

        Both machines are reset first. The query is counted via
        ``stats.increment('test_query')`` and, if a teacher is attached, on
        its ``test_query_counter``. When the SUL output is a stopping
        output, ``input`` is added to ``stopping_set``. On a mismatch the
        outputs are printed and ``self._onCounterexample`` is invoked.

        Args:
            fsm: The hypothesis.
            input: The input word to run on both machines.

        Returns:
            ``(equivalent, input)``; the second element is always the
            tested input, also when the outputs agree.
        """
        fsm.reset()
        hyp_output = fsm.process_input(input)
        self.sul.reset()
        sul_output = self.sul.process_input(input)
        stats.increment('test_query')

        if self._teacher is not None:
            self._teacher.test_query_counter += 1

        if sul_output in self.stop_on or any(
                sul_output.startswith(x) for x in self.stop_on_startswith):
            # Remember this path so that it is not extended later on.
            self.stopping_set.add(input)

        equivalent = hyp_output == sul_output
        if not equivalent:
            print("EQ CHECKER", input, "HYP", hyp_output, "SUL", sul_output)
            self._onCounterexample(input)

        return equivalent, input
Пример #6
0
class SmartWmethodEquivalenceChecker(EquivalenceChecker):
    """W-method style equivalence checker with early stopping.

    For every access sequence of the hypothesis the checker grows a tree of
    input words breadth-first. Each word is tested with every
    distinguishing sequence appended and once on its own. Words whose SUL
    output is listed in ``stop_on`` or starts with an entry of
    ``stop_on_startswith`` are recorded in ``stopping_set`` and not
    extended any further.
    """

    def __init__(self,
                 sul: SUL,
                 m=None,
                 horizon=None,
                 stop_on=None,
                 stop_on_startswith=None,
                 order_type='shortest first'):
        """Configure the equivalence checker.

        Args:
            sul: System under learning; forwarded to the base class.
            m: W-method bound on the number of states. The exploration
                depth becomes ``m`` minus the number of hypothesis states.
                Mutually exclusive with ``horizon``.
            horizon: Exploration depth used directly. Mutually exclusive
                with ``m``.
            stop_on: Collection of outputs on which the testing tree is cut
                short. ``None`` (the default) means a fresh empty set.
            stop_on_startswith: Collection of output prefixes on which the
                testing tree is cut short. ``None`` (the default) means a
                fresh empty set.
            order_type: ``'longest first'``, ``'shortest first'`` or
                ``'ce count'``.

        Raises:
            AssertionError: If both or neither of ``m`` / ``horizon`` are
                set, or if ``order_type`` is unknown.
        """
        super().__init__(sul)
        self.m = m
        self.horizon = horizon
        # Exactly one of the two bounds has to be provided.
        assert (m is None) != (horizon is None), "Set either m or horizon"

        # These are the outputs we want to cut our testing tree short on.
        # A fresh set is created per instance: a mutable default argument
        # would be shared by every checker constructed without these
        # arguments.
        self.stop_on = set() if stop_on is None else stop_on
        self.stop_on_startswith = (set() if stop_on_startswith is None
                                   else stop_on_startswith)
        # This prefix set keeps track of what paths lead to the outputs we
        # want to stop early on
        self.stopping_set = PrefixSet()

        # Keep track of how many times each access sequence has been part
        # of a counterexample
        self.acc_seq_ce_counter = {}

        # Figure out how to order the access sequences
        order_types = {
            'longest first':
            lambda P: sorted(P, key=len, reverse=True),
            'shortest first':
            lambda P: sorted(P, key=len, reverse=False),
            # Highest counter first; among equal counters the shorter
            # access sequence comes first (hence the negated length).
            'ce count':
            lambda P: sorted(P,
                             key=lambda x:
                             (self.acc_seq_ce_counter[x], -len(x)),
                             reverse=True)
        }
        assert order_type in order_types, "Unknown access sequence ordering"
        self.order_type = order_type
        self.acc_seq_order = order_types[order_type]

    def test_equivalence(
            self, fsm: Union[DFA, MealyMachine]) -> Tuple[bool, Iterable]:
        """Search for an input on which ``fsm`` and the SUL disagree.

        Args:
            fsm: The hypothesis to test.

        Returns:
            The pair produced by the last ``_are_equivalent`` call, or
            ``(True, None)`` if no query was made. The search returns as
            soon as a query reports non-equivalence.

        Raises:
            AssertionError: If ``m`` is set and smaller than the number of
                hypothesis states.
        """
        print("[info] Starting equivalence test")
        if self.m is not None:
            n = len(fsm.get_states())
            m = self.m
            assert m >= n, "hypothesis has more states than w-method bound"
            depth = m - n
        else:
            depth = self.horizon

        print("Depth:", depth)

        print("[info] Calculating distinguishing set")
        W = get_distinguishing_set(fsm, check=False)

        P = get_state_cover_set(fsm)
        print("[info] Got state cover set")

        # Ensure all access sequences have a counter
        for p in P:
            self.acc_seq_ce_counter.setdefault(p, 0)

        # Alphabet as sorted one-symbol tuples so that symbols can be
        # concatenated onto access sequences.
        A = sorted([(x, ) for x in fsm.get_alphabet()])

        equivalent = True
        counterexample = None

        for access_sequence in self.acc_seq_order(P):
            print("[info] Trying access sequence:", access_sequence)
            to_visit = deque(A)

            while to_visit:
                cur = to_visit.popleft()

                # Basically the usual W-method tests:
                for w in W:
                    equivalent, counterexample = self._are_equivalent(
                        fsm, access_sequence + cur + w)
                    if not equivalent:
                        self.acc_seq_ce_counter[access_sequence] += 1
                        return equivalent, counterexample

                # Also test without distinguishing sequence, important for
                # early stopping
                equivalent, counterexample = self._are_equivalent(
                    fsm, access_sequence + cur)
                if not equivalent:
                    self.acc_seq_ce_counter[access_sequence] += 1
                    return equivalent, counterexample

                # Cut this branch short?
                if access_sequence + cur in self.stopping_set:
                    continue

                # If not, keep building
                if len(cur) <= depth:
                    for a in A:
                        if access_sequence + cur + a not in self.stopping_set:
                            to_visit.append(cur + a)

            # Nothing found for this access sequence: drop any positive
            # score back to zero, then penalise it by one.
            self.acc_seq_ce_counter[access_sequence] = min(
                0, self.acc_seq_ce_counter[access_sequence]) - 1

        return equivalent, counterexample

    def _are_equivalent(self, fsm, input):
        """Run ``input`` on hypothesis and SUL and compare the outputs.

        Both machines are reset first. If a teacher is attached, its
        ``test_query_counter`` is incremented. When the SUL output is a
        stopping output, ``input`` is added to ``stopping_set``. On a
        mismatch the outputs are printed and ``self._onCounterexample`` is
        invoked.

        Args:
            fsm: The hypothesis.
            input: The input word to run on both machines.

        Returns:
            ``(equivalent, input)``; the second element is always the
            tested input, also when the outputs agree.
        """
        fsm.reset()
        hyp_output = fsm.process_input(input)
        self.sul.reset()
        sul_output = self.sul.process_input(input)

        if self._teacher is not None:
            self._teacher.test_query_counter += 1

        if sul_output in self.stop_on or any(
                sul_output.startswith(x) for x in self.stop_on_startswith):
            # Remember this path so that it is not extended later on.
            self.stopping_set.add(input)

        equivalent = hyp_output == sul_output
        if not equivalent:
            print("EQ CHECKER", input, "HYP", hyp_output, "SUL", sul_output)
            self._onCounterexample(input)

        return equivalent, input