示例#1
0
def cpt_method_3(cpt: CompressedPrefixTree,
                 largest_prefix: int,
                 weights: List[Any],
                 prefixes: List[Any] = [[]]) -> List[List[Any]]:
    """Fill <cpt> with a binary fan of prefixes and return them in order.

    Starting from every prefix in <prefixes>, each prefix is extended with
    0 and then with 1, the extended prefix is inserted into <cpt>, and the
    function recurses on it until prefixes of length <largest_prefix> have
    been inserted:

                        []
                [0]           [1]
            [0,0] [0,1]   [1,0] [1,1]
            ...             ...

    Each insertion takes its weight from the END of <weights> (the list is
    consumed in place) and uses a throw-away random value.

    Returns the inserted prefixes in insertion (pre-order) order, or [] when
    <cpt>.value is not a list or the first prefix already has length
    <largest_prefix>.

    Note: the default for <prefixes> is a shared list object; it is only
    read here, never modified.
    """
    # Nothing to do for a non-list root value or once the depth is reached.
    if not isinstance(cpt.value, list) or \
            len(prefixes[0]) == largest_prefix:
        return []

    # The stored values are irrelevant to the tests; any distinct ints do.
    filler_values = random.sample(range(1000000), 10)
    collected = []
    for base in prefixes:
        for bit in (0, 1):
            extended = base + [bit]
            collected.append(extended)
            filler = filler_values.pop()
            weight = float(weights.pop())
            cpt.insert(filler, weight, extended)
            # Descend below the prefix that was just inserted.
            collected += cpt_method_3(cpt, largest_prefix, weights,
                                      [extended])
    return collected
示例#2
0
def test_compressed_prefix_tree_structure() -> None:
    """Check the node layout of a small 'sum'-weighted compressed tree.

    Three words are inserted letter by letter; 'cat' and 'car' share the
    prefix ['c', 'a'] while 'dog' stands alone.

    NOTE: the same assertions are expected to hold for any insertion order,
    which is worth trying out.
    """
    tree = CompressedPrefixTree('sum')
    for word, weight in (('cat', 2.0), ('car', 3.0), ('dog', 4.0)):
        tree.insert(word, weight, list(word))

    # __len__ counts stored values only, so three insertions give 3.
    assert len(tree) == 3

    # With the 'sum' option the root weight is the total of all weights.
    assert tree.weight == 2.0 + 3.0 + 4.0

    # Exactly two subtrees are expected, heavier one first.
    assert len(tree.subtrees) == 2
    heavier, lighter = tree.subtrees[0], tree.subtrees[1]

    # The shared-prefix subtree keeps only the common part of its words.
    assert heavier.value == ['c', 'a']
    assert heavier.weight == 5.0

    # The lone word keeps its whole prefix in a single node.
    assert lighter.value == ['d', 'o', 'g']
    assert lighter.weight == 4.0
示例#3
0
def test_insert(length: int) -> None:
    """Check weights, length and ordering of a CompressedPrefixTree after
    inserting with three different prefix layouts (n = <length>).

    Method 1: prefixes [[0..n-1], [1..n-1], ..., [n-1]]; the root is
              expected to have one subtree per prefix.
    Method 2: prefixes [[0..n-1], [0..n-2], ..., [0]]; the root is expected
              to have two subtrees.
    Method 3: the prefixes produced by cpt_method_3 with depth 3; the root
              is expected to have two subtrees.
    """
    import sys
    sys.setrecursionlimit(5000)

    for method in ('1', '2', '3'):
        prefixes = []
        values = []
        weights = []
        cpt = CompressedPrefixTree('sum')
        cpt_avg = CompressedPrefixTree('average')

        if method == '3':
            prefixes = cpt_method_3(cpt, 3, list(range(15)))
            cpt_method_3(cpt_avg, 3, list(range(15)))
            values = prefixes  # only len(values) is used below
            weights = list(reversed(range(15)))
        else:
            for x in range(length):
                if method == '1':
                    prefix = list(range(x, length))
                else:
                    prefix = list(range(0, length - x))
                # Value and weight are both length - x for this insertion.
                item = length - x
                prefixes.append(prefix)
                values.append(item)
                weights.append(item)
                cpt.insert(item, item, prefix)
                cpt_avg.insert(item, item, prefix)

        expected_subtrees = len(prefixes) if method == '1' else 2
        assert len(cpt.subtrees) == expected_subtrees

        total = sum(weights)
        assert cpt.weight == total
        assert cpt_avg.weight == total / len(values)
        assert len(cpt) == len(values)
        assert check_subtrees_non_increasing_order(cpt)
        assert check_subtrees_non_increasing_order(cpt_avg)
        assert check_subtrees_compressibility(cpt)
        assert tree_weight_check(cpt, 'sum')
        assert tree_weight_check(cpt_avg, 'average')
示例#4
0
 def test_partial_common_prefix(self):
     """Autocompleting ['d', 'a'] must return only 'danger'."""
     tree = CompressedPrefixTree('average')
     entries = [
         ('car', 100.0),
         ('door', 4.0),
         ('danger', 6.0),
         ('cat', 20.0),
         ('care', 30.0),
     ]
     # Every word is stored under its own letters.
     for word, weight in entries:
         tree.insert(word, weight, list(word))
     matches = tree.autocomplete(['d', 'a'], 3)
     self.assertEqual(matches, [('danger', 6.0)])
示例#5
0
 def test_internal_full_match_prefix(self):
     """Autocompleting ['c'] must return all three 'c' words by weight."""
     tree = CompressedPrefixTree('average')
     entries = [
         ('car', 100.0),
         ('door', 4.0),
         ('danger', 6.0),
         ('cat', 20.0),
         ('care', 30.0),
     ]
     # Every word is stored under its own letters.
     for word, weight in entries:
         tree.insert(word, weight, list(word))
     matches = tree.autocomplete(['c'], 4)
     self.assertEqual(
         matches, [('car', 100.0), ('care', 30.0), ('cat', 20.0)])
示例#6
0
def test_cpt_rep_invariant() -> None:
    """Follow the root value and subtree values of a compressed prefix tree
    through a series of insertions and removals, including the states in
    which the root value is not []."""
    for weight_type in ('average',):
        tree = CompressedPrefixTree(weight_type)

        # A single word: the root carries the whole prefix.
        tree.insert('doggy', 1.0, list('doggy'))
        assert tree.value == list('doggy')

        # A second word sharing 'do' pulls the root back to the common part.
        tree.insert('donna', 2.0, list('donna'))
        assert tree.value == list('do')
        assert tree.subtrees[0].value == list('donna')

        # 'dogi' and 'doggy' share 'dog', which becomes an internal node.
        tree.insert('dogi', 2.0, list('dogi'))
        assert tree.value == list('do')
        assert tree.subtrees[1].value == list('dog')
        assert tree.subtrees[1].subtrees[0].value == list('dogi')
        assert tree.subtrees[1].subtrees[1].value == list('doggy')

        tree.insert('dim', 5.0, list('dim'))
        assert tree.value == ['d']
        assert tree.subtrees[0].value == list('dim')
        assert tree.subtrees[1].subtrees[1].value == list('dog')

        # A word with a different first letter forces the root to [].
        tree.insert('che', 10.0, list('che'))
        assert tree.value == []
        assert tree.subtrees[0].value == list('che')
        assert tree.subtrees[1].value == ['d']

        # Removals are expected to re-compress the tree step by step.
        tree.remove(['c'])
        assert tree.value == ['d']
        assert tree.subtrees[0].value == list('dim')
        assert tree.subtrees[1].subtrees[1].value == list('dog')

        tree.remove(list('dog'))
        assert tree.value == ['d']
        assert tree.subtrees[0].value == list('dim')
        assert tree.subtrees[1].value == list('donna')

        tree.remove(['d', 'i'])
        assert tree.value == list('donna')
        assert len(tree.subtrees) == 1
        # NOTE(review): is_leaf is read without being called; if it is a
        # method rather than a property this assertion is always true.
        assert tree.subtrees[0].is_leaf

        tree.remove(['d'])
        assert tree.value == []
        assert len(tree.subtrees) == 0
        assert tree.weight == 0
示例#7
0
def test_insert_2() -> None:
    """Check len() and node count of small 'sum'-weighted
    CompressedPrefixTrees: an empty tree, a tree holding one value under a
    one-element prefix, and a tree holding one value under the empty prefix.
    """
    tree = CompressedPrefixTree('sum')
    # A freshly created tree holds nothing and has an empty root value.
    assert len(tree) == 0
    assert tree.value == []

    # One value under ['x'].
    tree.insert('x', 1, ['x'])
    assert len(tree) == 1
    assert num_nodes(tree) == 2

    # One value stored directly under the empty prefix.
    # (The one-value / several-internal-nodes case is covered by
    # test_insert_num_nodes().)
    root_only = CompressedPrefixTree('sum')
    root_only.insert('x', 1, [])
    assert len(root_only) == 1
    assert num_nodes(root_only) == 2
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    # === Private Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as a blank entry is encountered, processing of that line
            stops and the next line of the CSV file is read.

        Each melody is inserted into the Autocompleter with a weight of 1.
        Empty rows and rows that contain no complete (pitch, duration) pair
        are skipped.
        """
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' lets the csv module do its own line-ending handling.
        with open(config['file'], encoding='utf8', newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    # csv.reader yields [] for a completely empty line.
                    continue
                name, notes, intervals = self._parse_row(row)
                if not notes:
                    continue
                self.autocompleter.insert(Melody(name, notes), 1.0, intervals)

    @staticmethod
    def _parse_row(
            row: List[str]) -> Tuple[str, List[Tuple[int, int]], List[int]]:
        """Split one CSV row into (name, notes, interval sequence).

        <row>[0] is the melody name; the remaining cells are read
        positionally as (pitch, duration) pairs. Reading stops at the first
        pair containing a blank cell, or when a pitch has no duration cell
        after it. The interval sequence holds the pitch difference between
        every two consecutive notes, so it has len(notes) - 1 entries (none
        for zero or one note).

        Raises ValueError if a non-blank cell is not an integer.
        """
        notes = []
        # Step through the cells by position so that equal values in
        # different cells can never be confused with one another.
        for i in range(1, len(row) - 1, 2):
            pitch, duration = row[i], row[i + 1]
            if pitch.strip() == '' or duration.strip() == '':
                break
            notes.append((int(pitch), int(duration)))
        intervals = [nxt[0] - cur[0] for cur, nxt in zip(notes, notes[1:])]
        return row[0], notes, intervals

    def autocomplete(
            self,
            prefix: List[int],
            limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        # The autocompleter is asked for every match; the limit is applied
        # here by truncating that list.
        results = self.autocompleter.autocomplete(prefix)
        if limit is None:
            return results
        return results[:limit]

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string is sanitized, and if the resulting string contains
        at least one word, it is inserted into the Autocompleter.

        *Lines that do not contain at least one alphanumeric character are
        skipped*, as are completely empty lines.

        When each string is inserted, it is given the weight specified on the
        line from the csv file.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.
        """
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif config['autocompleter'] == 'compressed':
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' lets the csv module do its own line-ending handling.
        with open(config['file'], encoding='utf8', newline='') as csvfile:
            for line in csv.reader(csvfile):
                if not line:
                    # csv.reader yields [] for a completely empty line.
                    continue
                clean, words = self._sanitize(line[0])
                # A string made only of spaces has no words and must not be
                # stored, so test the word list rather than len(clean).
                if not words:
                    continue
                self.autocompleter.insert(clean, float(line[1]), words)

    @staticmethod
    def _sanitize(text: str) -> Tuple[str, List[str]]:
        """Return (sanitized string, its list of words) for <text>.

        Sanitizing keeps only alphanumeric characters and spaces and
        lowercases what is kept. The word list is the sanitized string split
        on whitespace; it is empty when no alphanumeric character remains.
        """
        clean = ''.join(c.lower() for c in text if c.isalnum() or c == ' ')
        return clean, clean.split()

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight.

        If limit is None, return *every* match for the given prefix.

        The prefix string is split into a list of words before being passed
        to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        prefix_lst = prefix.split()
        return self.autocompleter.autocomplete(prefix_lst, limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        The prefix string is split into a list of words before being passed
        to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        prefix_lst = prefix.split()
        self.autocompleter.remove(prefix_lst)
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.

    === Private Attributes ===
    _weight_type: either 'sum' or 'average', which specifies the
              weight type for the prefix tree.
    _autocompleter_type: the 'autocompleter' entry of the configuration.
    """
    autocompleter: Autocompleter

    # new private variables
    _weight_type: str
    _autocompleter_type: str

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string is sanitized, and if the resulting string contains
        at least one alphanumeric character, it is inserted into the
        Autocompleter.

        *Lines that do not contain at least one alphanumeric character are
        skipped* -- this includes lines made only of spaces.

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).
        """
        self._weight_type = config['weight_type']
        self._autocompleter_type = config['autocompleter']

        if self._autocompleter_type == 'simple':
            self.autocompleter = SimplePrefixTree(self._weight_type)
        else:
            self.autocompleter = CompressedPrefixTree(self._weight_type)

        with open(config['file'], encoding='utf8') as f:
            for line in f:
                clean = self._sanitize(line)
                if clean is not None:
                    self.autocompleter.insert(clean, 1.0, list(clean))

    @staticmethod
    def _sanitize(line: str) -> Optional[str]:
        """Return the sanitized form of <line>, or None if it must be skipped.

        The line is lowercased and every character that is neither
        alphanumeric nor a space (including the trailing newline) is dropped.
        None is returned when no alphanumeric character is left, so that
        blank and spaces-only lines are never stored.
        """
        kept = [c for c in line.lower() if c.isalnum() or c == ' ']
        if not any(c.isalnum() for c in kept):
            return None
        return ''.join(kept)

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The prefix string is turned into a list of characters before being
        passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        The prefix string is turned into a list of characters before being
        passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(prefix))
示例#11
0
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored in the autocompleter are the melodies' note lists
    (lists of (pitch, duration) tuples); Melody objects are rebuilt from
    them, together with the recorded name, when results are returned.
    The prefix sequence for a melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    # === Private Attributes ===
    autocompleter: An Autocompleter used by this engine.
    _melody_name: A list of (note list, name) tuples, one per inserted row.
    config: the configuration dictionary this engine was created with.
    """
    autocompleter: Autocompleter
    _melody_name: List
    config: Dict[str, Any]

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as a blank entry is encountered, processing of that line
            stops and the next line of the CSV file is read.

        Each melody is inserted into the Autocompleter with a weight of 1.
        Completely empty lines are skipped.
        """
        self.config = config
        self._melody_name = []

        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' lets the csv module do its own line-ending handling.
        with open(config['file'], encoding='utf8', newline='') as csvfile:
            for line in csv.reader(csvfile):
                # csv.reader yields lists, so an empty line is [] (never '').
                if line:
                    self._melody_help(line)

    def _melody_help(self, line: Any) -> None:
        """Parse one CSV row and insert its melody into the autocompleter.

        <line>[0] is the melody name; the remaining cells are read as
        (pitch, duration) pairs. Reading stops at the first pair containing
        a blank cell, or when a pitch has no duration cell after it. The
        note list is inserted with weight 1.0 under its interval sequence
        (pitch differences between consecutive notes), and the
        (note list, name) pair is recorded in _melody_name.

        Raises ValueError if a non-blank cell is not an integer.
        """
        melody = []
        for i in range(1, len(line) - 1, 2):
            pitch, duration = line[i], line[i + 1]
            if pitch == '' or duration == '':
                # The first blank entry ends the row.
                break
            melody.append((int(pitch), int(duration)))
        interval = [nxt[0] - cur[0] for cur, nxt in zip(melody, melody[1:])]
        self._melody_name.append((melody, line[0]))
        self.autocompleter.insert(melody, 1.0, interval)

    def autocomplete(
            self,
            prefix: List[int],
            limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        matches = self.autocompleter.autocomplete(prefix, limit)
        # Note lists are unhashable, so index the recorded names by a tuple
        # copy. When several rows share the same notes the last name wins.
        names = {tuple(notes): name for notes, name in self._melody_name}
        return [(Melody(names.get(tuple(notes), ''), notes), weight)
                for notes, weight in matches]

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)
"""CSC148 Assignment 2: Autocomplete engines
示例#13
0
def test_autocomplete(length: int) -> None:
    """Check result size and ordering of CompressedPrefixTree.autocomplete
    for trees built with three different prefix layouts (n = <length>).

    Method 1: prefixes [[0..n-1], [1..n-1], ..., [n-1]].
    Method 2: prefixes [[0..n-1], [0..n-2], ..., [0]].
    Method 3: the prefixes produced by cpt_method_3 with depth 3.

    For the empty prefix and every inserted prefix, and for every limit from
    1 to the number of stored values, both the 'sum' and the 'average' tree
    must return at most <limit> results, never more results than stored
    values, no weight above the largest inserted weight, and results in
    non-increasing weight order.
    """
    import sys
    sys.setrecursionlimit(5000)

    methods = ['1', '2', '3']

    for method in methods:
        prefixes = []
        values = []
        weights = []
        cpt = CompressedPrefixTree('sum')
        cpt_avg = CompressedPrefixTree('average')

        if method == '3':
            prefixes = cpt_method_3(cpt, 3, list(range(15)))
            cpt_method_3(cpt_avg, 3, list(range(15)))
            # Only len(values) is used; take a copy so it keeps counting the
            # inserted values and nothing else.
            values = list(prefixes)
            weights = list(range(15))
            weights.reverse()
        else:
            for x in range(0, length):
                if method == '1':
                    start = x
                    stop = length
                else:
                    start = 0
                    stop = length - x
                prefix = list(range(start, stop))
                # Value and weight run from length down to 1.
                item = length - x
                prefixes.append(prefix)
                values.append(item)
                weights.append(item)
                cpt.insert(item, item, prefix)
                cpt_avg.insert(item, item, prefix)

        # Query the empty prefix first, then every inserted prefix.
        for prefix in [[]] + prefixes:
            for i in range(1, len(values) + 1):
                tup = cpt.autocomplete(prefix, i)
                tup_av = cpt_avg.autocomplete(prefix, i)
                assert len(tup) <= i
                assert len(cpt.autocomplete(prefix, i**2)) <= len(values)
                assert len(tup_av) <= i
                assert len(cpt_avg.autocomplete(prefix, i**2)) <= len(values)
                # Each result list is checked against itself: the two trees
                # may legitimately return different lists for one limit.
                for results in (tup, tup_av):
                    for x, (_, weight) in enumerate(results):
                        # weights[0] is the greatest inserted weight
                        assert weight <= weights[0]
                        if x != len(results) - 1:
                            # weights should be non-increasing
                            assert weight >= results[x + 1][1]
示例#14
0
def test_remove(length: int) -> None:
    """Test the remove method of CompressedPrefixTree.

    A 'sum' tree and an 'average' tree are filled identically using one of
    three prefix layouts (n = <length>), then every inserted prefix is
    removed from both trees in turn. After each removal the test checks
    that the number of values, the weights and the node count decreased and
    that the structural helper checks still pass.

    Method 1: prefixes [[0..n-1], [1..n-1], ..., [n-1]].
    Method 2: prefixes [[0..n-1], [0..n-2], ..., [0]].
    Method 3: the prefixes produced by cpt_method_3 with depth 3, removed in
              reverse insertion order.
    """
    methods = ['1', '2', '3']

    for method in methods:
        prefixes = []
        values = []
        weights = []
        cpt = CompressedPrefixTree('sum')
        cpt_avg = CompressedPrefixTree('average')

        if method == '3':
            prefixes = cpt_method_3(cpt, 3, list(range(15)))
            cpt_method_3(cpt_avg, 3, list(range(15)))
            values = prefixes  # values is only tested on length
            weights = list(range(15))
            weights.reverse()
        else:
            for x in range(0, length):
                if method == '1':
                    start = x
                    stop = length
                elif method == '2':
                    start = 0
                    stop = length - x
                prefixes.append(list(range(start, stop)))
                values.append(length - x)
                # weight goes for values, go from weight = length, to weight = 1
                weights.append(length - x)
                # The same value, weight and prefix go into both trees.
                cpt.insert(values[len(values) - 1], weights[len(weights) - 1],
                           prefixes[len(prefixes) - 1])
                cpt_avg.insert(values[len(values) - 1],
                               weights[len(weights) - 1],
                               prefixes[len(prefixes) - 1])
        if method == '1':
            for prefix in prefixes:
                # Snapshot the state before removal for the comparisons below.
                # NOTE(review): cpt_avg.total is presumably the sum of the
                # stored weights -- confirm against the tree implementation.
                prev_weight = cpt.weight
                prev_weight_avg = cpt_avg.weight
                prev_sum = cpt_avg.total
                prev_num = len(cpt)
                prev_num_nodes = num_nodes(cpt)
                cpt.remove(prefix)
                cpt_avg.remove(prefix)
                assert len(cpt) < prev_num  # deleting at least 1 leaf
                assert cpt_avg.total < prev_sum
                if len(cpt) == 0:
                    assert cpt.weight == 0
                else:
                    assert cpt_avg.weight == (cpt_avg.total / len(cpt))
                # The pre-removal average must have matched total / count.
                assert prev_weight_avg == (prev_sum / prev_num)
                assert cpt.weight == cpt_avg.total
                # Chained comparison: the sum weight dropped, and the old sum
                # weight equalled the old total of the average tree.
                assert cpt.weight < prev_weight == prev_sum  # weight_type: 'sum'
                assert num_nodes(cpt) < prev_num_nodes
                assert check_subtrees_non_increasing_order(cpt)
                assert check_subtrees_non_increasing_order(cpt_avg)
                assert check_subtrees_compressibility(cpt)
                assert tree_weight_check(cpt, 'sum')
                assert tree_weight_check(cpt_avg, 'average')
        elif method == '2':
            for prefix in prefixes:
                # Same checks as method 1, plus: exactly one value must
                # disappear per removal.
                prev_weight = cpt.weight
                prev_weight_avg = cpt_avg.weight
                prev_sum = cpt_avg.total
                prev_num = len(cpt_avg)
                prev_num_nodes = num_nodes(cpt)
                cpt_avg.remove(prefix)
                cpt.remove(prefix)
                assert len(cpt_avg) < prev_num  # deleting 1 leaf
                assert len(cpt) == prev_num - 1
                assert cpt_avg.total < prev_sum
                if len(cpt) == 0:
                    assert cpt.weight == 0
                else:
                    assert cpt_avg.weight == (cpt_avg.total / len(cpt))
                assert prev_weight_avg == (prev_sum / prev_num)
                assert cpt.weight == cpt_avg.total
                assert cpt.weight < prev_weight == prev_sum  # weight_type: 'sum'
                assert num_nodes(cpt) < prev_num_nodes
                assert check_subtrees_non_increasing_order(cpt)
                assert check_subtrees_non_increasing_order(cpt_avg)
                assert check_subtrees_compressibility(cpt)
                assert tree_weight_check(cpt, 'sum')
                assert tree_weight_check(cpt_avg, 'average')
        elif method == '3':
            # Remove in reverse insertion order, so that longer prefixes go
            # before the shorter prefixes they extend.
            prefixes.reverse()
            for prefix in prefixes:
                prev_weight = cpt.weight
                prev_weight_avg = cpt_avg.weight
                prev_sum = cpt_avg.total
                prev_num = len(cpt)
                prev_num_nodes = num_nodes(cpt)
                cpt.remove(prefix)
                cpt_avg.remove(prefix)
                assert len(cpt) < prev_num  # deleting 1 leaf
                assert len(cpt) == prev_num - 1
                assert cpt_avg.total < prev_sum
                if len(cpt) == 0:
                    assert cpt.weight == 0
                else:
                    # Here the average is checked against the 'sum' tree's
                    # weight rather than against cpt_avg.total.
                    assert cpt_avg.weight == (cpt.weight / len(cpt))
                assert prev_weight_avg == (prev_sum / prev_num)
                assert cpt.weight == cpt_avg.total
                assert cpt.weight < prev_weight == prev_sum  # weight_type: 'sum'
                assert num_nodes(cpt) < prev_num_nodes
                assert check_subtrees_non_increasing_order(cpt)
                assert check_subtrees_non_increasing_order(cpt_avg)
                assert check_subtrees_compressibility(cpt)
                assert tree_weight_check(cpt, 'sum')
                assert tree_weight_check(cpt_avg, 'average')
            # Restore the original insertion order.
            prefixes.reverse()
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string is sanitized (lowercased, keeping only alphanumeric
        characters and spaces), and if the line contains at least one
        alphanumeric character, the result is inserted into the Autocompleter.

        *Lines that do not contain at least one
         alphanumeric character are skipped*

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).

        Raise ValueError if config['autocompleter'] is neither 'simple' nor
        'compressed'.
        """
        kind = config['autocompleter']
        if kind == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif kind == 'compressed':
            self.autocompleter = CompressedPrefixTree(config['weight_type'])
        else:
            # Fail here rather than leaving self.autocompleter unset and
            # hitting an AttributeError on first use.
            raise ValueError(f'unknown autocompleter type: {kind!r}')

        # Opens the file and iterates over the lines of the file
        with open(config['file'], encoding='utf8') as f:
            for line in f:
                # A line made only of spaces/punctuation must be skipped;
                # checking the sanitized length alone would let spaces through.
                if not any(c.isalnum() for c in line):
                    continue
                prefix = self._sanitize(line)
                self.autocompleter.insert(''.join(prefix), 1.0, prefix)

    @staticmethod
    def _sanitize(text: str) -> List[str]:
        """Return the lowercased alphanumeric and space characters of <text>."""
        return [c.lower() for c in text if c.isalnum() or c == ' ']

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight.

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string will be transformed into a list
        of letters before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        return self.autocompleter.autocomplete(list(prefix), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        Note that the given prefix string will be transformed into a list
        of letters before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(prefix))
示例#16
0
class CompressedAutoCompleteTest(unittest.TestCase):
    """Autocomplete checks for CompressedPrefixTree.

    Most tests use the fresh 'sum' tree created in setUp; the last two build
    their own 'average' tree from _WORDS.
    """

    # (word, weight) pairs for the 'average' tree tests; each word is
    # inserted with its own letters as the prefix sequence.
    _WORDS = (
        ('car', 100.0),
        ('door', 4.0),
        ('danger', 6.0),
        ('cat', 20.0),
        ('care', 30.0),
    )

    def setUp(self):
        """Create an empty 'sum' tree for the test about to run."""
        self.sum_tree = CompressedPrefixTree('sum')

    @staticmethod
    def _fill(tree, entries):
        """Insert each (value, weight, prefix) triple into <tree>, in order."""
        for value, weight, prefix in entries:
            tree.insert(value, weight, prefix)

    def _average_word_tree(self):
        """Return a new 'average' tree holding every word in _WORDS."""
        tree = CompressedPrefixTree('average')
        self._fill(tree, [(word, weight, list(word))
                          for word, weight in self._WORDS])
        return tree

    def test_empty_tree_no_prefix(self):
        self.assertEqual(self.sum_tree.autocomplete([]), [])

    def test_empty_tree_extra_prefix(self):
        self.assertEqual(self.sum_tree.autocomplete(['c']), [])

    def test_one_leaf_no_prefix(self):
        self._fill(self.sum_tree, [('Alice', 5, [])])
        self.assertEqual(self.sum_tree.autocomplete([]), [('Alice', 5.0)])

    def test_one_leaf_no_prefix_zero_limit(self):
        self._fill(self.sum_tree, [('Alice', 5, ['a'])])
        self.assertEqual(self.sum_tree.autocomplete(['a'], 0), [])

    def test_one_leaf_no_prefix_at_limit(self):
        self._fill(self.sum_tree, [('Alice', 5, [])])
        self.assertEqual(self.sum_tree.autocomplete([], 1), [('Alice', 5.0)])

    def test_multi_leaf_no_prefix_extra_limit(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, []),
            ('Jacky', 11, []),
            ('Bob', 10, []),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete([], 4),
            [('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)])

    def test_multi_leaf_no_prefix_not_enough_limit(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, []),
            ('Jacky', 11, []),
            ('Bob', 10, []),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete([], 2),
            [('Jacky', 11.0), ('Bob', 10.0)])

    def test_multi_internal_no_prefix(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a']),
            ('Jacky', 11, ['a', 'c']),
            ('Bob', 10, ['b']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete([], 3),
            [('Jacky', 11.0), ('Bob', 10.0), ('Alice', 5.0)])

    def test_with_multi_internal_and_prefix(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a']),
            ('Jacky', 11, ['a']),
            ('Bob', 10, ['b']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete(['a'], 3),
            [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_internal_extra_prefix(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a']),
            ('Jacky', 11, ['a']),
            ('Bob', 10, ['b']),
        ])
        self.assertEqual(self.sum_tree.autocomplete(['a', 'b'], 3), [])

    def test_multi_internal_short_prefix(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a', 'b']),
            ('Jacky', 11, ['a', 'c']),
            ('Bob', 10, ['b']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete(['a'], 3),
            [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_level_leaves(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a']),
            ('Jacky', 11, ['a', 'c']),
            ('Bob', 10, ['b']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete(['a'], 3),
            [('Jacky', 11.0), ('Alice', 5.0)])

    def test_multi_internal_limit_cutoff(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a', 'c']),
            ('Jacky', 11, ['a', 'c']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete(['a'], 1), [('Jacky', 11.0)])

    def test_multi_internal_limit_continue(self):
        self._fill(self.sum_tree, [
            ('Alice', 5, ['a', 'c', 'b', 'b']),
            ('Jacky', 10, ['a', 'c', 'b', 'f']),
            ('Bob', 11, ['a', 'd']),
            ('Kevin', 9, ['a', 'd']),
        ])
        self.assertEqual(
            self.sum_tree.autocomplete(['a'], 3),
            [('Bob', 11.0), ('Jacky', 10.0), ('Kevin', 9.0)])

    def test_partial_common_prefix(self):
        tree = self._average_word_tree()
        self.assertEqual(
            tree.autocomplete(['d', 'a'], 3), [('danger', 6.0)])

    def test_internal_full_match_prefix(self):
        tree = self._average_word_tree()
        self.assertEqual(
            tree.autocomplete(['c'], 4),
            [('car', 100.0), ('care', 30.0), ('cat', 20.0)])
示例#17
0
def test_compressed_tree() -> None:
    """Check weights, leaf counts and node layout of CompressedPrefixTree.

    Trees are built under both the 'sum' and 'average' weight types, through
    add/add_on as well as insert, and the weight, num_leaves and subtree
    values are asserted after each batch of insertions.
    """

    def insert_words(tree, weighted_words) -> None:
        """Insert each (word, weight) pair, using the word's letters as prefix."""
        for word, weight in weighted_words:
            tree.insert(word, weight, list(word))

    def check_node(node, value, weight, leaves) -> None:
        """Assert <node>'s value, weight and num_leaves, in that order."""
        assert node.value == value
        assert node.weight == weight
        assert node.num_leaves == leaves

    def check_sum_of_three(tree) -> None:
        """Assertions for a 'sum' tree holding 'abc', 'abcd' and 'ab'."""
        assert tree.weight == 1.0
        assert tree.num_leaves == 3
        abc_node = tree.subtrees[0].subtrees[1]
        assert abc_node.value == list('abc')
        assert abc_node.weight == 0.5
        abcd_node = abc_node.subtrees[0]
        assert abcd_node.value == list('abcd')
        assert abcd_node.weight == 0.3

    def check_average_of_three(tree) -> None:
        """Assertions for an 'average' tree holding 'abc', 'abcd' and 'ab'."""
        assert tree.weight == 1.0 / 3
        assert tree.num_leaves == 3
        ab_node = tree.subtrees[0]
        assert ab_node.value == list('ab')
        assert ab_node.weight == tree.weight
        abc_node = ab_node.subtrees[1]
        assert abc_node.value == list('abc')
        assert abc_node.weight == 0.25
        abcd_node = abc_node.subtrees[0]
        assert abcd_node.value == list('abcd')
        assert abcd_node.weight == 0.3
        abc_leaf = abc_node.subtrees[1]
        assert abc_leaf.value == 'abc'
        assert abc_leaf.weight == 0.2

    three_words = [('abc', 0.2), ('abcd', 0.3), ('ab', 0.5)]

    # 'sum' weights: once through add/add_on, once through insert.
    y = CompressedPrefixTree('sum')
    y.add('abc', 0.2, list('abc'))
    y.add_on('abcd', 0.3, list('abcd'))
    y.add_on('ab', 0.5, list('ab'))
    check_sum_of_three(y)

    y = CompressedPrefixTree('sum')
    insert_words(y, three_words)
    check_sum_of_three(y)

    # 'average' weights with only two stored values.
    y = CompressedPrefixTree('average')
    y.add('abc', 0.2, list('abc'))
    y.add_on('ab', 0.5, list('ab'))

    assert y.weight == 0.35
    assert y.num_leaves == 2
    abc_node = y.subtrees[0].subtrees[1]
    assert abc_node.value == list('abc')
    assert abc_node.weight == 0.2
    abc_leaf = abc_node.subtrees[0]
    assert abc_leaf.value == 'abc'
    assert abc_leaf.weight == 0.2

    # 'average' weights with three values: add/add_on, then insert.
    y = CompressedPrefixTree('average')
    y.add('abc', 0.2, list('abc'))
    y.add_on('abcd', 0.3, list('abcd'))
    y.add_on('ab', 0.5, list('ab'))
    check_average_of_three(y)

    y = CompressedPrefixTree('average')
    insert_words(y, three_words)
    check_average_of_three(y)

    # Larger 'average' tree with two top-level branches.
    x = CompressedPrefixTree('average')
    insert_words(x, [
        ('car', 1),
        ('care', 2),
        ('cat', 6),
        ('danger', 1),
        ('door', 0.5),
        ('doors', 0.5),
    ])

    assert x.weight == 11.0 / 6
    check_node(x.subtrees[0], ['c', 'a'], 3.0, 3)
    check_node(x.subtrees[1], ['d'], 2.0 / 3, 3)
    check_node(x.subtrees[1].subtrees[0], list('danger'), 1.0, 1)
    door_node = x.subtrees[1].subtrees[1]
    check_node(door_node, list('door'), 0.5, 2)
    check_node(door_node.subtrees[0], 'door', 0.5, 0)
    check_node(door_node.subtrees[1], list('doors'), 0.5, 1)

    # Re-inserting 'doors' raises its weight; it is then expected ahead of
    # the 'door' leaf under the shared ['d', 'o', 'o', 'r'] node.
    x.insert('doors', 0.5, list('doors'))

    assert x.weight == 11.5 / 6
    assert x.num_leaves == 6
    check_node(x.subtrees[0], ['c', 'a'], 3.0, 3)
    check_node(x.subtrees[1], ['d'], 2.5 / 3, 3)
    check_node(x.subtrees[1].subtrees[0], list('danger'), 1.0, 1)
    door_node = x.subtrees[1].subtrees[1]
    check_node(door_node, list('door'), 0.75, 2)
    check_node(door_node.subtrees[1], 'door', 0.5, 0)
    check_node(door_node.subtrees[0], list('doors'), 1.0, 1)

    # Same words under 'sum', inserted in two batches.
    x = CompressedPrefixTree('sum')
    insert_words(x, [('car', 1), ('care', 2), ('cat', 6)])

    assert x.weight == 1 + 2 + 6
    assert x.num_leaves == 3
    check_node(x.subtrees[0], ['c', 'a'], 9.0, 3)
    car_node = x.subtrees[0].subtrees[1]
    check_node(car_node, list('car'), 2 + 1, 2)
    check_node(car_node.subtrees[1], 'car', 1, 0)

    insert_words(x, [('danger', 1), ('door', 0.5), ('doors', 0.5)])

    assert x.weight == 1 + 2 + 6 + 2
    assert x.num_leaves == 3 + 3
    check_node(x.subtrees[0], ['c', 'a'], 9.0, 3)
    check_node(x.subtrees[1], ['d'], 2.0, 3)
    check_node(x.subtrees[1].subtrees[0], list('danger'), 1.0, 1)
    door_node = x.subtrees[1].subtrees[1]
    check_node(door_node, list('door'), 1.0, 2)
    assert door_node.subtrees[0].value == 'door'

    # After the extra 'doors' weight the door branch is expected first
    # among the children of ['d'].
    x.insert('doors', 0.5, list('doors'))

    assert x.weight == 1 + 2 + 6 + 2 + 0.5
    assert x.num_leaves == 3 + 3
    check_node(x.subtrees[0], ['c', 'a'], 9.0, 3)
    check_node(x.subtrees[1], ['d'], 2.5, 3)
    check_node(x.subtrees[1].subtrees[0], list('door'), 1.5, 2)
    danger_node = x.subtrees[1].subtrees[1]
    check_node(danger_node, list('danger'), 1.0, 1)
    assert danger_node.subtrees[0].value == 'danger'
示例#18
0
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    config: The configuration dictionary this engine was created with.
    """
    autocompleter: Autocompleter
    config: Dict[str, Any]

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string is sanitized, and if the resulting string contains
        at least one word, it is inserted into the Autocompleter with the
        weight read from the second entry of its line.

        Rows are skipped when they have fewer than two entries, when the
        sanitized string contains no word, or when no number can be read
        from the weight entry.

        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.
        """
        self.config = config

        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' leaves line-ending handling to the csv module.
        with open(config['file'], encoding='utf8', newline='') as csvfile:
            for row in csv.reader(csvfile):
                # csv.reader yields [] for blank lines; short rows have no
                # weight entry to read.
                if len(row) < 2:
                    continue
                sentence = self._sanitize(row[0])
                words = sentence.split()
                # A string of only spaces is non-empty but has no words.
                if not words:
                    continue
                # NOTE(review): this filter drops signs and exponent markers,
                # so '1e3' is read as 13.0 -- confirm weights are always
                # plain decimals.
                digits = ''.join(ch for ch in row[1]
                                 if ch.isnumeric() or ch == '.')
                try:
                    weight = float(digits)
                except ValueError:
                    # Nothing numeric survived the filter ('' or e.g. '.').
                    continue
                self.autocompleter.insert(sentence, weight, words)

    @staticmethod
    def _sanitize(text: str) -> str:
        """Return <text> lowercased, keeping only alphanumerics and spaces."""
        return ''.join(ch for ch in text.lower()
                       if ch.isalnum() or ch == ' ')

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The given prefix string is sanitized and split into a list of words
        before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        words = self._sanitize(prefix).split()
        return self.autocompleter.autocomplete(words, limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        The given prefix string is sanitized and split into a list of words
        before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(self._sanitize(prefix).split())
示例#19
0
def test_autocomplete() -> None:
    """Check that both prefix tree classes autocomplete identically.

    The same words are inserted, in the same order, into a 'sum'
    CompressedPrefixTree and a 'sum' SimplePrefixTree; every query prefix
    must then produce equal autocomplete results from both trees.
    """
    # (word, weight) pairs; 'doors' appears twice on purpose. Each word is
    # inserted with its own letters as the prefix sequence.
    weighted_words = [
        ('car', 1),
        ('care', 2),
        ('cat', 6),
        ('danger', 1),
        ('door', 0.5),
        ('doors', 0.5),
        ('doors', 0.5),
        ('desk', 10),
    ]

    x = CompressedPrefixTree('sum')
    for word, weight in weighted_words:
        x.insert(word, weight, list(word))

    y = SimplePrefixTree('sum')
    for word, weight in weighted_words:
        y.insert(word, weight, list(word))

    # Each query string is expanded into its list of letters.
    queries = [
        'c', 'ca', 'car', 'care', 'cat',
        'd', 'do', 'da', 'de', 'des',
        'doo', 'dan', 'door', 'doors', 'danger',
    ]
    for query in queries:
        assert x.autocomplete(list(query)) == y.autocomplete(list(query))
示例#20
0
class LetterAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few letters.

    The *prefix sequence* for a string is the list of characters in the string.
    This can include space characters.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    config: The configuration dictionary this engine was created with.
    """
    autocompleter: Autocompleter
    config: Dict[str, Any]

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a text file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Each line of the specified file counts as one input string.
        Note that the line may or may not contain spaces.
        Each string is sanitized, and if the resulting string contains
        at least one alphanumeric character, it is inserted into the
        Autocompleter.

        *Lines that do not contain at least one alphanumeric character are
        skipped.*

        When each string is inserted, it is given a weight of one.
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight (because of how Autocompleter.insert works).
        """
        self.config = config

        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        with open(config['file'], encoding='utf8') as f:
            for line in f:
                cleaned = self._sanitize(line)
                # Blank, punctuation-only and space-only lines carry no
                # alphanumeric character and must not be stored.
                if not any(ch.isalnum() for ch in cleaned):
                    continue
                self.autocompleter.insert(cleaned, 1.0, list(cleaned))

    @staticmethod
    def _sanitize(text: str) -> str:
        """Return <text> lowercased, keeping only alphanumerics and spaces."""
        return ''.join(ch for ch in text.lower()
                       if ch.isalnum() or ch == ' ')

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        The given prefix string is sanitized and turned into a list of
        letters before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        letters = list(self._sanitize(prefix))
        return self.autocompleter.autocomplete(letters, limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix string.

        The given prefix string is sanitized and turned into a list of
        letters before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(list(self._sanitize(prefix)))
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    # === Private Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            processing of a line stops at the first blank entry, and a
            trailing pitch without a duration is ignored.

        Completely blank lines are skipped.
        Each melody is inserted into the Autocompleter with a weight of 1.
        """
        # Imported locally so this class does not depend on a module-level
        # csv import being present.
        import csv

        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        # newline='' leaves line-ending handling to the csv module.
        with open(config['file'], newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    # csv.reader yields [] for a blank line: no melody here.
                    continue
                notes = self._parse_notes(row[1:])
                self.autocompleter.insert(Melody(row[0], notes),
                                          1, self._intervals(notes))

    @staticmethod
    def _parse_notes(entries: List[str]) -> List[Tuple[int, int]]:
        """Return the (pitch, duration) pairs at the start of <entries>.

        Reading stops at the first pair containing a blank entry; an unpaired
        final entry is ignored.
        """
        notes = []
        for i in range(0, len(entries) - 1, 2):
            pitch, duration = entries[i], entries[i + 1]
            if pitch == '' or duration == '':
                break
            notes.append((int(pitch), int(duration)))
        return notes

    @staticmethod
    def _intervals(notes: List[Tuple[int, int]]) -> List[int]:
        """Return the pitch differences between consecutive notes."""
        return [after[0] - before[0]
                for before, after in zip(notes, notes[1:])]

    def autocomplete(self, prefix: List[int],
                     limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        return self.autocompleter.autocomplete(prefix, limit)

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.

    === Private Attributes ===
    _weight_type: the 'weight_type' entry of the config ('sum' or 'average').
    _autocompleter_type: the 'autocompleter' entry of the config.
    _file: the 'file' entry of the config, i.e. the path of the CSV file
        that was read when the engine was built.
    """
    autocompleter: Autocompleter

    # new private variables
    _weight_type: str
    _autocompleter_type: str

    # file that the autocomplete engine reads from
    _file: str

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string must be sanitized, and if the resulting string contains
        at least one word, it is inserted into the Autocompleter.

        *Skip lines that do not contain at least one alphanumeric character!*
        (Completely blank rows are skipped as well.)

        When each string is inserted, it is given THE WEIGHT SPECIFIED ON THE
        LINE FROM THE CSV FILE. (Updated Nov 19)
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.

        The file is decoded as UTF-8 so that the result does not depend on
        the platform's default encoding.
        """
        self._weight_type = config['weight_type']
        self._autocompleter_type = config['autocompleter']
        self._file = config['file']

        # Anything other than 'simple' selects the compressed tree.
        if self._autocompleter_type == 'simple':
            self.autocompleter = SimplePrefixTree(self._weight_type)
        else:
            self.autocompleter = CompressedPrefixTree(self._weight_type)

        # newline='' is what the csv module asks for so that it can handle
        # line endings (including ones inside quoted fields) itself.
        with open(self._file, encoding='utf8', newline='') as csvfile:
            for row in csv.reader(csvfile):
                if not row:
                    # csv.reader yields [] for a blank line; nothing to add.
                    continue

                text = self._sanitize(row[0])
                words = text.split()
                # No words left means the line had no alphanumeric
                # character at all, so it is skipped.
                if words:
                    self.autocompleter.insert(text, float(row[1]), words)

    @staticmethod
    def _sanitize(text: str) -> str:
        """Return <text> lowercased with every character that is neither
        alphanumeric nor a space removed.

        Spaces are kept exactly as they appear (no collapsing or trimming).
        """
        return ''.join(ch for ch in text.lower()
                       if ch.isalnum() or ch == ' ')

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        # str.split() with no argument splits on runs of whitespace, so
        # extra spaces never produce empty "words".
        return self.autocompleter.autocomplete(prefix.split(), limit)

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(prefix.split())
class SentenceAutocompleteEngine:
    """An autocomplete engine that suggests strings based on a few words.

    A *word* is a string containing only alphanumeric characters.
    The *prefix sequence* for a string is the list of words in the string
    (separated by whitespace). The words themselves do not contain spaces.

    This autocomplete engine only stores and suggests strings with lowercase
    letters, numbers, and space characters; see the section on
    "Text sanitization" on the assignment handout.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has two entries:
            - the first entry is a string
            - the second entry is a number representing the weight of that
              string

        Note that the line may or may not contain spaces.
        Each string must be sanitized, and if the resulting string contains
        at least one word, it is inserted into the Autocompleter.

        *Skip lines that do not contain at least one alphanumeric character!*
        (Completely blank rows are skipped as well.)

        When each string is inserted, it is given THE WEIGHT SPECIFIED ON THE
        LINE FROM THE CSV FILE. (Updated Nov 19)
        Note that it is possible for the same string to appear on more than
        one line of the input file; this would result in that string getting
        a larger weight.
        """
        # Anything other than 'simple' selects the compressed tree.
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        else:
            self.autocompleter = CompressedPrefixTree(config['weight_type'])

        with open(config['file'], encoding='utf8') as csvfile:
            for line in csv.reader(csvfile):
                if not line:
                    # csv.reader yields [] for a blank line.
                    continue

                # Lowercase first, then keep only alphanumerics and spaces.
                sanitized = ''.join(ch for ch in line[0].lower()
                                    if ch.isalnum() or ch == ' ')

                # split() with no argument splits on runs of whitespace.
                # split(' ') would return [''] for an empty string and
                # empty "words" for repeated/leading/trailing spaces, so
                # lines without any word could never be detected.
                words = sanitized.split()
                if words:
                    self.autocompleter.insert(sanitized, float(line[1]),
                                              words)

    def autocomplete(self,
                     prefix: str,
                     limit: Optional[int] = None) -> List[Tuple[str, float]]:
        """Return up to <limit> matches for the given prefix string.

        The return value is a list of tuples (string, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Preconditions:
            limit is None or limit > 0
            <prefix> contains only lowercase alphanumeric characters and spaces
        """
        # Whitespace-splitting keeps the word list free of '' entries, and
        # an empty/blank prefix becomes [] rather than [''].
        matches = self.autocompleter.autocomplete(prefix.split())

        # Every match is requested and truncated here, so the result is the
        # first <limit> entries of the autocompleter's full result list.
        if limit is None:
            return matches
        return matches[:limit]

    def remove(self, prefix: str) -> None:
        """Remove all strings that match the given prefix.

        Note that the given prefix string must be transformed into a list
        of words before being passed to the Autocompleter.

        Precondition: <prefix> contains only lowercase alphanumeric characters
                      and spaces.
        """
        self.autocompleter.remove(prefix.split())
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.

    === Private Attributes ===
    _weight_type: the 'weight_type' entry of the config ('sum' or 'average').
    _autocompleter_type: the 'autocompleter' entry of the config.
    """
    autocompleter: Autocompleter

    # new private variables
    _weight_type: str
    _autocompleter_type: str

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as you encounter a blank entry, stop processing this line
            and move onto the next line of the CSV file. The notes read
            before the blank entry still form the melody that is inserted.

        Each melody is inserted into the Autocompleter with a weight of 1.
        Completely blank rows are skipped.
        """
        self._weight_type = config['weight_type']
        self._autocompleter_type = config['autocompleter']

        # Anything other than 'simple' selects the compressed tree.
        if self._autocompleter_type == 'simple':
            self.autocompleter = SimplePrefixTree(self._weight_type)
        else:
            self.autocompleter = CompressedPrefixTree(self._weight_type)

        with open(config['file']) as csvfile:
            for line in csv.reader(csvfile):
                if not line:
                    # csv.reader yields [] for a blank line.
                    continue

                name = line[0]  # name of the melody
                notes = []  # (pitch, duration) tuples, in file order

                # Walk the (pitch, duration) pairs. A trailing entry with
                # no partner is never visited because of the `- 1` bound.
                for x in range(1, len(line) - 1, 2):
                    raw_pitch = line[x]
                    raw_duration = line[x + 1]

                    # The blank check must happen on the raw strings:
                    # int('') raises ValueError.
                    if raw_pitch == '' or raw_duration == '':
                        break

                    notes.append((int(raw_pitch), int(raw_duration)))

                # Interval = difference between consecutive pitches.
                interval_sequence = [
                    later[0] - earlier[0]
                    for earlier, later in zip(notes, notes[1:])
                ]

                self.autocompleter.insert(Melody(name, notes), 1,
                                          interval_sequence)

    def autocomplete(self, prefix: List[int], limit: Optional[int] = None) \
            -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        return self.autocompleter.autocomplete(prefix, limit)

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)
class MelodyAutocompleteEngine:
    """An autocomplete engine that suggests melodies based on a few intervals.

    The values stored are Melody objects, and the corresponding
    prefix sequence for a Melody is its interval sequence.

    Because the prefix is based only on interval sequence and not the
    starting pitch or duration of the notes, it is possible for different
    melodies to have the same prefix.

    === Attributes ===
    autocompleter: An Autocompleter used by this engine.
    """
    autocompleter: Autocompleter

    def __init__(self, config: Dict[str, Any]) -> None:
        """Initialize this engine with the given configuration.

        <config> is a dictionary consisting of the following keys:
            - 'file': the path to a CSV file
            - 'autocompleter': either the string 'simple' or 'compressed',
              specifying which subclass of Autocompleter to use.
            - 'weight_type': either 'sum' or 'average', which specifies the
              weight type for the prefix tree.

        Precondition:
        The given file is a *CSV file* where each line has the following format:
            - The first entry is the name of a melody (a string).
            - The remaining entries are grouped into pairs (as in Assignment 1)
              where the first number in each pair is a note pitch,
              and the second number is the corresponding duration.

            HOWEVER, there may be blank entries (stored as an empty string '');
            as soon as you encounter a blank entry, stop processing this line
            and move onto the next line of the CSV file. The notes read
            before the blank entry still form the melody that is inserted.

        Each melody is inserted into the Autocompleter with a weight of 1.
        Completely blank rows are skipped.

        Raises ValueError if config['autocompleter'] is neither 'simple' nor
        'compressed'.
        """
        # determine tree type
        if config['autocompleter'] == 'simple':
            self.autocompleter = SimplePrefixTree(config['weight_type'])
        elif config['autocompleter'] == 'compressed':
            self.autocompleter = CompressedPrefixTree(config['weight_type'])
        else:
            # Fail here with a clear message instead of leaving the
            # attribute unset and crashing on the first insert.
            raise ValueError(
                "config['autocompleter'] must be 'simple' or 'compressed'")

        with open(config['file']) as file:
            for item in csv.reader(file):
                if not item:
                    # csv.reader yields [] for a blank line.
                    continue

                name = item[0]  # get song name
                notes = []
                interval = []
                prev_pit = None
                index = 1

                # Consume (pitch, duration) pairs until the row ends or
                # either half of the next pair is blank/missing.
                while (index + 1 < len(item)
                       and item[index] != ''
                       and item[index + 1] != ''):
                    pitch = int(item[index])

                    # Interval = this pitch minus the previous one; the
                    # first note has no predecessor and adds no interval.
                    if prev_pit is not None:
                        interval.append(pitch - prev_pit)

                    prev_pit = pitch
                    notes.append((pitch, int(item[index + 1])))

                    index += 2

                self.autocompleter.insert(Melody(name, notes), 1, interval)

    def autocomplete(
            self,
            prefix: List[int],
            limit: Optional[int] = None) -> List[Tuple[Melody, float]]:
        """Return up to <limit> matches for the given interval sequence.

        The return value is a list of tuples (melody, weight), and must be
        ordered in non-increasing weight. (You can decide how to break ties.)

        If limit is None, return *every* match for the given interval sequence.

        Precondition:
            limit is None or limit > 0
        """
        return self.autocompleter.autocomplete(prefix, limit)

    def remove(self, prefix: List[int]) -> None:
        """Remove all melodies that match the given interval sequence.
        """
        self.autocompleter.remove(prefix)