Пример #1
0
 def get_code_trie(self):
     """Return the Trie holding every code word, building it on first use.

     The trie is created from ``self.code_alphabet``, filled with each word
     in ``self.code_words`` and then cached, so later calls return the same
     object.

     Returns:
         The cached Trie of the code words.
     """
     # Identity check: ``== None`` would defer to the cached object's own
     # ``__eq__`` and could needlessly rebuild the trie.
     if self.__code_trie is None:
         # Build into a local first so a failing insert never leaves a
         # half-populated trie in the cache.
         trie = Trie(self.code_alphabet)
         for word in self.code_words:
             trie.insert(word)
         self.__code_trie = trie
     return self.__code_trie
Пример #2
0
    def is_uniquely_decodable(self):
        """Return whether the code is uniquely decodable.

        A code is uniquely decodable when the mapping from source strings
        to encoded strings is injective.

        The method builds a sequence of word sets ``C_0, C_1, ...`` starting
        from the code words. Each new set gathers the completions returned
        by ``Trie.complete_word`` between the latest set and ``C_0``, in
        both directions. Construction stops when a set comes out empty or
        the sequence starts repeating. The code is uniquely decodable when
        no word of ``C_1, C_2, ...`` is itself a code word.

        The result is cached, so the sets are built only once per instance.

        Returns:
            True if the code is uniquely decodable, False otherwise.
        """
        if self.__uniquely_decodable is None:
            self.__C_n = [self.code_words]
            trie_0 = Trie(self.code_alphabet, words=self.code_words)
            while True:
                latest = self.__C_n[-1]
                trie_n = Trie(self.code_alphabet, words=latest)

                new_words = set()
                for word in latest:
                    new_words.update(trie_0.complete_word(word))
                for word in self.__C_n[0]:
                    new_words.update(trie_n.complete_word(word))
                if not new_words or self.__repeated_C_n():
                    break
                self.__C_n.append(new_words)

            # Union of every set after C_0.
            self.__C_infty = set().union(*self.__C_n[1:])

            # Store the result in the private cache. Assigning it to
            # ``self.is_uniquely_decodable`` would replace this method with
            # a bool on the instance and break every later call.
            self.__uniquely_decodable = self.__C_infty.isdisjoint(
                self.__C_n[0])

        return self.__uniquely_decodable
Пример #3
0
class Code:
    """
    Core class for coding and decoding.
    It has integrated core metrics to it.

    To create a new Code you need to pass the rule
    for the code as a dictionary mapping each source word
    to its code word.

    Attributes:
        code:
            The validated dictionary mapping source words to code words
        arity:
            An integer representing the code arity (size of the
            code alphabet)
        code_alphabet:
            A set containing the code alphabet
        source_alphabet:
            A set containing the source alphabet
        code_words:
            A set containing every code word
        source_words:
            A set containing every source word
    """
    # Lazily computed caches; None means "not computed yet".
    __code_trie = None
    __source_trie = None
    __uniquely_decodable = None

    def __init__(self, code_dictionary):
        """Initialise the code from a code dictionary.

        Args:
            code_dictionary:
                Mapping from source words to code words. It is passed
                through ``validate_dictionary`` before being used.
        """
        self.code = validate_dictionary(code_dictionary)
        self.code_alphabet = set()
        self.source_words = set()
        self.code_words = set()
        self.source_alphabet = set()
        self.__reverse_code = {}
        for source_word, code_word in self.code.items():
            self.source_words.add(source_word)
            self.code_words.add(code_word)
            self.__reverse_code[code_word] = source_word
            # Every symbol of a word belongs to the matching alphabet.
            self.source_alphabet.update(source_word)
            self.code_alphabet.update(code_word)
        self.arity = len(self.code_alphabet)

        # A dedicated decoder is only built for prefix codes; other codes
        # are decoded by brute force in ``decode``.
        if self.is_prefix():
            self.__decoder = Encoder(self.code_alphabet, self.__reverse_code)
        self.__encoder = Encoder(self.source_alphabet, self.code)

    def get_arity(self):
        """Returns arity of the code

        Return:
            Integer value of the arity
        """
        return self.arity

    def is_prefix(self):
        """
            Uses a Trie data structure to determine
            if the code is prefix

            Returns:
                A boolean True if the code is prefix, False
                otherwise
        """
        return self.get_code_trie().is_prefix()

    @staticmethod
    def kraft_inequality_metric(words_length, arity):
        r"""
        Calculates Kraft Inequality for given word lengths

        Measures Kraft Inequality Metric defined as

        .. math::
            \sum_{i} r^{-l_i}

        where r means arity, and l sub i the length of
        the ith word

        Also returns if an instantaneous code can be made with said words

        Typical Usage:

        .. highlight:: python
        .. code-block:: python

            >>> metric, can_be_inst = kraft_inequality_metric(words_length=[4,4,3,6],arity=2)
            >>> print(metric, can_be_inst)
            0.265625 True


        Args:
            words_length:
                A list with the length of the words
            arity:
                Arity of the code

        Returns:
            metric, can_be_instantaneous.
            Kraft Inequality metric.
            A boolean referring to whether or not a code
            with these word lengths and arity exists.
            This depends on the metric being at most 1
            (a small tolerance absorbs floating point error).

        Raises:
            Exception: if ``arity`` is None or not an int, or if
                ``words_length`` is None.
        """

        if arity is None:
            raise Exception('arity can not be null')
        # bool is a subclass of int; reject it explicitly so only genuine
        # integers are accepted.
        if isinstance(arity, bool) or not isinstance(arity, int):
            raise Exception('arity must be an int')
        if words_length is None:
            raise Exception('words_length can not be null')

        metric = 0.0
        epsilon = 0.00001
        for length in words_length:
            metric += float(arity)**float(-length)
        return metric, metric - epsilon <= 1

    def get_code_trie(self):
        """
            Returns a Trie data structure
            from the words in the code.

            The trie is built on the first call and cached.
        """
        if self.__code_trie is None:
            # Build into a local first so a failing insert never leaves a
            # half-populated trie in the cache.
            trie = Trie(self.code_alphabet)
            for word in self.code_words:
                trie.insert(word)
            self.__code_trie = trie
        return self.__code_trie

    def __repeated_C_n(self):
        """True if C_n has begun a cycle, False otherwise.

        A cycle is detected when the last two sets of the sequence equal
        an earlier pair of consecutive sets.
        """
        if len(self.__C_n) <= 3:
            return False
        last, before_last = self.__C_n[-1], self.__C_n[-2]
        return any(
            last == self.__C_n[i] and before_last == self.__C_n[i - 1]
            for i in range(1, len(self.__C_n) - 1))

    def is_uniquely_decodable(self):
        """Returns boolean specifying whether or not is uniquely decodable

        For the code to be uniquely decodable means that the function
        that takes a string from the source and encodes it
        is injective.

        The sets ``C_0, C_1, ...`` are built from the code words using
        ``Trie.complete_word`` until a set is empty or the sequence
        repeats; the code is uniquely decodable when none of
        ``C_1, C_2, ...`` contains a code word. The result is cached.

        Returns:
            True if the code is uniquely decodable, False otherwise
        """
        if self.__uniquely_decodable is None:
            self.__C_n = [self.code_words]
            trie_0 = Trie(self.code_alphabet, words=self.code_words)
            while True:
                latest = self.__C_n[-1]
                trie_n = Trie(self.code_alphabet, words=latest)

                new_words = set()
                for word in latest:
                    new_words.update(trie_0.complete_word(word))
                for word in self.__C_n[0]:
                    new_words.update(trie_n.complete_word(word))
                if not new_words or self.__repeated_C_n():
                    break
                self.__C_n.append(new_words)

            # Union of every set after C_0.
            self.__C_infty = set().union(*self.__C_n[1:])

            # Store the result in the private cache. Assigning it to
            # ``self.is_uniquely_decodable`` would replace this method with
            # a bool on the instance and break every later call.
            self.__uniquely_decodable = self.__C_infty.isdisjoint(
                self.__C_n[0])

        return self.__uniquely_decodable

    def get_C_n(self):
        """Returns C_n related to the uniquely decodable algorithm.

        Returns:
            List of set of words.
        """
        # Make sure the sets have been computed.
        self.is_uniquely_decodable()
        return self.__C_n

    def encode(self, sentence):
        """Encodes a given sentence
        """

        return self.__encoder.encode_sentence(sentence)

    def __encode_brute_force(self, sentence):
        """Encodes a given sentence using bruteforce.

        At each position the first source word found at that position
        is consumed and its code word is appended to the output.

        Raises:
            Exception: if no source word matches the remaining text.
        """
        pieces = []
        curr_idx = 0
        sentence_length = len(sentence)
        while curr_idx < sentence_length:
            advance = False
            for word in self.source_words:
                # Literal comparison: words are plain text, not regular
                # expressions. Empty words are skipped because they would
                # never advance the cursor.
                if word and sentence.startswith(word, curr_idx):
                    curr_idx += len(word)
                    pieces.append(self.code[word])
                    advance = True
                    break
            if not advance:
                raise Exception('No match found to encode {} '.format(
                    sentence[curr_idx:]))
        return ''.join(pieces)

    def decode(self, sentence, get_any=True):
        """Decodes a given sentence, in case
        multiple outputs are possible it returns
        any of them if the get_any flag is True.

        For prefix codes the result of the prefix decoder is returned
        directly, regardless of ``get_any``.

        Args:
            sentence:
                Sentence to be decoded. It must comply
                with the code alphabet.
            get_any:
                True by default. It gives a string when
                multiple answers are possible. When False
                it gives the answer in a set of possible answers.

        Returns:
            A string with the decoded sentence, or the set of
            every possible decoding when ``get_any`` is False.

        Raises:
            Exception: if the sentence can not be decoded.
        """
        if self.is_prefix():
            return self.__decoder.encode_sentence(sentence)
        result = self.__decode_brute_force(sentence)

        if not result:
            raise Exception('No decoding possible')

        if get_any:
            return result.pop()
        return result

    def __decode_brute_force(self, sentence, curr_idx=0):
        """Return the set of every decoding of ``sentence[curr_idx:]``.

        Tries each code word at the current position and recurses on the
        remainder; an empty set means no decoding exists.
        """
        # Base case: nothing left to decode, the only decoding is ''.
        if curr_idx >= len(sentence):
            return {''}

        results = set()
        for word in self.code_words:
            # Literal comparison: code words are plain text, not regular
            # expressions. Empty words are skipped because they would
            # recurse forever.
            if word and sentence.startswith(word, curr_idx):
                for result in self.__decode_brute_force(
                        sentence,
                        curr_idx=curr_idx + len(word),
                ):
                    results.add(self.__reverse_code[word] + result)
        return results

    def get_word_length(self, source):
        """Gets the average word length of the code
        for a given Source

        Args:
            source:
                A source of type Source

        Returns:
            A float with the value of the word length of
            the code for the given source.

        Raises:
            Exception: if ``source`` is not a Source, or if one of its
                words has no code word in this code.
        """
        if not isinstance(source, Source):
            raise Exception('source must be of type source')
        word_length = 0.0
        for word, probability in source.dictionary.items():
            # The word must be encodable by this code.
            if word not in self.code:
                raise Exception('{} not part of the code source '.format(word))

            word_length += probability * float(len(self.code[word]))
        return word_length

    @staticmethod
    def make_huffman_code(source, code_alphabet=None):
        """Generates an optimal Huffman Code

        Args:
            source:
                A source of type Source
            code_alphabet:
                Iterable with the alphabet. The items should be
                of type str. Defaults to the binary alphabet
                ``{'0', '1'}``.
        Returns:
            Optimal huffman code for the given source and
            with arity of the given code_alphabet

        Raises:
            Exception: if ``source`` is not a Source or the alphabet has
                fewer than two symbols.
        """
        if not isinstance(source, Source):
            raise Exception('source must be of type source')
        if code_alphabet is None:
            # Fresh set per call instead of a shared mutable default.
            code_alphabet = {'0', '1'}

        arity = len(code_alphabet)
        if arity < 2:
            raise Exception('code_alphabet must contain at least two symbols')

        base_nodes = [(probability, word)
                      for word, probability in source.dictionary.items()]
        # Pad with zero-probability nodes until the node count is
        # congruent to 1 modulo (arity - 1). This never pads when
        # arity is 2.
        while (len(base_nodes) - 1) % (arity - 1) != 0:
            base_nodes.append((float(0), ''))
        huffman = Huffman_Generator(simbols=code_alphabet,
                                    base_nodes=base_nodes,
                                    arity=arity)

        return Code(huffman.make_huffman_code())
Пример #4
0
def test_complete_word():
    """complete_word yields the suffixes that extend a word to a stored word."""
    alphabet = {'0', '2', '1'}
    trie = Trie(alphabet, words={'01', '21', '0112', '212', '011'})
    expectations = (
        ('01', {'1', '12'}),  # '011' and '0112' both extend '01'
        ('21', {'2'}),        # only '212' extends '21'
        ('1', set()),         # no stored word starts with '1'
    )
    for prefix, completions in expectations:
        assert trie.complete_word(prefix) == completions
Пример #5
0
def test_inserting_and_checking():
    """search finds exactly the inserted words, not their proper prefixes."""
    alphabet = {'0', '1', '2'}
    trie = Trie(alphabet)
    for word in ('00', '101', '2'):
        trie.insert(word)

    # (query, whether it was inserted), in the order the lookups are made.
    lookups = (
        ('0', False),
        ('00', True),
        ('10', False),
        ('101', True),
        ('2', True),
    )
    for query, stored in lookups:
        assert bool(trie.search(query)) == stored
Пример #6
0
def test_words_init():
    """Words passed to the constructor are searchable; other words are not."""
    alphabet = {'0', '1', '2'}
    initial_words = ['120', '000', '101']
    trie = Trie(alphabet, words=initial_words)
    found = trie.search('120')
    assert found
    missing = trie.search('100')
    assert not missing
Пример #7
0
def test_prefix():
    """is_prefix holds until a word extending a stored word is inserted."""
    alphabet = {'0', '1', '2'}
    trie = Trie(alphabet)
    for word in ('0', '1', '20', '21', '22'):
        trie.insert(word)
    assert trie.is_prefix()

    # '0' is already stored, so adding '01' makes one word a prefix of another.
    trie.insert('01')
    assert not trie.is_prefix()