Пример #1
0
def transform_forward(X):
    """Transform the single value 'X' by means of the module level 'trafo' table.

    The value is wrapped into the interval [X, X+1) and handed to
    'Interval.transform_by_table()'.

    RETURNS: Begin of the one and only resulting interval.
             None, if the transformation reports failure.
    """
    single_value_interval = Interval(X, X + 1)
    success_f, transformed = single_value_interval.transform_by_table(trafo)
    if not success_f:
        return None
    # A single value must map to exactly one interval.
    assert len(transformed) == 1
    return transformed[0].begin
Пример #2
0
    def add_transition(self, Trigger, TargetStateIdx): 
        """Registers 'Trigger' as a trigger towards the state 'TargetStateIdx'.

           Trigger may be:
             None              -- everything of the buffer encoding's source set
                                  that is not yet covered by the existing triggers.
             int, long         -- a single value.
             list              -- passed to 'NumberSet(..., ArgumentIsYoursF=True)'.
             Interval,
             NumberSet         -- taken as is.

           RETURNS: The target state index (may be created newly).
        """
        assert type(TargetStateIdx) == long \
               or TargetStateIdx is None \
               or TargetStateIdx in E_StateIndices, "%s" % TargetStateIdx.__class__.__name__
        assert Trigger is None or Trigger.__class__ in (int, long, list, Interval, NumberSet)

        # Normalize the trigger to either 'Interval' or 'NumberSet'.
        if Trigger is None:
            # Shorthand: trigger via the remaining triggers
            covered = self.get_trigger_set_union()
            Trigger = covered.get_complement(Setup.buffer_encoding.source_set)
        elif type(Trigger) == long:
            Trigger = Interval(int(Trigger), int(Trigger+1))
        elif type(Trigger) == int:
            Trigger = Interval(Trigger, Trigger+1)
        elif type(Trigger) == list:
            Trigger = NumberSet(Trigger, ArgumentIsYoursF=True)

        interval_f = (Trigger.__class__ == Interval)

        if TargetStateIdx in self.__db:
            # Target exists already => extend its trigger set.
            trigger_set = self.__db[TargetStateIdx]
            if interval_f: trigger_set.add_interval(Trigger)
            else:          trigger_set.unite_with(Trigger)
        elif interval_f:
            self.__db[TargetStateIdx] = NumberSet(Trigger, ArgumentIsYoursF=True)
        else:
            # NOTE: The given number set object itself is stored (no copy).
            self.__db[TargetStateIdx] = Trigger

        return TargetStateIdx
Пример #3
0
def test(Begin, End):
    X = Interval(Begin, End)
    print "-------------------------"
    print "Interval:     " + X.get_string(Option="hex")
    print "   .front --> " + pretty_sequence(X.begin)
    print "   .back  --> " + pretty_sequence(X.end - 1)
    print

    L = len(trafo.unicode_to_utf8(X.begin))
    assert L == len(trafo.unicode_to_utf8(X.end - 1))
    result, p = trafo.split_interval_into_contigous_byte_sequence_range(X, L)
    print "Result:"
    previous_end = X.begin
    for interval in result:
        print "      %s " % interval.get_string(Option="hex")

        # All sub intervals must be adjacent
        assert interval.begin == previous_end

        print "         .front --> " + pretty_sequence(interval.begin)
        print "         .back  --> " + pretty_sequence(interval.end - 1)
        previous_end = interval.end

    # The whole interval has been spanned
    assert result[0].begin == X.begin
    assert result[-1].end  == X.end
Пример #4
0
def do(BufferCodecName, BufferCodecFileName=""):
    """Provide the encoding transformation object for a buffer codec.

    BufferCodecName     -- "utf8", "utf16", "unicode", "unit-test", or any other
                           name, which is then passed to 'EncodingTrafoByTable'.
    BufferCodecFileName -- if not empty (and the codec name is neither "utf8"
                           nor "utf16"), the transformation is created from
                           that file.

    RETURNS: Object of one of the 'EncodingTrafo...' classes.
    """
    from quex.engine.state_machine.transformation.base import EncodingTrafoUnicode
    from quex.engine.state_machine.transformation.table import EncodingTrafoByTable
    from quex.engine.state_machine.transformation.utf8_state_split import EncodingTrafoUTF8
    from quex.engine.state_machine.transformation.utf16_state_split import EncodingTrafoUTF16

    if BufferCodecName == "utf8":
        return EncodingTrafoUTF8()

    elif BufferCodecName == "utf16":
        return EncodingTrafoUTF16()

    elif BufferCodecFileName:
        # Probe whether the file name can be handled as a path at all. The
        # probe must happen INSIDE the 'try' block, otherwise a failure
        # propagates before the error can be reported.
        try:
            os.path.splitext(os.path.basename(BufferCodecFileName))
        except (AttributeError, TypeError):
            # Raised by 'os.path' if the argument is not a string/path.
            error.log("cannot interpret string following '--codec-file'")
        return EncodingTrafoByTable(FileName=BufferCodecFileName)

    elif BufferCodecName == "unicode":
        # (Still, 'icu' or 'iconv' may provide converted content, but ...)
        # If the internal buffer is 'unicode', then the pattern's state
        # machines are not converted. The requirement for the pattern's
        # range is the same as for the 'buffer element chunks'.
        return EncodingTrafoUnicode(NumberSet(Interval(0, 0x110000)),
                                    NumberSet(Interval(0, 0x110000)))

    elif BufferCodecName == "unit-test":
        return EncodingTrafoUnicode(NumberSet_All(), NumberSet_All())

    else:
        return EncodingTrafoByTable(BufferCodecName)
Пример #5
0
def test(Begin, End):
    X = Interval(Begin, End)
    print "-------------------------"
    print "Interval:     " + X.get_string(Option="hex")
    print "   .front --> " + pretty_sequence(X.begin)
    print "   .back  --> " + pretty_sequence(X.end - 1)
    print

    L = len(trafo.unicode_to_utf8(X.begin))
    assert L == len(trafo.unicode_to_utf8(X.end - 1))
    result, p = trafo.split_interval_into_contigous_byte_sequence_range(X, L)
    print "Result:"
    previous_end = X.begin
    for interval in result:
        print "      %s " % interval.get_string(Option="hex")

        # All sub intervals must be adjacent
        assert interval.begin == previous_end

        print "         .front --> " + pretty_sequence(interval.begin)
        print "         .back  --> " + pretty_sequence(interval.end - 1)
        previous_end = interval.end

    # The whole interval has been spanned
    assert result[0].begin == X.begin
    assert result[-1].end == X.end
Пример #6
0
def _split_by_transformed_sequence_length(X):
    """Cut the Unicode interval 'X' into pieces such that all values of a
    piece have the same utf8-byte sequence length.

    NOTE: 'X' is clamped IN PLACE to begin >= 0 and end <= UTF8_MAX + 1.

    RETURNS: map: sequence length --> Unicode Sub-Interval of X.
             None, if the (clamped) interval is empty.
    """
    if X.begin < 0:         X.begin = 0
    if X.end   > UTF8_MAX:  X.end   = UTF8_MAX + 1

    if X.size() == 0: return None

    # Sequence length of the last value inside the interval.
    length_of_last = len(unicode_to_utf8(X.end - 1))

    db    = {}
    begin = X.begin
    while True:
        # Sequence length of the first value of the remaining interval.
        L      = len(unicode_to_utf8(begin))
        border = UTF8_BORDERS[L-1]
        if L == length_of_last:
            # Remainder has one single sequence length => done.
            db[L] = Interval(begin, X.end)
            return db
        # Store the piece up to the border; continue from there.
        db[L] = Interval(begin, border)
        begin = border
Пример #7
0
 def __init__(self):
     """Sets up the base class for "utf16" with the code unit range
     [0, 0x10000) and defines the error ranges for the two code units as
     the complements of their admissible value sets.
     """
     code_unit_range = NumberSet.from_range(0, 0x10000)
     EncodingTrafoBySplit.__init__(self, "utf16", CodeUnitRange=code_unit_range)

     # Code unit 0: admissible is everything in [0, 0x10000) except [0xDC00, 0xE000)
     admissible_0 = NumberSet([Interval(0x0000, 0xDC00), Interval(0xE000, 0x10000)])
     self.error_range_code_unit0 = admissible_0.get_complement(NumberSet_All())

     # Code unit 1: admissible is only [0xDC00, 0xE000)
     admissible_1 = NumberSet([Interval(0xDC00, 0xE000)])
     self.error_range_code_unit1 = admissible_1.get_complement(NumberSet_All())
Пример #8
0
def test_inverse():
    print "INVERSE"
    print "--------------------------------------------------------------------------------"

    the_inverse("(a) normal", Interval(5000, 6000))
    the_inverse("(b) lower border = - maxint", Interval(-sys.maxint, 6000))
    the_inverse("(c) upper border = maxint", Interval(5000, sys.maxint))
    the_inverse("(c) upper/lower border = +/- maxint",
                Interval(-sys.maxint, sys.maxint))
Пример #9
0
def test_inverse():
    print "INVERSE"
    print "--------------------------------------------------------------------------------"

    the_inverse("(a) normal", Interval(5000, 6000))
    the_inverse("(b) lower border = - maxint", Interval(-INTEGER_MAX, 6000))
    the_inverse("(c) upper border = maxint", Interval(5000, INTEGER_MAX))
    the_inverse("(c) upper/lower border = +/- maxint",
                Interval(-INTEGER_MAX, INTEGER_MAX))
Пример #10
0
def test(A0, A1, B0, B1):
    print "------------------------------------------------"
    A = Interval(A0, A1)
    B = Interval(B0, B1)
    print "%s <  %s : %s" % (repr(A), repr(B), A < B)
    print "%s <= %s : %s" % (repr(A), repr(B), A <= B)
    print "%s == %s : %s" % (repr(A), repr(B), A == B)
    print "%s != %s : %s" % (repr(A), repr(B), A != B)
    print "%s >= %s : %s" % (repr(A), repr(B), A >= B)
    print "%s >  %s : %s" % (repr(A), repr(B), A > B)
Пример #11
0
def _split_contigous_intervals_for_surrogates(Begin, End):
    """Splits [Begin, End) into sub intervals such that no sub interval runs
       over a 'surrogate' border of the last word.

       If first and last value share the same first word, then the interval
       is returned as is. Else, it is split into up to three domains:

         (1) From 'Begin' until the second word reaches the last value of
             'ForbiddenRange' (while the first word is the one of 'Begin').
         (2) The range where the first word increases up to the first word
             of the last value minus one (only if there is such a range).
         (3) The remainder up to 'End'.

       REQUIRES: 0x10000 <= Begin < End <= 0x110000

       RETURNS: List of intervals.
    """
    assert Begin >= 0x10000
    assert End <= 0x110000
    assert End > Begin

    first_word_of_front = unicode_to_utf16(Begin)[0]
    first_word_of_back  = unicode_to_utf16(End - 1)[0]

    # (*) Same first word => interval is contigous.
    if first_word_of_front == first_word_of_back:
        return [Interval(Begin, End)]

    # (*) First word differs => split.
    last_second_word = ForbiddenRange.end - 1

    # (1) 'Begin' until the second word hits its upper border.
    border = utf16_to_unicode([first_word_of_front, last_second_word]) + 1
    #     (Must hold, since front and back differ in their first word.)
    assert End > border
    piece_list = [Interval(Begin, border)]

    # (2) Range where the first word increases.
    if first_word_of_front + 1 != first_word_of_back:
        mid_border = utf16_to_unicode([first_word_of_back - 1,
                                       last_second_word]) + 1
        #     (Must hold, since front and back differ in their first word.)
        assert mid_border > border
        piece_list.append(Interval(border, mid_border))
        border = mid_border

    # (3) Last border to 'End'.
    if End > border:
        piece_list.append(Interval(border, End))

    return piece_list
Пример #12
0
def prepare(A_list, B_list):
    """Builds two number sets from lists of (begin, end) pairs.

    RETURNS: [0] NumberSet made of the intervals in 'A_list'.
             [1] NumberSet made of the intervals in 'B_list'.

    Both sets are checked with '.assert_consistency()'.
    """
    A, B = NumberSet(), NumberSet()

    for number_set, border_list in ((A, A_list), (B, B_list)):
        for begin, end in border_list:
            number_set.add_interval(Interval(begin, end))

    for number_set in (A, B):
        number_set.assert_consistency()

    return A, B
Пример #13
0
 def __do_this(A, B):
     interval_list = B.get_intervals()
     for interval in interval_list:
         print "#"
         print "#  A                 = " + repr(A)
         print "#  B                 = " + repr(interval)
         X = deepcopy(A)
         safe = Interval(interval.begin, interval.end)
         X.cut_interval(safe)
         X.assert_consistency()
         safe.begin = 7777
         safe.end = 0000
         print "#  A.cut_interval(B) = " + repr(X)
Пример #14
0
 def __do_this(A, B):
     interval_list = B.get_intervals()
     for interval in interval_list:
         print "#"
         print "#  A                 = " + repr(A)
         print "#  B                 = " + repr(interval)
         X = deepcopy(A)
         safe = Interval(interval.begin, interval.end)
         X.cut_interval(safe)
         X.assert_consistency()
         safe.begin = 7777
         safe.end   = 0000
         print "#  A.cut_interval(B) = " + repr(X) 
Пример #15
0
 def adapt_ranges_to_lexatom_type_range(self, LexatomTypeRange):
     """Adapts error ranges and source set to the given lexatom type range.

     -- '.source_set' is masked to [0, LexatomTypeRange.end) if the range
        ends below 0x10000; else to [0, 0x110000).
     -- If the range ends above 0x10000, then [0x10000, LexatomTypeRange.end)
        is added to the error ranges of code unit 0 and 1.
     """
     self._adapt_error_ranges_to_lexatom_type_range(LexatomTypeRange)

     range_end = LexatomTypeRange.end

     # UTF16 requires at least 2 byte for a 'normal code unit'. Anything else
     # requires to cut on the admissible set of code points.
     if range_end < 0x10000: source_end = range_end
     else:                   source_end = 0x110000
     self.source_set.mask(0, source_end)

     if range_end <= 0x10000:
         return

     # Each error range receives its own interval object.
     for code_unit_i in (0, 1):
         self._error_range_by_code_unit_db[code_unit_i].unite_with(
             Interval(0x10000, range_end))
Пример #16
0
def _get_trigger_sequence_for_interval(X):
    # The interval either lies entirely >= 0x10000 or entirely < 0x10000
    assert X.begin >= 0x10000 or X.end < 0x10000

    # An interval below < 0x10000 remains the same
    if X.end < 0x10000: return [ X ]
    
    # In case that the interval >= 0x10000 it the value is split up into
    # two values.
    front_seq = unicode_to_utf16(X.begin)
    back_seq  = unicode_to_utf16(X.end - 1)

    return [ Interval(front_seq[0], back_seq[0] + 1), 
             Interval(front_seq[1], back_seq[1] + 1) ]
Пример #17
0
    def __init__(self):
        """Initializes the base class for "utf16" with one error range per
        code unit. An error range is the complement of the values which are
        admissible for the code unit.
        """
        # A character in UTF16 is at maximum represented by two code units.
        # => Two error ranges. (Both are adapted later.)
        admissible_0  = NumberSet([Interval(0x0000, 0xDC00),
                                   Interval(0xE000, 0x10000)])
        error_range_0 = admissible_0.get_complement(NumberSet_All())

        admissible_1  = NumberSet([Interval(0xDC00, 0xE000)])
        error_range_1 = admissible_1.get_complement(NumberSet_All())

        EncodingTrafoBySplit.__init__(self, "utf16",
                                      {0: error_range_0, 1: error_range_1})
Пример #18
0
def __create_database_file(TargetEncoding, TargetEncodingName):
    """Writes a database file for a given TargetEncodingName. The 
       TargetEncodingName is required to name the file where the 
       data is to be stored.
    """
    encoder     = codecs.getencoder(TargetEncoding)
    prev_output = -1
    db          = []
    bytes_per_char = -1
    for input in range(0x110000):
        output, n = __get_transformation(encoder, input)

        if bytes_per_char == -1: 
            bytes_per_char = n
        elif n != -1 and bytes_per_char != n:
            print "# not a constant size byte format."
            return False

        # Detect discontinuity in the mapping
        if   prev_output == -1:
            if output != -1:
                input_interval        = Interval(input)
                target_interval_begin = output

        elif output != prev_output + 1:
            # If interval was valid, append it to the database
            input_interval.end    = input
            db.append((input_interval, target_interval_begin))
            # If interval ahead is valid, prepare an object for it
            if output != -1:
                input_interval        = Interval(input)
                target_interval_begin = output

        prev_output = output

    if prev_output != -1:
        input_interval.end = input
        db.append((input_interval, target_interval_begin))

    fh = open_file_or_die(QUEX_CODEC_DB_PATH + "/%s.dat" % TargetEncoding, "wb")
    fh.write("// Describes mapping from Unicode Code pointer to Character code in %s (%s)\n" \
             % (TargetEncoding, TargetEncodingName))
    fh.write("// [SourceInterval.begin] [SourceInterval.Size]  [TargetInterval.begin] (all in hexidecimal)\n")
    for i, t in db:
        fh.write("0x%X %i 0x%X\n" % (i.begin, i.end - i.begin, t))
    fh.close()

    return True
Пример #19
0
    def adapt_source_and_drain_range(self, LexatomByteN):
        """The drain range may be restricted due to the number of bytes given
        per lexatom. If the 'LexatomByteN' is '-1' it is unrestricted which 
        may be useful for unit tests and theoretical investigations.

        For 'LexatomByteN >= 1' the lexatom range becomes [0, max + 1), where
        'max' is the minimum of
            -- the largest value of '.drain_set',
            -- 256 ** LexatomByteN - 1, and
            -- sys.maxint.
        '.drain_set' is masked with the resulting '.lexatom_range'.

        DERIVED CLASS MAY HAVE TO WRITE A DEDICATED VERSION OF THIS FUNCTION
        TO MODIFY THE SOURCE RANGE '.source_set'.
        """
        if LexatomByteN == -1:
            self.lexatom_range = Interval_All()
            return 

        assert LexatomByteN >= 1
        drain_max_value = self.drain_set.supremum() - 1

        # From here on 'LexatomByteN != -1' holds, so the restriction by the
        # lexatom size applies unconditionally.
        try:    
            value_n = 256 ** LexatomByteN
        except (ArithmeticError, MemoryError, TypeError):
            # NOTE(review): presumably 'error.log()' terminates the program;
            # otherwise 'value_n' is undefined below -- verify.
            error.log("Error while trying to compute 256 power the 'lexatom-size' (%i bytes)\n"   \
                      % LexatomByteN + \
                      "Adapt \"--buffer-element-size\" or \"--buffer-element-type\",\n"       + \
                      "or specify '--buffer-element-size-irrelevant' to ignore the issue.")

        lexatom_min_value = 0
        lexatom_max_value = min(drain_max_value, value_n - 1, sys.maxint)

        assert lexatom_max_value > lexatom_min_value

        self.lexatom_range = Interval(lexatom_min_value, lexatom_max_value + 1)
        self.drain_set.mask_interval(self.lexatom_range)
Пример #20
0
    def do(self, UnicodeTrafoInfo, ProvidedConversionInfoF=False):
        """Generates the code text for a conversion to the target encoding
        based on a conversion table.

        UnicodeTrafoInfo        -- input to '.get_conversion_table()'; or the
                                   conversion table itself, if
                                   'ProvidedConversionInfoF' is set.
        ProvidedConversionInfoF -- only to be used for Unit Tests.

        NOTE: The conversion table is sorted in place.

        RETURNS: Code as a string (lines joined by newline).
        """
        if ProvidedConversionInfoF:
            conversion_table = UnicodeTrafoInfo
        else:
            conversion_table = self.get_conversion_table(UnicodeTrafoInfo)

        assert all(isinstance(entry, ConversionInfo)
                   for entry in conversion_table)

        # Make sure that the conversion table is sorted
        conversion_table.sort(key=attrgetter("codec_interval_begin"))

        if len(conversion_table) == 1:
            # One single range => no need to distinguish ranges.
            only = conversion_table[0]
            txt  = ["    %s" % self.get_offset_code(only)]
            txt.extend(self.unicode_to_output(only.code_unit_n))
            return "\n".join(txt)

        # Map: codec interval --> action (offset code + jump to formatter)
        tm = []
        for ci in conversion_table:
            begin    = ci.codec_interval_begin
            interval = Interval(begin, begin + ci.codec_interval_size)
            action   = "{ %s %s }" % \
                       (self.get_offset_code(ci),
                        self.jump_to_output_formatter(ci.code_unit_n))
            tm.append((interval, action))

        txt = []
        transition_map.do(txt, tm, AssertBorderF=False)
        txt.append(self.unicode_to_output_all_ranges())
        return "\n".join(txt)
Пример #21
0
    def consider_interval(self, Begin, End):
        """Adds the interval given by 'Begin' and 'End' to '.match_set'.

        RAISES: RegularExpressionException, if 'Begin' is greater than 'End'.
        """
        if Begin <= End:
            self.match_set.add_interval(Interval(Begin, End))
            return

        # Only the second line of the message contains format specifiers.
        detail = "found range '%s-%s' which corresponds to %i-%i as unicode code points." % \
                 (utf8.map_unicode_to_utf8(Begin), utf8.map_unicode_to_utf8(End), Begin, End)
        raise RegularExpressionException("Character range: '-' requires character with 'lower code' to preceed\n" + detail)
Пример #22
0
    def get(self):
        """Provides the number set that corresponds to the current cursor
        and, then, steps the cursor to its next setting.

        The cursor is read in pairs: the first element is the distance from
        the end of the previous interval, the second is the interval's size.
        If an interval exceeds 'self.N', the cursor's last element is
        dropped and no further intervals are considered.

        RETURNS: NumberSet; None, if the cursor is empty.
        """
        # Transform 'cursor' into a number set
        result = NumberSet()
        K = len(self.__cursor)
        if K == 0: return None

        end = 0
        for k in range(0, K - 1, 2):
            begin = end + self.__cursor[k]
            end   = begin + self.__cursor[k + 1]
            if end > self.N:
                self.__cursor.pop()
                K -= 1
                break
            if begin != end:
                result.quick_append_interval(Interval(begin, end))

        # Increment cursor: element 0 steps by 2 and is limited by 8; all
        # others step by 1 and are limited by 3. An element that reaches its
        # limit is reset to 1 and the next element is stepped.
        for k in range(K):
            if k == 0: step, limit = 2, 8
            else:      step, limit = 1, 3
            self.__cursor[k] += step
            if self.__cursor[k] < limit:
                break
            self.__cursor[k] = 1

        return result
Пример #23
0
def construct_tm(IntervalList):
    """Builds a transition map from a list of interval borders.

    The i-th entry of 'IntervalList' (read as [begin, end]) is associated
    with the target 'ord("a") + i'.

    RETURNS: Result of 'TransitionMap.from_iterable()'.
    """
    first_target = ord('a')

    entry_list = []
    for i, x in enumerate(IntervalList):
        entry_list.append((Interval(x[0], x[1]), first_target + i))

    return TransitionMap.from_iterable(entry_list)
Пример #24
0
def test_on_UCS_sample_sets(Trafo, unicode_to_transformed_sequence):
    script_list = [
        "Arabic", "Armenian", "Balinese", "Bengali", "Bopomofo", "Braille",
        "Buginese", "Buhid", "Canadian_Aboriginal", "Cherokee", "Common",
        "Cuneiform", "Cypriot", "Deseret", "Gothic", "Greek", "Hanunoo",
        "Hebrew", "Hiragana", "Inherited", "Kannada", "Han", "Katakana",
        "Kharoshthi", "Khmer", "Lao", "Latin", "Limbu", "Linear_B",
        "Malayalam", "Mongolian", "Myanmar", "New_Tai_Lue", "Nko", "Osmanya",
        "Ogham", "Old_Italic", "Old_Persian", "Phoenician", "Shavian",
        "Syloti_Nagri", "Syriac", "Tagalog", "Tagbanwa", "Tai_Le", "Tamil",
        "Telugu", "Thaana", "Thai", "Tibetan", "Tifinagh", "Ugaritic", "Yi"
    ]
    sets = [X(name) for name in script_list]

    orig = combination.do(map(lambda x: x.sm, sets))
    state_n_before, result = transform(Trafo, orig)

    # print result.get_graphviz_string(Option="hex")

    for set in sets:
        set.check(result, unicode_to_transformed_sequence)
    print "Translated %i groups without abortion on error (OK)" % len(sets)

    union = NumberSet()
    for nset in map(lambda set: set.charset, sets):
        union.unite_with(nset)

    inverse_union = NumberSet(Interval(0, 0x110000))
    inverse_union.subtract(union)
    # print inverse_union.get_string(Option="hex")
    check_negative(result,
                   inverse_union.get_intervals(PromiseToTreatWellF=True),
                   unicode_to_transformed_sequence)
Пример #25
0
def _get_contigous_intervals(X):
    """Separates the Unicode interval 'X' into the part below 0x10000 and
       the part from 0x10000 on. The latter is further split by
       '_split_contigous_intervals_for_surrogates()'.

       NOTE: Borders of 'X' at -sys.maxint / sys.maxint are replaced IN PLACE
             by 0 / 0x110000.

       RETURNS:  [X0, List1]  

                 X0    = the sub-interval of values below 0x10000
                         (1 word in utf16).

                         None => No such interval

                 List1 = list of contigous sub-intervals of values from
                         0x10000 on (2 words in utf16).

                         None => No such intervals
    """
    if X.begin == -sys.maxint: X.begin = 0
    if X.end   == sys.maxint:  X.end   = 0x110000

    assert X.end != X.begin     # Empty intervals are nonsensical
    assert X.end <= 0x110000    # Interval must lie in unicode range
    assert not X.check_overlap(ForbiddenRange) # The 'forbidden range' is not to be covered.

    one_word_end = 0x10000      # First value that requires two words.

    if X.end <= one_word_end: 
        # Entirely below the border: 'X' itself is passed through.
        return [X, None]

    if X.begin >= one_word_end: 
        # Entirely at or above the border.
        return [None, _split_contigous_intervals_for_surrogates(X.begin, X.end)]

    # 'X' runs over the border.
    return [Interval(X.begin, one_word_end), 
            _split_contigous_intervals_for_surrogates(one_word_end, X.end)]
Пример #26
0
def test(Begin, End):
    X = Interval(Begin, End)
    print "-------------------------"
    print "Interval:     " + X.get_string(Option="hex")
    print "   .front --> " + pretty_sequence(X.begin)
    print "   .back  --> " + pretty_sequence(X.end - 1)
    print

    x0, list1 = trafo.get_contigous_intervals(X)
    print "Result:"
    print "   Interval < 0x10000:    ",
    if x0 is not None: print "%s" % x0.get_string(Option="hex")
    else: print "None"
    print "   Intervals >= 0x10000:  ",

    if list1 is None: print "None"
    else:
Пример #27
0
def get_trigger_sequence_for_contigous_byte_range_interval(X, L):
    """Determines one trigger interval for each of the first 'L' positions
    of the utf8 sequences that represent the interval 'X'.

    The interval at position 'i' reaches from the i-th element of the
    sequence of 'X.begin' to the i-th element of the sequence of 'X.end - 1'
    (inclusive).

    RETURNS: List of 'L' intervals.
    """
    # If the interval is contigous it must produce equal length utf8 sequences
    front = unicode_to_utf8(X.begin)
    back  = unicode_to_utf8(X.end - 1)

    trigger_sequence = []
    for i in range(L):
        trigger_sequence.append(Interval(front[i], back[i] + 1))
    return trigger_sequence
Пример #28
0
def test(Begin, End):
    X = Interval(Begin, End)
    print "-------------------------"
    print "Interval:     " + X.get_string(Option="hex")
    print "   .front --> " + pretty_sequence(X.begin)
    print "   .back  --> " + pretty_sequence(X.end - 1)
    print

    x0, list1 = trafo._get_contigous_intervals(X)
    print "Result:"
    print "   Interval < 0x10000:    ", 
    if x0 is not None: print "%s" % x0.get_string(Option="hex")
    else:          print "None"
    print "   Intervals >= 0x10000:  ",
    
    if list1 is None: print "None"
    else:
Пример #29
0
    def __init__(self):
        """Initializes the base class for "utf8" with the error ranges for
        the code units 0 to 8. An error range is the complement of the values
        which are admissible for the code unit. The code units 1 to 8 share
        the same error range object.
        """
        # Code unit 0: (first, last) of the admissible value ranges.
        admissible_border_list = [
            (0b00000000, 0b01111111),
            (0b11000000, 0b11011111),
            (0b11100000, 0b11101111),
            (0b11110000, 0b11110111),
            (0b11111000, 0b11111011),
            (0b11111100, 0b11111101),
        ]
        admissible_0  = NumberSet([Interval(first, last + 1)
                                   for first, last in admissible_border_list])
        error_range_0 = admissible_0.get_complement(NumberSet_All())  # Adapted later

        # Code units 1 to 8: only 0b10000000 to 0b10111111 is admissible.
        admissible_N  = NumberSet(Interval(0b10000000, 0b10111111+1))
        error_range_N = admissible_N.get_complement(NumberSet_All())  # Adapted later

        error_range_by_code_unit_db = {0: error_range_0}
        for code_unit_i in range(1, 9):
            error_range_by_code_unit_db[code_unit_i] = error_range_N

        EncodingTrafoBySplit.__init__(self, "utf8",
                                      error_range_by_code_unit_db)
        self.UnchangedRange = 0x7F
Пример #30
0
def do(section_list, fh):
    """Parses a codec information file. The described codec can only be
    a 'static character length' encoding. That is every character in the
    code occupies the same number of bytes.

    The file is read as a sequence of integer triples: source interval
    begin, source interval size, target interval begin. For each triple
    [source begin, source end, target begin] is appended to 'section_list'.
    Reading ends at the end of the stream or at the first missing integer.

    RETURNS: [0] Set of characters in unicode which are covered by the
                 described codec.
             [1] Range of values in the codec elements.
             [2] Error message, if an integer was missing; else None.
    """
    source_set = NumberSet()
    drain_set = NumberSet()

    error_str = None

    try:
        while True:
            skip_whitespace(fh)
            source_begin = read_integer(fh)
            if source_begin is None:
                error_str = "Missing integer (source interval begin) in codec file."
                break

            skip_whitespace(fh)
            source_size = read_integer(fh)
            if source_size is None:
                error_str = "Missing integer (source interval size) in codec file."
                break

            skip_whitespace(fh)
            target_begin = read_integer(fh)
            if target_begin is None:
                error_str = "Missing integer (target interval begin) in codec file."
                break

            source_end = source_begin + source_size
            target_end = target_begin + source_size

            list.append(section_list, [source_begin, source_end, target_begin])
            source_set.add_interval(Interval(source_begin, source_end))
            drain_set.add_interval(Interval(target_begin, target_end))

    except EndOfStreamException:
        # Regular termination: no more content in the file.
        pass

    return source_set, drain_set, error_str
Пример #31
0
def test(TM, Target="X"):
    tm = TransitionMap.from_iterable([ (Interval(x[0], x[1]), y) for x, y in TM ])
    print "____________________________________________________________________"
    print "BEFORE:"
    show(tm)
    tm.combine_adjacents()
    tm.assert_continuity(StrictF=True)
    print "AFTER:"
    show(tm)
Пример #32
0
def test(TM, Target="X"):
    tm = TransitionMap([ (Interval(x[0], x[1]), y) for x, y in TM ])
    print "____________________________________________________________________"
    print "BEFORE:"
    show(tm)
    tm.fill_gaps(Target, Setup.buffer_codec.source_set.minimum(), Setup.buffer_codec.source_set.supremum())
    tm.assert_adjacency(ChangeF=True)
    print "AFTER:"
    show(tm)
Пример #33
0
    def set_target(self, Character, NewTarget):
        """Lets 'Character' trigger to 'NewTarget' in this transition map.

        If no interval contains 'Character', then a new entry is inserted
        and the map is sorted. Else, the interval which contains 'Character'
        is split as far as necessary, and adjacent intervals with the same
        target are combined.
        """
        # Find the index of the interval which contains 'Character'
        i = TransitionMap.bisect(self, Character)
        if i is None:
            self.insert(0, (Interval(Character), NewTarget))
            self.sort()
            return

        interval, old_target = self[i]
        assert interval.size() > 0

        if old_target == NewTarget:
            return  # Nothing to be done

        if interval.size() == 1:
            # The interval consists only of 'Character' => replace target.
            self[i] = (interval, NewTarget)
            touched_i = i

        elif Character == interval.end - 1:
            # 'Character' is the last value => cut at the back.
            self.insert(i + 1, (Interval(Character), NewTarget))
            interval.end -= 1
            touched_i = i + 1

        elif Character == interval.begin:
            # 'Character' is the first value => cut at the front.
            self.insert(i, (Interval(Character), NewTarget))
            interval.begin += 1
            touched_i = i

        else:
            # 'Character' lies inside => head | Character | tail.
            # (The tail must be built before 'interval.end' is changed.)
            tail = Interval(Character + 1, interval.end)
            self.insert(i + 1, (Interval(Character), NewTarget))
            self.insert(i + 2, (tail, old_target))
            interval.end = Character
            touched_i = i + 1

        # Combine adjacent intervals which trigger to the same target.
        self.combine_adjacents(touched_i)
        self.assert_continuity()
        return
Пример #34
0
def parse_table(Filename,
                IntervalColumnList=[],
                NumberColumnList=[],
                NumberListColumnList=[],
                CommentF=False):
    """Parses a data base file with one record per line, cells separated by
    ';' and comments starting with '#'. Empty lines are skipped.

       Columns in IntervalColumnList   --> converted to Interval() objects
                                           ('value' or 'value0..value1', hex)
                  NumberColumnList     --> converted to integers (hex numbers)
                  NumberListColumnList --> converted to integer list (hex numbers)

       CommentF -- if set, the comment of the line (None, if there is none)
                   is appended as last cell of each record.

       (The list defaults are only iterated, never modified.)

       RETURNS: List of records, where each record is a list of cells.
    """
    def hex_number(text):
        """Interprets 'text' as a hexadecimal number without '0x' prefix."""
        return int("0x" + text, 16)

    fh = open_data_base_file(Filename)
    try:
        line_list = fh.readlines()
    finally:
        # Release the file handle as soon as the content has been read.
        # (assumes 'open_data_base_file()' returns a file object -- TODO confirm)
        fh.close()

    record_set = []
    for line in line_list:
        comment     = None
        comment_idx = line.find("#")
        if comment_idx != -1:
            comment = line[comment_idx + 1:]
            line    = line[:comment_idx]

        if line == "" or line.isspace():
            continue

        # A real list is required, since cells are replaced by index below.
        cells = [cell.strip() for cell in line.split(";")]

        for i in IntervalColumnList:
            fields = cells[i].split("..")  # range: value0..value1
            assert len(fields) in [1, 2]

            # A single value is the range from the value to itself. The file
            # specifies the last value; 'Interval' takes the exclusive end.
            begin = hex_number(fields[0])
            end   = hex_number(fields[-1]) + 1
            cells[i] = Interval(begin, end)

        for i in NumberColumnList:
            cells[i] = hex_number(cells[i])

        for i in NumberListColumnList:
            cells[i] = [hex_number(n) for n in cells[i].split()]

        # Sometimes, the comment is useful
        if CommentF:
            cells.append(comment)

        record_set.append(cells)

    # There is no need to decouple here, since the record_set is created
    # each time that the function is called.
    return record_set
Пример #35
0
 def test(X):
     print "#_______________________________________________"
     nset  = NumberSet([ Interval(x, y) for x, y in X])
     clone = nset.clone()
     print "#NumberSet:         %s" % nset
     result = nset.clone()
     result.complement(all)
     print "#NumberSet.inverse: %s" % result
     assert result.is_equal(nset.get_complement(all))
     assert result.intersection(nset).is_empty()
     assert result.union(nset).is_all()