Exemplo n.º 1
0
 def test_box(self):
     """Check box-drawing output with box protection on codepage 936.

     A box top edge (CHR$(218), ten CHR$(196), CHR$(191)) is printed both
     to a text file on device C: and to the screen.  The file is then read
     back as UTF-8 and the screen text is fetched twice: once in the
     default type (compared against bytes) and once as unicode.
     """
     text_type = type(u'')
     session_options = dict(
         codepage=read_codepage('936'),
         box_protect=True,
         textfile_encoding='utf-8',
         devices={'c': self.output_path()},
     )
     with Session(**session_options) as session:
         # write the box edge to a text file ...
         session.execute('open "c:boxtest.txt" for output as 1')
         session.execute(
             'PRINT#1, CHR$(218);STRING$(10,CHR$(196));CHR$(191)'
         )
         # ... and print the same edge on the screen
         session.execute('PRINT CHR$(218);STRING$(10,CHR$(196));CHR$(191)')
         # screen rows as raw codepage bytes
         screen_bytes = [line.strip() for line in self.get_text(session)]
         # screen rows converted to unicode
         screen_text = [
             line.strip()
             for line in self.get_text(session, as_type=text_type)
         ]
     # the session is closed at this point; inspect what ended up on disk
     with open(
             self.output_path('BOXTEST.TXT'), 'r', encoding='utf-8'
     ) as stream:
         file_text = stream.read()
     # expected file text: U+FEFF, the box edge, a newline and a final \x1a
     assert file_text == u'\ufeff┌──────────┐\n\x1a'
     # on screen the raw bytes are untouched ...
     assert screen_bytes[0] == (
         b'\xda\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xc4\xbf'
     )
     # ... and the unicode view shows box-drawing characters
     assert screen_text[0] == u'┌──────────┐'
Exemplo n.º 2
0
    def test_hello(self):
        """Round-trip a greeting through each of the listed codepages.

        For every codepage the greeting is written to a UTF-8 file named
        after the greeting itself, printed on a cleared screen, and read
        back from that file with LINE INPUT.  Both the variable contents
        and the first screen row, converted to unicode, must equal the
        original greeting.
        """
        text_type = type(u'')
        greetings = {
            # the fully vocalised form contains \u064b, which is not in 720:
            #'720': u'أهلاً بالعالم',
            '720': u'أهلا بالعالم',
            '737': u'Γεια σου κόσμε',
            '862': u'שלום עולם',
            '866': u'Здравствуй, мир',
            # 874 is left out: the combining graphemes \u0e27\u0e31 and
            # \u0e14\u0e35 are in the codepage as separate chars, so
            # converting to bytes fails
            #'874': u'สวัสดีโลก',
            #'874': u'\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e42\u0e25\u0e01',
            '932': u'こんにちは、 世界',
            '936': u'你好世界',
            '949': u'반갑다 세상아',
            'viscii': u'Xin chào thế giới',
        }
        # note: this is a round trip unicode -> codepage -> unicode,
        # which does not always work
        for codepage_name, greeting in greetings.items():
            # the greeting doubles as the name of the file that holds it
            with open(
                    self.output_path(greeting), 'w', encoding='utf-8'
            ) as stream:
                stream.write(greeting)
            session_options = dict(
                codepage=read_codepage(codepage_name),
                textfile_encoding='utf-8',
                devices={'c': self.output_path()},
            )
            with Session(**session_options) as session:
                print_statement = u'cls:print "{}"'.format(greeting)
                session.execute(print_statement)

                #TODO: (api) should have an errors= option in convert?
                #TODO: (codepages) only perform grapheme clustering if the
                # codepage has actual clusters in code points? (but:
                # non-canonical combinations) override clustering if
                # clustering elements in codepage?
                # debugging aid kept from the original:
                #cp_inv = {_v: _k for _k, _v in cp_dict.items()}
                #print repr(hi), repr(s.convert(hi, to_type=type(b''))), repr([cp_inv[x] for x in hi])

                open_statement = u'open "c:{}" for input as 1'.format(
                    greeting
                )
                session.execute(open_statement)
                session.execute('line input#1, a$')
                # what LINE INPUT read from the file
                value_read = session.get_variable('a$', as_type=text_type)
                assert value_read == greeting
                # what PRINT left on the screen
                screen_rows = [
                    line.strip()
                    for line in self.get_text(session, as_type=text_type)
                ]
                assert screen_rows[0] == greeting
Exemplo n.º 3
0
 def test_box2(self):
     """Check which byte runs box protection keeps as box drawing.

     Four strings delimited by "+" are built from CHR$(196) (and, in one
     case, CHR$(190)) under codepage 936 with box protection on.  Their
     raw bytes must be unchanged; the unicode conversion shows where the
     bytes are kept as box-drawing characters and where a pair is read as
     a double-byte character instead.
     """
     text_type = type(u'')
     assignments = (
         'a$= "+"+STRING$(3,CHR$(196))+"+"',
         'b$= "+"+STRING$(2,CHR$(196))+"+"',
         'c$= "+"+STRING$(1,CHR$(196))+"+"',
         'd$= "+"+CHR$(196)+chr$(196)+chr$(190)+chr$(196)+"+"',
     )
     expected_bytes = (
         ('a$', b'+\xc4\xc4\xc4+'),
         ('b$', b'+\xc4\xc4+'),
         ('c$', b'+\xc4+'),
         ('d$', b'+\xc4\xc4\xbe\xc4+'),
     )
     expected_text = (
         # three consecutive lines are protected
         ('a$', u'+\u2500\u2500\u2500+'),
         # two consecutive lines are not
         ('b$', u'+\u54ea+'),
         # single lead byte is shown as box drawing
         ('c$', u'+\u2500+'),
         # two box lines followed by a non-box lead & trail byte
         # - not protected
         ('d$', u'+\u54ea\u7078+'),
     )
     session_options = dict(codepage=read_codepage('936'), box_protect=True)
     with Session(**session_options) as session:
         for statement in assignments:
             session.execute(statement)
         # the raw bytes are stored as given
         for name, raw in expected_bytes:
             assert session.get_variable(name) == raw
         # the unicode view depends on the box-protection rules
         for name, text in expected_text:
             assert session.get_variable(name, as_type=text_type) == text