Exemplos de LineBreak em Python, exemplos de textseg.LineBreak em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: TDictTest.py Projeto: hatukanezumi/pytextseg

    def test_merge(self):
        """Randomized check of ``lbc``/``eaw`` range assignment and ``update()``.

        Two plain dicts (``lbc`` and ``eaw``) act as reference models mapping
        each integer key to the property value most recently assigned to it.
        On every round a random inclusive range ``beg..end`` is assigned a
        random property on ``lb2`` (keyed by the tuple of all integers in the
        range), ``lb2``'s table is merged into ``lb``'s with ``update()``, and
        then every key in ``0..511`` is compared between ``lb`` and the
        reference dicts: keys present in the reference must yield the same
        value, keys absent from it must raise ``KeyError``.
        """
        lb = LineBreak(lbc=None, eaw=None)
        lb2 = LineBreak(lbc=None, eaw=None)
        # Reference models: integer key -> expected property value.
        lbc = {}
        eaw = {}

        for _ in range(1000):
            # About once in 100 rounds, start over from empty tables.
            if not randrange(0, 100):
                lb.lbc.clear()
                lb.eaw.clear()
                lbc.clear()
                eaw.clear()
                lb2 = LineBreak(lbc=None, eaw=None)

            beg = randrange(0, 512)
            end = randrange(0, 512)
            if beg > end:
                beg, end = end, beg
            prop = randrange(0, 39)
            idx = randrange(0, 2)  # 1: exercise lbc, 0: exercise eaw

            codes = range(beg, end + 1)
            if idx:
                for c in codes:
                    lbc[c] = prop
                lb2.lbc[tuple(codes)] = prop
                lb.lbc.update(lb2.lbc)
            else:
                for c in codes:
                    eaw[c] = prop
                lb2.eaw[tuple(codes)] = prop
                lb.eaw.update(lb2.eaw)

            #print("(%5X-%5X) = %d: %d" % (beg, end, idx, prop))
            #if idx:
            #    lb.lbc._dump()
            #else:
            #    lb.eaw._dump()

            for c in range(0, 512):
                for name, expected, actual in (("lbc", lbc, lb.lbc), ("eaw", eaw, lb.eaw)):
                    if c in expected:
                        self.assertEqual(expected[c], actual[c])
                        continue
                    try:
                        actual[c]
                    except KeyError:
                        pass
                    else:
                        # An explicit failure instead of a bare ``raise``,
                        # which has no active exception to re-raise here.
                        self.fail(
                            "%s[%d] is set but the reference has no entry "
                            "(last range %d-%d)" % (name, c, beg, end)
                        )

Exemplo n.º 2

0

Exibir arquivo

Arquivo: LineBreakTest.py Projeto: hatukanezumi/pytextseg

 def doTest(self, pairs, **kwds):
     """Wrap each input text and compare the joined result with its expected text.

     ``pairs`` yields ``(infn, outfn)`` base names; ``infn + ".in"`` is read
     as the input and ``outfn + ".out"`` as the expected output, both via
     ``self.readText``.  ``kwds`` are passed through to ``LineBreak``; a
     single instance is reused for all pairs.
     """
     breaker = LineBreak(**kwds)
     for source_name, expected_name in pairs:
         source_text = self.readText(source_name + ".in")
         wrapped = breaker.wrap(source_text)
         pieces = []
         for piece in wrapped:
             pieces.append(unicode(piece))
         actual = "".join(pieces)
         expected = self.readText(expected_name + ".out")
         self.assertEqual(actual, expected)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: LineBreakTest.py Projeto: hatukanezumi/pytextseg

 def doTestArray(self, pairs, **kwds):
     """Wrap each input text and compare the result line by line.

     ``pairs`` yields ``(infn, outfn)`` base names.  The input is read with
     ``self.readText(infn + ".in")``; the expected lines are read from
     ``test-data/<outfn>.out`` as UTF-8.  A fresh ``LineBreak`` built from
     ``kwds`` is used for every pair, and the list of wrapped pieces must
     equal the list of expected lines.
     """
     for infn, outfn in pairs:
         instring = self.readText(infn + ".in")
         lb = LineBreak(**kwds)
         broken = [unicode(x) for x in lb.wrap(instring)]
         fp = open(os.path.join("test-data", outfn + ".out"), "rb")
         try:
             outstrings = [unicode(x, "utf-8") for x in fp.readlines()]
         finally:
             # Close the file even if reading or decoding fails.
             fp.close()
         self.assertEqual(broken, outstrings)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: LineBreakTest.py Projeto: hatukanezumi/pytextseg

    def test_00LineBreakTest(self):
        """Run every case in ``test-data/LineBreakTest.txt`` through ``LineBreak.wrap``.

        The first two lines of the file are printed as a header.  Each
        remaining line is decoded as UTF-8; anything after ``#`` is kept as
        the case description.  The rest is a sequence of hexadecimal code
        points separated by U+00D7 or U+00F7 marks.  The code points are
        joined into a string, wrapped, and the result is rendered back into
        the same notation (U+00F7 between wrapped pieces, U+00D7 between
        characters within a piece) and compared with the line, minus its
        leading U+00D7 and trailing U+00F7.

        Failures are reported per case and counted; a single
        ``AssertionError`` is raised at the end if any case failed.  If the
        data file cannot be opened the test returns without checking
        anything.
        """
        commentRe = re.compile(r"\s*#\s*")
        opRe = re.compile(r"\s*(?:" + unichr(0xF7) + "|" + unichr(0xD7) + r")\s*")

        try:
            fp = open(os.path.join("test-data", "LineBreakTest.txt"), "rb")
        except IOError:
            # The data file is optional; without it there is nothing to run.
            return

        errs = 0
        tests = 0
        try:
            # Give every character the same East Asian width property (eawN).
            eaw = {}
            for c in range(1, 0xFFFD):
                eaw[unichr(c)] = eawN
            lb = LineBreak(break_indent=False, width=1, eaw=eaw, format=None, legacy_cm=False)

            print("")
            print(fp.readline().strip())
            print(fp.readline().strip())

            for l in fp.readlines():
                l = unicode(l, "utf-8").rstrip()
                a = commentRe.split(l, 1)
                if len(a) > 1:
                    desc = a[1]
                else:
                    desc = ""
                l = a[0].strip()
                if not l:
                    continue

                if l.startswith(unichr(0xD7)):  # × (U+00D7)
                    l = l[1:].lstrip()
                if l.endswith(unichr(0xF7)):  # ÷ (U+00F7)
                    l = l[:-1].rstrip()

                s = "".join([unichr(int(c, 16)) for c in opRe.split(l) if c])
                b = unistr(0x20, 0xF7, 0x20).join(
                    [unistr(0x20, 0xD7, 0x20).join(["%04X" % ord(c) for c in unicode(x)]) for x in lb.wrap(s)]
                )
                try:
                    self.assertEqual(b, l)
                except AssertionError:
                    import traceback

                    # Keep going so that all failing cases are reported.
                    print("Failed: %s" % desc)
                    traceback.print_exc(0)
                    errs = errs + 1
                tests = tests + 1
        finally:
            # Close the data file even if setup or a case raises unexpectedly.
            fp.close()

        if errs > 0:
            raise AssertionError("%d of %d subtests are failed." % (errs, tests))