def test_merge(self):
    """Randomized check that range updates on ``lb.lbc`` / ``lb.eaw`` match plain dicts.

    Each of the 1000 rounds picks a random code point range within 0..511,
    a random property value and one of the two maps.  The range is stored
    in a scratch ``LineBreak`` under a tuple-of-code-points key and merged
    into ``lb`` with ``update()``; the same assignment is mirrored, one
    code point at a time, into an ordinary dict.  Afterwards every code
    point in 0..511 must either give the mirrored value or raise
    ``KeyError`` when the mirror has no entry for it.
    """
    lb = LineBreak(lbc=None, eaw=None)
    lb2 = LineBreak(lbc=None, eaw=None)
    # Reference dicts mirroring what lb.lbc and lb.eaw are expected to hold.
    lbc = {}
    eaw = {}
    for _ in range(1000):
        # Roughly one round in a hundred starts over from empty maps.
        if not randrange(0, 100):
            lb.lbc.clear()
            lb.eaw.clear()
            lbc.clear()
            eaw.clear()

        lb2 = LineBreak(lbc=None, eaw=None)
        beg = randrange(0, 512)
        end = randrange(0, 512)
        if beg > end:
            beg, end = end, beg
        prop = randrange(0, 39)
        idx = randrange(0, 2)

        # Mirror the assignment into the reference dict, per code point.
        reference = lbc if idx else eaw
        for c in range(beg, end + 1):
            reference[c] = prop

        # Store the whole range in the scratch object, then merge it.
        if idx:
            lb2.lbc[tuple(range(beg, end + 1))] = prop
            lb.lbc.update(lb2.lbc)
        else:
            lb2.eaw[tuple(range(beg, end + 1))] = prop
            lb.eaw.update(lb2.eaw)

        #print("(%5X-%5X) = %d: %d" % (beg, end, idx, prop))
        #if idx:
        #    lb.lbc._dump()
        #else:
        #    lb.eaw._dump()

        for c in range(0, 512):
            for label, expected, actual in (
                ("lbc", lbc, lb.lbc),
                ("eaw", eaw, lb.eaw),
            ):
                if c in expected:
                    self.assertEqual(expected[c], actual[c])
                    continue
                try:
                    actual[c]
                except KeyError:
                    pass
                else:
                    # A bare ``raise`` has no active exception here; report
                    # the unexpected entry as a proper test failure instead.
                    self.fail(
                        "%s has an unexpected entry for code point %d" % (label, c)
                    )
def doTest(self, pairs, **kwds):
    """Wrap each input text and compare the joined result with the expected text.

    pairs -- iterable of ``(infn, outfn)`` base names; the input is read
             from ``infn + ".in"`` and the expectation from
             ``outfn + ".out"`` via ``self.readText``.
    kwds  -- keyword arguments passed unchanged to ``LineBreak``; a single
             instance is built once and shared by all pairs.
    """
    breaker = LineBreak(**kwds)
    for in_name, out_name in pairs:
        source = self.readText(in_name + ".in")
        pieces = [unicode(piece) for piece in breaker.wrap(source)]
        expected = self.readText(out_name + ".out")
        self.assertEqual("".join(pieces), expected)
def doTestArray(self, pairs, **kwds):
    """Wrap each input text and compare the result line by line with the expected file.

    pairs -- iterable of ``(infn, outfn)`` base names; the input is read
             from ``infn + ".in"`` via ``self.readText``, the expectation
             from ``test-data/<outfn>.out``.
    kwds  -- keyword arguments passed unchanged to ``LineBreak``; a new
             instance is built for every pair.

    The list of items returned by ``wrap()`` is compared with the list of
    UTF-8 decoded lines of the expected file (line terminators are kept).
    """
    for infn, outfn in pairs:
        instring = self.readText(infn + ".in")
        lb = LineBreak(**kwds)
        broken = [unicode(x) for x in lb.wrap(instring)]
        # ``with`` closes the file even if reading or decoding raises.
        with open(os.path.join("test-data", outfn + ".out"), "rb") as fp:
            outstrings = [unicode(x, "utf-8") for x in fp.readlines()]
        self.assertEqual(broken, outstrings)
def test_00LineBreakTest(self):
    """Run every case in ``test-data/LineBreakTest.txt`` through ``LineBreak.wrap``.

    Returns silently when the data file cannot be opened.  The first two
    lines of the file are echoed as a header.  For each remaining
    non-empty line the text after ``#`` is kept as a description, a
    leading U+00D7 and a trailing U+00F7 are dropped, and the hex code
    points between the marks are turned into the string to wrap.  The
    wrapped result is rendered back in the same notation (code points
    joined by `` U+00D7 ``, wrapped pieces joined by `` U+00F7 ``) and
    compared with the line.

    Individual mismatches are printed and counted rather than aborting
    the run; a single ``AssertionError`` is raised at the end if any
    case failed.
    """
    import traceback

    commentRe = re.compile(r"\s*#\s*")
    opRe = re.compile(r"\s*(?:" + unichr(0xF7) + "|" + unichr(0xD7) + r")\s*")
    try:
        fp = open(os.path.join("test-data", "LineBreakTest.txt"), "rb")
    except IOError:
        # Data file not available: nothing to check.
        return
    # Read everything up front so the file is closed even if a later step raises.
    with fp:
        header = (fp.readline().strip(), fp.readline().strip())
        lines = fp.readlines()

    eaw = {}
    for c in range(1, 0xFFFD):
        eaw[unichr(c)] = eawN
    lb = LineBreak(break_indent=False, width=1, eaw=eaw, format=None, legacy_cm=False)

    print("")
    print(header[0])
    print(header[1])

    # Separators used to render the wrapped result in the file's notation.
    break_sep = unistr(0x20, 0xF7, 0x20)
    nobreak_sep = unistr(0x20, 0xD7, 0x20)

    errs = 0
    tests = 0
    for raw in lines:
        line = unicode(raw, "utf-8").rstrip()
        parts = commentRe.split(line, 1)
        desc = parts[1] if len(parts) > 1 else ""
        line = parts[0].strip()
        if not line:
            continue
        if line.startswith(unichr(0xD7)):  # leading U+00D7 MULTIPLICATION SIGN
            line = line[1:].lstrip()
        if line.endswith(unichr(0xF7)):  # trailing U+00F7 DIVISION SIGN
            line = line[:-1].rstrip()

        s = "".join([unichr(int(c, 16)) for c in opRe.split(line) if len(c) > 0])
        b = break_sep.join(
            [
                nobreak_sep.join(["%04X" % ord(c) for c in unicode(x)])
                for x in lb.wrap(s)
            ]
        )
        try:
            self.assertEqual(b, line)
        except AssertionError:
            # Report and keep going so that all failing cases are listed.
            print("Failed: %s" % desc)
            traceback.print_exc(0)
            errs = errs + 1
        tests = tests + 1

    if errs > 0:
        raise AssertionError("%d of %d subtests are failed." % (errs, tests))