def test_create(self):
    """
    Test that Quotation.create works for a supported language and
    quotation character
    """
    cases = [
        {"lc": constants.LC_FRENCH, "char": u"«"},
        {"lc": constants.LC_ENGLISH, "char": u"\"", "close": True},
        {"lc": constants.LC_JAPANESE, "char": u"「"},
    ]
    for case in cases:
        lang_code = case["lc"]
        symbol = case["char"]
        # Only cases that carry a "close" key ask create() to force a close.
        must_close = "close" in case
        quotation = Quotation.create(lang_code, symbol, force_close=must_close)
        # The instance must be truthy and its str() must equal the UTF-8
        # encoding of the character it was created from.
        self.assertTrue(quotation)
        self.assertEqual(str(quotation), symbol.encode('utf-8'))
def test_mirror(self):
    """
    Test that mirror() on an opening Quotation returns the matching
    closing Quotation
    """
    tests = [
        {"opening": u"„", "closing": u"“", "lc": constants.LC_GERMAN},
        {"opening": u"『", "closing": u"』", "lc": constants.LC_CHINESE},
    ]
    for test in tests:
        opening = Quotation.create(test["lc"], test["opening"])
        closing = opening.mirror()
        # assertTrue(value, msg) would treat the expected character as the
        # failure message and never compare it, so use assertEqual.
        # The expected value is UTF-8 encoded, as in test_create, because
        # str() of a Quotation is compared against encoded bytes there.
        self.assertEqual(str(closing), test["closing"].encode('utf-8'))
        # NOTE(review): `^` is presumably Quotation's pairing check between
        # an opening and a closing instance -- confirm against Quotation.
        self.assertTrue(opening ^ closing)
def extract(self):
    """
    Yields each Quotation instance extractable from text

    Walks self.text character by character. A character counts as a
    quotation when it is listed in QUOTATION_MAP for self.lc. The
    force_close flag passed to Quotation.create alternates on every
    yielded quotation, starting with False for the first one.
    """
    is_closing = True
    for position, symbol in enumerate(self.text):
        if symbol not in QUOTATION_MAP.get(self.lc, []):
            continue
        # A lone ranger directly preceded by an alphanumeric character is
        # highly likely not a true quotation; skip it without toggling
        # the open/close state.
        if (symbol in LONE_RANGERS and position > 0
                and ALPHANUMERIC_PATTERN.match(self.text[position - 1])):
            continue
        # Flip before creating, so the very first quotation is an opening.
        is_closing = not is_closing
        yield Quotation.create(self.lc, symbol, force_close=is_closing)