def test_readlines_in_range(self):
    # Exercise VerdictParser._lines_in_range twice: first on a small inline
    # docstring-like text with ad-hoc patterns, then on the fixture parser
    # (self.parser) with the plaintiff/defendant patterns from rgx_map.
    text = """
    Fetches rows from a Bigtable.

    Retrieves rows pertaining to the given keys from the Table instance
    represented by big_table. Silly things may happen if
    other_silly_variable is not None.

    Args:
        big_table: An open Bigtable Table instance.
        keys: A sequence of strings representing the key of each table row
            to fetch.
        other_silly_variable: Another optional variable, that has a much
            longer name than the other args, and which does nothing.

    Returns:
        A dict mapping keys to the corresponding table row data
        fetched. Each row is represented as a tuple of strings.
    """
    # Raw strings keep the regex escapes away from the string-literal parser.
    rgx_start = re.compile(r"\s*Args:")
    rgx_end = re.compile(r"\s*Returns:")
    parser = VerdictParser(text)
    lines = parser._lines_in_range(rgx_start, rgx_end)
    self.assertEqual(7, len(lines))
    # The line matching the end pattern must not be part of the result.
    self.assertFalse(rgx_end.match(lines[-1]))

    rgx_start = VerdictParser.rgx_map[VerdictParser.e_plaintiff]
    rgx_end = VerdictParser.rgx_map[VerdictParser.e_defendant]
    lines = self.parser._lines_in_range(rgx_start, rgx_end)
    # assertEqual, not assertTrue: the second argument of assertTrue is the
    # failure message, so assertTrue(5, n) can never fail.
    # NOTE(review): the expected count of 5 comes from the previous
    # (vacuous) assertion - confirm it against verdict_sample.txt.
    self.assertEqual(5, len(lines))
def test_get_defendant_info(self): text = ur"""施汝憬律師 被 上訴 人 幸福空間有限公司 法定代理人 林鎮業 訴訟代理人 陳建瑜律師 黃國紘律師 上列當事人間侵害商標權有關財產權爭議等事件,上訴人對於中 """ parser = VerdictParser(text) # pdb.set_trace() info = parser._get_info(VerdictParser.e_defendant) self.assertEqual(4, len(info))
def setUp(self):
    # Load verdict_sample.txt from the directory of this test module and
    # build the VerdictParser that the tests reach through self.parser.
    #
    # NOTE(review): unittest normally creates a fresh TestCase instance for
    # every test method, so this guard is not expected to short-circuit;
    # it is kept to preserve the existing behaviour.
    if "verdict_text" in self.__dict__:
        return
    # Joining ".." onto the module path and normalising yields the
    # directory that contains this file.
    self.test_path = os.path.abspath(os.path.join(__file__, ".."))
    sample = os.path.join(self.test_path, "verdict_sample.txt")
    # open() is the documented way to open a file; calling the file()
    # constructor directly is discouraged.
    with open(sample) as _fp:
        self.verdict_text = _fp.read().decode("utf-8")
    self.parser = VerdictParser(self.verdict_text)
def _set_up_contents(self, file_name):
    # Read the fixture `file_name` (relative to self.test_path), decode it
    # as UTF-8, point self.parser at a VerdictParser built from it and
    # return the decoded text.
    test_case = os.path.join(self.test_path, file_name)
    # open() is the documented way to open a file; calling the file()
    # constructor directly is discouraged.
    with open(test_case) as _fp:
        contents = _fp.read().decode("utf-8")
    self.parser = VerdictParser(contents)
    return contents
class VerdictParserTest(unittest.TestCase): def setUp(self): if "verdict_text" in self.__dict__: return self.test_path = os.path.abspath(os.path.join(__file__, "..")) sample = os.path.join(self.test_path, "verdict_sample.txt") with file(sample) as _fp: self.verdict_text = _fp.read().decode("utf-8") self.parser = VerdictParser(self.verdict_text) def test_readlines_in_range(self): text = """ Fetches rows from a Bigtable. Retrieves rows pertaining to the given keys from the Table instance represented by big_table. Silly things may happen if other_silly_variable is not None. Args: big_table: An open Bigtable Table instance. keys: A sequence of strings representing the key of each table row to fetch. other_silly_variable: Another optional variable, that has a much longer name than the other args, and which does nothing. Returns: A dict mapping keys to the corresponding table row data fetched. Each row is represented as a tuple of strings. """ rgx_start = re.compile("\s*Args:") rgx_end = re.compile("\s*Returns:") parser = VerdictParser(text) lines = parser._lines_in_range(rgx_start, rgx_end) self.assertEqual(7, len(lines)) self.assertTrue(not rgx_end.match(lines[-1])) rgx_start = VerdictParser.rgx_map[VerdictParser.e_plaintiff] rgx_end = VerdictParser.rgx_map[VerdictParser.e_defendant] lines = self.parser._lines_in_range(rgx_start, rgx_end) self.assertTrue(5, len(lines)) # This test case passes unit test. It exists because it tests re module with # utf8 encoding, which I was not familiar at that time. One leason I learned # is file encoding matters. If your source code(*.py) is not utf-8, re.match # returns False. 
To check the file encoding, just use # # $ type your_prog.py # # def test_re_match_utf8(self): # line = "被 上訴 人 幸福空間有限公司" # rgx = re.compile(r"被\s*上訴\s*人??(?P<name>.+)", re.UNICODE) # match = rgx.match(line) # self.assertTrue(match != None) # name = match.expand(r"\g<name>") # print name def test_get_defendant_info(self): text = ur"""施汝憬律師 被 上訴 人 幸福空間有限公司 法定代理人 林鎮業 訴訟代理人 陳建瑜律師 黃國紘律師 上列當事人間侵害商標權有關財產權爭議等事件,上訴人對於中 """ parser = VerdictParser(text) # pdb.set_trace() info = parser._get_info(VerdictParser.e_defendant) self.assertEqual(4, len(info)) # @unittest.skip("") def test_get_defendant(self): defendant = self.parser.get_defendant() self.assertEqual(ur"幸福空間有限公司", defendant.get_name()) self.assertEqual(ur"林鎮業", defendant.get_representative()) self.assertEqual(2, len(defendant.get_lawyers())) self.assertEqual(ur"陳建瑜", defendant.get_lawyers()[0]) self.assertEqual(ur"黃國紘", defendant.get_lawyers()[1]) def test_get_plaintiff(self): plaintiff = self.parser.get_plaintiff() self.assertEqual(ur"美商科高國際有限公司台灣分公司", plaintiff.get_name()) self.assertEqual(ur"蘇德曼", plaintiff.get_representative()) self.assertEqual(2, len(plaintiff.get_lawyers())) self.assertEqual(ur"徐頌雅", plaintiff.get_lawyers()[0]) self.assertEqual(ur"施汝憬", plaintiff.get_lawyers()[1]) def test_re(self): rgx = VerdictParser.rgx_map[VerdictParser.e_serial] match = rgx.match(ur"【裁判字號】 105,台上,81") self.assertEqual(ur"105,台上,81", match.expand("\g<serial>")) rgx = VerdictParser.rgx_map[VerdictParser.e_plaintiff] # Full space self.assertTrue(rgx.match(ur"上 訴 人 美商科高國際") is not None) # Half space self.assertTrue(rgx.match(ur"上 訴 人 美商科高國際") is not None) rgx = VerdictParser.rgx_map[VerdictParser.e_decision] # Half space self.assertTrue(rgx.match(ur" 主 文") is not None) rgx = VerdictParser.rgx_map[VerdictParser.e_reason] # Half space self.assertTrue(rgx.match(ur" 理 由") is not None) rgx = VerdictParser.rgx_map[VerdictParser.e_date] # Half space text = ur"中 華 民 國 一○五 年 一 月 十四 日" self.assertTrue(rgx.match(text) is not None) 
lawyer_re = VerdictParser.rgx_map[VerdictParser.e_lawyer] # TODO: handle the following case: # text = u"訴訟代理人 楊勝斐" # match = lawyer_re.match(text) # self.assertEqual(u"楊勝斐", match.expand(r"\g<name>")) text = u" 再 抗告 人 中國國際商業銀行股份有限公司" self.assertTrue(lawyer_re.match(text) is None) rep_re = VerdictParser.rgx_map[VerdictParser.e_representative] text = u"法 定 代 理 人 邱欽庭" self.assertTrue(rep_re.match(text) is not None) def test_get_date(self): year, month, day = self.parser.get_verdict_date() self.assertEqual(105, year) self.assertEqual(1, month) self.assertEqual(14, day) def test__get_judge_name(self): # trailing half spaces text = ur" 審判長法官 高 孟 焄 " name = _match_judge_name(text) self.assertEqual(ur"高孟焄", name) # trailing full spaces text = ur" 法官 鄭 雅 萍 " name = _match_judge_name(text) self.assertEqual(ur"鄭雅萍", name) def test__get_judge_name2(self): judges = self.parser.get_judge_names() expected = [ur"高孟焄", ur"鄭雅萍", ur"鍾任賜", ur"李錦美", ur"袁靜文"] self.assertEqual(set(expected), set(judges)) def test_get_keywords(self): keywords = self.parser.get_keywords() # Just use a few important words to test the functionality expected = [ur"商標", ur"公平交易法", ur"點擊率", ur"檢索", ur"言論自由", ur"廣告"] for item in expected: self.assertTrue(item in keywords) def test_get_decision(self): expected = (u"上訴駁回。\n" u"第三審訴訟費用由上訴人負擔。") actual = self.parser.get_verdict_decision() self.assertTrue(expected in actual) def test_get_summary(self): actual = self.parser.get_verdict_summary() self.assertEqual(ur"侵害商標權有關財產權爭議等", actual) def _set_up_contents(self, file_name): test_case = os.path.join(self.test_path, file_name) with file(test_case) as _fp: contents = _fp.read().decode("utf-8") self.parser = VerdictParser(contents) return contents def test__get_info(self): contents = self._set_up_contents(u"臺灣高等法院-民-100勞再4") expected = contents.splitlines()[4:7] info = self.parser._get_info(VerdictParser.e_defendant) self.assertEqual(expected, info) contents = self._set_up_contents(u"最高法院-民-93台抗36") info = 
self.parser._get_info(VerdictParser.e_plaintiff) self.assertEqual(4, len(info)) # for line in info: # print("**" + line) contents = self._set_up_contents(u"臺灣高等法院-民-92勞上易25") info = self.parser._get_info(VerdictParser.e_plaintiff) self.assertEqual(6, len(info)) info = self.parser._get_info(VerdictParser.e_defendant) self.assertEqual(3, len(info))