def test_big5(self):
    """Distill the hk.yahoo_big5 fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``'big5 [META]'`` and the title held in
    ``self.test_data[10]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[11]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testdir + 'hk.yahoo_big5.qlog', 'rb')
    title, content = self.test_data[10:12]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta['encoding'], 'big5 [META]')
    self.assertEqual(self.meta['title'], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode('utf8')
    self.assertTrue(content in s)
def test_euc_kr(self):
    """Distill the apache_euc-kr fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``'euc-kr [HTTP]'`` and the title held in
    ``self.test_data[16]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[17]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testdir + 'apache_euc-kr.qlog', 'rb')
    title, content = self.test_data[16:18]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta['encoding'], 'euc-kr [HTTP]')
    self.assertEqual(self.meta['title'], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode('utf8')
    self.assertTrue(content in s)
def test_utf8(self):
    """Distill the ah_ying_utf8 fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``'utf-8 [META]'`` and the title held in
    ``self.test_data[1]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[2]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testdir + 'ah_ying_utf8.qlog', 'rb')
    title, content = self.test_data[1:3]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta['encoding'], 'utf-8 [META]')
    self.assertEqual(self.meta['title'], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode('utf8')
    self.assertTrue(content in s)
def test_iso_8851_1(self):
    """Distill the apache_ISO-8859-1 fixture; check encoding, title, text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``'iso-8859-1 [HTTP]'`` and the title held in
    ``self.test_data[4]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[5]``.
    """
    # NOTE(review): the method name says "8851" while the fixture and the
    # expected encoding are ISO-8859-1. The name is left unchanged so that
    # selecting this test by name keeps working.

    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testdir + 'apache_ISO-8859-1.qlog', 'rb')
    title, content = self.test_data[4:6]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta['encoding'], 'iso-8859-1 [HTTP]')
    self.assertEqual(self.meta['title'], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode('utf8')
    self.assertTrue(content in s)
def test_euc_kr(self):
    """Distill the apache_euc-kr fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``"euc-kr [HTTP]"`` and the title held in
    ``self.test_data[16]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[17]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testpath / "apache_euc-kr.qlog", "rb")
    title, content = self.test_data[16:18]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta["encoding"], "euc-kr [HTTP]")
    self.assertEqual(self.meta["title"], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode("utf8")
    self.assertTrue(content in s)
def test_big5(self):
    """Distill the hk.yahoo_big5 fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``"big5 [META]"`` and the title held in
    ``self.test_data[10]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[11]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testpath / "hk.yahoo_big5.qlog", "rb")
    title, content = self.test_data[10:12]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta["encoding"], "big5 [META]")
    self.assertEqual(self.meta["title"], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode("utf8")
    self.assertTrue(content in s)
def test_iso_8851_1(self):
    """Distill the apache_ISO-8859-1 fixture; check encoding, title, text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``"iso-8859-1 [HTTP]"`` and the title held in
    ``self.test_data[4]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[5]``.
    """
    # NOTE(review): the method name says "8851" while the fixture and the
    # expected encoding are ISO-8859-1. The name is left unchanged so that
    # selecting this test by name keeps working.

    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testpath / "apache_ISO-8859-1.qlog", "rb")
    title, content = self.test_data[4:6]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta["encoding"], "iso-8859-1 [HTTP]")
    self.assertEqual(self.meta["title"], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode("utf8")
    self.assertTrue(content in s)
def test_utf8(self):
    """Distill the ah_ying_utf8 fixture and check encoding, title and text.

    Expects ``distillML.test_distill`` to return 0, ``self.meta`` to report
    the encoding ``"utf-8 [META]"`` and the title held in
    ``self.test_data[1]``, and the UTF-8 decoded output buffer to contain
    ``self.test_data[2]``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testpath / "ah_ying_utf8.qlog", "rb")
    title, content = self.test_data[1:3]

    result = distillML.test_distill(self.fp, self.buf, self.meta)

    self.assertEqual(0, result)
    self.assertEqual(self.meta["encoding"], "utf-8 [META]")
    self.assertEqual(self.meta["title"], title)

    # Containment check instead of ``find(...) > 0``: find() returns 0 when
    # the match sits at the very start of the output, which is still a hit.
    s = self.buf.getvalue().decode("utf8")
    self.assertTrue(content in s)
def test_bad_encoding(self):
    """Distill the ah_ying_bad fixture and check the encoding fallback.

    Only ``self.meta['encoding']`` is verified; it is expected to be
    ``'iso-8859-1 [DEFAULT]'``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testdir + 'ah_ying_bad.qlog', 'rb')

    # The return code is not checked here, only the reported encoding.
    distillML.test_distill(self.fp, self.buf, self.meta)

    # invalid encoding -> default
    self.assertEqual(self.meta['encoding'], 'iso-8859-1 [DEFAULT]')
def test_bad_encoding(self):
    """Distill the ah_ying_bad fixture and check the encoding fallback.

    Only ``self.meta["encoding"]`` is verified; it is expected to be
    ``"iso-8859-1 [DEFAULT]"``.
    """
    # Stored on self rather than in a local; presumably closed by the
    # fixture teardown -- TODO confirm, otherwise the handle leaks.
    self.fp = open(testpath / "ah_ying_bad.qlog", "rb")

    # The return code is not checked here, only the reported encoding.
    distillML.test_distill(self.fp, self.buf, self.meta)

    # invalid encoding -> default
    self.assertEqual(self.meta["encoding"], "iso-8859-1 [DEFAULT]")