def main(library_dir):
    """Open the library stored in *library_dir* and run the GUI on it.

    The library is built from ``XapianIndex`` and ``SQLAlchemyDatabase``
    and is always closed once it has been opened, even when the GUI is
    interrupted or fails.  A ``KeyboardInterrupt`` (CTRL-C) is swallowed
    so the process exits silently.

    :param library_dir: directory passed to ``DigitalLibrary``.
    """
    try:
        library = DigitalLibrary(library_dir, XapianIndex, SQLAlchemyDatabase)
        try:
            gui = GUI(library)
            gui.start()
        finally:
            # Release the library even if the GUI was interrupted; before,
            # a CTRL-C during gui.start() skipped close() entirely.
            library.close()
    except KeyboardInterrupt:
        pass  # Exit process silently using CTRL-C.
class TestDigitalLibrary(unittest.TestCase):
    """Integration tests for ``DigitalLibrary``.

    Each test builds a fresh library in the ``data`` directory next to this
    file (constructed with ``XapianIndex`` and ``SQLAlchemyDatabase``) and
    removes that directory afterwards.  Fixture documents (``en.ps``,
    ``es.txt``, ``en.pdf``, ``en.djvu``, ``similar.txt`` and
    ``not-retrievable.txt``) are read from the same directory as this file.
    """

    def setUp(self):
        """Create the library under ``<tests dir>/data``."""
        self._tests_dir = os.path.dirname(os.path.abspath(__file__))
        self._library_dir = os.path.join(self._tests_dir, 'data')
        self._library = DigitalLibrary(self._library_dir, XapianIndex,
                                       SQLAlchemyDatabase)

    def tearDown(self):
        """Close the library and delete its directory."""
        try:
            self._library.close()
        finally:
            # Remove the directory even if close() fails, so a stale library
            # cannot leak into the next test.
            shutil.rmtree(self._library_dir)

    # ------------------------------------------------------------------
    # Fixture helpers.
    #
    # These add a document, verify it, and return it.  They are separate
    # from the ``test_*`` methods because unittest deprecates test methods
    # that return a value.
    # ------------------------------------------------------------------

    def _add_and_check(self, filename, tags, mime_type, language_code,
                       has_thumbnails):
        """Add the fixture *filename* with *tags* and verify the result.

        :param filename: fixture file name, relative to the tests directory.
        :param tags: set of tags to attach to the document.
        :param mime_type: expected ``mime_type`` of the added document.
        :param language_code: expected ``language_code`` of the document.
        :param has_thumbnails: when true, the three thumbnail paths must be
            existing files; otherwise they must all be ``None``.
        :returns: the document returned by ``add_doc``.
        """
        path = os.path.join(self._tests_dir, filename)
        doc = self._library.add_doc(path, tags)
        self.assertEqual(doc.mime_type, mime_type)
        self.assertTrue(os.path.isfile(doc.document_abspath))
        thumbnails = (doc.small_thumbnail_abspath,
                      doc.normal_thumbnail_abspath,
                      doc.large_thumbnail_abspath)
        for thumbnail in thumbnails:
            if has_thumbnails:
                self.assertTrue(os.path.isfile(thumbnail))
            else:
                self.assertIsNone(thumbnail)
        self.assertEqual(doc.language_code, language_code)
        self.assertSetEqual(doc.tags, tags)
        # The document read back from the library must match the one that
        # add_doc returned.
        other_doc = self._library.get_doc(doc.hash_md5)
        self._assert_docs_equal(doc, other_doc)
        return doc

    def _add_ps_doc(self):
        """Add ``en.ps`` tagged a, b, c, d and return the document."""
        return self._add_and_check('en.ps', set('abcd'),
                                   'application/postscript', 'en', True)

    def _add_txt_doc(self):
        """Add ``es.txt`` tagged a, b, c and return the document."""
        return self._add_and_check('es.txt', set('abc'),
                                   'text/plain', 'es', False)

    def _add_pdf_doc(self):
        """Add ``en.pdf`` tagged a, b and return the document."""
        return self._add_and_check('en.pdf', set('ab'),
                                   'application/pdf', 'en', True)

    def _add_djvu_doc(self):
        """Add ``en.djvu`` tagged a and return the document."""
        return self._add_and_check('en.djvu', set('a'),
                                   'image/vnd.djvu', 'en', True)

    # ------------------------------------------------------------------
    # Adding, retrieving and deleting documents.
    # ------------------------------------------------------------------

    def test_add_doc_ps(self):
        """A PostScript document is added with thumbnails."""
        self._add_ps_doc()

    def test_add_doc_txt(self):
        """A plain text document is added without thumbnails."""
        self._add_txt_doc()

    def test_add_doc_pdf(self):
        """A PDF document is added with thumbnails."""
        self._add_pdf_doc()

    def test_add_doc_djvu(self):
        """A DjVu document is added with thumbnails."""
        self._add_djvu_doc()

    def test_add_doc_all(self):
        """All four fixtures can coexist and be read back unchanged."""
        ps_doc = self._add_ps_doc()
        txt_doc = self._add_txt_doc()
        pdf_doc = self._add_pdf_doc()
        djvu_doc = self._add_djvu_doc()
        for doc in (ps_doc, txt_doc, pdf_doc, djvu_doc):
            other_doc = self._library.get_doc(doc.hash_md5)
            self._assert_docs_equal(doc, other_doc)

    def test_add_doc_exact_duplicate(self):
        """Adding the same file twice raises ``DocumentDuplicatedExact``."""
        # The first add is setup and must succeed; only the second add is
        # allowed to raise.
        self._add_txt_doc()
        with self.assertRaises(error.DocumentDuplicatedExact):
            self._add_txt_doc()

    def test_add_doc_similar_duplicate(self):
        """Adding ``similar.txt`` after ``es.txt`` raises
        ``DocumentDuplicatedSimilar``."""
        self._add_txt_doc()
        similar_path = os.path.join(self._tests_dir, 'similar.txt')
        with self.assertRaises(error.DocumentDuplicatedSimilar):
            self._library.add_doc(similar_path, set('abc'))

    def test_add_doc_not_retrievable(self):
        """Adding ``not-retrievable.txt`` with no tags raises
        ``DocumentNotRetrievable``."""
        doc_path = os.path.join(self._tests_dir, 'not-retrievable.txt')
        with self.assertRaises(error.DocumentNotRetrievable):
            self._library.add_doc(doc_path, set())

    def test_get_doc_not_found(self):
        """Looking up an unknown hash raises ``DocumentNotFound``."""
        with self.assertRaises(error.DocumentNotFound):
            self._library.get_doc('7d78df0a62e07eeeef6b942abe5bdc7f')

    def test_delete_doc(self):
        """Deleting a document removes its files and its library entry."""
        pdf_doc = self._add_pdf_doc()
        # Capture the paths before deleting the document.
        paths = (pdf_doc.document_abspath,
                 pdf_doc.small_thumbnail_abspath,
                 pdf_doc.normal_thumbnail_abspath,
                 pdf_doc.large_thumbnail_abspath)
        self._library.delete_doc(pdf_doc.hash_md5)
        for path in paths:
            self.assertFalse(os.path.exists(path))
        with self.assertRaises(error.DocumentNotFound):
            self._library.get_doc(pdf_doc.hash_md5)

    def test_add_doc_previously_imported(self):
        """A deleted document can be added again."""
        pdf_doc = self._add_pdf_doc()
        self._library.delete_doc(pdf_doc.hash_md5)
        self._add_pdf_doc()

    # ------------------------------------------------------------------
    # Tags.
    # ------------------------------------------------------------------

    def test_get_all_tags(self):
        """``get_all_tags`` is empty at first, then reflects added tags."""
        self.assertSetEqual(self._library.get_all_tags(), set())
        self._add_txt_doc()
        self.assertSetEqual(self._library.get_all_tags(), set('abc'))

    def test_rename_tag(self):
        """Renaming a tag updates the tag list, the document and search."""
        original = self._add_txt_doc()
        self.assertSetEqual(self._library.get_all_tags(), set('abc'))
        self._library.rename_tag('c', 'z')
        self.assertSetEqual(self._library.get_all_tags(), set('abz'))
        modified = self._library.get_doc(original.hash_md5)
        self.assertSetEqual(modified.tags, set('abz'))
        self.assertListEqual(self._library.search('', set('c')), [])
        self.assertListEqual(self._library.search('', set('z')),
                             [original.hash_md5])

    def test_rename_tag_tilde(self):
        """A tag can be renamed to a non-ASCII name."""
        original = self._add_txt_doc()
        self.assertSetEqual(self._library.get_all_tags(), set('abc'))
        self._library.rename_tag('a', u'á')
        self.assertSetEqual(self._library.get_all_tags(), set(u'ábc'))
        modified = self._library.get_doc(original.hash_md5)
        self.assertSetEqual(modified.tags, set(u'ábc'))
        self.assertListEqual(self._library.search('', set(u'á')),
                             [original.hash_md5])

    def test_rename_tag_exists(self):
        """Renaming a tag to an existing tag leaves a single tag."""
        original = self._add_txt_doc()
        self.assertSetEqual(self._library.get_all_tags(), set('abc'))
        self._library.rename_tag('c', 'b')
        self.assertSetEqual(self._library.get_all_tags(), set('ab'))
        modified = self._library.get_doc(original.hash_md5)
        self.assertSetEqual(modified.tags, set('ab'))
        self.assertListEqual(self._library.search('', set('c')), [])
        self.assertListEqual(self._library.search('', set('b')),
                             [original.hash_md5])

    def test_update_tags(self):
        """After ``update_tags`` the document is found by its new tags."""
        doc = self._add_txt_doc()
        self.assertListEqual(self._library.search('', set('xyz')), [])
        self._library.update_tags(doc.hash_md5, set('xyz'))
        self.assertListEqual(self._library.search('', set('xyz')),
                             [doc.hash_md5])

    def test_update_tags_tildes(self):
        """``update_tags`` accepts non-ASCII tags."""
        doc = self._add_txt_doc()
        self._library.update_tags(doc.hash_md5, set(u'áéíóóñ'))
        self.assertListEqual(self._library.search('', set(u'áéíóóñ')),
                             [doc.hash_md5])

    def test_update_tags_not_retrievable(self):
        """Clearing every tag of ``not-retrievable.txt`` raises
        ``DocumentNotRetrievable``."""
        doc_path = os.path.join(self._tests_dir, 'not-retrievable.txt')
        doc = self._library.add_doc(doc_path, set('abc'))
        with self.assertRaises(error.DocumentNotRetrievable):
            self._library.update_tags(doc.hash_md5, set())

    # ------------------------------------------------------------------
    # Counters.
    # ------------------------------------------------------------------

    def test_get_doc_count(self):
        """``get_doc_count`` grows by one with every added document."""
        self._add_ps_doc()
        self.assertEqual(self._library.get_doc_count(), 1)
        self._add_txt_doc()
        self.assertEqual(self._library.get_doc_count(), 2)
        self._add_pdf_doc()
        self.assertEqual(self._library.get_doc_count(), 3)
        self._add_djvu_doc()
        self.assertEqual(self._library.get_doc_count(), 4)

    def test_get_tag_count(self):
        """``get_tag_count`` counts the documents carrying each tag."""
        self._add_ps_doc()
        self._add_txt_doc()
        self._add_pdf_doc()
        self._add_djvu_doc()
        self.assertEqual(self._library.get_tag_count('d'), 1)
        self.assertEqual(self._library.get_tag_count('c'), 2)
        self.assertEqual(self._library.get_tag_count('b'), 3)
        self.assertEqual(self._library.get_tag_count('a'), 4)

    def test_get_tag_freq(self):
        """``get_tag_freq`` is the tag count divided by the document count."""
        self._add_ps_doc()
        self._add_txt_doc()
        self._add_pdf_doc()
        self._add_djvu_doc()
        self.assertEqual(self._library.get_tag_freq('d'), 1.0/4.0)
        self.assertEqual(self._library.get_tag_freq('c'), 2.0/4.0)
        self.assertEqual(self._library.get_tag_freq('b'), 3.0/4.0)
        self.assertEqual(self._library.get_tag_freq('a'), 4.0/4.0)

    # ------------------------------------------------------------------
    # Searching.
    # ------------------------------------------------------------------

    def test_search_empty(self):
        """A query matching nothing returns an empty list."""
        self.assertListEqual(self._library.search('foo bar', set()), [])
        self.assertListEqual(self._library.search('foo bar', set('c')), [])
        self._add_txt_doc()
        self._add_pdf_doc()
        self.assertListEqual(self._library.search('foo bar', set()), [])
        self.assertListEqual(self._library.search('foo bar', set('c')), [])

    def test_search_all(self):
        """An empty query with no tags returns every document."""
        self.assertListEqual(self._library.search('', set()), [])
        txt_doc = self._add_txt_doc()
        self.assertListEqual(self._library.search('', set()),
                             [txt_doc.hash_md5])
        pdf_doc = self._add_pdf_doc()
        results = self._library.search('', set())
        # Only membership is checked here, not the order of the results.
        self.assertSetEqual(set(results),
                            {txt_doc.hash_md5, pdf_doc.hash_md5})

    def test_search_simple(self):
        """A text query returns both documents, the PDF first."""
        txt_doc = self._add_txt_doc()
        pdf_doc = self._add_pdf_doc()
        results = self._library.search('+VEDA EDA', set())
        self.assertListEqual(results, [pdf_doc.hash_md5, txt_doc.hash_md5])

    def test_search_filtered(self):
        """Tags a, b, c narrow the text query down to the text document."""
        txt_doc = self._add_txt_doc()
        self._add_pdf_doc()
        results = self._library.search('+VEDA EDA', set('abc'))
        self.assertListEqual(results, [txt_doc.hash_md5])

    # ------------------------------------------------------------------
    # Assertion helpers.
    # ------------------------------------------------------------------

    def _assert_docs_equal(self, x, y):
        """Assert that documents *x* and *y* agree on every compared field."""
        self.assertEqual(x.hash_md5, y.hash_md5)
        self.assertEqual(x.hash_ssdeep, y.hash_ssdeep)
        self.assertEqual(x.mime_type, y.mime_type)
        self.assertEqual(x.document_path, y.document_path)
        self.assertEqual(x.document_abspath, y.document_abspath)
        self.assertEqual(x.document_size, y.document_size)
        self.assertEqual(x.small_thumbnail_abspath, y.small_thumbnail_abspath)
        self.assertEqual(x.normal_thumbnail_abspath,
                         y.normal_thumbnail_abspath)
        self.assertEqual(x.large_thumbnail_abspath, y.large_thumbnail_abspath)
        self.assertEqual(x.language_code, y.language_code)
        self.assertSetEqual(x.tags, y.tags)