def test_browse(self):
    """
    Check that a user can browse only the documents he/she is
    allowed to view (i.e. has read access to).
    """
    common_attrs = dict(size='36', lang='DEU', page_count=3, parent_id=None)

    # document_M is owned by margaret ...
    Document.create_document(
        title="document_M",
        file_name="document_M.pdf",
        user=self.margaret_user,
        **common_attrs
    )
    # ... while document_X is owned by testcase_user, not margaret.
    Document.create_document(
        title="document_X",
        file_name="document_X.pdf",
        user=self.testcase_user,
        **common_attrs
    )

    self.client.login(testcase_user=self.margaret_user)
    response = self.client.get(
        reverse('core:browse'),
        content_type='application/json',
        HTTP_X_REQUESTED_WITH='XMLHttpRequest',
    )
    self.assertEqual(response.status_code, 200)

    nodes = json.loads(response.content)['nodes']
    # margaret is expected to see exactly one node: her own document.
    self.assertEqual(len(nodes), 1)
    self.assertEqual(nodes[0]['title'], 'document_M')
def test_normalize_doc_title(self):
    """
    Normalized pages carry the document title in norm_doc_title, and
    a title keyword search matches every page of the document.
    """
    document = Document.create_document(
        title="kyuss.pdf",
        user=self.user,
        lang="ENG",
        file_name="kyuss.pdf",
        size=1222,
        page_count=3
    )
    document.save()
    # simulate a signal trigger by calling the normalization directly
    normalize_pages(document)

    page_one = document.pages.first()
    self.assertEqual(page_one.norm_doc_title, "kyuss.pdf")

    hits = Page.objects.search("kyuss")
    # The document has 3 pages, so keyword "kyuss" matches 3 pages
    # (because of the normalized page.norm_doc_title).
    self.assertEqual(hits.count(), 3)
def get_whatever_doc(self):
    """Create and return a 3-page "kyuss.pdf" document owned by self.user."""
    doc_attrs = {
        'title': "kyuss.pdf",
        'user': self.user,
        'lang': "ENG",
        'file_name': "kyuss.pdf",
        'size': 1222,
        'page_count': 3,
    }
    return Document.create_document(**doc_attrs)
def test_delete_folder_with_document(self):
    """
    recursive_delete() on a folder is expected to remove the folder
    together with all of its descendants.
    """
    outer = Folder.objects.create(title="A", user=self.user)
    inner = Folder.objects.create(title="B", user=self.user)
    document = Document.create_document(
        title="document.pdf",
        file_name="document.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        parent_id=outer.id,
        page_count=5,
    )
    document.save()

    BaseTreeNode.objects.move_node(inner, outer)
    outer.refresh_from_db()

    # By now folder A has 2 descendants:
    #   * folder B
    #   * document.pdf
    below_outer = outer.get_descendants(include_self=False)
    self.assertEqual(2, below_outer.count())

    recursive_delete(outer)
    # nothing should be left in the tree
    self.assertEqual(0, BaseTreeNode.objects.count())
def test_preview(self):
    """
    Requesting a page preview answers with HTTP 200 and the page
    image file exists in the storage afterwards.
    """
    doc = Document.create_document(
        title="berlin.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3
    )
    source_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
    default_storage.copy_doc(src=source_pdf, dst=doc.path.url())

    preview_url = reverse('core:preview', args=(doc.id, 1, 1))
    response = self.client.post(preview_url)
    self.assertEqual(response.status_code, 200)

    first_page = PagePath(
        document_path=doc.path,
        page_num=1,
        step=Step(1),
        page_count=3
    )
    image_abspath = default_storage.abspath(first_page.img_url())
    self.assertTrue(os.path.exists(image_abspath))
def test_deny_change_for_restricted_document(self):
    """
    Changing a document is restricted to users who have
    PERM_WRITE permission for that document.
    """
    pdf_path = os.path.join(BASE_DIR, "data", "berlin.pdf")
    doc = Document.create_document(
        user=self.testcase_user,
        title='berlin.pdf',
        size=os.path.getsize(pdf_path),
        lang='deu',
        notes="Margaret, stay away!",
        file_name='berlin.pdf',
        page_count=3
    )
    self.assertEqual(Document.objects.count(), 1)

    url = reverse('core:document', args=(doc.id, ))
    payload = json.dumps({'notes': "It works!"})

    # Margaret does not have access to document berlin.pdf
    self.client.login(testcase_user=self.margaret_user)
    response = self.client.patch(
        url,
        payload,
        content_type='application/json',
        HTTP_X_REQUESTED_WITH='XMLHttpRequest',
    )
    self.assertEqual(
        response.status_code,
        HttpResponseForbidden.status_code
    )

    # Because margaret does not have access to the document, the
    # stored notes must be unchanged; reload them from the database.
    doc.refresh_from_db()
    self.assertEqual(doc.notes, "Margaret, stay away!")
def test_allow_view_if_user_has_perm(self):
    """
    A user who was explicitly granted READ on a document
    can view that document.
    """
    pdf_path = os.path.join(BASE_DIR, "data", "berlin.pdf")
    doc = Document.create_document(
        user=self.testcase_user,
        title='berlin.pdf',
        size=os.path.getsize(pdf_path),
        lang='deu',
        file_name='berlin.pdf',
        page_count=3
    )
    url = reverse('core:document', args=(doc.id, ))

    # allow margaret to read/view the document
    granted = {READ: True}
    create_access(
        node=doc,
        name=self.margaret_user.username,
        model_type=Access.MODEL_USER,
        access_type=Access.ALLOW,
        access_inherited=False,
        permissions=granted
    )

    # Margaret is not the owner of berlin.pdf, but she
    # now holds the READ permission created above.
    self.client.login(testcase_user=self.margaret_user)
    response = self.client.get(url)
    self.assertEqual(response.status_code, 200)
def test_assign_tags_from_automate_instance(self):
    """
    Tags attached to an automate instance can be assigned to a
    document via doc.add_tags(); afterwards the document carries
    the same tag names as the automate.
    """
    doc = Document.create_document(
        title="document_c",
        file_name="document_c.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        page_count=5,
    )
    doc.save()
    dst_folder = Folder.objects.create(
        title="destination Folder",
        user=self.user
    )
    auto = Automate.objects.create(
        name="whatever",
        match="XYZ",
        matching_algorithm=Automate.MATCH_ALL,
        is_case_sensitive=False,  # i.e. ignore case
        user=self.user,
        dst_folder=dst_folder
    )
    auto.tags.add('invoice', 'tags', tag_kwargs={'user': self.user})

    doc.add_tags(auto.tags.all())

    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12.
    self.assertEqual(doc.tags.count(), 2)
    self.assertEqual(
        {tag.name for tag in doc.tags.all()},
        {tag.name for tag in auto.tags.all()}
    )
def test_search_is_not_case_sensitive(self):
    """
    UT to double check that search is, by default,
    NOT case sensitive.
    """
    search_backend = get_search_backend()
    doc = Document.create_document(
        title="document_c",
        file_name="document_c.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        page_count=5,
    )
    page = doc.pages.first()
    page.text = "search for TESTX text"
    page.save()

    # the exact case matches
    exact_hits = search_backend.search("TESTX", Page)
    self.assertEqual(exact_hits.count(), 1)

    # a mix of lower and upper case matches as well
    mixed_case_hits = search_backend.search("testX", Page)
    self.assertEqual(mixed_case_hits.count(), 1)

    # "tst" does not match anything
    no_hits = search_backend.search("tst", Page)
    self.assertEqual(no_hits.count(), 0)
def test_folders_kvstore_propagates_add_to_subfolders(self):
    """
    A key added to a folder's kvstore propagates to its
    descendant folders and documents.
    """
    top = Folder.objects.create(title="top", user=self.user)
    top.save()
    sub = Folder.objects.create(title="sub", parent=top, user=self.user)
    sub.save()
    doc = Document.create_document(
        title="document_in_sub",
        file_name="document_sub.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        parent_id=sub.id,
        page_count=5,
    )
    doc.save()

    # nothing has metadata to begin with
    for node in (top, sub, doc):
        self.assertEqual(0, node.kvstore.count())

    top.kv.add(key="shop")

    self.assertEqual(1, top.kvstore.count())
    # kvstore propagated from parent folder to descendants
    self.assertEqual(1, sub.kvstore.count())
    # kvstore propagated from ancestor folder to doc
    self.assertEqual(1, doc.kvstore.count())
def test_language_is_inherited(self):
    """
    Whatever the document model has in its doc.lang field is
    inherited by the related page models.
    """
    doc = Document.create_document(
        title="kyuss.pdf",
        user=self.user,
        lang="ENG",
        file_name="kyuss.pdf",
        size=1222,
        page_count=3
    )
    doc.save()

    self.assertEqual(doc.pages.count(), 3)

    page_langs = []
    for page in doc.pages.all():
        page_langs.append(page.lang)
    self.assertEqual(['ENG'] * 3, page_langs)
def test_preview(self):
    """
    Requesting a page preview answers with HTTP 200; afterwards
    the page endpoint is asked whether its image exists.
    """
    doc = Document.create_document(
        title="andromeda.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="andromeda.pdf",
        size=1222,
        page_count=3
    )
    source_pdf = os.path.join(BASE_DIR, "data", "andromeda.pdf")
    copy2doc_url(src_file_path=source_pdf, doc_url=doc.doc_ep.url())

    preview_url = reverse('core:preview', args=(doc.id, 1, 1))
    response = self.client.post(preview_url)
    self.assertEqual(response.status_code, 200)

    page_ep = PageEp(
        document_ep=doc.doc_ep,
        page_num=1,
        step=Step(1),
        page_count=3
    )
    # NOTE(review): img_exists() looks like it already returns a
    # boolean; if so, wrapping it in os.path.exists() is not a
    # meaningful check -- confirm against PageEp.
    image_check = page_ep.img_exists()
    self.assertTrue(os.path.exists(image_check))
def test_paste_view_access_forbidden(self):
    """
    Pasting pages into a document is forbidden for margaret,
    who has no access to that document.
    """
    doc = Document.create_document(
        title="berlin.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3,
    )
    # margaret does not have access to the document
    self.client.login(testcase_user=self.margaret_user)

    post_data = [1, 2]
    ret = self.client.post(
        reverse('core:api_pages_paste', args=(doc.id, )),
        json.dumps(post_data),
        content_type='application/json',
        HTTP_X_REQUESTED_WITH='XMLHttpRequest'
    )

    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12.
    self.assertEqual(ret.status_code, HttpResponseForbidden.status_code)
def test_delete_document_with_parent(self):
    """
    Document D is child of folder F. Deleting document D, will
    result in... well... no more D around, but F still present.
    """
    folder = Folder.objects.create(user=self.user, title="F")
    doc = Document.create_document(
        title="andromeda.pdf",
        user=self.user,
        lang="ENG",
        file_name="andromeda.pdf",
        size=1222,
        page_count=3
    )
    doc.parent = folder
    doc.save()
    folder.save()

    count = folder.get_children().count()
    self.assertEqual(
        count, 1, f"Folder {folder.title} has {count} children"
    )
    self.assertEqual(doc.pages.count(), 3)

    # Remember the primary key before deleting: Django resets the
    # pk on the in-memory instance once the row is gone.
    doc_id = doc.id
    doc.delete()

    self.assertEqual(folder.get_children().count(), 0)

    # Look the document up by its own primary key. No document is
    # ever titled "D", so a title="D" lookup would raise DoesNotExist
    # regardless of whether the deletion actually happened.
    with self.assertRaises(Document.DoesNotExist):
        Document.objects.get(id=doc_id)

    # ... but F is still present.
    self.assertTrue(Folder.objects.filter(id=folder.id).exists())
def test_tree_path(self):
    """
    Build the structure

        Folder A > Folder B > Document C

    and check the ancestors of Document C.
    """
    folder_a = Folder.objects.create(
        title="folder_a", user=self.user, parent_id=None
    )
    folder_a.save()
    folder_b = Folder.objects.create(
        title="folder_b", user=self.user, parent_id=folder_a.id
    )
    folder_b.save()
    doc = Document.create_document(
        title="document_c",
        file_name="document_c.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        parent_id=folder_b.id,
        page_count=5,
    )
    doc.save()

    expected_path = [
        ['folder_a', folder_a.id],
        ['folder_b', folder_b.id],
        ['document_c', doc.id],
    ]
    actual_path = []
    for node in doc.get_ancestors(include_self=True):
        actual_path.append([node.title, node.id])

    self.assertListEqual(actual_path, expected_path)
    self.assertEqual(doc.pages.count(), 5)
def test_document_inherits_kv_from_parent_folder(self):
    """
    Documents newly added to a folder inherit the folder's
    kv metadata.
    """
    top = Folder.objects.create(title="top", user=self.user)
    top.save()

    folder_metadata = [
        {'key': 'shop', 'kv_type': 'text', 'kv_format': ''},
        {'key': 'total', 'kv_type': 'money', 'kv_format': 'dd.cc'},
    ]
    top.kv.update(folder_metadata)

    doc = Document.create_document(
        title="document_c",
        file_name="document_c.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        parent_id=top.id,
        page_count=5,
    )
    doc.save()

    self.assertEqual(2, doc.kv.count())
    doc_keys = set(doc.kv.typed_keys())
    folder_keys = set(top.kv.typed_keys())
    self.assertEqual(doc_keys, folder_keys)
def test_download_hocr_which_does_not_exists(self):
    """
    HOCR might not be available. That is a normal case (the page
    OCR task is still in the queue/in progress).
    Missing HOCR file => HTTP 404 return code is expected.
    """
    doc = Document.create_document(
        title="berlin.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3
    )
    # The document file itself is available
    # (for get_pagecount on server side) ...
    source_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
    default_storage.copy_doc(src=source_pdf, dst=doc.path.url())

    # ... but the HOCR file is missing.
    hocr_url = reverse('core:hocr', args=(doc.id, 1, 1))
    response = self.client.get(hocr_url)
    self.assertEqual(response.status_code, 404)
def test_basic_av_by_tag(self):
    """
    In advanced search the user can search by tag(s): searching for
    tag "green" returns only the document tagged with it.
    """
    doc1 = Document.create_document(
        title="doc1",
        user=self.testcase_user,
        page_count=2,
        file_name="koko.pdf",
        size='1111',
        lang='ENG',
    )
    doc2 = Document.create_document(
        title="doc2",
        user=self.testcase_user,
        page_count=2,
        file_name="kuku.pdf",
        size='1111',
        lang='ENG',
    )
    # only doc1 carries the "green" tag
    doc1.tags.add(
        "green",
        "blue",
        tag_kwargs={'user': self.testcase_user}
    )
    doc2.tags.add(
        "blue",
        tag_kwargs={'user': self.testcase_user}
    )

    ret = self.client.get(
        reverse('admin:search'),
        {'tag': 'green'}
    )

    # assertEqual instead of the deprecated assertEquals alias,
    # which no longer exists in Python 3.12.
    self.assertEqual(ret.status_code, 200)
    self.assertEqual(len(ret.context['results_docs']), 1)
    doc_ = ret.context['results_docs'][0]
    self.assertEqual(doc_.id, doc1.id)
def test_metadata_created_on_existing_folder_document_structure(self):
    """
    Given the folder structure:

        Home
          |
          Folder_A
             |
             document.pdf

    1. User adds metadata named price (money type, format 'dd,cc').
       At that moment document.pdf already exists in Folder_A.
    2. Expected:
        a. document.pdf inherits the price metadata
        b. pages of document.pdf inherit the price metadata as well
           (checked on the first page)
        c. metadata is inherited with the correct format/type
    """
    folder_A = Folder.objects.create(title="folder_A", user=self.user)
    doc = Document.create_document(
        title="document.pdf",
        file_name="document.pdf",
        size='1989',
        lang='DEU',
        user=self.user,
        parent_id=folder_A.id,  # document.pdf lives inside folder_A
        page_count=5,
    )
    doc.save()

    # fetch a fresh instance of the folder
    folder_A = Folder.objects.get(id=folder_A.id)
    self.assertEqual(folder_A.get_children().count(), 1)

    # attach metadata to folder_A
    price_metadata = {
        'key': 'price',
        'kv_type': MONEY,
        'kv_format': 'dd,cc'
    }
    folder_A.kv.update([price_metadata])
    # the folder itself got the metadata
    self.assertEqual(folder_A.kv.all().count(), 1)

    # the document inherited it, with correct type and format
    doc_kvs = doc.kv.all()
    self.assertEqual(doc_kvs.count(), 1)
    self.assertEqual(doc_kvs[0].kv_type, MONEY)
    self.assertEqual(doc_kvs[0].kv_format, "dd,cc")

    # and so did the first page of the document
    first_page_kvs = doc.pages.first().kv.all()
    self.assertEqual(
        first_page_kvs.count(),
        1,
        "Metadata was not propagated to document's page"
    )
    self.assertEqual(first_page_kvs[0].kv_type, MONEY)
    self.assertEqual(first_page_kvs[0].kv_format, "dd,cc")
def import_file(
    self,
    file_title=None,
    inbox_title="Inbox",
    delete_after_import=True,
    skip_ocr=False
):
    """
    Import the local file at ``self.filepath`` as a new document.

    Steps:
        1. determine the page count of the file
        2. create a document instance inside the inbox folder
        3. copy the file to the document's storage location
        4. OCR the document (unless ``skip_ocr`` is set)
        5. remove the source file (if ``delete_after_import`` is set)

    Used with the
        ./manage.py local_importer
        ./manage.py imap_importer
    commands.

    Args:
        file_title: title and file name of the new document; defaults
            to the base name of ``self.filepath``.
        inbox_title: title of the top-level folder (created for
            ``self.user`` if missing) that receives the document.
        delete_after_import: remove ``self.filepath`` once imported.
        skip_ocr: when true, the OCR step is not started.

    Returns:
        The created document, or ``False`` when the page count of the
        file could not be determined.
    """
    logger.debug(f"Importing file {self.filepath}")

    if file_title is None:
        file_title = os.path.basename(self.filepath)

    try:
        page_count = get_pagecount(self.filepath)
    except Exception:
        # Best effort: a file whose pages cannot be counted is skipped.
        # exc_info keeps the underlying cause (with traceback) in the
        # log instead of silently discarding it.
        logger.error(
            f"Error while getting page count of {self.filepath}.",
            exc_info=True
        )
        return False

    inbox, _ = Folder.objects.get_or_create(
        title=inbox_title,
        parent=None,
        user=self.user
    )
    doc = Document.create_document(
        user=self.user,
        title=file_title,
        size=os.path.getsize(self.filepath),
        lang=self.user_ocr_language,
        file_name=file_title,
        parent_id=inbox.id,
        page_count=page_count
    )

    logger.debug(f"Uploading file {self.filepath} to {doc.path.url()}")
    default_storage.copy_doc(
        src=self.filepath,
        dst=doc.path.url(),
    )

    if not skip_ocr:
        DocumentImporter.ocr_document(
            document=doc,
            page_count=page_count,
            lang=self.user_ocr_language,
        )

    if delete_after_import:
        # Usually we want to delete files when importing
        # them from a local directory.
        # When importing from an email attachment, deleting
        # files does not apply.
        os.remove(self.filepath)

    logger.debug("Import complete.")

    return doc
def test_automate_apply(self):
    """
    Smoke test for the automate.apply method, both for an automate
    with tags and for one without.
    """
    # automates are applicable only for documents in the inbox folder
    inbox, _ = Folder.objects.get_or_create(
        title=Folder.INBOX_NAME,
        user=self.user
    )
    doc_attrs = dict(
        title="document_c",
        file_name="document_c.pdf",
        size='1212',
        lang='DEU',
        user=self.user,
        parent_id=inbox.id,
        page_count=5,
    )
    first_doc = Document.create_document(**doc_attrs)
    second_doc = Document.create_document(**doc_attrs)

    # automate with tags
    tagged_automate = _create_am_any("test", "test", self.user)
    tagged_automate.tags.set("test", "one", tag_kwargs={'user': self.user})
    # make sure no exception is raised
    tagged_automate.apply(document=first_doc, page_num=1, text="test")

    # automate without tags
    plain_automate = _create_am_any("test2", "test", self.user)
    # make sure no exception is raised
    plain_automate.apply(document=second_doc, page_num=1, text="test")
def test_documents_returns_only_docs_user_has_perms_for_1(self):
    """
    Via the API a user can list only the documents
    he/she has READ access to.
    """
    # the only document in the system is owned by margaret
    Document.create_document(
        title="berlin.pdf",
        user=self.margaret_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3
    )

    response = self.client.get(reverse('core:api_documents'))
    self.assertEqual(response.status_code, 200)

    listed = json.loads(response.content)
    # Only margaret (as owner) has access to that single document,
    # so the listing returned to this client is expected to be empty.
    self.assertEqual(len(listed), 0)
def post(self, request, doc_id):
    """
    Paste pages (within document view) into an existing document.
    NO new document is created.

    The code for pasting in the changelist view (i.e. when a NEW
    document is created) is in
    papermerge.core.views.documents.paste_pages

    Responds with 204 on success, with 403 when the user lacks write
    permission on the document, and raises Http404 when the document
    does not exist.
    """
    insert_before = request.POST.get('before', False)
    insert_after = request.POST.get('after', False)

    try:
        target = Document.objects.get(id=doc_id)
    except Document.DoesNotExist:
        raise Http404("Document does not exists")

    # Guard clause: without write permission nothing gets pasted.
    if not request.user.has_perm(Access.PERM_WRITE, target):
        return Response(
            status=status.HTTP_403_FORBIDDEN,
            data={
                'msg': _(
                    "You don't have permissions to paste pages"
                    " in this document."
                )
            }
        )

    # NOTE(review): parent_id receives the parent node object here,
    # while the changelist paste view passes an integer id --
    # confirm which one Document.paste_pages expects.
    Document.paste_pages(
        user=request.user,
        parent_id=target.parent,
        dst_document=target,
        doc_pages=request.pages.all(),
        before=insert_before,
        after=insert_after
    )
    request.pages.clear()

    return Response(status=status.HTTP_204_NO_CONTENT)
def put(self, request, filename):
    """
    Import the uploaded file as a document titled ``filename``.

    Responds with the serialized document when the import yields a
    Document instance; otherwise with an empty HTTP 200 response.
    """
    uploaded = request.data['file']
    imported = Document.import_file(
        uploaded.temporary_file_path(),
        username=request.user.username,
        file_title=filename
    )

    if not isinstance(imported, Document):
        return Response(status=200)

    return Response(DocumentSerializer(imported).data)
def paste_pages(request):
    """
    Paste pages in a changelist view, which means that a new
    document instance is created.

    The JSON request body may carry a ``parent_id``; a truthy value
    is converted to int before being handed to Document.paste_pages.
    """
    payload = json.loads(request.body)

    target_parent = payload.get('parent_id', None)
    # falsy values (missing, None, empty) are passed through as-is
    target_parent = int(target_parent) if target_parent else target_parent

    Document.paste_pages(
        user=request.user,
        parent_id=target_parent,
        doc_pages=request.pages.all()
    )
    request.pages.clear()

    body = json.dumps({'msg': 'OK'})
    return HttpResponse(body, content_type="application/json")
def test_recreate_page_models(self):
    """
    A new document gets one page model per page; the page
    models can be deleted through the page_set manager.
    """
    doc = Document.create_document(
        title="kyuss.pdf",
        user=self.user,
        lang="ENG",
        file_name="kyuss.pdf",
        size=1222,
        page_count=3
    )
    doc.save()

    pages = doc.page_set
    self.assertEqual(pages.count(), 3)

    pages.all().delete()
    self.assertEqual(pages.count(), 0)
def test_download(self):
    """
    Downloading a document whose file is in place
    answers with HTTP 200.
    """
    doc = Document.create_document(
        title="andromeda.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="andromeda.pdf",
        size=1222,
        page_count=3
    )
    source_pdf = os.path.join(BASE_DIR, "data", "andromeda.pdf")
    copy2doc_url(src_file_path=source_pdf, doc_url=doc.doc_ep.url())

    download_url = reverse('core:document_download', args=(doc.id, ))
    response = self.client.post(download_url)
    self.assertEqual(response.status_code, 200)
def test_node_is_document(self):
    """
    A node created via Document.create_document reports itself
    as a document and not as a folder.
    """
    node_attrs = {
        'title': "document_node",
        'file_name': "document_node.pdf",
        'size': '1212',
        'lang': 'DEU',
        'user': self.user,
        'page_count': 5,
    }
    document_node = Document.create_document(**node_attrs)
    document_node.save()

    self.assertTrue(document_node.is_document())
    self.assertFalse(document_node.is_folder())
def test_download(self):
    """
    Downloading a document whose file was copied to the
    storage answers with HTTP 200.
    """
    doc = Document.create_document(
        title="berlin.pdf",
        user=self.testcase_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3
    )
    source_pdf = os.path.join(BASE_DIR, "data", "berlin.pdf")
    default_storage.copy_doc(src=source_pdf, dst=doc.path.url())

    download_url = reverse('core:document_download', args=(doc.id, ))
    response = self.client.post(download_url)
    self.assertEqual(response.status_code, 200)
def test_returns_only_doc_user_has_perms_for_1(self):
    """
    Via the API a user can view only a document
    he/she has READ access to.
    """
    # the document is owned by margaret
    margarets_doc = Document.create_document(
        title="berlin.pdf",
        user=self.margaret_user,
        lang="ENG",
        file_name="berlin.pdf",
        size=1222,
        page_count=3
    )

    detail_url = reverse('core:api_document', args=(margarets_doc.id, ))
    response = self.client.get(detail_url)

    # this client is expected to be refused
    self.assertEqual(
        response.status_code,
        HttpResponseForbidden.status_code
    )