def testBadGroupTypeInCSV(self):
    """A CSV whose document types do not fit the group type must be rejected.

    The data group is posted with group_type 2 (Composition) while the CSV
    rows carry document type ids 9 and 4; the response body is expected to
    report the bad rows.
    """
    # DocumentType.objects.all().exclude(group_type__title='Composition').values('id', 'title')
    # assigning the (incompatible) *chemical use disclosure* document type
    # in row 1 of the csv
    bad_csv_text = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,9,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,4,, \n"
    )
    upload = InMemoryUploadedFile(
        io.StringIO(bad_csv_text),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(bad_csv_text),
        charset="utf-8",
    )
    payload = {
        "name": ["Composition Type Group"],
        "description": ["test data group"],
        "group_type": ["2"],  # Composition
        "downloaded_by": ["1"],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }

    request = self.factory.post(path="/datagroup/new", data=payload)
    request.FILES["csv"] = upload
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    response = views.data_group_create(pk=10, request=request)

    # the upload form should be invalid
    self.assertIn(b"CSV has bad data in row/s:", response.content)
def test_datagroup_create_dupe_filename(self):
    """A CSV that lists the same filename twice must produce a duplicate error."""
    dupe_csv_text = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,1,, \n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,Body Cream,1,, \n"
    )
    upload = InMemoryUploadedFile(
        io.StringIO(dupe_csv_text),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(dupe_csv_text),
        charset="utf-8",
    )
    downloader_pk = User.objects.get(username="******").pk
    payload = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["1"],
        "downloaded_by": [str(downloader_pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }

    request = self.factory.post(path="/datagroup/new/", data=payload)
    request.FILES["csv"] = upload
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    response = views.data_group_create(request=request, pk=10)

    self.assertContains(response, "Duplicate filename found")
def test_upload_html_as_datadoc(self):
    """Register a single .html document and check that its page links to the file.

    Posts a one-row CSV naming an HTML file, expects a redirect to the new
    data group, verifies exactly one DataDocument was created for it, and
    then checks that the data document page references
    ``pdf/document_<id>.html``.
    """
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "alberto_balsam_conditioner_antioxidant_blueberry.html,Example Datadocument from HTML,ID,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["California Pesticides"],
        "description": ["test data group"],
        "group_type": ["2"],  # Composition
        "downloaded_by": ["1"],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new", data=form_data)
    request.FILES["csv"] = sample_csv
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(pk=10, request=request)
    self.assertEqual(
        resp.status_code, 302, "Should be redirected to new datagroup detail page"
    )

    # does the datagroup in the ORM contain the new data docs?
    newdg_pk = resolve(resp.url).kwargs["pk"]
    newdg = DataGroup.objects.get(pk=newdg_pk)
    newdds = DataDocument.objects.filter(data_group=newdg)
    self.assertEqual(newdds.count(), 1, "There should be one new data document")

    # Does the data document page include a link to the data document's file?
    # (Real f-string placeholders replace the former mix of a
    # placeholder-less f-string with %-formatting.)
    new_doc_id = newdds[0].id
    resp = self.client.get(f"/datadocument/{new_doc_id}/")
    self.assertContains(resp, f"pdf/document_{new_doc_id}.html")
def testBadGroupTypeInCSV(self):
    """A CSV using the "HH" document type in a group_type 2 group is rejected.

    The response body must contain the message stating that the document
    type has to be allowed by the parent data group.
    """
    bad_csv_text = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,HH,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,HH,, \n"
    )
    upload = InMemoryUploadedFile(
        io.StringIO(bad_csv_text),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(bad_csv_text),
        charset="utf-8",
    )
    payload = {
        "name": ["Composition Type Group"],
        "description": ["test data group"],
        "group_type": ["2"],  # Composition
        "downloaded_by": ["1"],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }

    request = self.factory.post(path="/datagroup/new", data=payload)
    request.FILES["csv"] = upload

    # RequestFactory does not run middleware, so session and message
    # handling are attached to the request by hand.
    for middleware_cls in (SessionMiddleware, MessageMiddleware):
        middleware_cls().process_request(request)
        request.session.save()

    request.user = User.objects.get(username="******")
    # The middleware-provided session is then replaced by a plain dict.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    response = views.data_group_create(pk=10, request=request)

    # the upload form should be invalid
    expected = b"The document type must be allowed by the parent data group."
    self.assertIn(expected, response.content)
def test_datagroup_create_dupe_filename(self):
    """A CSV that lists the same filename twice must produce a duplicate error."""
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,Body Cream,MS,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["2"],
        "downloaded_by": [str(User.objects.get(username="******").pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new/", data=form_data)
    request.FILES["csv"] = sample_csv

    # RequestFactory does not run middleware, so session and message
    # handling are attached to the request by hand.
    for middleware_cls in (SessionMiddleware, MessageMiddleware):
        middleware_cls().process_request(request)
        request.session.save()

    request.user = User.objects.get(username="******")
    # The middleware-provided session is then replaced by a plain dict.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)

    # The previous literal had bare double quotes inside a double-quoted
    # string, which is not valid Python.
    # NOTE(review): the quotes are written as &quot; entities on the
    # assumption that the message reaches the page through Django's HTML
    # auto-escaping -- confirm against the actual response body.
    self.assertContains(
        resp,
        "Duplicate &quot;filename&quot; values for &quot;"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf&quot; are not allowed.",
    )
def testGoodGroupTypeInCSV(self):
    """A CSV using the "MS" document type in a group_type 2 group is accepted.

    Expects a redirect to the new data group and two DataDocument rows
    attached to it.
    """
    good_csv_text = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \n"
    )
    upload = InMemoryUploadedFile(
        io.StringIO(good_csv_text),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(good_csv_text),
        charset="utf-8",
    )
    payload = {
        "name": ["Composition Type Group"],
        "description": ["test data group"],
        "group_type": ["2"],  # Composition
        "downloaded_by": ["1"],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }

    request = self.factory.post(path="/datagroup/new", data=payload)
    request.FILES["csv"] = upload
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    response = views.data_group_create(pk=10, request=request)
    self.assertEqual(
        response.status_code,
        302,
        "Should be redirected to new datagroup detail page",
    )

    # does the datagroup in the ORM contain the new data docs?
    created_pk = resolve(response.url).kwargs["pk"]
    created_group = DataGroup.objects.get(pk=created_pk)
    created_docs = DataDocument.objects.filter(data_group=created_group)
    self.assertEqual(
        created_docs.count(), 2, "There should be two new data documents"
    )
def testGoodGroupTypeInCSV(self):
    """A CSV using the "MS" document type in a group_type 2 group is accepted.

    Expects a redirect to the new data group and two DataDocument rows
    attached to it.
    """
    # DocumentType.objects.all().filter(group_type__title='Composition').values('id', 'title')
    # assigning the (compatible) MSDS document type in the csv
    good_csv_text = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \n"
    )
    upload = InMemoryUploadedFile(
        io.StringIO(good_csv_text),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(good_csv_text),
        charset="utf-8",
    )
    payload = {
        "name": ["Composition Type Group"],
        "description": ["test data group"],
        "group_type": ["2"],  # Composition
        "downloaded_by": ["1"],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }

    request = self.factory.post(path="/datagroup/new", data=payload)
    request.FILES["csv"] = upload
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    response = views.data_group_create(pk=10, request=request)
    self.assertEqual(
        response.status_code,
        302,
        "Should be redirected to new datagroup detail page",
    )

    # does the datagroup in the ORM contain the new data docs?
    created_pk = resolve(response.url).kwargs["pk"]
    created_group = DataGroup.objects.get(pk=created_pk)
    created_docs = DataDocument.objects.filter(data_group=created_group)
    self.assertEqual(
        created_docs.count(), 2, "There should be two new data documents"
    )
def test_csv_line_endings(self):
    """A CSV mixing "\\r" and "\\r\\n" line endings must still create the group.

    Expects a redirect (302) and a DataGroup named "Walmart MSDS Test Group"
    to exist afterwards.
    """
    csv_string = (
        "filename,title,document_type,url,organization\r"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \r"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \r\n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["2"],
        "downloaded_by": [str(User.objects.first().pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new/", data=form_data)
    request.FILES["csv"] = sample_csv

    # RequestFactory does not run middleware, so session and message
    # handling are attached to the request by hand.
    for middleware_cls in (SessionMiddleware, MessageMiddleware):
        middleware_cls().process_request(request)
        request.session.save()

    request.user = User.objects.get(username="******")
    # The middleware-provided session is then replaced by a plain dict.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)
    dg = DataGroup.objects.filter(name="Walmart MSDS Test Group")

    # assertEqual reports the actual status code when the check fails,
    # which assertTrue(a == b) does not.
    self.assertEqual(resp.status_code, 302, "Redirect to detail page")
    self.assertTrue(dg.exists(), "Group should be created")
def test_datagroup_create(self):
    """Exercise data group creation end to end through the view functions.

    Steps, in order:
      1. A CSV containing an over-long filename is rejected and no group
         is created.
      2. A valid CSV creates the group and redirects to its detail page.
      3. The group's folder exists in MEDIA_ROOT and two documents are
         registered.
      4. The registered-records and data-document CSV downloads each list
         a registered filename.
      5. Uploading a PDF that matches a registered record stores it under
         the group's folder.
    """
    # --- 1. over-long filename is rejected --------------------------------
    long_fn = "a filename that is too long " * 10
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        f"{long_fn},Body Cream,MS,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["2"],
        "downloaded_by": [str(User.objects.get(username="******").pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new/", data=form_data)
    request.FILES["csv"] = sample_csv
    request.user = User.objects.get(username="******")
    # RequestFactory does not run middleware, so session and message
    # handling are attached to the request by hand.
    for middleware_cls in (SessionMiddleware, MessageMiddleware):
        middleware_cls().process_request(request)
        request.session.save()
    # The middleware-provided session is then replaced by a plain dict.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)
    dg_exists = DataGroup.objects.filter(name="Walmart MSDS Test Group").exists()
    self.assertContains(
        resp, "filename: Ensure this value has at most 255 characters"
    )
    self.assertFalse(dg_exists)

    # --- 2. valid CSV creates the group ----------------------------------
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    request = self.factory.post(path="/datagroup/new", data=form_data)
    request.FILES["csv"] = sample_csv
    for middleware_cls in (SessionMiddleware, MessageMiddleware):
        middleware_cls().process_request(request)
        request.session.save()
    request.user = User.objects.get(username="******")
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)
    self.assertEqual(resp.status_code, 302, "Should be redirecting")
    dg = DataGroup.objects.get(name="Walmart MSDS Test Group")
    self.assertEqual(
        f"/datagroup/{dg.pk}/", resp.url, "Should be redirecting to the proper URL"
    )

    # --- 3. folder and documents -------------------------------------------
    # test whether the file system folder was created
    self.assertIn(
        str(dg.fs_id),
        os.listdir(settings.MEDIA_ROOT),
        "The data group's UUID should be a folder in MEDIA_ROOT",
    )
    # In the Data Group Detail Page
    resp = self.client.get(f"/datagroup/{dg.pk}/")
    # test whether the data documents were created
    docs = DataDocument.objects.filter(data_group=dg)
    self.assertEqual(len(docs), 2, "there should be two associated documents")

    # --- 4. CSV downloads --------------------------------------------------
    # test whether the "Download Registered Records" link is like this example
    # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/download_registered_documents" class="btn btn-secondary">
    csv_href = f"/datagroup/{dg.pk}/download_registered_documents/"
    self.assertIn(
        csv_href,
        str(resp._container),
        "The data group detail page must contain the right download link",
    )
    # grab a filename from a data document and see if it's in the csv
    doc_fn = docs.first().filename

    # test whether the registered records csv download link works
    # (this response is expected to be a StreamingHttpResponse)
    resp_rr_csv = self.client.get(csv_href)
    self.assertTrue(
        any(doc_fn in str(csv_row) for csv_row in resp_rr_csv.streaming_content),
        "the document file name should appear in the registered records csv",
    )

    # Test whether the data document csv download works.
    # Each download is checked independently: the former shared "found"
    # flag was never reset, so this second check could not fail.
    dd_csv_href = f"/datagroup/{dg.pk}/download_documents/"
    resp_dd_csv = self.client.get(dd_csv_href)
    self.assertTrue(
        any(doc_fn in str(csv_row) for csv_row in resp_dd_csv.streaming_content),
        "the document file name should appear in the data documents csv",
    )

    # --- 5. upload a matching PDF ------------------------------------------
    # test uploading one pdf that matches a registered record
    doc = DataDocument.objects.filter(data_group_id=dg.pk).first()
    pdf = TemporaryUploadedFile(
        name=doc.filename, content_type="application/pdf", size=47, charset=None
    )
    try:
        request = self.factory.post(
            path=f"/datagroup/{dg.pk}", data={"uploaddocs-submit": "Submit"}
        )
        request.FILES["uploaddocs-documents"] = pdf
        for middleware_cls in (SessionMiddleware, MessageMiddleware):
            middleware_cls().process_request(request)
            request.session.save()
        request.user = User.objects.get(username="******")
        views.data_group_detail(request=request, pk=dg.pk)

        # os.path.join works whether or not MEDIA_ROOT ends with a separator.
        pdf_path = os.path.join(
            settings.MEDIA_ROOT, str(dg.fs_id), "pdf", doc.get_abstract_filename()
        )
        self.assertTrue(
            os.path.exists(pdf_path),
            "the stored file should be in MEDIA_ROOT/dg.fs_id",
        )
    finally:
        # Close the temporary upload even when an assertion above fails.
        pdf.close()
def test_datagroup_create(self):
    """Exercise data group creation end to end through the view functions.

    Steps, in order:
      1. A CSV containing an over-long filename is rejected and no group
         is created.
      2. A valid CSV creates the group and redirects to its detail page.
      3. The group's folder exists in MEDIA_ROOT and two documents are
         registered.
      4. The registered-records and data-document CSV downloads each list
         a registered filename; the zipped-PDF link is present on the
         detail page and is requested once.
      5. Uploading a PDF that matches a registered record stores it under
         the group's folder.
    """
    # --- 1. over-long filename is rejected --------------------------------
    long_fn = "a filename that is too long " * 10
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
        f"{long_fn},Body Cream,1,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["1"],
        "downloaded_by": [str(User.objects.get(username="******").pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new/", data=form_data)
    request.FILES["csv"] = sample_csv
    request.user = User.objects.get(username="******")
    # A plain dict stands in for the session the view reads from.
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)
    dg_exists = DataGroup.objects.filter(name="Walmart MSDS Test Group").exists()
    self.assertContains(resp, "Filename too long")
    self.assertFalse(dg_exists)

    # --- 2. valid CSV creates the group ----------------------------------
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,UN,, \n"
    )
    sample_csv = InMemoryUploadedFile(
        io.StringIO(csv_string),
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    request = self.factory.post(path="/datagroup/new", data=form_data)
    request.FILES["csv"] = sample_csv
    request.user = User.objects.get(username="******")
    request.session = {"datasource_title": "Walmart", "datasource_pk": 10}

    resp = views.data_group_create(request=request, pk=10)
    self.assertEqual(resp.status_code, 302, "Should be redirecting")
    dg = DataGroup.objects.get(name="Walmart MSDS Test Group")
    self.assertEqual(
        f"/datagroup/{dg.pk}/", resp.url, "Should be redirecting to the proper URL"
    )

    # --- 3. folder and documents -------------------------------------------
    # test whether the file system folder was created
    self.assertIn(
        str(dg.fs_id),
        os.listdir(settings.MEDIA_ROOT),
        "The data group's UUID should be a folder in MEDIA_ROOT",
    )
    # In the Data Group Detail Page
    resp = self.client.get(f"/datagroup/{dg.pk}/")
    # test whether the data documents were created
    docs = DataDocument.objects.filter(data_group=dg)
    self.assertEqual(len(docs), 2, "there should be two associated documents")

    # --- 4. downloads ------------------------------------------------------
    # test whether the "Download Registered Records" link is like this example
    # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/registered_records.csv" class="btn btn-secondary">
    # <span class="oi oi-spreadsheet"></span> Download Registered Records CSV</a>
    csv_href = f"/datagroup/{dg.pk}/registered_records.csv"
    self.assertIn(
        csv_href,
        str(resp._container),
        "The data group detail page must contain the right download link",
    )
    # grab a filename from a data document and see if it's in the csv
    doc_fn = docs.first().filename

    # test whether the registered records csv download link works
    # (this response is expected to be a StreamingHttpResponse)
    resp_rr_csv = self.client.get(csv_href)
    self.assertTrue(
        any(doc_fn in str(csv_row) for csv_row in resp_rr_csv.streaming_content),
        "the document file name should appear in the registered records csv",
    )

    # Test whether the data document csv download works
    # URL on data group detail page: datagroup/docs_csv/{pk}/
    # Each download is checked independently: the former shared "found"
    # flag was never reset, so this second check could not fail.
    dd_csv_href = f"/datagroup/docs_csv/{dg.pk}/"  # this is an interpreted django URL
    resp_dd_csv = self.client.get(dd_csv_href)
    self.assertTrue(
        any(doc_fn in str(csv_row) for csv_row in resp_dd_csv.streaming_content),
        "the document file name should appear in the data documents csv",
    )

    # test whether the "Download All PDF Documents" link works
    dg_zip_href = f"/datagroup/pdfs_zipped/{dg.pk}/"  # this is the django-interpreted URL
    self.assertIn(
        dg_zip_href,
        str(resp._container),
        "The data group detail page must contain the right zip download link",
    )
    # The zip endpoint is requested but its response is not inspected.
    self.client.get(dg_zip_href)

    # --- 5. upload a matching PDF ------------------------------------------
    # test uploading one pdf that matches a registered record
    f = TemporaryUploadedFile(
        name="0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf",
        content_type="application/pdf",
        size=47,
        charset=None,
    )
    try:
        request = self.factory.post(
            path=f"/datagroup/{dg.pk}", data={"upload": "Submit"}
        )
        request.FILES["multifiles"] = f
        request.user = User.objects.get(username="******")
        views.data_group_detail(request=request, pk=dg.pk)

        doc = DataDocument.objects.get(title="NUTRA NAIL")
        # os.path.join works whether or not MEDIA_ROOT ends with a separator.
        pdf_path = os.path.join(
            settings.MEDIA_ROOT, str(dg.fs_id), "pdf", doc.get_abstract_filename()
        )
        self.assertTrue(
            os.path.exists(pdf_path),
            "the stored file should be in MEDIA_ROOT/dg.fs_id",
        )
    finally:
        # Close the temporary upload even when an assertion above fails.
        f.close()