def test_start_citation_analysis_dblp(self):
    """Check the analysis of the dblp fixture's citations has 10 entries."""
    extracted = file_processing.extract_citations(self.dblp_file_location)
    analysis = file_processing.start_citation_analysis(extracted)
    entry_count = len(analysis)
    self.assertEqual(10, entry_count)
def test_start_citation_analysis_pdf(self):
    """Check the analysis of the pdf fixture's citations has 10 entries."""
    analysis = file_processing.start_citation_analysis(
        file_processing.extract_citations(self.pdf_file_location)
    )
    self.assertEqual(10, len(analysis))
def test_start_citation_analysis_txt(self):
    """Check the analysis of the txt fixture's citations has 10 entries."""
    source_path = self.txt_file_location
    extracted = file_processing.extract_citations(source_path)
    analysis = file_processing.start_citation_analysis(extracted)
    self.assertEqual(10, len(analysis))
def test_extract_citations_bibtex(self):
    """Check that extracting from the bibtex fixture yields 2 items."""
    extracted = file_processing.extract_citations(self.bibtext_file_location)
    item_count = len(extracted)
    self.assertEqual(2, item_count)
def test_extract_citations_else(self):
    """Check that extraction returns None for an unsupported file type.

    The png fixture stands in for a file type the extractor does not
    handle.
    """
    extracted = file_processing.extract_citations(self.png_file_location)
    # assertIsNone reports the unexpected value on failure, unlike
    # assertTrue(x is None), which only says "False is not true".
    self.assertIsNone(extracted)
def test_start_citation_analysis_bibtex(self):
    """Check the analysis of the bibtex fixture's citations has 10 entries."""
    source_path = self.bibtext_file_location
    analysis = file_processing.start_citation_analysis(
        file_processing.extract_citations(source_path)
    )
    entry_count = len(analysis)
    self.assertEqual(10, entry_count)
def test_write_citations_to_file_pdf(self):
    """Check that output.txt exists after writing the pdf fixture's results."""
    analysis = file_processing.start_citation_analysis(
        file_processing.extract_citations(self.pdf_file_location)
    )
    file_processing.write_citations_to_file_json(analysis, self.upload_path)

    # The writer is expected to leave its output directly under upload_path.
    expected_output = self.upload_path + '/output.txt'
    self.assertTrue(os.path.exists(expected_output))
def test_generate_results_chart(self):
    """Check that results_graph.png exists after charting the docx results.

    Any output.txt already present in the upload path is removed first,
    then the docx fixture's citations are analysed and written out before
    the chart is generated.
    """
    try:
        os.remove(self.upload_path + '/output.txt')
    except FileNotFoundError:
        # Nothing to clean up. The test must not depend on an earlier
        # test (or fixture) having already written output.txt.
        pass

    docx_citations = file_processing.extract_citations(
        self.docx_file_location)
    docx_results = file_processing.start_citation_analysis(docx_citations)
    file_processing.write_citations_to_file_json(
        docx_results, self.upload_path)
    file_processing.generate_results_chart(self.upload_path)

    self.assertTrue(
        os.path.exists(self.upload_path + '/results_graph.png'))
def test_extract_citations_dblp(self):
    """Check that extracting from the dblp fixture yields 4 items."""
    extracted = file_processing.extract_citations(self.dblp_file_location)
    item_count = len(extracted)
    self.assertEqual(4, item_count)
def test_extract_citations_txt(self):
    """Check that extracting from the txt fixture yields 1553 items."""
    source_path = self.txt_file_location
    extracted = file_processing.extract_citations(source_path)
    self.assertEqual(1553, len(extracted))
def test_extract_citations_docx(self):
    """Check that extracting from the docx fixture yields 83 items."""
    extracted = file_processing.extract_citations(self.docx_file_location)
    item_count = len(extracted)
    self.assertEqual(83, item_count)
def test_extract_citations_pdf(self):
    """Check that extracting from the pdf fixture yields 27 items."""
    source_path = self.pdf_file_location
    extracted = file_processing.extract_citations(source_path)
    self.assertEqual(27, len(extracted))
def index_post_request(upload_path):
    """Validate, save and analyse the files uploaded with a POST request.

    Each file in the "uploaded_file" field is checked in turn: its
    secured name must not be empty, its extension must be allowed by
    config.ALLOWED_EXTENSIONS and its size must pass the
    config.MAX_FILE_SIZE / config.MIN_FILE_SIZE check. A file that fails
    any check is logged and skipped. A file that passes is saved under
    upload_path, its citations are extracted and analysed, and the
    results are written to upload_path.

    Args:
        upload_path (string): Directory the accepted files and their
            results are written to.

    Returns:
        True if at least one file passed validation (the results zip is
        created before returning), False if every file was rejected, or
        a redirect to the request URL when the request carries no files.
    """
    if not request.files:
        # No files in the request: send the client back to the same URL.
        return redirect(request.url)

    # Counts of accepted ("True") and rejected ("False") uploads.
    files_processed = {"True": 0, "False": 0}

    # Loop, as several files may be uploaded in one request.
    for file_to_upload in request.files.getlist("uploaded_file"):
        # Measure the upload by seeking to its end, then rewind so the
        # later save() does not write an empty file.
        file_to_upload.seek(0, os.SEEK_END)
        file_length = file_to_upload.tell()
        file_to_upload.seek(0)

        # Secures file name against user input.
        file_name = secure_filename(file_to_upload.filename)

        # Run the checks in order; the first failure decides the reason.
        if file_uploader.check_file_name_empty(file_name) is True:
            rejection = "empty file name"
        elif file_uploader.allowed_ext(
                file_to_upload.filename, config.ALLOWED_EXTENSIONS) is False:
            rejection = "extension not supported"
        elif file_uploader.check_file_length(
                file_length, config.MAX_FILE_SIZE,
                config.MIN_FILE_SIZE) is False:
            rejection = "invalid file size"
        else:
            rejection = None

        if rejection is not None:
            logging.info(
                "Error uploading %s from %s - %s.",
                file_to_upload.filename,
                session['public_user'],
                rejection,
            )
            files_processed['False'] += 1
            continue

        # Passed all validation: save the file and analyse it.
        files_processed['True'] += 1
        # NOTE(review): the name check is made against the literal
        # "uploads/" directory rather than upload_path - confirm this is
        # intended.
        file_name = file_uploader.check_existing_file_name(
            file_name, "uploads/")
        file_path = upload_path + "/" + file_name
        file_to_upload.save(file_path)

        citations = file_processing.extract_citations(file_path)
        results = file_processing.start_citation_analysis(citations)
        file_processing.write_citations_to_file_json(results, upload_path)

    if files_processed['True'] >= 1:
        # Chart generation is currently disabled:
        # file_processing.generate_results_chart(upload_path)
        file_processing.create_zip(upload_path, config.RESULTS_ZIP)
        return True

    # Every uploaded file was rejected.
    return False