Python extract_citations示例，file_processing.extract_citations Python示例

示例#1

0

显示文件

 def test_start_citation_analysis_dblp(self):
     """Check that analysing the dblp fixture yields a result of length 10."""
     extracted = file_processing.extract_citations(self.dblp_file_location)
     analysis = file_processing.start_citation_analysis(extracted)
     expected_size = 10
     self.assertEqual(expected_size, len(analysis))

示例#2

0

显示文件

 def test_start_citation_analysis_pdf(self):
     """Check that analysing the pdf fixture yields a result of length 10."""
     extracted = file_processing.extract_citations(self.pdf_file_location)
     analysis = file_processing.start_citation_analysis(extracted)
     expected_size = 10
     self.assertEqual(expected_size, len(analysis))

示例#3

0

显示文件

 def test_start_citation_analysis_txt(self):
     """Check that analysing the txt fixture yields a result of length 10."""
     extracted = file_processing.extract_citations(self.txt_file_location)
     analysis = file_processing.start_citation_analysis(extracted)
     expected_size = 10
     self.assertEqual(expected_size, len(analysis))

示例#4

0

显示文件

 def test_extract_citations_bibtex(self):
     """Check that extraction from the bibtex fixture has length 2."""
     source_path = self.bibtext_file_location
     extracted = file_processing.extract_citations(source_path)
     expected_count = 2
     self.assertEqual(expected_count, len(extracted))

示例#5

0

显示文件

 def test_extract_citations_else(self):
     """
     Asserts the extraction result is None when an unsupported
     file type (here the png fixture) is processed
     """
     blank_array = file_processing.extract_citations(self.png_file_location)
     # assertIsNone reports the unexpected value on failure, unlike
     # assertTrue(x is None) which only says "False is not true".
     self.assertIsNone(blank_array)

示例#6

0

显示文件

 def test_start_citation_analysis_bibtex(self):
     """Check that analysing the bibtex fixture yields a result of length 10."""
     source_path = self.bibtext_file_location
     extracted = file_processing.extract_citations(source_path)
     analysis = file_processing.start_citation_analysis(extracted)
     expected_size = 10
     self.assertEqual(expected_size, len(analysis))

示例#7

0

显示文件

 def test_write_citations_to_file_pdf(self):
     """Check that writing the pdf analysis results creates output.txt."""
     extracted = file_processing.extract_citations(self.pdf_file_location)
     analysis = file_processing.start_citation_analysis(extracted)
     file_processing.write_citations_to_file_json(analysis, self.upload_path)
     # The output file is expected directly inside the upload directory.
     output_exists = os.path.exists(self.upload_path + '/output.txt')
     self.assertTrue(output_exists)

示例#8

0

显示文件

 def test_generate_results_chart(self):
     """Asserts graph file is created

     Processes the docx fixture, writes its analysis results into the
     upload directory, generates the chart and checks that
     results_graph.png exists there afterwards.
     """
     output_file = self.upload_path + '/output.txt'
     # Start from a clean slate, but only delete the file when it is
     # actually there: an unconditional os.remove raises
     # FileNotFoundError when this test runs before any test that
     # writes output.txt, making the suite order-dependent.
     if os.path.exists(output_file):
         os.remove(output_file)
     docx_array = file_processing.extract_citations(self.docx_file_location)
     docx_dict = file_processing.start_citation_analysis(docx_array)
     file_processing.write_citations_to_file_json(docx_dict,
                                                  self.upload_path)
     file_processing.generate_results_chart(self.upload_path)
     self.assertTrue(os.path.exists(self.upload_path +
                                    '/results_graph.png'))

示例#9

0

显示文件

 def test_extract_citations_dblp(self):
     """Check that extraction from the dblp fixture has length 4."""
     extracted = file_processing.extract_citations(self.dblp_file_location)
     expected_count = 4
     self.assertEqual(expected_count, len(extracted))

示例#10

0

显示文件

 def test_extract_citations_txt(self):
     """Check that extraction from the txt fixture has length 1553."""
     extracted = file_processing.extract_citations(self.txt_file_location)
     expected_count = 1553
     self.assertEqual(expected_count, len(extracted))

示例#11

0

显示文件

 def test_extract_citations_docx(self):
     """Check that extraction from the docx fixture has length 83."""
     extracted = file_processing.extract_citations(self.docx_file_location)
     expected_count = 83
     self.assertEqual(expected_count, len(extracted))

示例#12

0

显示文件

 def test_extract_citations_pdf(self):
     """Check that extraction from the pdf fixture has length 27."""
     extracted = file_processing.extract_citations(self.pdf_file_location)
     expected_count = 27
     self.assertEqual(expected_count, len(extracted))

示例#13

0

显示文件

文件： site_main.py 项目： 0x4A42/citation_extractor

def index_post_request(upload_path):
    """Validate, save and process the files uploaded in a POST request.

        Every file in the "uploaded_file" field of the request is checked
        in turn: its secured file name must not be empty, its extension
        must be accepted for config.ALLOWED_EXTENSIONS, and its length
        must be accepted for config.MAX_FILE_SIZE / config.MIN_FILE_SIZE.

        A file that fails any check is logged and disregarded.
        A file that passes is saved into upload_path, its citations are
        extracted and analysed, and the results are written to upload_path.

    Args:
        upload_path (string): The upload directory - a combination of
        "upload/" + the session name

    Variables:
        files_processed (dictionary): Counts accepted ("True") and
        rejected ("False") files. If at least one file was accepted,
        the results zip is created.
    Returns:
        True: at least one file was accepted and the results zip
            (config.RESULTS_ZIP) was created in upload_path.
        False: the request contained files but none passed validation.
        redirect: a redirect to the request URL when the request
            carries no files at all.
    """
    if not request.files:
        # No files in the request, redirect to index.
        return redirect(request.url)

    files_processed = {"True": 0, "False": 0}

    # loop, as possibility of multiple file uploads
    for file_to_upload in request.files.getlist("uploaded_file"):
        # Measure the file by seeking to its end, then rewind so the
        # subsequent save does not write an empty file.
        file_to_upload.seek(0, os.SEEK_END)
        file_length = file_to_upload.tell()
        file_to_upload.seek(0)
        # Secures file name against user input
        file_name = secure_filename(file_to_upload.filename)

        # Run the validation checks in order; the first failure wins.
        if file_uploader.check_file_name_empty(file_name) is True:
            rejection = "empty file name"
        elif file_uploader.allowed_ext(file_to_upload.filename,
                                       config.ALLOWED_EXTENSIONS) is False:
            rejection = "extension not supported"
        elif file_uploader.check_file_length(
                file_length, config.MAX_FILE_SIZE,
                config.MIN_FILE_SIZE) is False:
            rejection = "invalid file size"
        else:
            rejection = None

        if rejection is not None:
            # Lazy %-formatting keeps the message well spaced and does not
            # fail if the client supplied no filename at all.
            logging.info("Error uploading %s from %s - %s.",
                         file_to_upload.filename,
                         session['public_user'], rejection)
            files_processed['False'] += 1
            continue

        # Passed all validation: save and process the file.
        files_processed['True'] += 1
        # NOTE(review): the existing-name check is run against the literal
        # "uploads/" directory while the file is saved under upload_path -
        # confirm this is intended.
        file_name = file_uploader.check_existing_file_name(
            file_name, "uploads/")
        file_path = upload_path + "/" + file_name
        file_to_upload.save(file_path)
        citations = file_processing.extract_citations(file_path)
        results = file_processing.start_citation_analysis(citations)
        file_processing.write_citations_to_file_json(results, upload_path)

    if files_processed['True'] >= 1:
        # NOTE: chart generation (file_processing.generate_results_chart)
        # is currently disabled here; only the zip is produced.
        file_processing.create_zip(upload_path, config.RESULTS_ZIP)
        return True
    return False