示例#1
0
def build_json(
    file_name,
    temp_dir="tmp",
    digest_config=None,
    jats_file_name=None,
    image_file_name=None,
    related=None,
):
    """Build JSON output from a DOCX input file, optionally using JATS input.

    The digest is first built from ``file_name`` with ``build_digest``. When
    ``jats_file_name`` is supplied, the digest text and subjects are replaced
    by the values parsed from that JATS file. When ``image_file_name`` is
    supplied it replaces the file recorded on the digest image.

    Returns whatever ``digest_json`` produces for the resulting digest,
    ``digest_config`` and ``related``.
    """
    digest = build_digest(file_name, temp_dir, digest_config)

    if jats_file_name:
        jats_soup = parse_jats_file(jats_file_name)
        # the JATS digest content takes precedence over the DOCX text
        digest.text = parse_jats_digest(jats_soup)
        # subjects are only available from the JATS file
        digest.subjects = parse_jats_subjects(jats_soup)

    if image_file_name:
        # NOTE(review): assumes digest.image is set whenever an image file
        # name is passed in -- confirm against build_digest
        digest.image.file = image_file_name

    return digest_json(digest, digest_config, related)
示例#2
0
 def test_build_docx(self, test_data):
     """Check that a DOCX built from a DOCX input matches the expected fixture.

     ``test_data`` supplies the keys ``file_name``, ``output_dir`` and
     ``expected_docx_file``.
     """
     source_name = test_data.get("file_name")
     target_dir = test_data.get("output_dir")
     # the output file name is derived from the digest parsed from the input
     source_digest = build_digest(data_path(source_name))
     target_name = output.docx_file_name(source_digest)
     fixture_path = fixture_file(test_data.get("expected_docx_file"))
     target_path = os.path.join(target_dir, target_name)

     # build the DOCX and confirm the path reported back is where we asked
     built_path = output.build_docx(data_path(source_name), target_path)
     self.assertEqual(built_path, target_path)

     # the parsed content of the built file must equal that of the fixture
     built_content = parse_content(target_path)
     fixture_content = parse_content(fixture_path)
     self.assertEqual(built_content, fixture_content)
示例#3
0
 def test_build_to_html(self):
     """Check HTML conversion of the title, summary and text of a DOCX digest."""
     docx_file = "DIGEST 99999.docx"
     expected_title = u"Fishing for errors in the tests"
     summary_bytes = read_fixture("html_content_99999_summary.txt")
     expected_summary = summary_bytes.decode("utf-8")
     # one fixture per text paragraph, in paragraph order
     expected_texts = [
         read_fixture(fixture_name).decode("utf-8")
         for fixture_name in (
             "html_content_99999_text_1.txt",
             "html_content_99999_text_2.txt",
             "html_content_99999_text_3.txt",
         )
     ]

     # parse the DOCX into a digest object
     digest = build.build_digest(data_path(docx_file))

     # each digest value converted to HTML must match its expectation
     self.assertEqual(html.string_to_html(digest.title), expected_title)
     self.assertEqual(html.string_to_html(digest.summary), expected_summary)
     for position, expected_text in enumerate(expected_texts):
         self.assertEqual(
             html.string_to_html(digest.text[position]), expected_text
         )
示例#4
0
 def test_build_digest(self, test_data):
     """Check the digest object built from a DOCX file against expected values.

     ``test_data`` supplies ``config_section`` and ``file_name``, plus an
     optional ``image_file`` used to check the image file path.
     """
     expected_author = u"Anonymous"
     # note: after 'the' in the title is a unicode non-breaking space character
     expected_title = u"Fishing for errors in the\xa0tests"
     expected_summary = (
         u"Testing a document which mimics the format of a file we’ve used  "
         "before plus CO<sub>2</sub> and Ca<sup>2+</sup>."
     )
     expected_keywords = ["Face Recognition", "Neuroscience", "Vision"]
     expected_doi = u"https://doi.org/10.7554/eLife.99999"
     expected_text_len = 3
     # one fixture per text paragraph, in paragraph order
     expected_texts = [
         read_fixture(fixture_name).decode("utf-8")
         for fixture_name in (
             "digest_content_99999_text_1.txt",
             "digest_content_99999_text_2.txt",
             "digest_content_99999_text_3.txt",
         )
     ]
     expected_image_caption = (
         u"<b>It’s not just mammals who can recognise sample data.</b>"
         u"\xa0Image credit:\xa0Anonymous and Anonymous\xa0(CC BY\xa04.0)"
     )

     # load the config section named by the test data, then build the digest
     config_section = raw_config(test_data.get("config_section"))
     digest_config = parse_raw_config(config_section)
     docx_path = data_path(test_data.get("file_name"))
     digest = build.build_digest(docx_path, "tmp", digest_config)

     # scalar and list properties
     self.assertIsNotNone(digest)
     self.assertEqual(digest.author, expected_author)
     self.assertEqual(digest.title, expected_title)
     self.assertEqual(digest.summary, expected_summary)
     self.assertEqual(digest.keywords, expected_keywords)
     self.assertEqual(digest.doi, expected_doi)

     # text paragraphs
     self.assertEqual(len(digest.text), expected_text_len)
     for position, expected_text in enumerate(expected_texts):
         self.assertEqual(digest.text[position], expected_text)

     # image details are only checked when the digest has an image
     if not digest.image:
         return
     self.assertEqual(digest.image.caption, expected_image_caption)
     image_file = test_data.get("image_file")
     if image_file:
         expected_image_file = os.path.join("tmp", image_file)
         self.assertEqual(digest.image.file, expected_image_file)
def build_medium_content(
    file_name,
    temp_dir="tmp",
    digest_config=None,
    jats_file_name=None,
    image_file_name=None,
):
    """Build Medium content from a DOCX input file.

    The digest is built from ``file_name`` with ``build_digest``. When
    ``jats_file_name`` is supplied and yields digest content, each item of
    that content is converted with ``xml_to_html`` and the resulting list
    replaces the digest text.

    Returns an ``OrderedDict`` holding ``title``, ``contentFormat`` and
    ``content``, followed by ``tags`` and ``license`` when those values are
    truthy.
    """

    # build the digest object
    digest = build_digest(file_name, temp_dir, digest_config, image_file_name)

    # override the text with the jats file digest content
    if jats_file_name:
        soup = parse_jats_file(jats_file_name)
        jats_content = parse_jats_digest(soup)
        if jats_content:
            # Store a real list: on Python 3 a bare map() is a one-shot
            # iterator that cannot be indexed, measured with len() or read
            # twice, unlike the list-valued text used elsewhere.
            digest.text = [xml_to_html(item) for item in jats_content]

    # convert to Medium content components
    title = digest_medium_title(digest)
    # todo!! pass in footer content
    content_format = digest_medium_content_format(digest_config)
    content = digest_medium_content(digest, digest_config)
    tags = digest_medium_tags(digest)
    # license
    medium_license = digest_medium_license(digest_config)

    # assemble the return value; optional keys are omitted when empty
    medium_content = OrderedDict()
    medium_content["title"] = title
    medium_content["contentFormat"] = content_format
    medium_content["content"] = content
    if tags:
        medium_content["tags"] = tags
    if medium_license:
        medium_content["license"] = medium_license
    return medium_content
示例#6
0
def build_jats(file_name, temp_dir="tmp", digest_config=None):
    """Build JATS content from a DOCX input file.

    Builds the digest with ``build_digest`` and returns the result of passing
    it to ``digest_jats``.
    """
    return digest_jats(build_digest(file_name, temp_dir, digest_config))
示例#7
0
def build_docx(file_name, output_file_name):
    """Build an output DOCX from a DOCX input file.

    Builds the digest with ``build_digest`` using its default arguments and
    returns the result of ``digest_docx`` for that digest and
    ``output_file_name``.
    """
    return digest_docx(build_digest(file_name), output_file_name)