Пример #1
0
    def testXml(self):
        """Check that highwire elements serialize to the expected XML.

        Attribute order inside the serialized bytes is not guaranteed, so
        both the generated and the expected documents are parsed and the
        attributes of their children are compared after sorting.
        """
        def by_content_then_name(attrs):
            # Sort key shared by the generated and the expected attributes.
            return (attrs['content'], attrs['name'])

        elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
        xml_bytes = generate_highwire_xml(elements)

        # Parse the generated document first, then the expected one.
        actual_root = fromstring(xml_bytes)
        actual = sorted((node.attrib for node in actual_root),
                        key=by_content_then_name)
        expected = sorted((node.attrib for node in fromstring(HIGHWIRE_XML)),
                          key=by_content_then_name)
        self.assertEqual(expected, actual)

        # The root must be `metadata` and every child of it must be `meta`.
        self.assertEqual(actual_root.tag, 'metadata')
        for node in actual_root:
            self.assertEqual(node.tag, 'meta')
Пример #2
0
 def test_convert_none_content_object_to_dict_and_back(self):
     """Test py2dict and untldict2py on a child whose content is None.

     A title that has a qualifier but no content is expected to be
     converted by py2dict to an empty list. After content is assigned,
     both the content and the qualifier are expected in the dictionary.
     """
     field = PYUNTL_DISPATCH['title'](
         content=None
     )
     self.record.add_child(field)
     self.record.children[0].set_qualifier('officialtitle')
     missing_content_dict = py2dict(self.record)
     # assertEqual/assertNotEqual report both operands on failure,
     # which assertTrue(a == b) cannot do.
     self.assertEqual(missing_content_dict, {'title': []})
     py_from_dict = untldict2py(missing_content_dict)
     # NOTE(review): this compares against the qualifier of the object
     # returned by untldict2py itself, not of one of its children --
     # confirm that is the intended check.
     self.assertNotEqual(
         self.record.children[0].qualifier, py_from_dict.qualifier
     )
     self.record.children[0].content = 'fake content'
     missing_content_dict = py2dict(self.record)
     self.assertEqual(
         missing_content_dict,
         {
             'title': [
                 {'content': 'fake content', 'qualifier': 'officialtitle'}
             ]
         }
     )
Пример #3
0
def test_generate_highwire_xml():
    """Verify generate_highwire_xml output against the expected document.

    The attributes of the children are compared after sorting, so the
    comparison does not depend on the order of attributes or elements.
    """
    def attribute_key(attrs):
        # Sort key shared by the generated and the expected attributes.
        return (attrs['content'], attrs['name'])

    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    xml = untldoc.generate_highwire_xml(highwire_list)
    expected_xml = (
        b'<?xml version="1.0" encoding="UTF-8"?>\n'
        b'<metadata>\n'
        b'  <meta content="Tres Actos" name="citation_title"/>\n'
        b'  <meta content="Last, Furston, 1807-1865." name="citation_author"/>\n'
        b'  <meta content="Fake Publishing" name="citation_publisher"/>\n'
        b'  <meta content="1944" name="citation_publication_date"/>\n'
        b'</metadata>\n')

    # Parse the generated document first, then the expected one.
    actual_root = fromstring(xml)
    actual = sorted((node.attrib for node in actual_root),
                    key=attribute_key)
    expected = sorted((node.attrib for node in fromstring(expected_xml)),
                      key=attribute_key)
    assert expected == actual

    # The root must be `metadata` and every child of it must be `meta`.
    assert actual_root.tag == 'metadata'
    for node in actual_root:
        assert node.tag == 'meta'
Пример #4
0
 def test_convert_none_content_object_to_dict_and_back(self):
     """Test py2dict and untldict2py on a child whose content is None.

     A title that has a qualifier but no content is expected to be
     converted by py2dict to an empty list. After content is assigned,
     both the content and the qualifier are expected in the dictionary.
     """
     field = PYUNTL_DISPATCH['title'](
         content=None
     )
     self.record.add_child(field)
     self.record.children[0].set_qualifier('officialtitle')
     missing_content_dict = py2dict(self.record)
     # assertEqual/assertNotEqual report both operands on failure,
     # which assertTrue(a == b) cannot do.
     self.assertEqual(missing_content_dict, {'title': []})
     py_from_dict = untldict2py(missing_content_dict)
     # NOTE(review): this compares against the qualifier of the object
     # returned by untldict2py itself, not of one of its children --
     # confirm that is the intended check.
     self.assertNotEqual(
         self.record.children[0].qualifier, py_from_dict.qualifier
     )
     self.record.children[0].content = 'fake content'
     missing_content_dict = py2dict(self.record)
     self.assertEqual(
         missing_content_dict,
         {
             'title': [
                 {'content': 'fake content', 'qualifier': 'officialtitle'}
             ]
         }
     )
Пример #5
0
def test_generate_highwire_json():
    """Verify generate_highwire_json renders the expected JSON string."""
    expected_json = (
        '{\n'
        '    "citation_author": [\n'
        '        {\n'
        '            "content": "Last, Furston, 1807-1865."\n'
        '        }\n'
        '    ],\n'
        '    "citation_publication_date": [\n'
        '        {\n'
        '            "content": "1944"\n'
        '        }\n'
        '    ],\n'
        '    "citation_publisher": [\n'
        '        {\n'
        '            "content": "Fake Publishing"\n'
        '        }\n'
        '    ],\n'
        '    "citation_title": [\n'
        '        {\n'
        '            "content": "Tres Actos"\n'
        '        }\n'
        '    ]\n'
        '}')
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    assert untldoc.generate_highwire_json(highwire_list) == expected_json
Пример #6
0
 def test_sort_untl(self):
     """Check that sort_untl orders the children by UNTL_PTH_ORDER."""
     root = untldict2py(UNTL_DICT)
     root.sort_untl(UNTL_PTH_ORDER)
     # Map every child to the position of its tag in UNTL_PTH_ORDER.
     positions = []
     for child in root.children:
         positions.append(UNTL_PTH_ORDER.index(child.tag))
     # The positions must never decrease from one child to the next.
     in_order = True
     for earlier, later in zip(positions, positions[1:]):
         if not earlier <= later:
             in_order = False
             break
     self.assertTrue(in_order)
Пример #7
0
 def test_sort_untl(self):
     """Check that sort_untl orders the children by UNTL_PTH_ORDER."""
     root = untldict2py(UNTL_DICT)
     root.sort_untl(UNTL_PTH_ORDER)
     # Map every child to the position of its tag in UNTL_PTH_ORDER.
     positions = []
     for child in root.children:
         positions.append(UNTL_PTH_ORDER.index(child.tag))
     # The positions must never decrease from one child to the next.
     in_order = True
     for earlier, later in zip(positions, positions[1:]):
         if not earlier <= later:
             in_order = False
             break
     self.assertTrue(in_order)
Пример #8
0
def test_generate_highwire_text():
    """Verify generate_highwire_text renders the expected text lines."""
    expected_text = ('citation_title: Tres Actos\n'
                     'citation_author: Last, Furston, 1807-1865.\n'
                     'citation_publisher: Fake Publishing\n'
                     'citation_publication_date: 1944')
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    assert untldoc.generate_highwire_text(highwire_list) == expected_text
Пример #9
0
def test_untldict2py_dict_includes_qualifier_only_element():
    """Verify untldict2py accepts an element that only has a qualifier."""
    # It is unclear whether elements lacking both content and children
    # occur in practice, but the code handles that scenario.
    source = deepcopy(UNTL_DICTIONARY)
    source['date'] = [{'qualifier': 'creation'}]
    root = untldoc.untldict2py(source)
    assert isinstance(root, us.UNTLElement)
    date_element = root.children[4]
    assert date_element.tag == 'date'
    assert date_element.qualifier == 'creation'
Пример #10
0
def test_untlpy2dcpy_resolve_values_retrieve_vocab(mock_vocab):
    """Verify resolve_values swaps a language code for its vocabulary label.

    The vocabulary comes from the `mock_vocab` mock rather than being
    passed in explicitly.
    """
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    mock_vocab.return_value = {'languages': [spanish]}
    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})
    root = untldoc.untlpy2dcpy(untl_elements, resolve_values=True)
    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'Spanish'
Пример #11
0
def test_untlpy2highwirepy_not_official_title():
    """Verify a title that is not official still maps to citation_title."""
    alternate_title = {
        'qualifier': 'alternatetitle',
        'content': 'Tres Actos'
    }
    untl_elements = untldoc.untldict2py({'title': [alternate_title]})
    highwire_list = untldoc.untlpy2highwirepy(untl_elements)
    assert len(highwire_list) == 1
    element = highwire_list[0]
    assert element.qualifier == 'alternatetitle'
    assert element.name == 'citation_title'
    assert element.content == 'Tres Actos'
Пример #12
0
def test_untlpy2dcpy():
    """Verify the Dublin Core tree that untlpy2dcpy builds from UNTL data."""
    coverage = [
        {'content': '1943', 'qualifier': 'sDate'},
        {'content': '1944', 'qualifier': 'eDate'},
        {'content': 'United States - Texas', 'qualifier': 'placeName'},
    ]
    publisher = [{
        'content': {'name': 'UNT Press', 'location': 'Denton, Texas'}
    }]
    creator = [{
        'content': {'type': 'org', 'name': 'UNT'},
        'qualifier': 'aut'
    }]
    title = [{'content': 'UNT Book', 'qualifier': 'officialtitle'}]
    collection = [{'content': 'UNT'}]
    untl_dict = {
        'coverage': coverage,
        'publisher': publisher,
        'creator': creator,
        'title': title,
        'collection': collection
    }

    root = untldoc.untlpy2dcpy(untldoc.untldict2py(untl_dict))
    assert isinstance(root, dc.DCElement)
    assert len(root.children) == 5
    assert root.tag == 'dc'

    expected_children = [
        ('coverage', 'United States - Texas'),
        ('publisher', 'UNT Press'),
        ('creator', 'UNT'),
        ('title', 'UNT Book'),
        # Coverage sDate/eDate are combined and added at the end.
        ('coverage', '1943-1944'),
    ]
    for position, (tag, content) in enumerate(expected_children):
        assert root.children[position].tag == tag
        assert root.children[position].content == content
Пример #13
0
def test_untlpy2dcpy_resolve_urls():
    """Verify resolve_urls swaps a language code for its vocabulary URL.

    The vocabulary is passed in through `verbose_vocabularies`.
    """
    spanish = {
        'url': 'http://example.com/languages/#spa',
        'name': 'spa',
        'label': 'Spanish'
    }
    vocabularies = {'languages': [spanish]}
    untl_elements = untldoc.untldict2py({'language': [{'content': 'spa'}]})
    root = untldoc.untlpy2dcpy(untl_elements,
                               resolve_urls=True,
                               verbose_vocabularies=vocabularies)
    language = root.children[0]
    assert language.tag == 'language'
    assert language.content == 'http://example.com/languages/#spa'
Пример #14
0
 def test_convert_content_no_qualifier_roundtrip(self):
     """Test adding a child without a qualifier doesn't create one
     when converting from py to dict to py.
     """
     field = PYUNTL_DISPATCH['title'](
         content='Tie Till the Title'
     )
     self.record.add_child(field)
     content_dict = py2dict(self.record)
     # assertEqual/assertIsNone report the offending values on failure,
     # which assertTrue(<comparison>) cannot do.
     self.assertEqual(
         content_dict, {'title': [{'content': 'Tie Till the Title'}]}
     )
     py_from_dict = untldict2py(content_dict)
     self.assertIsNone(py_from_dict.children[0].qualifier)
Пример #15
0
 def test_convert_content_no_qualifier_roundtrip(self):
     """Test adding a child without a qualifier doesn't create one
     when converting from py to dict to py.
     """
     field = PYUNTL_DISPATCH['title'](
         content='Tie Till the Title'
     )
     self.record.add_child(field)
     content_dict = py2dict(self.record)
     # assertEqual/assertIsNone report the offending values on failure,
     # which assertTrue(<comparison>) cannot do.
     self.assertEqual(
         content_dict, {'title': [{'content': 'Tie Till the Title'}]}
     )
     py_from_dict = untldict2py(content_dict)
     self.assertIsNone(py_from_dict.children[0].qualifier)
Пример #16
0
def test_untlpy2dcpy_add_permalink_and_ark():
    """Verify that passing an ark adds permalink and ark identifiers.

    The two identifier children are expected after the title, permalink
    first.
    """
    official_title = {
        'content': 'UNT Book',
        'qualifier': 'officialtitle'
    }
    untl_elements = untldoc.untldict2py({'title': [official_title]})
    root = untldoc.untlpy2dcpy(untl_elements,
                               ark='ark:/67531/metatest1',
                               domain_name='example.com',
                               scheme='https')
    children = root.children
    assert children[0].tag == 'title'
    assert children[1].tag == 'identifier'
    assert children[1].content == 'https://example.com/ark:/67531/metatest1/'
    assert children[2].tag == 'identifier'
    assert children[2].content == 'ark: ark:/67531/metatest1'
Пример #17
0
def test_untlpy2highwirepy():
    """Verify the highwire elements produced from UNTL_DICTIONARY."""
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    assert len(highwire_list) == 4
    for element in highwire_list:
        assert element.tag == 'meta'

    # Exactly four elements are present at this point (asserted above).
    author, publisher, publication_date, title = highwire_list

    assert author.qualifier == 'aut'
    assert author.name == 'citation_author'
    assert author.content == 'Last, Furston, 1807-1865.'

    assert publisher.qualifier is None
    assert publisher.name == 'citation_publisher'
    assert publisher.content == 'Fake Publishing'

    assert publication_date.qualifier == 'creation'
    assert publication_date.name == 'citation_publication_date'
    assert publication_date.content == '1944'

    assert title.qualifier == 'officialtitle'
    assert title.name == 'citation_title'
    assert title.content == 'Tres Actos'
Пример #18
0
def test_highwirepy2dict():
    """Verify highwirepy2dict maps each element name to its content list."""
    expected = {
        'citation_author': [{'content': 'Last, Furston, 1807-1865.'}],
        'citation_publisher': [{'content': 'Fake Publishing'}],
        'citation_publication_date': [{'content': '1944'}],
        'citation_title': [{'content': 'Tres Actos'}]
    }
    highwire_list = untldoc.untlpy2highwirepy(
        untldoc.untldict2py(UNTL_DICTIONARY))
    assert untldoc.highwirepy2dict(highwire_list) == expected
Пример #19
0
def test_untldict2py():
    """Verify the element tree that untldict2py builds from UNTL_DICTIONARY."""
    root = untldoc.untldict2py(UNTL_DICTIONARY)
    assert isinstance(root, us.UNTLElement)
    assert len(root.children) == 5
    assert root.tag == 'metadata'

    # Exactly five children are present at this point (asserted above).
    title, creator, publisher, collection, date = root.children

    assert title.tag == 'title'
    assert title.content == 'Tres Actos'
    assert title.qualifier == 'officialtitle'

    assert creator.tag == 'creator'
    assert creator.qualifier == 'aut'
    creator_name = creator.children[0]
    assert creator_name.tag == 'name'
    assert creator_name.content == 'Last, Furston, 1807-1865.'

    assert publisher.tag == 'publisher'
    publisher_name = publisher.children[0]
    assert publisher_name.tag == 'name'
    assert publisher_name.content == 'Fake Publishing'

    assert collection.tag == 'collection'
    assert collection.content == 'UNT'

    assert date.tag == 'date'
    assert date.content == '1944'
Пример #20
0
 def testTextEscape(self):
     """Check the ANVL text produced when conversion is run with escape=True."""
     title_entries = [{
         'content': 'Clifford & Lassie',
         'qualifier': 'officialtitle'
     }]
     meta_entries = [{
         'content': 'ark:/67531/metapth38622',
         'qualifier': 'ark'
     }, {
         'content': '2008-06-29, 00:31:14',
         'qualifier': 'metadataCreationDate'
     }]
     small_untl_dict = {'title': title_entries, 'meta': meta_entries}

     elements = untlpy2highwirepy(untldict2py(small_untl_dict), escape=True)
     produced = generate_highwire_text(elements)
     # The ampersand in the title is expected to come out escaped.
     wanted = ('citation_title: Clifford &amp; Lassie\n'
               'citation_online_date: 06/29/2008')
     self.assertEqual(produced, wanted)
Пример #21
0
 def testHighwire2Dict(self):
     """Check that highwirepy2dict returns exactly a dict."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     converted = highwirepy2dict(elements)
     self.assertEqual(type(converted), dict)
Пример #22
0
 def setUp(self):
     """Create the root element from UNTL_DICT."""
     root = untldict2py(UNTL_DICT)
     self.root_element = root
Пример #23
0
 def testIncompleteness(self):
     """Test that BAD_UNTL_DICT scores below 0.02 for completeness."""
     bad_pyuntl = untldict2py(BAD_UNTL_DICT)
     incompleteness = determine_completeness(bad_pyuntl)
     # assertLess reports both values on failure, unlike assertTrue.
     self.assertLess(incompleteness, 0.02)
Пример #24
0
 def testIncompleteness(self):
     """Test that BAD_UNTL_DICT scores below 0.02 for completeness."""
     bad_pyuntl = untldict2py(BAD_UNTL_DICT)
     incompleteness = determine_completeness(bad_pyuntl)
     # assertLess reports both values on failure, unlike assertTrue.
     self.assertLess(incompleteness, 0.02)
Пример #25
0
 def test_complete_record(self):
     """Test each tag appears as created from the dict keys."""
     self.record = untldict2py(UNTL_DICT)
     for c in self.record.children:
         # assertIn names the missing tag on failure; membership on the
         # dict itself checks its keys, so .keys() is not needed.
         self.assertIn(c.tag, UNTL_DICT)
Пример #26
0
 def testETDfromUNTL(self):
     """Check that untlpy2etd_ms returns an object whose type is ETD_MS."""
     untlpy = untldict2py(UNTL_DICT)
     converted = untlpy2etd_ms(untlpy)
     self.assertEqual(type(converted), ETD_MS)
Пример #27
0
 def test_generate_form_data(self):
     """Test that generate_form_data returns a FormGenerator instance."""
     self.record = untldict2py(UNTL_DICT)
     form_gen = self.record.generate_form_data(sort_order=UNTL_PTH_ORDER)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(form_gen, FormGenerator)
Пример #28
0
 def testUNTL2HIGHWIRE(self):
     """Test conversion from UNTL to Highwire."""
     untlpy = untldict2py(UNTL_DICT)
     highwi = untlpy2highwirepy(untlpy)
     for element in highwi:
         # Equivalent to issubclass(type(element), HighwireElement) for
         # ordinary objects, but reports the actual type on failure.
         self.assertIsInstance(element, HighwireElement)
Пример #29
0
    def __init__(self, identifier, metadataLocations, staticFileLocations,
                 mimetypeIconsPath, use, **kwargs):
        """Load a resource's METS record and the data derived from it.

        identifier can either be an absolute path to a mets.xml file, or a
        meta_id.  In the latter case, it will derive the path from the
        meta_id.

        metadataLocations, staticFileLocations and use are stored on the
        instance; metadataLocations is also used to locate the METS record
        when a meta_id is given.  mimetypeIconsPath is accepted but not
        read by this constructor.

        Optional keyword arguments read here:
            getCopy_url -- when truthy, getCopy data is fetched for the
                resource; otherwise `getCopy_data` is an empty dict.
            transcriptions_server_url -- passed through to
                get_transcriptions_data.

        Raises ResourceObjectException if the METS document cannot be
        opened.
        """
        self.metadataLocations = metadataLocations
        self.staticFileLocations = staticFileLocations
        self.use = use
        getCopy_url = kwargs.get('getCopy_url', None)

        # if the identifier is a filename, use that.  Otherwise treat it as
        # a meta_id
        is_mets_path = identifier.endswith(".mets.xml")
        if is_mets_path:
            self.mets_filename = identifier
            self.meta_id = os.path.split(identifier)[1].split(".")[0]
        else:
            self.meta_id = identifier
        # Get the pair path for the digital object
        self.pair_path = get_pair_path(self.meta_id)
        if is_mets_path:
            self.metadata_system = None
        else:
            # Determine the location of the resource and the filename
            # of the resource's record
            self.mets_filename, self.metadata_system = get_mets_record_system(
                self.meta_id,
                self.pair_path,
                metadataLocations
            )
        # Get dimensions data
        self.dimensions = get_dimensions_data(self.mets_filename)
        # If a getCopy url was given
        if getCopy_url:
            self.getCopy_data = get_getCopy_data(getCopy_url, self.meta_id)
        else:
            self.getCopy_data = {}
        # Open the METS document
        try:
            mets_filehandle = open_system_file(self.mets_filename)
        except Exception:
            raise ResourceObjectException("Could not open the Mets " +
                                          "document: %s" % (self.meta_id))
        # Parse the mets document; close the handle even when parsing
        # fails so it is never leaked.
        try:
            parsed_mets = etree.parse(mets_filehandle)
        finally:
            mets_filehandle.close()
        # Get Metadata File
        self.get_metadata_file(parsed_mets)
        # Get the descriptive metadata
        self.desc_MD = get_desc_metadata(self.metadata_file,
                                         self.metadata_type)
        # Get transcriptions data; the resource type is the content of the
        # first `resourceType` entry, or None when there is none.
        resource_types = self.desc_MD.get('resourceType')
        if resource_types:
            resource_type = resource_types[0].get('content')
        else:
            resource_type = None
        self.transcriptions = get_transcriptions_data(
            meta_id=self.meta_id,
            resource_type=resource_type,
            transcriptions_server_url=kwargs.get('transcriptions_server_url'),
        )
        # Get the fileSets within the fileSec
        self.get_structMap(parsed_mets)
        # Get the embargo information, if it exists
        self.get_embargo()
        # Get the author citation string
        self.author_citation_string = get_author_citation_string(self.desc_MD)
        self.completeness = untldict2py(self.desc_MD).completeness
Пример #30
0
 def test_complete_record(self):
     """Test each tag appears as created from the dict keys."""
     self.record = untldict2py(UNTL_DICT)
     for c in self.record.children:
         # assertIn names the missing tag on failure; membership on the
         # dict itself checks its keys, so .keys() is not needed.
         self.assertIn(c.tag, UNTL_DICT)
Пример #31
0
 def testUNTL2HIGHWIRE(self):
     """Test conversion from UNTL to Highwire."""
     untlpy = untldict2py(UNTL_DICT)
     highwi = untlpy2highwirepy(untlpy)
     for element in highwi:
         # Equivalent to issubclass(type(element), HighwireElement) for
         # ordinary objects, but reports the actual type on failure.
         self.assertIsInstance(element, HighwireElement)
Пример #32
0
 def test_generate_form_data(self):
     """Test that generate_form_data returns a FormGenerator instance."""
     self.record = untldict2py(UNTL_DICT)
     form_gen = self.record.generate_form_data(sort_order=UNTL_PTH_ORDER)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(form_gen, FormGenerator)
Пример #33
0
 def setUp(self):
     """Create the root element from UNTL_DICT."""
     root = untldict2py(UNTL_DICT)
     self.root_element = root
Пример #34
0
def test_untlpy2dcpy_only_eDate():
    """Verify a lone eDate coverage keeps its content after conversion."""
    end_date = {'content': '1955', 'qualifier': 'eDate'}
    untl_elements = untldoc.untldict2py({'coverage': [end_date]})
    root = untldoc.untlpy2dcpy(untl_elements)
    coverage = root.children[0]
    assert coverage.tag == 'coverage'
    assert coverage.content == '1955'
Пример #35
0
 def testCompleteness(self):
     """Check that UNTL_DICT yields a completeness of exactly 1.0."""
     score = determine_completeness(untldict2py(UNTL_DICT))
     self.assertEqual(score, 1.0)
Пример #36
0
 def testJson(self):
     """Check that highwire elements render to the HIGHWIRE_JSON string."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     produced = generate_highwire_json(elements)
     self.assertEqual(produced, HIGHWIRE_JSON)
Пример #37
0
 def testCompleteness(self):
     """Check that UNTL_DICT yields a completeness of exactly 1.0."""
     score = determine_completeness(untldict2py(UNTL_DICT))
     self.assertEqual(score, 1.0)
Пример #38
0
 def testText(self):
     """Check that highwire elements render to the HIGHWIRE_TEXT string."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     produced = generate_highwire_text(elements)
     self.assertEqual(produced, HIGHWIRE_TEXT)
Пример #39
0
 def test_create_pyuntl_from_dict(self):
     """Test that untldict2py(UNTL_DICT) returns a Metadata instance."""
     self.root_element = untldict2py(UNTL_DICT)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(self.root_element, Metadata)
Пример #40
0
 def testHighwire2Dict(self):
     """Check that highwirepy2dict returns exactly a dict."""
     elements = untlpy2highwirepy(untldict2py(UNTL_DICT))
     converted = highwirepy2dict(elements)
     self.assertEqual(type(converted), dict)
Пример #41
0
 def test_create_pyuntl_from_dict(self):
     """Test that untldict2py(UNTL_DICT) returns a Metadata instance."""
     self.root_element = untldict2py(UNTL_DICT)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(self.root_element, Metadata)