def test_csv_small(self):
    """Check the record read from test_files/data-small.csv.

    Asserts the group id of the first record, that exactly one record is
    returned, that the first field's path and data are unicode objects,
    and the expected values at field indexes 0, 2 and 4.
    """
    handler = DataHandler(os.path.join('test_files', 'data-small.csv'))
    records = handler.get_xml_records()
    self.assertEqual(records[0].group_id, u'test1')
    self.assertEqual(len(records), 1)

    # Path and value of the first field must both be unicode.
    for key in ('xml_path', 'data'):
        self.assertTrue(isinstance(records[0].field_data()[0][key], unicode))

    # (field index, expected xml_path, expected data)
    expected_fields = (
        (0, u'<mods:identifier type="local" displayLabel="Originăl noé.">', u'123'),
        (2, u'<mods:titleInfo><mods:title>', u'Test 1'),
    )
    for index, expected_path, expected_data in expected_fields:
        self.assertEqual(records[0].field_data()[index]['xml_path'], expected_path)
        self.assertEqual(records[0].field_data()[index]['data'], expected_data)

    self.assertEqual(records[0].group_id, u'test1')
    self.assertEqual(records[0].field_data()[4]['data'], u'2005-10-21')
def test_csv_small(self):
    """Check the record read from test_files/data-small.csv.

    Asserts the id of the first record, that exactly one record is
    returned, that the first field's path and data are unicode objects,
    and the expected values at field indexes 0, 2 and 4.
    """
    csv_path = os.path.join("test_files", "data-small.csv")
    records = DataHandler(csv_path).get_mods_records()

    def field(index, key):
        # field_data() is invoked afresh for every lookup.
        return records[0].field_data()[index][key]

    self.assertEqual(records[0].id, u"test1")
    self.assertEqual(len(records), 1)

    self.assertTrue(isinstance(field(0, "mods_path"), unicode))
    self.assertTrue(isinstance(field(0, "data"), unicode))

    self.assertEqual(
        field(0, "mods_path"),
        u'<mods:identifier type="local" displayLabel="Originăl noé.">',
    )
    self.assertEqual(field(0, "data"), u"123")
    self.assertEqual(field(2, "mods_path"), u"<mods:titleInfo><mods:title>")
    self.assertEqual(field(2, "data"), u"Test 1")

    self.assertEqual(records[0].id, u"test1")
    self.assertEqual(field(4, "data"), u"2005-10-21")
def test_xlsx(self):
    """Check the records read from test_files/data.xlsx with obj_type='child'.

    Asserts that two records are returned, that the first field's path and
    data are unicode objects, the expected values at field indexes 0, 2
    and 4 of the first record, and the group_id / xml_id of both records.
    """
    xlsx_path = os.path.join('test_files', 'data.xlsx')
    records = DataHandler(xlsx_path, obj_type='child').get_xml_records()
    self.assertEqual(records[0].group_id, u'test1')
    self.assertEqual(len(records), 2)

    # Path and value of the first field must both be unicode.
    for key in ('xml_path', 'data'):
        self.assertTrue(isinstance(records[0].field_data()[0][key], unicode))

    # (field index, key, expected value) for the first record, in check order
    expected_values = (
        (0, 'xml_path', u'<mods:identifier type="local" displayLabel="Originăl noé.">'),
        (0, 'data', u'123'),
        (2, 'xml_path', u'<mods:titleInfo><mods:title>'),
        (2, 'data', u'Test 1'),
        (4, 'data', u'2005-10-21'),
    )
    for index, key, expected in expected_values:
        self.assertEqual(records[0].field_data()[index][key], expected)

    # Both rows share the group id; the xml_id carries a _1 / _2 suffix
    # because the records are children.
    for position, expected_xml_id in enumerate((u'test1_1', u'test1_2')):
        self.assertEqual(records[position].group_id, u'test1')
        self.assertEqual(records[position].xml_id, expected_xml_id)
def test_xlsx(self):
    """Check the records read from test_files/data.xlsx with obj_type="child".

    Asserts that two records are returned, that the first field's path and
    data are unicode objects, the expected values at field indexes 0, 2
    and 4 of the first record, and the id / mods_id of both records.
    """
    handler = DataHandler(os.path.join("test_files", "data.xlsx"), obj_type="child")
    records = handler.get_mods_records()

    def first_field(index, key):
        # field_data() is invoked afresh for every lookup.
        return records[0].field_data()[index][key]

    self.assertEqual(records[0].id, u"test1")
    self.assertEqual(len(records), 2)

    self.assertTrue(isinstance(first_field(0, "mods_path"), unicode))
    self.assertTrue(isinstance(first_field(0, "data"), unicode))

    self.assertEqual(
        first_field(0, "mods_path"),
        u'<mods:identifier type="local" displayLabel="Originăl noé.">',
    )
    self.assertEqual(first_field(0, "data"), u"123")
    self.assertEqual(first_field(2, "mods_path"), u"<mods:titleInfo><mods:title>")
    self.assertEqual(first_field(2, "data"), u"Test 1")
    self.assertEqual(first_field(4, "data"), u"2005-10-21")

    # Both rows share the id; the mods_id carries a _1 / _2 suffix because
    # the records are children.
    expected_mods_ids = [u"test1_1", u"test1_2"]
    for position, expected_mods_id in enumerate(expected_mods_ids):
        self.assertEqual(records[position].id, u"test1")
        self.assertEqual(records[position].mods_id, expected_mods_id)
def test_xls(self):
    """Check the records read from test_files/data.xls.

    The default sheet must yield two records with the expected field
    values, group ids and xml ids; the sheet selected with sheet=2 must
    yield a single record.
    """
    xls_path = os.path.join('test_files', 'data.xls')
    records = DataHandler(xls_path).get_xml_records()
    self.assertEqual(len(records), 2)

    # Path and value of the first field must both be unicode.
    for key in ('xml_path', 'data'):
        self.assertTrue(isinstance(records[0].field_data()[0][key], unicode))

    # (field index, expected xml_path, expected data)
    expected_fields = (
        (0, u'<mods:identifier type="local" displayLabel="Originăl noé.">', u'123'),
        (2, u'<mods:titleInfo><mods:title>', u'Test 1'),
    )
    for index, expected_path, expected_data in expected_fields:
        self.assertEqual(records[0].field_data()[index]['xml_path'], expected_path)
        self.assertEqual(records[0].field_data()[index]['data'], expected_data)

    expected_ids = (u'test1', u'test2')
    for position, expected in enumerate(expected_ids):
        self.assertEqual(records[position].group_id, expected)
    for position, expected in enumerate(expected_ids):
        self.assertEqual(records[position].xml_id, expected)

    # test that process_text_date is working right
    self.assertEqual(records[0].field_data()[4]['data'], u'2005-10-21')

    # test that we can get the second sheet correctly
    second_sheet = DataHandler(os.path.join('test_files', 'data.xls'), sheet=2)
    sheet_records = second_sheet.get_xml_records()
    self.assertEqual(len(sheet_records), 1)
    self.assertEqual(sheet_records[0].xml_id, u'mods0001')
    self.assertEqual(sheet_records[0].field_data()[5]['data'], u'2008-10-21')
def test_csv_dwc(self):
    """Check the Darwin Core records read from test_files/data_dwc.csv.

    Asserts the ids of the first record, that its first field's path and
    data are unicode objects, and the xml_path / data pairs expected at
    selected field indexes of the first three records.
    """
    handler = DataHandler(os.path.join('test_files', 'data_dwc.csv'))
    records = handler.get_xml_records()

    self.assertEqual(records[0].group_id, u'test1')
    for key in ('xml_path', 'data'):
        self.assertTrue(isinstance(records[0].field_data()[0][key], unicode))
    self.assertEqual(records[0].group_id, u'test1')
    self.assertEqual(records[0].xml_id, u'test1')

    # (record index, field index, expected xml_path, expected data)
    expected_fields = (
        (0, 0, u'<dwc:higherClassification>', u'higher classification'),
        (0, 1, u'<dwc:genus>', u'Genus'),
        (0, 2, u'<dwc:specificEpithet>', u'species'),
        (0, 3, u'<dwc:infraspecificEpithet>', u'variety'),
        (0, 4, u'<dwc:taxonRank>', u'variety'),
        (0, 5, u'<dwc:scientificNameAuthorship>', u'Variety Author'),
        (0, 6, u'<dwc:acceptedNameUsage>', u'Genus species var. variety Variety Author'),
        (1, 2, u'<dwc:specificEpithet>', u'species2'),
        (1, 3, u'<dwc:infraspecificEpithet>', u'subspecies'),
        (1, 4, u'<dwc:taxonRank>', u'subspecies'),
        (1, 6, u'<dwc:acceptedNameUsage>', u'Genus2 species2 subsp. subspecies Subspecies Author'),
        (2, 2, u'<dwc:specificEpithet>', u'species3'),
        (2, 3, u'<dwc:scientificNameAuthorship>', u'Species3 Author'),
        (2, 4, u'<dwc:acceptedNameUsage>', u'Genus3 species3 Species3 Author'),
    )
    for record_index, field_index, expected_path, expected_data in expected_fields:
        # Records are indexed lazily so a missing record fails at its own row.
        self.assertEqual(
            records[record_index].field_data()[field_index]['xml_path'],
            expected_path,
        )
        self.assertEqual(
            records[record_index].field_data()[field_index]['data'],
            expected_data,
        )
def test_xls(self):
    """Check the records read from test_files/data.xls.

    The default sheet must yield two records with the expected field
    values, ids and mods ids; the sheet selected with sheet=2 must yield
    a single record.
    """
    handler = DataHandler(os.path.join("test_files", "data.xls"))
    records = handler.get_mods_records()

    def first_field(index, key):
        # field_data() is invoked afresh for every lookup.
        return records[0].field_data()[index][key]

    self.assertEqual(len(records), 2)
    self.assertTrue(isinstance(first_field(0, "mods_path"), unicode))
    self.assertTrue(isinstance(first_field(0, "data"), unicode))

    self.assertEqual(
        first_field(0, "mods_path"),
        u'<mods:identifier type="local" displayLabel="Originăl noé.">',
    )
    self.assertEqual(first_field(0, "data"), u"123")
    self.assertEqual(first_field(2, "mods_path"), u"<mods:titleInfo><mods:title>")
    self.assertEqual(first_field(2, "data"), u"Test 1")

    expected_ids = [u"test1", u"test2"]
    for position, expected in enumerate(expected_ids):
        self.assertEqual(records[position].id, expected)
    for position, expected in enumerate(expected_ids):
        self.assertEqual(records[position].mods_id, expected)

    # test that process_text_date is working right
    self.assertEqual(first_field(4, "data"), u"2005-10-21")

    # test that we can get the second sheet correctly
    sheet_handler = DataHandler(os.path.join("test_files", "data.xls"), sheet=2)
    sheet_records = sheet_handler.get_mods_records()
    self.assertEqual(len(sheet_records), 1)
    self.assertEqual(sheet_records[0].mods_id, u"mods0001")
    self.assertEqual(sheet_records[0].field_data()[5]["data"], u"2008-10-21")