def test_manual_cleanup_zipped(self):
    """Check that an explicit close() leaves no stray files behind.

    The number of entries in the current working directory is recorded
    before a reader is opened on the zipped archive and again after it has
    been closed manually; both counts must match.
    """
    entries_before = os.listdir('.')

    reader = DwCAReader(BASIC_ARCHIVE_PATH)
    reader.close()

    entries_after = os.listdir('.')
    self.assertEqual(len(entries_before), len(entries_after))
def test_manual_cleanup_zipped(self):
    """Check that an explicit close() leaves no stray files behind.

    The number of entries in the current working directory is recorded
    before a reader is opened on the zipped sample archive and again after
    it has been closed manually; both counts must match.
    """
    entries_before = os.listdir('.')

    reader = DwCAReader(sample_data_path('dwca-simple-test-archive.zip'))
    reader.close()

    entries_after = os.listdir('.')
    self.assertEqual(len(entries_before), len(entries_after))
def test_manual_cleanup_zipped(self):
    """Check that an explicit close() leaves no stray files behind.

    The number of entries in the current working directory is recorded
    before a reader is opened on the zipped archive and again after it has
    been closed manually; both counts must match.
    """
    entries_before = os.listdir(".")

    reader = DwCAReader(BASIC_ARCHIVE_PATH)
    reader.close()

    entries_after = os.listdir(".")
    self.assertEqual(len(entries_before), len(entries_after))
def test_source_data_not_destroyed_directory(self):
    """A directory-based archive must survive being opened and closed.

    Guards against the cleanup routine meant for zipped archives being run
    by accident on a directory source.
    """
    first_reader = DwCAReader(sample_data_path('dwca-simple-dir'))
    first_reader.close()

    # Re-opening the same directory fails if the first close() removed it.
    second_reader = DwCAReader(sample_data_path('dwca-simple-dir'))
    loaded_metadata = second_reader.metadata
    self.assertIsInstance(loaded_metadata, ET.Element)
    second_reader.close()
def test_exception_invalid_archives_missing_metadata(self):
    """InvalidArchive is raised when the referenced metadata file is absent."""
    # Some archives ship a metafile that points to a metadata document which
    # is not actually included in the archive.
    # See for example http://dev.gbif.org/issues/browse/PF-2125
    with self.assertRaises(InvalidArchive) as raised:
        reader = DwCAReader(sample_data_path('dwca-invalid-lacks-metadata'))
        reader.close()

    # The exception text must name the missing file.
    self.assertEqual(
        str(raised.exception),
        "eml.xml is referenced in the archive descriptor but missing.",
    )
def test_source_data_not_destroyed_directory(self):
    """A directory-based archive must survive being opened and closed.

    Guards against the cleanup routine meant for zipped archives being run
    accidentally on a directory source.
    """
    first_reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
    first_reader.close()

    # Re-opening the same directory fails if the first close() removed it.
    second_reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
    loaded_metadata = second_reader.metadata
    self.assertIsInstance(loaded_metadata, ET.Element)
    second_reader.close()
def test_exception_invalid_archives_missing_metadata(self):
    """InvalidArchive is raised when the referenced metadata file is absent."""
    # Some archives ship a metafile that points to a metadata document which
    # is not actually included in the archive.
    # See for example http://dev.gbif.org/issues/browse/PF-2125
    with self.assertRaises(InvalidArchive) as raised:
        reader = DwCAReader(INVALID_LACKS_METADATA)
        reader.close()

    # The exception text must name the missing file.
    self.assertEqual(
        str(raised.exception),
        "eml.xml is referenced in the archive descriptor but missing.",
    )
def dwca_metadata(dwca_file):
    """Open a Darwin Core archive and return its metadata.

    Parameters:
        dwca_file: the archive location, passed unchanged to DwCAReader.

    Returns:
        The reader's ``metadata`` attribute, or None when the reader object
        evaluates as false.
    """
    # Open the Darwin Core Archive given in dwca_file
    dwca = DwCAReader(dwca_file)
    if not dwca:
        return None

    try:
        # Pull the metadata from the archive
        return dwca.metadata
    finally:
        # Always close the archive to free resources, even when reading the
        # metadata raises.
        dwca.close()
def test_exception_invalid_simple_archives(self):
    """InvalidArchive is raised for simple archives that can't be interpreted.

    An archive without a metafile is expected to hold a single core data
    file, optionally accompanied by metadata in EML.xml. When an archive
    deviates from that layout, python-dwca-reader cannot work out which file
    is the data file and should raise InvalidArchive.
    """
    invalid_archive_names = (
        # Holds an extra, unrelated file next to the data file and EML.xml,
        # so the data file cannot be chosen.
        'dwca-invalid-simple-toomuch.zip',
        'dwca-invalid-simple-two.zip',
    )

    for archive_name in invalid_archive_names:
        with self.assertRaises(InvalidArchive):
            reader = DwCAReader(sample_data_path(archive_name))
            reader.close()
def test_exception_invalid_simple_archives(self):
    """InvalidArchive is raised for simple archives that can't be interpreted.

    An archive without a metafile is expected to hold a single core data
    file, optionally accompanied by metadata in EML.xml. When an archive
    deviates from that layout, python-dwca-reader cannot work out which file
    is the data file and should raise InvalidArchive.
    """
    invalid_archives = (
        # Holds an extra, unrelated file next to the data file and EML.xml,
        # so the data file cannot be chosen.
        INVALID_SIMPLE_TOOMUCH,
        INVALID_SIMPLE_TWO,
    )

    for archive_path in invalid_archives:
        with self.assertRaises(InvalidArchive):
            reader = DwCAReader(archive_path)
            reader.close()
def test_classic_opening(self):
    """Check the reader is usable without a 'with' block (explicit close())."""
    reader = DwCAReader(sample_data_path('dwca-simple-test-archive.zip'))
    loaded_metadata = reader.metadata
    self.assertIsInstance(loaded_metadata, ET.Element)
    reader.close()
rowcount = 0 with open(fullpath, 'a') as tsvfile: writer = csv.DictWriter(tsvfile, dialect=dialect, fieldnames=termnames, quoting=csv.QUOTE_NONE, quotechar='') for row in dwcareader: # print 'Row %s:\n%s' % (rowcount,row.data) for f in row.data: row.data[f]=row.data[f].encode("utf-8") writer.writerow(row.data) rowcount += 1 # Get the number of records in the core file. # rowcount = get_core_rowcount(dwcareader) # Close the archive dwcareader.close() # Successfully completed the mission # Return a dict of important information as a JSON string response = {} returnvars = ['fullpath', 'rowcount'] returnvals = [fullpath, rowcount] i=0 for a in returnvars: response[a]= returnvals[i] i+=1 return json.dumps(response) def _getoptions(): """Parses command line options and returns them.""" parser = OptionParser()
def test_classic_opening(self):
    """Check the reader is usable without a 'with' block (explicit close())."""
    reader = DwCAReader(BASIC_ARCHIVE_PATH)
    loaded_metadata = reader.metadata
    self.assertIsInstance(loaded_metadata, ET.Element)
    reader.close()
# if options.vocab_path is not None: # for term in controlledtermlist: # termvalues=get_distinct_term_values(dwcareader, term) # vocabfile='%s/%s.csv' % (options.vocab_path,term) # print 'vocabfile: %s term: %s termlist: %s' % (vocabfile, term, termvalues) # append_to_vocab(vocabfile, termvalues) # print '%s values: %s' % (term, termvalues) # print '\nGeography keys:' # i = 0 # for row in dwcareader: # geogkey = get_term_group_key(row.data, geogkeytermlist) # i = i + 1 # print '%s' % (geogkey) # print 'Count=%s' % i # # i = 0 # print '\nTaxonomy keys:\n' # for row in dwcareader: # taxonkey = get_term_group_key(row.data, taxonkeytermlist) # i = i + 1 # print '%s' % (taxonkey) # print 'Count=%s' % i dwcareader.close() if __name__ == '__main__': """ Demo of dwca_utils functions""" main()