def test_manual_cleanup_zipped(self):
        """Opening an archive and calling close() by hand must not leave files behind.

        The current directory is listed before and after the open/close cycle and the
        number of entries is expected to be identical.
        """
        entries_before = os.listdir('.')

        reader = DwCAReader(BASIC_ARCHIVE_PATH)
        reader.close()

        entries_after = os.listdir('.')

        # A leftover file in the current directory would change the count.
        self.assertEqual(len(entries_before), len(entries_after))
    def test_manual_cleanup_zipped(self):
        """Opening an archive and calling close() by hand must not leave files behind.

        The current directory is listed before and after the open/close cycle and the
        number of entries is expected to be identical.
        """
        listing_before = os.listdir('.')

        reader = DwCAReader(sample_data_path('dwca-simple-test-archive.zip'))
        reader.close()

        listing_after = os.listdir('.')

        # A leftover file in the current directory would change the count.
        self.assertEqual(len(listing_before), len(listing_after))
    def test_manual_cleanup_zipped(self):
        """Opening an archive and calling close() by hand must not leave files behind.

        The current directory is listed before and after the open/close cycle and the
        number of entries is expected to be identical.
        """
        cwd_before = os.listdir(".")

        reader = DwCAReader(BASIC_ARCHIVE_PATH)
        reader.close()

        cwd_after = os.listdir(".")

        # A leftover file in the current directory would change the count.
        self.assertEqual(len(cwd_before), len(cwd_after))
    def test_source_data_not_destroyed_directory(self):
        """A directory-based archive must still be usable after a reader closed it.

        Guards against the cleanup routine intended for zipped archives being applied
        to a source directory by mistake.
        """
        reader = DwCAReader(sample_data_path('dwca-simple-dir'))
        reader.close()

        # Opening the same directory again fails if the first close() removed it.
        reader = DwCAReader(sample_data_path('dwca-simple-dir'))
        self.assertIsInstance(reader.metadata, ET.Element)
        reader.close()
    def test_exception_invalid_archives_missing_metadata(self):
        """Opening an archive whose referenced metadata file is absent raises InvalidArchive."""
        # The archive metafile may point to a metadata file that was never packaged with
        # the archive. See for example http://dev.gbif.org/issues/browse/PF-2125
        with self.assertRaises(InvalidArchive) as raised:
            reader = DwCAReader(sample_data_path('dwca-invalid-lacks-metadata'))
            reader.close()

        # The exception text is checked verbatim.
        self.assertEqual(
            str(raised.exception),
            "eml.xml is referenced in the archive descriptor but missing.",
        )
    def test_source_data_not_destroyed_directory(self):
        """A directory-based archive must still be usable after a reader closed it.

        Guards against the cleanup routine intended for zipped archives being applied
        to a source directory by mistake.
        """
        reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
        reader.close()

        # Opening the same directory again fails if the first close() removed it.
        reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
        self.assertIsInstance(reader.metadata, ET.Element)
        reader.close()
    def test_exception_invalid_archives_missing_metadata(self):
        """Opening an archive whose referenced metadata file is absent raises InvalidArchive."""
        # The archive metafile may point to a metadata file that was never packaged with
        # the archive. See for example http://dev.gbif.org/issues/browse/PF-2125
        with self.assertRaises(InvalidArchive) as raised:
            reader = DwCAReader(INVALID_LACKS_METADATA)
            reader.close()

        # The exception text is checked verbatim.
        self.assertEqual(
            str(raised.exception),
            "eml.xml is referenced in the archive descriptor but missing.",
        )
    def test_source_data_not_destroyed_directory(self):
        """Closing a reader opened on a directory archive must leave that directory intact.

        Protects against the zipped-archive cleanup routine running on a source
        directory by accident.
        """
        archive_reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
        archive_reader.close()

        # A second open on the same path can only succeed if the directory survived.
        archive_reader = DwCAReader(DIRECTORY_ARCHIVE_PATH)
        self.assertIsInstance(archive_reader.metadata, ET.Element)
        archive_reader.close()
def dwca_metadata(dwca_file):
    """Open a Darwin Core archive and return its metadata.

    The reader is closed on every exit path, including when reading the
    metadata raises.

    Args:
        dwca_file: Archive location, passed unchanged to ``DwCAReader``.

    Returns:
        The reader's ``metadata`` attribute, or ``None`` if the reader object
        evaluates as false.
    """
    dwca = DwCAReader(dwca_file)
    try:
        # NOTE(review): DwCAReader instances are presumably always truthy, which
        # would make this guard a no-op -- confirm before removing it.
        if not dwca:
            return None

        return dwca.metadata
    finally:
        # Release the reader's resources on both return paths and on errors.
        dwca.close()
    def test_exception_invalid_simple_archives(self):
        """Simple archives that cannot be interpreted must raise InvalidArchive.

        An archive without a metafile is expected to hold exactly one core data file,
        optionally accompanied by metadata in EML.xml. When an archive deviates from
        that layout, python-dwca-reader has no way to identify the data file and should
        raise InvalidArchive.
        """
        # The first archive carries an extra, unrelated file next to the data file and
        # EML.xml, so the data file cannot be picked unambiguously.
        invalid_archive_names = (
            'dwca-invalid-simple-toomuch.zip',
            'dwca-invalid-simple-two.zip',
        )

        for archive_name in invalid_archive_names:
            with self.assertRaises(InvalidArchive):
                reader = DwCAReader(sample_data_path(archive_name))
                reader.close()
    def test_exception_invalid_simple_archives(self):
        """Simple archives that cannot be interpreted must raise InvalidArchive.

        An archive without a metafile is expected to hold exactly one core data file,
        optionally accompanied by metadata in EML.xml. When an archive deviates from
        that layout, python-dwca-reader has no way to identify the data file and should
        raise InvalidArchive.
        """
        # The first archive carries an extra, unrelated file next to the data file and
        # EML.xml, so the data file cannot be picked unambiguously.
        for invalid_archive in (INVALID_SIMPLE_TOOMUCH, INVALID_SIMPLE_TWO):
            with self.assertRaises(InvalidArchive):
                reader = DwCAReader(invalid_archive)
                reader.close()
 def test_classic_opening(self):
     """The reader must be usable without a 'with' block, using an explicit close()."""
     reader = DwCAReader(sample_data_path('dwca-simple-test-archive.zip'))
     self.assertIsInstance(reader.metadata, ET.Element)
     reader.close()
    rowcount = 0
    with open(fullpath, 'a') as tsvfile:
        writer = csv.DictWriter(tsvfile, dialect=dialect, fieldnames=termnames,
            quoting=csv.QUOTE_NONE, quotechar='')
        for row in dwcareader:
#            print 'Row %s:\n%s' % (rowcount,row.data)
            for f in row.data:
                row.data[f]=row.data[f].encode("utf-8")
            writer.writerow(row.data)
            rowcount += 1

    # Get the number of records in the core file.
#    rowcount = get_core_rowcount(dwcareader)

    # Close the archive    
    dwcareader.close()
    
    # Successfully completed the mission
    # Return a dict of important information as a JSON string
    response = {}
    returnvars = ['fullpath', 'rowcount']
    returnvals = [fullpath, rowcount]
    i=0
    for a in returnvars:
        response[a]= returnvals[i] 
        i+=1
    return json.dumps(response)

def _getoptions():
    """Parses command line options and returns them."""
    parser = OptionParser()
 def test_classic_opening(self):
     """The reader must be usable without a 'with' block, using an explicit close()."""
     reader = DwCAReader(BASIC_ARCHIVE_PATH)
     self.assertIsInstance(reader.metadata, ET.Element)
     reader.close()
示例#15
0
 def test_classic_opening(self):
     """Plain construction followed by a manual close() must work like the 'with' form."""
     archive_reader = DwCAReader(BASIC_ARCHIVE_PATH)
     self.assertIsInstance(archive_reader.metadata, ET.Element)
     archive_reader.close()
示例#16
0
    #     if options.vocab_path is not None:
    #         for term in controlledtermlist:
    #             termvalues=get_distinct_term_values(dwcareader, term)
    #             vocabfile='%s/%s.csv' % (options.vocab_path,term)
    #             print 'vocabfile: %s term: %s termlist: %s' % (vocabfile, term, termvalues)
    #             append_to_vocab(vocabfile, termvalues)
    #             print '%s values: %s' % (term, termvalues)

    #     print '\nGeography keys:'
    #     i = 0
    #     for row in dwcareader:
    #         geogkey = get_term_group_key(row.data, geogkeytermlist)
    #         i = i + 1
    #         print '%s' % (geogkey)
    #     print 'Count=%s' % i
    #
    #     i = 0
    #     print '\nTaxonomy keys:\n'
    #     for row in dwcareader:
    #         taxonkey = get_term_group_key(row.data, taxonkeytermlist)
    #         i = i + 1
    #         print '%s' % (taxonkey)
    #     print 'Count=%s' % i

    dwcareader.close()


if __name__ == '__main__':
    # Demo of dwca_utils functions: only runs when the file is executed as a script.
    main()