def test_core_contains_term(self):
        """core_contains_term() is true for a declared term and false otherwise."""
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            # The sample archive declares 'locality' but not 'country'.
            has_locality = archive.core_contains_term(qn('locality'))
            self.assertTrue(has_locality)

            has_country = archive.core_contains_term(qn('country'))
            self.assertFalse(has_country)
    def load_rows(self):
        """Open the archive at ``self.gbif_path``, report on its core, keep its rows.

        Prints the core row type and whether the core declares the locality
        term, then stores the reader's ``rows`` on ``self.gbif``.
        """
        with DwCAReader(self.gbif_path) as archive:
            core = archive.descriptor.core

            # The reader object is usable from here on.
            print("Read core type: " + str(core.type) + "! :)")

            # Is a given Darwin Core term declared in the core file?
            locality_declared = 'http://rs.tdwg.org/dwc/terms/locality' in core.terms
            print("Locality term is present! :)" if locality_declared
                  else "Locality term is not present.  :(")

            # Spelling out full term URIs (such as
            # 'http://rs.tdwg.org/dwc/terms/country') is verbose; the
            # qualname() helper, imported here as 'qn', expands short names:
            qn('locality')
            # => u'http://rs.tdwg.org/dwc/terms/locality'

            # The membership check above can therefore be written as:
            if qn('locality') in core.terms:
                pass

            # ...and the core type can be compared the same way:
            if core.type == qn('Occurrence'):
                pass

            # Keep the row data in memory.
            self.gbif = archive.rows
Exemplo n.º 3
0
    def test_dont_enclose_unenclosed(self):
        """Quote characters stay in the data when fields_enclosed_by is an empty string."""
        with DwCAReader(sample_data_path('dwca-simple-dir')) as archive:
            records = list(archive)

            # (row index, scientificName expected verbatim, quotes included)
            expected_names = ((2, '"betta" splendens'), (3, "'betta' splendens"))
            for index, expected in expected_names:
                self.assertEqual(expected, records[index].data[qn('scientificName')])
Exemplo n.º 4
0
    def test_read_core_value(self):
        """Core rows expose the values stored in the core file."""
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            records = list(archive)

            # Localities of the first two rows of the sample file
            for index, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[index].data[qn('locality')])
Exemplo n.º 5
0
 def test_partial_default(self):
     """Country comes from the data file for row 0 and from the field default for row 1."""
     with DwCAReader(sample_data_path("dwca-partial-default.zip")) as archive:
         # Value comes from data file
         from_file = archive.rows[0].data[qn("country")]
         self.assertEqual(from_file, "France")

         # Value is field default
         from_default = archive.rows[1].data[qn("country")]
         self.assertEqual(from_default, "Belgium")
    def test_read_core_value(self):
        """A plain value can be read back from the core file."""
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            records = list(archive)

            # Expected locality for each of the first two sample rows
            expected_localities = ("Borneo", "Mumbai")
            for position, expected in enumerate(expected_localities):
                actual = records[position].data[qn("locality")]
                self.assertEqual(expected, actual)
Exemplo n.º 7
0
    def test_read_core_value(self):
        """Locality values of the sample archive are readable from the core rows."""
        archive_path = sample_data_path('dwca-simple-test-archive.zip')
        with DwCAReader(archive_path) as archive:
            records = list(archive)

            # Basic locality values from the sample file
            for row_number, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[row_number].data[qn('locality')])
    def test_qn(self):
        """qn() expands a known short name and raises StopIteration for an unknown one."""
        # A known short name resolves to its full URI.
        occurrence_uri = "http://rs.tdwg.org/dwc/terms/Occurrence"
        self.assertEqual(occurrence_uri, qn("Occurrence"))

        # An unknown short name cannot be resolved.
        self.assertRaises(StopIteration, qn, "dsfsdfsdfsdfsdfsd")
Exemplo n.º 9
0
def set_term_value(rowdata, term, value):
    """Set the value of an existing term in the given rowdata.

    The term is matched first as given and then, if that fails, by its
    qualified name as returned by ``qn(term)``.  Nothing is changed (and no
    new key is added) when ``rowdata`` is None, when the term is not already
    a key of ``rowdata``, or when ``qn`` cannot resolve the term.

    Returns None in every case; ``rowdata`` is modified in place.
    """
    if rowdata is None:
        return
    if term in rowdata:
        rowdata[term] = value
        return
    try:
        qualified = qn(term)
    except Exception:
        # qn() raises for names it does not know; treat that like any other
        # term that is absent from the row instead of leaking the exception.
        return
    if qualified in rowdata:
        rowdata[qualified] = value
Exemplo n.º 10
0
    def test_qn(self):
        """The qn shortcut helper resolves known names and rejects unknown ones."""
        # Success: the short name maps to the full term URI.
        resolved = qn("Occurrence")
        self.assertEqual("http://rs.tdwg.org/dwc/terms/Occurrence", resolved)

        # Failure: a made-up name raises StopIteration.
        self.assertRaises(StopIteration, qn, "dsfsdfsdfsdfsdfsd")
Exemplo n.º 11
0
def set_term_value(rowdata, term, value):
    """Overwrite the value stored for ``term`` in ``rowdata``, if it is present.

    ``term`` is tried as given, then as the qualified name returned by
    ``qn(term)``.  The row is left untouched when ``rowdata`` is None, when
    neither form is already a key, or when ``qn`` cannot resolve the term.

    Returns None; ``rowdata`` is modified in place.
    """
    if rowdata is None:
        return
    if term in rowdata:
        rowdata[term] = value
        return
    try:
        qualified_term = qn(term)
    except Exception:
        # An unresolvable name is simply "not in this row"; do not let the
        # lookup failure from qn() escape to the caller.
        return
    if qualified_term in rowdata:
        rowdata[qualified_term] = value
    def test_archives_without_metadata(self):
        """An archive with a metafile but no metadata document is still readable."""
        with DwCAReader(NOMETADATA_PATH) as archive:
            self.assertIsNone(archive.metadata)

            # The row data is available all the same.
            records = list(archive)
            self.assertEqual(len(records), 2)
            for position, locality in enumerate(("Borneo", "Mumbai")):
                self.assertEqual(locality, records[position].data[qn("locality")])
    def test_archives_without_metadata(self):
        """Rows stay accessible when the archive has a metafile but no metadata."""
        archive_path = sample_data_path('dwca-nometadata.zip')
        with DwCAReader(archive_path) as archive:
            self.assertIsNone(archive.metadata)

            # Missing metadata must not prevent reading the data.
            records = list(archive)
            self.assertEqual(len(records), 2)
            for index, expected in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(expected, records[index].data[qn('locality')])
Exemplo n.º 14
0
def dwcaline_to_epsg4326(line):
    """Return a {'lat': X, 'lon': Y} dict built from the given DwCALine.

    Both values are the line's decimalLatitude / decimalLongitude converted
    with float(); CannotConvertException is raised when that conversion
    fails with a ValueError.
    """
    try:
        latitude = float(line.data[qn('decimalLatitude')])
        longitude = float(line.data[qn('decimalLongitude')])
    except ValueError:
        raise CannotConvertException()
    else:
        return dict(lat=latitude, lon=longitude)
 def applicable_to_archive(self, archive):
     """Tell whether this check can run on the archive.

     The archive core must be of Occurrence type and must contain the
     'kingdom' term; otherwise an applicability error is logged and False
     is returned.
     """
     if archive.core_rowtype != qn('Occurrence'):
         self.logger.log("Archive core should be of Occurrence type.", MessageTypes.APPLICABILITY, MessageLevels.ERROR)
         return False

     if not archive.core_contains_term(qn('kingdom')):
         self.logger.log("Core should contain the 'kingdom' term", MessageTypes.APPLICABILITY, MessageLevels.ERROR)
         return False

     return True
Exemplo n.º 16
0
def dwcaline_to_epsg4326(line):
    """Convert a DwCALine's decimal coordinates into a {'lat': X, 'lon': Y} dict.

    Raises CannotConvertException when float() rejects either coordinate
    with a ValueError.
    """
    coordinate_terms = ('decimalLatitude', 'decimalLongitude')
    try:
        lat, lon = [float(line.data[qn(name)]) for name in coordinate_terms]
    except ValueError:
        raise CannotConvertException()

    result = {}
    result['lat'] = lat
    result['lon'] = lon
    return result
Exemplo n.º 17
0
    def test_core_contains_term(self):
        """core_contains_term() works with and without a metafile."""
        # The standard sample archive declares locality but not country.
        with DwCAReader(sample_data_path('dwca-simple-test-archive.zip')) as archive:
            declared = archive.core_contains_term(qn('locality'))
            self.assertTrue(declared)
            undeclared = archive.core_contains_term(qn('country'))
            self.assertFalse(undeclared)

        # A simple archive (= no metafile) is queried with raw column names.
        with DwCAReader(sample_data_path('dwca-simple-csv.zip')) as simple_archive:
            declared = simple_archive.core_contains_term('datasetkey')
            self.assertTrue(declared)
            undeclared = simple_archive.core_contains_term('trucmachin')
            self.assertFalse(undeclared)
Exemplo n.º 18
0
def row_has_term_value(rowdata, term):
    """Return True if the row contains a value for the term other than ''.

    The term is looked up as given; only when it is not a key of ``rowdata``
    is the qualified name from ``qn(term)`` tried.  Returns False when
    ``rowdata`` is None, when the term is absent under both names, when the
    stored value is the empty string, or when ``qn`` cannot resolve the term.
    """
    if rowdata is None:
        return False
    if term in rowdata:
        return rowdata[term] != ''
    try:
        qualified = qn(term)
    except Exception:
        # qn() raises for unknown names; such a term has no value in the row.
        return False
    return qualified in rowdata and rowdata[qualified] != ''
    def test_core_contains_term(self):
        """core_contains_term() answers for regular and metafile-less archives."""
        # Regular sample archive: locality is declared, country is not.
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            has_locality = archive.core_contains_term(qn("locality"))
            self.assertTrue(has_locality)
            has_country = archive.core_contains_term(qn("country"))
            self.assertFalse(has_country)

        # Simple (= no metafile) archive: terms are plain column names.
        with DwCAReader(SIMPLE_CSV) as csv_archive:
            has_datasetkey = csv_archive.core_contains_term("datasetkey")
            self.assertTrue(has_datasetkey)
            has_unknown = csv_archive.core_contains_term("trucmachin")
            self.assertFalse(has_unknown)
Exemplo n.º 20
0
def row_has_term_value(rowdata, term):
    """Return True if the row holds a non-empty value for the term.

    ``term`` is matched as given first; the qualified name from ``qn(term)``
    is consulted only when ``term`` itself is not a key of ``rowdata``.
    The result is False when ``rowdata`` is None, when the term is missing
    under both names, when its value is '', or when ``qn`` cannot resolve
    the term.
    """
    if rowdata is None:
        return False
    if term in rowdata:
        return rowdata[term] != ''
    try:
        qualified_term = qn(term)
    except Exception:
        # A name qn() does not know cannot have a value in this row.
        return False
    if qualified_term not in rowdata:
        return False
    return rowdata[qualified_term] != ''
Exemplo n.º 21
0
    def test_core_contains_term(self):
        """Check core_contains_term() on a regular archive and on a simple CSV one."""
        # The regular sample declares locality and lacks country.
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            locality_found = archive.core_contains_term(qn('locality'))
            self.assertTrue(locality_found)
            country_found = archive.core_contains_term(qn('country'))
            self.assertFalse(country_found)

        # Same check against a simple (= no metafile) archive.
        with DwCAReader(SIMPLE_CSV) as csv_archive:
            known_column_found = csv_archive.core_contains_term('datasetkey')
            self.assertTrue(known_column_found)
            unknown_column_found = csv_archive.core_contains_term('trucmachin')
            self.assertFalse(unknown_column_found)
Exemplo n.º 22
0
    def test_enclosed_data(self):
        """Enclosing characters are trimmed when fieldsEnclosedBy is in use."""
        with DwCAReader(BASIC_ENCLOSED_ARCHIVE_PATH) as archive:
            records = list(archive)

            # Locality is wrapped in "'" characters, which should be trimmed...
            for index, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[index].data[qn('locality')])

            # ...whereas family is not wrapped and should come through unchanged.
            for index, family in enumerate(('Tetraodontidae', 'Osphronemidae')):
                self.assertEqual(family, records[index].data[qn('family')])
Exemplo n.º 23
0
    def test_tgz_archives(self):
        """Basic reader features work on a .tgz archive."""
        with DwCAReader(BASIC_ARCHIVE_TGZ_PATH) as archive:
            self.assertIsInstance(archive.metadata, ET.Element)

            # Iterating the reader yields core rows.
            for record in archive:
                self.assertIsInstance(record, CoreRow)

            records = list(archive)
            self.assertEqual(len(records), 2)
            for index, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[index].data[qn('locality')])
    def test_enclosed_data(self):
        """Values are trimmed of their enclosing characters (fieldsEnclosedBy)."""
        with DwCAReader(BASIC_ENCLOSED_ARCHIVE_PATH) as archive:
            records = list(archive)

            # Enclosed in "'" in the file: the quotes should be trimmed...
            expected_localities = ("Borneo", "Mumbai")
            for position, expected in enumerate(expected_localities):
                self.assertEqual(expected, records[position].data[qn("locality")])

            # Not enclosed in the file: the value shouldn't be altered.
            expected_families = ("Tetraodontidae", "Osphronemidae")
            for position, expected in enumerate(expected_families):
                self.assertEqual(expected, records[position].data[qn("family")])
    def test_tgz_archives(self):
        """A .tgz archive supports metadata access, iteration and row data."""
        with DwCAReader(BASIC_ARCHIVE_TGZ_PATH) as archive:
            metadata = archive.metadata
            self.assertIsInstance(metadata, ET.Element)

            for record in archive:
                self.assertIsInstance(record, CoreRow)

            records = list(archive)
            self.assertEqual(len(records), 2)
            for position, expected in enumerate(("Borneo", "Mumbai")):
                self.assertEqual(expected, records[position].data[qn("locality")])
Exemplo n.º 26
0
    def test_tgz_archives(self):
        """The reader's basic features also work with a .tgz archive."""
        tgz_path = sample_data_path('dwca-simple-test-archive.tgz')
        with DwCAReader(tgz_path) as archive:
            self.assertIsInstance(archive.metadata, ET.Element)

            # Every iterated item is a core row.
            for record in archive:
                self.assertIsInstance(record, CoreRow)

            records = list(archive)
            self.assertEqual(len(records), 2)
            for row_number, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[row_number].data[qn('locality')])
Exemplo n.º 27
0
    def test_enclosed_data(self):
        """Data is trimmed of enclosing characters when fieldsEnclosedBy is set."""
        enclosed_path = sample_data_path('dwca-simple-test-archive-enclosed.zip')
        with DwCAReader(enclosed_path) as archive:
            records = list(archive)

            # Locality values are enclosed in "'" chars, which should be trimmed...
            for row_number, locality in enumerate(('Borneo', 'Mumbai')):
                self.assertEqual(locality, records[row_number].data[qn('locality')])

            # ...but family values are not, so they shouldn't be altered.
            for row_number, family in enumerate(('Tetraodontidae', 'Osphronemidae')):
                self.assertEqual(family, records[row_number].data[qn('family')])
Exemplo n.º 28
0
 def check_datatype(self, row, term, dtype):
     """Check that the row's value for ``term`` has the data type named by ``dtype``.

     Data types are broader than Python-specific ones (JSON is supported
     too).  Supported names are ``'json'`` (the value must parse with
     ``json.loads``) and ``'int'`` / ``'integer'`` (the value must be an
     ``int`` instance).  A value that fails the check is recorded through
     ``self._add_failure`` under the ``'ValidDataType'`` test name.

     Raises:
         Exception: if ``dtype`` is not one of the supported names.
     """
     if dtype == 'json':
         value = row.data[qn(term)]
         try:
             json.loads(value)
         except (TypeError, ValueError):
             # ValueError covers malformed JSON (json.JSONDecodeError is a
             # subclass); TypeError covers a value json.loads cannot accept.
             # A bare ``except`` here would also hide KeyboardInterrupt.
             self._add_failure(row, term, 'ValidDataType')
     elif dtype in ('int', 'integer'):
         # NOTE(review): this tests the Python type, so a numeric string such
         # as '42' is reported as a failure -- confirm that is intended.
         if not isinstance(row.data[qn(term)], int):
             self._add_failure(row, term, 'ValidDataType')
     else:
         raise Exception("{} not supported".format(dtype))
def create_occurrence_from_dwcaline(line):
    """Build an Occurrence from the given DwCA line and save it.

    Copies the catalog number and (when non-empty) the event date, links the
    MGRS square whose label is the line's verbatimCoordinates (creating it if
    needed), resolves the species through get_or_create_taxonomy() and
    finally saves the occurrence.
    """
    occurrence = Occurrence()
    record = line.data

    # Simple fields
    # TODO: move these long Dwc strings to a specific module ?
    occurrence.catalog_number = record[qn('catalogNumber')]
    occurrence.scientificname = ''  # TODO: Remove this field
    collected_on = record[qn('eventDate')]
    if collected_on != '':
        occurrence.event_date = collected_on

    # Foreign keys
    square_label = record[qn('verbatimCoordinates')]
    occurrence.square = MGRSSquare.objects.get_or_create(label=square_label)[0]

    species_name = record[qn('specificEpithet')]
    genus_name = record[qn('genus')]
    family_name = record[qn('family')]
    scientific_name = record[qn('scientificName')]
    specific_epithet = record[qn('specificEpithet')]

    occurrence.species = get_or_create_taxonomy(
        family_name, genus_name, species_name, scientific_name, specific_epithet)

    occurrence.save()
Exemplo n.º 30
0
def row_has_term(rowdata, term):
    """Return True if the row contains the term in its data dictionary by name or identifier.

    ``term`` is matched as given first and then by the qualified name from
    ``qn(term)``.  Returns False when ``rowdata`` is None, when neither form
    is a key of ``rowdata``, or when ``qn`` cannot resolve the term.
    """
    if rowdata is None:
        return False
    if term in rowdata:
        return True
    try:
        qualified = qn(term)
    except Exception:
        # qn() raises for unknown names; an unknown term is simply absent.
        return False
    return qualified in rowdata
 def test_csv_quote_dir_archive(self):
     """A field separator inside a quoted field does not split the field."""
     with DwCAReader(sample_data_path('dwca-csv-quote-dir')) as archive:
         records = list(archive)
         self.assertEqual(len(records), 2)

         basis_of_record = records[0].data[qn('basisOfRecord')]
         self.assertEqual(basis_of_record, 'Observation, something')
Exemplo n.º 32
0
def row_has_term(rowdata, term):
    """Return True if the row's data dictionary has the term, by name or identifier.

    The term is accepted either exactly as given or as the qualified name
    returned by ``qn(term)``.  The result is False when ``rowdata`` is None,
    when neither form is a key, or when ``qn`` cannot resolve the term.
    """
    if rowdata is None:
        return False
    if term in rowdata:
        return True
    try:
        qualified_term = qn(term)
    except Exception:
        # A name qn() does not know cannot be present under a qualified key.
        return False
    return qualified_term in rowdata
    def test_exposes_core_type(self):
        """The core descriptor exposes the archive's core type as ``type``."""
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            core = archive.descriptor.core

            # dwca-simple-test-archive.zip should be of Occurrence type
            occurrence_uri = "http://rs.tdwg.org/dwc/terms/Occurrence"
            self.assertEqual(core.type, occurrence_uri)

            # The qn shortcut should give the same answer.
            self.assertEqual(core.type, qn("Occurrence"))
Exemplo n.º 34
0
    def test_dwcareader_features(self):
        """GBIFResultsReader keeps the basic features inherited from DwCAReader."""
        with GBIFResultsReader(GBIF_RESULTS_PATH) as results:
            self.assertEqual(158, len(results.rows))

            occurrence_uri = 'http://rs.tdwg.org/dwc/terms/Occurrence'
            self.assertEqual(occurrence_uri, results.descriptor.core.type)

            first_row = results.rows[0]
            self.assertEqual('Tetraodontidae', first_row.data[qn('family')])
            self.assertEqual([], first_row.extensions)
    def test_dwcareader_features(self):
        """Inherited DwCAReader behaviour still works on a GBIF results archive."""
        with GBIFResultsReader(GBIF_RESULTS_PATH) as gbif_archive:
            row_count = len(gbif_archive.rows)
            self.assertEqual(158, row_count)

            core_type = gbif_archive.descriptor.core.type
            self.assertEqual('http://rs.tdwg.org/dwc/terms/Occurrence', core_type)

            leading_row = gbif_archive.rows[0]
            self.assertEqual('Tetraodontidae', leading_row.data[qn('family')])
            self.assertEqual([], leading_row.extensions)
Exemplo n.º 36
0
    def test_exposes_core_type(self):
        """The archive core type is available as ``type`` on the core descriptor."""
        with DwCAReader(BASIC_ARCHIVE_PATH) as archive:
            core = archive.descriptor.core

            # dwca-simple-test-archive.zip should be of Occurrence type
            expected_type = 'http://rs.tdwg.org/dwc/terms/Occurrence'
            self.assertEqual(core.type, expected_type)

            # Shortcuts should work as well.
            self.assertEqual(core.type, qn('Occurrence'))
Exemplo n.º 37
0
def get_term_value(rowdata, term):
    """Return the value of the term in the given rowdata.

    The term is looked up as given first; if it is not a key of ``rowdata``,
    the Darwin Core fully qualified name from ``qn(term)`` is tried.

    Returns None when ``rowdata`` is None, when ``qn`` cannot resolve the
    term, or when the term is absent under both names.
    """
    if rowdata is None:
        return None
    if term in rowdata:
        return rowdata[term]
    # Try a Darwin Core fully qualified term if it wasn't found as is.
    try:
        qualified = qn(term)
    except Exception:
        # ``except Exception, e`` is Python-2-only syntax; the bound
        # exception was unused anyway.
        return None
    # The qualified name was previously computed but never looked up.
    if qualified in rowdata:
        return rowdata[qualified]
    return None
    def test_read_core_value_default(self):
        """Read a core value that is supplied by a default.

        Like test_read_core_value(), except that the value comes from a
        default declared in meta.xml rather than from the core text file.
        Defaults are part of the standard and were produced by IPT prior to
        version 2.0.3.
        """
        with DwCAReader(DEFAULT_VAL_PATH) as archive:
            for record in archive:
                country = record.data[qn("country")]
                self.assertEqual("Belgium", country)
    def test_read_core_value_default(self):
        """Every core row reports the default country.

        Counterpart of test_read_core_value(): here the data comes from a
        default value (in meta.xml) instead of from the core text file.
        This is part of the standard and was produced by IPT prior to
        version 2.0.3.
        """
        archive_path = sample_data_path('dwca-test-default.zip')
        with DwCAReader(archive_path) as archive:
            for record in archive:
                self.assertEqual('Belgium', record.data[qn('country')])
Exemplo n.º 40
0
def archive_has_core_term(dwcareader, term):
    """Return True if the core file contains a column for the term name or identifier.

    ``term`` is checked against ``dwcareader.descriptor.core.terms`` as given
    and then as the qualified name from ``qn(term)``.

    Returns False when either argument is None, when the term is not among
    the core terms under either name, or when ``qn`` cannot resolve the term
    (an error is logged in that last case).
    """
    if dwcareader is None or term is None:
        return False
    core_terms = dwcareader.descriptor.core.terms
    if term in core_terms:
        return True
    try:
        qualified = qn(term)
    except Exception:
        # ``except Exception, e`` is Python-2-only syntax; the bound
        # exception was unused anyway.
        logging.error('archive_has_core_term(): %s is not a Simple Darwin Core term. The search is case-sensitive.', term)
        return False
    # The qualified name was previously computed but never checked.
    return qualified in core_terms
Exemplo n.º 41
0
def get_term_value(rowdata, term):
    """Return the value stored for the term in the given rowdata.

    ``term`` is tried as given; when it is not a key of ``rowdata`` the
    Darwin Core fully qualified name from ``qn(term)`` is tried instead.

    Returns None when ``rowdata`` is None, when ``qn`` cannot resolve the
    term, or when neither name is a key of ``rowdata``.
    """
    if rowdata is None:
        return None
    if term in rowdata:
        return rowdata[term]
    # Fall back to the fully qualified Darwin Core name.
    try:
        qualified_term = qn(term)
    except Exception:
        # Python 3 rejects the old ``except Exception, e`` spelling, and the
        # bound exception was never used.
        return None
    # Without this lookup the qualified name was computed and then discarded.
    if qualified_term not in rowdata:
        return None
    return rowdata[qualified_term]
Exemplo n.º 42
0
    def _add_failure(self, row, term, test):
        """Record that ``row`` failed ``test`` for ``term``.

        The row id is appended to ``self.log[term][test]["ids"]``.  The row's
        value for the term is appended to the ``"sample"`` list only when
        ``self._check_if_new_failure`` returns a falsy result for it.
        Missing ``self.log`` entries are created on demand from
        ``self._setup_termtest_dict()``.
        """
        if term not in self.log:
            self.log[term] = {}
        if test not in self.log[term]:
            # Store the fresh entry directly under the test name.  Wrapping it
            # in another ``{test: ...}`` dict made the "ids" lookup below raise
            # KeyError for a second distinct test on an already-known term.
            self.log[term][test] = self._setup_termtest_dict()

        entry = self.log[term][test]
        entry["ids"].append(row.id)
        if not self._check_if_new_failure(row, term, test):
            entry["sample"].append(row.data[qn(term)])
Exemplo n.º 43
0
def archive_has_core_term(dwcareader, term):
    """Return True if the core file has a column for the term name or identifier.

    The term is matched against ``dwcareader.descriptor.core.terms`` exactly
    as given, then as the qualified name returned by ``qn(term)``.

    Returns False when either argument is None, when neither form is among
    the core terms, or when ``qn`` cannot resolve the term (in which case an
    error is logged as well).
    """
    if dwcareader is None or term is None:
        return False
    core_terms = dwcareader.descriptor.core.terms
    if term in core_terms:
        return True
    try:
        qualified_term = qn(term)
    except Exception:
        # Python 3 rejects the old ``except Exception, e`` spelling, and the
        # bound exception was never used.
        logging.error(
            'archive_has_core_term(): %s is not a Simple Darwin Core term. The search is case-sensitive.',
            term)
        return False
    # Without this check the qualified name was computed and then discarded.
    return qualified_term in core_terms
    def test_subdirectory_archive(self):
        """Archives whose whole content sits under a single directory are supported."""
        def count_entries():
            # Number of entries in the current working directory right now.
            return len(os.listdir("."))

        entries_before = count_entries()
        entries_during = None
        with DwCAReader(SUBDIR_ARCHIVE_PATH) as archive:
            # Metadata must be reachable...
            self.assertIsInstance(archive.metadata, ET.Element)

            # ...and so must the rows themselves.
            for record in archive:
                self.assertIsInstance(record, CoreRow)

            records = list(archive)
            self.assertEqual("Borneo", records[0].data[qn("locality")])

            entries_during = count_entries()

        entries_after = count_entries()

        # The temporary dir should exist while reading and be gone afterwards.
        self.assertEqual(entries_before + 1, entries_during)
        self.assertEqual(entries_before, entries_after)
    def test_subdirectory_archive(self):
        """The reader copes with archives that keep everything in one subdirectory."""
        temp_root = tempfile.gettempdir()

        def count_temp_entries():
            # Number of entries in the system temporary directory right now.
            return len(os.listdir(temp_root))

        entries_before = count_temp_entries()
        with DwCAReader(sample_data_path('dwca-simple-subdir.zip')) as archive:
            # Metadata must be reachable...
            self.assertIsInstance(archive.metadata, ET.Element)

            # ...and so must the rows themselves.
            for record in archive:
                self.assertIsInstance(record, CoreRow)

            records = list(archive)
            self.assertEqual('Borneo', records[0].data[qn('locality')])

            entries_during = count_temp_entries()

        entries_after = count_temp_entries()

        # The temporary dir should exist while reading and be gone afterwards.
        self.assertEqual(entries_before + 1, entries_during)
        self.assertEqual(entries_before, entries_after)
Exemplo n.º 46
0
def valid_dwca(dwca):
    """Tell whether the archive has an Occurrence core with both decimal coordinates.

    The checks run in order (core row type, decimalLatitude, then
    decimalLongitude) and stop at the first one that fails.
    """
    is_occurrence = dwca.core_rowtype == qn('Occurrence')
    if not is_occurrence:
        return is_occurrence

    has_latitude = dwca.core_contains_term(qn('decimalLatitude'))
    if not has_latitude:
        return has_latitude

    return dwca.core_contains_term(qn('decimalLongitude'))
Exemplo n.º 47
0
 def _check_if_new_failure(self, row, term, test):
     """Tell whether the row's value is already sampled for this term-test pair.

     Returns True when the value is found in
     ``self.log[term][test]["sample"]``, False otherwise.
     """
     failing_value = row.data[qn(term)]
     return failing_value in self.log[term][test]["sample"]
 def test_partial_default(self):
     """Row 0 takes its country from the data file, row 1 from the field default."""
     with DwCAReader(sample_data_path('dwca-partial-default.zip')) as archive:
         # Value comes from data file
         country_from_file = archive.rows[0].data[qn('country')]
         self.assertEqual(country_from_file, 'France')

         # Value is field default
         country_from_default = archive.rows[1].data[qn('country')]
         self.assertEqual(country_from_default, 'Belgium')
Exemplo n.º 49
0
 def check_not_equal(self, row, term, value):
     """Record a 'NotEqual' failure when the row's value for the term equals ``value``."""
     observed = row.data[qn(term)]
     if observed == value:
         self._add_failure(row, term, 'NotEqual')
Exemplo n.º 50
0
 def check_equal_options(self, row, term, values):
     """Record an 'EqualList' failure when the row's value for the term is not among ``values``."""
     observed = row.data[qn(term)]
     if observed in values:
         return
     self._add_failure(row, term, 'EqualList')
                dwca_url = csvrow['dataset_url']
                filename = csvrow['id']
                dwca_file = f'./data/dwca/{filename}.zip'

                # Download
                if not path.exists(dwca_file):
                    r = requests.get(dwca_url, stream=True)
                    with open(dwca_file, 'wb') as fd:
                        for byte in r.raw:
                            fd.write(byte)

                with DwCAReader(dwca_file) as dwca:
                    core_type = dwca.descriptor.core.type

                    has_term = {
                        t: qn(t) in dwca.descriptor.core.terms
                        for t in lookup_terms
                    }
                    for row in dwca:
                        potential_interaction_terms = []
                        for term in lookup_terms:
                            if has_term[term] == True:
                                term_value = row.data[qn(term)]
                                if term_value:
                                    potential_interaction_terms.append(term)

                        for ext in row.extensions:
                            for interaction_ext in extensions:
                                if qn(interaction_ext) == ext.rowtype:
                                    saveToDB(csvrow, {
                                        **row.data,
Exemplo n.º 52
0
def valid_dwca(dwca):
    """Check that the archive core is an Occurrence core with decimal coordinates.

    Evaluation is short-circuited: the first failing check (row type,
    decimalLatitude, decimalLongitude, in that order) decides the result.
    """
    type_matches = dwca.core_rowtype == qn('Occurrence')
    if not type_matches:
        return type_matches

    latitude_present = dwca.core_contains_term(qn('decimalLatitude'))
    if not latitude_present:
        return latitude_present

    longitude_present = dwca.core_contains_term(qn('decimalLongitude'))
    return longitude_present
 def assess_line(self, line):
     """Check the line's kingdom value against the accepted list.

     The value is lower-cased and has leading/trailing newline and tab
     characters stripped; a non-empty value that is not in
     ``self._accepted`` is reported through ``self.logger.log``.
     """
     kingdom = line.data[qn('kingdom')].lower().strip("\n\t")
     # Empty values are skipped; only non-empty, unaccepted kingdoms are logged.
     if len(kingdom) > 0 and kingdom not in self._accepted:
         self.logger.log("'{kingdom}' not in accepted list.".format(kingdom=kingdom))