Exemplo n.º 1
0
    def test_lines_to_ignore(self):
        """lines_to_ignore follows ignoreHeaderLines and is 0 when the attribute is absent."""
        # Raw string: the attribute values keep the literal two-character
        # sequences \t and \n. In a normal literal Python would substitute a
        # real tab/newline, which XML attribute-value normalization then turns
        # into plain spaces.
        section_template = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        {header_attr}rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
        </core>
        """

        # (ignoreHeaderLines attribute as written in the XML, expected value)
        scenarios = (
            ('ignoreHeaderLines="0" ', 0),  # With explicit "0"
            ('ignoreHeaderLines="1" ', 1),  # With explicit 1
            ("", 0),  # Implicit 0 (when nothing stated)
        )

        for header_attr, expected in scenarios:
            metaxml_section = section_template.format(header_attr=header_attr)
            core_descriptor = DataFileDescriptor.make_from_metafile_section(
                ET.fromstring(metaxml_section))

            # The message identifies which scenario failed inside the loop.
            self.assertEqual(core_descriptor.lines_to_ignore, expected,
                             msg="header attribute: %r" % header_attr)
    def test_lines_to_ignore(self):
        """lines_to_ignore follows ignoreHeaderLines and is 0 when the attribute is absent."""
        # Raw string: the attribute values keep the literal two-character
        # sequences \t and \n. In a normal literal Python would substitute a
        # real tab/newline, which XML attribute-value normalization then turns
        # into plain spaces.
        section_template = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        {header_attr}rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
        </core>
        """

        # (ignoreHeaderLines attribute as written in the XML, expected value)
        scenarios = (
            ('ignoreHeaderLines="0" ', 0),  # With explicit "0"
            ('ignoreHeaderLines="1" ', 1),  # With explicit 1
            ("", 0),  # Implicit 0 (when nothing stated)
        )

        for header_attr, expected in scenarios:
            metaxml_section = section_template.format(header_attr=header_attr)
            core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

            # The message identifies which scenario failed inside the loop.
            self.assertEqual(
                core_descriptor.lines_to_ignore, expected, msg="header attribute: %r" % header_attr
            )
    def test_fields(self):
        """.fields exposes one dict (term / index / default) per <field> element."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

        # .fields is supposed to return a list of dicts like those
        expected_fields = (
            {"term": "http://rs.tdwg.org/dwc/terms/country", "index": None, "default": "Belgium"},
            {"term": "http://rs.tdwg.org/dwc/terms/scientificName", "index": 1, "default": None},
        )

        for ef in expected_fields:
            # assertIn reports both the missing dict and the actual fields on failure.
            self.assertIn(ef, core_descriptor.fields)

        # Five <field> elements are declared; <id> is not counted as a field.
        self.assertEqual(len(core_descriptor.fields), 5)
    def test_headers_unordered(self):
        """.headers is ordered by column index even when the XML declares fields out of order."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Taxon">
            <files>
                <location>taxon.txt</location>
            </files>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/phylum"/>
            <id index="0" />
            <field index="1" term="http://rs.tdwg.org/dwc/terms/order"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/class"/>
            <field index="6" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
            <field index="5" term="http://rs.tdwg.org/dwc/terms/genus"/>
        </core>
        """
        core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

        # Listed by ascending index (0..6), not by position in the XML above.
        expected_headers_core = [
            "id",
            "http://rs.tdwg.org/dwc/terms/order",
            "http://rs.tdwg.org/dwc/terms/class",
            "http://rs.tdwg.org/dwc/terms/kingdom",
            "http://rs.tdwg.org/dwc/terms/phylum",
            "http://rs.tdwg.org/dwc/terms/genus",
            "http://rs.tdwg.org/dwc/terms/family",
        ]

        self.assertEqual(core_descriptor.headers, expected_headers_core)
Exemplo n.º 5
0
    def test_headers_unordered(self):
        """.headers is ordered by column index even when the XML declares fields out of order."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Taxon">
            <files>
                <location>taxon.txt</location>
            </files>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/phylum"/>
            <id index="0" />
            <field index="1" term="http://rs.tdwg.org/dwc/terms/order"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/class"/>
            <field index="6" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/kingdom"/>
            <field index="5" term="http://rs.tdwg.org/dwc/terms/genus"/>
        </core>
        """
        core_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))

        # Listed by ascending index (0..6), not by position in the XML above.
        expected_headers_core = [
            'id',
            'http://rs.tdwg.org/dwc/terms/order',
            'http://rs.tdwg.org/dwc/terms/class',
            'http://rs.tdwg.org/dwc/terms/kingdom',
            'http://rs.tdwg.org/dwc/terms/phylum',
            'http://rs.tdwg.org/dwc/terms/genus',
            'http://rs.tdwg.org/dwc/terms/family',
        ]

        self.assertEqual(core_descriptor.headers, expected_headers_core)
Exemplo n.º 6
0
    def test_short_headers(self):
        """.short_headers lists 'id' plus the last URL segment of each indexed term."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
                <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
                ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
                    <files>
                        <location>occurrence.txt</location>
                    </files>
                    <id index="0" />
                    <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
                    <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
                    <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
                    <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
                    <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
                </core>
                """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))

        # "country" only has a default value (no index), so it is not expected here.
        expected_short_headers_core = [
            'id', 'scientificName', 'basisOfRecord', 'family', 'locality'
        ]

        self.assertEqual(core_descriptor.short_headers,
                         expected_short_headers_core)
Exemplo n.º 7
0
    def test_fields(self):
        """.fields exposes one dict (term / index / default) per <field> element."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))

        # .fields is supposed to return a list of dicts like those
        expected_fields = ({
            'term': 'http://rs.tdwg.org/dwc/terms/country',
            'index': None,
            'default': 'Belgium'
        }, {
            'term': 'http://rs.tdwg.org/dwc/terms/scientificName',
            'index': 1,
            'default': None
        })

        for ef in expected_fields:
            # assertIn reports both the missing dict and the actual fields on failure.
            self.assertIn(ef, core_descriptor.fields)

        # Five <field> elements are declared; <id> is not counted as a field.
        self.assertEqual(len(core_descriptor.fields), 5)
    def test_headers_defaultvalue(self):
        """Ensure headers work properly when confronted with default values (no column in file)."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

        # The "country" field has a default but no index, so it must not
        # show up among the headers.
        expected_headers_core = [
            "id",
            "http://rs.tdwg.org/dwc/terms/scientificName",
            "http://rs.tdwg.org/dwc/terms/basisOfRecord",
            "http://rs.tdwg.org/dwc/terms/family",
            "http://rs.tdwg.org/dwc/terms/locality",
        ]

        self.assertEqual(core_descriptor.headers, expected_headers_core)
Exemplo n.º 9
0
    def test_lines_to_ignore_attribute(self):
        """.lines_to_ignore works as documented"""

        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field index="1" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/locality"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
        </core>
        """

        descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))
        data_file = CSVDataFile(sample_data_path('dwca-simple-dir'),
                                descriptor)
        # Release the data file when the test ends, whether it passes or fails.
        self.addCleanup(data_file.close)

        self.assertEqual(data_file.lines_to_ignore, 1)

        # Same section, but declaring three header lines to skip.
        metaxml_section = r"""
                <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="3" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
                    <files>
                        <location>occurrence.txt</location>
                    </files>
                    <id index="0" />
                    <field index="1" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
                    <field index="2" term="http://rs.tdwg.org/dwc/terms/locality"/>
                    <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
                    <field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
                </core>
                """

        descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))
        data_file = CSVDataFile(sample_data_path('dwca-simple-dir'),
                                descriptor)
        # addCleanup captured the first instance's bound method above, so
        # rebinding data_file does not lose it; register the second one too.
        self.addCleanup(data_file.close)

        self.assertEqual(data_file.lines_to_ignore, 3)
    def test_exposes_coreid_index_of_extensions(self):
        """An extension descriptor exposes .coreid_index, while its .id_index is None."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        ext_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(ext_section))

        self.assertEqual(ext_descriptor.coreid_index, 0)

        # ... but it doesn't have .id_index (only for core!)
        self.assertIsNone(ext_descriptor.id_index)
    def test_content_raw_element_tag(self):
        """Test that raw_element gives access to the section's tag, attributes and children."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n"
        fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        ext_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(ext_section))

        # Spot-check the element: its tag, one attribute and the <field> children.
        self.assertEqual(ext_descriptor.raw_element.tag, "extension")
        self.assertEqual(ext_descriptor.raw_element.get("encoding"), "utf-8")
        self.assertEqual(len(ext_descriptor.raw_element.findall("field")), 3)
Exemplo n.º 12
0
    def test_exposes_coreid_index_of_extensions(self):
        """An extension descriptor exposes .coreid_index, while its .id_index is None."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        ext_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(ext_section))

        self.assertEqual(ext_descriptor.coreid_index, 0)

        # ... but it doesn't have .id_index (only for core!)
        self.assertIsNone(ext_descriptor.id_index)
Exemplo n.º 13
0
    def test_content_raw_element_tag(self):
        """Test that raw_element gives access to the section's tag, attributes and children."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n"
        fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        ext_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(ext_section))

        # Spot-check the element: its tag, one attribute and the <field> children.
        self.assertEqual(ext_descriptor.raw_element.tag, 'extension')
        self.assertEqual(ext_descriptor.raw_element.get('encoding'), 'utf-8')
        self.assertEqual(len(ext_descriptor.raw_element.findall('field')), 3)
Exemplo n.º 14
0
    def test_iterate(self):
        """Iterating over a CSVDataFile yields str objects."""
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files><location>occurrence.txt</location></files>
                <id index="0" />
                <field index="1" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
                <field index="2" term="http://rs.tdwg.org/dwc/terms/locality"/>
                <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
                <field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            </core>
         """

        descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))
        data_file = CSVDataFile(sample_data_path("dwca-simple-dir"),
                                descriptor)
        # Release the data file when the test ends, whether it passes or fails.
        self.addCleanup(data_file.close)

        for row in data_file:
            self.assertIsInstance(row, str)
    def test_file_details(self):
        """file_location and file_encoding reflect <location> and the encoding attribute."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

        self.assertEqual(core_descriptor.file_location, "occurrence.txt")
        self.assertEqual(core_descriptor.file_encoding, "utf-8")
Exemplo n.º 16
0
    def test_close(self):
        """Once close() has been called, fetching a row raises ValueError."""
        section_xml = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files><location>occurrence.txt</location></files>
            <id index="0" />
            <field index="1" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/locality"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
        </core>
        """

        section_element = ET.fromstring(section_xml)
        file_descriptor = DataFileDescriptor.make_from_metafile_section(section_element)
        csv_file = CSVDataFile(DIRECTORY_ARCHIVE_PATH, file_descriptor)

        csv_file.close()

        # The file has been closed, so its data can no longer be accessed.
        with self.assertRaises(ValueError):
            csv_file.get_row_by_position(1)
Exemplo n.º 17
0
    def test_file_descriptor_attribute(self):
        """The instance of DataFileDescriptor which is passed to the constructor is available in .file_descriptor"""

        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field index="1" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/locality"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
        </core>
        """

        descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))
        data_file = CSVDataFile(DIRECTORY_ARCHIVE_PATH, descriptor)
        # Release the data file when the test ends, whether it passes or fails.
        self.addCleanup(data_file.close)

        self.assertEqual(data_file.file_descriptor, descriptor)
    def test_exposes_id_index_of_core(self):
        """A core descriptor exposes .id_index, while its .coreid_index is None."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(metaxml_section))

        self.assertEqual(core_descriptor.id_index, 0)

        # ... but it doesn't have .coreid_index (only for extensions!)
        self.assertIsNone(core_descriptor.coreid_index)
    def test_tell_if_represents_core(self):
        """represents_corefile / represents_extension tell core and extension descriptors apart."""
        # 1. Test with core
        with DwCAReader(BASIC_ARCHIVE_PATH) as dwca:
            core_descriptor = dwca.descriptor.core
            self.assertTrue(core_descriptor.represents_corefile)
            self.assertFalse(core_descriptor.represents_extension)

        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n"
        fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        # 2. And with extension
        ext_descriptor = DataFileDescriptor.make_from_metafile_section(ET.fromstring(ext_section))
        self.assertFalse(ext_descriptor.represents_corefile)
        self.assertTrue(ext_descriptor.represents_extension)
Exemplo n.º 20
0
    def test_tell_if_represents_core(self):
        """represents_corefile / represents_extension tell core and extension descriptors apart."""
        # 1. Test with core
        with DwCAReader(BASIC_ARCHIVE_PATH) as dwca:
            core_descriptor = dwca.descriptor.core
            self.assertTrue(core_descriptor.represents_corefile)
            self.assertFalse(core_descriptor.represents_extension)

        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        ext_section = r"""
        <extension encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n"
        fieldsEnclosedBy="" ignoreHeaderLines="1" rowType="http://rs.gbif.org/terms/1.0/Description">
            <files><location>description.txt</location></files>
            <coreid index="0" />
            <field index="1" term="http://purl.org/dc/terms/type"/>
            <field index="2" term="http://purl.org/dc/terms/language"/>
            <field index="3" term="http://purl.org/dc/terms/description"/>
        </extension>
        """

        # 2. And with extension
        ext_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(ext_section))
        self.assertFalse(ext_descriptor.represents_corefile)
        self.assertTrue(ext_descriptor.represents_extension)
Exemplo n.º 21
0
    def test_exposes_id_index_of_core(self):
        """A core descriptor exposes .id_index, while its .coreid_index is None."""
        # Raw string: keeps \t and \n as literal backslash sequences in the
        # attribute values. A non-raw literal would embed a real tab/newline,
        # which the XML parser normalizes to a space inside attributes.
        metaxml_section = r"""
        <core encoding="utf-8" fieldsTerminatedBy="\t" linesTerminatedBy="\n" fieldsEnclosedBy=""
        ignoreHeaderLines="0" rowType="http://rs.tdwg.org/dwc/terms/Occurrence">
            <files>
                <location>occurrence.txt</location>
            </files>
            <id index="0" />
            <field default="Belgium" term="http://rs.tdwg.org/dwc/terms/country"/>
            <field index="1" term="http://rs.tdwg.org/dwc/terms/scientificName"/>
            <field index="2" term="http://rs.tdwg.org/dwc/terms/basisOfRecord"/>
            <field index="3" term="http://rs.tdwg.org/dwc/terms/family"/>
            <field index="4" term="http://rs.tdwg.org/dwc/terms/locality"/>
        </core>
        """

        core_descriptor = DataFileDescriptor.make_from_metafile_section(
            ET.fromstring(metaxml_section))

        self.assertEqual(core_descriptor.id_index, 0)

        # ... but it doesn't have .coreid_index (only for extensions!)
        self.assertIsNone(core_descriptor.coreid_index)