def __init__(self, extension, zip_file):
    """
    Load one extension file of a DarwinCore Archive.

    The metadata is parsed by ``BaseFile.__init__``; the records are then
    read from the archive and stored column-wise: every populated term
    becomes an attribute of this object holding a list with one value per
    record.

    Arguments:
    - extension: passed untouched to ``BaseFile.__init__``.
    - zip_file: opened archive, handed to ``common._locateFileInZip``.

    Problems that do not prevent loading are appended to ``self.warnings``.
    """

    # Load the metadata
    # NOTE(review): BaseFile.__init__ is presumably what sets locations,
    # fields, coreid, defaults, encoding and the field/line delimiters read
    # below -- confirm against BaseFile.
    BaseFile.__init__(self, extension, 'extension', zip_file)

    self.warnings = []

    # Build populatedTerms and main infrastructure.
    # This only depends on the metadata, so it is done before looking at
    # the number of data files: the defaults section below needs
    # populatedTerms to exist even when no record could be loaded.
    self.populatedTerms = []
    if self.coreid.index not in self.fields.keys():
        self.populatedTerms.append('coreid')
    for index in sorted(self.fields.keys()):
        term = self.fields[index].term.split("/")[-1].encode(self.encoding)
        self.populatedTerms.append(term)
    for term in self.populatedTerms:
        setattr(self, term, [])

    # Data in single file
    if len(self.locations) == 1:

        # Open file, read everything after the header lines and release
        # the handle as soon as the content is in memory
        source = common._locateFileInZip(zip_file, self.locations[0])
        try:
            content = source.read().split(self.linesTerminatedBy)[self.ignoreHeaderLines:]
        finally:
            source.close()

        # Remove last newline (content may be empty if the file only
        # contains header lines)
        if content and content[-1] == '':
            content = content[:-1]

        # Load the content
        columns = [getattr(self, term) for term in self.populatedTerms]
        enclosure = self.fieldsEnclosedBy
        for line in content:
            # Remove trailing newline
            if line.endswith("\n"):
                line = line[:-1]
            # Blank lines carry no record
            if not line:
                continue
            splitline = line.split(self.fieldsTerminatedBy)
            for pos, column in enumerate(columns):
                thisvalue = splitline[pos]
                # Remove field enclosing character, if present. The length
                # check keeps empty values (and a lone enclosing character)
                # untouched instead of failing on them.
                if (enclosure
                        and len(thisvalue) >= 2 * len(enclosure)
                        and thisvalue.startswith(enclosure)
                        and thisvalue.endswith(enclosure)):
                    thisvalue = thisvalue[len(enclosure):-len(enclosure)]
                column.append(thisvalue)

    # Data in more than one file
    else:
        #TODO
        self.warnings.append("Sorry, multi-file loading of extension data is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

    # Load the defaults: a default term becomes a full column with the
    # same value repeated for every record
    for default in self.defaults:
        term = default.term.split("/")[-1].encode(self.encoding)
        value = default.default.encode(self.encoding)
        if term in self.populatedTerms:
            self.warnings.append("'{0}' appears in the core file and as a default field in the metafile. Ignoring the default value.".format(term))
            continue
        setattr(self, term, [])
        self.populatedTerms.append(term)
        getattr(self, term).extend([value] * self.countRecords())
def _needsMetafile(self, z, dwca_path):
    """
    Checks if the DarwinCore Archive should have a meta.xml file.

    Cases when meta.xml is NOT needed:
    - One and only one core file, with first row containing headers that
      correspond to DarwinCore terms, without extensions or metadata file
    - Same as above, but with metadata file called EML.xml. If name is
      other than EML.xml, metafile is needed

    Arguments:
    - z: opened archive. ``namelist()`` is used when dwca_path ends with
      '.zip', ``getnames()`` otherwise.
    - dwca_path: path of the archive, only used to tell both cases apart.

    Returns a tuple (needsMetafile, list_of_files, headers):
    - needsMetafile: True if the archive cannot be read without meta.xml
    - list_of_files: archive members other than 'meta.xml' and 'eml.xml'
    - headers: raw column names from the first line of the single data
      file, or None when that line was not read or could not be split

    Side effect: when the separator can be derived from the single data
    file, self.fieldsTerminatedBy and self.fieldsEnclosedBy are set.
    """

    headers = None

    # List of files other than 'eml.xml' and 'meta.xml'. The member list is
    # requested only once instead of once per index.
    names = z.namelist() if dwca_path.endswith('.zip') else z.getnames()
    list_of_files = [name for name in names if name != 'meta.xml' and name != 'eml.xml']

    # Without any data file there are no headers to inspect, so the archive
    # can only be described by a metafile
    if not list_of_files:
        return True, list_of_files, headers

    # If there is more than one file left, metafile is needed
    if len(list_of_files) > 1:
        return True, list_of_files, headers

    data_file = list_of_files[0]

    # If there is another .xml file, metafile is needed
    if data_file.endswith('.xml'):
        return True, list_of_files, headers

    # Extract first line; the handle is closed even if reading fails
    o = common._locateFileInZip(z, data_file)
    try:
        headers_unparsed = o.readline()
    finally:
        o.close()

    # Split by tabs if txt file
    if data_file.endswith('.txt'):
        self.fieldsTerminatedBy = "\t"
        self.fieldsEnclosedBy = ""
        headers = headers_unparsed.split("\t")
    # Split by comma if comma present and semicolon not present
    elif "," in headers_unparsed and ";" not in headers_unparsed:
        self.fieldsTerminatedBy = ","
        self.fieldsEnclosedBy = "\""
        headers = headers_unparsed.split(",")
    # Split by semicolon if semicolon present and comma not present
    elif ";" in headers_unparsed and "," not in headers_unparsed:
        self.fieldsTerminatedBy = ";"
        self.fieldsEnclosedBy = "\""
        headers = headers_unparsed.split(";")

    # If separator is not tab, comma or semicolon, metafile is needed
    if headers is None or len(headers) == 1:
        return True, list_of_files, headers

    # If a single term is not a DarwinCore Term, metafile is needed
    needsMetafile = any(header.rstrip() not in self.dwcTerms for header in headers)

    return needsMetafile, list_of_files, headers
def __init__(self, zip_file):
    """
    Parse the meta.xml descriptor of a DarwinCore Archive.

    Arguments:
    - zip_file: opened archive, handed to ``common._locateFileInZip``.

    Raises common.MetafileError if the archive has no meta.xml.

    After parsing, the main section, the core (stored in ``self.core``)
    and the extensions are processed by ``_parseMain``, ``_parseCore`` and
    ``_parseExtensions`` respectively.
    """

    metafile_content = common._locateFileInZip(zip_file, 'meta.xml')
    if metafile_content is None:
        raise common.MetafileError('Could not find meta.xml.')

    # minidom.parse builds the whole DOM in memory, so the handle can be
    # released right after parsing, whether it succeeds or not
    try:
        metafile = minidom.parse(metafile_content)
    finally:
        metafile_content.close()

    self._parseMain(metafile, zip_file)
    self.core = self._parseCore(metafile, zip_file)
    self._parseExtensions(metafile, zip_file)
def __init__(self, dwca_path):
    """
    Initialization function for the DarwinCoreArchive class.
    Argument must be the path to a valid DarwinCore archive.

    Example: dwca = DarwinCoreArchive('/path/to/DarwinCore/archive.zip')

    Steps:
    1. Open the compressed file and decide whether a meta.xml is needed.
    2. Parse meta.xml if it is needed or simply present; otherwise fall
       back to default parsing settings and the headers of the data file.
    3. Load the records of the core file into the field objects of the
       metafile and expose each of them as an attribute named after its
       term.
    4. Add the default fields declared in the metafile.
    5. Print a short summary of the import.

    Problems that do not prevent loading are stored in ``self.warnings``.
    """

    #------------------------------------------------------------------------------#
    ##################
    # INITIALIZATION #
    ##################

    # Create a container for loading warnings
    self.warnings = []

    # Create container for flags
    self.flags = {}

    # All DarwinCore Terms and URIs. Every key is the last segment of its
    # URI, so the mapping is built from the bare names and the two bases.
    dwc_base = 'http://rs.tdwg.org/dwc/terms/'
    dc_base = 'http://purl.org/dc/terms/'
    dc_names = (
        'bibliographicCitation', 'type', 'references', 'accessRights',
        'rightsHolder', 'rights', 'language', 'modified',
    )
    dwc_names = (
        'identificationRemarks', 'minimumDepthInMeters', 'footprintSRS',
        'verbatimLatitude', 'month', 'measurementDeterminedDate',
        'informationWithheld', 'lithostratigraphicTerms',
        'latestPeriodOrHighestSystem', 'reproductiveCondition', 'continent',
        'endDayOfYear', 'identificationID', 'latestEraOrHighestErathem',
        'occurrenceID', 'locationAccordingTo', 'latestEpochOrHighestSeries',
        'coordinateUncertaintyInMeters', 'coordinatePrecision',
        'maximumDepthInMeters', 'waterBody', 'resourceRelationshipID',
        'kingdom', 'decimalLatitude', 'verbatimTaxonRank',
        'earliestEraOrLowestErathem', 'verbatimCoordinates',
        'acceptedNameUsageID', 'infraspecificEpithet', 'namePublishedIn',
        'originalNameUsage', 'nameAccordingToID', 'dataGeneralizations',
        'nomenclaturalStatus', 'recordNumber', 'day', 'individualCount',
        'measurementType', 'institutionID',
        'georeferenceVerificationStatus', 'lifeStage', 'measurementUnit',
        'locationRemarks', 'scientificName', 'parentNameUsage', 'datasetID',
        'eventID', 'lowestBiostratigraphicZone', 'habitat',
        'higherGeographyID', 'sex', 'scientificNameAuthorship',
        'associatedTaxa', 'year', 'taxonRemarks', 'namePublishedInYear',
        'identificationVerificationStatus', 'eventTime', 'basisOfRecord',
        'latestEonOrHighestEonothem', 'otherCatalogNumbers',
        'georeferenceRemarks', 'acceptedNameUsage', 'georeferenceSources',
        'specificEpithet', 'verbatimLocality', 'identificationReferences',
        'measurementRemarks', 'georeferencedBy', 'geodeticDatum',
        'occurrenceRemarks', 'collectionCode', 'higherGeography',
        'nameAccordingTo', 'latestAgeOrHighestStage', 'fieldNumber',
        'measurementMethod', 'disposition', 'earliestEpochOrLowestSeries',
        'group', 'highestBiostratigraphicZone', 'ownerInstitutionCode',
        'scientificNameID', 'relationshipEstablishedDate',
        'earliestAgeOrLowestStage', 'country', 'measurementDeterminedBy',
        'decimalLongitude', 'locationID', 'relationshipRemarks',
        'startDayOfYear', 'formation', 'genus', 'family', 'collectionID',
        'dynamicProperties', 'eventRemarks', 'municipality', 'individualID',
        'footprintWKT', 'county', 'associatedMedia', 'associatedSequences',
        'subgenus', 'footprintSpatialFit', 'measurementValue',
        'higherClassification', 'islandGroup', 'resourceID', 'class',
        'verbatimSRS', 'associatedOccurrences', 'catalogNumber',
        'verbatimLongitude', 'preparations', 'taxonID', 'nomenclaturalCode',
        'maximumElevationInMeters', 'verbatimCoordinateSystem',
        'measurementID', 'relatedResourceID', 'datasetName',
        'earliestEonOrLowestEonothem', 'measurementAccuracy',
        'verbatimDepth', 'bed', 'georeferencedDate', 'behavior', 'island',
        'parentNameUsageID', 'minimumElevationInMeters', 'occurrenceStatus',
        'vernacularName', 'pointRadiusSpatialFit', 'countryCode', 'phylum',
        'institutionCode', 'identificationQualifier', 'namePublishedInID',
        'identifiedBy', 'earliestPeriodOrLowestSystem',
        'minimumDistanceAboveSurfaceInMeters',
        'maximumDistanceAboveSurfaceInMeters', 'taxonConceptID',
        'georeferenceProtocol', 'locality', 'associatedReferences',
        'stateProvince', 'taxonomicStatus', 'relationshipAccordingTo',
        'member', 'relationshipOfResource', 'taxonRank',
        'previousIdentifications', 'samplingEffort', 'verbatimElevation',
        'establishmentMeans', 'typeStatus', 'samplingProtocol',
        'originalNameUsageID', 'eventDate', 'geologicalContextID',
        'fieldNotes', 'dateIdentified', 'verbatimEventDate', 'recordedBy',
        'order',
    )
    self.dwcTerms = dict((name, dwc_base + name) for name in dwc_names)
    self.dwcTerms.update((name, dc_base + name) for name in dc_names)

    # Parse and load the compressed file, either a .zip or a .tar.gz
    z = self._loadCompressedFile(dwca_path)

    # Parse the need for a meta.xml
    needsMetafile, list_of_files, headers = self._needsMetafile(z, dwca_path)

    #------------------------------------------------------------------------------#
    ############
    # METAFILE #
    ############

    # Even if there is no need for a metafile, if there IS a metafile, take advantage of it
    if needsMetafile or common._locateFileInZip(z, 'meta.xml'):
        self.metafile = Metafile(z)

        # Wrapper for certain key attributes
        self.core = self.metafile.core
        self.extensions = self.metafile.extensions
        self.locations = self.core.locations

        # Core field parsing elements
        self.linesTerminatedBy = self.core.linesTerminatedBy
        self.fieldsTerminatedBy = self.core.fieldsTerminatedBy
        self.fieldsEnclosedBy = self.core.fieldsEnclosedBy
        self.ignoreHeaderLines = self.core.ignoreHeaderLines
        self.rowType = self.core.rowType
        self.encoding = self.core.encoding
        self.dateFormat = self.core.dateFormat

    else:
        self.metafile = None
        # Must be a mapping: the summary at the end calls .keys() on it
        self.extensions = {}
        self.locations = list_of_files

        # Load defaults except fieldsTerminatedBy and fieldsEnclosedBy, which depends on the file type
        # (those two have been set by _needsMetafile)
        self.linesTerminatedBy = "\n"
        self.ignoreHeaderLines = 1
        self.rowType = "http://rs.tdwg.org/dwc/xsd/simpledarwincore/SimpleDarwinRecord"
        self.encoding = "UTF-8"
        self.dateFormat = "YYYY-MM-DD"

    # Build populatedTerms

    # Container
    self.populatedTerms = []

    # Core terms declared in the metafile that are not DarwinCore terms
    unknown_terms = []

    # If metafile is present, take values from it
    if self.metafile:
        core = self.metafile.core

        # If field with index = 0 exists, don't add id; else, add id
        if 0 not in core.fields.keys() and core.id.index == 0:
            self.populatedTerms.append('id')

        # Reverse mapping, so that each URI is resolved with one lookup
        uri_to_term = dict((uri, term) for term, uri in self.dwcTerms.items())
        for index in sorted(core.fields.keys()):
            uri = core.fields[index].term
            term = uri_to_term.get(uri)
            if term is None:
                # NOTE(review): such a field is left out of populatedTerms,
                # which shifts the position of every later column. It used
                # to be dropped silently; now it is at least reported.
                unknown_terms.append(str(uri.split("/")[-1]))
                continue
            self.populatedTerms.append(str(term))

    # If not, build populatedTerms from first row
    else:
        # If meta.xml is not needed, headers are already parsed in the needsMetafile section
        for header in headers:
            self.populatedTerms.append(header.rstrip())

    #------------------------------------------------------------------------------#
    ############
    # METADATA #
    ############

    #TODO

    #------------------------------------------------------------------------------#
    ###########
    # CONTENT #
    ###########

    # Number of records loaded from the core file
    record_count = 0

    # Open the main content file and load the records

    # If locations contains more than one file
    if len(self.locations) != 1:
        #TODO
        self.warnings.append("Sorry, multi-file loading of core data is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

    # Records are stored in the field objects of the metafile, so there is
    # nowhere to put them when the archive has no meta.xml
    elif self.metafile is None:
        #TODO
        self.warnings.append("Sorry, loading of core data without a meta.xml file is not yet supported. Basic metadata has been parsed but no actual record has been processed.")

    # If locations contains only one file
    else:
        core = self.metafile.core
        occfile = self.locations[0]

        # Assessment of the core file is made when loading metafile
        content = common._locateFileInZip(z, occfile)
        try:
            occlines = content.read().split(self.linesTerminatedBy)[self.ignoreHeaderLines:]
        finally:
            content.close()

        # Remove last newline (occlines may be empty if the file only
        # contains header lines)
        if occlines and occlines[-1] == '':
            occlines = occlines[:-1]

        # One value holder per column: the field declared at that position
        # or, when no field has that index, the id element
        colnames = self.populatedTerms
        field_indexes = core.fields.keys()
        holders = [core.fields[pos] if pos in field_indexes else core.id
                   for pos in range(len(colnames))]

        # Main process
        enclosure = self.fieldsEnclosedBy
        for line in occlines:
            if line.endswith("\n"):
                line = line[:-1]
            # Blank lines carry no record
            if not line:
                continue
            splitline = line.split(self.fieldsTerminatedBy)
            for pos, holder in enumerate(holders):
                thisvalue = splitline[pos]
                # Remove the enclosing characters if present. The length
                # check keeps empty values (and a lone enclosing character)
                # untouched instead of failing on them.
                if (enclosure
                        and len(thisvalue) >= 2 * len(enclosure)
                        and thisvalue.startswith(enclosure)
                        and thisvalue.endswith(enclosure)):
                    thisvalue = thisvalue[len(enclosure):-len(enclosure)]
                holder.values.append(thisvalue)

        # Shortcut to the values: each term points to the very same object
        # that received the values of its column
        for term, holder in zip(colnames, holders):
            setattr(self, term, holder)

        if holders:
            record_count = len(holders[0].values)

    # Defaults: a default term becomes a full column with the same value
    # repeated for every record
    if self.metafile:
        for default in self.metafile.core.defaults:
            term = str(default.term.split("/")[-1])
            value = str(default.default)
            if term in self.populatedTerms:
                self.warnings.append("'{0}' appears in the core file and as a default field in the metafile. Ignoring the default value.".format(term))
                continue
            self.populatedTerms.append(term)
            default.values = [value] * record_count
            setattr(self, term, default)

    # Print warnings
    for term in unknown_terms:
        self.warnings.append("'{0}' cannot be found in the list of DarwinCore terms. The use of rename or batchRename functions is suggested.".format(term))

    #------------------------------------------------------------------------------#
    #######
    # END #
    #######

    # Print stats from the import
    # (print is called with a single parenthesised string, which gives the
    # same output whether print is a statement or a function)

    # Total number of records imported
    print("{0} records imported from main file/s".format(record_count))

    # Extensions loaded
    if len(self.extensions.keys()) > 0:
        print("{0} extensions loaded: {1}".format(len(self.extensions), ", ".join(self.extensions.keys())))

    # Heads up for warnings
    if len(self.warnings) > 0:
        print("IMPORTANT: some warning messages have been stored. To see them, call the function showWarnings()")