def generateDwcaExportFilesInThread(self, datatype_list, year_from, year_to,
                                    status, user):
    """ Starts the generation of DwC-A export files in a separate thread. """
    logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
        command='Generate DwC-A files', user=user)
    try:
        # Check if the thread is already running.
        if self._generate_dwca_thread:
            if self._generate_dwca_thread.is_alive():
                error_message = '"Generate DwC-A files" is already running. Please try again later.'
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row=error_message)
                sharkdata_core.SharkdataAdminUtils().log_close(
                    logfile_name, new_status='FAILED')
                return error_message
        # Use a thread to release the user. The log file is closed in the thread.
        self._generate_dwca_thread = threading.Thread(
            target=sharkdata_core.GenerateDwcaExportFiles().generateDwcaExportFiles,
            args=(logfile_name, datatype_list, year_from, year_to, status, user))
        self._generate_dwca_thread.start()
    except Exception as e:
        error_message = "Can't generate DwC-A files." + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
    #
    return None  # No error message.
def updateDatasetsAndResourcesInThread(self, user):
    """ Starts the update of datasets and resources in a separate thread. """
    logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
        command='Update datasets and resources', user=user)
    try:
        # Check if the thread is already running.
        if self._update_thread:
            if self._update_thread.is_alive():
                error_message = '"Update datasets and resources" is already running. Please try again later.'
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row=error_message)
                sharkdata_core.SharkdataAdminUtils().log_close(
                    logfile_name, new_status='FAILED')
                return error_message
        # Use a thread to release the user. The log file is closed in the thread.
        self._update_thread = threading.Thread(
            target=self.updateDatasetsAndResources,
            args=(logfile_name, user))
        self._update_thread.start()
    except Exception as e:
        error_message = 'Failed when loading datasets or resources.' + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
    #
    return None  # No error message.
def validateIcesXmlInThread(self, datatype_list, user):
    """ Starts the validation of ICES-XML files in a separate thread. """
    logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
        command='Validate ICES-XML file', user=user)
    try:
        # Check if the thread is already running.
        if self._validate_ices_xml_thread:
            if self._validate_ices_xml_thread.is_alive():
                error_message = '"Validate ICES-XML files" is already running. Please try again later.'
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row=error_message)
                sharkdata_core.SharkdataAdminUtils().log_close(
                    logfile_name, new_status='FAILED')
                return error_message
        # Use a thread to release the user. The log file is closed in the thread.
        self._validate_ices_xml_thread = threading.Thread(
            target=sharkdata_core.ValidateIcesXml().validateIcesXml,
            args=(logfile_name, datatype_list, user))
        self._validate_ices_xml_thread.start()
    except Exception as e:
        error_message = "Can't validate ICES-XML file." + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
    #
    return None  # No error message.
def deleteSpeciesObsInThread(self, user):
    """ Starts the deletion of species observations in a separate thread. """
    logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
        command='Delete species observations', user=user)
    try:
        # Check if the thread is already running.
        if self._delete_obs_thread:
            if self._delete_obs_thread.is_alive():
                error_message = '"Delete species observations" is already running. Please try again later.'
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row=error_message)
                sharkdata_core.SharkdataAdminUtils().log_close(
                    logfile_name, new_status='FAILED')
                return error_message
        # Use a thread to release the user. The log file is closed in the thread.
        self._delete_obs_thread = threading.Thread(
            target=sharkdata_core.SpeciesObsUtils().deleteSpeciesObs,
            args=(logfile_name, user))
        self._delete_obs_thread.start()
    except Exception as e:
        error_message = "Can't delete species observations." + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
    #
    return None  # No error message.
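# The four launcher methods above repeat the same guard/launch/log pattern.
# A minimal sketch of a shared helper that could factor this out follows.
# The helper name '_start_in_thread' is an assumption, not part of the
# existing codebase; it only reuses the SharkdataAdminUtils calls shown above.
def _start_in_thread(self, thread_attr, command, target, args, user):
    """ Hypothetical shared launcher: guards against a running thread,
        then starts 'target' with the log file name prepended to 'args'.
        Returns an error message string, or None on success. """
    logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
        command=command, user=user)
    thread = getattr(self, thread_attr, None)
    if thread and thread.is_alive():
        error_message = '"' + command + '" is already running. Please try again later.'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
        return error_message
    # The started thread is expected to close the log file itself.
    new_thread = threading.Thread(target=target, args=(logfile_name,) + tuple(args))
    setattr(self, thread_attr, new_thread)
    new_thread.start()
    return None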
def generateDwcaExportFiles(self, logfile_name, datatype_list, year_from,
                            year_to, monitoring_types, user):
    """ Generates DwC-A export files for the selected datatypes. """
    try:
        # # Load resource content for ICES station.
        # sharkdata_core.ExportStations().load_export_stations('export_ices_stations')
        # Load resource content for filtering reported data.
        sharkdata_core.ExportFilter().load_export_filter('export_ices_filters')
        # # Load resource content to translate values.
        # sharkdata_core.TranslateValues().load_export_translate_values('export_ices_translate_values')
        # # Load resource content to translate from DynTaxa to WoRMS.
        # sharkdata_core.TranslateTaxa().load_translate_taxa('translate_dyntaxa_to_worms')
        # # Load resource content to translate from DynTaxa to Helcom PEG.
        # sharkdata_core.TranslateDyntaxaToHelcomPeg().load_translate_taxa('translate_dyntaxa_to_helcom_peg')
        # Create the target directory if it does not exist.
        if not os.path.exists(self._export_dir_path):
            os.makedirs(self._export_dir_path)
        #
        error_counter = 0
        # Iterate over the selected datatypes.
        for datatype in datatype_list:
            error_counter = self.generateOneDwca(logfile_name, error_counter,
                                                 datatype, year_from, year_to,
                                                 monitoring_types, user)
        #
        if error_counter > 0:
            admin_models.changeLogRowStatus(
                logfile_name,
                status='FINISHED (Errors: ' + str(error_counter) + ')')
        else:
            admin_models.changeLogRowStatus(logfile_name, status='FINISHED')
        # # Log missing stations.
        # missing_station_list = sharkdata_core.ExportStations().get_missing_station_list()
        # if len(missing_station_list) > 0:
        #     sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='Missing station(s): ')
        #     for missing_station in sorted(missing_station_list):
        #         sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='- ' + missing_station)
        #         if settings.DEBUG: print('DEBUG: missing station: ' + missing_station)
        #     sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
        # Log missing taxa.
        missing_taxa_list = sharkdata_core.TranslateTaxa().get_missing_taxa_list()
        if len(missing_taxa_list) > 0:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Missing taxa: ')
            for missing_taxa in sorted(missing_taxa_list):
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row='- ' + missing_taxa)
                if settings.DEBUG:
                    print('DEBUG: missing taxon: ' + missing_taxa)
            sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
        #
        if settings.DEBUG:
            print('DEBUG: DwC-A generation FINISHED')
    except Exception as e:
        error_message = "Can't generate DwC-A file." + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        # Close the log as failed (the commented-out changeLogRowStatus call
        # in the original suggests this was the intent).
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
def writeLatestDatasetsInfoToDb(self, logfile_name=None, user=''):
    """ Updates the database from datasets stored in the FTP area.
        If multiple versions of a dataset are in the FTP area,
        only the latest will be loaded. """
    # Check datasets in 'data_in/datasets'. Create a list of dataset names.
    dataset_names = []
    for dataset_path in self._data_in_datasets.glob('SHARK_*.zip'):
        print(dataset_path.name)
        parts = dataset_path.name.split('_version')
        if len(parts) >= 1:
            dataset_names.append(parts[0])
    # Remove all datasets from 'data/datasets' not included in 'dataset_names'.
    for dataset_path in self._data_datasets.glob('SHARK_*.zip'):
        print(dataset_path.name)
        parts = dataset_path.name.split('_version')
        if len(parts) >= 1:
            if parts[0] not in dataset_names:
                # Delete the file.
                dataset_path.unlink()  # Removes the file.
                # Remove from the database. Datasets are stored without the
                # version suffix, so match on the name part before '_version'.
                datasets_models.Datasets.objects.filter(
                    dataset_name=parts[0]).delete()
    error_counter = 0
    # Remove all db rows.
    datasets_models.Datasets.objects.all().delete()
    # CTD profiles.
    ctdprofiles_models.CtdProfiles.objects.all().delete()
    # Get the latest datasets from the FTP archive.
    archive = sharkdata_core.SharkArchive(self._data_in_datasets)
    for file_name in sorted(archive.getLatestSharkArchiveFilenames()):
        if logfile_name:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Loading file: ' + file_name + '...')
        try:
            error_string = self.writeFileInfoToDb(file_name, logfile_name, user)
            if error_string:
                error_counter += 1
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name,
                    log_row='ERROR: Failed to load: ' + file_name +
                            '. Error: ' + error_string)
        except Exception as e:
            error_counter += 1
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='ERROR: Failed to load: ' + file_name +
                        '. Error: ' + str(e))
    #
    return error_counter
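# The 'latest version only' rule above relies on
# SharkArchive.getLatestSharkArchiveFilenames(), which is defined elsewhere.
# A minimal sketch of that selection logic follows, assuming file names use
# the 'SHARK_<name>_version_<date>.zip' pattern implied by the '_version'
# splits above. The function body and the example name in the docstring are
# illustrations, not the actual implementation.
def latest_shark_archive_filenames(data_in_datasets):
    """ Illustration only: keep the highest version per dataset name,
        assuming names like 'SHARK_Zoobenthos_2019_BEDA_version_2020-01-15.zip'. """
    import pathlib
    latest = {}
    for path in pathlib.Path(data_in_datasets).glob('SHARK_*.zip'):
        parts = path.name.split('_version_')
        if len(parts) == 2:
            name, version = parts[0], parts[1]
            # ISO-dated version strings compare correctly as plain strings.
            if (name not in latest) or (version > latest[name][0]):
                latest[name] = (version, path.name)
    return sorted(file_name for _version, file_name in latest.values())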
def deleteExportFile(request, export_name):
    """ Deletes one row in the database. """
    exportfile = models.ExportFiles.objects.get(export_name=export_name)
    #
    if request.method == "GET":
        form = forms.DeleteExportFileForm()
        contextinstance = {
            'form': form,
            'exportfile': exportfile,
            'error_message': None
        }
        contextinstance.update(csrf(request))
        return render(request, "delete_exportfile.html", contextinstance)
    elif request.method == "POST":
        error_message = None  # Initially.
        #
        form = forms.DeleteExportFileForm(request.POST)
        if form.is_valid():
            #
            user = request.POST['user']
            password = request.POST['password']
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            #
            if error_message is None:
                logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
                    command='Delete export', user=user)
                try:
                    exportfile = models.ExportFiles.objects.get(
                        export_name=export_name)
                    exportfile.delete()
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name, log_row='Export deleted: ' + export_name)
                    sharkdata_core.SharkdataAdminUtils().log_close(
                        logfile_name, new_status='FINISHED')
                except Exception:
                    error_message = "Can't delete exportfile: " + export_name
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name, log_row=error_message)
                    sharkdata_core.SharkdataAdminUtils().log_close(
                        logfile_name, new_status='FAILED')
            if error_message is None:
                return HttpResponseRedirect("/exportformats")
        #
        contextinstance = {
            'form': form,
            'exportfile': exportfile,
            'error_message': error_message
        }
        contextinstance.update(csrf(request))
        return render(request, "delete_exportfile.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/exportformats")
def deleteIcesXmlExportFiles(request):
    """ Deletes all ICES-XML files from the 'Export formats' page. """
    error_message = None
    #
    if request.method == "GET":
        #
        form = forms.DeleteIcesXmlExportFilesForm()
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "delete_ices_xml_exportfiles.html", contextinstance)
    elif request.method == "POST":
        #
        form = forms.DeleteIcesXmlExportFilesForm(request.POST)
        if form.is_valid():
            #
            user = request.POST['user']
            password = request.POST['password']
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            #
            if error_message is None:
                logfile_name = sharkdata_core.SharkdataAdminUtils().log_create(
                    command='Delete all ICES-XML files', user=user)
                try:
                    exportformats_models.ExportFiles.objects.all().filter(
                        format='ICES-XML').delete()
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name, log_row='All ICES-XML export files deleted.')
                    sharkdata_core.SharkdataAdminUtils().log_close(
                        logfile_name, new_status='FINISHED')
                except Exception:
                    error_message = "Can't delete datasets from the database."
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name, log_row=error_message)
                    sharkdata_core.SharkdataAdminUtils().log_close(
                        logfile_name, new_status='FAILED')
            # OK.
            if error_message is None:
                return HttpResponseRedirect("/sharkdataadmin")
        #
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "delete_ices_xml_exportfiles.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/sharkdataadmin")
def cleanUpSpeciesObs(request):
    """ Removes species observations with status='DELETED'. """
    error_message = None
    #
    if request.method == "GET":
        #
        form = forms.CleanUpSpeciesObsForm()
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "speciesobs_cleanup.html", contextinstance)
    elif request.method == "POST":
        form = forms.CleanUpSpeciesObsForm(request.POST)
        if form.is_valid():
            #
            user = request.POST['user']
            password = request.POST['password']
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            #
            if error_message is None:
                error_message = sharkdata_core.SharkdataAdminUtils(
                ).deleteSpeciesObsInThread(user)
            #
            if error_message is None:
                return HttpResponseRedirect("/sharkdataadmin")
        #
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "speciesobs_cleanup.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/sharkdataadmin")
def updateDatasetsAndResources(request):
    """ Updates the database from datasets and resources stored in the FTP area. """
    error_message = None
    #
    if request.method == "GET":
        form = forms.UpdateDatasetsAndResourcesForm()
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "update_datasets_and_resources.html", contextinstance)
    elif request.method == "POST":
        #
        form = forms.UpdateDatasetsAndResourcesForm(request.POST)
        if form.is_valid():
            #
            user = request.POST['user']
            password = request.POST['password']
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            # Load datasets. The launcher returns an error message
            # if a previous run is still active.
            if error_message is None:
                error_message = sharkdata_core.SharkdataAdminUtils(
                ).updateDatasetsAndResourcesInThread(user)
            # OK.
            if error_message is None:
                return HttpResponseRedirect("/sharkdataadmin")
        #
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "update_datasets_and_resources.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/sharkdataadmin")
def generateDwcaExportFiles(request):
    """ Generates DarwinCore-Archive files for the 'Export formats' page. """
    error_message = None
    #
    if request.method == "GET":
        form = forms.GenerateDwcaExportFilesForm()
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "generate_dwca_exportfiles.html", contextinstance)
    elif request.method == "POST":
        #
        form = forms.GenerateDwcaExportFilesForm(request.POST)
        if form.is_valid():
            #
            datatype_list = []
            year_from = request.POST['year_from']
            year_to = request.POST['year_to']
            monitoring_type = request.POST['monitoring_type']
            user = request.POST['user']
            password = request.POST['password']
            #
            if ('phytobenthos' in request.POST) and (request.POST['phytobenthos'] == 'on'):
                datatype_list.append('Epibenthos')
                # datatype_list.append('Phytobenthos')
            if ('phytoplankton' in request.POST) and (request.POST['phytoplankton'] == 'on'):
                datatype_list.append('Phytoplankton')
            if ('zoobenthos' in request.POST) and (request.POST['zoobenthos'] == 'on'):
                datatype_list.append('Zoobenthos')
            if ('zooplankton' in request.POST) and (request.POST['zooplankton'] == 'on'):
                datatype_list.append('Zooplankton')
            #
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            #
            if error_message is None:
                error_message = sharkdata_core.SharkdataAdminUtils(
                ).generateDwcaExportFilesInThread(datatype_list, year_from,
                                                  year_to, monitoring_type, user)
            # OK.
            if error_message is None:
                return HttpResponseRedirect("/sharkdataadmin")
        #
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "generate_dwca_exportfiles.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/sharkdataadmin")
def validateIcesXmlExportFiles(request):
    """ Validates ICES-XML files on the 'Export formats' page. """
    error_message = None
    #
    if request.method == "GET":
        form = forms.ValidateIcesXmlForm()
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "validate_ices_xml_exportfiles.html", contextinstance)
    elif request.method == "POST":
        #
        form = forms.ValidateIcesXmlForm(request.POST)
        if form.is_valid():
            #
            datatype_list = []
            user = request.POST['user']
            password = request.POST['password']
            #
            if ('phytobenthos' in request.POST) and (request.POST['phytobenthos'] == 'on'):
                datatype_list.append('Phytobenthos')
                datatype_list.append('Epibenthos')
            if ('phytoplankton' in request.POST) and (request.POST['phytoplankton'] == 'on'):
                datatype_list.append('Phytoplankton')
            if ('zoobenthos' in request.POST) and (request.POST['zoobenthos'] == 'on'):
                datatype_list.append('Zoobenthos')
            if ('zooplankton' in request.POST) and (request.POST['zooplankton'] == 'on'):
                datatype_list.append('Zooplankton')
            # if ('Phytobenthos' in datatype_list) or \
            #    ('Phytoplankton' in datatype_list) or \
            #    ('Zooplankton' in datatype_list):
            #     error_message = 'Support for Zoobenthos only, others are under development. Please try again...'
            #
            if password != settings.APPS_VALID_USERS_AND_PASSWORDS.get(user, None):
                error_message = 'Not a valid user or password. Please try again...'
            #
            if error_message is None:
                error_message = sharkdata_core.SharkdataAdminUtils(
                ).validateIcesXmlInThread(datatype_list, user)
            # OK.
            if error_message is None:
                return HttpResponseRedirect("/sharkdataadmin")
        #
        contextinstance = {'form': form, 'error_message': error_message}
        contextinstance.update(csrf(request))
        return render(request, "validate_ices_xml_exportfiles.html", contextinstance)
    # Not a valid request method.
    return HttpResponseRedirect("/sharkdataadmin")
def validateIcesXml(self, logfile_name, datatype_list, user):
    """ Validates ICES-XML export files via DATSU for the selected datatypes. """
    error_counter = 0
    #
    db_exports = export_models.ExportFiles.objects.all()
    for db_export in db_exports:
        if db_export.datatype in datatype_list:
            #
            error_counter = self.validateOneIcesXml(
                logfile_name, error_counter, db_export, user)
    #
    if error_counter > 0:
        sharkdata_core.SharkdataAdminUtils().log_close(logfile_name, 'FAILED')
    else:
        sharkdata_core.SharkdataAdminUtils().log_close(logfile_name, 'FINISHED')
    if settings.DEBUG:
        print('DEBUG: ICES-XML validation FINISHED')
def viewLog(request, file_stem):
    """ Returns the content of one log file as plain text. """
    log_content = ''
    try:
        log_content = sharkdata_core.SharkdataAdminUtils(
        ).get_log_file_content(file_stem)
    except Exception:
        pass
    #
    response = HttpResponse(content_type='text/plain; charset=cp1252')
    response.write(log_content.encode('cp1252'))
    return response
def updateDatasetsAndResources(self, logfile_name, user):
    """ Loads datasets and resources into the database and closes the log. """
    error_counter = 0
    try:
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row='\nDatasets:')
        error_counter += sharkdata_core.DatasetUtils(
        ).writeLatestDatasetsInfoToDb(logfile_name)
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row='\nResources:')
        error_counter += sharkdata_core.ResourcesUtils(
        ).writeResourcesInfoToDb(logfile_name)
        if error_counter > 0:
            sharkdata_core.SharkdataAdminUtils().log_close(
                logfile_name,
                new_status='FINISHED-' + str(error_counter) + '-errors')
        else:
            sharkdata_core.SharkdataAdminUtils().log_close(
                logfile_name, new_status='FINISHED')
    except Exception as e:
        error_message = 'Failed when loading datasets or resources.' + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
    #
    return None  # No error message.
def writeResourcesInfoToDb(self, logfile_name, user=''):
    """ Updates the database from resources stored in the FTP area.
        If multiple versions of a resource are in the FTP area,
        only the latest will be loaded. """
    self.clear()
    error_counter = 0
    # Remove all db rows.
    models.Resources.objects.all().delete()
    # Get resources from the FTP archive.
    for file_name in self.getResourceFiles():
        if logfile_name:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Loading file: ' + file_name + '...')
        try:
            error_string = self.writeFileInfoToDb(file_name, user)
            if error_string:
                error_counter += 1
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name,
                    log_row='ERROR: Failed to load: ' + file_name +
                            '. Error: ' + error_string)
        except Exception as e:
            error_counter += 1
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='ERROR: Failed to load: ' + file_name +
                        '. Error: ' + str(e))
    #
    return error_counter
def sharkDataAdmin(request):
    """ Renders the admin page with the most recent log entries. """
    per_page = 5
    try:
        if 'per_page' in request.GET:
            per_page = int(request.GET['per_page'])
    except Exception:
        pass
    logrows = []
    log_file_list = sharkdata_core.SharkdataAdminUtils().get_log_files()
    # Sort newest first before limiting to 'per_page' entries.
    log_file_list = sorted(log_file_list, reverse=True)[0:per_page]
    for log_file in log_file_list:
        log_file_path = pathlib.Path(log_file)
        stem = log_file_path.stem
        parts = stem.split('_')
        date = parts[0]
        time = parts[1]
        command = ' '.join(parts[2:-1])
        status = parts[-1]
        logrow = {}
        logrow['date'] = date
        logrow['time'] = time
        logrow['started_datetime'] = datetime.datetime.strptime(
            date + ' ' + time, '%Y-%m-%d %H%M%S')
        logrow['command_name'] = command
        logrow['status'] = status
        logrow['file_stem'] = stem
        logrows.append(logrow)
    #
    return render(request, "sharkdata_admin.html", {'logrows': logrows})
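# For reference, a small illustration of the stem parsing above.
# The file name is hypothetical; the format it assumes is
# '<date>_<time>_<command words>_<status>', which is what the split
# and join expressions in sharkDataAdmin rely on.
#
# stem = '2020-03-01_114500_Update_datasets_and_resources_FINISHED'
# parts = stem.split('_')
# parts[0]            -> '2020-03-01'                     (date)
# parts[1]            -> '114500'                         (time, HHMMSS)
# ' '.join(parts[2:-1]) -> 'Update datasets and resources' (command)
# parts[-1]           -> 'FINISHED'                       (status)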
def generateSpeciesObs(self, logfile_name, user):
    """ Generates species observation export files for all valid datatypes. """
    print('Species observations update. Started.')
    self.getHeaders()
    try:
        # Loop over all datasets.
        valid_datatypes = [
            'Epibenthos',
            'GreySeal',
            'HarbourSeal',
            'Phytoplankton',
            'RingedSeal',
            'Zoobenthos',
            'Zooplankton',
        ]
        for valid_datatype in valid_datatypes:
            #
            export_name = 'SpeciesObs_SMHI_' + valid_datatype
            export_file_name = export_name + '.txt'
            export_file_path = pathlib.Path(self._export_dir_path, export_file_name)
            error_log_file = export_name + '_log.txt'
            error_log_file_path = pathlib.Path(self._export_dir_path, error_log_file)
            #
            if not export_file_path.parent.exists():
                export_file_path.parent.mkdir(parents=True)
            # To avoid duplicates.
            self.observation_id_lookup = set()
            # Counters.
            self.counter_rows = 0
            self.counter_duplicates = 0
            # First and last year.
            self.year_min = ''
            self.year_max = ''
            with export_file_path.open('w') as obsfile:
                data_header = self.translateHeaders(self._data_header)
                obsfile.write('\t'.join(data_header) + '\n')
                for dataset_queryset in datasets_models.Datasets.objects.all(
                ).filter(datatype=valid_datatype).order_by('dataset_name'):
                    self.extract_observations_from_dataset(
                        logfile_name, obsfile, dataset_queryset)
            print('')
            print('Summary for datatype: ', valid_datatype)
            print('- rows: ', self.counter_rows)
            print('- duplicates: ', self.counter_duplicates)
            print('')
            sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Summary for datatype: ' + valid_datatype)
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='- file: ' + export_name)
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='- rows: ' + str(self.counter_rows))
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='- duplicates: ' + str(self.counter_duplicates))
            sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
            # Update the database.
            # Delete the row if it exists.
            export_db_rows = exportformats_models.ExportFiles.objects.filter(
                export_name=export_name)
            for db_row in export_db_rows:
                db_row.delete()
            # Add a new row.
            years = self.year_min
            if self.year_min and self.year_max:
                years = self.year_min + '-' + self.year_max
            export_db_row = exportformats_models.ExportFiles(
                format='SpeciesObs',
                datatype=valid_datatype,
                year=years,
                approved='True',
                status='ok',
                export_name=export_name,
                export_file_name=export_file_name,
                export_file_path=str(export_file_path),
                error_log_file=error_log_file,
                error_log_file_path=str(error_log_file_path),
                generated_by=user,
            )
            export_db_row.save()
        #
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FINISHED')
        #
        print('Species observations update. Finished.')
        #
    except Exception as e:
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row='- Failed. Exception: ' + str(e))
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, new_status='FAILED')
def validateOneIcesXml(self, logfile_name, error_counter, db_export, user):
    """ Status = Checked by DC (No DATSU check) --> Approved = True
        Status = Not checked --> Status = DATSU-ok --> Approved = True
        Status = Not checked --> Status = DATSU-failed --> Approved = False
        Status = Test --> Status = Test-DATSU-ok --> Approved = False
        Status = Test --> Status = Test-DATSU-failed --> Approved = False
    """
    try:
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name,
            log_row='Processing ICES-XML file: ' + db_export.export_file_name)
        # Status 'Checked by DC'.
        if db_export.status == 'Checked by DC':
            # Don't perform the DATSU check.
            db_export.approved = True
            db_export.save()
            self.append_to_log_file(db_export, datsu_response=None)
        # Status 'Not checked'.
        elif db_export.status == 'Not checked':
            # url_part_1 = 'http://datsu.ices.dk/DatsuRest/api/ScreenFile/test,sharkdata,se!exportformats!'
            url_part_1 = 'http://datsu.ices.dk/DatsuRest/api/ScreenFile/sharkdata,se!exportformats!'
            # Test server: django.mellifica.org
            # url_part_1 = 'http://datsu.ices.dk/DatsuRest/api/ScreenFile/django,mellifica,org!exportformats!'  # TODO: For test only...
            url_part_2 = db_export.export_file_name.replace('.', ',')
            url_part_3 = '/shark!smhi,se'  # TODO: [email protected]
            url_part_4 = ''
            if 'Epibenthos' in db_export.export_file_name:
                url_part_4 = '/pb'
            elif 'Phytoplankton' in db_export.export_file_name:
                url_part_4 = '/pp'
            elif 'Zoobenthos' in db_export.export_file_name:
                url_part_4 = '/zb'
            elif 'Zooplankton' in db_export.export_file_name:
                url_part_4 = '/zp'
            #
            if settings.DEBUG:
                print(url_part_1 + url_part_2 + url_part_3 + url_part_4)
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='- DEBUG: ' + url_part_1 + url_part_2 + url_part_3 + url_part_4)
            #
            datsu_response = {}
            # Call DATSU.
            datsu_response_json = urllib.request.urlopen(
                url_part_1 + url_part_2 + url_part_3 + url_part_4)
            datsu_response = json.load(datsu_response_json)
            # For test:
            # datsu_response = dict({u'SessionID': u'484',
            #                        u'NumberOfErrors': -1,
            #                        u'ScreenResultURL': u'datsu.ices.dk/test/ScreenResult.aspx?groupError=0&sessionid=484'})
            #
            if settings.DEBUG:
                print('DEBUG: \n' + json.dumps(datsu_response, sort_keys=True, indent=2))
            #
            if datsu_response.get('NumberOfErrors', 0) == -1:
                db_export.status = 'DATSU-ok'
                db_export.approved = True
            else:
                db_export.status = 'DATSU-failed'
                db_export.approved = False
            #
            db_export.save()
            self.append_to_log_file(db_export, datsu_response=datsu_response)
        # Status 'Test'.
        elif db_export.status == 'Test':
            url_part_1 = 'http://datsu.ices.dk/DatsuRest/api/ScreenFile/test,sharkdata,se!exportformats!'
            url_part_2 = db_export.export_file_name.replace('.', ',')
            url_part_3 = '/shark!smhi,se'  # TODO: [email protected]
            url_part_4 = '/zb'
            #
            if settings.DEBUG:
                print(url_part_1 + url_part_2 + url_part_3 + url_part_4)
            #
            datsu_response_json = urllib.request.urlopen(
                url_part_1 + url_part_2 + url_part_3 + url_part_4)
            datsu_response = json.load(datsu_response_json)
            # # For test:
            # datsu_response = dict({u'SessionID': u'484', u'NumberOfErrors': -1, u'ScreenResultURL': u'datsu.ices.dk/test/ScreenResult.aspx?groupError=0&sessionid=484'})
            if settings.DEBUG:
                print('DEBUG: \n' + json.dumps(datsu_response, sort_keys=True, indent=2))
            #
            if datsu_response.get('NumberOfErrors', 0) == -1:
                db_export.status = 'Test-DATSU-ok'
                db_export.approved = False
            else:
                db_export.status = 'Test-DATSU-failed'
                db_export.approved = False
                # Logging. Only log the DATSU result URL when the check failed.
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name,
                    log_row='ERROR: Failed to validate ICES-XML file. DATSU errors URL: ' +
                            datsu_response.get('ScreenResultURL', '<not defined>'))
            #
            db_export.save()
            self.append_to_log_file(db_export, datsu_response=datsu_response)
        else:
            if settings.DEBUG:
                print('DEBUG: ' + db_export.export_file_name + ' ' +
                      db_export.status + ' ' + str(db_export.approved))
    except Exception as e:
        error_counter += 1
        traceback.print_exc()
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name,
            log_row='ERROR: Failed to validate ICES-XML files. Exception: ' + str(e))
    return error_counter
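# For reference, an illustration of the DATSU screening URL built above.
# The export file name is hypothetical. The encoding is inferred from the
# source: ',' appears to stand in for '.', and '!' for '/' in the host path
# and for '@' in the contact e-mail (see the '[email protected]' TODO above).
#
# export_file_name = 'ICES-XML_SMHI_Zoobenthos_2018.xml'
# url = ('http://datsu.ices.dk/DatsuRest/api/ScreenFile/'
#        'sharkdata,se!exportformats!'            # sharkdata.se/exportformats/
#        + export_file_name.replace('.', ',')     # file name, '.' -> ','
#        + '/shark!smhi,se'                       # contact e-mail
#        + '/zb')                                 # datatype code, 'zb' = Zoobenthos
# ->
# http://datsu.ices.dk/DatsuRest/api/ScreenFile/sharkdata,se!exportformats!ICES-XML_SMHI_Zoobenthos_2018,xml/shark!smhi,se/zb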
def generateOneDwca(self, logfile_name, error_counter, datatype, year_from,
                    year_to, monitoring_types, user):
    """ Generates one DwC-A file for the given datatype and year range.
        Returns the updated error counter. """
    if datatype.lower() not in ['epibenthos', 'phytobenthos']:
        return error_counter
    year_from = int(year_from)
    year_to = int(year_to)
    # Add all rows from all datasets that match datatype and year.
    darwincore_generator = sharkdata_core.DarwinCoreArchiveGenerator(datatype)
    #
    db_datasets = datasets_models.Datasets.objects.all()
    for db_dataset in db_datasets:
        if db_dataset.datatype.upper() != datatype.upper():
            continue
        # Check metadata for year(s) in the dataset.
        metadata_as_text = db_dataset.content_metadata_auto
        metadata_dict = {}
        for row in metadata_as_text.split('\r\n'):
            if ':' in row:
                parts = row.split(':', 1)  # Split on the first occurrence.
                key = parts[0].strip()
                value = parts[1].strip()
                metadata_dict[key] = value
        #
        min_year_int = int(metadata_dict.get('min_year', 0))
        max_year_int = int(metadata_dict.get('max_year', 0))
        if (min_year_int >= year_from) and (max_year_int <= year_to):
            pass  # OK. TODO: Only whole datasets inside the limits...
        else:
            continue  # Don't use this dataset.
        #
        dataset_name = metadata_dict.get('dataset_name', 0)
        if dataset_name not in sharkdata_core.ExportFilter().get_filter_keep_list('dataset_name'):
            continue  # Don't use this dataset.
        #
        try:
            zip_file_name = db_dataset.dataset_file_name
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='Reading archive file: ' + zip_file_name + '...')
            if settings.DEBUG:
                print('DEBUG: DwC-A processing: ' + zip_file_name)
            #
            zip_file_path = pathlib.Path(self._ftp_dir_path, zip_file_name)
            darwincore_generator.calculate_dataset(str(zip_file_path))
            #
        except Exception:
            error_counter += 1
            traceback.print_exc()
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='ERROR: Failed to generate DwC-A from: ' + zip_file_name + '.')
    #
    try:
        # Save the result.
        darwincore_generator.log_missing_taxa()
        # TODO: Hard-coded test path.
        darwincore_generator.save_dwca_file(
            'D:/arnold/4_sharkdata/notebooks/test_data/dwca-epibenthos-obis_TEST.zip')
        # TODO: Register the export file in the database and write a log file.
        # (A commented-out template copied from the ICES-XML generator was kept
        # here in the original; see generateOneIcesXml for the live version of
        # the ExportFiles row creation and log file writing.)
    except Exception as e:
        error_counter += 1
        traceback.print_exc()
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name,
            log_row='ERROR: Failed to generate DwC-A files. Exception: ' + str(e))
    return error_counter
def writeFileInfoToDb(self, file_name, logfile_name=None, user=''):
    """ Extracts info from the dataset filename and from the zip file
        content and adds it to the database. """
    try:
        #
        ftp_file_path = pathlib.Path(self._data_in_datasets, file_name)
        # Extract info from the file name.
        dataset_name, datatype, version = self.splitFilename(file_name)
        # Extract metadata parts.
        metadata = ''
        metadata_auto = ''
        columndata_available = False
        #
        zipreader = sharkdata_core.SharkArchiveFileReader(
            file_name, self._data_in_datasets)
        try:
            zipreader.open()
            #
            try:
                metadata = zipreader.getMetadataAsText()
                encoding = 'cp1252'
                metadata = str(metadata, encoding, 'strict')
            except Exception as e:
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row='WARNING: ' + str(e))
            #
            try:
                metadata_auto = zipreader.getMetadataAutoAsText()
                encoding = 'cp1252'
                metadata_auto = str(metadata_auto, encoding, 'strict')
            except Exception as e:
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row='WARNING: ' + str(e))
            #
            columndata_available = zipreader.isDataColumnsAvailable()
            # CTD profiles.
            ctd_profiles_table = None
            # if datatype == 'CTDprofile':
            if datatype == 'Profile':
                ctd_profiles_table = zipreader.getDataAsText()
        finally:
            zipreader.close()
        # Remove from the database.
        try:
            db_dataset = datasets_models.Datasets.objects.get(
                dataset_name=dataset_name)
            db_dataset.delete()
        except datasets_models.Datasets.DoesNotExist:
            pass  # Not found.
        # Save to db.
        dataset = datasets_models.Datasets(
            dataset_name=dataset_name,
            datatype=datatype,
            version=version,
            dataset_file_name=file_name,
            ftp_file_path=ftp_file_path,
            content_data='NOT USED',
            content_metadata=metadata,
            content_metadata_auto=metadata_auto,
            #
            column_data_available=columndata_available,
            dwc_archive_eurobis_available=False,
            dwc_archive_eurobis_file_path='',
        )
        dataset.save()
        if ctd_profiles_table:
            data_header = []
            ctd_profiles_table = ctd_profiles_table.decode('cp1252')
            for index, row in enumerate(ctd_profiles_table.split('\n')):
                rowitems = row.strip().split('\t')
                if index == 0:
                    data_header = rowitems
                else:
                    if len(rowitems) > 1:
                        row_dict = dict(zip(data_header, rowitems))
                        water_depth_m = 0.0
                        try:
                            water_depth_m = float(row_dict.get('water_depth_m', -99))
                        except Exception:
                            pass
                        db_profiles = ctdprofiles_models.CtdProfiles(
                            visit_year=row_dict.get('visit_year', ''),  # '2002'
                            platform_code=row_dict.get('platform_code', ''),  # 'Svea'
                            expedition_id=row_dict.get('expedition_id', ''),  # 'aa-bb-11'
                            visit_id=row_dict.get('visit_id', ''),  # '123456'
                            station_name=row_dict.get('station_name', ''),  # 'Station1A'
                            latitude=float(row_dict.get('sample_latitude_dd', -99)),  # 70.00
                            longitude=float(row_dict.get('sample_longitude_dd', -99)),  # 10.00
                            water_depth_m=water_depth_m,  # '80.0'
                            sampler_type_code=row_dict.get('sampler_type_code', ''),  # 'CTD'
                            sample_date=row_dict.get('visit_date', ''),  # '2000-01-01'
                            sample_project_code=row_dict.get('sample_project_code', ''),  # 'Proj'
                            # sample_project_code=row_dict.get('sample_project_name_sv', ''),  # 'Proj'
                            sample_orderer_code=row_dict.get('sample_orderer_code', ''),  # 'Orderer'
                            # sample_orderer_code=row_dict.get('sample_orderer_name_sv', ''),  # 'Orderer'
                            sampling_laboratory_code=row_dict.get('sampling_laboratory_code', ''),  # 'Slabo'
                            # sampling_laboratory_code=row_dict.get('sampling_laboratory_name_sv', ''),  # 'Slabo'
                            revision_date=row_dict.get('revision_date', ''),  # '2010-10-10'
                            ctd_profile_name=row_dict.get('profile_file_name_db', ''),  # 'ctd.profile'
                            dataset_file_name=file_name,
                            ftp_file_path=ftp_file_path,
                        )
                        db_profiles.save()
        #
        return None  # No error message.
        #
    except Exception as e:
        return str(e)
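# splitFilename() is not shown in this section. A minimal sketch of what it
# plausibly does follows, assuming the 'SHARK_<datatype>_..._version_<version>.zip'
# naming convention implied by the '_version' splits and 'SHARK_*.zip' globs
# elsewhere in this module. The body and the example in the docstring are
# illustrations only, not the actual implementation.
def splitFilename(self, file_name):
    """ Illustration: 'SHARK_Zoobenthos_2019_BEDA_version_2020-01-15.zip'
        -> ('SHARK_Zoobenthos_2019_BEDA', 'Zoobenthos', '2020-01-15'). """
    import pathlib
    stem = pathlib.Path(file_name).stem  # Drop '.zip'.
    dataset_name, _, version = stem.partition('_version_')
    # Dataset names look like 'SHARK_<datatype>_...'; the datatype is
    # assumed to be the second underscore-separated token.
    datatype = dataset_name.split('_')[1]
    return dataset_name, datatype, version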
def generateOneIcesXml(self, logfile_name, error_counter, datatype, year,
                       status, user):
    """ Generates one ICES-XML file for the given datatype and year.
        Returns the updated error counter. """
    # Add all rows from all datasets that match datatype and year.
    icesxmlgenerator = sharkdata_core.IcesXmlGenerator()
    #
    db_datasets = datasets_models.Datasets.objects.all()
    for db_dataset in db_datasets:
        if db_dataset.datatype.upper() != datatype.upper():
            continue
        # Check metadata for year(s) in the dataset.
        metadata_as_text = db_dataset.content_metadata_auto
        metadata_dict = {}
        for row in metadata_as_text.split('\r\n'):
            if ':' in row:
                parts = row.split(':', 1)  # Split on the first occurrence.
                key = parts[0].strip()
                value = parts[1].strip()
                metadata_dict[key] = value
        #
        min_year_int = int(metadata_dict.get('min_year', 0))
        max_year_int = int(metadata_dict.get('max_year', 0))
        if (year < min_year_int) or (year > max_year_int):
            continue  # Don't use this dataset.
        #
        dataset_name = metadata_dict.get('dataset_name', 0)
        if dataset_name not in sharkdata_core.ExportFilter().get_filter_keep_list('dataset_name'):
            continue  # Don't use this dataset.
        #
        try:
            zip_file_name = db_dataset.dataset_file_name
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='Reading archive file: ' + zip_file_name + '...')
            if settings.DEBUG:
                print('DEBUG: ICES-ZIP processing: ' + zip_file_name)
            #
            dataset = sharkdata_core.Dataset()
            dataset.loadDataFromZipFile(zip_file_name,
                                        dataset_dir_path=self._ftp_dir_path,
                                        encoding='cp1252')
            #
            dataheader = dataset.data_header
            if settings.DEBUG:
                print(dataheader)
            #
            # Epibenthos or Phytobenthos: transect data for record 40.
            transect_data = sharkdata_core.TransectData()
            transect_data.clear()
            # Note: the original condition compared default values and was
            # therefore always true; checking the datatype is the evident intent.
            if metadata_dict.get('datatype', '') in ('Epibenthos', 'Phytobenthos'):
                # or metadata_dict.get('datatype', '') == 'Zoobenthos':
                transect_data.load_all_transect_data(dataset)
            #
            # Process rows.
            for datarow in dataset.data_rows:
                datarow_dict = dict(zip(dataheader, map(str, datarow)))
                #
                if datarow_dict.get('visit_year', '') == str(year):
                    # Remove some projects.
                    proj = datarow_dict.get('sample_project_name_en', '')
                    if not proj:
                        proj = datarow_dict.get('sample_project_name_sv', '')
                    remove_list_sv = sharkdata_core.ExportFilter(
                    ).get_filter_remove_list('sample_project_name_sv')
                    remove_list_en = sharkdata_core.ExportFilter(
                    ).get_filter_remove_list('sample_project_name_en')
                    #
                    if proj in remove_list_sv:
                        continue
                    if proj in remove_list_en:
                        continue
                    # Remove some stations.
                    station_name = datarow_dict.get('station_name', '')
                    if station_name in sharkdata_core.ExportFilter(
                    ).get_filter_remove_list('station_name'):
                        continue
                    # Remove RAMSKRAP.
                    if 'FRAMENET' == datarow_dict.get('sampler_type_code', ''):
                        continue
                    # OK to add the row.
                    icesxmlgenerator.add_row(datarow_dict)
            #
        except Exception:
            error_counter += 1
            traceback.print_exc()
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='ERROR: Failed to generate ICES-XML from: ' + zip_file_name + '.')
    #
    try:
        # Create and save the result.
        out_rows = icesxmlgenerator.create_xml()
        #
        if settings.DEBUG:
            print('DEBUG: ' + str(len(out_rows)))
        #
        if len(out_rows) > 1:
            #
            export_name = 'ICES-XML' + '_SMHI_' + datatype + '_' + str(year)
            export_file_name = export_name + '.xml'
            export_file_path = os.path.join(self._export_dir_path, export_file_name)
            error_log_file = export_name + '_log.txt'
            error_log_file_path = os.path.join(self._export_dir_path, error_log_file)
            #
            icesxmlgenerator.save_xml_file(out_rows, export_file_path)
            # Update the database.
            # Delete the row if it exists.
            export_db_rows = export_models.ExportFiles.objects.filter(
                export_name=export_name)
            for db_row in export_db_rows:
                db_row.delete()
            #
            approved = False
            if status == 'Checked by DC':
                approved = True  # Will not be validated via DATSU.
            # Write the row.
            dbrow = export_models.ExportFiles(
                format='ICES-XML',
                datatype=datatype,
                year=str(year),
                approved=approved,
                status=status,
                export_name=export_name,
                export_file_name=export_file_name,
                export_file_path=export_file_path,
                error_log_file=error_log_file,
                error_log_file_path=error_log_file_path,
                generated_by=user,
            )
            dbrow.save()
            # Log file.
            log_rows = []
            log_rows.append('')
            log_rows.append('')
            log_rows.append('Generate ICES-XML files. ' + str(datetime.datetime.now()))
            log_rows.append('')
            log_rows.append('- Format: ' + dbrow.format)
            log_rows.append('- Datatype: ' + str(dbrow.datatype))
            log_rows.append('- Year: ' + str(dbrow.year))
            log_rows.append('- Status: ' + str(dbrow.status))
            log_rows.append('- Approved: ' + str(dbrow.approved))
            log_rows.append('- Export name: ' + str(dbrow.export_name))
            log_rows.append('- Export file name: ' + str(dbrow.export_file_name))
            log_rows.append('')
            #
            icesxmlgenerator.save_log_file(log_rows, error_log_file_path)
        #
    except Exception as e:
        error_counter += 1
        traceback.print_exc()
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name,
            log_row='ERROR: Failed to generate ICES-XML files. Exception: ' + str(e))
    return error_counter
def generateIcesXmlExportFiles(self, logfile_name, datatype_list, year_from,
                               year_to, status, user):
    """ Generates ICES-XML export files for the selected datatypes and years. """
    try:
        # Load resource content for ICES stations.
        sharkdata_core.ExportStations().load_export_stations(
            'export_ices_stations')
        # Load resource content for filtering reported data.
        sharkdata_core.ExportFilter().load_export_filter(
            'export_ices_filters')
        # Load resource content to translate values.
        sharkdata_core.TranslateValues().load_export_translate_values(
            'export_ices_translate_values')
        # Load resource content to translate from DynTaxa to WoRMS.
        sharkdata_core.TranslateTaxa().load_translate_taxa(
            'translate_dyntaxa_to_worms')
        # Load resource content to translate from DynTaxa to Helcom PEG.
        sharkdata_core.TranslateDyntaxaToHelcomPeg().load_translate_taxa(
            'translate_dyntaxa_to_helcom_peg')
        # Create the target directory if it does not exist.
        if not os.path.exists(self._export_dir_path):
            os.makedirs(self._export_dir_path)
        #
        error_counter = 0
        #
        # Iterate over the selected datatypes, one file per year.
        for datatype in datatype_list:
            year_int = int(year_from)
            year_to_int = int(year_to)
            while year_int <= year_to_int:
                error_counter = self.generateOneIcesXml(
                    logfile_name, error_counter, datatype, year_int, status, user)
                #
                year_int += 1
        #
        # Log missing stations.
        missing_station_list = sharkdata_core.ExportStations(
        ).get_missing_station_list()
        if len(missing_station_list) > 0:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Missing station(s): ')
            for missing_station in sorted(missing_station_list):
                sharkdata_core.SharkdataAdminUtils().log_write(
                    logfile_name, log_row='- ' + missing_station)
                if settings.DEBUG:
                    print('DEBUG: missing station: ' + missing_station)
            sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
        # Log missing taxa.
        missing_taxa_list = sharkdata_core.TranslateTaxa(
        ).get_missing_taxa_list()
        if len(missing_taxa_list) > 0:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name, log_row='Missing taxa: ')
            for missing_taxa in sorted(missing_taxa_list):
                # Don't log filtered taxa.
                if missing_taxa not in sharkdata_core.ExportFilter(
                ).get_filter_remove_list('scientific_name'):
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name, log_row='- ' + missing_taxa)
                    if settings.DEBUG:
                        print('DEBUG: missing taxon: ' + missing_taxa)
            sharkdata_core.SharkdataAdminUtils().log_write(logfile_name, log_row='')
        #
        if error_counter > 0:
            sharkdata_core.SharkdataAdminUtils().log_close(
                logfile_name, 'FAILED')
        else:
            sharkdata_core.SharkdataAdminUtils().log_close(
                logfile_name, 'FINISHED')
        #
        if settings.DEBUG:
            print('DEBUG: ICES-XML generation FINISHED')
    except Exception as e:
        error_message = "Can't generate ICES-XML file." + '\nException: ' + str(e) + '\n'
        sharkdata_core.SharkdataAdminUtils().log_write(
            logfile_name, log_row=error_message)
        sharkdata_core.SharkdataAdminUtils().log_close(
            logfile_name, 'FAILED')
def extract_observations_from_dataset(self, logfile_name, obsfile, dataset_queryset):
    """ Extracts species observation rows from one dataset archive
        and appends them to the open export file. """
    print('- ' + dataset_queryset.dataset_file_name)
    sharkdata_core.SharkdataAdminUtils().log_write(
        logfile_name,
        log_row='Extracting species observations from: ' +
                dataset_queryset.dataset_file_name + '...')
    zipreader = sharkdata_core.SharkArchiveFileReader(
        dataset_queryset.ftp_file_path)
    try:
        zipreader.open()
        data = zipreader.getDataAsText()
    finally:
        zipreader.close()
    #
    encoding = 'cp1252'
    rowseparator = '\n'
    fieldseparator = '\t'
    #
    data = str(data, encoding, 'strict')
    datarows = (item.strip() for item in data.split(rowseparator))  # Generator instead of list.
    #
    header = []
    for rowindex, datarow in enumerate(datarows):
        #
        try:
            if len(datarow) == 0:
                continue
            #
            row = [item.strip() for item in datarow.split(fieldseparator)]
            if rowindex == 0:
                # Clean the header once instead of re-cleaning it per data row.
                header = self.cleanUpHeader(row)
                continue
            rowdict = dict(zip(header, row))
            rowdict['data_type'] = dataset_queryset.datatype
            # Scientific name is mandatory.
            if not rowdict.get('scientific_name', ''):
                continue
            # Position. Check if the position is valid. Skip the row if not.
            lat_dd = rowdict.get('sample_latitude_dd', '').replace(',', '.')
            long_dd = rowdict.get('sample_longitude_dd', '').replace(',', '.')
            if (not lat_dd) or (not long_dd):
                lat_dd = rowdict.get('latitude_dd', '').replace(',', '.')
                long_dd = rowdict.get('longitude_dd', '').replace(',', '.')
            #
            try:
                if (float(lat_dd) > 70.0) or (float(lat_dd) < 50.0) or \
                   (float(long_dd) > 30.0) or (float(long_dd) < 5.0):
                    sharkdata_core.SharkdataAdminUtils().log_write(
                        logfile_name,
                        log_row='Row skipped, position outside box. Latitude: ' +
                                lat_dd + ' Longitude: ' + long_dd +
                                ' Row: ' + str(rowindex))
                    lat_dd = ''
                    long_dd = ''
            except Exception:
                lat_dd = ''
                long_dd = ''
            #
            if lat_dd and long_dd:
                rowdict['latitude_dd'] = lat_dd
                rowdict['longitude_dd'] = long_dd
            else:
                # Don't add to SpeciesObs if the position is invalid.
                continue
            #
            # Only count for some parameters.
            parameter = rowdict.get('parameter', '')
            if parameter not in [
                    'Abundance',
                    '# counted',
                    'Observed species',
                    'Cover (%)',
                    'Species distribution max depth',
                    'Abundance class',
                    '# pups counted on land',
                    'Total # counted in water',
                    'Total # counted on land',
            ]:
                continue
            # The value must be positive.
            value = rowdict.get('value', '')
            if value.upper() in ['', '0', '0.0', '0.0000', 'N', 'NO', 'F', 'FALSE']:
                continue
            value_float = 0.0
            try:
                value_float = float(value)
            except Exception:
                pass
            if not value_float > 0.0:
                continue
            # Calculate the DarwinCore observation id.
            generated_occurrence_id = self.calculateDarwinCoreObservationIdAsMD5(rowdict)
            if generated_occurrence_id not in self.observation_id_lookup:
                self.observation_id_lookup.add(generated_occurrence_id)
                # Row id as md5.
                rowdict['occurrence_id'] = generated_occurrence_id
                # When.
                tmp_date = rowdict.get('sampling_date', '')
                if len(tmp_date) >= 10:
                    year = tmp_date[0:4]
                    rowdict['sampling_year'] = year
                    rowdict['sampling_month'] = tmp_date[5:7]
                    rowdict['sampling_day'] = tmp_date[8:10]
                    if (self.year_min == '') or (year < self.year_min):
                        self.year_min = year
                    if (self.year_max == '') or (year > self.year_max):
                        self.year_max = year
                if not rowdict.get('sample_min_depth', ''):
                    rowdict['sample_min_depth'] = rowdict.get('water_depth_m', '')
                if not rowdict.get('sample_max_depth', ''):
                    rowdict['sample_max_depth'] = rowdict.get('water_depth_m', '')
                # Classification.
                scientificname = rowdict.get('scientific_name', '')
                taxon_worms_info = self.worms_info_object.getTaxonInfoDict(scientificname)
                if taxon_worms_info:
                    rowdict['taxon_kingdom'] = taxon_worms_info.get('kingdom', '')
                    rowdict['taxon_phylum'] = taxon_worms_info.get('phylum', '')
                    rowdict['taxon_class'] = taxon_worms_info.get('class', '')
                    rowdict['taxon_order'] = taxon_worms_info.get('order', '')
                    rowdict['taxon_family'] = taxon_worms_info.get('family', '')
                    rowdict['taxon_genus'] = taxon_worms_info.get('genus', '')
                else:
                    rowdict['taxon_kingdom'] = ''
                    rowdict['taxon_phylum'] = ''
                    rowdict['taxon_class'] = ''
                    rowdict['taxon_order'] = ''
                    rowdict['taxon_family'] = ''
                    rowdict['taxon_genus'] = ''
                # Fall back from names to codes for some organisation fields.
                if not rowdict.get('orderer', ''):
                    rowdict['orderer'] = rowdict.get('orderer_code', '')
                if not rowdict.get('orderer', ''):
                    rowdict['orderer'] = rowdict.get('sample_orderer_code', '')
                if not rowdict.get('orderer', ''):
                    rowdict['orderer'] = rowdict.get('sample_orderer_name_sv', '')
                if not rowdict.get('orderer', ''):
                    rowdict['orderer'] = rowdict.get('sample_orderer_name_en', '')
                if not rowdict.get('reporting_institute', ''):
                    rowdict['reporting_institute'] = rowdict.get('reporting_institute_code', '')
                if not rowdict.get('reporting_institute', ''):
                    rowdict['reporting_institute'] = rowdict.get('reporting_institute_name_sv', '')
                if not rowdict.get('reporting_institute', ''):
                    rowdict['reporting_institute'] = rowdict.get('reporting_institute_name_en', '')
                if not rowdict.get('sampling_laboratory', ''):
                    rowdict['sampling_laboratory'] = rowdict.get('sampling_laboratory_code', '')
                if not rowdict.get('sampling_laboratory', ''):
                    rowdict['sampling_laboratory'] = rowdict.get('sampling_laboratory_name_sv', '')
                if not rowdict.get('sampling_laboratory', ''):
                    rowdict['sampling_laboratory'] = rowdict.get('sampling_laboratory_name_en', '')
                if not rowdict.get('analytical_laboratory', ''):
                    rowdict['analytical_laboratory'] = rowdict.get('analytical_laboratory_code', '')
                if not rowdict.get('analytical_laboratory', ''):
                    rowdict['analytical_laboratory'] = rowdict.get('analytical_laboratory_name_sv', '')
                if not rowdict.get('analytical_laboratory', ''):
                    rowdict['analytical_laboratory'] = rowdict.get('analytical_laboratory_name_en', '')
                #
                out_row = []
                for header_item in self.getHeaders():
                    out_row.append(rowdict.get(header_item, ''))
                #
                obsfile.write('\t'.join(out_row) + '\n')
                self.counter_rows += 1
            else:
                # print('- Duplicate md5.')
                self.counter_duplicates += 1
        except Exception as e:
            sharkdata_core.SharkdataAdminUtils().log_write(
                logfile_name,
                log_row='- Error in row ' + str(rowindex) + ': ' + str(e))
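# calculateDarwinCoreObservationIdAsMD5() is not shown in this section.
# A minimal sketch of the idea follows: a deterministic id is derived by
# hashing a stable subset of the row's fields, so re-running the export
# produces the same occurrence ids and duplicates can be detected via the
# 'observation_id_lookup' set above. Which fields participate in the hash
# is an assumption here, not taken from the actual implementation.
import hashlib


def calculate_occurrence_id_md5(rowdict,
                                id_fields=('data_type', 'scientific_name',
                                           'latitude_dd', 'longitude_dd',
                                           'sampling_date', 'parameter')):
    """ Illustration only: a deterministic occurrence id from selected fields. """
    key = '\t'.join(rowdict.get(field, '') for field in id_fields)
    return hashlib.md5(key.encode('utf-8')).hexdigest()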