def generate_source_reference_filter(file_path=None):
    references = SourceReference.objects.filter(
        biologicalcollectionrecord__isnull=False,
    ).distinct('id')
    results = []
    reference_source_list = []
    for reference in references:
        if (reference.reference_type == 'Peer-reviewed scientific article'
                or reference.reference_type == 'Published report or thesis'):
            source = u'{authors} | {year} | {title}'.format(
                authors=reference.authors,
                year=reference.year,
                title=reference.title)
        else:
            source = str(reference.source)
        if source in reference_source_list:
            continue
        reference_source_list.append(source)
        results.append({
            'id': reference.id,
            'reference': source,
            'type': reference.reference_type
        })
    if not file_path:
        file_path = os.path.join(
            settings.MEDIA_ROOT,
            SOURCE_REFERENCE_FILTER_FILE)
    log(file_path)
    with open(file_path, 'w') as file_handle:
        json.dump(results, file_handle)
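# A minimal usage sketch of the generator above (the temporary path is an
# assumption for illustration; run inside a configured Django shell): it
# writes a JSON list of {'id': ..., 'reference': ..., 'type': ...} dicts.
import json
import os
import tempfile

tmp_path = os.path.join(tempfile.gettempdir(), 'source_reference_filter.json')
generate_source_reference_filter(file_path=tmp_path)
with open(tmp_path) as filter_file:
    print(json.load(filter_file))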
def process_row(self, row, index):
    valid_from = self.get_row_value('validfrom')
    valid_to = self.get_row_value('validto')
    user = self.get_object_from_uuid(
        column='userid',
        model=get_user_model())
    status = self.get_row_value('status')
    if user:
        profile, created = Profile.objects.get_or_create(user=user)
        if not profile.sass_accredited_date_from:
            profile.sass_accredited_date_from = valid_from
        elif valid_from.date() < profile.sass_accredited_date_from:
            profile.sass_accredited_date_from = valid_from
        if not profile.sass_accredited_date_to:
            profile.sass_accredited_date_to = valid_to
        elif valid_to.date() > profile.sass_accredited_date_to:
            profile.sass_accredited_date_to = valid_to
        try:
            json_data = json.loads(profile.data)
            json_data['sass_accredited_status'] = status
            profile.data = json_data
        except ValueError:
            pass
        profile.save()
        log('{user}-{valid_from}-{valid_to}-{status}'.format(
            user=profile,
            valid_from=valid_from,
            valid_to=valid_to,
            status=status))
def handle(self, *args, **options):
    signals.post_save.disconnect(location_site_post_save_handler)
    signals.post_save.disconnect(collection_post_save_handler)
    biobase_collection = BiologicalCollectionRecord.objects.filter(
        additional_data__BioBaseData=True)
    index = 0
    for biobase in biobase_collection:
        index += 1
        print('Processing -- %s/%s' % (index, biobase_collection.count()))
        if not biobase.source_reference:
            continue
        authors = biobase.source_reference.source.get_authors()
        try:
            author = authors[0]
            if not author.user:
                author.save()
            author_user = (
                biobase.source_reference.source.get_authors()[0].user)
            if biobase.owner != author_user:
                biobase.owner = author_user
                biobase.save()
                log('Collection {id} - new owner : {owner}'.format(
                    id=biobase.id,
                    owner=biobase.owner))
            if biobase.site.owner != author_user:
                biobase.site.owner = author_user
                biobase.site.save()
                log('Site {id} - new owner : {owner}'.format(
                    id=biobase.site.id,
                    owner=biobase.site.owner))
        except IndexError:
            continue
def handle(self, *args, **options):
    signals.post_save.disconnect(
        collection_post_save_handler,
        sender=BiologicalCollectionRecord
    )
    # Get all collections that came from gbif and have a reference
    collections = BiologicalCollectionRecord.objects.filter(
        additional_data__fetch_from_gbif=True
    ).exclude(reference__iexact='')
    index = 0
    for collection in collections:
        index += 1
        log('Processing : {index}/{len}'.format(
            index=index,
            len=collections.count()
        ))
        if collection.collector and not collection.collector_user:
            users = create_users_from_string(collection.collector)
            if len(users) > 0:
                log('Update owner and collector to {}'.format(
                    users[0].username
                ))
                collection.collector_user = users[0]
                collection.owner = users[0]
                collection.save()
def handle(self, *args, **options):
    log('Deleting all search results...')
    all_search_process = SearchProcess.objects.all()
    all_search_process.delete()
    csv_path = os.path.join(settings.MEDIA_ROOT, 'csv_processed')
    shutil.rmtree(csv_path)
    os.mkdir(csv_path)
def wrap(*args, **kwargs):
    time1 = time.time()
    ret = f(*args, **kwargs)
    time2 = time.time()
    # f.func_name is Python 2 only; __name__ works on both
    log('%s function took %0.3f ms' % (
        f.__name__, (time2 - time1) * 1000.0))
    return ret
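# A minimal sketch of the enclosing decorator, assuming `wrap` above is the
# inner function of a timing decorator; the outer name `timing` and the use
# of functools.wraps are assumptions, not taken from the source.
import time
from functools import wraps


def timing(f):
    @wraps(f)
    def wrap(*args, **kwargs):
        time1 = time.time()
        ret = f(*args, **kwargs)
        time2 = time.time()
        log('%s function took %0.3f ms' % (
            f.__name__, (time2 - time1) * 1000.0))
        return ret
    return wrap

# Usage:
# @timing
# def expensive_query():
#     ...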
def handle(self, *args, **options):
    self.init(options)
    csv_file_name = self.csv_file_name(options)
    csv_file_path = os.path.join(self.csv_root_folder, csv_file_name)
    if not csv_file_name or not os.path.exists(csv_file_path):
        log('CSV file name not provided / CSV file does not exist')
        return False
    with open(csv_file_path) as csv_file:
        self.csv_dict_reader(csv.DictReader(csv_file))
def site_visit_post_save_handler(**kwargs):
    from sass.scripts.site_visit_ecological_condition_generator import (
        generate_site_visit_ecological_condition)
    try:
        site_visit = kwargs['instance']
    except KeyError:
        return
    log('Generate site visit ecological condition')
    generate_site_visit_ecological_condition([site_visit])
def csv_dict_reader(self, csv_reader):
    errors = []
    success = []
    units = []
    index = 2
    for row in csv_reader:
        if row[UNIT] not in units:
            units.append(row[UNIT])
        try:
            chems = Chem.objects.filter(chem_code__iexact=row[CODE])
            if chems.exists():
                print('exist')
            else:
                chems = Chem.objects.filter(chem_code__iexact=row[NAME])
                if not chems.exists():
                    chem = Chem.objects.create(
                        chem_code=row[CODE] if row[CODE] else row[NAME],
                        chem_description=row[DESCRIPTION],
                    )
                    chems = Chem.objects.filter(id=chem.id)
            if chems.count() > 1:
                chem_id = chems[0].id
                # Change unit of chemical records to use the first one
                ChemicalRecord.objects.filter(chem__in=chems).update(
                    chem=chem_id)
                # Delete chemical units except the first one
                chems.exclude(id=chem_id).delete()
                chems = Chem.objects.filter(id=chem_id)
            if chems:
                chem_unit = None
                for unit in ChemUnit:
                    if unit.value == row[UNIT]:
                        chem_unit = unit
                        break
                chems.update(
                    minimum=row[MIN] if row[MIN] else None,
                    maximum=row[MAX] if row[MAX] else None,
                    show_in_abiotic_list=row[RETAIN_IN_LIST] == 'Yes',
                    chem_code=row[CODE] if row[CODE] else row[NAME],
                    chem_description=row[DESCRIPTION],
                    chem_unit=chem_unit.name)
        except Exception as e:  # noqa
            errors.append({'row': index, 'error': str(e)})
        index += 1
    if len(errors) > 0:
        logger.debug(errors)
    log('----')
    if len(success) > 0:
        logger.debug(success)
    print(units)
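# For reference, the ChemUnit loop above iterates an Enum, matches on
# .value, and stores .name; a minimal sketch of such an enum (member names
# and values here are assumptions, not taken from the source):
from enum import Enum


class ChemUnit(Enum):
    MG_PER_L = 'mg/l'
    PERCENT = '%'
    PH_UNIT = 'pH unit'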
def handle(self, *args, **options):
    collections = BiologicalCollectionRecord.objects.filter(
        module_group__isnull=True
    )
    index = 0
    for bio in collections:
        index += 1
        log('Processing {current}/{total}'.format(
            current=index,
            total=collections.count()
        ))
        bio.save()
def handle(self, *args, **options):
    location_sites = LocationSite.objects.filter(
        legacy_site_code__iregex=r'([A-Za-z0-9]){1,6}-([A-Za-z]*)$'
    ).exclude(site_code=F('legacy_site_code'))
    log('Update {} location site(s)'.format(location_sites.count()))
    signals.post_save.disconnect(location_site_post_save_handler)
    location_sites.update(site_code=F('legacy_site_code'))
    signals.post_save.connect(location_site_post_save_handler)
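# A quick illustration of the legacy_site_code pattern above (the sample
# codes are assumptions): Python's re module mirrors what the iregex
# lookup accepts.
import re

pattern = re.compile(r'([A-Za-z0-9]){1,6}-([A-Za-z]*)$', re.IGNORECASE)
assert pattern.search('CROC-MARI')        # 1-6 alphanumerics, dash, letters
assert not pattern.search('CROC-MARI01')  # digits after the dash do not match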
def collections_upload(session_id):
    from bims.utils.logger import log
    from bims.models import UploadSession as TaxaUploadSession
    from bims.scripts.collections_upload import CollectionsCSVUpload
    try:
        upload_session = TaxaUploadSession.objects.get(id=session_id)
    except TaxaUploadSession.DoesNotExist:
        log('Session does not exist')
        return

    # - Check the headers
    upload_session.progress = 'Checking header row'
    upload_session.save()

    def check_header(_csv_file):
        reader = csv.DictReader(_csv_file)
        headers = reader.fieldnames
        for header in FILE_HEADERS:
            if header not in headers:
                error_message = (
                    'Header row does not follow the correct format'
                )
                upload_session.progress = error_message
                upload_session.error_file = (
                    upload_session.process_file
                )
                upload_session.processed = True
                upload_session.save()
                return False
        return True

    try:
        with open(upload_session.process_file.path) as csv_file:
            checked = check_header(csv_file)
    except UnicodeDecodeError:
        with open(
                upload_session.process_file.path,
                encoding='ISO-8859-1'
        ) as csv_file:
            checked = check_header(csv_file)
    if not checked:
        return
    upload_session.progress = 'Processing'
    upload_session.save()
    taxa_csv_upload = CollectionsCSVUpload()
    taxa_csv_upload.upload_session = upload_session
    taxa_csv_upload.start()
def handle(self, *args, **options):
    date_and_sites = ChemicalRecord.objects.filter(
        survey__isnull=True).annotate(
        site=F('location_site')).values('site', 'date')
    index = 1
    for date_and_site in date_and_sites:
        log('{index}/{count}'.format(
            index=index,
            count=date_and_sites.count()))
        index += 1
        site = LocationSite.objects.get(id=date_and_site['site'])
        survey, survey_created = Survey.objects.get_or_create(
            site=site,
            date=date_and_site['date'])
        ChemicalRecord.objects.filter(
            location_site=site,
            date=date_and_site['date']).update(survey=survey)
def add_to_error_summary(self, error_message, row,
                         add_to_error=True, only_log=False):
    # row is offset by 2, presumably for the header row plus the
    # zero-based index
    error_message = '{id} : {error}'.format(
        id=row + 2,
        error=error_message)
    log(error_message)
    if only_log:
        return
    if add_to_error:
        self.errors.append(error_message)
        self.data_failed += 1
    else:
        self.warnings.append(error_message)
def handle(self, *args, **options):
    sites = LocationSite.objects.filter(
        biological_collection_record__notes__icontains='sass',
        biological_collection_record__source_collection__icontains='fbis',
    ).distinct()
    index = 0
    models.signals.post_save.disconnect(location_site_post_save_handler)
    for site in sites:
        index += 1
        log('Processing {0}/{1}'.format(index, sites.count()))
        additional_data = json.loads(site.additional_data)
        additional_data['comment'] = site.site_description
        site.additional_data = additional_data
        site.site_description = site.name
        site.save()
    models.signals.post_save.connect(location_site_post_save_handler)
def update_location_context(location_site_id):
    from bims.utils.logger import log
    from bims.models import LocationSite
    from bims.utils.location_context import get_location_context_data
    if isinstance(location_site_id, str) and ',' in location_site_id:
        get_location_context_data(
            site_id=str(location_site_id),
            only_empty=False)
        return
    try:
        LocationSite.objects.get(id=location_site_id)
    except LocationSite.DoesNotExist:
        log('Location site does not exist')
        return
    get_location_context_data(
        site_id=str(location_site_id),
        only_empty=False)
def generate_spatial_scale_filter(file_path=None):
    spatial_tree = []
    location_context_filters = LocationContextFilter.objects.all().order_by(
        'display_order',
    )
    for location_context_filter in location_context_filters:
        spatial_tree_data = {
            'name': location_context_filter.title,
            'key': slugify(location_context_filter.title),
            'children': []
        }
        for group in location_context_filter.location_context_groups.all(
        ).order_by('locationcontextfiltergrouporder__group_display_order'):
            location_contexts = LocationContext.objects.filter(
                group=group).distinct('value').order_by('value').exclude(
                value='None')
            if not location_contexts:
                continue
            spatial_tree_value = list(
                location_contexts.annotate(
                    query=F('value'),
                    key=F('group__key')).values('query', 'key'))
            spatial_tree_value_sorted = sorted(
                spatial_tree_value,
                key=lambda i: (
                    # Sort numeric prefixes numerically and everything
                    # else alphabetically; the tuple key keeps int and str
                    # from being compared directly, which raises TypeError
                    # in Python 3
                    (0, int(i['query'].split(' ')[0]), '')
                    if i['query'].split(' ')[0].isdigit()
                    else (1, 0, i['query'])
                ))
            layer_name = group.layer_name
            spatial_tree_children = {
                'key': group.key,
                'name': group.name,
                'value': spatial_tree_value_sorted,
                'layer_name': layer_name,
                'wms_url': group.wms_url,
                'wms_format': group.wms_format,
                'layer_identifier': group.layer_identifier,
            }
            spatial_tree_data['children'].append(spatial_tree_children)
        spatial_tree.append(spatial_tree_data)
    if spatial_tree:
        if not file_path:
            file_name = 'spatial_scale_filter_list.txt'
            file_path = os.path.join(settings.MEDIA_ROOT, file_name)
        log(file_path)
        with open(file_path, 'w') as file_handle:
            json.dump(spatial_tree, file_handle)
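# A quick check of the tuple sort key used above (sample values are
# assumptions): numeric prefixes sort numerically and precede the purely
# alphabetic entries.
values = [{'query': '10 Lowveld'}, {'query': '2 Highveld'},
          {'query': 'Karoo'}]
ordered = sorted(
    values,
    key=lambda i: (
        (0, int(i['query'].split(' ')[0]), '')
        if i['query'].split(' ')[0].isdigit()
        else (1, 0, i['query'])
    ))
# -> '2 Highveld', '10 Lowveld', 'Karoo'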
def handle(self, *args, **options):
    location_sites = LocationSite.objects.exclude(
        site_code__iregex=r'([A-Za-z0-9]){1,6}-([A-Za-z0-9]*)$')
    log('Update {} location site(s)'.format(location_sites.count()))
    signals.post_save.disconnect(location_site_post_save_handler)
    index = 0
    for location_site in location_sites:
        log('processing %s of %s' % (index, location_sites.count()))
        index += 1
        # Allocate site code
        allocate_site_codes_from_river(
            update_site_code=True,
            location_id=location_site.id)
    signals.post_save.connect(location_site_post_save_handler)
def handle(self, *args, **options):
    site_ids = options.get('location_sites', None)
    if site_ids:
        site_ids = site_ids.split(',')
    location_sites = LocationSite.objects.filter(
        location_context__isnull=False)
    if site_ids:
        location_sites = location_sites.filter(id__in=site_ids)
    if not location_sites:
        log('Location site does not exist')
        return
    site_count = 1
    for site in location_sites:
        log('Migrating (%s) %s/%s' % (
            site.id, site_count, location_sites.count()))
        site_count += 1
        context_json = json.loads(site.location_context)
        try:
            # dict.iteritems() is Python 2 only; use items() instead
            for key, group in (
                    context_json['context_group_values'].items()):
                group_key = group['key']
                if isinstance(group['service_registry_values'], list):
                    continue
                for k, context_value in (
                        group['service_registry_values'].items()):
                    context_key = context_value['key']
                    context_name = context_value['name']
                    context_value = str(context_value['value'])
                    LocationContext.objects.get_or_create(
                        site=site,
                        group_key=group_key,
                        key=context_key,
                        name=context_name,
                        value=context_value)
        except (KeyError, UnicodeEncodeError):
            continue
        site.location_context = None
        site.location_context_document = None
        site.save()
def handle(self, *args, **options):
    found_all = True
    sass_version = options.get('sass_version', 5)  # noqa
    sass_taxa = SassTaxon.objects.filter(
        sass_5_score__isnull=False,
        taxon_sass_5__isnull=False)
    current_index = 1
    for taxa in TAXON_LIST:
        sass_taxon = sass_taxa.filter(taxon_sass_5__icontains=taxa)
        if not sass_taxon.exists():
            log('Taxon not found : {}'.format(taxa))
            found_all = False
            continue
        sass_taxon = sass_taxon[0]
        sass_taxon.display_order_sass_5 = current_index
        sass_taxon.save()
        current_index += 1
    log('Found all : {}'.format(found_all))
def get_location_context_data(group_keys=None, site_id=None,
                              only_empty=False):
    # Get location context data from GeoContext
    if not group_keys:
        group_keys = preferences.SiteSetting.geocontext_keys.split(',')
    elif not isinstance(group_keys, list):
        group_keys = group_keys.split(',')
    if site_id:
        location_sites = LocationSite.objects.filter(
            id__in=site_id.split(','))
    else:
        location_sites = LocationSite.objects.all()
    if only_empty:
        location_sites = location_sites.exclude(
            reduce(operator.and_, (
                Q(locationcontext__group__geocontext_group_key=x)
                for x in group_keys)))
    num = len(location_sites)
    i = 1
    if num == 0:
        log('No locations with applied filters were found')
        return
    for location_site in location_sites:
        log('Updating %s of %s, %s' % (i, num, location_site.name))
        i += 1
        all_context = None
        if only_empty:
            try:
                all_context = list(
                    LocationContext.objects.filter(
                        site=location_site).values_list(
                        'group__geocontext_group_key', flat=True))
            except (ValueError, TypeError):
                pass
        for group_key in group_keys:
            if all_context and group_key in all_context:
                log('Context data already exists for {}'.format(group_key))
                continue
            current_outcome, message = (
                location_site.add_context_group(group_key))
            success = current_outcome
            log('[{status}] [{site_id}] [{group}] - {message}'.format(
                status='SUCCESS' if success else 'FAILED',
                site_id=location_site.id,
                message=message,
                group=group_key))
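# A usage sketch of get_location_context_data (the group keys and site ids
# are assumed sample values): refresh GeoContext data for two sites,
# skipping any group a site already has.
get_location_context_data(
    group_keys='political_boundary_group,rainfall_group',
    site_id='12,34',
    only_empty=True)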
def handle(self, *args, **options):
    clear_site_code = options.get('clear_site_code')
    restore_legacy_site_code = options.get('restore_legacy_site_code')
    if clear_site_code:
        location_site_to_clear = LocationSite.objects.filter(
            river__isnull=False)
        log('Clear site code for {} sites'.format(
            location_site_to_clear.count()))
        location_site_to_clear.update(site_code='')
    if restore_legacy_site_code:
        sites_with_legacy_site_code = LocationSite.objects.filter(
            legacy_site_code__isnull=False).exclude(legacy_site_code='')
        log('Restoring legacy site code for {} sites'.format(
            sites_with_legacy_site_code.count()))
        sites_with_legacy_site_code.update(site_code=F('legacy_site_code'))
    location_sites = LocationSite.objects.filter(
        site_code__exact='',
        river__isnull=False)
    index = 0
    for location_site in location_sites:
        log('processing %s of %s' % (index, location_sites.count()))
        index += 1
        # Allocate site code
        allocate_site_codes_from_river(
            update_site_code=True,
            location_id=location_site.id)
def harvest_collections(session_id):
    from bims.utils.logger import log
    from bims.models import HarvestSession
    from bims.scripts.import_gbif_occurrences import (
        import_gbif_occurrences
    )
    try:
        harvest_session = HarvestSession.objects.get(id=session_id)
    except HarvestSession.DoesNotExist:
        log('Session does not exist')
        return
    harvest_session.status = 'Processing'
    harvest_session.save()
    taxonomies = harvest_session.module_group.taxonomies.all()
    index = 1
    for taxon in taxonomies:
        # Re-fetch the session so a cancellation made elsewhere is seen
        if HarvestSession.objects.get(id=session_id).canceled:
            print('Canceled')
            return
        harvest_session.status = (
            'Fetching gbif data for {c} ({i}/{t})'.format(
                c=taxon.canonical_name,
                i=index,
                t=taxonomies.count()
            ))
        index += 1
        harvest_session.save()
        import_gbif_occurrences(
            taxonomy=taxon,
            log_file_path=harvest_session.log_file.path,
            session_id=session_id
        )
    harvest_session.status = 'Finished'
    harvest_session.finished = True
    harvest_session.save()
def handle(self, *args, **options):
    dev_folder = '/home/web/django_project'
    folder_name = 'data'
    if os.path.exists(dev_folder):
        root = dev_folder
    else:
        root = '/usr/src/bims'
    csv_file_path = os.path.join(
        root,
        'scripts/static/{folder}/{filename}'.format(
            folder=folder_name,
            filename=self.file_name))
    if not os.path.exists(csv_file_path):
        log('File not found')
        # Bail out instead of crashing on the open() below
        return
    with open(csv_file_path) as csv_file:
        csv_reader = csv.DictReader(csv_file)
        current_taxon_group = None
        for row in csv_reader:
            taxon_group_name = row[TAXON_GROUP]
            if taxon_group_name:
                current_taxon_group = TaxonGroup.objects.get(
                    name__iexact=taxon_group_name)
            sass_taxa = SassTaxon.objects.filter(
                Q(taxon_sass_5__iexact=row[TAXON]) |
                Q(taxon_sass_4__iexact=row[TAXON]))
            if sass_taxa.exists():
                if not current_taxon_group.taxonomies.filter(
                        id__in=sass_taxa.values_list('taxon_id')).exists():
                    log('Sass taxon is not in the correct group')
                    current_taxon_group.taxonomies.add(sass_taxa[0].taxon)
            else:
                log('Sass Taxon does not exist')
def handle(self, *args, **options):
    sass_taxon_4 = SassTaxon.objects.filter(taxon_sass_4__isnull=False)
    for sass_taxon in sass_taxon_4:
        sass_taxon_name = sass_taxon.taxon_sass_4.lower().replace(
            '1 sp', '1')
        taxon_5 = SassTaxon.objects.filter(
            taxon_sass_4__isnull=True,
            taxon_sass_5__isnull=False,
            taxon_sass_5__icontains=sass_taxon_name)
        if taxon_5.count() > 0:
            taxon_5 = taxon_5[0]
            log('Found taxon 5 {0}'.format(taxon_5.taxon_sass_5))
            site_visit_taxon = SiteVisitTaxon.objects.filter(
                sass_taxon=taxon_5)
            site_visit_taxon.update(sass_taxon=sass_taxon)
            site_visit_biotope_taxon = (
                SiteVisitBiotopeTaxon.objects.filter(sass_taxon=taxon_5))
            site_visit_biotope_taxon.update(sass_taxon=sass_taxon)
            sass_taxon.sass_5_score = taxon_5.sass_5_score
            sass_taxon.taxon_sass_5 = taxon_5.taxon_sass_5
            sass_taxon.save()
            taxon_5.delete()
def merge_context_group(excluded_group=None, group_list=None):
    """Merge multiple location context groups."""
    if not excluded_group:
        return
    if not group_list:
        return
    groups = group_list.exclude(id=excluded_group.id)
    if groups.count() < 1:
        return
    log('Merging %s data' % groups.count())
    links = [
        rel.get_accessor_name() for rel in
        excluded_group._meta.get_fields() if
        issubclass(type(rel), ForeignObjectRel)
    ]
    if links:
        for group in groups:
            log('----- {} -----'.format(str(group)))
            for link in links:
                try:
                    objects = getattr(group, link).all()
                    if objects.count() > 0:
                        print('Updating {obj} for : {taxon}'.format(
                            obj=str(objects.model._meta.label),
                            taxon=str(group)))
                        update_dict = {
                            getattr(group, link).field.name: excluded_group
                        }
                        objects.update(**update_dict)
                except Exception as e:  # noqa
                    continue
            log('-' * (len(str(group)) + 12))
    groups.delete()
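# A usage sketch of merge_context_group (the model import path and filter
# value are assumptions for illustration): keep one group and fold the
# other duplicates into it, re-pointing every reverse FK before the
# duplicates are deleted.
from bims.models import LocationContextGroup  # assumed import path

duplicates = LocationContextGroup.objects.filter(key='geo_class_recoded')
merge_context_group(
    excluded_group=duplicates.first(),  # the group that survives the merge
    group_list=duplicates)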
def format_location_context(location_site_id, force_update=False):
    try:
        location_site = LocationSite.objects.get(id=location_site_id)
    except LocationSite.DoesNotExist:
        log('LocationSite Does Not Exist', 'debug')
        return
    if not location_site.location_context_document:
        log('LocationSite context document does not exist', 'debug')
        return
    location_context = json.loads(location_site.location_context_document)
    # hashlib.md5 requires bytes in Python 3, so encode the document first
    hash_string = hashlib.md5(
        location_site.location_context_document.encode('utf-8')).hexdigest()
    formatted = {}
    if location_site.location_context and not force_update:
        formatted_location_context = json.loads(
            location_site.location_context)
        if not location_site.original_geomorphological:
            try:
                context_geo = formatted_location_context[
                    'context_group_values']['geomorphological_group'][
                    'service_registry_values']['geo_class_recoded'][
                    'value']
                models.signals.post_save.disconnect(
                    location_site_post_save_handler,
                )
                location_site.original_geomorphological = context_geo
                location_site.save()
                models.signals.post_save.connect(
                    location_site_post_save_handler,
                )
            except (KeyError, TypeError):
                pass
        if 'hash' in formatted_location_context:
            if formatted_location_context['hash'] == hash_string:
                process_spatial_scale_data(
                    formatted_location_context['context_group_values'])
                if location_site.refined_geomorphological:
                    # Update geo value in geocontext data
                    try:
                        context_geo = formatted_location_context[
                            'context_group_values'][
                            'geomorphological_group'][
                            'service_registry_values'][
                            'geo_class_recoded']['value']
                        if (context_geo ==
                                location_site.refined_geomorphological):
                            log('Formatted location context already exists')
                            return
                    except KeyError:
                        log('Formatted location context already exists')
                        return
                else:
                    log('Formatted location context already exists')
                    return
    if not isinstance(location_context, dict):
        return
    # dict.iteritems() is Python 2 only; use items() instead
    for context_key, context_value in location_context.items():
        if isinstance(context_value, list):
            formatted[context_key] = array_to_dict(
                context_value, key_name='key')
        else:
            formatted[context_key] = context_value
    models.signals.post_save.disconnect(location_site_post_save_handler)
    if not location_site.original_geomorphological:
        try:
            context_geo = formatted['context_group_values'][
                'geomorphological_group']['service_registry_values'][
                'geo_class_recoded']['value']
            location_site.original_geomorphological = context_geo
        except KeyError:
            pass
    if location_site.refined_geomorphological:
        try:
            formatted['context_group_values']['geomorphological_group'][
                'service_registry_values']['geo_class_recoded']['value'] = (
                location_site.refined_geomorphological)
        except KeyError:
            pass
    process_spatial_scale_data(formatted['context_group_values'])
    formatted['hash'] = hash_string
    location_site.location_context = formatted
    location_site.save()
    log('Location context formatted', 'info')
    models.signals.post_save.connect(location_site_post_save_handler)
def handle(self, *args, **options):
    folder_name = 'data'
    file_path = os.path.join(
        os.path.abspath(os.path.dirname(__name__)),
        'scripts/static/{folder}/{filename}'.format(
            folder=folder_name,
            filename=self.file_name
        ))
    found = 0
    not_found = []
    data_error = []
    with open(file_path, 'r') as csvfile:
        csv_reader = csv.DictReader(csvfile)
        for index, record in enumerate(csv_reader):
            collection_records = BiologicalCollectionRecord.objects.none()
            try:
                record_point = Point(
                    float(record[LONGITUDE]),
                    float(record[LATITUDE]))
                location_sites = LocationSite.objects.filter(
                    geometry_point=record_point,
                    name=record[LOCATION_SITE]
                )
                if not location_sites.exists():
                    log('no location site')
                    continue
                location_site = location_sites[0]
                if record[SAMPLING_DATE].lower() == 'unspecified':
                    log('Unspecified date -> Next row')
                    continue
                uuid_value = uuid.UUID(record[UUID])
                collection_records = (
                    BiologicalCollectionRecord.objects.filter(
                        uuid=uuid_value
                    )
                )
                if not collection_records.exists():
                    if record[ORIGIN] == 'Native':
                        category = 'indigenous'
                    else:
                        category = 'alien'
                    collection_records = (
                        BiologicalCollectionRecord.objects.filter(
                            site=location_site,
                            original_species_name=record[SPECIES_NAME],
                            collection_date=datetime.strptime(
                                record[SAMPLING_DATE], '%Y/%m/%d'),
                            category=category,
                            collector=record[COLLECTOR],
                            notes=record['Notes']
                        )
                    )
                if collection_records.count() != 1:
                    print('multiple collection records or zero')
                    if collection_records.count() == 0:
                        not_found.append(-99)
                    else:
                        not_found.extend(
                            list(collection_records.values_list(
                                'id', flat=True)))
                    continue
                print('found collection record %s'
                      % collection_records[0].id)
                found += 1
                collection_record = collection_records[0]
                collection_record.uuid = str(uuid_value)
                collection_record.save()
                reference_category = record['Reference category']
                document = None
                document_link = record['Document Upload Link']
                # Take the last path segment as the document id
                document_id = document_link.split('/')[-1]
                if document_id:
                    document_id = int(document_id)
                    try:
                        document = Document.objects.get(id=document_id)
                        bims_document, b_created = (
                            BimsDocument.objects.get_or_create(
                                document=document
                            ))
                        author = record['Reference']
                        if (bims_document.author and
                                bims_document.author != author):
                            bims_document.author = author
                            bims_document.save()
                    except Document.DoesNotExist:
                        pass
                if (reference_category ==
                        'Peer-reviewed scientific article'):
                    # peer-reviewed
                    doi = record[DOI].strip()
                    if not doi and not document:
                        continue
                    if doi:
                        # Add doi
                        try:
                            entry = Entry.objects.get(doi__iexact=doi)
                        except Entry.DoesNotExist:
                            doi_loader = DOILoader()
                            try:
                                doi_loader.load_records(DOIs=[doi])
                            except DOILoaderError:
                                log('DOILoaderError, skipping')
                                continue
                            except HTTPError:
                                log('Could not fetch the doi, skipping')
                                continue
                            doi_loader.save_records()
                            try:
                                entry = Entry.objects.get(doi__iexact=doi)
                            except Entry.DoesNotExist:
                                log('Entry does not exist, skipping')
                                continue
                        source_reference = (
                            SourceReference.create_source_reference(
                                category='bibliography',
                                source_id=entry.id,
                                note=None
                            )
                        )
                        print('Add DOI to %s' % collection_record.id)
                        collection_record.source_reference = (
                            source_reference
                        )
                        collection_record.save()
                    else:
                        source_reference, sr_created = (
                            SourceReferenceBibliography.objects
                            .get_or_create(document=document)
                        )
                        collection_record.source_reference = (
                            source_reference
                        )
                        collection_record.save()
                        print('Add Bibliography Document to %s'
                              % collection_record.id)
                elif reference_category == 'Database':
                    # Database
                    if not document:
                        continue
                    database_name = record['Reference']
                    database, created = (
                        DatabaseRecord.objects.get_or_create(
                            name=database_name))
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='database',
                            source_id=database.id,
                            note=None
                        )
                    )
                    source_reference.document = document
                    source_reference.save()
                    collection_record.source_reference = source_reference
                    collection_record.save()
                    print('Add Database Document to %s'
                          % collection_record.id)
                elif (reference_category == 'Thesis' or
                        reference_category == 'Published report'):
                    # published
                    if not document:
                        continue
                    source_reference = (
                        SourceReference.create_source_reference(
                            category='document',
                            source_id=document.id,
                            note=None
                        )
                    )
                    collection_record.source_reference = source_reference
                    collection_record.save()
                    print('Add Published Document to %s'
                          % collection_record.id)
                else:
                    # unpublished
                    source_reference, created = (
                        SourceReference.objects.get_or_create(
                            note=record['Reference']
                        ))
                    collection_record.source_reference = source_reference
                    collection_record.save()
                    print('Add Unpublished to %s' % collection_record.id)
            except KeyError:
                print('KeyError')
                data_error.extend(
                    list(collection_records.values_list('id', flat=True)))
                continue
            except ValueError:
                print('ValueError')
                data_error.extend(
                    list(collection_records.values_list('id', flat=True)))
                continue
    log('Summary')
    log('Total found : %s' % found)
    log('Total not found: %s' % len(not_found))
    log('Total data error: %s' % len(data_error))
def csv_dict_reader(self, csv_reader):
    signals.pre_save.disconnect(taxonomy_pre_save_handler, sender=Taxonomy)
    errors = []
    success = []
    csv_data = []
    index = 1
    for row in csv_reader:
        index += 1
        taxon_name = self.row_value(row, TAXON)
        if SCIENTIFIC_NAME in row:
            scientific_name = (self.row_value(row, SCIENTIFIC_NAME)
                               if self.row_value(row, SCIENTIFIC_NAME)
                               else taxon_name)
        else:
            scientific_name = taxon_name
        scientific_name = scientific_name.strip()
        # Get rank
        rank = self.row_value(row, 'Taxon Rank')
        if not rank:
            rank = self.row_value(row, 'Taxon rank')
        if not rank:
            if self.row_value(row, SUBSPECIES):
                rank = SUBSPECIES
            elif self.row_value(row, SPECIES):
                rank = SPECIES
            elif self.row_value(row, GENUS):
                rank = GENUS
            elif self.row_value(row, SUBFAMILY):
                rank = SUBFAMILY
            elif self.row_value(row, FAMILY):
                rank = FAMILY
            elif self.row_value(row, ORDER):
                rank = ORDER
            elif self.row_value(row, CLASS):
                rank = CLASS
            elif self.row_value(row, PHYLUM):
                rank = PHYLUM
            else:
                rank = KINGDOM
        taxa = Taxonomy.objects.filter(
            canonical_name__iexact=taxon_name,
            rank=rank.upper())
        print('---------')
        ids = []
        if self.check_only:
            print('Checking data {}'.format(taxon_name))
            if not taxa.exists():
                errors.append('Missing taxon {taxon} - {row}'.format(
                    taxon=taxon_name,
                    row=index))
            else:
                if taxa.count() > 1:
                    errors.append(
                        'Duplicate taxa for {taxon} - {row}'.format(
                            taxon=taxon_name,
                            row=index))
                    check_taxa_duplicates(taxon_name, rank)
                if taxa[0].id not in ids:
                    ids.append(taxa[0].id)
                else:
                    errors.append(
                        'Duplicate ids for {taxon} - {row}'.format(
                            taxon=taxon_name,
                            row=index))
            continue
        try:
            taxonomy = None
            if self.missing_only and taxa.exists():
                logger.debug(
                    'Skip ingesting existing data {}'.format(taxon_name))
                continue
            if taxa.exists():
                taxonomy = taxa[0]
                logger.debug('{} already in the system'.format(taxon_name))
            if not taxonomy:
                # Fetch from gbif
                taxonomy = fetch_all_species_from_gbif(
                    species=taxon_name,
                    taxonomic_rank=rank,
                    should_get_children=False,
                    fetch_vernacular_names=False,
                    use_name_lookup=True,
                    **self.rank_classifier())
            if taxonomy:
                success.append(taxonomy.id)
            else:
                # Try again with lookup
                logger.debug('Use different method')
                taxonomy = fetch_all_species_from_gbif(
                    species=taxon_name,
                    taxonomic_rank=rank,
                    should_get_children=False,
                    fetch_vernacular_names=False,
                    use_name_lookup=False,
                    **self.rank_classifier())
                if not taxonomy:
                    errors.append({
                        'row': index,
                        'error': 'Taxonomy not found'
                    })
                else:
                    success.append(taxonomy.id)
            # Validate data
            if taxonomy:
                if (taxon_name not in taxonomy.scientific_name and
                        taxon_name.lower().strip() !=
                        taxonomy.canonical_name.lower().strip() and
                        taxon_name.lower() not in
                        taxonomy.legacy_canonical_name.lower()):
                    taxonomy = None
                elif not taxonomy.parent:
                    taxonomy.parent = self.get_parent(row, rank)
            # Data from GBIF couldn't be found, so add it manually
            if not taxonomy:
                parent = self.get_parent(row, rank)
                if not parent:
                    errors.append({
                        'row': index,
                        'error': 'Parent not found {}'.format(taxon_name)
                    })
                else:
                    # Taxonomy not found, create one
                    taxonomy, _ = Taxonomy.objects.get_or_create(
                        scientific_name=scientific_name,
                        canonical_name=taxon_name,
                        rank=TaxonomicRank[rank.upper()].name,
                        parent=parent)
                    success.append(taxonomy.id)
            # -- Finish
            if taxonomy:
                # Merge taxon with same canonical name
                legacy_canonical_name = taxonomy.legacy_canonical_name
                legacy_canonical_name = legacy_canonical_name.replace(
                    '\\xa0', '')
                if FORMER_SPECIES_NAME in row:
                    former_species_name = self.row_value(
                        row, FORMER_SPECIES_NAME)
                    if len(former_species_name) > 500:
                        former_species_name = former_species_name[:500]
                    if former_species_name not in legacy_canonical_name:
                        legacy_canonical_name += ';' + former_species_name
                taxonomy.legacy_canonical_name = legacy_canonical_name[:700]
                # -- Import date
                if self.import_date:
                    taxonomy.import_date = parse_date(self.import_date)
                self.additional_data(taxonomy, row)
                # Add to csv data
                if self.csv_name:
                    csv_data.append(self.process_csv_data(taxonomy))
                # -- Validate parents
                self.validate_parents(taxon=taxonomy, row=row)
                if taxonomy.canonical_name != taxon_name:
                    taxonomy.canonical_name = taxon_name
                    taxonomy.save()
        except Exception as e:  # noqa
            print(str(e))
            errors.append({'row': index, 'error': str(e)})
    if len(errors) > 0:
        logger.debug(errors)
    log('----')
    if len(success) > 0:
        logger.debug(success)
    if self.csv_name:
        self.export_to_csv(csv_data)
def generate_site_visit_ecological_condition(site_visits):
    """
    Generate site visit ecological condition from list of site visits
    :param site_visits: list of site visit query objects
    """
    for site_visit in site_visits:
        log('Generate ecological condition for site visit : {}'.format(
            site_visit.id
        ))
        site_visit_taxa = SiteVisitTaxon.objects.filter(
            site_visit=site_visit
        )
        summary = site_visit_taxa.annotate(
            count=Count('sass_taxon'),
            sass_score=Coalesce(Sum(Case(
                When(
                    condition=Q(site_visit__sass_version=5,
                                sass_taxon__sass_5_score__isnull=False,
                                taxon_abundance__isnull=False),
                    then='sass_taxon__sass_5_score'),
                When(
                    condition=Q(site_visit__sass_version=4,
                                sass_taxon__score__isnull=False,
                                taxon_abundance__isnull=False),
                    then='sass_taxon__score'),
                default=0),
            ), 0),
            sass_id=F('site_visit__id')
        ).annotate(
            # ASPT = total SASS score / number of scoring taxa
            aspt=Cast(F('sass_score'), FloatField()) /
            Cast(F('count'), FloatField()),
        ).values('sass_score', 'aspt', 'count')
        if not summary:
            continue
        aspt_score = summary[0]['aspt']
        sass_score = summary[0]['sass_score']
        site_visit_ecological, created = (
            SiteVisitEcologicalCondition.objects.get_or_create(
                site_visit=site_visit,
                sass_score=sass_score,
                aspt_score=aspt_score
            )
        )
        try:
            location_context = json.loads(
                site_visit.location_site.location_context
            )
            # The original .encode('utf-8') calls are Python 2 leftovers;
            # in Python 3 they would return bytes and break the split below
            eco_region = (
                location_context['context_group_values'][
                    'river_ecoregion_group'][
                    'service_registry_values']['eco_region_1']['value']
            )
            geo_class = (
                location_context['context_group_values'][
                    'geomorphological_group'][
                    'service_registry_values']['geo_class']['value']
            )
            # Fix eco_region name: strip a leading numeric prefix
            eco_region_splits = eco_region.split(' ')
            if eco_region_splits[0].isdigit():
                eco_region_splits.pop(0)
                eco_region = ' '.join(eco_region_splits)
        except (TypeError, ValueError, KeyError):
            continue
        sass_ecological_conditions = (
            SassEcologicalCondition.objects.filter(
                ecoregion_level_1__icontains=eco_region.strip(),
                geomorphological_zone__icontains=geo_class.strip()
            )
        )
        found_ecological_condition = False
        for sass_ecological_condition in sass_ecological_conditions:
            if (sass_score >
                    sass_ecological_condition.sass_score_precentile or
                    aspt_score >
                    sass_ecological_condition.aspt_score_precentile):
                site_visit_ecological.ecological_condition = (
                    sass_ecological_condition.ecological_category
                )
                site_visit_ecological.save()
                found_ecological_condition = True
                log('Found ecological condition : {}'.format(
                    sass_ecological_condition.ecological_category
                ))
                break
        if found_ecological_condition:
            continue
        # Set to lowest category
        lowest_category = SassEcologicalCategory.objects.filter(
            Q(category__icontains='e') | Q(category__icontains='f')
        ).order_by('category')
        if not lowest_category:
            continue
        log('Set to lowest ecological category : {}'.format(
            lowest_category[0].category
        ))
        site_visit_ecological.ecological_condition = lowest_category[0]
        site_visit_ecological.save()
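# A toy check of the ASPT arithmetic above (the scores are assumed sample
# values): ASPT is the total SASS score divided by the number of scoring
# taxa.
sass_score = 5 + 8 + 12   # three taxa with SASS scores 5, 8 and 12
taxon_count = 3
aspt = sass_score / taxon_count
print(aspt)  # 8.333...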