def validate_line(self, how: ImportHow, diag: ImportDiagnostic, lig, vals_cache):
    """
    Validate a line from the data point of view.
    :param how: Importing directives.
    :param diag: Diagnostic of the import, collecting errors and seen values.
    :param lig: A line of TSV data, as a {header: val} dict.
    :param vals_cache: A cache of values, per column and seen value.
    """
    latitude_was_seen = False
    predefined_mapping = GlobalMapping.PREDEFINED_FIELDS
    custom_mapping = how.custom_mapping
    for raw_field, a_field in self.clean_fields.items():
        m = predefined_mapping.get(a_field)
        if m is None:
            m = custom_mapping.search_field(a_field)
            # No mapping, not stored
            if m is None:
                continue
        raw_val = lig.get(raw_field)
        # Try to get the value from the cache
        cache_key = (raw_field, raw_val)
        if cache_key in vals_cache:
            if a_field == 'object_lat':
                latitude_was_seen = True
            continue
        vals_cache[cache_key] = 1
        is_numeric = m['type'] == 'n'
        # Same column with same value was not seen already, proceed
        csv_val: str = clean_value_and_none(raw_val, is_numeric)
        # From V1.1, if the column is present then it's considered as seen.
        # Before, the criterion was 'at least one value'.
        diag.cols_seen.add(a_field)
        if csv_val == '':
            # If no relevant value, leave field as NULL
            continue
        if a_field == 'object_lat':
            vf = convert_degree_minute_float_to_decimal_degree(csv_val)
            if vf < -90 or vf > 90:
                diag.error("Invalid Lat. value '%s' for Field '%s' in file %s. "
                           "Incorrect range -90/+90°."
                           % (csv_val, raw_field, self.relative_name))
                del vals_cache[cache_key]
            else:
                latitude_was_seen = True
        elif a_field == 'object_lon':
            vf = convert_degree_minute_float_to_decimal_degree(csv_val)
            if vf < -180 or vf > 180:
                diag.error("Invalid Long. value '%s' for Field '%s' in file %s. "
                           "Incorrect range -180/+180°."
                           % (csv_val, raw_field, self.relative_name))
        elif is_numeric:
            vf = to_float(csv_val)
            if vf is None:
                diag.error("Invalid float value '%s' for Field '%s' in file %s."
                           % (csv_val, raw_field, self.relative_name))
        elif a_field == 'object_annotation_category_id':
            diag.classif_id_seen.add(int(csv_val))
        elif a_field == 'object_date':
            try:
                ObjectHeader.date_from_txt(csv_val)
            except ValueError:
                diag.error("Invalid Date value '%s' for Field '%s' in file %s."
                           % (csv_val, raw_field, self.relative_name))
        elif a_field == 'object_time':
            try:
                ObjectHeader.time_from_txt(csv_val)
            except ValueError:
                diag.error("Invalid Time value '%s' for Field '%s' in file %s."
                           % (csv_val, raw_field, self.relative_name))
        elif a_field == 'object_annotation_category':
            if clean_value_and_none(lig.get('object_annotation_category_id', '')) == '':
                # Apply the mapping, if and only if there is no id
                csv_val = how.taxo_mapping.get(csv_val.lower(), csv_val)
                # Record that the taxon was seen
                how.found_taxa[csv_val.lower()] = None
        elif a_field == 'object_annotation_person_name':
            maybe_email = clean_value_and_none(lig.get('object_annotation_person_email', ''))
            # TODO: It's more "diag" than "how"
            how.found_users[csv_val.lower()] = {'email': maybe_email}
        elif a_field == 'object_annotation_status':
            if csv_val != 'noid' and csv_val.lower() not in classif_qual_revert:
                diag.error("Invalid Annotation Status '%s' for Field '%s' in file %s."
                           % (csv_val, raw_field, self.relative_name))
    # Update missing GPS count
    if not latitude_was_seen:
        diag.nb_objects_without_gps += 1
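# The validation above memoizes checks per (column, raw value) pair, so a value repeated
# down a TSV column is validated only once. Below is a minimal standalone sketch of that
# caching pattern, with a hypothetical check() callable standing in for the real per-field
# validators; it is an illustration, not part of the import pipeline.
def validate_with_cache(rows, check):
    vals_cache = {}
    nb_errors = 0
    for lig in rows:
        for col, raw_val in lig.items():
            cache_key = (col, raw_val)
            if cache_key in vals_cache:
                # Same column with same value was seen already, skip re-validation
                continue
            vals_cache[cache_key] = 1
            if not check(col, raw_val):
                nb_errors += 1
                # Un-caching failures mirrors validate_line(): the error is then
                # reported again on every line where the faulty value appears
                del vals_cache[cache_key]
    return nb_errors

# E.g.: validate_with_cache(rows, lambda col, v: col != 'object_lat' or -90 <= float(v) <= 90)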
def fetch_existing_ranks(session, prj_id):
    """
    Get existing image ranks from the project.
    """
    return ObjectHeader.fetch_existing_ranks(session, prj_id)
def read_fields_to_dicts(how: ImportHow, field_set: Set, lig: Dict[str, str],
                         dicts_to_write, vals_cache: Dict):
    """
    Read the data line into target dicts. Values go into the right bucket, i.e. target dict,
    depending on the mappings (the standard one and the per-project custom one).
    :param how: Importing directives.
    :param field_set: The fields present in the DB record.
    :param lig: A line of TSV data, as a {header: val} dict.
    :param dicts_to_write: The output data.
    :param vals_cache: A cache of values, per column and seen value.
    """
    predefined_mapping = GlobalMapping.PREDEFINED_FIELDS
    custom_mapping = how.custom_mapping
    # The CSV reader returns a minimal dict with no value equal to None,
    # so we have values only for common fields.
    for a_field in field_set.intersection(lig.keys()):
        # We have a value
        raw_val = lig[a_field]
        m = predefined_mapping.get(a_field)
        if not m:
            m = custom_mapping.search_field(a_field)
        assert m is not None
        field_table = m["table"]
        field_name = m["field"]
        is_numeric = m['type'] == 'n'
        # Try to get the transformed value from the cache
        cache_key: Any = (a_field, raw_val)
        if cache_key in vals_cache:
            cached_field_value = vals_cache.get(cache_key)
        else:
            csv_val = clean_value(raw_val, is_numeric)
            if csv_val == '':
                # If no relevant value, set field to NULL, i.e. None
                cached_field_value = None
            elif a_field == 'object_lat':
                # It's [n] type, but since UVPApp it can contain a notation like ddd°MM.SS,
                # which can be [t] as well.
                cached_field_value = convert_degree_minute_float_to_decimal_degree(csv_val)
            elif a_field == 'object_lon':
                cached_field_value = convert_degree_minute_float_to_decimal_degree(csv_val)
            elif is_numeric:
                cached_field_value = to_float(csv_val)
            elif a_field == 'object_date':
                cached_field_value = ObjectHeader.date_from_txt(csv_val)
            elif a_field == 'object_time':
                cached_field_value = ObjectHeader.time_from_txt(csv_val)
            elif field_name == 'classif_when':
                v2 = clean_value(lig.get('object_annotation_time', '000000')).zfill(6)
                cached_field_value = datetime.datetime(int(csv_val[0:4]), int(csv_val[4:6]),
                                                       int(csv_val[6:8]), int(v2[0:2]),
                                                       int(v2[2:4]), int(v2[4:6]))
                # No caching of this one, as it depends on another value on the same line
                cache_key = "0"
            elif field_name == 'classif_id':
                # Two fields map to classif_id; the second, textual, one has [t] type and
                # is treated here. The first, numeric, one goes through the is_numeric
                # case above.
                # Use the initial mapping
                mapped_val = how.taxo_mapping.get(csv_val.lower(), csv_val)
                cached_field_value = how.found_taxa[none_to_empty(mapped_val).lower()]
                # Better crash than write a bad value into the DB
                assert cached_field_value is not None, \
                    "Column %s: no classification of %s mapped as %s" \
                    % (a_field, csv_val, mapped_val)
            elif field_name == 'classif_who':
                # Eventually map to another user, if asked so
                usr_key = none_to_empty(csv_val).lower()
                cached_field_value = how.found_users[usr_key].get('id', None)
            elif field_name == 'classif_qual':
                cached_field_value = classif_qual_revert.get(csv_val.lower())
            else:
                # Assume it's an ordinary text field with nothing special
                cached_field_value = csv_val
            # Cache if relevant; setting cache_key to "0" above effectively voids the caching
            vals_cache[cache_key] = cached_field_value
        # Write the field into the right object
        dict_to_write = dicts_to_write[field_table]
        dict_to_write[field_name] = cached_field_value
    # Ensure that all dicts' fields are valued, to None if needed. This is required for bulk
    # inserts, in DBWriter.py, as SQLAlchemy core computes an insert statement for the first
    # line and just injects the data for the following ones.
    for a_field in field_set.difference(lig.keys()):
        fld_mping = custom_mapping.search_field(a_field)
        m = predefined_mapping.get(a_field, fld_mping)
        assert m is not None
        if m["field"] not in dicts_to_write[m["table"]]:
            dicts_to_write[m["table"]][m["field"]] = None
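# Why the loop above values every missing field, even to None: DBWriter.py relies on
# SQLAlchemy Core bulk inserts, where the INSERT statement is compiled from the first
# parameter dict and subsequent dicts only supply values, so all rows must share the
# same keys. A minimal sketch of that normalization, with illustrative names that are
# not the pipeline's real ones:
def fill_missing_fields(rows, all_fields):
    for a_row in rows:
        for a_fld in all_fields:
            # Absent values become NULLs, keeping every row dict identically shaped
            a_row.setdefault(a_fld, None)

# fill_missing_fields([{"lat": 43.6}, {"lon": 7.1}], {"lat", "lon"})
# -> both dicts now contain 'lat' and 'lon' keys, valued or None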
def fetch_existing_objects(session, prj_id):
    """
    Get existing object IDs (orig_id, AKA object_id in the TSV) from the project.
    """
    with CodeTimer("Existing objects for %d: " % prj_id, logger):
        return ObjectHeader.fetch_existing_objects(session, prj_id)
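# CodeTimer above wraps the DB query as a logging context manager. A minimal sketch of
# such a timer, assuming a contract similar to the real helper's (the actual CodeTimer
# implementation in the codebase may differ):
import time

class SketchCodeTimer:
    def __init__(self, prefix: str, a_logger):
        self.prefix = prefix
        self.logger = a_logger

    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Log elapsed time with the given prefix, e.g. "Existing objects for 12: 0.042s"
        self.logger.info("%s%.3fs", self.prefix, time.perf_counter() - self.start)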