def derive(self, num=P('Flight Number')):
    """
    Set the flight attribute to the dominant recorded flight number.

    Byte-string arrays are delegated to ``most_common_value`` with a
    threshold of 0.45; if it reports no value the attribute is left
    unset. Numeric arrays have values <= 0 and masked samples discarded,
    and the most frequent remaining value is stored (as a string) only
    when it accounts for more than 45% of those samples. Otherwise a
    warning is logged and the attribute is set to None.

    :param num: 'Flight Number' parameter; only ``num.array`` is read.
    """
    # Q: Should we validate the flight number?
    # dtype.kind 'S' identifies byte-string arrays. It replaces the
    # `dtype.type is np.string_` test because that alias was removed in
    # NumPy 2.0.
    if num.array.dtype.kind == 'S':
        value = most_common_value(num.array, threshold=0.45)
        if value is not None:
            self.set_flight_attr(value)
        return

    # Values of 0 are invalid flight numbers
    array = np.ma.masked_less_equal(num.array, 0)
    # Ignore masked values
    compressed_array = array.compressed()
    _, minvalue = min_value(compressed_array)
    # NOTE(review): min_value is expected to report None when no valid
    # samples remain - confirm against the helper. Comparing None with 0
    # raises TypeError on Python 3, so test for it explicitly.
    if minvalue is None or minvalue < 0:
        self.warning(
            "'%s' only supports unsigned (positive) values > 0, "
            "but none were found. Cannot determine flight number",
            self.name)
        self.set_flight_attr(None)
        return

    # note reverse of value, index from max_value due to bincount usage.
    # np.int_ is the concrete type the abstract np.integer used to resolve
    # to; passing the abstract class as a dtype is deprecated.
    value, count = max_value(
        np.bincount(compressed_array.astype(np.int_)))
    if count > len(compressed_array) * 0.45:
        # this value accounts for at least 45% of the values in the array
        self.set_flight_attr(str(int(value)))
    else:
        self.warning(
            "Only %d out of %d flight numbers were the same."
            " Flight Number attribute will be set as None.",
            count or 0, len(num.array))
        self.set_flight_attr(None)
        return
def derive(self, num=P('Flight Number')):
    """
    Set the flight attribute to the dominant recorded flight number.

    For byte-string arrays the most frequent element is stored when it is
    not the masked constant and occurs in more than 45% of the samples;
    otherwise the attribute is left unset. For numeric arrays the most
    frequent unmasked value is stored (as a string) when it accounts for
    more than 45% of the unmasked samples; otherwise a warning is logged
    and the attribute is set to None.

    :param num: 'Flight Number' parameter; only ``num.array`` is read.
    """
    # Q: Should we validate the flight number?
    # dtype.kind 'S' identifies byte-string arrays. It replaces the
    # `dtype.type is np.string_` test because that alias was removed in
    # NumPy 2.0.
    if num.array.dtype.kind == 'S':
        # XXX: Slow, but Flight Number should be sampled infrequently.
        tallies = Counter(num.array)
        # An empty array has no most-common element; skip it rather than
        # letting the lookup below fail.
        if tallies:
            # Stable ascending sort by count: the last entry is the most
            # frequent (ties resolve to the last one encountered).
            value, count = sorted(tallies.items(), key=itemgetter(1))[-1]
            if value is not np.ma.masked and count > len(num.array) * 0.45:
                self.set_flight_attr(value)
        return

    _, minvalue = min_value(num.array)
    # NOTE(review): min_value is expected to report None when the array has
    # no valid samples - confirm against the helper. Comparing None with 0
    # raises TypeError on Python 3, so test for it explicitly.
    if minvalue is None or minvalue < 0:
        self.warning("'%s' only supports unsigned (positive) values",
                     self.name)
        self.set_flight_attr(None)
        return

    # TODO: Fill num.array masked values (as there is no np.ma.bincount) -
    # perhaps with 0.0 and then remove all 0 values?
    # note reverse of value, index from max_value due to bincount usage.
    compressed_array = num.array.compressed()
    # np.int_ is the concrete type the abstract np.integer used to resolve
    # to; passing the abstract class as a dtype is deprecated.
    value, count = max_value(
        np.bincount(compressed_array.astype(np.int_)))
    if count > len(compressed_array) * 0.45:
        # this value accounts for at least 45% of the values in the array
        self.set_flight_attr(str(int(value)))
    else:
        self.warning("Only %d out of %d flight numbers were the same."
                     " Flight Number attribute will be set as None.",
                     count, len(num.array))
        self.set_flight_attr(None)
        return
def derive(self, num=P('Flight Number')):
    """
    Set the flight attribute to the dominant recorded flight number.

    Byte-string arrays are delegated to ``most_common_value`` with a
    threshold of 0.45; if it reports no value the attribute is left
    unset. Numeric arrays have values <= 0 and masked samples discarded,
    and the most frequent remaining value is stored (as a string) only
    when it accounts for more than 45% of those samples. Otherwise a
    warning is logged and the attribute is set to None.

    :param num: 'Flight Number' parameter; only ``num.array`` is read.
    """
    # Q: Should we validate the flight number?
    # dtype.kind 'S' identifies byte-string arrays. It replaces the
    # `dtype.type is np.string_` test because that alias was removed in
    # NumPy 2.0.
    if num.array.dtype.kind == 'S':
        value = most_common_value(num.array, threshold=0.45)
        if value is not None:
            self.set_flight_attr(value)
        return

    # Values of 0 are invalid flight numbers
    array = np.ma.masked_less_equal(num.array, 0)
    # Ignore masked values
    compressed_array = array.compressed()
    _, minvalue = min_value(compressed_array)
    # NOTE(review): min_value is expected to report None when no valid
    # samples remain - confirm against the helper. Comparing None with 0
    # raises TypeError on Python 3, so test for it explicitly.
    if minvalue is None or minvalue < 0:
        self.warning(
            "'%s' only supports unsigned (positive) values > 0, "
            "but none were found. Cannot determine flight number",
            self.name)
        self.set_flight_attr(None)
        return

    # note reverse of value, index from max_value due to bincount usage.
    # np.int_ is the concrete type the abstract np.integer used to resolve
    # to; passing the abstract class as a dtype is deprecated.
    value, count = max_value(
        np.bincount(compressed_array.astype(np.int_)))
    if count > len(compressed_array) * 0.45:
        # this value accounts for at least 45% of the values in the array
        self.set_flight_attr(str(int(value)))
    else:
        self.warning("Only %d out of %d flight numbers were the same."
                     " Flight Number attribute will be set as None.",
                     count or 0, len(num.array))
        self.set_flight_attr(None)
        return
def derive(self, num=P('Flight Number'), mobiles=S('Mobile')):
    """
    Set the flight attribute to the dominant flight number recorded
    within the Mobile sections.

    Samples outside the Mobile slices are masked first. Byte-string
    arrays are delegated to ``most_common_value`` with a threshold of
    0.45; a result is decoded, stripped of non-ASCII characters and
    stored, or None is stored if decoding fails. If no value is reported
    the attribute is left unset. Numeric arrays have values <= 0 and
    masked samples discarded, and the most frequent remaining value is
    stored (as a string) only when it accounts for more than 45% of
    those samples. Otherwise a warning is logged and the attribute is set
    to None.

    :param num: 'Flight Number' parameter; only ``num.array`` is read.
    :param mobiles: 'Mobile' sections; only ``get_slices()`` is called.
    """
    # Limit to Mobile sections
    num_array = mask_outside_slices(num.array, mobiles.get_slices())
    # Q: Should we validate the flight number?
    # dtype.kind 'S' identifies byte-string arrays. It replaces the
    # `dtype.type is np.string_` test because that alias was removed in
    # NumPy 2.0.
    if num.array.dtype.kind == 'S':
        value = most_common_value(num_array, threshold=0.45)
        if value is not None:
            # Only parse valid ASCII characters
            try:
                self.set_flight_attr(
                    re.sub(r'[^\x00-\x7f]', r'', value.decode()))
            except UnicodeDecodeError:
                self.set_flight_attr(None)
        return

    # Values of 0 are invalid flight numbers
    array = np.ma.masked_less_equal(num_array, 0)
    # Ignore masked values
    compressed_array = array.compressed()
    _, minvalue = min_value(compressed_array)
    if minvalue is None or minvalue < 0:
        self.warning(
            "'%s' only supports unsigned (positive) values > 0, "
            "but none were found. Cannot determine flight number",
            self.name)
        self.set_flight_attr(None)
        return

    # note reverse of value, index from max_value due to bincount usage.
    # np.int_ is the concrete type the abstract np.integer used to resolve
    # to; passing the abstract class as a dtype is deprecated.
    value, count = max_value(
        np.bincount(compressed_array.astype(np.int_)))
    if count > len(compressed_array) * 0.45:
        # this value accounts for at least 45% of the values in the array
        self.set_flight_attr(str(int(value)))
    else:
        self.warning("Only %d out of %d flight numbers were the same."
                     " Flight Number attribute will be set as None.",
                     count or 0, len(num.array))
        self.set_flight_attr(None)
        return