Пример #1
0
    def derive(self, num=P('Flight Number')):
        """Set the flight attribute to the dominant recorded flight number.

        String-typed arrays are handed to ``most_common_value`` with a 0.45
        threshold, and the attribute is only set when that helper returns a
        value. Numeric arrays have values <= 0 masked out; the most frequent
        remaining value is stored (as a string) when it accounts for more
        than 45% of the usable samples, otherwise the attribute is set to
        None and a warning is logged.

        :param num: 'Flight Number' parameter; only ``num.array`` is read.
        """
        # Q: Should we validate the flight number?
        if num.array.dtype.type is np.string_:
            value = most_common_value(num.array, threshold=0.45)
            if value is not None:
                self.set_flight_attr(value)
            return

        # Values of 0 are invalid flight numbers
        array = np.ma.masked_less_equal(num.array, 0)
        # Ignore masked values
        compressed_array = array.compressed()
        _, minvalue = min_value(compressed_array)
        # min_value presumably reports None when no usable samples remain
        # (the case the warning below describes). Check for it explicitly:
        # on Python 3 ``None < 0`` raises TypeError instead of being True.
        if minvalue is None or minvalue < 0:
            self.warning(
                "'%s' only supports unsigned (positive) values > 0, "
                "but none were found. Cannot determine flight number",
                self.name)
            self.set_flight_attr(None)
            return

        # note reverse of value, index from max_value due to bincount usage:
        # the position in the bincount output is the flight number itself and
        # the maximum found there is how many times it occurred.
        value, count = max_value(
            np.bincount(compressed_array.astype(np.integer)))
        if count > len(compressed_array) * 0.45:
            # this value accounts for at least 45% of the values in the array
            self.set_flight_attr(str(int(value)))
        else:
            self.warning(
                "Only %d out of %d flight numbers were the same."
                " Flight Number attribute will be set as None.", count or 0,
                len(num.array))
            self.set_flight_attr(None)
    def derive(self, num=P('Flight Number')):
        """Set the flight attribute to the dominant recorded flight number.

        String-typed arrays are counted element by element; the attribute is
        set only when the most frequent entry is not the masked constant and
        makes up more than 45% of the array. Numeric arrays are compressed
        (masked samples dropped) and the most frequent value is stored as a
        string under the same 45% rule, otherwise the attribute is set to
        None and a warning is logged.

        :param num: 'Flight Number' parameter; only ``num.array`` is read.
        """
        # Q: Should we validate the flight number?
        if num.array.dtype.type is np.string_:
            # XXX: Slow, but Flight Number should be sampled infrequently.
            occurrences = Counter(num.array)
            if not occurrences:
                # Empty array: there is nothing to pick from. Previously this
                # escaped as StopIteration from next() on an empty iterator.
                return
            # Stable sort by count; the last entry carries the highest count
            # (same tie-breaking as reversing the sorted list).
            value, count = sorted(occurrences.items(), key=itemgetter(1))[-1]
            if value is not np.ma.masked and count > len(num.array) * 0.45:
                self.set_flight_attr(value)
            return

        _, minvalue = min_value(num.array)
        # min_value presumably reports None when there are no unmasked
        # samples; on Python 3 ``None < 0`` raises TypeError, so treat that
        # case as "cannot determine" up front. Negative values must also be
        # rejected because np.bincount only accepts non-negative integers.
        if minvalue is None or minvalue < 0:
            self.warning("'%s' only supports unsigned (positive) values",
                            self.name)
            self.set_flight_attr(None)
            return

        # TODO: Fill num.array masked values (as there is no np.ma.bincount) - perhaps with 0.0 and then remove all 0 values?
        # note reverse of value, index from max_value due to bincount usage:
        # the position in the bincount output is the flight number itself and
        # the maximum found there is how many times it occurred.
        compressed_array = num.array.compressed()
        value, count = \
            max_value(np.bincount(compressed_array.astype(np.integer)))
        if count > len(compressed_array) * 0.45:
            # this value accounts for at least 45% of the values in the array
            self.set_flight_attr(str(int(value)))
        else:
            self.warning("Only %d out of %d flight numbers were the same."\
                         " Flight Number attribute will be set as None.",
                         count, len(num.array))
            self.set_flight_attr(None)
    def derive(self, num=P('Flight Number')):
        """Set the flight attribute to the dominant recorded flight number.

        String-typed arrays are handed to ``most_common_value`` with a 0.45
        threshold, and the attribute is only set when that helper returns a
        value. Numeric arrays have values <= 0 masked out; the most frequent
        remaining value is stored (as a string) when it accounts for more
        than 45% of the usable samples, otherwise the attribute is set to
        None and a warning is logged.

        :param num: 'Flight Number' parameter; only ``num.array`` is read.
        """
        # Q: Should we validate the flight number?
        if num.array.dtype.type is np.string_:
            value = most_common_value(num.array, threshold=0.45)
            if value is not None:
                self.set_flight_attr(value)
            return

        # Values of 0 are invalid flight numbers
        array = np.ma.masked_less_equal(num.array, 0)
        # Ignore masked values
        compressed_array = array.compressed()
        _, minvalue = min_value(compressed_array)
        # min_value presumably reports None when no usable samples remain
        # (the case the warning below describes). Check for it explicitly:
        # on Python 3 ``None < 0`` raises TypeError instead of being True.
        if minvalue is None or minvalue < 0:
            self.warning(
                "'%s' only supports unsigned (positive) values > 0, "
                "but none were found. Cannot determine flight number",
                self.name)
            self.set_flight_attr(None)
            return

        # note reverse of value, index from max_value due to bincount usage:
        # the position in the bincount output is the flight number itself and
        # the maximum found there is how many times it occurred.
        value, count = max_value(
            np.bincount(compressed_array.astype(np.integer)))
        if count > len(compressed_array) * 0.45:
            # this value accounts for at least 45% of the values in the array
            self.set_flight_attr(str(int(value)))
        else:
            self.warning("Only %d out of %d flight numbers were the same."
                         " Flight Number attribute will be set as None.",
                         count or 0, len(num.array))
            self.set_flight_attr(None)
    def derive(self,
               num=P('Flight Number'),
               mobiles=S('Mobile')):
        """Set the flight attribute from samples recorded in Mobile sections.

        The parameter array is first restricted to the slices supplied by
        ``mobiles``. String-typed arrays are reduced with
        ``most_common_value`` (threshold 0.45); a returned value is decoded
        and stripped of non-ASCII characters before being stored, and a
        decode failure stores None. Numeric arrays have values <= 0 masked
        out, and the most frequent remaining value is stored as a string when
        it accounts for more than 45% of the usable samples; otherwise None
        is stored and a warning is logged.

        :param num: 'Flight Number' parameter; only ``num.array`` is read.
        :param mobiles: 'Mobile' sections providing the slices to keep.
        """
        # Limit to Mobile sections
        mobile_samples = mask_outside_slices(num.array, mobiles.get_slices())

        # Q: Should we validate the flight number?
        if num.array.dtype.type is np.string_:
            dominant = most_common_value(mobile_samples, threshold=0.45)
            if dominant is None:
                # No dominant value: the attribute is left untouched.
                return
            # Only parse valid ASCII characters
            try:
                ascii_only = re.sub(r'[^\x00-\x7f]', r'', dominant.decode())
                self.set_flight_attr(ascii_only)
            except UnicodeDecodeError:
                self.set_flight_attr(None)
            return

        # Values of 0 are invalid flight numbers; masked entries are dropped
        # so that only usable samples are counted.
        usable = np.ma.masked_less_equal(mobile_samples, 0).compressed()
        _, minvalue = min_value(usable)
        if minvalue is None or minvalue < 0:
            self.warning(
                "'%s' only supports unsigned (positive) values > 0, "
                "but none were found. Cannot determine flight number",
                self.name)
            self.set_flight_attr(None)
            return

        # max_value is applied to bincount output, so what it reports as the
        # index is the flight number and what it reports as the value is the
        # number of occurrences.
        histogram = np.bincount(usable.astype(np.integer))
        flight_number, occurrences = max_value(histogram)
        required = len(usable) * 0.45
        if occurrences > required:
            # More than 45% of the usable samples agree on this number.
            self.set_flight_attr(str(int(flight_number)))
            return

        self.warning("Only %d out of %d flight numbers were the same."
                     " Flight Number attribute will be set as None.",
                     occurrences or 0, len(num.array))
        self.set_flight_attr(None)