示例#1
0
    def normalize(self, ds_source):
        """
        Apply the normalizing operation to a given `DataSet`.

        A new `DataSet` is built: the name attribute of `ds_source` is
        copied, and every data instance is re-added with its original index
        number and label number but with rescaled attribute values.
        `ds_source` is only read through its getters.

        The attribute value at position ``index`` is mapped to::

            (value - self.min[index]) * self.max[index]
                * (self.upper_bound - self.lower_bound) + self.lower_bound

        :Parameters:
            ds_source : `DataSet`
                Data set to normalize.

        :Returns:
            `DataSet` : Normalized data set, flagged as numerized.

        :Raises NpyDataTypeError:
            If the given `DataSet` has not been numerized.
        """
        if not ds_source.is_numerized:
            # Call syntax (instead of ``raise Cls, msg``) is valid on both
            # Python 2 and Python 3.
            raise NpyDataTypeError('ds_source must be numerized first.')

        ds_dest = DataSet()
        ds_dest.set_name_attribute(ds_source.get_name_attribute())

        # Width of the target interval; invariant across all instances.
        span = self.upper_bound - self.lower_bound

        for data_instance_old in ds_source.get_data_instances():
            # NOTE(review): self.max[index] is used as a multiplicative
            # factor, so it presumably holds a precomputed scale such as
            # 1 / (max - min) rather than the raw maximum -- confirm against
            # the code that fills self.min / self.max.
            attributes_new = [
                (value - self.min[index]) * self.max[index] * span
                + self.lower_bound
                for index, value
                in enumerate(data_instance_old.get_attributes())
            ]

            ds_dest.add_data_instance(
                data_instance_old.get_index_number(),
                attributes_new,
                data_instance_old.get_label_number(),
            )

        # The output holds only numeric attribute values.
        ds_dest.is_numerized = True
        return ds_dest
示例#2
0
    def numerize(self, ds_source):
        """
        Apply the numerizing operation to a given `DataSet`.

        A new `DataSet` is built: the name attribute of `ds_source` is
        copied, and every data instance is re-added with its original index
        number, its attribute values converted to numbers and its label
        converted to a number.  `ds_source` is only read through its
        getters.

        Conversion is attempted with ``float()`` first.  When that raises
        ``ValueError``, the value is handed to
        `attribute_string_to_number` (attributes, together with the
        attribute position) or `label_string_to_number` (labels).

        :Parameters:
            ds_source : `DataSet`
                Data set to numerize.

        :Returns:
            `DataSet` : Numerized data set.

        :Raises NpyDataTypeError:
            If ds_source has already been numerized.
        """
        if ds_source.is_numerized:
            # Call syntax (instead of ``raise Cls, msg``) is valid on both
            # Python 2 and Python 3.
            raise NpyDataTypeError('ds_source has already been numerized.')

        ds_dest = DataSet()
        ds_dest.set_name_attribute(ds_source.get_name_attribute())

        for data_instance_old in ds_source.get_data_instances():

            # Process the attribute values
            attributes = []
            for index, value in enumerate(data_instance_old.get_attributes()):
                try:
                    number = float(value)
                except ValueError:
                    # Every time a non-float attribute value is met,
                    # it is handed to the numerizer, which returns the
                    # number to use for it at this attribute position.
                    number = self.attribute_string_to_number(value, index)
                attributes.append(number)

            # Process the label value
            label_old = data_instance_old.get_label_number()
            try:
                label_new = float(label_old)
            except ValueError:
                # Every time a non-float label value is met,
                # it is handed to the numerizer, which returns the
                # number to use for it.
                label_new = self.label_string_to_number(label_old)

            ds_dest.add_data_instance(
                data_instance_old.get_index_number(),
                attributes,
                label_new,
            )

        ds_dest.is_numerized = True
        return ds_dest