Exemplo n.º 1
0
    def reverse_transform(self, data):
        """Reverse the transformed data to the original format.

        The data goes through the hyper transformer's ``reverse_transform``
        and then through the ``reverse_transform`` of every constraint in
        ``self._constraints``. After that, each field in
        ``self._fields_metadata`` is rebuilt:

        * ``id`` fields that are missing from the reversed data are created
          with ``self._make_ids``;
        * fields flagged as ``pii`` are replaced with values produced by the
          faker obtained from ``self._get_faker``;
        * every other field is taken from the reversed data as is.

        The non-null values of each field are cast to ``self._dtypes[name]``.

        Args:
            data (pandas.DataFrame):
                Data to be reverse transformed.

        Returns:
            pandas.DataFrame:
                The reversed data, restricted to ``self._field_names``.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        reversed_data = self._hyper_transformer.reverse_transform(data)

        for constraint in self._constraints:
            reversed_data = constraint.reverse_transform(reversed_data)

        for name, field_metadata in self._fields_metadata.items():
            field_type = field_metadata['type']
            if field_type == 'id' and name not in reversed_data:
                field_data = self._make_ids(name, field_metadata, len(reversed_data))
            elif field_metadata.get('pii', False):
                faker = self._get_faker(field_metadata['pii_category'])
                # Column assignment aligns on index labels, so the generated
                # values must carry the index of the reversed data. With the
                # default RangeIndex they would turn into NaN whenever the
                # reversed data is not indexed 0..n-1.
                field_data = pd.Series(
                    [faker() for _ in range(len(reversed_data))],
                    index=reversed_data.index,
                )
            else:
                field_data = reversed_data[name]

            # Only non-null values are cast; rows dropped by the mask are
            # left as missing values by the index-aligned assignment.
            reversed_data[name] = field_data[field_data.notnull()].astype(self._dtypes[name])

        return reversed_data[self._field_names]
Exemplo n.º 2
0
    def reverse_transform(self, data):
        """Reverse the transformed data to the original format.

        The data goes through the hyper transformer's ``reverse_transform``
        and then through the ``reverse_transform`` of every constraint in
        ``self._constraints``. After that, each field returned by
        ``self.get_dtypes(ids=True)`` is rebuilt:

        * fields whose type is not ``id`` are taken from the reversed data;
        * ``id`` fields flagged as ``pii`` are filled with values produced by
          the faker obtained from ``self._get_faker``;
        * any other ``id`` field is filled with the integers
          ``0 .. len(reversed_data) - 1``.

        Null values are dropped from each field before casting it to its
        dtype.

        Args:
            data (pandas.DataFrame):
                Data to be reverse transformed.

        Returns:
            pandas.DataFrame:
                The reversed data, restricted to ``self._field_names``.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        reversed_data = self._hyper_transformer.reverse_transform(data)

        for constraint in self._constraints:
            reversed_data = constraint.reverse_transform(reversed_data)

        fields = self._fields_metadata
        for name, dtype in self.get_dtypes(ids=True).items():
            field_metadata = fields[name]
            field_type = field_metadata['type']
            if field_type != 'id':
                field_data = reversed_data[name]
            elif field_metadata.get('pii', False):
                faker = self._get_faker(field_metadata['pii_category'])
                # Generated columns are built on the index of the reversed
                # data: column assignment aligns on index labels, so a default
                # RangeIndex would yield NaN for any other index.
                field_data = pd.Series(
                    [faker() for _ in range(len(reversed_data))],
                    index=reversed_data.index,
                )
            else:
                # Same alignment concern as above for the sequential ids.
                field_data = pd.Series(
                    np.arange(len(reversed_data)),
                    index=reversed_data.index,
                )

            # Rows dropped here are left as missing values by the
            # index-aligned assignment.
            reversed_data[name] = field_data.dropna().astype(dtype)

        return reversed_data[self._field_names]
Exemplo n.º 3
0
    def transform(self, data):
        """Transform the given table data.

        Only the non-id fields reported by ``self.get_dtypes(ids=False)``
        that are present in ``data`` are kept. The selection is anonymized
        with ``self._anonymize``, passed through the ``transform`` of every
        constraint in ``self._constraints`` and finally handed to the hyper
        transformer.

        Args:
            data (pandas.DataFrame):
                Table data.

        Returns:
            pandas.DataFrame:
                Transformed data.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        # Keep the known fields in the order given by the dtypes, skipping
        # any that the incoming data does not contain.
        selected = []
        for column in self.get_dtypes(ids=False):
            if column not in data.columns:
                continue

            selected.append(column)

        LOGGER.debug('Anonymizing table %s', self.name)
        transformed = self._anonymize(data[selected])

        LOGGER.debug('Transforming constraints for table %s', self.name)
        for constraint in self._constraints:
            transformed = constraint.transform(transformed)

        LOGGER.debug('Transforming table %s', self.name)
        hyper_transformer = self._hyper_transformer
        return hyper_transformer.transform(transformed)
Exemplo n.º 4
0
    def reverse_transform(self, data):
        """Reverse the transformed data to the original format.

        The data goes through the hyper transformer's ``reverse_transform``;
        if that raises ``rdt.errors.NotFittedError`` the input is used
        unchanged. The constraints in ``self._constraints_to_reverse`` are
        then reversed, last one first. After that, each field in
        ``self._fields_metadata`` is rebuilt:

        * ``id`` fields that are missing from the reversed data are created
          with ``self._make_ids``;
        * fields flagged as ``pii`` are replaced with the output of
          ``Table._get_fake_values``;
        * every other field is taken from the reversed data as is.

        The non-null values of each field are cast to ``self._dtypes[name]``.

        Args:
            data (pandas.DataFrame):
                Data to be reverse transformed.

        Returns:
            pandas.DataFrame:
                The reversed data, restricted to ``self._field_names``.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        try:
            reversed_data = self._hyper_transformer.reverse_transform(data)
        except rdt.errors.NotFittedError:
            # The hyper transformer has nothing to undo; carry on with the
            # data exactly as it was received.
            reversed_data = data

        for constraint in reversed(self._constraints_to_reverse):
            reversed_data = constraint.reverse_transform(reversed_data)

        for name, field_metadata in self._fields_metadata.items():
            field_type = field_metadata['type']
            if field_type == 'id' and name not in reversed_data:
                field_data = self._make_ids(field_metadata, len(reversed_data))
            elif field_metadata.get('pii', False):
                # Column assignment aligns on index labels, so the generated
                # values must carry the index of the reversed data. With the
                # default RangeIndex they would turn into NaN whenever the
                # reversed data is not indexed 0..n-1.
                # NOTE(review): assumes _get_fake_values returns a plain
                # sequence with one value per row - confirm.
                field_data = pd.Series(
                    Table._get_fake_values(field_metadata, len(reversed_data)),
                    index=reversed_data.index,
                )
            else:
                field_data = reversed_data[name]

            # Only non-null values are cast; rows dropped by the mask are
            # left as missing values by the index-aligned assignment.
            reversed_data[name] = field_data[field_data.notnull()].astype(
                self._dtypes[name])

        return reversed_data[self._field_names]
Exemplo n.º 5
0
    def transform(self, data):
        """Transform the given table data.

        The non-id fields reported by ``self.get_dtypes(ids=False)`` are
        selected from ``data``, anonymized with ``self._anonymize``, passed
        through the ``transform`` of every constraint in
        ``self._constraints`` and finally handed to the hyper transformer.

        Args:
            data (pandas.DataFrame):
                Table data. It must contain every field reported by
                ``self.get_dtypes(ids=False)``.

        Returns:
            pandas.DataFrame:
                Transformed data.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        # Select with an explicit list of field names: pandas 2.0+ raises a
        # TypeError when a dict is used as an indexer, and iterating the
        # dtypes yields exactly the names the dict indexer used to select.
        fields = list(self.get_dtypes(ids=False))
        data = self._anonymize(data[fields])

        for constraint in self._constraints:
            data = constraint.transform(data)

        return self._hyper_transformer.transform(data)
Exemplo n.º 6
0
    def transform(self, data, on_missing_column='error'):
        """Transform the given table data.

        Only the non-id fields reported by ``self.get_dtypes(ids=False)``
        that are present in ``data`` are kept. The selection is anonymized
        with ``self._anonymize``, checked with
        ``self._validate_data_on_constraints``, transformed with
        ``self._transform_constraints`` and finally handed to the hyper
        transformer.

        Args:
            data (pandas.DataFrame):
                Table data.
            on_missing_column (str):
                Forwarded to ``self._transform_constraints``.
                If the value is error, then a `MissingConstraintColumnError` is raised.
                If the value is drop, then the columns involved in the constraint that
                are present in data will be dropped.

        Returns:
            pandas.DataFrame:
                Transformed data.

        Raises:
            MetadataNotFittedError:
                If ``self.fitted`` is falsy.
            ConstraintsNotMetError:
                If the table data is not valid for the provided constraints.
        """
        if not self.fitted:
            raise MetadataNotFittedError()

        # Known fields, in dtype order, limited to those present in the data.
        present_fields = list(filter(
            lambda column: column in data.columns,
            self.get_dtypes(ids=False),
        ))

        LOGGER.debug('Anonymizing table %s', self.name)
        anonymized = self._anonymize(data[present_fields])

        # Validation runs on the anonymized data, before any constraint
        # transformation is applied.
        self._validate_data_on_constraints(anonymized)

        LOGGER.debug('Transforming constraints for table %s', self.name)
        constrained = self._transform_constraints(anonymized, on_missing_column)

        LOGGER.debug('Transforming table %s', self.name)
        hyper_transformer = self._hyper_transformer
        return hyper_transformer.transform(constrained)