Example #1
    def setUp(self):

        self.maxDiff = None
        self.contamination = 0.1
        self.window_size = 2
        self.roc_floor = 0.  # relaxed from 0.8: the toy series below is too short for a stable ROC check
        # self.n_train = 200
        # self.n_test = 100
        # self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
        #     n_train=self.n_train, n_test=self.n_test,
        #     contamination=self.contamination, random_state=42)
        # self.X_train = d3m_dataframe(self.X_train, generate_metadata=True)
        # self.X_test = d3m_dataframe(self.X_test, generate_metadata=True)

        self.X_train = d3m_dataframe(
            {'data': [3., 4., 8., 16, 18, 13., 22., 36., 59., 128, 62, 67, 78, 100]},
            columns=['data'],
            generate_metadata=True)
        # One label per training sample (the original array had 13 entries for
        # 14 samples); labels only matter for the ROC check, which is disabled
        # here via roc_floor = 0.
        self.y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
        self.X_test = d3m_dataframe(
            {'data': [3., 4., 8.6, 13.4, 22.5, 17, 19.2, 36.1, 127, -23, 59.2]},
            columns=['data'],
            generate_metadata=True)
        # One label per test sample (the original array had 10 entries for 11
        # samples); 127 and -23 are the obvious outliers.
        self.y_test = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0])

        hyperparams_default = KDiscordODetector.metadata.get_hyperparams().defaults()
        hyperparams = hyperparams_default.replace({
            'contamination': self.contamination,
            'window_size': self.window_size,
            'return_subseq_inds': True,
        })
        self.primitive = KDiscordODetector(hyperparams=hyperparams)

        self.primitive.set_training_data(inputs=self.X_train)
        self.primitive.fit()
        self.prediction_labels = self.primitive.produce(
            inputs=self.X_test).value
        self.prediction_score = self.primitive.produce_score(
            inputs=self.X_test).value

        self.collective_common_test = CollectiveCommonTest(
            model=self.primitive._clf,
            X_train=self.X_train,
            y_train=self.y_train,
            X_test=self.X_test,
            y_test=self.y_test,
            roc_floor=self.roc_floor,
        )
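A minimal companion test method could sanity-check the artifacts built in setUp; the sketch below is hypothetical (the original test class's actual assertions are not shown) and only touches attributes defined above.

    def test_setup_artifacts(self):
        # produce() and produce_score() should both have yielded a value.
        self.assertIsNotNone(self.prediction_labels)
        self.assertIsNotNone(self.prediction_score)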
Example #2
    def set_training_data(self, *, inputs: Inputs) -> None:
        inputs_timeseries = d3m_dataframe(inputs.iloc[:, -1])
        inputs_d3mIndex = d3m_dataframe(inputs.iloc[:, 0])  # kept for reference; unused below
        if len(inputs_timeseries) == 0:
            print("Warning: the time-series input to the timeseries_featurization primitive is empty.")
            return
        column_name = inputs_timeseries.columns[0]
        self._training_inputs, self._target_names = inputs_timeseries, column_name
        self._training_outputs = inputs_timeseries
Example #3
    def log_likelihoods(self,
                        *,
                        outputs: Outputs,
                        inputs: Inputs,
                        timeout: float = None,
                        iterations: int = None) -> CallResult[Sequence[float]]:
        inputs = inputs.iloc[:, self._training_indices]  # Get ndarray
        outputs = outputs.iloc[:, self._target_column_indices]

        if len(inputs.columns) and len(outputs.columns):

            if outputs.shape[1] != self._n_classes:
                raise exceptions.InvalidArgumentValueError(
                    "\"outputs\" argument does not have the correct number of target columns."
                )

            log_proba = self._predict_log_proba(inputs, self._weights)

            # Making it always a list, even when only one target.
            if self._n_classes == 1:
                log_proba = [log_proba]
                classes = [self._classes_]
            else:
                classes = self._classes_

            samples_length = inputs.shape[0]

            log_likelihoods = []
            for k in range(self._n_classes):
                # We have to map each class to its internal (numerical) index used in the learner.
                # This allows "outputs" to contain string classes.
                outputs_column = outputs.iloc[:, k]
                classes_map = pandas.Series(np.arange(len(classes[k])),
                                            index=classes[k])
                mapped_outputs_column = outputs_column.map(classes_map)

                # For each target column (column in "outputs"), for each sample (row) we pick the log
                # likelihood for a given class.
                log_likelihoods.append(log_proba[k][np.arange(samples_length),
                                                    mapped_outputs_column])

            results = d3m_dataframe(dict(enumerate(log_likelihoods)),
                                    generate_metadata=True)
            results.columns = outputs.columns

            for k in range(self._n_classes):
                column_metadata = outputs.metadata.query_column(k)
                if 'name' in column_metadata:
                    results.metadata = results.metadata.update_column(
                        k, {'name': column_metadata['name']})

        else:
            results = d3m_dataframe(generate_metadata=True)

        return CallResult(results)
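The classes_map construction above (mapping string class labels to the learner's internal numerical indices) is easy to verify in isolation; a standalone sketch with hypothetical labels:

import numpy as np
import pandas as pd

classes = np.array(['cat', 'dog', 'fish'])         # learner's internal class order
classes_map = pd.Series(np.arange(len(classes)), index=classes)
outputs_column = pd.Series(['dog', 'cat', 'dog'])  # string targets from "outputs"
print(outputs_column.map(classes_map).tolist())    # [1, 0, 1]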
Example #4
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        with stopit.ThreadingTimeout(timeout) as timer:
            sk_inputs = inputs
            if self.hyperparams['use_semantic_types']:
                sk_inputs = inputs.iloc[:, self._training_indices]
            sk_output = self._model.predict(sk_inputs)
            if sparse.issparse(sk_output):
                sk_output = sk_output.toarray()
            output = d3m_dataframe(
                sk_output,
                columns=self._target_names if self._target_names else None,
                generate_metadata=False)
            output.metadata = inputs.metadata.clear(for_value=output,
                                                    generate_metadata=True)
            output.metadata = self._add_target_semantic_types(
                metadata=output.metadata,
                target_names=self._target_names,
                source=self)
            outputs = common_utils.combine_columns(
                return_result=self.hyperparams['return_result'],
                add_index_columns=self.hyperparams['add_index_columns'],
                inputs=inputs,
                column_indices=[],
                columns_list=[output])

        if timer.state == timer.EXECUTED:
            return CallResult(outputs)
        else:
            raise TimeoutError('BBNMLPClassifier exceeded time limit')
Example #5
    def _can_accept(
            cls, *, self, method_name: str,
            arguments: typing.Dict[str, typing.Union[metadata_module.Metadata,
                                                     type]],
            hyperparams: Hyperparams,
            outputs: Outputs) -> typing.Optional[metadata_module.DataMetadata]:
        output_metadata = super().can_accept(method_name=method_name,
                                             arguments=arguments,
                                             hyperparams=hyperparams)

        if 'inputs' not in arguments:
            return output_metadata

        inputs_metadata = typing.cast(metadata_module.DataMetadata,
                                      arguments['inputs'])

        metadata_lookup = cls._parse_metadata(metadata=inputs_metadata)
        #try:
        #    cls._parse_metadata(metadata = inputs_metadata)
        #except:
        #    return None

        # Row count of the primary resource (computed here for reference only).
        num_data = inputs_metadata.query(
            metadata_lookup['primary_resource_id']['selector'])['dimension']['length']

        outputs = d3m_dataframe(data={})
        metadata = outputs.metadata.update(
            (metadata_module.ALL_ELEMENTS, 0),
            inputs_metadata.query(metadata_lookup['targets']['selector']))

        return metadata
Example #6
    def _wrap_predictions(self, inputs: Inputs,
                          predictions: ndarray) -> Outputs:
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        target_columns_metadata = self._copy_inputs_metadata(
            inputs.metadata, self._training_indices, outputs.metadata,
            self.hyperparams)
        outputs.metadata = self._update_predictions_metadata(
            inputs.metadata, outputs, target_columns_metadata)
        return outputs
Example #7
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        with stopit.ThreadingTimeout(timeout) as timer:
            x = self._tfidf.transform(inputs).toarray()
            outputs = d3m_dataframe(x, generate_metadata=False)

            metadata = inputs.metadata.clear({
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                },
            }, for_value=outputs).update(
                (metadata_base.ALL_ELEMENTS,), {
                    'dimension': {
                        'length': outputs.shape[1],
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    },
                }
            ).update(
                (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS), {
                    # 'structural_type': self._v.dtype,
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                })

            # Set metadata attribute.
            outputs.metadata = metadata

        if timer.state == timer.EXECUTED:
            return CallResult(outputs)
        else:
            raise TimeoutError('BBNTfidfTransformer exceeded time limit')
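The clear-then-update chain above is the usual d3m idiom for rebuilding a DataFrame's metadata from scratch. A self-contained sketch of the same idiom (assuming only that the d3m package is installed):

import numpy as np
from d3m.container import DataFrame as d3m_dataframe
from d3m.metadata import base as metadata_base

df = d3m_dataframe(np.zeros((3, 2)), generate_metadata=True)
# Tag every cell as a generic Attribute, as the produce() above does.
df.metadata = df.metadata.update(
    (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS),
    {'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute']})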
Example #8
    def setUp(self):

        self.maxDiff = None
        self.n_train = 200
        self.n_test = 100
        self.contamination = 0.1
        self.roc_floor = 0.8
        self.X_train, self.y_train, self.X_test, self.y_test = generate_data(
            n_train=self.n_train,
            n_test=self.n_test,
            contamination=self.contamination,
            random_state=42)

        self.X_train = d3m_dataframe(self.X_train, generate_metadata=True)
        self.X_test = d3m_dataframe(self.X_test, generate_metadata=True)

        hyperparams_default = IsolationForest.metadata.get_hyperparams().defaults()
        hyperparams = hyperparams_default.replace({
            'contamination': self.contamination,
            'return_subseq_inds': True,
        })

        self.primitive = IsolationForest(hyperparams=hyperparams)

        self.primitive.set_training_data(inputs=self.X_train)
        self.primitive.fit()
        self.prediction_labels = self.primitive.produce(
            inputs=self.X_test).value
        self.prediction_score = self.primitive.produce_score(
            inputs=self.X_test).value

        self.uodbase_test = UODCommonTest(
            model=self.primitive._clf,
            X_train=self.X_train,
            y_train=self.y_train,
            X_test=self.X_test,
            y_test=self.y_test,
            roc_floor=self.roc_floor,
        )
Example #9
    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        """
        Wrap predictions into dataframe
        Args:
            inputs: Container Dataframe
            predictions: array-like data (n_samples, n_features)

        Returns:
            Dataframe
        """
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams)
        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
        return outputs
Example #10
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """
        Arguments
            - inputs: List( # Data
                         List( # Segments
                            [ context0, ..., contextN ], ... # for N-gram
                         )
                       ),

        Returns:
            - DataFrame [ num_samples, num_tokens ] of token frequencies
        """
        with stopit.ThreadingTimeout(timeout) as timer:
            x = seq_to_tokenfreq_csr(inputs, self._vocab).toarray()
            #outputs = List([ x[i] for i in range(x.shape[0]) ])
            outputs = d3m_dataframe(x, generate_metadata=False)

            metadata = inputs.metadata.clear({
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': [ 'https://metadata.datadrivendiscovery.org/types/Table' ],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': [ 'https://metadata.datadrivendiscovery.org/types/TabularRow' ]
                }
            }, for_value=outputs).update(
                ((metadata_base.ALL_ELEMENTS,)), {
                'dimension': {
                    'length': outputs.shape[1],
                    'name': 'columns',
                    'semantic_types': [ 'https://metadata.datadrivendiscovery.org/types/TabularColumn' ]
                }
                }
            ).update(
                ((metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS)), {
                #'structural_type': self._v.dtype,
                'semantic_types': [ 'https://metadata.datadrivendiscovery.org/types/Attribute' ],
                }
            )

            # Set metadata attribute.
            outputs.metadata = metadata

        if timer.state == timer.EXECUTED:
            return CallResult(outputs)
        else:
            raise TimeoutError('SequenceToBagOfTokens exceeded time limit')
Example #11
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        if not self._fitted:
            _logger.warning("produce() called before fit(); returning no output.")
            return CallResult(None, self._has_finished, self._iterations_done)

        self.n_predict_step = inputs.shape[0]
        self.pred_point_test = tf.slice(
            self.prediction_method, (self.n_total - self.n_predict_step, 0), (self.n_predict_step, 1))
        self.pred_lower_test = tf.slice(
            self.prediction_method, (self.n_total - self.n_predict_step, 1), (self.n_predict_step, 1))
        self.pred_upper_test = tf.slice(
            self.prediction_method, (self.n_total - self.n_predict_step, 2), (self.n_predict_step, 1))

        with tf.Session(config=self.tf_config) as sess:
            sess.run(tf.global_variables_initializer())
            self._load_weights(sess)
            self._current_cell_state = np.zeros(
                (self.hyperparams["n_batch"], self.hyperparams["n_neurons"]), dtype=np.float32)
            pred_test, pred_test_lower, pred_test_upper = sess.run(
                [self.pred_point_test, self.pred_lower_test, self.pred_upper_test],
                feed_dict={
                    self.batchX_placeholder: self.x,
                    self.cell_state: self._current_cell_state,
                }
            )
            pred_test = self.scaler.inverse_transform(pred_test)
            pred_test_lower = self.scaler.inverse_transform(pred_test_lower)
            pred_test_upper = self.scaler.inverse_transform(pred_test_upper)
            pred_test_lower = np.minimum(
                pred_test, np.minimum(pred_test_lower, pred_test_upper))
            pred_test_upper = np.maximum(
                pred_test, np.maximum(pred_test_upper, pred_test_lower))

            _logger.info(pred_test.tolist())

        res = pred_test.ravel().tolist()
        output = d3m_dataframe({'d3mIndex': inputs['d3mIndex'], self._target_name: res})
        output.metadata = inputs.metadata.clear(
            source=self, for_value=output, generate_metadata=True)
        meta_d3mIndex = {
            'name': 'd3mIndex',
            'structural_type': int,
            'semantic_types': ['http://schema.org/Integer',
                               'https://metadata.datadrivendiscovery.org/types/PrimaryKey'],
        }
        meta_target = {
            'name': self._target_name,
            'structural_type': float,
            'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Target',
                               'https://metadata.datadrivendiscovery.org/types/PredictedTarget'],
        }
        output.metadata = output.metadata.update(selector=(ALL_ELEMENTS, 0), metadata=meta_d3mIndex)
        output.metadata = output.metadata.update(selector=(ALL_ELEMENTS, 1), metadata=meta_target)
        self._has_finished = True
        self._iterations_done = True
        return CallResult(output, self._has_finished, self._iterations_done)
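The min/max clamping before the return exists because the network's lower/upper interval outputs can cross the point prediction after inverse scaling; a toy numpy illustration (hypothetical numbers):

import numpy as np

point = np.array([10.0, 12.0])
lower = np.array([11.0, 9.0])   # crosses the point prediction at index 0
upper = np.array([9.5, 15.0])   # crosses the point prediction at index 1
lower = np.minimum(point, np.minimum(lower, upper))
upper = np.maximum(point, np.maximum(upper, lower))
print(lower, upper)  # approximately [9.5 9.] [10. 15.]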
Example #12
    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        arima_inputs = inputs
        if self.hyperparams['use_semantic_types']:
            # NOTE: sk_inputs is computed but never used below; the forecast
            # length is driven by the full input frame.
            sk_inputs = inputs.iloc[:, self._training_indices]
        sk_output = self._clf.predict(n_periods=len(arima_inputs))
        output = d3m_dataframe(sk_output, generate_metadata=False, source=self)
        output.metadata = inputs.metadata.clear(
            source=self, for_value=output, generate_metadata=True)
        output.metadata = self._add_target_semantic_types(
            metadata=output.metadata, target_names=self._target_names, source=self)
        if not self.hyperparams['use_semantic_types']:
            return CallResult(output)
        # outputs = common_utils.combine_columns(return_result=self.hyperparams['return_result'],
        #                                        add_index_columns=self.hyperparams['add_index_columns'],
        #                                        inputs=inputs, column_indices=self._training_indices, columns_list=[output], source=self)

        return CallResult(output)
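The predict(n_periods=...) call suggests self._clf is a pmdarima-style ARIMA model (an assumption; its construction is not shown). A minimal standalone sketch of that API:

import numpy as np
import pmdarima as pm

y = np.sin(np.linspace(0, 10, 100))        # toy series
model = pm.ARIMA(order=(1, 0, 0)).fit(y)
forecast = model.predict(n_periods=5)      # one forecast per future period
print(forecast.shape)                      # (5,)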
Example #13
    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        """
        Wrap predictions into dataframe
        Args:
            inputs: Container Dataframe
            predictions: array-like data (n_samples, n_features)

        Returns:
            Dataframe
        """
        outputs = d3m_dataframe(predictions, generate_metadata=True)
        # target_columns_metadata = self._copy_inputs_metadata(inputs.metadata, self._training_indices, outputs.metadata,
        #                                                      self.hyperparams)
        target_columns_metadata = self._add_target_columns_metadata(outputs.metadata, self.hyperparams, self.primitiveNo)
        outputs.metadata = self._update_predictions_metadata(inputs.metadata, outputs, target_columns_metadata)
        # print(outputs.metadata.to_internal_simple_structure())

        return outputs
Example #14
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        """
        Arguments:
            - inputs: Dataset

        Returns:
            - DataFrame with the single target column, one row per sample
        """

        with stopit.ThreadingTimeout(timeout) as timer:
            metadata_lookup = self.__class__._parse_metadata(
                metadata=inputs.metadata)
            if not metadata_lookup:
                # NOTE: returning a bare None does not match the declared
                # CallResult[Outputs] return type; CallResult(None) would.
                return None

            primary_key_name = inputs.metadata.query(
                metadata_lookup['primary_key']['selector'])['name']
            targets_name = inputs.metadata.query(
                metadata_lookup['targets']['selector'])['name']
            #outputs = d3m_dataframe(inputs[metadata_lookup['targets']['selector'][0]][metadata_lookup['targets']['selector'][-1]])
            outputs = d3m_dataframe({
                targets_name:
                inputs[metadata_lookup['targets']['selector'][0]][targets_name]
            })
            outputs.metadata = outputs.metadata.update(
                (metadata_module.ALL_ELEMENTS, 0),
                inputs.metadata.query(metadata_lookup['targets']['selector']))

        if timer.state == timer.EXECUTED:
            return CallResult(outputs)
        else:
            raise TimeoutError('TargetsReader exceeded time limit')
Example #15
    def _wrap_predictions(self, inputs: Inputs, predictions: ndarray) -> Outputs:
        outputs = d3m_dataframe(predictions, generate_metadata=False)
        outputs.metadata = self._update_predictions_metadata(
            inputs.metadata, outputs, self._target_columns_metadata)
        return outputs
Example #16
    def test_basic(self):
        self.maxDiff = None

        # this_path is assumed to be the test file's directory, e.g.
        # this_path = os.path.dirname(__file__) at module level.
        dataset_fname = os.path.join(
            this_path,
            '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv')
        dataset = pd.read_csv(dataset_fname)
        # dataset = np.random.rand(1000)

        main = d3m_dataframe(dataset, generate_metadata=True)
        # print(main)

        hyperparams_class = SKPowerTransformer.SKPowerTransformer.metadata.get_hyperparams()
        primitive = SKPowerTransformer.SKPowerTransformer(
            hyperparams=hyperparams_class.defaults())
        primitive.set_training_data(inputs=main)
        primitive.fit()
        new_main = primitive.produce(inputs=main).value

        test_data = new_main.values[:, 2]
        # hist_data = new_main.values
        std_normal_samples = np.random.randn(len(test_data))

        # Plot the distribution
        # import matplotlib.pyplot as plt
        # plt.hist(test_data, bins=100, alpha=0.6)
        # plt.hist(std_normal_samples, bins=100, alpha=0.6)
        # plt.legend(labels=['PowerTransformer', 'Standard Gaussian'], loc='best')
        # plt.savefig('./fig/test_SKPowerTransformer.png')
        # plt.close()
        # plt.show()

        # Centering check: after the power transform the column should have
        # (approximately) zero mean and unit standard deviation.
        new_mean, new_std = test_data.mean(), test_data.std()
        mean_mse = new_mean**2
        std_mse = (new_std - 1)**2
        # print(mean_mse, std_mse)
        self.assertAlmostEqual(float(mean_mse), 0., delta=1e-5)
        self.assertAlmostEqual(float(std_mse), 0., delta=1e-5)
        #
        # print(main.metadata.to_internal_simple_structure())
        # print(new_main.metadata.to_internal_simple_structure())

        self.assertEqual(
            utils.to_json_structure(
                new_main.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    # 'top_level': 'main',
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 7027,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 4,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'd3mIndex',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'timestamp',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'value',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 3],
                'metadata': {
                    'name': 'ground_truth',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }])

        params = primitive.get_params()
        primitive.set_params(params=params)
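The zero-mean/unit-variance assertions mirror what scikit-learn's PowerTransformer does with its default standardize=True; a direct sketch (assumption: SKPowerTransformer wraps sklearn.preprocessing.PowerTransformer):

import numpy as np
from sklearn.preprocessing import PowerTransformer

x = np.random.exponential(size=(1000, 1))             # skewed input
z = PowerTransformer().fit_transform(x)               # yeo-johnson + standardize
print(abs(z.mean()) < 1e-6, abs(z.std() - 1) < 1e-2)  # True True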
Example #17
    def test_basic(self):
        self.maxDiff = None
        curr_path = os.path.dirname(__file__)
        dataset_fname = os.path.join(
            curr_path,
            '../../datasets/anomaly/kpi/TRAIN/dataset_TRAIN/tables/learningData.csv'
        )
        dataset = pd.read_csv(dataset_fname)
        # print(dataset.columns)
        value = dataset['value']
        main = d3m_dataframe(value, generate_metadata=True)

        ################## Test Wavelet transform ##################

        hyperparams_default = WaveletTransformer.metadata.get_hyperparams().defaults()
        hyperparams = hyperparams_default.replace({
            'wavelet': 'db8',
            'level': 2,
            'inverse': 0,
            'return_result': 'new'
        })

        primitive = WaveletTransformer(hyperparams=hyperparams)
        new_main = primitive.produce(inputs=main).value

        # print(new_main)
        # print(mean_mse, std_mse)

        # self.assertAlmostEqual(mean_mse.__float__(), 0., delta=1e-8)
        # self.assertAlmostEquael(std_mse.__float__(), 0., delta=1e-8)

        # print(main.metadata.to_internal_simple_structure())
        # print(new_main.metadata.to_internal_simple_structure())

        self.assertEqual(
            utils.to_json_structure(
                new_main.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    # 'top_level': 'main',
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 3521,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 3,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'value',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 1],
                'metadata': {
                    'name': 'output_1',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 2],
                'metadata': {
                    'name': 'output_2',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }])

        ################## Test inverse transform ##################

        hyperparams = hyperparams_default.replace({'inverse': 1})

        primitive = WaveletTransformer(hyperparams=hyperparams)
        main_recover = primitive.produce(inputs=main).value

        # assertAlmostEqual does not compare lists element-wise, so compare
        # the recovered and original values pairwise instead.
        for recovered, original in zip(main_recover.values.ravel().tolist(),
                                       main.values.ravel().tolist()):
            self.assertAlmostEqual(recovered, original, delta=1e-6)
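        # Sanity sketch (assumption: WaveletTransformer wraps pywt): the same
        # forward/inverse round trip reproduced directly with pywt.
        import numpy as np
        import pywt
        x = np.random.randn(128)
        coeffs = pywt.wavedec(x, 'db8', level=2)   # [cA2, cD2, cD1]
        x_rec = pywt.waverec(coeffs, 'db8')
        self.assertTrue(np.allclose(x, x_rec[:len(x)]))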
        # print(main.metadata.to_internal_simple_structure())
        # print(main_recover.metadata.to_internal_simple_structure())

        self.assertEqual(
            utils.to_json_structure(
                main_recover.metadata.to_internal_simple_structure()),
            [{
                'selector': [],
                'metadata': {
                    # 'top_level': 'main',
                    'schema': metadata_base.CONTAINER_SCHEMA_VERSION,
                    'structural_type': 'd3m.container.pandas.DataFrame',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                    'dimension': {
                        'name': 'rows',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                        'length': 7027,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__'],
                'metadata': {
                    'dimension': {
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                        'length': 1,
                    },
                },
            }, {
                'selector': ['__ALL_ELEMENTS__', 0],
                'metadata': {
                    'name': 'value',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                    'structural_type': 'numpy.float64',
                },
            }])

        params = primitive.get_params()
        primitive.set_params(params=params)
Example #18
    def produce(self,
                *,
                inputs: Inputs,
                timeout: float = None,
                iterations: int = None) -> CallResult[Outputs]:
        with stopit.ThreadingTimeout(timeout) as timer:
            num_data = len(inputs)
            outputs = np.empty((num_data, self.hyperparams['ivec_dim']),
                               dtype=self._v.dtype)
            VtV = compute_VtV(self._v, self._gmm.n_components)
            I = np.eye(self.hyperparams['ivec_dim'], dtype=self._v.dtype)
            for idx in range(num_data):
                X = inputs[idx]
                if len(X.shape) != 2:
                    outputs[idx] = np.zeros((self.hyperparams['ivec_dim']))
                    continue
                gamma = self._gmm.predict_proba(X)
                N0 = gamma.T.sum(axis=1)
                F0 = gamma.T.dot(X)
                N0, F0 = normalize_stats(N0, F0, self._gmm.means_,
                                         self._gmm.precisions_cholesky_)
                ivec = estimate_i(row(N0.astype(self._v.dtype)),
                                  row(F0.astype(self._v.dtype)), self._v, VtV,
                                  I)
                outputs[idx] = ivec.flatten()
            # Optionally L2-normalize the extracted i-vectors.
            if self.hyperparams['ivec_normalize']:
                outputs = preprocessing.normalize(outputs, norm='l2')
            outputs = d3m_dataframe(outputs, generate_metadata=False)

            metadata = inputs.metadata.clear({
                'schema': metadata_module.CONTAINER_SCHEMA_VERSION,
                'structural_type': type(outputs),
                'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Table'],
                'dimension': {
                    'length': outputs.shape[0],
                    'name': 'rows',
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularRow'],
                },
            }, for_value=outputs).update(
                (metadata_base.ALL_ELEMENTS,), {
                    'dimension': {
                        'length': outputs.shape[1],
                        'name': 'columns',
                        'semantic_types': ['https://metadata.datadrivendiscovery.org/types/TabularColumn'],
                    },
                }
            ).update(
                (metadata_base.ALL_ELEMENTS, metadata_base.ALL_ELEMENTS), {
                    # 'structural_type': self._v.dtype,
                    'semantic_types': ['https://metadata.datadrivendiscovery.org/types/Attribute'],
                })

            # Set metadata attribute.
            outputs.metadata = metadata

        if timer.state == timer.EXECUTED:
            return CallResult(outputs)
        else:
            raise TimeoutError('IVectorExtractor exceeded time limit')
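The Baum-Welch statistics in the loop above have simple shapes worth pinning down; a toy numpy check (hypothetical sizes):

import numpy as np

n_frames, n_components, n_dims = 5, 3, 2
gamma = np.full((n_frames, n_components), 1.0 / n_components)  # GMM posteriors
X = np.random.randn(n_frames, n_dims)                          # feature frames
N0 = gamma.T.sum(axis=1)   # zeroth-order stats, shape (n_components,)
F0 = gamma.T.dot(X)        # first-order stats, shape (n_components, n_dims)
print(N0.shape, F0.shape)  # (3,) (3, 2)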