示例#1
0
 def test_create_complete(self):
     feature = Feature(self.minimum_data)
     feature.add_custom_data(self.additional_data)
     eq_('alias/project', feature.project)
     eq_('status_200', feature.name)
     eq_(0.0, feature.min30)
     eq_(0.0453, feature.mean30)
     eq_(1119.0, feature.max30)
     eq_(28.5136, feature.var30)
     eq_(5.3398, feature.std30)
     eq_(0.0, feature.median30)
示例#2
0
    def execute(self, *args, **kwargs):
        """ Computes simple statistics over all examples of project and interval.
            All operations are calculated over entire matrix across columns
            (features) except percentile and histogram. After all calculations
            data is saved in feature set in one record per feature/project and one
            bin per stat/interval.
        """
        project_interval = "{0}_{1}".format(self.project.full_name,
                                            self.interval)

        labels, X = self.create_ndarray(
            self.session.query(Example).filter_by(p_interval=project_interval))

        tmp_stats = {
            'max': np.amax(X[:, 3:], 0),
            'min': np.amin(X[:, 3:], 0),
            'mean': np.mean(X[:, 3:], 0),
            'std': np.std(X[:, 3:], 0),
            'var': np.var(X[:, 3:], 0),
            'median': np.median(X[:, 3:], 0),
        }

        # reshape by feature
        for feature_index, feature_name in enumerate(labels[3:]):
            if feature_name == 'grouptime':
                continue
            custom_data = {}
            xkey = feature_index + 3
            feature = Feature({
                'project': self.project.full_name,
                'name': feature_name
            })
            hist, bin_hedges = np.histogram(X[:, xkey], bins=10)
            # cast to list are needed by persistence layer (array not supported)
            custom_data['histogram{0}'.format(
                self.interval)] = [list(hist), list(bin_hedges)]
            custom_data['percentile{0}'.format(self.interval)] = list(
                np.percentile(X[:, xkey], [25, 50, 75]))

            for stat_name, stat_value in tmp_stats.items():
                name = '{0}{1}'.format(stat_name, self.interval)
                value = float(stat_value[feature_index])  # cast needed
                custom_data[name] = value
            feature.add_custom_data(custom_data)
            self.session.add(feature)
    def execute(self, *args, **kwargs):
        """ Computes simple statistics over all examples of project and interval.
            All operations are calculated over entire matrix across columns
            (features) except percentile and histogram. After all calculations
            data is saved in feature set in one record per feature/project and one
            bin per stat/interval.
        """
        project_interval = "{0}_{1}".format(
            self.project.full_name, self.interval)

        labels, X = self.create_ndarray(
            self.session.query(Example).filter_by(p_interval=project_interval)
        )

        tmp_stats = {
            'max': np.amax(X[:, 3:], 0),
            'min': np.amin(X[:, 3:], 0),
            'mean': np.mean(X[:, 3:], 0),
            'std': np.std(X[:, 3:], 0),
            'var': np.var(X[:, 3:], 0),
            'median': np.median(X[:, 3:], 0),
        }

        # reshape by feature
        for feature_index, feature_name in enumerate(labels[3:]):
            if feature_name == 'grouptime':
                continue
            custom_data = {}
            xkey = feature_index + 3
            feature = Feature({'project': self.project.full_name, 'name': feature_name})
            hist, bin_hedges = np.histogram(X[:, xkey], bins=10)
            # cast to list are needed by persistence layer (array not supported)
            custom_data['histogram{0}'.format(self.interval)] = [list(hist), list(bin_hedges)]
            custom_data['percentile{0}'.format(self.interval)] = list(np.percentile(X[:, xkey], [25, 50, 75]))

            for stat_name, stat_value in tmp_stats.items():
                name = '{0}{1}'.format(stat_name, self.interval)
                value = float(stat_value[feature_index]) # cast needed
                custom_data[name] = value
            feature.add_custom_data(custom_data)
            self.session.add(feature)
示例#4
0
 def test_to_primitive(self):
     feature = Feature(self.minimum_data)
     feature.add_custom_data(self.additional_data)
     primitive = feature.to_primitive()
示例#5
0
 def test_validate_complete_data(self):
     feature = Feature(self.minimum_data)
     feature.add_custom_data(self.additional_data)
     feature.validate()