Python FeatureStatistics示例

编程语言: Python

命名空间/包名称: InnerEye.ML.utils.features_util

hotexamples.com的示例: 3

Python FeatureStatistics - 共找到 3 个示例。以下是从开源项目中提取的、评价最高的 InnerEye.ML.utils.features_util.FeatureStatistics 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

from_data_sources(3)

示例#1

显示文件

def test_standardize_features() -> None:
    """
    Check that FeatureStatistics can standardize non-image features to mean 0 and std 1.

    The test builds 1000 sequences of 3 to 5 items each. Every item carries a (2, 3) feature tensor drawn
    from a Gaussian with known mean and standard deviation, with occasional infinite entries and one
    constant feature at position [0, 0].
    :return:
    """
    set_random_seed(1234)
    feature_size = (2, 3)
    expected_mean = torch.tensor([[123, 2, 3], [4, 5, 6]])
    expected_std = torch.tensor([[0, 2, 3], [3, 4, 4]])

    def create_item(row: int, col: int) -> ScalarItem:
        # Gaussian features everywhere; feature [0, 0] has std 0 and is pinned to its mean further down.
        # Normalization must be able to cope with such a constant feature when dividing by the std.
        noise = torch.randn(size=feature_size, dtype=torch.float32)
        values = noise * expected_std + expected_mean
        # With roughly 10% probability, overwrite the entry at (row, col) with an infinite value.
        if torch.rand(1) > 0.9:
            values[row, col] = np.inf
        # Written last, so the constant feature is never left infinite.
        values[0, 0] = expected_mean[0, 0]
        return ScalarItem(metadata=GeneralSampleMetadata(id="foo"),
                          numerical_non_image_features=values,
                          categorical_non_image_features=values,
                          label=torch.tensor([]),
                          images=torch.tensor([]),
                          segmentations=torch.tensor([]))

    sequences: List[ClassificationItemSequence] = []
    for seq_index in range(1000):
        # The sequence length is drawn before any of the item features, then one item is created per position.
        num_items = torch.randint(low=3, high=6, size=(1, )).item()
        row, col = seq_index % 2, seq_index % 3
        sequence_items = [create_item(row, col) for _ in range(num_items)]  # type: ignore
        sequences.append(ClassificationItemSequence(id="foo", items=sequence_items))

    statistics = FeatureStatistics.from_data_sources(sequences)
    assert statistics.mean.shape == feature_size
    assert statistics.std.shape == feature_size
    assert_tensors_equal(statistics.mean, expected_mean, 0.07)
    assert_tensors_equal(statistics.std, expected_std, 0.07)

    # Statistics of the standardized data: mean should be 0 and std should be 1, except for the constant
    # feature. That one should be left untouched, so its mean stays at the original value and its std at 0.
    target_mean = torch.zeros(feature_size)
    target_mean[0, 0] = expected_mean[0, 0]
    target_std = torch.ones(feature_size)
    target_std[0, 0] = 0.0
    restandardized = FeatureStatistics.from_data_sources(statistics.standardize(sequences))
    assert_tensors_equal(restandardized.mean, target_mean, abs=1e-5)
    assert_tensors_equal(restandardized.std, target_std, abs=1e-5)

示例#2

显示文件

def test_standardize_features_when_singleton(is_sequence: bool) -> None:
    """
    Check feature standardization on a dataset that contains exactly one entry.

    :param is_sequence: If True, the single data source is wrapped in a ClassificationItemSequence,
    otherwise it is used directly as a scalar data source.
    """
    numerical_features = torch.ones((1, 3))
    categorical_features = torch.tensor([[0, 1, 1], [1, 0, 0]])

    # Both source types take the same constructor arguments, only the class differs.
    source_type = SequenceDataSource if is_sequence else ScalarDataSource
    item: Union[SequenceDataSource, ScalarDataSource] = source_type(
        metadata=GeneralSampleMetadata(id="foo"),
        numerical_non_image_features=numerical_features,
        categorical_non_image_features=categorical_features,
        label=torch.tensor([]),
        channel_files=[])
    sources: Union[ListOfSequences, List[ScalarDataSource]]
    if is_sequence:
        sources = [ClassificationItemSequence(id="foo", items=[item])]
    else:
        sources = [item]
    mean_std = FeatureStatistics.from_data_sources(sources)

    assert_tensors_equal(mean_std.mean, numerical_features)
    # With a single element there is no standard deviation to compute, so it comes out as nan.
    assert torch.all(torch.isnan(mean_std.std))

    # Standardizing with such statistics should leave the features unchanged (similar to constant features).
    first_source = mean_std.standardize(sources)[0]
    standardized_item = first_source.items[0] if is_sequence else first_source
    assert_tensors_equal(standardized_item.numerical_non_image_features,
                         numerical_features)
    assert_tensors_equal(standardized_item.categorical_non_image_features,
                         categorical_features)

示例#3

显示文件

文件： scalar_dataset.py 项目： JacopoTeneggi/InnerEye-DeepLearning

 def standardize_non_imaging_features(self) -> None:
     """
     Standardizes the non-image features of the items stored in this object, replacing self.items with
     the result of FeatureStatistics.standardize.
     If self.feature_statistics is already set, those statistics are applied (use that when the data set
     contains validation or test sequences). Otherwise the statistics are computed from self.items
     and stored in self.feature_statistics (use that for the training set).
     Nothing happens when self.items is empty.
     """
     if not self.items:
         return
     statistics = self.feature_statistics
     if not statistics:
         # No statistics available yet: derive them from the data held by this object.
         statistics = FeatureStatistics.from_data_sources(self.items)
     self.feature_statistics = statistics
     self.items = self.feature_statistics.standardize(self.items)