def test_load_single_item_1() -> None:
    """
    Build a data source from all rows of one subject, where some of the scalar and
    categorical values are missing (NaN), and check the loaded contents.
    """
    csv_content = """subject,channel,path,value,scalar1,scalar2,categorical1,categorical2
S1,image1,foo1.nii,,2.1,2.2,True,False
S1,image2,foo2.nii,,3.1,,True,False
S1,label,,True,1.1,1.2,,False
"""
    df = pd.read_csv(StringIO(csv_content), sep=",", dtype=str)
    numerical_columns = ["scalar2", "scalar1"]
    categorical_columns = ["categorical1", "categorical2"]
    feature_channels = _get_non_image_dict(["label", "image2"],
                                           ["scalar2", "scalar1"],
                                           ["categorical1", "categorical2"])
    encoder = CategoricalToOneHotEncoder.create_from_dataframe(dataframe=df,
                                                               columns=categorical_columns)
    # Image channels and numerical columns are deliberately requested in a
    # different order from the one they have in the file.
    item: ScalarDataSource = load_single_data_source(
        df,
        subject_id="S1",
        image_channels=["image2", "image1"],
        image_file_column="path",
        label_channels=["label"],
        label_value_column="value",
        non_image_feature_channels=feature_channels,
        numerical_columns=numerical_columns,
        categorical_data_encoder=encoder,
        channel_column="channel")

    # Files must come back in the requested channel order, not in file order.
    files = item.channel_files
    assert files[0] == "foo2.nii"
    assert files[1] == "foo1.nii"
    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    numerical = item.numerical_non_image_features
    assert numerical[0] == 1.2
    assert numerical[2] == 1.1
    assert numerical[3] == 3.1
    # The empty scalar2 cell of the "image2" row has to surface as NaN.
    assert math.isnan(numerical[1].item())

    categorical = item.categorical_non_image_features
    assert np.all(np.isnan(categorical[0].numpy()))
    assert categorical[1:].tolist() == [1.0, 1.0, 1.0]
    assert numerical.dtype == torch.float32

    # Loading without any non-image features must give an empty feature tensor.
    item_no_scalars: ScalarDataSource = load_single_data_source(
        df,
        subject_id="S1",
        image_channels=["image2", "image1"],
        image_file_column="path",
        label_channels=["label"],
        label_value_column="value",
        non_image_feature_channels={},
        numerical_columns=[],
        channel_column="channel")
    empty_features = item_no_scalars.numerical_non_image_features
    assert empty_features.shape == (0, )
# Example #2
# 0
def test_load_single_item_6() -> None:
    """
    Check that each categorical feature can be read from its own set of channels.
    """
    csv_content = """subject,path,channel,cat1,cat2,scalar1,label
S1,foo1.nii,week1,True,True,1.2,True
S1,foo2.nii,week2,False,False,1.2,True
S1,foo2.nii,week3,False,True,1.3,True
"""
    df = pd.read_csv(StringIO(csv_content), sep=",", dtype=str)
    # Every feature column is mapped to the channels it should be taken from.
    feature_channels = {
        "scalar1": ["week3"],
        "cat1": ["week1", "week2"],
        "cat2": ["week3"],
    }
    encoder = CategoricalToOneHotEncoder.create_from_dataframe(
        dataframe=df,
        columns=["cat1", "cat2"])
    item: ScalarDataSource = load_single_data_source(
        df,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        numerical_columns=["scalar1"],
        non_image_feature_channels=feature_channels,
        categorical_data_encoder=encoder,
        channel_column="channel")
    expected_encoding = torch.tensor([0, 1, 1, 0, 0, 1])
    assert torch.all(item.categorical_non_image_features == expected_encoding)
def test_load_single_item_5() -> None:
    """
    Check that each numerical feature can be read from its own set of channels.
    """
    csv_content = """subject,path,channel,scalar1,scalar2,label
S1,foo1.nii,week1,2.1,2.2,True
S1,foo2.nii,week2,2.3,2.2,True
"""
    df = pd.read_csv(StringIO(csv_content), sep=",", dtype=str)
    # scalar1 is taken from both weeks, scalar2 from the first week only.
    feature_channels = {"scalar1": ["week1", "week2"], "scalar2": ["week1"]}
    item: ScalarDataSource = load_single_data_source(
        df,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=feature_channels,
        numerical_columns=["scalar2", "scalar1"],
        channel_column="channel")

    assert item.channel_files[0] == "foo1.nii"
    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    numerical = item.numerical_non_image_features
    expected_values = torch.tensor([2.2, 2.1, 2.3])
    assert torch.all(numerical == expected_values)
    assert numerical.dtype == torch.float32
# Example #4
# 0
 def load_item(csv_string: StringIO) -> str:
     """
     Read the given CSV and try to load subject "S1" from it, expecting the load to fail.

     :param csv_string: The CSV text to read the rows from.
     :return: The string form of the exception information captured by pytest.raises.
     """
     frame = pd.read_csv(csv_string, sep=",", dtype=str)
     feature_channels = _get_non_image_dict(["label", "image2"],
                                            ["scalar2", "scalar1"])
     # Image channels and numerical columns are deliberately given in a
     # different order from the file.
     load_arguments = dict(subject_id="S1",
                           image_channels=["image2", "image1"],
                           image_file_column="path",
                           label_channels=["label"],
                           label_value_column="value",
                           non_image_feature_channels=feature_channels,
                           numerical_columns=["scalar2", "scalar1"],
                           channel_column="channel")
     with pytest.raises(Exception) as ex:
         load_single_data_source(frame, **load_arguments)
     return str(ex)
def test_load_single_item_7() -> None:
    """
    Check loading of categorical features from different channels when one of the
    column values was not seen while fitting the encoder.
    """
    # The encoder only gets to see the valid category values.
    valid_csv = """subject,path,channel,cat1,cat2,label
    S1,foo1.nii,week1,True,True,True
    S1,foo2.nii,week2,False,False,True
    S1,foo2.nii,week3,False,,True
    """
    valid_df = pd.read_csv(StringIO(valid_csv), sep=",", dtype=str)
    encoder = CategoricalToOneHotEncoder.create_from_dataframe(
        dataframe=valid_df,
        columns=["cat1", "cat2"])

    # This data has an unknown value ("houhou") for cat1 in week2.
    invalid_csv = """subject,path,channel,cat1,cat2,label
    S1,foo1.nii,week1,True,True,True
    S1,foo2.nii,week2,houhou,False,False
    S1,foo2.nii,week3,False,,True
    """
    invalid_df = pd.read_csv(StringIO(invalid_csv), sep=",", dtype=str)
    feature_channels = {"cat1": ["week1", "week2"], "cat2": ["week3"]}
    item: ScalarDataSource = load_single_data_source(
        invalid_df,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=feature_channels,
        categorical_data_encoder=encoder,
        channel_column="channel")

    encoded = item.categorical_non_image_features
    # cat1 in week1 holds a valid value.
    assert torch.all(encoded[0:2] == torch.tensor([0, 1]))
    # cat1 in week2 holds the invalid value (regression check).
    assert torch.all(torch.isnan(encoded[2:4]))
    # cat2 in week3 is empty, hence invalid as well.
    assert torch.all(torch.isnan(encoded[4:6]))
# Example #6
# 0
 def _test_load_labels(
         label_channels: List[str],
         transform_labels: Union[Callable, List[Callable]]) -> ScalarDataSource:
     """
     Load subject "S1" from a fixed two-row CSV as a non-classification dataset,
     using the given label channels and label transformation(s).

     :param label_channels: The channels to read the label values from.
     :param transform_labels: Passed through unchanged to load_single_data_source.
     :return: The data source that load_single_data_source created.
     """
     csv_content = """subject,channel,path,value,scalar1,scalar2
 S1,label_w1,,1,1.1,1.2
 S1,label_w2,,3,,
 """
     frame = pd.read_csv(StringIO(csv_content), sep=",", dtype=str)
     # Numerical columns are deliberately given in a different order from the file.
     feature_channels = _get_non_image_dict(["label_w1"],
                                            ["scalar2", "scalar1"])
     data_source = load_single_data_source(
         frame,
         subject_id="S1",
         channel_column="channel",
         label_channels=label_channels,
         label_value_column="value",
         transform_labels=transform_labels,
         non_image_feature_channels=feature_channels,
         numerical_columns=["scalar2", "scalar1"],
         is_classification_dataset=False)
     return data_source
# Example #7
# 0
def test_load_single_item_3() -> None:
    """
    Build a data source from a single row of data, in a file that has no channel
    information at all.
    """
    csv_content = """subject,path,value,scalar1,scalar2,label
S1,foo1.nii,,2.1,2.2,True
"""
    df = pd.read_csv(StringIO(csv_content), sep=",", dtype=str)
    # No image or label channels are given, and the channel column name "foo"
    # does not exist in the file.
    item: ScalarDataSource = load_single_data_source(
        df,
        subject_id="S1",
        image_channels=[],
        image_file_column="path",
        label_channels=None,
        label_value_column="label",
        non_image_feature_channels={},
        numerical_columns=["scalar2", "scalar1"],
        channel_column="foo")

    assert item.channel_files[0] == "foo1.nii"
    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    numerical = item.numerical_non_image_features
    assert numerical.tolist() == pytest.approx([2.2, 2.1])
    assert numerical.dtype == torch.float32