Пример #1
0
def test_get_class_counts_binary(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test the get_class_counts method for binary scalar datasets.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = """subject,channel,path,label,numerical1,numerical2,CAT1
   S1,week0,scan1.npy,,1,10,A
   S1,week1,scan2.npy,True,2,20,A
   S2,week0,scan3.npy,,3,30,A
   S2,week1,scan4.npy,False,4,40,A
   S3,week0,scan1.npy,,5,50,A
   S3,week1,scan3.npy,True,6,60,A
   """
    config = ScalarModelBase(
        local_dataset=dataset_folder,
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        numerical_columns=["numerical1", "numerical2"],
        should_validate=False
    )
    config.set_output_to(test_output_dirs.root_dir)
    train_dataset = ScalarDataset(config, pd.read_csv(StringIO(dataset_contents), dtype=str))
    class_counts = train_dataset.get_class_counts()
    assert class_counts == {0: 2}
Пример #2
0
def test_get_class_counts_multilabel(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test the get_class_counts method for multilabel scalar datasets.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = """subject,channel,path,label,CAT1
   S1,week0,scan1.npy,,A
   S1,week1,scan2.npy,0|1|2,A
   S2,week0,scan3.npy,,A
   S2,week1,scan4.npy,1|2,A
   S3,week0,scan1.npy,,A
   S3,week1,scan3.npy,1,A
   """
    config = ScalarModelBase(
        local_dataset=dataset_folder,
        class_names=["class0", "class1", "class2", "class3"],
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        should_validate=False
    )
    config.set_output_to(test_output_dirs.root_dir)
    train_dataset = ScalarDataset(config, pd.read_csv(StringIO(dataset_contents), dtype=str))
    class_counts = train_dataset.get_class_counts()
    assert class_counts == {0: 1, 1: 3, 2: 2, 3: 0}
Пример #3
0
def test_get_class_weights_dataset(test_output_dirs: OutputFolderForTests) -> None:
    """
    Test training and testing of sequence models that predicts at multiple time points,
    when it is started via run_ml.
    """
    dataset_folder = Path(test_output_dirs.make_sub_dir("dataset"))
    dataset_contents = """subject,channel,path,label,numerical1,numerical2,CAT1
   S1,week0,scan1.npy,,1,10,A
   S1,week1,scan2.npy,True,2,20,A
   S2,week0,scan3.npy,,3,30,A
   S2,week1,scan4.npy,False,4,40,A
   S3,week0,scan1.npy,,5,50,A
   S3,week1,scan3.npy,True,6,60,A
   """
    config = ScalarModelBase(
        local_dataset=dataset_folder,
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        numerical_columns=["numerical1", "numerical2"],
        should_validate=False
    )
    config.set_output_to(test_output_dirs.root_dir)
    train_dataset = ScalarDataset(config, pd.read_csv(StringIO(dataset_contents), dtype=str))
    class_counts = train_dataset.get_class_counts()
    assert class_counts == {0.0: 1, 1.0: 2}