def test_get_class_counts_binary(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check get_class_counts on a ScalarDataset whose labels are binary (True/False).

    The CSV describes three subjects, each with an unlabelled "week0" row and a labelled
    "week1" row. Two of the labels are True and one is False; the dataset is expected to
    report the single entry {0: 2}.
    """
    # The rows of the literal start at column 0 on purpose: the text is parsed as CSV as-is,
    # so any indentation would end up inside the "subject" values.
    csv_text = """subject,channel,path,label,numerical1,numerical2,CAT1
S1,week0,scan1.npy,,1,10,A
S1,week1,scan2.npy,True,2,20,A
S2,week0,scan3.npy,,3,30,A
S2,week1,scan4.npy,False,4,40,A
S3,week0,scan1.npy,,5,50,A
S3,week1,scan3.npy,True,6,60,A
"""
    # Every column is read as a string; no type inference is applied to labels or numbers.
    data_frame = pd.read_csv(StringIO(csv_text), dtype=str)

    model_config = ScalarModelBase(
        local_dataset=Path(test_output_dirs.make_sub_dir("dataset")),
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        numerical_columns=["numerical1", "numerical2"],
        should_validate=False,
    )
    model_config.set_output_to(test_output_dirs.root_dir)

    # Presumably key 0 is the only class index and 2 is the number of True labels -
    # TODO confirm against ScalarDataset.get_class_counts.
    expected_counts = {0: 2}
    dataset = ScalarDataset(model_config, data_frame)
    assert dataset.get_class_counts() == expected_counts
def test_get_class_counts_multilabel(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check get_class_counts on a ScalarDataset whose labels are multilabel.

    Labels are written as "|"-separated class indices on the "week1" rows ("0|1|2", "1|2"
    and "1"), and the config declares four class names. The expected result counts how
    often each index appears across the three subjects, including a zero for the class
    that never appears.
    """
    # The rows of the literal start at column 0 on purpose: the text is parsed as CSV as-is,
    # so any indentation would end up inside the "subject" values.
    csv_text = """subject,channel,path,label,CAT1
S1,week0,scan1.npy,,A
S1,week1,scan2.npy,0|1|2,A
S2,week0,scan3.npy,,A
S2,week1,scan4.npy,1|2,A
S3,week0,scan1.npy,,A
S3,week1,scan3.npy,1,A
"""
    # Every column is read as a string, so the "|"-separated labels arrive untouched.
    data_frame = pd.read_csv(StringIO(csv_text), dtype=str)

    model_config = ScalarModelBase(
        local_dataset=Path(test_output_dirs.make_sub_dir("dataset")),
        class_names=["class0", "class1", "class2", "class3"],
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        should_validate=False,
    )
    model_config.set_output_to(test_output_dirs.root_dir)

    # Index 0 appears once, index 1 three times, index 2 twice, index 3 never.
    expected_counts = {0: 1, 1: 3, 2: 2, 3: 0}
    dataset = ScalarDataset(model_config, data_frame)
    assert dataset.get_class_counts() == expected_counts
def test_get_class_weights_dataset(test_output_dirs: OutputFolderForTests) -> None:
    """
    Check the per-class counts that a ScalarDataset reports for True/False labels.

    The CSV describes three subjects, each with an unlabelled "week0" row and a labelled
    "week1" row: two labels are True and one is False. The expected result has one entry
    per label value, {0.0: 1, 1.0: 2}.

    NOTE(review): test_get_class_counts_binary builds the same dataset with the same
    config but expects {0: 2}. Both expectations cannot hold for one implementation of
    get_class_counts - confirm which of the two is current.
    """
    # The rows of the literal start at column 0 on purpose: the text is parsed as CSV as-is,
    # so any indentation would end up inside the "subject" values.
    csv_text = """subject,channel,path,label,numerical1,numerical2,CAT1
S1,week0,scan1.npy,,1,10,A
S1,week1,scan2.npy,True,2,20,A
S2,week0,scan3.npy,,3,30,A
S2,week1,scan4.npy,False,4,40,A
S3,week0,scan1.npy,,5,50,A
S3,week1,scan3.npy,True,6,60,A
"""
    # Every column is read as a string; no type inference is applied to labels or numbers.
    data_frame = pd.read_csv(StringIO(csv_text), dtype=str)

    model_config = ScalarModelBase(
        local_dataset=Path(test_output_dirs.make_sub_dir("dataset")),
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=["week0", "week1"],
        numerical_columns=["numerical1", "numerical2"],
        should_validate=False,
    )
    model_config.set_output_to(test_output_dirs.root_dir)

    # Presumably keyed by the label converted to float: one False (0.0), two True (1.0) -
    # TODO confirm against ScalarDataset.get_class_counts.
    expected_counts = {0.0: 1, 1.0: 2}
    dataset = ScalarDataset(model_config, data_frame)
    assert dataset.get_class_counts() == expected_counts