def test_angola_example_file_object_output_correct():
    """Parse the example file through an open binary file object.

    The file at ``example_file_path`` is opened in ``'rb'`` mode and the
    handle itself (not the path) is passed to
    ``RecrudescenceFileParser.parse_file``. Both returned results are then
    checked for shape, column labels, and first row against the
    module-level expected values.
    """
    with open(example_file_path, 'rb') as workbook:
        parsed = RecrudescenceFileParser.parse_file(workbook)
        genotype_df, additional_df = parsed

        # Dimensions of each returned result.
        assert genotype_df.shape == expected_genotype_shape
        assert additional_df.shape == expected_additional_shape

        # Both results are compared against the same expected column labels.
        genotype_columns = genotype_df.columns.to_numpy()
        np.testing.assert_array_equal(genotype_columns, expected_columns)
        additional_columns = additional_df.columns.to_numpy()
        np.testing.assert_array_equal(additional_columns, expected_columns)

        # First row of each result versus the first row of its fixture.
        pd.testing.assert_series_equal(
            genotype_df.iloc[0],
            expected_genotype_row_0.iloc[0],
        )
        pd.testing.assert_series_equal(
            additional_df.iloc[0],
            expected_additional_row_0.iloc[0],
        )
3 NA 82.0 78 149.6 159.9 NA NA 4 NA 82.0 NA 175.7 187.2 NA NA 5 NA 81.8 NA 164.2 175.0 NA NA 6 NA 79.0 NA 149.4 160.0 NA NA 7 NA 78.6 NA 147.9 158.8 NA NA 8 NA 88.0 NA 170.0 181.4 NA NA 9 NA 78.6 NA 148.6 159.9 NA NA 10 NA 82.0 NA 152.4 163.1 NA NA 11 NA 81.7 NA 148.8 160.4 NA NA 12 NA 81.7 NA 163.2 175.3 NA NA ''' # NOTE: Makes this reliant on AlgorithmInstance tests passing example_file = os.path.join(os.path.dirname(__file__), '../Angola2017_example.xlsx') genotypedata, additional = RecrudescenceFileParser.parse_file(example_file) genotypedata_RR = AlgorithmInstance._get_samples_from_site( genotypedata, 'Benguela') additional_neutral = AlgorithmInstance._replace_sample_names( AlgorithmInstance._get_samples_from_site(additional, 'Benguela'), 'Additional_') expected_maxMOI = 5 locirepeats = np.array([2, 2, 3, 3, 3, 3, 3]) expected_ids = pd.unique( ["BQ17-269", "BD17-040", "BD17-083", "BD17-085", "BD17-087", "BD17-090"]) expected_locinames = pd.unique( ["313", "383", "TA1", "POLYA", "PFPK2", "2490", "TA109"]) alleles_definitions_RR = AlgorithmSiteInstance._get_allele_definitions( genotypedata_RR, additional_neutral, expected_locinames.size, locirepeats)