def test_parse_metadata_df(self):
    """Check parse_gctx.parse_metadata_df against the mini GCToo fixture.

    Three scenarios are exercised:
      * row metadata parsed with convert_neg_666=True must equal the fixture's
        row metadata with -666 replaced by NaN;
      * col metadata parsed with convert_neg_666=False must equal the fixture
        built with convert_neg_666=False;
      * ids that look numeric must come back unchanged as the index.
    """
    mini_gctoo = mini_gctoo_for_testing.make()

    # convert row_metadata to np.nan
    mini_row_meta = mini_gctoo.row_metadata_df.replace([-666, "-666", -666.0], [np.nan, np.nan, np.nan])
    logger.debug("mini_row_meta.shape: {}".format(mini_row_meta.shape))
    logger.debug("mini_row_meta.index: {}".format(mini_row_meta.index))
    logger.debug("mini_row_meta.columns: {}".format(mini_row_meta.columns))
    logger.debug("mini_row_meta.dtypes: {}".format(mini_row_meta.dtypes))

    # The context manager releases the HDF5 handle even when an assertion
    # below fails; previously the file was never closed.
    with h5py.File("functional_tests/mini_gctoo_for_testing.gctx", "r") as gctx_file:
        row_dset = gctx_file[row_meta_group_node]
        col_dset = gctx_file[col_meta_group_node]

        # with convert_neg_666
        row_df = parse_gctx.parse_metadata_df("row", row_dset, True)
        logger.debug("row_df.dtypes: {}".format(row_df.dtypes))
        pandas_testing.assert_frame_equal(mini_row_meta, row_df)

        # no convert_neg_666
        mini_gctoo_with_neg_666 = mini_gctoo_for_testing.make(convert_neg_666=False)
        col_df = parse_gctx.parse_metadata_df("col", col_dset, False)
        pandas_testing.assert_frame_equal(mini_gctoo_with_neg_666.col_metadata_df, col_df)

    # test that ID's are not converted to numeric
    # (uses in-memory mock datasets, so the file is no longer needed here)
    expected_rids = [str(i) for i in range(3)]
    row_dset = {"id": MockHdf5Dset(expected_rids, str),
                "other_meta": MockHdf5Dset(range(3, 6), str)}
    r = parse_gctx.parse_metadata_df("row", row_dset, True)
    logger.debug("test that ID's are not converted to numeric - r: {}".format(r))
    logger.debug("r.index: {}".format(r.index))
    self.assertEqual(set(expected_rids), set(r.index))
def test_gct_parsing(self):
    """Parse the mini .gct fixture whole, without -666 conversion, and subsetted."""
    gct_path = "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gct"
    frame_names = ("data_df", "row_metadata_df", "col_metadata_df")

    # whole file, default arguments: all three frames must match the fixture
    expected = mini_gctoo_for_testing.make()
    parsed = parse.parse(gct_path)
    for frame_name in frame_names:
        pandas_testing.assert_frame_equal(getattr(expected, frame_name),
                                          getattr(parsed, frame_name))

    # by default every mfc_plate_id value is expected to be null
    plate_ids = parsed.col_metadata_df["mfc_plate_id"]
    self.assertTrue(plate_ids.isnull().all())

    # with conversion switched off the raw -666 values are expected instead
    unconverted = parse.parse(gct_path, convert_neg_666=False)
    raw_plate_ids = unconverted.col_metadata_df["mfc_plate_id"].values.tolist()
    self.assertCountEqual(raw_plate_ids, [-666] * 6)

    # subsetting while parsing: two columns by position, one row by id
    my_rid = "LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33"
    subset = parse.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gct",
        cidx=[0, 2],
        rid=[my_rid])
    self.assertEqual(subset.data_df.shape, (1, 2))
    subset_values = subset.data_df.values.flatten().tolist()
    self.assertCountEqual(subset_values, [1., 3.])
    self.assertEqual(subset.row_metadata_df.index[0], my_rid)
def test_get_ordered_idx(self):
    """Exercise parse_gctx.get_ordered_idx for id_type None, "id" and "idx"."""
    fixture = mini_gctoo_for_testing.make()
    failure_template = "Expected ordered idx to be {} but got {}"

    # case 1: id_type == None -> every position of the 6-row fixture
    every_position = list(range(0, 6))
    case1 = parse_gctx.get_ordered_idx(None, [], fixture.row_metadata_df, sort_idx=True)
    self.assertEqual(case1, every_position,
                     failure_template.format(every_position, case1))

    # case 2: id_type == "id" -> position of the requested id
    wanted_ids = ['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']
    case2 = parse_gctx.get_ordered_idx("id", wanted_ids, fixture.col_metadata_df, sort_idx=True)
    self.assertEqual(case2, [4], failure_template.format([4], case2))

    # case 3: id_type == "idx" -> requested positions returned in sorted order
    case3 = parse_gctx.get_ordered_idx("idx", [5, 1, 3], fixture.col_metadata_df, sort_idx=True)
    self.assertEqual(case3, [1, 3, 5], failure_template.format([1, 3, 5], case3))
def test_make_specified_size_gctoo(self):
    """Check random_slice.make_specified_size_gctoo for bad dims, valid and oversized subsets."""
    fixture = mini_gctoo_for_testing.make()
    logger.debug("mini gctoo data_df shape: {}".format(fixture.data_df.shape))
    logger.debug("mini gctoo row_meta shape: {}".format(fixture.row_metadata_df.shape))
    logger.debug("mini gctoo col_meta shape: {}".format(fixture.col_metadata_df.shape))

    too_many_msg = "number of entries must be smaller than dimension being subsetted "

    # case 1: dim isn't 'row' or 'col'
    with self.assertRaises(AssertionError) as context:
        random_slice.make_specified_size_gctoo(fixture, 3, "aaaalll")
    self.assertEqual(str(context.exception),
                     "dim specified must be either 'row' or 'col'")

    # case 2: row subsetting - happy
    row_subset = random_slice.make_specified_size_gctoo(fixture, 3, "row")
    data_shape = row_subset.data_df.shape
    self.assertEqual(
        data_shape, (3, 6),
        "data_df after row slice is incorrect shape: {} vs (3,6)".format(data_shape))
    row_meta_shape = row_subset.row_metadata_df.shape
    self.assertEqual(
        row_meta_shape, (3, 5),
        "row_metadata_df after row slice is incorrect shape: {} vs (3,5)".format(row_meta_shape))
    col_meta_shape = row_subset.col_metadata_df.shape
    self.assertEqual(
        col_meta_shape, (6, 5),
        "col_metadata_df after row slice is incorrect shape: {} vs (6,5)".format(col_meta_shape))

    # case 3: row subsetting - sample subset > og # of samples
    with self.assertRaises(AssertionError) as context:
        random_slice.make_specified_size_gctoo(fixture, 30, "row")
    self.assertTrue(too_many_msg in str(context.exception))

    # case 4: col subsetting - happy
    col_subset = random_slice.make_specified_size_gctoo(fixture, 3, "col")
    data_shape = col_subset.data_df.shape
    self.assertEqual(
        data_shape, (6, 3),
        "data_df after col slice is incorrect shape: {} vs (6,3)".format(data_shape))
    row_meta_shape = col_subset.row_metadata_df.shape
    self.assertEqual(
        row_meta_shape, (6, 5),
        "row_metadata_df after col slice is incorrect shape: {} vs (6, 5)".format(row_meta_shape))
    col_meta_shape = col_subset.col_metadata_df.shape
    self.assertEqual(
        col_meta_shape, (3, 5),
        "col_metadata_df after col slice is incorrect shape: {} vs (3,5)".format(col_meta_shape))

    # case 5: col subsetting - sample subset > og # of samples
    with self.assertRaises(AssertionError) as context:
        random_slice.make_specified_size_gctoo(fixture, 7, "col")
    self.assertTrue(too_many_msg in str(context.exception))
def test_parse_metadata_df(self):
    """Check parse_gctx.parse_metadata_df against the mini GCToo fixture.

    Row metadata is parsed with convert_neg_666=True and compared with the
    fixture's row metadata after replacing -666 by NaN; column metadata is
    parsed with convert_neg_666=False and compared with a fixture built the
    same way.
    """
    mini_gctoo = mini_gctoo_for_testing.make()

    # convert row_metadata to np.nan
    mini_row_meta = mini_gctoo.row_metadata_df.replace([-666, "-666", -666.0], [np.nan, np.nan, np.nan])

    # The context manager releases the HDF5 handle even when an assertion
    # below fails; previously the file was never closed.
    with h5py.File("functional_tests/mini_gctoo_for_testing.gctx", "r") as gctx_file:
        row_dset = gctx_file[row_meta_group_node]
        col_dset = gctx_file[col_meta_group_node]

        # with convert_neg_666
        row_df = parse_gctx.parse_metadata_df("row", row_dset, True)
        assert_frame_equal(mini_row_meta, row_df)

        # no convert_neg_666
        mini_gctoo_with_neg_666 = mini_gctoo_for_testing.make(convert_neg_666=False)
        col_df = parse_gctx.parse_metadata_df("col", col_dset, False)
        assert_frame_equal(mini_gctoo_with_neg_666.col_metadata_df, col_df)
def test_write_src(self):
    """Check the src attribute that write_gctx.write stores in the output file.

    Case 1: the GCToo object's src is None; the stored attribute is expected
    to be the output file name. Case 2: the fixture's own src is kept; the
    stored attribute is expected to be "mini_gctoo.gctx".

    Each output file is opened read-only inside a context manager and removed
    in a ``finally`` clause, so a failing step neither leaks an HDF5 handle
    nor leaves a stray file behind.
    """
    # case 1: gctoo obj doesn't have src
    mini1 = mini_gctoo_for_testing.make()
    mini1.src = None
    try:
        # written without an extension; the test then reads "no_src_example.gctx"
        write_gctx.write(mini1, "no_src_example")
        with h5py.File("no_src_example.gctx", "r") as hdf5_file:
            hdf5_src1 = hdf5_file.attrs[write_gctx.src_attr]
    finally:
        # guard so a failed write does not get masked by FileNotFoundError
        if os.path.exists("no_src_example.gctx"):
            os.remove("no_src_example.gctx")
    self.assertEqual(hdf5_src1, "no_src_example.gctx")

    # case 2: gctoo obj does have src
    mini2 = mini_gctoo_for_testing.make()
    try:
        write_gctx.write(mini2, "with_src_example.gctx")
        with h5py.File("with_src_example.gctx", "r") as hdf5_file:
            hdf5_src2 = hdf5_file.attrs[write_gctx.src_attr]
    finally:
        if os.path.exists("with_src_example.gctx"):
            os.remove("with_src_example.gctx")
    self.assertEqual(hdf5_src2, "mini_gctoo.gctx")
def test_write_version(self):
    """Check that write_gctx.write always stores write_gctx.version_number.

    Case 1: the GCToo object's version is None. Case 2: the object carries a
    custom version, which is expected to be ignored when writing.

    Each output file is opened read-only inside a context manager and removed
    in a ``finally`` clause, so a failing step neither leaks an HDF5 handle
    nor leaves a stray file behind.
    """
    # TODO @oana refactor this test so it just calls the write_version method

    # case 1: gctoo obj doesn't have version
    mini1 = mini_gctoo_for_testing.make()
    mini1.version = None
    fn = "no_version_provided_example.gctx"
    try:
        write_gctx.write(mini1, fn)
        with h5py.File(fn, "r") as hdf5_file:
            hdf5_v1 = hdf5_file.attrs[write_gctx.version_attr]
    finally:
        # guard so a failed write does not get masked by FileNotFoundError
        if os.path.exists(fn):
            os.remove(fn)
    self.assertEqual(hdf5_v1, write_gctx.version_number)

    # case 2: gctoo obj does have version, but it is not used when writing
    mini2 = mini_gctoo_for_testing.make()
    mini2.version = "MY_VERSION"
    fn = "with_version_provided_example.gctx"
    try:
        write_gctx.write(mini2, fn)
        with h5py.File(fn, "r") as hdf5_file:
            hdf5_v2 = hdf5_file.attrs[write_gctx.version_attr]
    finally:
        if os.path.exists(fn):
            os.remove(fn)
    self.assertEqual(hdf5_v2, write_gctx.version_number)
def test_set_metadata_index_and_column_names(self):
    """Check that index/columns names are set to rid/rhd for rows and cid/chd for cols."""
    fixture = mini_gctoo_for_testing.make()

    # start from unnamed axes so the names asserted below must come from the call
    for frame in (fixture.row_metadata_df, fixture.col_metadata_df):
        frame.index.name = None
        frame.columns.name = None

    # (dim, frame, expected index name, expected columns name)
    expectations = (
        ("row", fixture.row_metadata_df, "rid", "rhd"),  # case 1: dim == "row"
        ("col", fixture.col_metadata_df, "cid", "chd"),  # case 2: dim == "col"
    )
    for dim, frame, index_name, header_name in expectations:
        parse_gctx.set_metadata_index_and_column_names(dim, frame)
        self.assertEqual(frame.index.name, index_name)
        self.assertEqual(frame.columns.name, header_name)
def test_gct_parsing(self):
    """Parse the mini .gct fixture and check that subsetting arguments are rejected."""
    gct_path = "functional_tests/mini_gctoo_for_testing.gct"

    # parse in gct, no other arguments
    expected = mini_gctoo_for_testing.make()
    parsed = parse.parse(gct_path)
    for frame_name in ("data_df", "row_metadata_df", "col_metadata_df"):
        pandas_testing.assert_frame_equal(getattr(expected, frame_name),
                                          getattr(parsed, frame_name))

    # check convert_neg_666 worked correctly
    self.assertTrue(parsed.col_metadata_df["mfc_plate_id"].isnull().all())

    # parse w/o convert_neg_666
    unconverted = parse.parse(gct_path, convert_neg_666=False)
    self.assertFalse(unconverted.col_metadata_df["mfc_plate_id"].isnull().all())

    # check unused rid / cid / ridx / cidx argument handling, in that order
    unused_arguments = ({"rid": ["a"]}, {"cid": ["a"]}, {"ridx": [0]}, {"cidx": [0]})
    for unused_argument in unused_arguments:
        with self.assertRaises(Exception) as context:
            parse.parse(gct_path, **unused_argument)
        self.assertTrue(
            "parse_gct does not use the argument" in str(context.exception))
def test_gctx_parsing(self):
    """Parse the mini .gctx fixture through parse.parse with each supported option."""
    gctx_path = "../functional_tests/mini_gctoo_for_testing.gctx"
    vehicle_id = 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'

    def assert_same_frames(left, right):
        # data, row metadata and col metadata, compared in that order
        pandas_testing.assert_frame_equal(left.data_df, right.data_df)
        pandas_testing.assert_frame_equal(left.row_metadata_df, right.row_metadata_df)
        pandas_testing.assert_frame_equal(left.col_metadata_df, right.col_metadata_df)

    # parse in gctx, no other arguments
    mg1 = mini_gctoo_for_testing.make()
    whole = parse.parse(gctx_path)
    assert_same_frames(mg1, whole)

    # check convert_neg_666 worked correctly
    self.assertTrue(whole.col_metadata_df["mfc_plate_id"].isnull().all())

    # parse w/o convert_neg_666
    unconverted = parse.parse(gctx_path, convert_neg_666=False)
    self.assertFalse(unconverted.col_metadata_df["mfc_plate_id"].isnull().all())

    # parsing w/rids & cids specified
    test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33', vehicle_id]
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    expected_by_id = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids)
    parsed_by_id = parse.parse(gctx_path, rid=test_rids, cid=test_cids)
    assert_same_frames(expected_by_id, parsed_by_id)

    # parsing w/ridx & cidx specified
    expected_by_idx = subset_gctoo.subset_gctoo(mg1, rid=[vehicle_id], cid=[vehicle_id])
    parsed_by_idx = parse.parse(gctx_path, ridx=[4], cidx=[4])
    assert_same_frames(expected_by_idx, parsed_by_idx)

    # parsing row metadata only
    row_meta_only = parse.parse(gctx_path, row_meta_only=True)
    pandas_testing.assert_frame_equal(row_meta_only, mg1.row_metadata_df)

    # parsing col metadata only
    col_meta_only = parse.parse(gctx_path, col_meta_only=True)
    pandas_testing.assert_frame_equal(col_meta_only, mg1.col_metadata_df)

    # parsing w/multiindex
    with_multiindex = parse.parse(gctx_path, make_multiindex=True)
    self.assertTrue(with_multiindex.multi_index_df is not None)
def test_parse(self):
    """Check parse_gctx.parse on the mini fixture with every rid/cid/ridx/cidx combination.

    Also writes a temporary copy of the fixture with integer row/column ids
    ("int_indexed_mini_gctoo.gctx") to check parsing with numeric ids. That
    file is removed in a ``finally`` clause so a failing write or parse does
    not leave it behind.
    """
    gctx_path = "functional_tests/mini_gctoo_for_testing.gctx"
    temp_path = "int_indexed_mini_gctoo.gctx"

    # parse whole thing
    mg1 = mini_gctoo_for_testing.make()
    mg2 = parse_gctx.parse(gctx_path)
    assert_frame_equal(mg1.data_df, mg2.data_df)
    assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df)
    assert_frame_equal(mg1.col_metadata_df, mg2.col_metadata_df)

    # test with string rid/cid
    test_rids = ['LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
                 'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666']
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    mg3 = slice_gct.slice_gctoo(mg1, rid=test_rids, cid=test_cids)
    mg4 = parse_gctx.parse(gctx_path, rid=test_rids, cid=test_cids)
    assert_frame_equal(mg3.data_df, mg4.data_df)
    assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df)
    assert_frame_equal(mg3.col_metadata_df, mg4.col_metadata_df)

    # first, make & write out temp version of mini_gctoo with int rids/cids
    new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
    int_indexed_data_df = new_mg.data_df.copy()
    int_indexed_data_df.index = range(0, 6)
    int_indexed_data_df.columns = range(10, 16)
    int_indexed_row_meta = new_mg.row_metadata_df.copy()
    int_indexed_row_meta.index = range(0, 6)
    int_indexed_col_meta = new_mg.col_metadata_df.copy()
    int_indexed_col_meta.index = range(10, 16)
    int_indexed_gctoo = GCToo.GCToo(data_df=int_indexed_data_df,
                                    row_metadata_df=int_indexed_row_meta,
                                    col_metadata_df=int_indexed_col_meta)

    try:
        write_gctx.write(int_indexed_gctoo, temp_path)

        # test with numeric (repr as string) rid/cid
        mg5 = GCToo.GCToo(data_df=int_indexed_data_df,
                          row_metadata_df=int_indexed_row_meta,
                          col_metadata_df=int_indexed_col_meta)
        mg5 = slice_gct.slice_gctoo(mg5,
                                    row_bool=[True, False, True, False, True, False],
                                    col_bool=[True, False, False, True, True, True])
        mg5.data_df.index.name = "rid"
        mg5.data_df.columns.name = "cid"
        mg5.row_metadata_df.index.name = "rid"
        mg5.row_metadata_df.columns.name = "rhd"
        mg5.col_metadata_df.index.name = "cid"
        mg5.col_metadata_df.columns.name = "chd"

        mg6 = parse_gctx.parse(temp_path, rid=[0, 2, 4], cid=[10, 13, 14, 15],
                               convert_neg_666=False)
    finally:
        # guard so a failed write does not get masked by FileNotFoundError
        if os.path.exists(temp_path):
            os.remove(temp_path)

    assert_frame_equal(mg5.data_df, mg6.data_df)
    assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df)
    assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df)

    # test with ridx/cidx
    # cid is passed as a one-element list, like rid, instead of a bare string
    mg7 = slice_gct.slice_gctoo(mg1,
                                rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
                                cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    mg8 = parse_gctx.parse(gctx_path, ridx=[4], cidx=[4])
    assert_frame_equal(mg7.data_df, mg8.data_df)
    assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df)
    assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df)

    # test with rid/cidx
    mg9 = parse_gctx.parse(gctx_path,
                           rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
                           cidx=[4])
    assert_frame_equal(mg7.data_df, mg9.data_df)
    assert_frame_equal(mg7.row_metadata_df, mg9.row_metadata_df)
    assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df)

    # test with ridx/cid
    mg10 = parse_gctx.parse(gctx_path,
                            ridx=[4],
                            cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    assert_frame_equal(mg7.data_df, mg10.data_df)
    assert_frame_equal(mg7.row_metadata_df, mg10.row_metadata_df)
    assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df)
def test_write_metadata(self):
    """Round-trip row/col metadata through write_gctx.write_metadata and parse_gctx.

    CASE 1:
        - write metadata (has '-666') to file, do not convert -666
        - parse in written metadata, don't convert -666
    CASE 2:
        - write metadata (has NaN, not '-666') to file, do convert NaN back to '-666'
        - parse in written metadata, don't convert -666

    In both cases the parsed columns, index and per-column value sets are
    compared with the fixture built with convert_neg_666=False. The index
    checks compare against the parsed metadata (previously they compared the
    fixture with itself and so could never fail). The HDF5 writer is closed by
    a context manager and the temporary file is removed in a ``finally``
    clause.
    """
    metadata_path = FUNCTIONAL_TESTS_PATH + "/mini_gctoo_metadata.gctx"
    mini_gctoo = mini_gctoo_for_testing.make(convert_neg_666=False)

    # ---- CASE 1 ----
    try:
        with h5py.File(metadata_path, "w") as hdf5_writer:
            write_gctx.write_metadata(hdf5_writer, "row", mini_gctoo.row_metadata_df, False, 6)
            write_gctx.write_metadata(hdf5_writer, "col", mini_gctoo.col_metadata_df, False, 6)
        logger.debug("Wrote mini_gctoo_metadata.gctx to {}".format(
            os.path.join(FUNCTIONAL_TESTS_PATH, "mini_gctoo_metadata.gctx")))

        # read in written metadata, then close and delete file
        mini_gctoo_col_metadata = parse_gctx.get_column_metadata(metadata_path, convert_neg_666=False)
        mini_gctoo_row_metadata = parse_gctx.get_row_metadata(metadata_path, convert_neg_666=False)
    finally:
        # guard so a failed write does not get masked by FileNotFoundError
        if os.path.exists(metadata_path):
            os.remove(metadata_path)

    # check row metadata
    self.assertTrue(set(mini_gctoo.row_metadata_df.columns) == set(mini_gctoo_row_metadata.columns),
                    "Mismatch between expected row metadata columns {} and column values written to file: {}".format(
                        mini_gctoo.row_metadata_df.columns, mini_gctoo_row_metadata.columns))
    self.assertTrue(set(mini_gctoo.row_metadata_df.index) == set(mini_gctoo_row_metadata.index),
                    "Mismatch between expect row metadata index {} and index values written to file: {}".format(
                        mini_gctoo.row_metadata_df.index, mini_gctoo_row_metadata.index))
    for c in list(mini_gctoo.row_metadata_df.columns):
        logger.debug("C1: For column name: {}".format(c))
        logger.debug("C1: populated values: {}".format(set(mini_gctoo_row_metadata[c])))
        logger.debug("C1: mini_gctoo values: {}".format(set(mini_gctoo.row_metadata_df[c])))
        self.assertTrue(set(mini_gctoo.row_metadata_df[c]) == set(mini_gctoo_row_metadata[c]),
                        "Values in column {} differ between expected metadata and written row metadata: {} vs {}".format(
                            c, set(mini_gctoo.row_metadata_df[c]), set(mini_gctoo_row_metadata[c])))

    # check col metadata
    self.assertTrue(set(mini_gctoo.col_metadata_df.columns) == set(mini_gctoo_col_metadata.columns),
                    "Mismatch between expected col metadata columns {} and column values written to file: {}".format(
                        mini_gctoo.col_metadata_df.columns, mini_gctoo_col_metadata.columns))
    self.assertTrue(set(mini_gctoo.col_metadata_df.index) == set(mini_gctoo_col_metadata.index),
                    "Mismatch between expect col metadata index {} and index values written to file: {}".format(
                        mini_gctoo.col_metadata_df.index, mini_gctoo_col_metadata.index))
    for c in list(mini_gctoo.col_metadata_df.columns):
        self.assertTrue(set(mini_gctoo.col_metadata_df[c]) == set(mini_gctoo_col_metadata[c]),
                        "Values in column {} differ between expected metadata and written col metadata!".format(c))

    # ---- CASE 2 ----
    # first convert mini_gctoo's row & col metadata dfs -666s to NaN
    converted_row_metadata = mini_gctoo.row_metadata_df.replace([-666, "-666", -666.0],
                                                                [numpy.nan, numpy.nan, numpy.nan])
    logger.debug("First row of converted_row_metadata: {}".format(converted_row_metadata.iloc[0]))
    converted_col_metadata = mini_gctoo.col_metadata_df.replace([-666, "-666", -666.0],
                                                                [numpy.nan, numpy.nan, numpy.nan])

    # write row and col metadata fields from mini_gctoo_for_testing instance to file
    # Note this time does convert back to -666
    try:
        with h5py.File(metadata_path, "w") as hdf5_writer:
            write_gctx.write_metadata(hdf5_writer, "row", converted_row_metadata, True, 6)
            write_gctx.write_metadata(hdf5_writer, "col", converted_col_metadata, True, 6)

        # read in written metadata, then close and delete file
        mini_gctoo_col_metadata = parse_gctx.get_column_metadata(metadata_path, convert_neg_666=False)
        mini_gctoo_row_metadata = parse_gctx.get_row_metadata(metadata_path, convert_neg_666=False)
    finally:
        if os.path.exists(metadata_path):
            os.remove(metadata_path)

    # check row metadata
    self.assertTrue(set(mini_gctoo.row_metadata_df.columns) == set(mini_gctoo_row_metadata.columns),
                    "Mismatch between expected row metadata columns {} and column values written to file: {}".format(
                        mini_gctoo.row_metadata_df.columns, mini_gctoo_row_metadata.columns))
    self.assertTrue(set(mini_gctoo.row_metadata_df.index) == set(mini_gctoo_row_metadata.index),
                    "Mismatch between expect row metadata index {} and index values written to file: {}".format(
                        mini_gctoo.row_metadata_df.index, mini_gctoo_row_metadata.index))
    for c in list(mini_gctoo.row_metadata_df.columns):
        logger.debug("C2: For column name: {}".format(c))
        logger.debug("C2: populated values: {}".format(set(mini_gctoo_row_metadata[c])))
        logger.debug("C2: mini_gctoo values: {}".format(set(mini_gctoo.row_metadata_df[c])))
        self.assertTrue(set(mini_gctoo.row_metadata_df[c]) == set(mini_gctoo_row_metadata[c]),
                        "Values in column {} differ between expected metadata and written row metadata!".format(c))

    # check col metadata
    self.assertTrue(set(mini_gctoo.col_metadata_df.columns) == set(mini_gctoo_col_metadata.columns),
                    "Mismatch between expected col metadata columns {} and column values written to file: {}".format(
                        mini_gctoo.col_metadata_df.columns, mini_gctoo_col_metadata.columns))
    self.assertTrue(set(mini_gctoo.col_metadata_df.index) == set(mini_gctoo_col_metadata.index),
                    "Mismatch between expect col metadata index {} and index values written to file: {}".format(
                        mini_gctoo.col_metadata_df.index, mini_gctoo_col_metadata.index))
    for c in list(mini_gctoo.col_metadata_df.columns):
        self.assertTrue(set(mini_gctoo.col_metadata_df[c]) == set(mini_gctoo_col_metadata[c]),
                        "Values in column {} differ between expected metadata and written col metadata!".format(c))
def test_parse(self):
    """Check parse_gctx.parse on the mini fixture across its subsetting and sorting options.

    Covers: whole-file parsing; rid/cid, ridx/cidx and mixed subsetting;
    ids that look numeric (via a temporary "int_indexed_mini_gctoo.gctx"
    file); row_meta_only / col_meta_only; and sort_row_meta / sort_col_meta
    set to False, where the result is expected in the requested order.

    The temporary file is removed in a ``finally`` clause so a failing write
    or parse does not leave it behind.
    """
    # parse whole thing
    mg1 = mini_gctoo_for_testing.make()
    mg2 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx")
    pandas_testing.assert_frame_equal(mg1.data_df, mg2.data_df)
    pandas_testing.assert_frame_equal(mg1.row_metadata_df, mg2.row_metadata_df)
    pandas_testing.assert_frame_equal(mg1.col_metadata_df, mg2.col_metadata_df)

    # test with string rid/cid
    test_rids = [
        'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
        'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'
    ]
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    mg3 = subset_gctoo.subset_gctoo(mg1, rid=test_rids, cid=test_cids)
    mg4 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        rid=test_rids,
        cid=test_cids)
    pandas_testing.assert_frame_equal(mg3.data_df, mg4.data_df)
    pandas_testing.assert_frame_equal(mg3.row_metadata_df, mg4.row_metadata_df)
    pandas_testing.assert_frame_equal(mg3.col_metadata_df, mg4.col_metadata_df)

    # first, make & write out temp version of mini_gctoo with int rids/cids
    new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
    int_indexed_data_df = new_mg.data_df.copy()
    int_indexed_data_df.index = [str(i) for i in range(0, 6)]
    int_indexed_data_df.columns = [str(i) for i in range(10, 16)]
    int_indexed_row_meta = new_mg.row_metadata_df.copy()
    int_indexed_row_meta.index = int_indexed_data_df.index
    int_indexed_col_meta = new_mg.col_metadata_df.copy()
    int_indexed_col_meta.index = int_indexed_data_df.columns
    int_indexed_gctoo = GCToo.GCToo(data_df=int_indexed_data_df,
                                    row_metadata_df=int_indexed_row_meta,
                                    col_metadata_df=int_indexed_col_meta)

    try:
        write_gctx.write(int_indexed_gctoo, "int_indexed_mini_gctoo.gctx")

        # test with numeric (repr as string) rid/cid
        mg5 = GCToo.GCToo(data_df=int_indexed_data_df,
                          row_metadata_df=int_indexed_row_meta,
                          col_metadata_df=int_indexed_col_meta)
        mg5 = subset_gctoo.subset_gctoo(
            mg5,
            row_bool=[True, False, True, False, True, False],
            col_bool=[True, False, False, True, True, True])
        mg5.data_df.index.name = "rid"
        mg5.data_df.columns.name = "cid"
        mg5.row_metadata_df.index.name = "rid"
        mg5.row_metadata_df.columns.name = "rhd"
        mg5.col_metadata_df.index.name = "cid"
        mg5.col_metadata_df.columns.name = "chd"

        mg6 = parse_gctx.parse("int_indexed_mini_gctoo.gctx",
                               rid=["0", "2", "4"],
                               cid=["10", "13", "14", "15"],
                               convert_neg_666=False)
    finally:
        # guard so a failed write does not get masked by FileNotFoundError
        if os.path.exists("int_indexed_mini_gctoo.gctx"):
            os.remove("int_indexed_mini_gctoo.gctx")

    pandas_testing.assert_frame_equal(mg5.data_df, mg6.data_df)
    pandas_testing.assert_frame_equal(mg5.row_metadata_df, mg6.row_metadata_df)
    pandas_testing.assert_frame_equal(mg5.col_metadata_df, mg6.col_metadata_df)

    # test with ridx/cidx
    mg7 = subset_gctoo.subset_gctoo(
        mg1,
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    mg8 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        ridx=[4],
        cidx=[4])
    pandas_testing.assert_frame_equal(mg7.data_df, mg8.data_df)
    pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg8.row_metadata_df)
    pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg8.col_metadata_df)

    # test with rid/cidx
    mg9 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cidx=[4])
    pandas_testing.assert_frame_equal(mg7.data_df, mg9.data_df)
    pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg9.row_metadata_df)
    pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg9.col_metadata_df)

    # test with ridx/cid
    mg10 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        ridx=[4],
        cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    pandas_testing.assert_frame_equal(mg7.data_df, mg10.data_df)
    pandas_testing.assert_frame_equal(mg7.row_metadata_df, mg10.row_metadata_df)
    pandas_testing.assert_frame_equal(mg7.col_metadata_df, mg10.col_metadata_df)

    # test with row_meta_only
    mg11 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        row_meta_only=True)
    pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df)

    # test with col_meta_only
    mg12 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        col_meta_only=True)
    pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df)

    # NOTE: an argument-less parse that used to sit here (labelled
    # "sort_row_meta False and ridx") was removed: its result was overwritten
    # immediately and the same call is already checked as mg2 above.

    # test with sort_col_meta False and cidx
    mg13 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx",
        cidx=[4, 1, 3],
        sort_col_meta=False)
    pandas_testing.assert_frame_equal(mg13.data_df, mg1.data_df.iloc[:, [4, 1, 3]])
    pandas_testing.assert_frame_equal(
        mg13.col_metadata_df, mg1.col_metadata_df.iloc[[4, 1, 3], :])
    pandas_testing.assert_frame_equal(mg13.row_metadata_df, mg1.row_metadata_df)

    # test with sort_row_meta False and ridx
    mg14 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx",
        ridx=[3, 0, 1],
        sort_row_meta=False)
    pandas_testing.assert_frame_equal(mg14.data_df, mg1.data_df.iloc[[3, 0, 1], :])
    pandas_testing.assert_frame_equal(mg14.col_metadata_df, mg1.col_metadata_df)
    pandas_testing.assert_frame_equal(
        mg14.row_metadata_df, mg1.row_metadata_df.iloc[[3, 0, 1], :])

    # test with sort_col_meta False and cidx and col_meta_only
    mg15 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx",
        cidx=[4, 1, 3],
        sort_col_meta=False,
        col_meta_only=True)
    pandas_testing.assert_frame_equal(
        mg15, mg1.col_metadata_df.iloc[[4, 1, 3], :])

    # test with sort_row_meta False and ridx and row_meta_only
    mg16 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx",
        ridx=[3, 0, 1],
        sort_row_meta=False,
        row_meta_only=True)
    pandas_testing.assert_frame_equal(
        mg16, mg1.row_metadata_df.iloc[[3, 0, 1], :])

    # test with sort_col_meta False and cid
    cid_unsorted = [
        'LJP007_MCF7_24H:TRT_POSCON:BRD-K81418486:10',
        'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33'
    ]
    mg17 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests//mini_gctoo_for_testing.gctx",
        cid=cid_unsorted,
        sort_col_meta=False)
    pandas_testing.assert_frame_equal(mg17.data_df, mg1.data_df.iloc[:, [2, 0]])
    pandas_testing.assert_frame_equal(mg17.col_metadata_df,
                                      mg1.col_metadata_df.iloc[[2, 0], :])
    pandas_testing.assert_frame_equal(mg17.row_metadata_df, mg1.row_metadata_df)

    # test with sort_row_meta False and rid
    rid_unsorted = [
        'LJP007_MCF7_24H:TRT_CP:BRD-K64857848:10',
        'MISC003_A375_24H:TRT_CP:BRD-K93918653:3.33'
    ]
    mg18 = parse_gctx.parse(
        "cmapPy/pandasGEXpress/tests/functional_tests/mini_gctoo_for_testing.gctx",
        rid=rid_unsorted,
        sort_row_meta=False)
    pandas_testing.assert_frame_equal(mg18.data_df, mg1.data_df.iloc[[5, 1], :])
    pandas_testing.assert_frame_equal(mg18.col_metadata_df, mg1.col_metadata_df)
    pandas_testing.assert_frame_equal(mg18.row_metadata_df,
                                      mg1.row_metadata_df.iloc[[5, 1], :])