def test_slice_and_exclude_rids(self):
    # Request rids "a", "c" and "d" while excluding "d"; the result is
    # expected to match positional rows 0 and 2 of the fixture, with the
    # column metadata equal to the input's.
    out_gct = sg.slice_gctoo(
        self.in_gct, rid=["a", "c", "d"], exclude_rid=["d"])

    # pd.testing is the public location of assert_frame_equal; the older
    # pd.util.testing path is deprecated and is not available in pandas 2.0+.
    assert_frame_equal = pd.testing.assert_frame_equal

    # Outputs should be dataframes even if there is only 1 index or column
    assert_frame_equal(
        out_gct.data_df,
        self.in_gct.data_df.iloc[[0, 2], :])
    assert_frame_equal(
        out_gct.row_metadata_df,
        self.in_gct.row_metadata_df.iloc[[0, 2], :])
    assert_frame_equal(
        out_gct.col_metadata_df,
        self.in_gct.col_metadata_df)
def test_slice_bools(self):
    # Slice with boolean masks: the row mask selects positions 0 and 2 and
    # the column mask selects only position 2.
    out_gct = sg.slice_gctoo(
        self.in_gct,
        row_bool=[True, False, True, False],
        col_bool=[False, False, True])

    # pd.testing is the public location of assert_frame_equal; the older
    # pd.util.testing path is deprecated and is not available in pandas 2.0+.
    assert_frame_equal = pd.testing.assert_frame_equal

    # Outputs should be dataframes even if there is only 1 index or column
    assert_frame_equal(
        out_gct.data_df,
        pd.DataFrame(self.in_gct.data_df.iloc[[0, 2], 2]))
    assert_frame_equal(
        out_gct.row_metadata_df,
        self.in_gct.row_metadata_df.iloc[[0, 2], :])
    # Selecting a single row with iloc yields a Series, so the expected
    # one-row frame is rebuilt from it and transposed.
    assert_frame_equal(
        out_gct.col_metadata_df,
        pd.DataFrame(self.in_gct.col_metadata_df.iloc[2, :]).T)
def main():
    """Command-line entry point: slice an input gct and write the result.

    Parses the arguments from ``sys.argv``, reads the file at
    ``args.in_gct_path``, slices it using the rid/cid keep and exclude
    arguments, and writes the result to ``args.out_name`` with ``wgx.write``
    when ``args.use_gctx`` is set, otherwise with ``wg.write``.

    Raises:
        AssertionError: if the sliced data frame has no elements.
    """
    # Get args
    args = build_parser().parse_args(sys.argv[1:])
    setup_logger.setup(verbose=args.verbose)

    # Read the input gct
    in_gct = parse.parse(args.in_gct_path)

    # Read in each of the command line arguments
    rid = _read_arg(args.rid)
    cid = _read_arg(args.cid)
    exclude_rid = _read_arg(args.exclude_rid)
    exclude_cid = _read_arg(args.exclude_cid)

    # Slice the gct
    out_gct = sg.slice_gctoo(
        in_gct,
        rid=rid,
        cid=cid,
        exclude_rid=exclude_rid,
        exclude_cid=exclude_cid)

    # Raise explicitly instead of using an `assert` statement: asserts are
    # stripped under `python -O`, which would let an empty result be written.
    # AssertionError is kept so the exception type seen by callers is the same.
    if not out_gct.data_df.size > 0:
        raise AssertionError("Slicing yielded an empty gct!")

    # Write the output gct
    if args.use_gctx:
        wgx.write(out_gct, args.out_name)
    else:
        wg.write(
            out_gct,
            args.out_name,
            data_null="NaN",
            metadata_null="NA",
            filler_null="NA")
def test_slice_cid_and_col_bool(self):
    # cid and col_bool should not both be provided: the call is expected to
    # fail with an AssertionError whose message mentions both argument names.
    conflicting_kwargs = {
        "cid": ["e", "f", "g"],
        "col_bool": [True, True, False],
    }

    with self.assertRaises(AssertionError) as raised:
        sg.slice_gctoo(self.in_gct, **conflicting_kwargs)

    # Checked after the with-block; code placed after the failing call inside
    # the block would never execute.
    message = str(raised.exception)
    self.assertIn("cid and col_bool", message)
def test_gctx_parsing(self):
    # Run parse.parse against the mini gctx fixture with several argument
    # combinations and compare each result with the in-memory fixture (or a
    # slice of it).
    gctx_path = "functional_tests/mini_gctoo_for_testing.gctx"

    def assert_same_gctoo(expected, actual):
        # Compare the data, row metadata and column metadata frames in turn.
        pandas_testing.assert_frame_equal(
            expected.data_df, actual.data_df)
        pandas_testing.assert_frame_equal(
            expected.row_metadata_df, actual.row_metadata_df)
        pandas_testing.assert_frame_equal(
            expected.col_metadata_df, actual.col_metadata_df)

    # parse in gctx, no other arguments
    in_memory = mini_gctoo_for_testing.make()
    parsed_default = parse.parse(gctx_path)
    assert_same_gctoo(in_memory, parsed_default)

    # check convert_neg_666 worked correctly
    plate_ids = parsed_default.col_metadata_df["mfc_plate_id"]
    self.assertTrue(plate_ids.isnull().all())

    # parse w/o convert_neg_666
    parsed_raw = parse.parse(gctx_path, convert_neg_666=False)
    raw_plate_ids = parsed_raw.col_metadata_df["mfc_plate_id"]
    self.assertFalse(raw_plate_ids.isnull().all())

    # parsing w/rids & cids specified
    test_rids = [
        'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
        'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666',
    ]
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    sliced_by_id = slice_gctoo.slice_gctoo(
        in_memory, rid=test_rids, cid=test_cids)
    parsed_by_id = parse.parse(gctx_path, rid=test_rids, cid=test_cids)
    assert_same_gctoo(sliced_by_id, parsed_by_id)

    # parsing w/ridx & cidx specified
    # NOTE(review): cid is passed as a bare string here, not a list —
    # confirm slice_gctoo is meant to accept that.
    sliced_single = slice_gctoo.slice_gctoo(
        in_memory,
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cid='LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666')
    parsed_by_idx = parse.parse(gctx_path, ridx=[4], cidx=[4])
    assert_same_gctoo(sliced_single, parsed_by_idx)

    # parsing row metadata only
    row_meta = parse.parse(gctx_path, row_meta_only=True)
    pandas_testing.assert_frame_equal(row_meta, in_memory.row_metadata_df)

    # parsing col metadata only
    col_meta = parse.parse(gctx_path, col_meta_only=True)
    pandas_testing.assert_frame_equal(col_meta, in_memory.col_metadata_df)

    # parsing w/multiindex
    parsed_multi = parse.parse(gctx_path, make_multiindex=True)
    self.assertTrue(parsed_multi.multi_index_df is not None)
def test_parse(self):
    # Run parse_gctx.parse against the mini gctx fixture with rid/cid,
    # ridx/cidx, mixed id/index and metadata-only arguments, comparing each
    # result with the in-memory fixture (or a slice of it). One section
    # round-trips a temporary gctx file whose ids are numeric strings.
    gctx_path = "functional_tests/mini_gctoo_for_testing.gctx"
    temp_gctx_path = "int_indexed_mini_gctoo.gctx"

    def assert_same_gctoo(expected, actual):
        # Compare the data, row metadata and column metadata frames in turn.
        pandas_testing.assert_frame_equal(
            expected.data_df, actual.data_df)
        pandas_testing.assert_frame_equal(
            expected.row_metadata_df, actual.row_metadata_df)
        pandas_testing.assert_frame_equal(
            expected.col_metadata_df, actual.col_metadata_df)

    # parse whole thing
    mg1 = mini_gctoo_for_testing.make()
    mg2 = parse_gctx.parse(gctx_path)
    assert_same_gctoo(mg1, mg2)

    # test with string rid/cid
    test_rids = [
        'LJP007_MCF10A_24H:TRT_CP:BRD-K93918653:3.33',
        'LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666',
    ]
    test_cids = ['LJP007_MCF7_24H:TRT_POSCON:BRD-A61304759:10']
    mg3 = slice_gctoo.slice_gctoo(mg1, rid=test_rids, cid=test_cids)
    mg4 = parse_gctx.parse(gctx_path, rid=test_rids, cid=test_cids)
    assert_same_gctoo(mg3, mg4)

    # first, make & write out temp version of mini_gctoo with int rids/cids
    new_mg = mini_gctoo_for_testing.make(convert_neg_666=False)
    int_indexed_data_df = new_mg.data_df.copy()
    int_indexed_data_df.index = [str(i) for i in range(0, 6)]
    int_indexed_data_df.columns = [str(i) for i in range(10, 16)]
    int_indexed_row_meta = new_mg.row_metadata_df.copy()
    int_indexed_row_meta.index = int_indexed_data_df.index
    int_indexed_col_meta = new_mg.col_metadata_df.copy()
    int_indexed_col_meta.index = int_indexed_data_df.columns
    int_indexed_gctoo = GCToo.GCToo(
        data_df=int_indexed_data_df,
        row_metadata_df=int_indexed_row_meta,
        col_metadata_df=int_indexed_col_meta)
    write_gctx.write(int_indexed_gctoo, temp_gctx_path)

    # Everything between writing and re-reading the temp file runs under
    # try/finally so the file is removed even if slicing or parsing raises.
    try:
        # test with numeric (repr as string) rid/cid
        mg5 = GCToo.GCToo(
            data_df=int_indexed_data_df,
            row_metadata_df=int_indexed_row_meta,
            col_metadata_df=int_indexed_col_meta)
        mg5 = slice_gctoo.slice_gctoo(
            mg5,
            row_bool=[True, False, True, False, True, False],
            col_bool=[True, False, False, True, True, True])

        # Name the axes of the expected frames before comparing them with
        # the parsed result.
        mg5.data_df.index.name = "rid"
        mg5.data_df.columns.name = "cid"
        mg5.row_metadata_df.index.name = "rid"
        mg5.row_metadata_df.columns.name = "rhd"
        mg5.col_metadata_df.index.name = "cid"
        mg5.col_metadata_df.columns.name = "chd"

        mg6 = parse_gctx.parse(
            temp_gctx_path,
            rid=["0", "2", "4"],
            cid=["10", "13", "14", "15"],
            convert_neg_666=False)
    finally:
        os.remove(temp_gctx_path)

    assert_same_gctoo(mg5, mg6)

    # test with ridx/cidx
    # NOTE(review): cid is passed as a bare string here, not a list —
    # confirm slice_gctoo is meant to accept that.
    mg7 = slice_gctoo.slice_gctoo(
        mg1,
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cid='LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666')
    mg8 = parse_gctx.parse(gctx_path, ridx=[4], cidx=[4])
    assert_same_gctoo(mg7, mg8)

    # test with rid/cidx
    mg9 = parse_gctx.parse(
        gctx_path,
        rid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'],
        cidx=[4])
    assert_same_gctoo(mg7, mg9)

    # test with ridx/cid
    mg10 = parse_gctx.parse(
        gctx_path,
        ridx=[4],
        cid=['LJP007_MCF7_24H:CTL_VEHICLE:DMSO:-666'])
    assert_same_gctoo(mg7, mg10)

    # test with row_meta_only
    mg11 = parse_gctx.parse(gctx_path, row_meta_only=True)
    pandas_testing.assert_frame_equal(mg11, mg1.row_metadata_df)

    # test with col_meta_only
    mg12 = parse_gctx.parse(gctx_path, col_meta_only=True)
    pandas_testing.assert_frame_equal(mg12, mg1.col_metadata_df)