Exemplo n.º 1
0
    def test_027_from_parquet_largefile(self):
        """
        Check incremental_quantile_normalize on a larger random parquet file.

        A 5000 x 100 frame of random integers in [0, 100), cast to float, is
        written to "test_large.parquet" and normalized incrementally with
        row and column chunk sizes of 11. The frame read back from
        "test_large_out.parquet" must agree with the in-memory
        qnorm.quantile_normalize result to 4 decimals.
        """
        source_path = "test_large.parquet"
        target_path = "test_large_out.parquet"
        chunk = 11

        # fixed seed so the random input is the same on every run
        np.random.seed(42)
        sample_names = [f"sample{col}" for col in range(100)]
        frame = pd.DataFrame(index=range(5000), columns=sample_names)
        frame[:] = np.random.randint(0, 100, size=frame.shape)
        frame = frame.astype(float)
        frame.to_parquet(source_path)

        qnorm.incremental_quantile_normalize(
            source_path,
            target_path,
            rowchunksize=chunk,
            colchunksize=chunk,
        )
        normalized = pd.read_parquet(target_path)

        # reference result comes from the in-memory frame, not the file
        expected = qnorm.quantile_normalize(frame)
        np.testing.assert_almost_equal(expected, normalized.values, decimal=4)
Exemplo n.º 2
0
    def test_021_from_hdf_largefile(self):
        """
        Check incremental_quantile_normalize on a larger random hdf file.

        A 5000 x 100 frame filled with random integers in [0, 100) is stored
        in "test_large.hdf" under the key "qnorm" (table format) and
        normalized incrementally with row and column chunk sizes of 11. The
        frame read back from "test_large_out.hdf" must agree with the
        in-memory qnorm.quantile_normalize result to 4 decimals.
        """
        source_path = "test_large.hdf"
        target_path = "test_large_out.hdf"
        chunk = 11

        # fixed seed so the random input is the same on every run
        np.random.seed(42)
        sample_names = [f"sample{col}" for col in range(100)]
        frame = pd.DataFrame(
            index=range(5000),
            columns=sample_names,
            dtype=int,
        )
        frame[:] = np.random.randint(0, 100, size=frame.shape)
        frame.to_hdf(
            source_path,
            key="qnorm",
            format="table",
            data_columns=True,
        )

        qnorm.incremental_quantile_normalize(
            source_path,
            target_path,
            rowchunksize=chunk,
            colchunksize=chunk,
        )
        normalized = pd.read_hdf(target_path, index_col=0, header=0)

        # reference result comes from the in-memory frame, not the file
        expected = qnorm.quantile_normalize(frame)
        np.testing.assert_almost_equal(expected, normalized.values, decimal=4)
Exemplo n.º 3
0
    def test_017_from_hdf(self):
        """
        Check basic incremental_quantile_normalize behaviour on an hdf file.

        Normalizes "test.hdf" into "test_out.hdf" without passing explicit
        chunk sizes, then compares the written result against
        qnorm.quantile_normalize applied to the input, to 5 decimals.
        "test.hdf" must already exist (presumably created by fixture setup
        that is not part of this method).
        """
        source_path, target_path = "test.hdf", "test_out.hdf"
        read_opts = {"index_col": 0, "header": 0}

        qnorm.incremental_quantile_normalize(source_path, target_path)
        original = pd.read_hdf(source_path, **read_opts)
        normalized = pd.read_hdf(target_path, **read_opts)

        expected = qnorm.quantile_normalize(original)
        np.testing.assert_almost_equal(expected, normalized.values, decimal=5)
Exemplo n.º 4
0
    def test_023_from_parquet(self):
        """
        Check basic incremental_quantile_normalize behaviour on parquet.

        Normalizes "test.parquet" into "test_out.parquet" without passing
        explicit chunk sizes, then compares the written result against
        qnorm.quantile_normalize applied to the input, to 5 decimals.
        "test.parquet" must already exist (presumably created by fixture
        setup that is not part of this method).
        """
        source_path, target_path = "test.parquet", "test_out.parquet"

        qnorm.incremental_quantile_normalize(source_path, target_path)
        original = pd.read_parquet(source_path)
        normalized = pd.read_parquet(target_path)

        expected = qnorm.quantile_normalize(original)
        np.testing.assert_almost_equal(expected, normalized.values, decimal=5)
Exemplo n.º 5
0
    def test_025_from_parquet_colchunk(self):
        """
        Check incremental_quantile_normalize on parquet for varying colchunks.

        For every column chunk size from 1 to 9, "test.parquet" is
        normalized into "test_out.parquet" and the written result is compared
        against qnorm.quantile_normalize of the input, to 5 decimals.
        """
        source_path, target_path = "test.parquet", "test_out.parquet"
        original = pd.read_parquet(source_path)

        for n_cols in range(1, 10):
            qnorm.incremental_quantile_normalize(
                source_path,
                target_path,
                colchunksize=n_cols,
            )
            # the output file is overwritten on each pass, so re-read it
            normalized = pd.read_parquet(target_path)

            expected = qnorm.quantile_normalize(original)
            np.testing.assert_almost_equal(
                expected, normalized.values, decimal=5
            )
Exemplo n.º 6
0
    def test_019_from_hdf_colchunk(self):
        """
        Check incremental_quantile_normalize on hdf for varying colchunks.

        For every column chunk size from 1 to 9, "test.hdf" is normalized
        into "test_out.hdf" and the written result is compared against
        qnorm.quantile_normalize of the input, to 5 decimals.
        """
        source_path, target_path = "test.hdf", "test_out.hdf"
        read_opts = {"index_col": 0, "header": 0}
        original = pd.read_hdf(source_path, **read_opts)

        for n_cols in range(1, 10):
            qnorm.incremental_quantile_normalize(
                source_path,
                target_path,
                colchunksize=n_cols,
            )
            # the output file is overwritten on each pass, so re-read it
            normalized = pd.read_hdf(target_path, **read_opts)

            expected = qnorm.quantile_normalize(original)
            np.testing.assert_almost_equal(
                expected, normalized.values, decimal=5
            )
Exemplo n.º 7
0
    def test_013_from_csv_rowchunk(self):
        """
        Check incremental_quantile_normalize on csv for varying rowchunks.

        For every row chunk size from 1 to 9, "test.csv" is normalized into
        "test_out.csv" and the written result is compared against
        qnorm.quantile_normalize of the input, to 5 decimals.
        """
        source_path, target_path = "test.csv", "test_out.csv"
        read_opts = {"index_col": 0, "header": 0}
        original = pd.read_csv(source_path, **read_opts)

        for n_rows in range(1, 10):
            qnorm.incremental_quantile_normalize(
                source_path,
                target_path,
                rowchunksize=n_rows,
            )
            # the output file is overwritten on each pass, so re-read it
            normalized = pd.read_csv(target_path, **read_opts)

            expected = qnorm.quantile_normalize(original)
            np.testing.assert_almost_equal(
                expected, normalized.values, decimal=5
            )
Exemplo n.º 8
0
    def test_026_from_parquet_colrowchunk(self):
        """
        Check incremental_quantile_normalize on parquet for row+col chunks.

        Every combination of column and row chunk sizes from 1 to 9 is used
        to normalize "test.parquet" into "test_out.parquet"; each written
        result is compared against qnorm.quantile_normalize of the input,
        to 5 decimals.
        """
        source_path, target_path = "test.parquet", "test_out.parquet"
        chunk_sizes = range(1, 10)
        original = pd.read_parquet(source_path)

        # columns vary in the outer loop, rows in the inner loop
        for n_cols in chunk_sizes:
            for n_rows in chunk_sizes:
                qnorm.incremental_quantile_normalize(
                    source_path,
                    target_path,
                    rowchunksize=n_rows,
                    colchunksize=n_cols,
                )
                normalized = pd.read_parquet(target_path)

                expected = qnorm.quantile_normalize(original)
                np.testing.assert_almost_equal(
                    expected, normalized.values, decimal=5
                )
Exemplo n.º 9
0
    def test_020_from_hdf_colrowchunk(self):
        """
        Check incremental_quantile_normalize on hdf for row+col chunks.

        Every combination of column and row chunk sizes from 1 to 9 is used
        to normalize "test.hdf" into "test_out.hdf"; each written result is
        compared against qnorm.quantile_normalize of the input, to
        5 decimals.
        """
        source_path, target_path = "test.hdf", "test_out.hdf"
        read_opts = {"index_col": 0, "header": 0}
        chunk_sizes = range(1, 10)
        original = pd.read_hdf(source_path, **read_opts)

        # columns vary in the outer loop, rows in the inner loop
        for n_cols in chunk_sizes:
            for n_rows in chunk_sizes:
                qnorm.incremental_quantile_normalize(
                    source_path,
                    target_path,
                    rowchunksize=n_rows,
                    colchunksize=n_cols,
                )
                normalized = pd.read_hdf(target_path, **read_opts)

                expected = qnorm.quantile_normalize(original)
                np.testing.assert_almost_equal(
                    expected, normalized.values, decimal=5
                )
Exemplo n.º 10
0
    def test_016_from_csv_largefile(self):
        """
        Check incremental_quantile_normalize on a larger random csv file.

        A 5000 x 100 frame filled with random integers in [0, 100) is written
        to "test_large.csv" and normalized incrementally with row and column
        chunk sizes of 11. The frame read back from "test_large_out.csv"
        must agree with the in-memory qnorm.quantile_normalize result to
        4 decimals.
        """
        source_path = "test_large.csv"
        target_path = "test_large_out.csv"
        chunk = 11

        # fixed seed so the random input is the same on every run
        np.random.seed(42)
        frame = pd.DataFrame(index=range(5000), columns=range(100))
        frame[:] = np.random.randint(0, 100, size=frame.shape)
        frame.to_csv(source_path)

        qnorm.incremental_quantile_normalize(
            source_path,
            target_path,
            rowchunksize=chunk,
            colchunksize=chunk,
        )
        normalized = pd.read_csv(target_path, index_col=0, header=0)

        # reference result comes from the in-memory frame, not the file
        expected = qnorm.quantile_normalize(frame)
        np.testing.assert_almost_equal(expected, normalized.values, decimal=4)