Exemplo n.º 1
0
    def setUp(self):
        """Prepare the WHAS500 fixture used by the tests.

        The features are passed through ``column.standardize`` with
        ``with_std=False`` and then through ``column.categorical_to_numeric``.
        The resulting values are stored in ``self.x``, the column names in
        ``self.columns`` and the target in ``self.y``.
        """
        features, self.y = load_whas500()

        standardized = column.standardize(features, with_std=False)
        numeric = column.categorical_to_numeric(standardized)

        self.columns = numeric.columns.tolist()
        self.x = numeric.values
Exemplo n.º 2
0
    def test_standardize_numpy_array():
        """Check ``column.standardize`` on the raw values of ``MIXED_DATA_FRAME``.

        The leading ``NUMERIC_DATA_FRAME.shape[1]`` columns of the result must
        match the expected table. The remaining columns must share the
        missing-value pattern of ``NON_NUMERIC_DATA_FRAME`` and, on the selected
        rows, its values.
        """
        n_numeric = NUMERIC_DATA_FRAME.shape[1]

        standardized = column.standardize(MIXED_DATA_FRAME.values)
        assert isinstance(standardized, numpy.ndarray)

        numeric_part = standardized[:, :n_numeric]
        other_part = standardized[:, n_numeric:]

        # Every one of the five expected columns holds the same ten values.
        column_values = numpy.array([
            -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
            0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
        ])
        expected_numeric = numpy.repeat(column_values[:, numpy.newaxis], 5, axis=1)
        assert_array_almost_equal(expected_numeric, numeric_part)

        # Missing entries must appear at the same positions as in the input.
        assert_array_equal(pandas.isnull(NON_NUMERIC_DATA_FRAME),
                           pandas.isnull(other_part))

        # Rows compared by value; presumably the rows without missing
        # entries (rows 6 and 7 are skipped) -- TODO confirm against the data.
        rows = [0, 1, 2, 3, 4, 5, 8, 9]
        assert_array_equal(NON_NUMERIC_DATA_FRAME.iloc[rows, :].values,
                           other_part[rows, :])
Exemplo n.º 3
0
    def test_compare_clinical_kernel(self):
        """Compare ``FastSurvivalSVM`` on kernel PCA features with ``FastKernelSurvivalSVM``.

        Both models use the pairwise kernel of a ``ClinicalKernelTransform``
        fitted on WHAS500. Their predictions must have the same length, the
        first concordance entry must agree approximately and the remaining
        entries exactly.
        """
        data, y = load_whas500()

        clinical = ClinicalKernelTransform()
        clinical.fit(data)
        kernel = clinical.pairwise_kernel

        x = encode_categorical(standardize(data))

        # Explicit feature map for the same kernel.
        kpca = KernelPCA(kernel=kernel)
        x_mapped = kpca.fit_transform(x)

        # Settings shared by both estimators.
        svm_params = {
            'optimizer': 'rbtree',
            'tol': 1e-8,
            'max_iter': 1000,
            'random_state': 0,
        }

        linear_svm = FastSurvivalSVM(**svm_params)
        linear_svm.fit(x_mapped, y)

        kernel_svm = FastKernelSurvivalSVM(kernel=kernel, **svm_params)
        kernel_svm.fit(x, y)

        pred_linear = linear_svm.predict(kpca.transform(x))
        pred_kernel = kernel_svm.predict(x)

        self.assertEqual(len(pred_linear), len(pred_kernel))

        event, time = y['fstat'], y['lenfol']
        cindex_linear = concordance_index_censored(event, time, pred_linear)
        cindex_kernel = concordance_index_censored(event, time, pred_kernel)

        self.assertAlmostEqual(cindex_linear[0], cindex_kernel[0])
        self.assertTupleEqual(cindex_linear[1:], cindex_kernel[1:])
Exemplo n.º 4
0
 def _make_whas500(with_mean=True, with_std=True, to_numeric=False):
     """Load WHAS500 and wrap it in a ``DataSetWithNames``.

     Parameters
     ----------
     with_mean : bool
         If true, pass the features through ``standardize``; otherwise
         they are left as loaded.
     with_std : bool
         Forwarded to ``standardize``; has no effect when ``with_mean``
         is false.
     to_numeric : bool
         If true, apply ``categorical_to_numeric`` afterwards.

     Returns
     -------
     DataSetWithNames
         ``x`` holds the feature values, ``y`` the target, ``names`` the
         column names preceded by ``'(Intercept)'`` and ``x_data_frame``
         the feature data frame itself.
     """
     features, target = load_whas500()

     if with_mean:
         features = standardize(features, with_std=with_std)
     if to_numeric:
         features = categorical_to_numeric(features)

     column_names = ['(Intercept)']
     column_names.extend(features.columns.tolist())

     return DataSetWithNames(
         x=features.values,
         y=target,
         names=column_names,
         x_data_frame=features,
     )
Exemplo n.º 5
0
    def test_standardize_not_inplace(numeric_data):
        """Check that ``column.standardize`` leaves its input array unmodified.

        ``numeric_data`` supplies a data frame and the expected result of
        standardizing it.
        """
        frame, expected = numeric_data
        values = frame.values
        snapshot = values.copy()

        standardized = column.standardize(values)

        assert_array_almost_equal(expected, standardized)
        # The array passed in must still equal the copy taken before the call.
        assert_array_almost_equal(snapshot, values)
Exemplo n.º 6
0
    def test_standardize_mixed(numeric_data, non_numeric_data_frame):
        """Check ``column.standardize`` on a frame with numeric and non-numeric columns.

        The numeric columns come first in the concatenated frame. They must
        match the expected values, and the trailing columns must equal
        ``non_numeric_data_frame``.
        """
        numeric_frame, expected = numeric_data
        n_numeric = numeric_frame.shape[1]

        combined = pandas.concat((numeric_frame, non_numeric_data_frame), axis=1)
        standardized = column.standardize(combined)

        assert isinstance(standardized, pandas.DataFrame)

        numeric_part = standardized.iloc[:, :n_numeric]
        assert_array_almost_equal(expected, numeric_part.values)

        other_part = standardized.iloc[:, n_numeric:]
        tm.assert_frame_equal(non_numeric_data_frame, other_part)
Exemplo n.º 7
0
    def test_standardize_non_numeric_numpy_array(non_numeric_data_frame):
        """Check ``column.standardize`` on the raw values of a non-numeric frame.

        The result must be an ndarray with the same missing-value pattern as
        the input and the same values on the selected rows.
        """
        standardized = column.standardize(non_numeric_data_frame.values)
        assert isinstance(standardized, numpy.ndarray)

        # Missing entries must appear at the same positions as in the input.
        assert_array_equal(pandas.isnull(non_numeric_data_frame),
                           pandas.isnull(standardized))

        # Rows compared by value; presumably the rows without missing
        # entries (rows 6 and 7 are skipped) -- TODO confirm against the fixture.
        rows = [0, 1, 2, 3, 4, 5, 8, 9]
        expected_rows = non_numeric_data_frame.iloc[rows, :].values
        assert_array_equal(expected_rows, standardized[rows, :])
Exemplo n.º 8
0
    def test_standardize_mixed_numpy_array(numeric_data, non_numeric_data_frame):
        """Check ``column.standardize`` on the raw values of a mixed frame.

        The numeric columns of the result are compared with the *input*
        numeric frame; the expected standardized values from the fixture are
        deliberately discarded.
        """
        # NOTE(review): presumably an array of mixed columns is returned
        # without numeric scaling, hence the comparison against the input --
        # confirm against column.standardize.
        numeric_frame, _ = numeric_data
        n_numeric = numeric_frame.shape[1]

        combined = pandas.concat((numeric_frame, non_numeric_data_frame), axis=1)
        standardized = column.standardize(combined.values)

        # Missing entries must appear at the same positions as in the input.
        assert_array_equal(pandas.isnull(combined),
                           pandas.isnull(standardized))

        assert_array_almost_equal(numeric_frame, standardized[:, :n_numeric])

        # Rows compared by value; presumably the rows without missing
        # entries (rows 6 and 7 are skipped) -- TODO confirm against the fixture.
        rows = [0, 1, 2, 3, 4, 5, 8, 9]
        other_part = standardized[:, n_numeric:]
        assert_array_equal(non_numeric_data_frame.iloc[rows, :].values,
                           other_part[rows, :])
Exemplo n.º 9
0
    def test_predict(rossi):
        """Check ``CoxPHSurvivalAnalysis.predict`` against reference values.

        The model is fitted on ``rossi.x`` after ``standardize`` with
        ``with_std=False``; predictions for a fixed set of rows are compared
        with the stored expectations.
        """
        features = standardize(rossi.x, with_std=False)

        model = CoxPHSurvivalAnalysis()
        model.fit(features.values, rossi.y)

        # Row positions whose predictions are checked.
        rows = numpy.array([15, 77, 79, 90, 113, 122, 134, 172, 213, 219, 257, 313, 364, 395, 409])

        # One reference prediction per entry of ``rows``, in the same order.
        expected = numpy.array([
            -0.136002823953217, -1.13104636905577, 0.741965816026403,
            -0.98072115186145, -0.600098931134794, -0.997407014712788,
            -0.0993800739865776, -0.266761246895696, -0.665145743277517,
            -0.418747210463951, -0.0770761787926419, 0.411385264707043,
            -0.0770761787926419, 0.563114305747799, -1.07096133044073,
        ])

        selected = features.iloc[rows, :].values
        predicted = model.predict(selected)

        assert_array_almost_equal(expected, predicted)
Exemplo n.º 10
0
    def test_standardize_numeric(self):
        """Check ``column.standardize`` on ``NUMERIC_DATA_FRAME``.

        The result must be a ``pandas.DataFrame`` whose values match the
        expected table.
        """
        result = column.standardize(NUMERIC_DATA_FRAME)

        # Every one of the five expected columns holds the same ten values.
        column_values = numpy.array([
            -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
            0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
        ])
        expected = numpy.repeat(column_values[:, numpy.newaxis], 5, axis=1)

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pandas.DataFrame)
        assert_array_almost_equal(expected, result)
Exemplo n.º 11
0
    def test_standardize_mixed(self):
        """Check ``column.standardize`` on ``MIXED_DATA_FRAME``.

        The leading ``NUMERIC_DATA_FRAME.shape[1]`` columns of the result must
        match the expected table; the remaining columns must equal
        ``NON_NUMERIC_DATA_FRAME``.
        """
        n_numeric = NUMERIC_DATA_FRAME.shape[1]
        result = column.standardize(MIXED_DATA_FRAME)

        # Every one of the five expected columns holds the same ten values.
        column_values = numpy.array([
            -1.486301, -1.156012, -0.825723, -0.495434, -0.165145,
            0.165145, 0.495434, 0.825723, 1.156012, 1.486301,
        ])
        expected = numpy.repeat(column_values[:, numpy.newaxis], 5, axis=1)

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pandas.DataFrame)
        assert_array_almost_equal(expected, result.iloc[:, :n_numeric].values)

        tm.assert_frame_equal(NON_NUMERIC_DATA_FRAME, result.iloc[:, n_numeric:])
Exemplo n.º 12
0
    def test_fit_and_predict_clinical_kernel(self):
        """Fit ``FastKernelSurvivalSVM`` with the clinical kernel on WHAS500.

        The kernel comes from a ``ClinicalKernelTransform`` fitted on the raw
        data. The test checks that ``_pairwise`` is false, that ``coef_`` has
        one entry per training sample along its first axis, and that the score
        is within ``1e-3`` of the reference value.
        """
        x_full, y = load_whas500()

        trans = ClinicalKernelTransform()
        trans.fit(x_full)

        x = encode_categorical(standardize(x_full))

        ssvm = FastKernelSurvivalSVM(optimizer="rbtree",
                                     kernel=trans.pairwise_kernel,
                                     max_iter=100,
                                     random_state=0)
        ssvm.fit(x.values, y)

        self.assertFalse(ssvm._pairwise)
        # assertEquals is a deprecated alias that was removed in Python 3.12.
        self.assertEqual(x.shape[0], ssvm.coef_.shape[0])

        c = ssvm.score(x.values, y)
        # Passes when abs(reference - c) <= 1e-3 and shows both values on failure.
        self.assertAlmostEqual(0.83699051218246412, c, delta=1e-3)
Exemplo n.º 13
0
    def test_standardize_non_numeric(self):
        """Check that ``column.standardize`` returns ``NON_NUMERIC_DATA_FRAME`` unchanged.

        The result must be a ``pandas.DataFrame`` equal to the input frame.
        """
        result = column.standardize(NON_NUMERIC_DATA_FRAME)

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(result, pandas.DataFrame)
        tm.assert_frame_equal(NON_NUMERIC_DATA_FRAME, result)
Exemplo n.º 14
0
 def setUp(self):
     """Load WHAS500 and store the prepared features and the target.

     ``self.x`` holds the features after ``standardize`` followed by
     ``encode_categorical``; ``self.y`` holds the target as loaded.
     """
     features, self.y = load_whas500()
     standardized = standardize(features)
     self.x = encode_categorical(standardized)
Exemplo n.º 15
0
 def setUp(self):
     """Load WHAS500 and store the standardized features and the target.

     ``self.x`` holds the output of ``standardize`` on the loaded features;
     ``self.y`` holds the target as loaded.
     """
     features, self.y = load_whas500()
     self.x = standardize(features)
Exemplo n.º 16
0
    def test_predict_survival_function(self):
        """Check ``CoxPHSurvivalAnalysis.predict_survival_function`` against reference values.

        The model is fitted on ``self.x`` after ``standardize`` with
        ``with_std=False``. One function is expected per selected row. Each
        function, evaluated at the expected ``x`` points, must be strictly
        decreasing and match the corresponding reference row.
        """
        features = standardize(self.x, with_std=False)

        model = CoxPHSurvivalAnalysis()
        model.fit(features, self.y)

        sample_rows = [9, 3, 313, 122, 431]
        surv_fns = model.predict_survival_function(features.values[sample_rows, :])
        self.assertEqual(len(surv_fns), len(sample_rows))

        # Points at which the first returned function is defined.
        x_expected = numpy.array([
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
            20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 36, 37,
            38, 39, 40, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52
        ])
        assert_array_almost_equal(surv_fns[0].x, x_expected)

        # One row of reference values per entry of ``sample_rows``.
        y_expected = numpy.array([
            [
                0.997461059155262, 0.994921063628358, 0.992385735058868,
                0.989851752006058, 0.987316287353143, 0.984776787077271,
                0.982229880405155, 0.969572939420418, 0.964484914195169,
                0.961943739897691, 0.956832876286024, 0.95164893059497,
                0.949061479987368, 0.941308786676055, 0.936108961524705,
                0.930908109920408, 0.923134421802298, 0.915290309566414,
                0.910039421812024, 0.896950008976804, 0.891710496383746,
                0.889091087732452, 0.886464513020254, 0.876021834875649,
                0.868208744867335, 0.860295712033317, 0.855028100866882,
                0.849763725779484, 0.844479679508505, 0.841831970503126,
                0.836536000803172, 0.831235646418842, 0.825894605739379,
                0.815224813877833, 0.807213878413131, 0.796552033034344,
                0.793873120526445, 0.788520538165122, 0.777873630457932,
                0.772511632709885, 0.761861634114672, 0.756514792077836,
                0.751165903610347, 0.740464726897091, 0.737784677797559,
                0.732429416106706, 0.719080607292328, 0.71107749717937,
                0.700374533644241
            ],
            [
                0.999043882016474, 0.998085846760273, 0.997128049274331,
                0.996169233883962, 0.995208324378805, 0.994244341388665,
                0.993275988428092, 0.988440345477267, 0.986485348400118,
                0.985506531271212, 0.983533018718637, 0.981524558134704,
                0.980519526333744, 0.977497911740073, 0.975462571926908,
                0.973419764375874, 0.970353074837982, 0.967242232655452,
                0.965150517551612, 0.95990330510734, 0.957789534855612,
                0.9567298847967, 0.955665378803038, 0.951413568664925,
                0.948211695979523, 0.944950494817014, 0.942769166879369,
                0.940580788681338, 0.938375713299261, 0.937267565568928,
                0.935044498941264, 0.932810785284511, 0.930550920812338,
                0.926008946520639, 0.922574381856697, 0.917970142218167,
                0.916807238599598, 0.914476355200083, 0.909810449176314,
                0.907445513776142, 0.902717737080621, 0.900328600224742,
                0.897927987577297, 0.893093037207259, 0.891875336188489,
                0.88943384158804, 0.883299174284255, 0.879587091746084,
                0.874581779605126
            ],
            [
                0.997047493865252, 0.994094987736893, 0.991149132479536,
                0.988206067094847, 0.985262510789073, 0.982315505128104,
                0.979361149485527, 0.964697962626958, 0.958812207831876,
                0.955874508145704, 0.949969979834156, 0.943986274173002,
                0.941001628199154, 0.932066788913148, 0.926080800490561,
                0.920099053753847, 0.91116833279606, 0.902169133142995,
                0.896152043528463, 0.881177346924239, 0.875193135907483,
                0.872203575284039, 0.869207278320275, 0.857309023710775,
                0.848421992818626, 0.839434570137547, 0.833459215086135,
                0.827493527632619, 0.821511608241697, 0.818516504870906,
                0.812530287850531, 0.806545301458209, 0.800520667700624,
                0.788504317779599, 0.779499208968453, 0.76753682302423,
                0.764535224692751, 0.758542854366408, 0.746643087255005,
                0.740660154164664, 0.728796971370325, 0.722851218124105,
                0.716910044469078, 0.705044665474413, 0.702077410385152,
                0.696153525886713, 0.681418170940623, 0.67260509515153,
                0.660844217439567
            ],
            [
                0.999277487981792, 0.998553357599042, 0.997829237091254,
                0.99710417672379, 0.996377361626995, 0.995648049432879,
                0.994915257060425, 0.991253310198176, 0.989771583274112,
                0.989029450846354, 0.9875325990774, 0.986008486326716,
                0.985245535623559, 0.982950582288741, 0.981403739445045,
                0.979850427761197, 0.977517078995485, 0.9751482936019,
                0.973554486943764, 0.969552590992207, 0.967938970293575,
                0.967129721757568, 0.966316544163349, 0.963066369097215,
                0.960616445221447, 0.958119046496724, 0.956447429333005,
                0.954769459155805, 0.953077720742534, 0.952227180730954,
                0.950520162020759, 0.948803968432749, 0.947066660332597,
                0.943571819475399, 0.9409262972535, 0.937376036066338,
                0.936478650448705, 0.934679123812276, 0.931073502063006,
                0.929244251313332, 0.925583871649869, 0.923732352300608,
                0.921870729568298, 0.918117633409758, 0.917171619503635,
                0.915273906739705, 0.910499952666305, 0.907607302382901,
                0.903702170040254
            ],
            [
                0.998608413113071, 0.997214644411471, 0.995821831013303,
                0.994428148000897, 0.993032034348724, 0.991632072399662,
                0.99022638680066, 0.983216197230745, 0.980386484787682,
                0.978970679590968, 0.976118050823751, 0.97321758347997,
                0.971767207148281, 0.967410751583306, 0.964479728136077,
                0.961540752050541, 0.957133998596443, 0.952670279630427,
                0.949672572311715, 0.942165647657932, 0.93914685715923,
                0.937634655006455, 0.936116291486253, 0.930059391448493,
                0.925506309233532, 0.920876059409491, 0.917783073342697,
                0.91468336624063, 0.911563331503877, 0.909996636973231,
                0.906856220364748, 0.903704188812084, 0.900518754266508,
                0.89412719205641, 0.889303474943224, 0.882849825667986,
                0.881222141750833, 0.877962502029932, 0.871448809785976,
                0.868153120346739, 0.861576393621591, 0.85825886866239,
                0.854929447472668, 0.848236143873358, 0.846553007760733,
                0.84318146930209, 0.834728527145727, 0.82962662516592,
                0.822762829346384
            ],
        ])

        for surv_fn, y_row in zip(surv_fns, y_expected):
            y_actual = [surv_fn(point) for point in x_expected]
            # Successive values must be strictly decreasing.
            steps = numpy.diff(y_actual)
            self.assertTrue((steps < 0).all())
            assert_array_almost_equal(y_actual, y_row)
Exemplo n.º 17
0
    def test_standardize_numeric(numeric_data):
        """Check ``column.standardize`` on a numeric data frame.

        ``numeric_data`` supplies the frame and the expected values; the result
        must be a ``pandas.DataFrame`` matching them.
        """
        frame, expected = numeric_data

        standardized = column.standardize(frame)

        assert isinstance(standardized, pandas.DataFrame)
        assert_array_almost_equal(expected, standardized)
Exemplo n.º 18
0
    def test_standardize_int_numpy_array(numeric_data):
        """Check ``column.standardize`` on an integer-typed ndarray.

        The frame's values are cast with ``astype(int)`` before the call; the
        result must be an ndarray matching the expected values.
        """
        frame, expected = numeric_data
        int_values = frame.values.astype(int)

        standardized = column.standardize(int_values)

        assert isinstance(standardized, numpy.ndarray)
        assert_array_almost_equal(expected, standardized)
Exemplo n.º 19
0
    def test_standardize_non_numeric(non_numeric_data_frame):
        """Check that ``column.standardize`` returns a non-numeric frame unchanged.

        The result must be a ``pandas.DataFrame`` equal to the input frame.
        """
        standardized = column.standardize(non_numeric_data_frame)

        assert isinstance(standardized, pandas.DataFrame)
        tm.assert_frame_equal(non_numeric_data_frame, standardized)