Exemplo n.º 1
0
    def test_alpha_too_small(self):
        """Fitting with a single, very small ``alpha`` must raise ``ArithmeticError``.

        The breast cancer data is one-hot encoded and restricted to a fixed
        subset of rows; a ``CoxnetSurvivalAnalysis`` model with
        ``l1_ratio=1.0`` and one explicit alpha is then fitted on that subset.
        """
        X, y = load_breast_cancer()
        Xt = OneHotEncoder().fit_transform(X)
        # Fixed row positions selecting the subset the model is fitted on.
        index = numpy.array([
            0, 1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21,
            22, 23, 25, 26, 27, 28, 29, 30, 31, 33, 34, 36, 39, 40, 41, 42, 43,
            44, 45, 46, 47, 48, 49, 51, 52, 53, 54, 56, 57, 58, 60, 61, 62, 63,
            64, 65, 66, 68, 70, 71, 72, 75, 76, 78, 79, 80, 82, 84, 85, 86, 87,
            88, 90, 91, 92, 93, 94, 95, 98, 99, 100, 102, 103, 104, 105, 107,
            108, 109, 110, 111, 113, 114, 115, 116, 117, 118, 119, 120, 121,
            124, 125, 126, 127, 128, 130, 131, 132, 133, 135, 136, 137, 138,
            139, 140, 143, 144, 145, 147, 148, 150, 151, 153, 154, 155, 156,
            157, 158, 160, 161, 164, 165, 166, 167, 168, 169, 170, 171, 172,
            174, 175, 177, 178, 180, 181, 182, 183, 184, 185, 186, 187, 188,
            190, 191, 192, 193, 194, 195, 196, 197
        ])

        nn = CoxnetSurvivalAnalysis(alphas=[0.007295025406624247],
                                    l1_ratio=1.0)
        Xf, yf = Xt.iloc[index], y[index]

        # assertRaisesRegex interprets the expected message as a regular
        # expression, so the full stops are escaped to match literally
        # instead of matching any character.
        expected_message = (
            r"Numerical error, because weights are too large\. "
            r"Consider increasing alpha\."
        )
        with self.assertRaisesRegex(ArithmeticError, expected_message):
            nn.fit(Xf, yf)
Exemplo n.º 2
0
def test_breast_cancer_2():
    """Check the structure of a randomly-split ``SurvivalTree`` on breast cancer data.

    The ``er`` and ``grade`` columns are replaced by integer codes, a tree
    with ``splitter="random"`` and ``max_features="log2"`` is fitted, and the
    resulting node capacity, split features, per-node sample counts and
    thresholds (to 5 decimals) are compared with fixed reference values.
    """
    X, y = load_breast_cancer()

    er_codes = {"negative": 0, "positive": 1}
    grade_codes = {
        "poorly differentiated": 0,
        "intermediate": 1,
        "well differentiated": 2,
        "unkown": 3,
    }
    X.loc[:, "er"] = X.loc[:, "er"].replace(er_codes)
    X.loc[:, "grade"] = X.loc[:, "grade"].replace(grade_codes)

    model = SurvivalTree(
        max_features="log2",
        splitter="random",
        max_depth=5,
        min_samples_split=30,
        min_samples_leaf=15,
        random_state=6,
    )
    model.fit(X.values, y)

    # Reference values describing the fitted tree, one entry per node.
    expected_feature = numpy.array([
        55, 14, TREE_UNDEFINED, 60, 23, TREE_UNDEFINED, TREE_UNDEFINED, 31,
        TREE_UNDEFINED, TREE_UNDEFINED, TREE_UNDEFINED
    ])
    expected_n_node_samples = numpy.array(
        [198, 153, 76, 77, 46, 16, 30, 31, 16, 15, 45])
    expected_threshold = numpy.array([
        11.3019, 9.0768, TREE_UNDEFINED, 8.6903, 6.83564, TREE_UNDEFINED,
        TREE_UNDEFINED, 10.66262, TREE_UNDEFINED, TREE_UNDEFINED,
        TREE_UNDEFINED
    ])

    assert model.tree_.capacity == 11
    assert_array_equal(model.tree_.feature, expected_feature)
    assert_array_equal(model.tree_.n_node_samples, expected_n_node_samples)
    assert_array_almost_equal(model.tree_.threshold, expected_threshold, 5)
Exemplo n.º 3
0
    def test_breast_example(self):
        """Compare a ``CoxnetSurvivalAnalysis`` fit on breast cancer data with reference values.

        The categorical columns are encoded, a model with ``l1_ratio=1.0`` is
        fitted, and its ``alphas_``, ``deviance_ratio_`` and ``coef_`` are
        checked against fixed reference arrays and a coefficients CSV file.
        """
        raw, y = load_breast_cancer()
        features = column.encode_categorical(raw)

        model = CoxnetSurvivalAnalysis(l1_ratio=1.0)
        model.fit(features.values, y)

        # Reference alpha values the fitted model is expected to expose.
        alphas_reference = numpy.array([
            0.207764947265866, 0.189307681974955, 0.172490109262135, 0.157166563357949, 0.143204319038428,
            0.130482442022696, 0.118890741498079, 0.108328815700004, 0.0987051822799425, 0.0899364859290742,
            0.0819467763944772, 0.0746668506343715, 0.0680336534144775, 0.0619897311537413, 0.0564827342889011,
            0.051464963847614, 0.046892958302776, 0.0427271171295661, 0.0389313578046448, 0.0354728032765984,
            0.0323214972006479, 0.0294501444711215, 0.0268338748043064, 0.0244500273239498, 0.0222779542835891,
            0.0202988422256499, 0.0184955490282766, 0.0168524554284737, 0.0153553297355215, 0.0139912045628799,
            0.0127482645108893, 0.0116157438274312, 0.0105838331601337, 0.00964359459245389, 0.00878688422772072,
            0.00800628165059773, 0.0072950256549955, 0.0066469556817389, 0.00605645845875073, 0.00551841938157428,
            0.00502817821311635, 0.00458148871890295, 0.00417448188822764, 0.00380363242263169, 0.00346572820145532,
            0.00315784245998521, 0.00287730843921864, 0.00262169628767281, 0.00238879201517371, 0.00217657831633235,
            0.00198321709761059, 0.00180703355663423, 0.00164650167585602, 0.00150023100492174, 0.0013669546172544,
            0.00124551813654232, 0.00113486973808373, 0.00103405103838443, 0.000942188794098442, 0.000858487338411865,
            0.000782221689357606, 0.00071273127036839, 0.000649414188678556, 0.000591722022016858, 0.00053915506843511,
            0.00049125801812897, 0.000447616009762226, 0.000407851037136367, 0.000371618675081733, 0.000338605096211458,
            0.000308524352698783, 0.00028111589953377, 0.000256142337807075, 0.000233387358474159, 0.000212653868789829,
            0.000193762285185162, 0.000176548977800548, 0.000160864853202119, 0.000146574063005757,
            0.000133552827223371, 0.000121688362139862, 0.000110877903434536, 0.000101027816085719,
            9.20527833489927e-05, 8.38750677843702e-05, 7.64238379317803e-05, 6.96345548028444e-05, 6.34484128750348e-05
        ])
        assert_array_almost_equal(alphas_reference, model.alphas_)

        # Reference deviance ratios, compared element-wise with the fitted model.
        deviance_ratio_reference = numpy.array([
            0, 0.00741462796207568, 0.0135178719105177, 0.0183232499901932, 0.0221250029051101, 0.0251530137843965,
            0.0275599035016693, 0.0298664819929119, 0.033763232356598, 0.0374249162331977, 0.0409637006907067,
            0.0454486054162627, 0.0551615080395675, 0.0651612844343542, 0.0736024993960834, 0.0808820441173129,
            0.0894426534710234, 0.0992239010000626, 0.108910229105339, 0.121376204780063, 0.134004998770465,
            0.145079557491685, 0.156667501995989, 0.167543840680748, 0.178622131991811, 0.189365153169168,
            0.199027839424271, 0.20909726215036, 0.218610320633419, 0.228024278642459, 0.238171883969976,
            0.248070501745195, 0.258480494697342, 0.268971907277929, 0.280744803445048, 0.291329662029924,
            0.300942928439923, 0.309972153913063, 0.318315812887558, 0.325822700491885, 0.332992506325249,
            0.339665277042211, 0.345876707002969, 0.351605625998246, 0.357206102668659, 0.362484660673399,
            0.367624391654207, 0.372275248793233, 0.37674043994605, 0.380887801196039, 0.384795899779142,
            0.388569806807258, 0.392075171498793, 0.395375481018565, 0.398377579969751, 0.400997300805061,
            0.403375467852471, 0.405431976972633, 0.407443593366561, 0.409668341757423, 0.411628734365416,
            0.413367576771339, 0.414896999887021, 0.416268233594787, 0.417475290203319, 0.418554781508749,
            0.419526121036389, 0.420522904669104, 0.421455233639571, 0.422296101083462, 0.423049677446171,
            0.423716974236606, 0.424302533927477, 0.424825925226932, 0.425286695396174, 0.425693415010937,
            0.426052733081791, 0.426369464812111, 0.426652822940747, 0.42686317150694, 0.427072533094355,
            0.427264216646862, 0.427427314063872, 0.427573225735422, 0.427700379783919, 0.427814235325525,
            0.427912925916531, 0.427998148400703
        ])
        assert_array_almost_equal(deviance_ratio_reference, model.deviance_ratio_)

        # The reference coefficients are read from a CSV file; its columns are
        # renumbered 0..n-1 before the comparison with the fitted coefficients.
        coef_reference = pandas.read_csv(BREAST_CANCER_COEFFICIENTS_FILE, index_col=0)
        coef_reference.columns = numpy.arange(coef_reference.shape[1])
        coef_fitted = pandas.DataFrame(model.coef_, index=features.columns, dtype=float)

        assert_columns_almost_equal(coef_fitted, coef_reference, 5)
Exemplo n.º 4
0
def breast_cancer():
    """Return the breast cancer data with ``er`` and ``grade`` replaced by integer codes.

    Returns
    -------
    tuple
        The modified feature table and the outcome returned by
        ``load_breast_cancer``.
    """
    features, outcome = load_breast_cancer()

    # Per-column mapping from the original labels to integer codes.
    recodings = {
        "er": {"negative": 0, "positive": 1},
        "grade": {
            "intermediate": 0,
            "poorly differentiated": 1,
            "unkown": 2,
            "well differentiated": 3,
        },
    }
    for col_name, codes in recodings.items():
        features.loc[:, col_name] = features.loc[:, col_name].replace(codes)

    return features, outcome
Exemplo n.º 5
0
def test_pipeline_predict(breast_cancer, name, func):
    """A forest fitted directly and one fitted inside a pipeline must predict alike.

    The estimator class is looked up in ``FORESTS`` by ``name``. One instance
    is fitted on the data supplied by ``breast_cancer``; another is fitted on
    the raw data behind a ``OneHotEncoder`` step. The method named ``func`` is
    then called on the first 10 rows of each and the results compared.
    """
    raw_features, _ = load_breast_cancer()
    numeric_features, outcome = breast_cancer

    forest_cls = FORESTS[name]
    n_held_out = 10  # the first rows are kept out of fitting and used for prediction

    direct = forest_cls(n_estimators=10, random_state=1)
    direct.fit(numeric_features[n_held_out:], outcome[n_held_out:])

    piped = make_pipeline(
        OneHotEncoder(),
        forest_cls(n_estimators=10, random_state=1),
    )
    piped.fit(raw_features[n_held_out:], outcome[n_held_out:])

    direct_predict = getattr(direct, func)
    piped_predict = getattr(piped, func)
    direct_result = direct_predict(numeric_features[:n_held_out], return_array=True)
    piped_result = piped_predict(raw_features[:n_held_out], return_array=True)

    assert_array_almost_equal(direct_result, piped_result)
Exemplo n.º 6
0
    def test_fit_unpenalized():
        """Fit ``CoxPHSurvivalAnalysis`` with a per-feature ``alpha`` and check the coefficients.

        Rows whose grade is ``"unkown"`` are dropped, the data is one-hot
        encoded, and five columns are moved to the front and given an alpha
        of 0.0 while every other column gets 1.0. The fitted coefficients are
        compared with a fixed reference array.
        """
        X, y = load_breast_cancer()

        # Keep only the rows with a known grade, in both features and outcome.
        keep = X["grade"] != "unkown"
        X = X.loc[keep, :]
        y = y[keep.values]

        # Rebuild "grade" as a categorical restricted to the remaining levels.
        grade_levels = ["intermediate", "poorly differentiated",
                        "well differentiated"]
        grade_values = pandas.Categorical(X["grade"].astype(object),
                                          categories=grade_levels)
        X["grade"] = pandas.Series(grade_values, index=X.index, name="grade")

        X = OneHotEncoder().fit_transform(X)

        # Place the columns that get a zero alpha first, followed by the rest.
        cols_unpen = ['age', 'size', 'grade=poorly differentiated',
                      'grade=well differentiated', 'er=positive']
        leading = X.loc[:, cols_unpen]
        trailing = X.drop(cols_unpen, axis=1)
        X = pandas.concat((leading, trailing), axis=1)

        n_unpenalized = len(cols_unpen)
        penalties = numpy.ones(X.shape[1])
        penalties[:n_unpenalized] = 0.0

        model = CoxPHSurvivalAnalysis(alpha=penalties)
        model.fit(X, y)

        expected_coef = numpy.array([
            -0.0228825990482334, 0.635554486750423, -0.242079636336473,
            -1.30197563647684, -2.27790151300312,
            0.291950212930807, 0.210861165049552, -0.612456645638769, -0.453414844486013, -0.1239424190253,
            0.196855946938761, 1.08724198521351, -0.313645443818603, -0.660016141198812, 1.07104977404073,
            0.559632480471393, -0.47740746012516, -1.26199769642326, -1.40486191330444, -0.418517018253652,
            0.284936091689505, -0.215531076378674, -0.200889269720281, 0.341231176941461, 0.0307350667648337,
            -0.212527052910377, -0.3019678509188, 0.54491723178866, -0.286914381308269, 0.370374100647823,
            -0.496258248067704, 0.624528657777646, 0.287884026214139, 0.022095151910937, 0.910293732936019,
            -0.13076488639207, 0.0857209529827562, -0.0922302696963889, 0.498136631416287, 0.937133644376614,
            0.395090607856869, -1.04727952099579, -0.54974694800345, 0.442372971174454, -0.745558450753062,
            -0.0920496108021893, 0.75549238586293, 0.562496351046743, 0.259183349320614, 0.405816113039412,
            -0.0969485695700491, -0.507388915258978, -0.474246597197329, -0.209335517183595, 0.187390427612498,
            -0.0522568530719332, 0.0806559868641646, -0.0397654339013217, -0.269582356665396, 0.791793553908743,
            0.344208857844796, -0.180165785909583, -0.7927695046551, 0.0311635012097026, -0.579429950080662,
            -0.264770995160963, 0.869512689697827, 0.765479119494175, -0.173588059680979, -0.199781736503338,
            -0.58712767650975, -0.457389854855, 0.3891865514653, 0.707309743580534, -0.121997864690072,
            0.0447174402649954, 0.0319336975869795, 0.0117988435665652, -0.593691059339064, -0.838107176656365,
            -0.247955128152877
        ])

        assert_array_almost_equal(model.coef_, expected_coef)
Exemplo n.º 7
0
def test_pipeline_predict(func):
    """A ``RandomSurvivalForest`` must predict alike with and without a pipeline.

    One forest is fitted on data encoded with ``encode_categorical``; an
    identically configured one is fitted on the raw data behind a
    ``OneHotEncoder`` pipeline step. The method named ``func`` is called on
    the first 10 rows of each and the results compared.
    """
    raw_features, outcome = load_breast_cancer()
    encoded_features = encode_categorical(raw_features)

    n_held_out = 10  # the first rows are kept out of fitting and used for prediction

    forest = RandomSurvivalForest(n_estimators=10, random_state=1)
    forest.fit(encoded_features[n_held_out:], outcome[n_held_out:])

    pipeline = make_pipeline(
        OneHotEncoder(),
        RandomSurvivalForest(n_estimators=10, random_state=1),
    )
    pipeline.fit(raw_features[n_held_out:], outcome[n_held_out:])

    forest_result = getattr(forest, func)(encoded_features[:n_held_out])
    pipeline_result = getattr(pipeline, func)(raw_features[:n_held_out])

    assert_array_almost_equal(forest_result, pipeline_result)
Exemplo n.º 8
0
def test_pipeline_predict(breast_cancer, func):
    """A ``SurvivalTree`` must predict alike with and without an ``OrdinalEncoder`` pipeline.

    Only the ``er`` and ``grade`` columns are used. One tree is fitted on the
    values supplied by ``breast_cancer``; another is fitted on the raw values
    behind an ``OrdinalEncoder`` step. The method named ``func`` is called on
    the first 10 rows of each and the results compared.
    """
    selected = ["er", "grade"]
    n_held_out = 10  # the first rows are kept out of fitting and used for prediction

    numeric_frame, outcome = breast_cancer
    numeric_values = numeric_frame.loc[:, selected].values

    plain_tree = SurvivalTree().fit(numeric_values[n_held_out:], outcome[n_held_out:])

    raw_frame, _ = load_breast_cancer()
    raw_values = raw_frame.loc[:, selected].values

    pipeline = make_pipeline(OrdinalEncoder(), SurvivalTree())
    pipeline.fit(raw_values[n_held_out:], outcome[n_held_out:])

    tree_predict = getattr(plain_tree, func)
    pipeline_predict = getattr(pipeline, func)
    tree_result = tree_predict(numeric_values[:n_held_out], return_array=True)
    pipeline_result = pipeline_predict(raw_values[:n_held_out], return_array=True)

    assert_array_almost_equal(tree_result, pipeline_result)
Exemplo n.º 9
0
def test_breast_cancer_1():
    """Check the structure of a ``SurvivalTree`` limited to 10 leaf nodes on breast cancer data.

    The ``er`` and ``grade`` columns are replaced by integer codes, a tree
    with ``max_leaf_nodes=10`` and fractional ``min_samples_*`` settings is
    fitted, and the resulting node capacity, split features, per-node sample
    counts and thresholds (to 5 decimals) are compared with fixed reference
    values.
    """
    X, y = load_breast_cancer()

    er_codes = {"negative": 0, "positive": 1}
    grade_codes = {
        "poorly differentiated": 0,
        "intermediate": 1,
        "well differentiated": 2,
        "unkown": 3,
    }
    X.loc[:, "er"] = X.loc[:, "er"].replace(er_codes)
    X.loc[:, "grade"] = X.loc[:, "grade"].replace(grade_codes)

    model = SurvivalTree(
        max_features="auto",
        max_depth=5,
        max_leaf_nodes=10,
        min_samples_split=0.06,
        min_samples_leaf=0.03,
        random_state=6,
    )
    model.fit(X.values, y)

    # Reference values describing the fitted tree, one entry per node.
    expected_feature = numpy.array([
        61, 29, 5, TREE_UNDEFINED, 40, 65, TREE_UNDEFINED, 10, 12, 4,
        TREE_UNDEFINED, TREE_UNDEFINED, TREE_UNDEFINED, TREE_UNDEFINED,
        TREE_UNDEFINED, TREE_UNDEFINED, 10, TREE_UNDEFINED, TREE_UNDEFINED
    ])
    expected_n_node_samples = numpy.array([
        198, 170, 28, 8, 20, 164, 6, 59, 105, 74, 31, 9, 65, 13, 7, 39, 20,
        7, 13
    ])
    expected_threshold = numpy.array([
        10.97448, 11.10251, 11.34859, TREE_UNDEFINED, 10.53533, 8.08848,
        TREE_UNDEFINED, 10.86403, 10.14138, 11.49171, TREE_UNDEFINED,
        TREE_UNDEFINED, TREE_UNDEFINED, TREE_UNDEFINED, TREE_UNDEFINED,
        TREE_UNDEFINED, 11.01874, TREE_UNDEFINED, TREE_UNDEFINED
    ])

    assert model.tree_.capacity == 19
    assert_array_equal(model.tree_.feature, expected_feature)
    assert_array_equal(model.tree_.n_node_samples, expected_n_node_samples)
    assert_array_almost_equal(model.tree_.threshold, expected_threshold, 5)
Exemplo n.º 10
0
def test_pipeline_predict(func):
    """A ``CoxnetSurvivalAnalysis`` model must predict alike with and without a pipeline.

    One model is fitted on data encoded with ``column.encode_categorical``;
    an identically configured one is fitted on the raw data behind a
    ``OneHotEncoder`` pipeline step. The method named ``func`` is called on
    the first 10 rows of each, and the ``x`` and ``y`` attributes of the
    returned items are compared pairwise.
    """
    raw_features, outcome = load_breast_cancer()
    encoded_features = column.encode_categorical(raw_features)

    n_held_out = 10  # the first rows are kept out of fitting and used for prediction

    model = CoxnetSurvivalAnalysis(
        alpha_min_ratio=0.0001,
        l1_ratio=1.0,
        fit_baseline_model=True,
    )
    model.fit(encoded_features[n_held_out:], outcome[n_held_out:])

    pipeline = make_pipeline(
        OneHotEncoder(),
        CoxnetSurvivalAnalysis(
            alpha_min_ratio=0.0001,
            l1_ratio=1.0,
            fit_baseline_model=True,
        ),
    )
    pipeline.fit(raw_features[n_held_out:], outcome[n_held_out:])

    model_result = getattr(model, func)(encoded_features[:n_held_out])
    pipeline_result = getattr(pipeline, func)(raw_features[:n_held_out])

    for from_model, from_pipeline in zip(model_result, pipeline_result):
        assert_array_almost_equal(from_model.x, from_pipeline.x)
        assert_array_almost_equal(from_model.y, from_pipeline.y)
Exemplo n.º 11
0
def breast_cancer():
    """Return the breast cancer data with its features passed through ``encode_categorical``.

    Returns
    -------
    tuple
        The encoded feature table and the outcome returned by
        ``load_breast_cancer``.
    """
    raw_features, outcome = load_breast_cancer()
    return encode_categorical(raw_features), outcome
Exemplo n.º 12
0
 def test_load_breast_cancer():
     """Check the shapes and outcome dtype of the data from ``sdata.load_breast_cancer``."""
     features, outcome = sdata.load_breast_cancer()

     # 198 rows are expected in both parts; the feature table has 80 columns.
     assert (198, 80) == features.shape
     assert (198,) == outcome.shape
     # NOTE(review): the meaning of the trailing 51 is defined by the
     # assert_structured_array_dtype helper, which is not visible here.
     assert_structured_array_dtype(outcome, 'e.tdm', 't.tdm', 51)