Exemplo n.º 1
0
def runFM(train_X, train_y, test_X, test_y, test_X2, label, dev_index,
          val_index):
    """Fit an FM_FTRL model for one label and score two matrices.

    Training rows whose target equals 1 receive a weight of 1.0; every other
    row receives the weight registered for ``label`` in the table below.  The
    raw outputs of ``model.predict`` are passed through ``sigmoid`` before
    being returned.

    Args:
        train_X: Training feature matrix; ``train_X.shape[1]`` is passed to
            the model as ``D``.
        train_y: Iterable of training targets, each compared against 1.
        test_X: First matrix to score.
        test_y: Not used by this function.
        test_X2: Second matrix to score.
        label: Key into the per-label weight table.
        dev_index: Not used by this function.
        val_index: Not used by this function.

    Returns:
        A tuple ``(scores for test_X, scores for test_X2)``.
    """
    weight_by_label = {
        'toxic': 1.0,
        'severe_toxic': 0.2,
        'obscene': 1.0,
        'threat': 0.1,
        'insult': 0.8,
        'identity_hate': 0.2
    }
    hyperparams = dict(
        alpha=0.02,
        beta=0.01,
        L1=0.00001,
        L2=30.0,
        D=train_X.shape[1],
        alpha_fm=0.1,
        L2_fm=0.5,
        init_fm=0.01,
        weight_fm=50.0,
        D_fm=200,
        e_noise=0.0,
        iters=3,
        inv_link="identity",
        e_clip=1.0,
        threads=4,
        use_avx=1,
        verbose=1,
    )
    fm = FM_FTRL(**hyperparams)

    def _row_weight(target):
        # The table lookup stays inside the branch so it only happens for
        # rows that are not equal to 1.
        if target == 1:
            return 1.0
        return weight_by_label[label]

    row_weights = np.array([_row_weight(target) for target in train_y])
    fm.fit(train_X, train_y, row_weights, reset=False)

    # inv_link is "identity", so the squashing is applied here instead.
    scores_first = sigmoid(fm.predict(test_X))
    scores_second = sigmoid(fm.predict(test_X2))
    return scores_first, scores_second
Exemplo n.º 2
0
    def testSigmoid(self):
        """Check ``preprocess.sigmoid`` on scalar inputs and on a numpy array."""
        # (expected value, argument) pairs, checked in this order.
        scalar_cases = (
            (0.5, 0),
            (0.7310585786300049, 1),
            (0.2689414213699951, -1),
        )
        for wanted, argument in scalar_cases:
            self.assertAlmostEqual(wanted, preprocess.sigmoid(argument))

        inputs = numpy.array([-2, -1, 0, 1, 2])
        wanted_vector = numpy.array([
            0.11920292202211755,
            0.2689414213699951,
            0.5,
            0.7310585786300049,
            0.8807970779778823,
        ])
        got_vector = preprocess.sigmoid(inputs)

        # Reduce the element-wise error to a single scalar via the norm of
        # the difference, then require it to be (almost) zero.
        residual = linalg.norm(wanted_vector - got_vector)
        self.assertAlmostEqual(0.0, residual)
Exemplo n.º 3
0
        def test(x):
            """Check muSigma, normalize and sigmoid on ``x`` against reference values.

            Uses ``self`` from the enclosing scope for the assertions.
            """
            mu, sigma = preprocess.muSigma(x)

            # Spot-check one raw entry of the input itself.
            self.assertAlmostEqual(1.23902738264240, x[1][2])

            for stats in (mu, sigma):
                self.assertEqual(5, len(stats))

            # Each entry: (first index into x, second index into x / index
            # into mu and sigma, expected mu, expected sigma, expected
            # standardized value).
            reference = (
                (0, 0, 2.87969736221038, 2.04868506865762, -0.99025024303433),
                (1, 2, 1.97861578296198, 2.33076030134340, -0.31731637092553),
            )
            for row, col, wanted_mu, wanted_sigma, wanted_z in reference:
                self.assertAlmostEqual(wanted_mu, mu[col])
                self.assertAlmostEqual(wanted_sigma, sigma[col])
                standardized = (x[row][col] - mu[col]) / sigma[col]
                self.assertAlmostEqual(wanted_z, standardized)

            normalized = preprocess.normalize(x, mu, sigma)

            row_count, col_count = normalized.shape
            self.assertEqual(4, row_count)
            self.assertEqual(5, col_count)

            # normalize() must reproduce the hand-computed values above.
            self.assertAlmostEqual(-0.99025024303433, normalized[0][0])
            self.assertAlmostEqual(-0.31731637092553, normalized[1][2])

            squashed = preprocess.sigmoid(normalized)
            self.assertAlmostEqual(0.27086265279957, squashed[0][0])
            self.assertAlmostEqual(0.42132990768430, squashed[1][2])
Exemplo n.º 4
0
 def normalize(self, data):
     """Normalize and squash the first element of an ``(x, y)`` pair.

     ``x`` is passed through ``preprocess.normalize`` with this object's
     ``mu`` and ``sigma`` and then through ``preprocess.sigmoid``; the result
     is copied into a float32 numpy array.  ``y`` is returned unchanged.
     """
     features, targets = data
     standardized = preprocess.normalize(features, self.mu, self.sigma)
     squashed = preprocess.sigmoid(standardized)
     return numpy.array(squashed, dtype=numpy.float32), targets
Exemplo n.º 5
0
def _binarized_fm_matrix(frame, label):
    """Return the thresholded ``fm_`` columns of ``frame`` as a CSR matrix.

    Every column whose name contains ``'fm_'`` and differs from ``label`` is
    mapped to 0 where the value is below 0.5 and to 1 otherwise.
    """
    binarized_columns = [
        frame[c].apply(lambda x: 0 if x < 0.5 else 1)
        for c in frame.columns if 'fm_' in c and c != label
    ]
    return csr_matrix(pd.concat(binarized_columns, axis=1).values)


def runChainedFM(train_X, train_y, test_X, test_y, test_X2, label, dev_index,
                 val_index):
    """Fit an FM_FTRL model on features augmented with level-1 FM outputs.

    The ``'lvl1_fm'`` cache is loaded, its ``fm_`` columns (except the one
    named ``label``) are thresholded at 0.5, and the resulting 0/1 columns are
    appended to the feature matrices: the cached training frame indexed by
    ``dev_index`` goes onto ``train_X``, the same frame indexed by
    ``val_index`` goes onto ``test_X``, and the cached test frame goes onto
    ``test_X2``.  The model is then fitted and both matrices are scored, with
    raw outputs passed through ``sigmoid``.

    Args:
        train_X: Training feature matrix.
        train_y: Iterable of training targets, each compared against 1.
        test_X: First matrix to score.
        test_y: Not used by this function.
        test_X2: Second matrix to score.
        label: Key into the per-label weight table; also the name of the
            level-1 column to leave out.
        dev_index: Index used to pick the level-1 rows joined to ``train_X``.
        val_index: Index used to pick the level-1 rows joined to ``test_X``.

    Returns:
        A tuple ``(pred_test_y, pred_test_y2)``.
    """
    print_step('Loading Lvl1')
    lvl1_train, lvl1_test = load_cache('lvl1_fm')
    # The original code also ran the thresholding once more in a bare list
    # comprehension and discarded the result; that redundant pass is gone.
    lvl1_train = _binarized_fm_matrix(lvl1_train, label)
    lvl1_test = _binarized_fm_matrix(lvl1_test, label)

    print_step('Merging 1/3')
    # Take the val_index slice first: the next line rebinds lvl1_train.
    lvl1_valid = lvl1_train[val_index]
    lvl1_train = lvl1_train[dev_index]
    train_X = csr_matrix(hstack([train_X, lvl1_train]))
    print_step('Merging 2/3')
    test_X = csr_matrix(hstack([test_X, lvl1_valid]))
    print_step('Merging 3/3')
    test_X2 = csr_matrix(hstack([test_X2, lvl1_test]))

    print_step('Modeling')
    class_weights = {
        'toxic': 1.0,
        'severe_toxic': 0.2,
        'obscene': 1.0,
        'threat': 0.1,
        'insult': 0.8,
        'identity_hate': 0.2
    }
    model = FM_FTRL(alpha=0.02,
                    beta=0.01,
                    L1=0.00001,
                    L2=30.0,
                    D=train_X.shape[1],
                    alpha_fm=0.1,
                    L2_fm=0.5,
                    init_fm=0.01,
                    weight_fm=50.0,
                    D_fm=200,
                    e_noise=0.0,
                    iters=3,
                    inv_link="identity",
                    e_clip=1.0,
                    threads=4,
                    use_avx=1,
                    verbose=1)
    # Rows equal to 1 keep full weight; all others get the per-label weight.
    train_weight = np.array(
        [1.0 if x == 1 else class_weights[label] for x in train_y])
    model.fit(train_X, train_y, train_weight, reset=False)
    # inv_link is "identity", so the squashing is applied here instead.
    pred_test_y = sigmoid(model.predict(test_X))
    pred_test_y2 = sigmoid(model.predict(test_X2))
    return pred_test_y, pred_test_y2