Example #1
import numpy as np
import pandas as pd
import theano
import theano.tensor as tensor

import lr   # local logistic-regression module used by this example
import sgd  # local SGD training module used by this example

# DATA_FILEPATH is assumed to be defined elsewhere in the original module.

def main():
    df = pd.read_csv(DATA_FILEPATH)
    print(df.columns)

    x_data = df.drop(["clase"], axis=1).to_numpy()  # feature matrix: all columns except the label
    y_data = np.array(list(map(int, df["clase"])), dtype="int32")
    n_samples = x_data.shape[0]
    print(type(y_data))

    x = tensor.matrix(name="x")
    y = tensor.ivector(name="y")

    clf = lr.LogisticRegression(x, x_data.shape[1], 2)

    with_validation = True
    if with_validation:
        val_frac = 0.3
        val_samples = int(n_samples * val_frac)
        train_samples = n_samples - val_samples
        x_tr, y_tr = x_data[:train_samples, :], y_data[:train_samples]
        x_tr_sh = theano.shared(x_tr, borrow=True)
        y_tr_sh = theano.shared(y_tr, borrow=True)
        x_val, y_val = (x_data[train_samples:(train_samples + val_samples), :],
                        y_data[train_samples:(train_samples + val_samples)])
        x_val_sh = theano.shared(x_val, borrow=True)
        y_val_sh = theano.shared(y_val, borrow=True)
        print("calling sgd_with_validation")
        sgd.sgd_with_validation(clf,
                                x_tr_sh,
                                y_tr_sh,
                                x_val_sh,
                                y_val_sh,
                                learning_rate=0.01,
                                reg_term=0.0001,
                                batch_size=32,
                                n_epochs=1000,
                                max_its=10000,
                                improv_thresh=0.01,
                                max_its_incr=4,
                                rel_val_tol=1e-3,
                                verbose=True)
    else:
        x_tr_sh = theano.shared(x_data, borrow=True)
        y_tr_sh = theano.shared(y_data, borrow=True)
        print("calling sgd")
        sgd.sgd(clf,
                x_tr_sh,
                y_tr_sh,
                y=y,
                learning_rate=0.01,
                reg_term=0.0001,
                batch_size=220,
                rel_tol=2e-3,
                n_epochs=256,
                verbose=True)

    acc = theano.function([x, y], clf.score(y))
    print("accuracy: %.2f%%" % (100 * acc(x_data, y_data)))
Example #2
    def __init__(
            self, 
            inp,
            n_inp, n_hidden, n_out,
            activation_f=tensor.tanh,
            w_init_f="xavier_tanh",
            reg="l2",
            rand_state=42):
        """
        Initialization of network.
        Parameters:
        *inp: Input tensor.
        *n_inp: Number of input neurons.
        *n_hidden: Number of hidden neurons.
        *n_out: Number of output neurons.
        *activation_f: Activation function to use.
        *w_init_f: Weight initialization method.
        *reg: Regularization method for weights.
        *rand_state: Random state.
        """

        #input/output matrices
        self.inp = inp

        #hidden layer
        self.hidden_layer = HiddenLayer(
            inp=inp,
            n_inp=n_inp, n_out=n_hidden,
            activation_f=activation_f,
            reg=reg)

        #logistic regression layer
        self.log_reg_layer = lr.LogisticRegression(
            inp=self.hidden_layer.output,
            n_inp=n_hidden, n_out=n_out,
            reg=reg)

        #model parameters
        self.params = self.hidden_layer.params + self.log_reg_layer.params

        #regularization term symbolic expression
        self.reg = self.hidden_layer.reg + self.log_reg_layer.reg

        #cost(y) function
        self.cost = self.log_reg_layer.cost

        #score(y) function
        self.score = self.log_reg_layer.score

        #pred function
        self.pred = self.log_reg_layer.pred
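
A minimal usage sketch for the class above; the class name is not shown in the excerpt, so MLP is an assumed stand-in, and the cost(y)/reg attributes follow the comments in the constructor:

import theano.tensor as tensor

x = tensor.matrix("x")
y = tensor.ivector("y")
net = MLP(inp=x, n_inp=64, n_hidden=32, n_out=10)
cost_expr = net.cost(y) + 0.0001 * net.reg  # regularized training objective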
Example #3
    def __init__(self, rng, input, n_in, n_hidden, n_out):
        self.hiddenLayer = HiddenLayer(rng=rng,
                                       input=input,
                                       n_in=n_in,
                                       n_out=n_hidden,
                                       activation=T.tanh)
        self.logRegressionLayer = lr.LogisticRegression(
            input=self.hiddenLayer.output, n_in=n_hidden, n_out=n_out)
        # L1 norm: sum of absolute values of all weights (symbolic expression)
        self.L1 = (abs(self.hiddenLayer.W).sum() +
                   abs(self.logRegressionLayer.W).sum())
        self.L2_sqr = ((self.hiddenLayer.W ** 2).sum() +
                       (self.logRegressionLayer.W ** 2).sum())
        self.negative_log_likelihood = self.logRegressionLayer.negative_log_likelihood
        self.errors = self.logRegressionLayer.error
        self.params = self.hiddenLayer.params + self.logRegressionLayer.params
Example #4
# Tail of a preceding data-cleanup loop; the enclosing `for` and `if`
# are cut off in the original snippet:
#         row[11] = 1
#     else:
#         row[11] = 0

# BREAST CANCER IMPORT #
bcdata = load_data("breast-cancer-wisconsin.csv", ",")
# clean up data
for row in bcdata:
    row[-1] = 0 if row[-1] == 2 else 1
# pick out training points from classes
X = bcdata[:, 1:10]
Y = np.ravel(bcdata[:, -1:])

# create models (the LDA instance gets its own name so it doesn't shadow the lda module)
lda_clf = lda.LDA()
linr = lr.LogisticRegression()

# k-fold accuracy test
k = 10
k_values = [0]*k
ldawineacc = [0.0] * k
ldawinert = [0.0] * k
ldabcacc = [0.0] * k
ldabcrt = [0.0] * k
for i in range(1, k):
    k_values[i] = i
    print()
    startTime = time.time()
    ldabcacc[i] = k_fold(X, Y, i, lda_clf)
    ldabcrt[i] = time.time() - startTime
    print(str(i) + "-fold LDA accuracy on breast cancer: " + str(ldabcacc[i]))
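
The load_data and k_fold helpers are defined elsewhere in the original; a rough sketch of what k_fold might look like, assuming the models expose a fit/predict interface (that interface is an assumption):

import numpy as np

def k_fold(X, Y, k, model):
    # mean accuracy over k folds (k clamped to at least 2 folds)
    idx = np.arange(len(Y))
    accs = []
    for fold in np.array_split(idx, max(k, 2)):
        train = np.setdiff1d(idx, fold)
        model.fit(X[train], Y[train])
        accs.append(np.mean(model.predict(X[fold]) == Y[fold]))
    return float(np.mean(accs))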
Example #5
import numpy as np
import torch
from sklearn.datasets import load_svmlight_file

import lr, fm, deepfm, deepandwide
import dcn, attention_net
import pnn, attention_fm, xdeepfm

torch.set_num_threads(1)

## load data

input_path = "./data/census_148d_train.libsvm.tmp"
X, Y = load_svmlight_file(input_path, dtype=np.float32)
size, dim = X.shape

## train

model = lr.LogisticRegression(dim)
# model = fm.FactorizationMachine(dim, 10)
# model = deepfm.DeepFM(dim, 13, 10, [10, 5, 1])
# model = deepandwide.DeepAndWide(dim, 13, 10, [10, 5, 1])
# model = attention_net.AttentionNet(dim, 13, 10, [10, 5, 1])
# model = dcn.DCNet(dim, 13, 10, cross_depth=4, deep_layers=[10, 10, 10])
# model = pnn.PNN(dim, 13, 10, [10, 5, 1])
# model = attention_fm.AttentionFM(dim, 13, 10, 10)
# model = xdeepfm.xDeepFM(dim, 13, 10, [10, 5, 5], [128, 128])

optim = torch.optim.Adam(model.parameters(), 0.01)
loss_fn = torch.nn.BCELoss()
batch_size = 30

for epoch in range(10):
    start = 0
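    # The original snippet is cut off here. A hypothetical continuation of
    # the mini-batch loop, assuming the model maps a dense batch to
    # probabilities in [0, 1] (as BCELoss requires) and Y holds 0/1 labels:
    while start < size:
        end = min(start + batch_size, size)
        xb = torch.from_numpy(X[start:end].toarray())           # CSR -> dense
        yb = torch.from_numpy(Y[start:end].astype(np.float32))
        optim.zero_grad()
        prob = model(xb).squeeze()    # output shape assumed to be (batch, 1)
        loss = loss_fn(prob, yb)
        loss.backward()
        optim.step()
        start = end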
Example #6
'''
Created on 2017/4/9

@author: jiefisher
'''
import numpy as np
import pandas as pd
import lr as lgr
if __name__ == '__main__':

    lr = lgr.LogisticRegression(learning_rate=.0001, l2=.0005, iters=50000)
    df = pd.read_csv("train.csv")
    x_label = df["Survived"]
    df = df.drop(
        ["Survived", "PassengerId", "Name", "Ticket", "Cabin", "Embarked"],
        axis=1)
    df['Sex'] = np.where(df['Sex'] == 'male', 1, 0)
    df = df.fillna(0)
    lr.train(df.values, x_label.values)
    df = pd.read_csv("test.csv")
    bf = pd.read_csv("gender_submission.csv")
    x_label = bf["Survived"]
    df = df.drop(["PassengerId", "Name", "Ticket", "Cabin", "Embarked"],
                 axis=1)
    df['Sex'] = np.where(df['Sex'] == 'male', 1, 0)
    df = df.fillna(0)
    print(lr.predict(df.values))
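    # The gender_submission labels loaded above are otherwise unused; a quick
    # accuracy check (assuming predict returns hard 0/1 labels) might be:
    print("test accuracy:", np.mean(lr.predict(df.values) == x_label.values))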