Example #1
import copy

import numpy as np
import pandas as pd


def na_2_model(filename):
    """(string) -> None

    Trains one neural network on the passengers whose age is known and a
    second one on the rest, then merges the test-set predictions into a
    submission file at 'filename'.

    """
    
    #set up nnet params
    layer_list=[30]
    epsilon_init=0.12
    alpha=0.02
    n_iter=5000
    lambd=1
    threshold=0.5
    seed=1234
    #keep an untouched copy of layer_list (nnet appears to mutate it in place)
    cop_layer_list=copy.deepcopy(layer_list)
    #read data
    train,test=read_data()
    
    #get train and test pre-processed
    train,test,y_matrix,id_matrix=pre_process_(train,test)
    
    #get train,test and indexes with age
    train_with_age,test_with_age,age_train_idxs,age_test_idxs=pre_process_2(train,test)
    #test-set indexes of passengers without a known age
    other=np.setdiff1d(test.index.values,age_test_idxs)
    #get train,test without age
    train_without_age,test_without_age=pre_process_3(train,test,age_test_idxs)
    
    theta=nnet(train_with_age,y_matrix[age_train_idxs],layer_list,alpha,epsilon_init,n_iter,lambd,seed)
    
    pred1=predict_nnet(test_with_age,y_matrix[age_test_idxs],threshold,layer_list,theta,epsilon_init,seed)
    
    theta=nnet(train_without_age,y_matrix,layer_list,alpha,epsilon_init,n_iter,lambd,seed)
    
    pred2=predict_nnet(test_without_age,y_matrix[other],threshold,layer_list,theta,epsilon_init,seed)
    
    #allocate a prediction vector indexed like the test set
    pred=test.index.values.astype('float')
    
    pred[age_test_idxs]=pred1
    pred[other]=pred2
    pred=pred.reshape((len(pred),1))
    #merge PassengerId and predictions to create the submission DataFrame
    df=pd.DataFrame(np.concatenate([id_matrix,pred],axis=1))
    #set up column names
    df.columns=['PassengerId','Survived']
    #turn PassengerId column from float to int
    df['PassengerId']=df['PassengerId'].astype('int')
    #turn predictions from float into binary int 0,1
    df['Survived']=(df['Survived']>threshold)*1
    #create submission csv
    df.to_csv(filename,header=True,index=False)
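A hypothetical invocation, assuming the helpers used above (read_data, the pre_process_ family, nnet, predict_nnet) are importable from the surrounding module; the filename is illustrative:

#write the submission, then sanity-check its shape
na_2_model('nnet_age_split.csv')
df = pd.read_csv('nnet_age_split.csv')
print df.shape   #expect (418, 2) for the Kaggle Titanic test set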
Example #2
import numpy as np
import pandas as pd


def submit(filename):
    """(string) -> None

    Trains a neural network on the Titanic dataset and writes
    a submission file to 'filename'.

    """

    #read data
    train, test = read_data()
    #apply some of the Trevor Stephens pre-processing; see that function's docstring
    train, test, y_matrix, id_matrix = pre_process_(train, test)
    #turn the DataFrames into matrices so the neural network code doesn't choke on them
    train_, test_ = turn_into_matrices(train, test)
    #set nnet params
    layer_list = [10]
    epsilon_init = 0.12
    alpha = 0.1
    n_iter = 1000
    lambd = 0
    threshold = 0.5
    seed = 1234
    #get theta
    theta = nnet(train_, y_matrix, layer_list, alpha, epsilon_init, n_iter,
                 lambd, seed)
    #get predictions in the test set
    pred = predict_nnet(test_, y_matrix, threshold, layer_list, theta,
                        epsilon_init, seed)
    #merge PassengerId and predictions to create the submission DataFrame
    df = pd.DataFrame(np.concatenate([id_matrix, pred], axis=1))
    #set up column names
    df.columns = ['PassengerId', 'Survived']
    #turn PassengerId column from float to int
    df['PassengerId'] = df['PassengerId'].astype('int')
    #turn predictions from float into binary int 0,1
    df['Survived'] = (df['Survived'] > threshold) * 1
    #create submission csv
    df.to_csv(filename, header=True, index=False)
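An illustrative call; note that to_csv writes to 'filename' exactly as given, so include the extension yourself:

submit('nnet_submission.csv')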
Example #3
def imputation(train,test,y):
    """Trains a small neural network on 'train' and 'y' and returns its
    predictions on 'test', for use as imputed values."""
    #an earlier linear-regression version, kept commented out for reference:
#    #set up logistic regression parameters
#    n_iter=10000
#    alpha=0.1
#    #get theta
#    _,theta=linear_regression(train,y,alpha,n_iter)
#    m=np.shape(test)[0]
#    test=normalize_features(test)
#    test=np.concatenate((np.ones((m,1)),test),axis=1)
#    #get predictions in the test set
#    pred=hyp_lin_r(test,theta)
#    #train=np.concatenate([train,pred],axis=1)
    layer_list=[20]
    epsilon_init=0.12
    alpha=0.1
    n_iter=1000
    lambd=0
    threshold=0.5
    seed=1234
    theta=nnet(train,y,layer_list,alpha,epsilon_init,n_iter,lambd,seed)
    #re-initialise layer_list: nnet appears to mutate it in place
    layer_list=[20]
    pred=predict_nnet(test,y,threshold,layer_list,theta,epsilon_init,seed)
    return pred
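A hedged sketch of wiring imputation into the age-split pipeline of Example #1; every name below (the feature matrices, index arrays, age_col) is illustrative rather than taken from the module:

#train on rows whose age is known, predict the rows where it is missing
imputed = imputation(train_matrix[known_age_idxs],
                     train_matrix[missing_age_idxs],
                     age_values[known_age_idxs])
#write the predictions back into the age column
train_matrix[missing_age_idxs, age_col] = imputed.ravel()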
Example #4
import cPickle
import numpy as np

#loader name assumed; the fragment's own def line was not preserved
def load_cifar_train(cifar_dir):
    #the first training batch initialises data and labels
    file_id = open(cifar_dir + '/data_batch_1', 'rb')
    batch = cPickle.load(file_id)
    file_id.close()
    data, labels = np.array(batch['data']), np.array(batch['labels'])
    #append the remaining four batches, data_batch_2 .. data_batch_5
    for i in range(1, 5):
        file_id = open(cifar_dir + '/data_batch_' + str(i + 1), 'rb')
        batch = cPickle.load(file_id)
        file_id.close()
        data = np.concatenate((data, batch['data']), axis=0)
        labels = np.concatenate((labels, batch['labels']), axis=0)
    return data, labels
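Usage of the loader; cifar_dir (set a few lines below) should point at the extracted cifar-10-batches-py folder, and the shapes assume the standard CIFAR-10 layout:

data, labels = load_cifar_train(cifar_dir)
print np.shape(data)    #(50000, 3072): five batches of 10000 images
print np.shape(labels)  #(50000,)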



#layer sizes, apparently as (input, output) pairs: 992 -> 500 -> 500 -> 10
nnet_size = np.array([[992, 500], [500, 500], [500, 10]])
learning_rate = 0.1

num_layers = 3
mlp = nnet(nnet_size, num_layers, learning_rate)

epoch = 3
cifar_dir = '/home/habtegebrial/Desktop/python-destin/cifar-10-batches-py/'
trainData = np.array([])
#load one pickled per-image feature vector at a time and stack them
for I in range(0, 50000, 1):
#for I in range(499, 500, 500):
    Name = '/home/habtegebrial/GSoc/python-destin/testing/train/' + str(I + 1) + '.txt'
    file_id = open(Name, 'rb')
    #print np.shape(np.ravel(np.loadtxt(Name)))
    Temp = np.array(cPickle.load(file_id))
    print np.shape(Temp)
    file_id.close()
    trainData = np.hstack((trainData, Temp))
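The loop above hstacks every per-image vector into one long 1-D array, so a reshape is presumably needed before training; a minimal sketch, assuming each file holds a 992-element feature vector matching the 992-unit input layer in nnet_size:

feature_dim = 992  #assumption: one vector per image, sized like the input layer
trainData = trainData.reshape((50000, feature_dim))
print np.shape(trainData)   #(50000, 992)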