import copy

import numpy as np
import pandas as pd


def na_2_model(filename):
    """(string) -> None

    Trains one nnet on the rows that have an Age value and a second nnet,
    with the Age feature dropped, for the remaining rows, then merges both
    sets of predictions into the submission file 'filename'.csv
    """
    # set up nnet params
    layer_list = [30]
    epsilon_init = 0.12
    alpha = 0.02
    n_iter = 5000
    lambd = 1
    threshold = 0.5
    seed = 1234
    cop_layer_list = copy.deepcopy(layer_list)
    # read data
    train, test = read_data()
    # get train, test pre-processed :D
    train, test, y_matrix, id_matrix = pre_process_(train, test)
    # get train, test and the indexes of the rows that have an Age value
    train_with_age, test_with_age, age_train_idxs, age_test_idxs = pre_process_2(train, test)
    other = np.setdiff1d(test.index.values, age_test_idxs)
    # get train, test without the Age feature
    train_without_age, test_without_age = pre_process_3(train, test, age_test_idxs)
    # model 1: trained and evaluated on the rows that have an Age value
    theta = nnet(train_with_age, y_matrix[age_train_idxs], layer_list, alpha, epsilon_init, n_iter, lambd, seed)
    pred1 = predict_nnet(test_with_age, y_matrix[age_test_idxs], threshold, layer_list, theta, epsilon_init, seed)
    # model 2: trained on every row, with the Age feature dropped
    theta = nnet(train_without_age, y_matrix, layer_list, alpha, epsilon_init, n_iter, lambd, seed)
    pred2 = predict_nnet(test_without_age, y_matrix[other], threshold, layer_list, theta, epsilon_init, seed)
    # merge the two sets of predictions back into test-set order
    pred = test.index.values.astype('float')
    pred[age_test_idxs] = pred1
    pred[other] = pred2
    pred = pred.reshape((len(pred), 1))
    # merge PassengerId and predictions to create the submission DataFrame
    df = pd.DataFrame(np.concatenate([id_matrix, pred], axis=1))
    # set up column names
    df.columns = ['PassengerId', 'Survived']
    # turn PassengerId column from float to int
    df['PassengerId'] = df['PassengerId'].astype('int')
    # turn predictions from float into binary int 0/1
    df['Survived'] = (df['Survived'] > threshold) * 1
    # create submission csv
    df.to_csv(filename, header=True, index=False)
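# Example invocation (a sketch; the filename is illustrative, not from the
# original code):
#
#   na_2_model('na_2_model_submission.csv')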
def submit(filename):
    """(string) -> None

    Runs the nnet on the titanic dataset and generates a submission
    file 'filename'.csv
    """
    # read data
    train, test = read_data()
    # does some of the Trevor Stephens pre-processing, check docstring of function
    train, test, y_matrix, id_matrix = pre_process_(train, test)
    # turns dataframes into matrices so the logistic regression code doesn't flip out
    train_, test_ = turn_into_matrices(train, test)
    # set nnet params
    layer_list = [10]
    epsilon_init = 0.12
    alpha = 0.1
    n_iter = 1000
    lambd = 0
    threshold = 0.5
    seed = 1234
    # get theta
    theta = nnet(train_, y_matrix, layer_list, alpha, epsilon_init, n_iter, lambd, seed)
    # get predictions in the test set
    pred = predict_nnet(test_, y_matrix, threshold, layer_list, theta, epsilon_init, seed)
    # merge PassengerId and predictions to create the submission DataFrame
    df = pd.DataFrame(np.concatenate([id_matrix, pred], axis=1))
    # set up column names
    df.columns = ['PassengerId', 'Survived']
    # turn PassengerId column from float to int
    df['PassengerId'] = df['PassengerId'].astype('int')
    # turn predictions from float into binary int 0,1
    df['Survived'] = (df['Survived'] > threshold) * 1
    # create submission csv
    df.to_csv(filename, header=True, index=False)
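# Quick sanity check of the generated file (a sketch; 'nnet_submission.csv'
# is an illustrative name, not from the original code):
#
#   submit('nnet_submission.csv')
#   df = pd.read_csv('nnet_submission.csv')
#   assert list(df.columns) == ['PassengerId', 'Survived']
#   assert df['Survived'].isin([0, 1]).all()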
def imputation(train, test, y):
    """Trains a nnet on (train, y) and returns its predictions on test,
    for use in filling missing values."""
    # earlier linear-regression version, kept for reference:
    # n_iter = 10000
    # alpha = 0.1
    # _, theta = linear_regression(train, y, alpha, n_iter)
    # m = np.shape(test)[0]
    # test = normalize_features(test)
    # test = np.concatenate((np.ones((m, 1)), test), axis=1)
    # pred = hyp_lin_r(test, theta)
    # train = np.concatenate([train, pred], axis=1)
    # set up nnet params
    layer_list = [20]
    epsilon_init = 0.12
    alpha = 0.1
    n_iter = 1000
    lambd = 0
    threshold = 0.5
    seed = 1234
    theta = nnet(train, y, layer_list, alpha, epsilon_init, n_iter, lambd, seed)
    # re-set layer_list in case nnet modified it in place
    layer_list = [20]
    pred = predict_nnet(test, y, threshold, layer_list, theta, epsilon_init, seed)
    return pred
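# Usage sketch (illustrative variable names, not from the original code):
# `rows_with_val` / `y_vals` are the feature matrix and targets for the rows
# where the value is present, `rows_missing_val` are the rows to fill in:
#
#   filled = imputation(rows_with_val, rows_missing_val, y_vals)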
import cPickle


def load_cifar(cifar_dir):
    # reconstructed header: the original snippet starts inside the loop below,
    # so the function name and the first-batch loading here are assumptions
    file_id = open(cifar_dir + '/data_batch_1', 'rb')
    batch = cPickle.load(file_id)
    file_id.close()
    data = batch['data']
    labels = np.array(batch['labels'])
    # append the remaining four training batches, data_batch_2 .. data_batch_5
    for I in range(1, 5):
        file_name = cifar_dir + '/data_batch_' + str(I + 1)
        file_id = open(file_name, 'rb')
        batch = cPickle.load(file_id)
        file_id.close()
        data = np.concatenate((data, batch['data']), axis=0)
        labels = np.concatenate((labels, batch['labels']), axis=0)
    return data, labels


# set up the MLP: one (fan-in, fan-out) pair per layer
nnet_size = np.array([[992, 500], [500, 500], [500, 10]])
learning_rate = 0.1
num_layers = 3
mlp = nnet(nnet_size, num_layers, learning_rate)
epoch = 3
cifar_dir = '/home/habtegebrial/Desktop/python-destin/cifar-10-batches-py/'

# load the pickled per-image feature files for all 50000 training images
trainData = np.array([])
for I in range(0, 50000, 1):
    # for I in range(499, 500, 500):
    Name = '/home/habtegebrial/GSoc/python-destin/testing/train/' + str(I + 1) + '.txt'
    file_id = open(Name, 'rb')
    # print np.shape(np.ravel(np.loadtxt(Name)))
    Temp = np.array(cPickle.load(file_id))
    print np.shape(Temp)
    file_id.close()
    trainData = np.hstack((trainData, Temp))
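# Note: np.hstack above accumulates everything into one flat vector; before
# feeding the MLP, it would need reshaping to one row per image, e.g. (a
# sketch, assuming equal-length feature vectors):
#
#   trainData = trainData.reshape((50000, -1))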