Пример #1
0
def launchsession2(num_slots, descriptor_type, randomSplits, levels_pyramid,
                   useKernelInter, useKernelPyr, rocCurveCM):
    """Train a bag-of-visual-words SVM and print test/validation accuracy.

    The training images are split into training and validation subsets
    (``train_percentage`` = 0.7), descriptors are extracted from the training
    subset, a codebook with ``k = 512`` entries is computed, an SVM is
    trained on the resulting visual words, and the accuracy on the test set
    and on the validation subset is printed together with the elapsed time.

    Args:
        num_slots: Forwarded unchanged to the descriptor-extraction and
            prediction helpers.
        descriptor_type: Forwarded unchanged to the descriptor-extraction
            and prediction helpers.
        randomSplits: If truthy, ``getRandomTrainingValidationSplit`` is
            used; otherwise ``getTrainingValidationSplit``.
        levels_pyramid: ``0`` selects the plain feature / visual-word
            helpers; any other value selects the spatial-pyramid variants
            and is forwarded to them.
        useKernelInter: If truthy, training and prediction go through
            ``trainSVMKernel`` / ``predictKernel``.
        useKernelPyr: Same effect as ``useKernelInter``; additionally
            forwarded to ``trainSVMKernel``.
        rocCurveCM: Forwarded as ``probabilities`` to the SVM trainer. When
            truthy and no kernel option is active, the ROC curve and the
            confusion matrix are drawn for the validation subset.

    Returns:
        None. All results are printed (and optionally plotted).
    """
    start = time.time()

    # Read the train and test files
    (train_images_filenames, test_images_filenames, train_labels,
     test_labels) = dataUtils.readData()

    # Divide training into training and validation splits
    train_percentage = 0.7  # 70% training, 30% validation
    if randomSplits:
        TrainingSplit, ValidationSplit = dataUtils.getRandomTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)
    else:
        TrainingSplit, ValidationSplit = dataUtils.getTrainingValidationSplit(
            train_images_filenames, train_labels, train_percentage)

    use_pyramid = levels_pyramid != 0
    # Logical "or" (not bitwise "|"): the flags are only used for branching.
    use_kernel = bool(useKernelInter or useKernelPyr)

    # Get descriptors D
    if use_pyramid:
        (D, Train_descriptors, Train_label_per_descriptor, Train_keypoints,
         Train_image_size) = descriptors.extractFeaturesPyramid(
             TrainingSplit, descriptor_type, num_slots)
    else:
        D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeatures(
            TrainingSplit, descriptor_type, num_slots)

    # Compute the bag-of-words codebook
    k = 512
    codebook = BoW.computeCodebook(D, k)

    # Determine visual words
    if use_pyramid:
        visual_words = BoW.getVisualWordsSpatialPyramid(
            codebook, k, Train_descriptors, Train_image_size, Train_keypoints,
            levels_pyramid)
    else:
        visual_words = BoW.getVisualWords(codebook, k, Train_descriptors)

    # Train the SVM classifier
    if use_kernel:
        clf, stdSlr, train_scaled = SVMClassifiers.trainSVMKernel(
            visual_words,
            Train_label_per_descriptor,
            useKernelPyr,
            levels_pyramid,
            Cparam=1,
            probabilities=rocCurveCM)
    else:
        clf, stdSlr = SVMClassifiers.trainSVM(visual_words,
                                              Train_label_per_descriptor,
                                              Cparam=1,
                                              kernel_type='linear',
                                              probabilities=rocCurveCM)

    # For test set
    if use_kernel:
        predictedLabels2 = SVMClassifiers.predictKernel(
            test_images_filenames, descriptor_type, clf, stdSlr, train_scaled,
            k, codebook, levels_pyramid, num_slots)
        accuracy2 = Evaluation.computeAccuracyOld(predictedLabels2,
                                                  test_labels)
        print('Final Kernel intersection test accuracy: ' + str(accuracy2))
    else:
        predictedLabels = SVMClassifiers.predict(test_images_filenames,
                                                 descriptor_type, stdSlr,
                                                 codebook, k, levels_pyramid,
                                                 num_slots)
        accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels,
                                              test_labels)
        print('Final test accuracy: ' + str(accuracy))

    # For validation set
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(
        ValidationSplit)
    if use_kernel:
        predictedLabels2 = SVMClassifiers.predictKernel(
            validation_images_filenames, descriptor_type, clf, stdSlr,
            train_scaled, k, codebook, levels_pyramid, num_slots)
        accuracy2 = Evaluation.computeAccuracyOld(predictedLabels2,
                                                  validation_labels)
        print('Final Kernel intersection validation accuracy: ' +
              str(accuracy2))
    else:
        predictedLabels = SVMClassifiers.predict(validation_images_filenames,
                                                 descriptor_type, stdSlr,
                                                 codebook, k, levels_pyramid,
                                                 num_slots)
        validation_accuracy = Evaluation.getMeanAccuracy(
            clf, predictedLabels, validation_labels)
        print('Final validation accuracy: ' + str(validation_accuracy))

    # ROC curve and confusion matrix
    if rocCurveCM:
        if use_kernel:
            # The plots consume the output of SVMClassifiers.predict, which
            # is only computed on the non-kernel path. Referencing it here
            # used to raise UnboundLocalError at the very end of the run.
            print('Skipping ROC curve and confusion matrix: they are only '
                  'produced for the non-kernel SVM.')
        else:
            graphs.rcurve(predictedLabels, validation_labels, clf)
            graphs.plot_confusion_matrix(clf,
                                         validation_labels,
                                         stdSlr.transform(predictedLabels),
                                         normalize=False,
                                         title='Confusion matrix',
                                         cmap=plt.cm.Blues)

    end = time.time()
    print('Done in ' + str(end - start) + ' secs.')
Пример #2
0
def launchsession4(layer_taken, randomSplits, k, useServer, method_used):
    """Classify images with CNN features and a linear SVM, printing accuracy.

    Features are extracted from ``layer_taken`` of the base model. For the
    layers 'fc1', 'fc2' and 'flatten', or when
    ``method_used['method_to_reduce_dim']`` is 'Average' or 'Max', the
    features are fed to the SVM directly. Otherwise they are optionally
    reduced with PCA (``method_used['usePCA'] > 0``) and encoded against a
    codebook of ``k`` entries before training.

    Args:
        layer_taken: Name of the network layer to take features from.
        randomSplits: If truthy, use the random training/validation split.
        k: Codebook size forwarded to the BoW helpers.
        useServer: If truthy, read the data with ``readServerData`` instead
            of ``readData``.
        method_used: Mapping read for the keys 'method_to_reduce_dim' and
            'usePCA'; also forwarded to the feature/prediction helpers.

    Returns:
        None. Test and validation accuracy and the elapsed time are printed.
    """
    start = time.time()

    # Read the train and test files from the selected source
    read_dataset = dataUtils.readServerData if useServer else dataUtils.readData
    (train_images_filenames, test_images_filenames, train_labels,
     test_labels) = read_dataset()

    # 70% of the training data is used for training, 30% for validation
    train_percentage = 0.7
    if randomSplits:
        make_split = dataUtils.getRandomTrainingValidationSplit
    else:
        make_split = dataUtils.getTrainingValidationSplit
    TrainingSplit, ValidationSplit = make_split(train_images_filenames,
                                                train_labels,
                                                train_percentage)

    # Base ConvNet model used for feature extraction
    CNN_base_model = descriptors.getBaseModel()

    print('Extracting features')
    D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeaturesMaps(
        TrainingSplit, layer_taken, CNN_base_model, method_used)

    # Decide once whether the bag-of-visual-words stage is bypassed
    not_use_BoW_other_layers = method_used['method_to_reduce_dim'] in (
        'Average', 'Max')
    bypass_bow = (layer_taken in ('fc1', 'fc2', 'flatten')
                  or not_use_BoW_other_layers)

    if bypass_bow:
        visual_words = D
        codebook = None
    else:
        pca = None
        if method_used['usePCA'] > 0:
            print('Applying PCA')
            D, Train_descriptors, pca = PCA_computing.PCA_to_data(
                D, Train_descriptors, method_used['usePCA'])
        # Build the codebook, then encode the training descriptors with it
        codebook = BoW.computeCodebook(D, k)
        visual_words = BoW.getVisualWords(codebook, k, Train_descriptors)

    # Train a linear SVM classifier
    clf, stdSlr = SVMClassifiers.trainSVM(visual_words,
                                          Train_label_per_descriptor,
                                          Cparam=1,
                                          kernel_type='linear')

    # Evaluate on the test set
    TestSplit = zip(test_images_filenames, test_labels)
    if bypass_bow:
        predictedLabels = SVMClassifiers.predict(TestSplit, layer_taken,
                                                 stdSlr, clf, CNN_base_model,
                                                 method_used)
        accuracy = Evaluation.computeAccuracyOld(predictedLabels, test_labels)
    else:
        predictedLabels = SVMClassifiers.predictBoVW(TestSplit, layer_taken,
                                                     stdSlr, codebook, k,
                                                     CNN_base_model, pca,
                                                     method_used)
        accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels,
                                              test_labels)
    print('Final test accuracy: ' + str(accuracy))

    # Evaluate on the validation split
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(
        ValidationSplit)
    if bypass_bow:
        predictedLabels = SVMClassifiers.predict(ValidationSplit, layer_taken,
                                                 stdSlr, clf, CNN_base_model,
                                                 method_used)
        validation_accuracy = Evaluation.computeAccuracyOld(
            predictedLabels, validation_labels)
    else:
        predictedLabels = SVMClassifiers.predictBoVW(ValidationSplit,
                                                     layer_taken, stdSlr,
                                                     codebook, k,
                                                     CNN_base_model, pca,
                                                     method_used)
        validation_accuracy = Evaluation.getMeanAccuracy(
            clf, predictedLabels, validation_labels)
    print('Final validation accuracy: ' + str(validation_accuracy))

    elapsed = time.time() - start
    print('Done in ' + str(elapsed) + ' secs.')
Пример #3
0
def launchsession3(num_slots,descriptor_type,randomSplits,levels_pyramid,usePCA):
    """Train a linear SVM on Fisher vectors and print test/validation accuracy.

    Descriptors are extracted from the training split, optionally reduced
    with PCA (``usePCA > 0``), encoded as L2-normalised Fisher vectors using
    a GMM with ``k = 64`` components, and used to train a linear SVM.

    Args:
        num_slots: Forwarded unchanged to the extraction/prediction helpers.
        descriptor_type: Forwarded unchanged to the extraction/prediction
            helpers.
        randomSplits: If truthy, use the random training/validation split.
        levels_pyramid: Values greater than 0 select the spatial-pyramid
            variants of the extraction and Fisher-vector helpers.
        usePCA: Values greater than 0 enable PCA and are forwarded to
            ``PCA_computing.PCA_to_data``.

    Returns:
        None. Accuracies and the elapsed time are printed.
    """
    start = time.time()

    # Read the train and test files
    (train_images_filenames, test_images_filenames, train_labels,
     test_labels) = dataUtils.readData()

    # 70% of the training data is used for training, 30% for validation
    train_percentage = 0.7
    if randomSplits:
        make_split = dataUtils.getRandomTrainingValidationSplit
    else:
        make_split = dataUtils.getTrainingValidationSplit
    TrainingSplit, ValidationSplit = make_split(
        train_images_filenames, train_labels, train_percentage)

    use_pyramid = levels_pyramid > 0

    # Get descriptors D
    if use_pyramid:
        (D, Train_descriptors, Train_label_per_descriptor, Train_keypoints,
         Train_image_size) = descriptors.extractFeaturesPyramid(
             TrainingSplit, descriptor_type, num_slots)
    else:
        D, Train_descriptors, Train_label_per_descriptor = descriptors.extractFeatures(
            TrainingSplit, descriptor_type, num_slots)

    # Optional dimensionality reduction
    pca = None
    if usePCA > 0:
        print('Applying PCA')
        D, Train_descriptors, pca = PCA_computing.PCA_to_data(
            D, Train_descriptors, usePCA)

    # Fit the GMM; short codebooks (32, 64...) are used here
    k = 64
    gmm = fisherVectors.getGMM(D, k)

    # Convert every per-image descriptor set to float32, in place
    for position, descriptor_set in enumerate(Train_descriptors):
        Train_descriptors[position] = np.float32(descriptor_set)

    if use_pyramid:
        fisher = fisherVectors.getFisherVectorsSpatialPyramid(
            Train_descriptors, k, gmm, Train_image_size, Train_keypoints,
            levels_pyramid)
    else:
        fisher = fisherVectors.getFisherVectors(Train_descriptors, k, gmm)

    # Power normalization (fisherVectors.powerNormalization) is currently
    # disabled; only L2 normalization is applied.
    fisher = fisherVectors.normalizeL2(fisher)

    # Train a linear SVM classifier
    clf, stdSlr = SVMClassifiers.trainSVM(
        fisher, Train_label_per_descriptor, Cparam=1, kernel_type='linear')

    # Test set: predict, then score
    predictedLabels = SVMClassifiers.predict(
        test_images_filenames, descriptor_type, stdSlr, gmm, k,
        levels_pyramid, num_slots, pca)
    accuracy = Evaluation.getMeanAccuracy(clf, predictedLabels, test_labels)
    print('Final test accuracy: ' + str(accuracy))

    # Validation split: predict, then score
    validation_images_filenames, validation_labels = dataUtils.unzipTupleList(
        ValidationSplit)
    predictedLabels = SVMClassifiers.predict(
        validation_images_filenames, descriptor_type, stdSlr, gmm, k,
        levels_pyramid, num_slots, pca)
    validation_accuracy = Evaluation.getMeanAccuracy(
        clf, predictedLabels, validation_labels)
    print('Final validation accuracy: ' + str(validation_accuracy))

    elapsed = time.time() - start
    print('Done in ' + str(elapsed) + ' secs.')
Пример #4
0
    return model

def catboost_predict(model,df):
    """Return ``model.predict`` on ``df`` wrapped in a ``Pool``.

    The categorical features are identified by the positions, within
    ``df.columns``, of the names listed in the module-level ``cat_colums``.
    Raises ValueError if one of those names is not a column of ``df``.
    """
    column_names = df.columns.tolist()
    cat_positions = []
    for cat_name in cat_colums:
        cat_positions.append(column_names.index(cat_name))
    # NOTE(review): Pool is presumably catboost.Pool - confirm against imports.
    return model.predict(Pool(df, cat_features=cat_positions))

def prepare_submission(test_merged,array):
    """Write the submission file 'sub_tree.csv' with columns id, num_orders.

    ``id`` is taken from ``test_merged['id']`` and ``num_orders`` from
    ``array``; the frame index is not written.
    """
    # Seed the frame with the id column, then attach the predictions.
    submission = pd.DataFrame({'id': test_merged['id'].values})
    submission['num_orders'] = array
    submission.to_csv('sub_tree.csv', index=False)


# Script entry point: load and merge the data, apply the Categorify
# processing, train the tree models and write their predictions to 'res.csv'.
if __name__=='__main__':
    train_merged,test_merged=mergeData(*readData())
    # Return values of apply_train / apply_test are discarded, so the frames
    # are presumably modified in place - TODO confirm in Categorify.
    catProcess=Categorify(cat_colums,numeric_cols)
    catProcess.apply_train(train_merged)
    catProcess.apply_test(test_merged)
    # Disabled alternative path: train/predict with CatBoost and write the
    # submission through prepare_submission.
    # print(train_merged.head())
    # catmodel=catboost_train(train_merged[numeric_cols+cat_colums],train_merged['num_orders'].astype('float32'))
    # pred=catboost_predict(catmodel,test_merged[numeric_cols+cat_colums])
    # prepare_submission(test_merged,pred)
    # Features are the numeric columns followed by the categorical ones; the
    # target 'num_orders' is cast to float32 before training.
    rf,xgbr=train_trees(train_merged[numeric_cols+cat_colums],train_merged['num_orders'].astype('float32'))
    df=predict_trees([rf,xgbr],test_merged[numeric_cols+cat_colums])
    # to_csv is called without index=False, so the index is written as well.
    df.to_csv('res.csv')


Пример #5
0
    for res in estimator.predict(input_fn=inp_fn):
        predicted.append(res['predictions'])
    return np.array(predicted).ravel()


def prepare_submission(test_merged, array, arrayt):
    """Write the submission file 'sub_tf4.csv' combining two prediction sets.

    Args:
        test_merged: Frame providing the ``id`` column of the submission.
        array: Predictions written to the ``num_orders`` column.
        arrayt: Predictions written to the ``num_orders_tree`` column.

    The ``avg`` column holds the per-row mean of ``array`` and ``arrayt``.
    The frame index is not written to the file.
    """
    sub_df = pd.DataFrame(columns=['id', 'num_orders'])
    sub_df['id'] = test_merged['id'].values
    sub_df['num_orders'] = array
    sub_df['num_orders_tree'] = arrayt
    # Stack the two prediction vectors and average across them. The previous
    # np.mean(array + arrayt, axis=0) reduced 1-D inputs to one scalar (the
    # mean of the sums), which was then repeated on every row.
    sub_df['avg'] = np.mean([array, arrayt], axis=0)
    sub_df.to_csv('sub_tf4.csv', index=False)


# Script entry point: load and merge the data, build two estimators (one from
# the dense/linear feature columns, one from the bucket/indicator columns),
# train both and collect their predictions on the test input.
if __name__ == '__main__':
    train, test, center_info, meal_info = readData()
    train_merged, test_merged = mergeData(train, test, center_info, meal_info)
    dense_feat, lin_feat = makeFeatureColum(train_merged)
    bucket_colum, indicator_column = makeFeaturesForTrees()
    estimator = buildEstimator(dense_feat, lin_feat)
    treestimator = buildTreeEstimator(bucket_colum + indicator_column)
    # estInpFunc receives no data arguments; it presumably reads the merged
    # frames from module scope - TODO confirm before renaming anything here.
    train_inp_fn = estInpFunc()
    test_inp_fn = estInpFunc(train=False)

    # Train both estimators on the same training input function
    estimator = train_estimator(estimator, train_inp_fn)
    treestimator = train_estimator(treestimator, train_inp_fn)

    # Predict on the test input with each trained estimator
    arrayt = predict_using_trained_estimator(treestimator, test_inp_fn)
    array = predict_using_trained_estimator(estimator, test_inp_fn)
Пример #6
0
"""
Project 1

At the end you should see something like this
Step Count:1000
Training accuracy: 0.8999999761581421 loss: 0.42281264066696167
Test accuracy: 0.8199999928474426 loss: 0.4739704430103302

play around with your model to try and get an even better score
"""

import tensorflow as tf
import dataUtils

# Load the training and test sets from their CSV files.
training_data, training_labels = dataUtils.readData("project1trainingdata.csv")
test_data, test_labels = dataUtils.readData("project1testdata.csv")

# Build tensorflow blueprint
# Tensorflow placeholder: float32 input with any number of rows and 113
# columns (the second dimension matches the first dimension of w1).
input_placeholder = tf.placeholder(tf.float32, shape=[None, 113])
# Neural network hidden layers
# First-layer weights (113 x 150), Xavier-initialized.
w1 = tf.get_variable("w1",
                     shape=[113, 150],
                     initializer=tf.contrib.layers.xavier_initializer())
# First-layer bias (150 values), also Xavier-initialized.
# NOTE(review): the variable is registered under the name "12", not "b1" -
# looks like a typo; confirm before renaming, since saved checkpoints would
# reference the current name.
b1 = tf.get_variable("12",
                     shape=[150],
                     initializer=tf.contrib.layers.xavier_initializer())
hidden_layer_1 = tf.nn.dropout(tf.layers.batch_normalization(
    tf.nn.relu(tf.matmul(input_placeholder, w1) + b1),
    axis=1,
    center=True,