# Combine the predictions of a KRL model and an SVM, both built from summed inputs.
# NOTE(review): the parameter list is cut off in this view. The body also reads
# gram_test, scale, max_iter and lambd - presumably the missing parameters; confirm.
def predict_third_set(gram_train,

    # Collapse the first three entries of each input into one matrix by summing them.
    gram_train = gram_train[0] + gram_train[1] + gram_train[2]
    gram_test = gram_test[0] + gram_test[1] + gram_test[2]

    # KRL is built on the training matrix divided by `scale`; the test matrix
    # gets the same scaling before prediction.
    krl = KRL(gram_m=gram_train / scale, max_iter=max_iter, lambd=lambd)
    y_pred_krl = krl.predict(gram_test / scale)

    # The SVM uses the unscaled matrices.
    clf = SVM(gram_m=gram_train)
    y_pred_svm = clf.predict(gram_test)

    # Final label is the sign of the summed predictions (0 where they cancel out).
    y_pred = np.sign(y_pred_svm + y_pred_krl)
    return y_pred
def getConfidenceScores(features_train, labels_train, C):
    """Return out-of-fold SVM confidence scores for the training data.

    The training rows are split into 10 folds. For each fold a linear SVM is
    trained on the remaining rows (penalty parameter ``C``) and its
    ``decision_function`` output for the held-out rows is collected.

    features_train -- indexable by an array of row indices, with a ``shape``
    labels_train   -- indexable by the same row indices as features_train
    C              -- penalty parameter forwarded to SVM.train as ``c``

    Returns a numpy array with the collected scores, in fold order.
    """
    train_confidence = []
    #confidence scores for training data are computed using K-fold cross validation
    #NOTE(review): KFold(n, n_folds=...) is the legacy sklearn.cross_validation
    #signature - confirm which scikit-learn version this file targets
    kfold = KFold(features_train.shape[0], n_folds=10)

    for train_index, test_index in kfold:
        X_train, X_test = features_train[train_index], features_train[test_index]
        y_train = labels_train[train_index]

        #train classifier for the subset of train data
        m = SVM.train(X_train, y_train, c=C, k="linear")

        #predict confidence for the held-out data and append it to the list
        #(the loop body was missing, so nothing was ever collected)
        conf = m.decision_function(X_test)
        train_confidence.extend(conf)

    return np.array(train_confidence)
                         Y_test) = imdb.load_data(num_words=max_features)

    # Pad/truncate every sequence to maxlen entries via pad_sequences.
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

    # Network: Embedding -> LSTM(128) -> single sigmoid output unit.
    model = Sequential()
    model.add(Embedding(max_features, 128))
    model.add(LSTM(128, dropout=0.5, recurrent_dropout=0.5))
    model.add(Dense(1, activation='sigmoid'))

    # NOTE(review): no model.compile(...) call is visible before fit() - confirm it was not lost.

    # Train for 15 epochs, then evaluate on the test split.
    model.fit(X_train, Y_train, batch_size=batch_size, epochs=15)

    score, accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size)

    print('Test score: {0}'.format(score))
    print('Test accuracy: {0}'.format(accuracy))

# Unless the first command-line argument is 'rnn', run every classical model
# in turn and print whatever its helper returns.
run_mode = sys.argv[1]
if not run_mode == 'rnn':
    svm_result = SVM(X_train, Y_train, X_test, Y_test)
    print("SVM: {0}".format(svm_result))

    nb_result = nultinomialNB(X_train, Y_train, X_test, Y_test)
    print("Naive Bayes: {0}".format(nb_result))

    lr_result = LR(X_train, Y_train, X_test, Y_test)
    print("Logistic Regression: {0}".format(lr_result))

    nn_result = NN(X_train, Y_train, X_test, Y_test)
    print("Fully connected Neural Net: {0}".format(nn_result))
 # Parse the second test matrix: every entry of column 0 is a space-separated
 # string that is converted to an array of floats (presumably a pandas column - confirm).
 X2_test = test_data_mat_2[0].str.split(' ').values
 for i , lst in enumerate(X2_test):
   X2_test[i] = np.array([float(x) for x  in lst])
 # Stack the per-row arrays into a single array.
 X2_test = np.vstack(X2_test)
 # Same parsing for the third test matrix.
 X3_test = test_data_mat_3[0].str.split(' ').values
 for i , lst in enumerate(X3_test):
   X3_test[i] = np.array([float(x) for x  in lst])
 X3_test= np.vstack(X3_test)
 # Pick the classifier from config: kernel (linear / rbf) x model (SVM / RIDGE).
 # NOTE(review): when config.classifier is neither 'SVM' nor 'RIDGE', the name
 # `classifier` is not assigned in the 'linear' branch.
 if config.Kernel == 'linear':
     print("-- This will take few milliseconds per dataset to compute the kernel matrix --\n")
     if config.classifier =='SVM': 
         classifier = SVM(kernel_name=config.Kernel, kernel=linear_kernel, C=20)
     elif config.classifier =='RIDGE':
         classifier = Ridge_Classifier(lam = 1e-8, kernel_name=config.Kernel, kernel=linear_kernel, loss_func=log_rg_loss)
 elif config.Kernel == 'rbf':
     print("-- This will take few milliseconds per dataset to compute the kernel matrix --\n")
     if config.classifier =='SVM': 
         classifier = SVM(kernel_name=config.Kernel, kernel=rbf_kernel, C=20)
     elif config.classifier =='RIDGE':
         classifier = Ridge_Classifier(lam = 1e-8, kernel_name=config.Kernel, kernel=rbf_kernel, loss_func=log_rg_loss)
     # NOTE(review): as written, the next two lines belong to the 'rbf' branch and
     # discard the classifier just built; an `else:` for unknown kernels appears
     # to be missing above them - confirm against the full file.
     classifier = None
     print("Kernel not found")
 # NOTE(review): the body of this check is not visible in this view.
 if classifier!=None:
def main(args):
    """Train SVM and KNN classifiers on LBP features, then classify faces.

    When ``args.image`` is set, that image is read in grayscale and passed to
    classify_snapshot. The visible code then opens camera 0 and, for every
    frame, detects a face, computes its LBP histogram, asks the KNN classifier
    for the closest class and draws the result plus an FPS counter.

    args -- object with an ``image`` attribute (a file path or None).

    NOTE(review): several lines appear to be missing from this view (see the
    notes below), so the exact control flow should be confirmed.
    """

    # Create algorithm objects
    lbp = LBP()
    detector = FaceDetector()
    svm = SVM()
    knn = KNearest()

    # Get subjects to train the svm on
    # NOTE(review): the list contents and its closing bracket are not visible here.
    imgs = [

    # Load the subjects and extract their features
    hists, labels = load_subjects(imgs, detector, lbp)

    # Transform to np arrays
    samples = np.array(hists, dtype=np.float32)
    # NOTE(review): the np.int alias was removed in NumPy 1.24; the builtin int
    # is the equivalent dtype.
    labels = np.array(labels, dtype=np.int)

    # Train classifiers (only knn is used for prediction in the visible code)
    svm.train(samples, labels)
    knn.train(samples, labels)

    # Check which mode the app is running in (image vs. video)
    if args.image is not None:
        # Read the image from the file path provided (flag 0 = grayscale)
        img = cv2.imread(args.image, 0)
        # Check the image exists
        if img is not None:
            # Run face recognition algorithm
            classify_snapshot(img, detector, lbp, knn)
            # NOTE(review): an `else:` presumably preceded this message - as
            # written it prints after a successful classification.
            print('The image could not be found...')

    # Establish connection to camera
    cap = cv2.VideoCapture(0)

    # Continuously grab the next frame from the camera
    while cap.isOpened():
        # Capture frame-by-frame (ret is not checked before frame is used)
        ret, frame = cap.read()

        # Start timer for performance logging
        start = time.time()

        # Convert frame to gray scale for face detector
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect a face in the frame and crop the image
        face_coords = detector.detect(gray)
        face = detector.crop_face(gray, face_coords)

        # Check we have detected a face
        if face is not None:
            # Apply LBP operator to get feature descriptor
            hist, bins = lbp.run(face, False)

            # Convert the LBP descriptor to numpy array for opencv classifiers
            test_sample = np.array([hist], dtype=np.float32)

            # Get the class of id of the closest neighbour and its distance
            dist, class_id = knn.predict(test_sample)

            # Draw the face if found
            util.draw_face(dist, class_id, frame, face_coords)
            # util.segment_face(frame)

        # Processing finished
        end = time.time()

        # Write the fps to the video
        util.write_fps(start, end, frame)

        # Display the resulting frame
        cv2.imshow('frame', frame)

        # Check if we should stop the application ('q' key pressed)
        # NOTE(review): the body of this check is not visible in this view.
        if cv2.waitKey(1) & 0xFF == ord('q'):

    # When everything done, release the capture
    # NOTE(review): the release call itself is not visible in this view.
def classify(messages_train,labels_train,messages_test,process_messages_train,process_messages_test,tokens_train,tokens_test,process_tokens_train,process_tokens_test,pos_tags_train,pos_tags_test,negationList,clusters,slangDictionary,lexicons,mpqa_lexicons): 
    """Train a linear SVM on the training messages and label the test messages.

    Training labels equal to "negative" are encoded as 0, every other label
    as 1. POS tag / bigram / trigram score tables and the MPQA scores are
    computed from the training data only and reused for the test features,
    because the labels of the test messages are unknown.

    Returns the output of SVM.predict for the test feature matrix.
    """
    # label encoding: 0 - negative messages, 1 - positive messages
    labels_train = [1 if label != "negative" else 0 for label in labels_train]

    # POS tag bigrams, then trigrams, for the train and test messages
    bigrams_train = getBigrams(pos_tags_train)
    bigrams_test = getBigrams(pos_tags_test)
    trigrams_train = getTrigrams(pos_tags_train)
    trigrams_test = getTrigrams(pos_tags_test)

    # unique POS tags, bigrams and trigrams seen in the training set
    tag_vocab = getPosTagsSet(pos_tags_train)
    bigram_vocab = getBigramsSet(bigrams_train)
    trigram_vocab = getTrigramsSet(trigrams_train)

    # per-class score tables (class 0 = negative, class 1 = positive)
    tag_scores_neg = posTagsScore(tag_vocab,0,pos_tags_train,labels_train)
    tag_scores_pos = posTagsScore(tag_vocab,1,pos_tags_train,labels_train)
    bigram_scores_neg = posBigramsScore(bigram_vocab,0,bigrams_train,labels_train)
    bigram_scores_pos = posBigramsScore(bigram_vocab,1,bigrams_train,labels_train)
    trigram_scores_neg = posTrigramsScore(trigram_vocab,0,trigrams_train,labels_train)
    trigram_scores_pos = posTrigramsScore(trigram_vocab,1,trigrams_train,labels_train)

    # scores for the words of the mpqa lexicons, computed from the train messages
    mpqaScores = getScores(mpqa_lexicons,process_messages_train,labels_train)

    def extract(messages, processed, tokens, processed_tokens, pos_tags, bigrams, trigrams):
        # one feature-extraction call; everything learned from training is shared
        return features.getFeatures(
            messages, processed, tokens, processed_tokens, pos_tags,
            slangDictionary, lexicons, mpqa_lexicons, bigrams, trigrams,
            bigram_scores_neg, bigram_scores_pos,
            trigram_scores_neg, trigram_scores_pos,
            tag_scores_neg, tag_scores_pos,
            mpqaScores, negationList, clusters)

    # features for the train messages, then for the test messages
    features_train = extract(messages_train, process_messages_train, tokens_train,
                             process_tokens_train, pos_tags_train,
                             bigrams_train, trigrams_train)
    features_test = extract(messages_test, process_messages_test, tokens_test,
                            process_tokens_test, pos_tags_test,
                            bigrams_test, trigrams_test)

    # penalty parameter C of the linear SVM
    C = 0.001953125

    # fit on the train features, then predict the test labels
    model = SVM.train(features_train,labels_train,c=C,k="linear")
    return SVM.predict(features_test,model)
def main(f):
    """Train the subjectivity (SD) and polarity (SP) SVM classifiers and save them.

    f -- training dataset, passed as-is to tsvreader.opentsv.

    Two SD classifiers are trained on binary labels (neutral vs. not neutral).
    Their cross-validated confidence scores are then used when building the
    features of two SP classifiers trained on three-way labels
    (neutral 0 / negative -1 / positive 1). Lexicons, score tables and the
    four models are written out with the save* helpers.

    NOTE(review): several statements are truncated in this view (see the
    notes below), so details should be confirmed against the full file.
    """
    print "System training started"

    #load training dataset
    dataset_train = f
    ids, labels_train, messages_train = tsvreader.opentsv(dataset_train)
    print "Train data loaded"

    #labels for subjectivity detection (2 categories)
    temp_labels_train = [0 if x == "neutral" else 1 for x in labels_train]
    #labels for polarity detection (3 categories)
    #NOTE(review): the closing bracket of this list is not visible in this view
    labels_train = [
        0 if x == "neutral" else -1 if x == "negative" else 1
        for x in labels_train

    #convert labels to numpy arrays
    temp_labels_train = np.array(temp_labels_train)
    labels_train = np.array(labels_train)

    #load word clusters
    clusters = loadClusters()
    print "Clusters loaded"

    #load Lexicons
    negationList, slangDictionary, lexicons, mpqa_lexicons = loadLexicons()
    print "Lexicons loaded"

    #tokenize all messages
    tokens_train = tokenize(messages_train)
    print "Messages tokenized"

    #compute pos tags for all messages
    pos_tags_train = arktagger.pos_tag_list(messages_train)
    print "Pos tags computed"

    #compute pos tag bigrams
    pos_bigrams_train = getBigrams(pos_tags_train)
    #compute pos tag trigrams
    pos_trigrams_train = getTrigrams(pos_tags_train)

    #get the unique pos tags, bigrams and trigrams from training set
    unique_pos_tags = getPosTagsSet(pos_tags_train)
    unique_bigrams = getBigramsSet(pos_bigrams_train)
    unique_trigrams = getTrigramsSet(pos_trigrams_train)

    #compute POS tag scores (one table per class: neutral 0, positive 1, negative -1)
    #NOTE(review): most of the score calls below are cut off mid-argument-list in this view
    pos_tags_scores_neutral = posTagsScore(unique_pos_tags, 0, pos_tags_train,
    pos_tags_scores_positive = posTagsScore(unique_pos_tags, 1, pos_tags_train,
    pos_tags_scores_negative = posTagsScore(unique_pos_tags, -1,
                                            pos_tags_train, labels_train)

    pos_bigrams_scores_neutral = posBigramsScore(unique_bigrams, 0,
    pos_bigrams_scores_positive = posBigramsScore(unique_bigrams, 1,
    pos_bigrams_scores_negative = posBigramsScore(unique_bigrams, -1,

    pos_trigrams_scores_neutral = posTrigramsScore(unique_trigrams, 0,
    pos_trigrams_scores_positive = posTrigramsScore(unique_trigrams, 1,
    pos_trigrams_scores_negative = posTrigramsScore(unique_trigrams, -1,

    #compute mpqa scores
    #NOTE(review): remaining arguments of this call are not visible in this view
    mpqaScores = getScores(mpqa_lexicons,

    #save scores and other resources for future use
    savePosScores(pos_tags_scores_neutral, pos_tags_scores_positive,
                  pos_tags_scores_negative, pos_bigrams_scores_neutral,
                  pos_bigrams_scores_positive, pos_bigrams_scores_negative,
                  pos_trigrams_scores_neutral, pos_trigrams_scores_positive,
                  pos_trigrams_scores_negative, mpqaScores)
    #save lexicons
    saveLexicons(negationList, slangDictionary, lexicons, mpqa_lexicons)
    #save clusters
    #NOTE(review): no save call for the clusters is visible here

    #load Glove embeddings (d is the dimension passed to GloveDictionary.Glove)
    d = 200
    glove = GloveDictionary.Glove(d)

    #save Glove embeddings for future use
    #NOTE(review): no save call for the embeddings is visible here

    #Subjectivity Detection Features

    #SD1 features
    features_train_1 = features.getFeatures(
        messages_train, tokens_train, pos_tags_train, slangDictionary,
        lexicons, mpqa_lexicons, pos_bigrams_train, pos_trigrams_train,
        pos_bigrams_scores_negative, pos_bigrams_scores_positive,
        pos_trigrams_scores_negative, pos_trigrams_scores_positive,
        pos_tags_scores_negative, pos_tags_scores_positive, mpqaScores,
        negationList, clusters, pos_bigrams_scores_neutral,
        pos_trigrams_scores_neutral, pos_tags_scores_neutral)

    #SD2 features
    features_train_2 = []
    #for message in tokens_train :
    #NOTE(review): the body of this loop is not visible in this view
    for i in range(0, len(messages_train)):
    features_train_2 = np.array(features_train_2)

    #regularize features
    features_train_1 = regularization.regularize(features_train_1)
    features_train_2 = regularization.regularizeHorizontally(features_train_2)

    #Penalty parameter C of the error term for every SD system
    C1 = 0.001953125
    C2 = 1.4068830572470667

    #get confidence scores
    train_confidence_1 = getConfidenceScores(features_train_1,
                                             temp_labels_train, C1)
    train_confidence_2 = getConfidenceScores(features_train_2,
                                             temp_labels_train, C2)

    #normalize confidence scores
    #note: despite its name, softmax below is the logistic sigmoid 1/(1+e^-x),
    #applied to every score independently
    softmax = lambda x: 1 / (1. + math.exp(-x))
    train_confidence_1 = [softmax(conf) for conf in train_confidence_1]
    train_confidence_2 = [softmax(conf) for conf in train_confidence_2]

    train_confidence_1 = np.array(train_confidence_1)
    train_confidence_2 = np.array(train_confidence_2)

    #train SD classifiers
    sd1 = SVM.train(features_train_1, temp_labels_train, c=C1, k="linear")
    sd2 = SVM.train(features_train_2, temp_labels_train, c=C2, k="linear")

    #Sentiment Polarity Features (append confidence scores to SD features)

    #SP1 features
    #NOTE(review): the first part of the hstack argument is not visible in this view;
    #the visible part reshapes the confidence scores into a single column
    features_train_1 = np.hstack(
         train_confidence_1.reshape(train_confidence_1.shape[0], 1)))
    #SP2 features
    features_train_2 = np.hstack(
         train_confidence_2.reshape(train_confidence_2.shape[0], 1)))

    #Penalty parameter C of the error term for every SP system
    C1 = 0.003410871889693192
    C2 = 7.396183688299606

    #train SP classifiers
    sp1 = SVM.train(features_train_1, labels_train, c=C1, k="linear")
    sp2 = SVM.train(features_train_2, labels_train, c=C2, k="linear")

    #save trained models
    saveModels(sd1, sd2, sp1, sp2)

    print "System training completed!"
import sys
import os
import numpy as np
import math
import cv2
from plyfile import PlyData

from LBP import LBP, LocalBinaryPatterns
from classifiers import SVM, KNearest

# todo test the classifier
# todo validate on a 80/20 split

# LBP feature extractor instance shared at module level.
lbp = LBP()

# Classifier instances trained by main().
svm, knn = SVM(), KNearest()

# Module-level accumulators that main() converts to arrays after the traversal:
# feature histograms and their matching labels.
hists, labels = [], []

def main(photoface_dir):
    """Walk the dataset directory, then train the SVM and KNN classifiers.

    photoface_dir -- root directory handed to traverse() together with the
                     describe_face callback.

    After the traversal the module-level ``hists`` and ``labels`` lists are
    converted to arrays (float32 samples, integer ids) and used to train the
    module-level ``svm`` and ``knn`` classifiers.
    """
    traverse(photoface_dir, describe_face)
    samples = np.array(hists, dtype=np.float32)
    # np.int was only an alias of the builtin int and was removed in NumPy 1.24,
    # so the builtin is used directly; the resulting dtype is unchanged.
    ids = np.array(labels, dtype=int)

    # Train classifiers
    svm.train(samples, ids)
    knn.train(samples, ids)
def performCrossvalidationSVM(mat, c):
    """Cross-validate the SVM on `mat` with parameter `c`.

    The scores returned by SVM.performCrossValidationSVM are passed through
    checkResultsCrossvalidation and its result is returned.
    """
    cv_scores = SVM.performCrossValidationSVM(mat, c)
    checked = checkResultsCrossvalidation(cv_scores)
    return checked
def performLinearSVC(training, test):
    """Run the SVM on a training/test pair and return the checked results.

    SVM.performSVM yields a (prediction, prediction_prob) pair, which is
    forwarded to checkResultsPredicted together with both datasets.
    """
    svm_output = SVM.performSVM(training, test)
    prediction, prediction_prob = svm_output
    return checkResultsPredicted(test, training, prediction, prediction_prob)
# Naive Bayes: predict the EVAL split and write the submission file.
print("Predict the EVAL set")
_nb_eval_output = nb.predict(data_eval_compact)
y_preds = _nb_eval_output['y_preds']
make_submission_data(y_preds, 'nb_1214.csv')

# Perceptron: predict TEST (return value unused), then EVAL for the submission ---
perceptron = Perceptron(r=0.1, margin=0.01, n_epoch=20)
print("Predict the TEST set")
perceptron.predict(data_test, perceptron.weights[-1])
print("Predict the EVAL set")
# the last entry of perceptron.weights is used for prediction
_perceptron_eval_output = perceptron.predict(data_eval, perceptron.weights[-1])
y_preds = _perceptron_eval_output['y_preds']
make_submission_data(y_preds, 'perceptron.csv')

# SVM: predict TEST (return value unused), then EVAL for the submission ---------
svm = SVM(r=0.01, c=1, n_epoch=17)
print("Predict the TEST set")
svm.predict(data_test, svm.weights[-1])
print("Predict the EVAL set")
# the last entry of svm.weights is used for prediction
_svm_eval_output = svm.predict(data_eval, svm.weights[-1])
y_preds = _svm_eval_output['y_preds']
make_submission_data(y_preds, 'svm.csv')

# run Logistic -----------------------------
# Predict TEST (return value unused), then EVAL to build the submission file.
logistic = Logistic(r=0.01, sigma=100, n_epoch=10)
print("Predict the TEST set")
logistic.predict(data_test, logistic.weights[-1])
# message typo fixed ("Preidict") so it matches the other model sections
print("Predict the EVAL set")
y_preds = logistic.predict(data_eval, logistic.weights[-1])['y_preds']
make_submission_data(y_preds, 'logistic.csv')