Exemplo n.º 1
def main():
    """Score the project's kNN classifier on the digits data for two ``r`` values.

    Loads the pickled train and test splits, flattens their label arrays,
    normalizes the features of each split separately, and then fits and
    scores a fresh classifier for every ``r``, printing both accuracies.
    """
    kernel = c.COSINE
    # Run settings; result_path and model_name are not used further down.
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel

    train_path = 'data\\digits\\tr_f_l_10.pickle'
    test_path = 'data\\digits\\te_f_l_10.pickle'

    # Each loaded split is indexed as [features, labels].
    train_set = loader.load_pickle_file(train_path)
    test_set = loader.load_pickle_file(test_path)

    # Keep only the first row of the transposed labels
    # (presumably the labels are stored as a single column -- TODO confirm).
    train_set[1] = np.transpose(train_set[1])[0]
    test_set[1] = np.transpose(test_set[1])[0]

    # Train and test features are normalized independently of each other.
    for split in (train_set, test_set):
        Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, split[0])

    def accuracy(split, predicted):
        # Fraction of rows whose predicted label equals the stored label.
        return (split[1] == predicted).sum() / split[0].shape[0]

    started = time.time()
    print('{:.2f} Start training.'.format(time.time() - started))

    for radius in (0.15, 0.1):
        model = kNN.kNN(kernel=kernel, dataset=c.DS_DIGITS)
        model.fit(train_set[0], train_set[1])
        train_pred = model.predict(train_set[0], r=radius)
        test_pred = model.predict(test_set[0], r=radius)

        train_acc = accuracy(train_set, train_pred)
        test_acc = accuracy(test_set, test_pred)

        elapsed = time.time() - started
        print('{} Final results with kernel {} and r={}. Train acc: {}, Test acc: {}'.format(elapsed, kernel, radius, train_acc, test_acc))
Exemplo n.º 2
def main():
    """Call the decision-tree entropy / information-gain helpers on spambase.

    Uses fold 0 of a k-fold split and the first feature column only. The
    computed values are not returned or printed.
    """
    target = 'v2'
    # Settings (layer_thresh and T are not used below).
    k = 10  # number of folds
    layer_thresh = 2
    T = 50
    threshes_path = 'data/spambase.threshes'

    # Data set and the pickled thresholds.
    dataset = loader.load_dataset('data/spambase.data')
    thresholds = loader.load_pickle_file(threshes_path)

    # Work on the training part of the first fold.
    folds = Preprocess.prepare_k_folds(dataset, k)
    tr_data, te_data = Preprocess.get_i_fold(folds, 0)
    labels = tr_data[1]
    first_feature = [row[0] for row in tr_data[0]]

    tree = dt.DecisionTree()
    if target == 'v1':
        # The v1 helpers are invoked 100 times with identical arguments.
        for _ in range(100):
            entropy = tree.compute_entropy(labels)
            cut = thresholds[0][30]
            gain = tree.compute_ig(first_feature, labels, cut, entropy)
    # NOTE(review): the v2 calls run for every value of `target`; an `else:`
    # may have been intended here -- confirm before relying on the v1 branch.
    entropy = tree.compute_entropy_v2(labels)
    cut = thresholds[0][0]
    gain = tree.compute_ig_v2(first_feature, labels, cut, entropy)
Exemplo n.º 3
def main():
    """Fit a linear regression on the housing data and log train/test MSE.

    The features of both splits are normalized with ``shift_and_scale``,
    the model is built on the training split, and the two errors plus the
    fitted ``theta`` are printed and written to ``result_path``.
    """
    # Run settings.
    result_path = 'results/housingLiR_1.mse'
    model_name = 'housing_shiftAndScale'
    # Alternative normalization: Preprocess.zero_mean_unit_var
    normalization = Preprocess.shift_and_scale
    # Alternative column exclusion: (0, 7, 12)
    cols_not_norm = []

    # Load and normalize both splits in one call.
    training_data = loader.load_dataset('data/housing_train.txt')
    testing_data = loader.load_dataset('data/housing_test.txt')
    Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], cols_not_norm)

    # Train, then evaluate on both splits.
    model = rm.LinearRegression()
    model.build(training_data[0], training_data[1])
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)

    # Single-argument print() calls produce the same output on Python 2 and 3.
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)

    result = {
        'TrainingMSE': str(training_mse),
        'TestingMSE': str(testing_mse),
        'Theta': str(model.theta),
    }

    # Log the training result to file.
    util.write_result_to_file(result_path, model_name, result)
Exemplo n.º 4
def main():
    """Score k-nearest-neighbour classifiers on the digits data for several k.

    ``is_sklearn`` selects the implementation: the project's ``kNN.kNN``
    when False, scikit-learn's ``KNeighborsClassifier`` when True. Train
    and test accuracy are printed for each k.
    """
    is_sklearn = False
    # kernel = c.COSINE
    # kernel = c.GAUSSIAN
    kernel = c.POLY
    # Run settings (the three paths/names below are not used further down).
    result_path = 'results/PB2_spam.acc'
    model_name = 'digits_' + kernel
    model_path = 'data/PB1_B_digits_sk_Gaussian_1.model'

    # tr_data_path = 'data\\digits\\tr_f_l.pickle'
    # te_data_path = 'data\\digits\\te_f_l.pickle'
    tr_data_path = 'data\\digits\\tr_f_l_10.pickle'
    te_data_path = 'data\\digits\\te_f_l_10.pickle'
    # Load the train and test splits; each is indexed as [features, labels].
    tr_data = loader.load_pickle_file(tr_data_path)
    te_data = loader.load_pickle_file(te_data_path)

    # Keep only the first row of the transposed labels.
    tr_data[1] = np.transpose(tr_data[1])[0]
    te_data[1] = np.transpose(te_data[1])[0]

    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, tr_data[0])
    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, te_data[0])

    st = time.time()
    print('{:.2f} Start training.'.format(time.time() - st))

    for k in (1, 3, 7):
        if not is_sklearn:
            clf = kNN.kNN(kernel=kernel)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0], k=k)
            te_pred = clf.predict(te_data[0], k=k)
        else:
            # Without this `else:` the scikit-learn model was always fitted
            # and its predictions replaced the ones computed above.
            clf = KNeighborsClassifier(n_neighbors=k, metric=cosine_distances)
            clf.fit(tr_data[0], tr_data[1])
            tr_pred = clf.predict(tr_data[0])
            te_pred = clf.predict(te_data[0])

        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]
        print('{} Final results with kernel {} and k={}. Train acc: {}, Test acc: {}'.format(time.time() - st, kernel, k, tr_acc, te_acc))
Exemplo n.º 5
def get_tf_idf(query, src="google"):
    """Return the query's terms ranked by tf-idf, highest first.

    Args:
        query: passed to ``get_tokens`` to obtain the token list.
        src: source name forwarded to ``get_tokens`` (default "google").

    Returns:
        A list of ``(term, tf_idf)`` pairs sorted by descending tf-idf.
    """
    tokens = get_tokens(query, src)

    # Term counts for the query.
    query_dictionary = Preprocess.list_to_dict(tokens, {})

    # Stored idf values for known terms.
    dictionary = load_idf()

    tf_idf_dictionary = {}
    for key, tf in query_dictionary.items():
        if key in dictionary:
            idf = dictionary[key]
        else:
            # Fallback idf for terms missing from the stored dictionary.
            # Previously this line ran unconditionally and overwrote every
            # looked-up value.
            idf = math.log(18, 10)
        tf_idf_dictionary[key] = float(tf) * float(idf)

    # items() works on Python 2 and 3 (iteritems() is Python 2 only).
    return sorted(tf_idf_dictionary.items(), key=operator.itemgetter(1), reverse=True)
def detectPlatesInScene(imgOriginalScene):
    """Return the possible plates found in ``imgOriginalScene``.

    The scene is preprocessed, candidate characters are grouped into lists
    of matching characters, and one plate is extracted per group. Only
    plates whose ``imgPlate`` is not None are returned.
    """
    possiblePlates = []

    # Unpacking exactly three values raises ValueError early when the image
    # shape is not 3-dimensional.
    height, width, numChannels = imgOriginalScene.shape

    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    possibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(possibleCharsInScene)

    for matchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, matchingChars)

        if possiblePlate.imgPlate is not None:
            # This statement was missing: the `if` had no body and the
            # result list could never be filled.
            possiblePlates.append(possiblePlate)

    print("\n" + str(len(possiblePlates)) + " possible plates found")

    return possiblePlates
Exemplo n.º 7
def main():
    """Score the project's kNN classifier with feature selection on spam data.

    Only fold 0 of the k-fold split is used. The classifier is fitted once
    with feature selection enabled, then scored for several neighbour
    counts; train and test accuracy are printed for each.
    """
    # Settings (is_sklearn, result_path and model_name are not used below).
    is_sklearn = True
    k = 10  # number of folds
    result_path = 'results/PB2_spam.acc'
    model_name = 'spam_' + str(k) + 'fold'
    data_path = 'data/spam/data.pickle'

    # Load the pickled data set.
    dataset = loader.load_pickle_file(data_path)
    # TODO convert labels from {0, 1} to {-1, 1}
    # util.replace_zero_label_with_neg_one(training_data)

    # Normalization is currently disabled:
    # Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, training_data[0])
    # training_data[0] = preprocessing.scale(training_data[0])

    training_errs = []
    testing_errs = []
    print('Preparing k fold data.')
    folds = Preprocess.prepare_k_folds(dataset, k)

    def accuracy(split, predicted):
        # Fraction of rows whose predicted label equals the stored label.
        return (split[1] == predicted).sum() / split[0].shape[0]

    for fold_index in (0,):
        started = time.time()
        tr_data, te_data = Preprocess.get_i_fold(folds, fold_index)

        print('{:.2f} Start training.'.format(time.time() - started))
        kernel = c.EUCLIDEAN
        # kernel = c.GAUSSIAN
        f_select = True
        best_features_num = 5
        model = kNN.kNN(kernel=kernel)
        model.fit(tr_data[0], tr_data[1], f_select=f_select, best_f=best_features_num)
        print("Best features: {}".format(model.best_f_indices))

        for neighbours in (1, 2, 3, 7):
            train_pred = model.predict(tr_data[0], k=neighbours)
            test_pred = model.predict(te_data[0], k=neighbours)

            train_acc = accuracy(tr_data, train_pred)
            test_acc = accuracy(te_data, test_pred)

            elapsed = time.time() - started
            print('{} Final results with kernel {}, k={}. Train acc: {}, Test acc: {}'.format(elapsed, kernel, neighbours, train_acc, test_acc))
Exemplo n.º 8
def main():
    """Score the project's kNN classifier on spam data for two ``r`` values.

    Features are normalized to zero mean / unit variance, the data is split
    into k folds, and only fold 1 is evaluated. A fresh classifier is
    fitted for each ``r``; train and test accuracy are printed.
    """
    # Settings (result_path and model_name are not used below).
    k = 8  # number of folds
    result_path = 'results/PB2_spam.acc'
    model_name = 'spam_' + str(k) + 'fold'
    data_path = 'data/spam/data.pickle'

    # Load the pickled data set.
    dataset = loader.load_pickle_file(data_path)
    # TODO convert labels from {0, 1} to {-1, 1}
    # util.replace_zero_label_with_neg_one(training_data)

    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, dataset[0])

    training_accs = []
    testing_accs = []
    print('Preparing k fold data.')
    folds = Preprocess.prepare_k_folds(dataset, k)
    kernel = c.EUCLIDEAN
    overall_start = time.time()

    def accuracy(split, predicted):
        # Fraction of rows whose predicted label equals the stored label.
        return (split[1] == predicted).sum() / split[0].shape[0]

    for fold_index in (1,):
        started = time.time()
        tr_data, te_data = Preprocess.get_i_fold(folds, fold_index)

        print('{:.2f} Start training.'.format(time.time() - started))
        for radius in (2.5, 2.7):
            model = kNN.kNN(kernel=kernel)
            model.fit(tr_data[0], tr_data[1])
            train_pred = model.predict(tr_data[0], r=radius)
            test_pred = model.predict(te_data[0], r=radius)

            train_acc = accuracy(tr_data, train_pred)
            test_acc = accuracy(te_data, test_pred)

            elapsed = time.time() - started
            print('{} {}-fold results with kernel {}, r={}. Train acc: {}, Test acc: {}'.format(elapsed, fold_index, kernel, radius, train_acc, test_acc))
Exemplo n.º 9
def main():
    """Train an ``svm.SVC`` on the first fold of the spam data and print accuracy.

    Features are normalized to zero mean / unit variance before the k-fold
    split. Only fold 0 is trained and evaluated.
    """
    # Settings (result_path, model_name and threshes_path are not used below).
    k = 10  # number of folds
    result_path = "results/PB1_A_spam.acc"
    model_name = "spam_" + str(k) + "fold"
    threshes_path = "data/spambase.threshes"
    data_path = "data/spam/data.pickle"
    # kernel = 'poly'
    kernel = "linear"
    # kernel = 'rbf'
    verbose = False
    tol = 0.01
    # Named `penalty` rather than `c`, the alias the sibling examples use
    # for their constants module.
    penalty = 0.1

    # Load the pickled data set.
    training_data = loader.load_pickle_file(data_path)
    # TODO convert labels from {0, 1} to {-1, 1}

    Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, training_data[0])

    print("Preparing k fold data.")
    k_folds = Preprocess.prepare_k_folds(training_data, k)

    for i in range(1):
        st = time.time()
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)

        # "{:.3f}" prints three decimals; the former "{:3f}" only set a
        # minimum width of 3, which never affects a six-decimal float.
        print("{:.3f} Start training. Kernel: {}".format(time.time() - st, kernel))

        clf = svm.SVC(C=penalty, kernel=kernel, tol=tol, verbose=verbose)
        # clf = svm.NuSVC(kernel=kernel, tol=tol, verbose=verbose)
        clf.fit(tr_data[0], tr_data[1])
        tr_pred = clf.predict(tr_data[0])
        te_pred = clf.predict(te_data[0])

        tr_acc = (tr_data[1] == tr_pred).sum() / tr_data[0].shape[0]
        te_acc = (te_data[1] == te_pred).sum() / te_data[0].shape[0]

        print("{:.3f} Final results. Train acc: {}, Test acc: {}".format(time.time() - st, tr_acc, te_acc))
Exemplo n.º 10
def analyze_data(time_interval=TimeInterval, refined_type=FullyPreprocessedPath):
    """Run the preprocessing, similarity-model and visualization steps in order.

    Args:
        time_interval: forwarded to every step; printed with a ' min' suffix.
        refined_type: string forwarded to every step.
    """
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print('time_interval: ' + str(time_interval) + ' min')
    print('refined_type: ' + refined_type)
    print('--------------------------------------------')

    # Refine the data and save (the returned path is not needed here).
    Preprocess.preprocess_data(time_interval, refined_type)

    # Build similarity model and save
    Similarity.Build.similarity_model(time_interval, refined_type)

    # Set data for visualization
    Visualization.set_data4visualization(time_interval, refined_type)
Exemplo n.º 11
def analyze_data(time_interval=TimeInterval, refined_type=FullyPreprocessedPath):
    """Run the graphing, refining, similarity-model and visualization steps in order.

    Args:
        time_interval: forwarded to the refining, similarity and
            visualization steps; printed with a ' min' suffix.
        refined_type: string forwarded to the same three steps.
    """
    # Single-argument print() calls produce the same output on Python 2 and 3.
    print('time_interval: ' + str(time_interval) + ' min')
    print('refined_type: ' + refined_type)
    print('--------------------------------------------')

    # Draw graphs and save the figures (the returned directory is not needed here).
    Graph.Save.raw_data2graph()

    # Refine the data and save (the returned path is not needed here).
    Preprocess.refining_data(time_interval, refined_type)

    # Build similarity model and save
    Similarity.Build.similarity_model(time_interval, refined_type)

    # Set data for visualization
    Visualization.set_data4visualization(time_interval, refined_type)
Exemplo n.º 13
def main():
    """Run one word search over a tile string, then sort and output the results.

    The search option and the tile string are currently hard-coded
    (option 1, "andrew"); only the sort order is read from the user. The
    sorted results and the search time are handed to ``Output``.
    """
    # Python 2 calls the line reader raw_input, Python 3 calls it input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Helper objects
    preprocess = Preprocess()
    process = Process()
    points = Points()
    postprocess = Postprocess()

    # Defaults used when a branch below does not assign a value.
    reordered_search_string = ''
    permutation_set = set()
    final_results = []
    sorted_final_results = []
    alphabet = 'abcdefghijklmnopqrstuvwxyz'

    # Menu options
    print("\nSearch options:\n")
    print("1. Search for words")
    print("2. Search for words starting with")
    print("3. Search for words ending with")
    print("4. Search for words containing")
    print("5. Search with blank tiles (use the underscore character to represent blanks)\n")
    #option = int(raw_input("Choose option:"))
    option = 1
    #search_string = raw_input('Please input tiles for search: ').lower()
    search_string = "andrew"
    # Basic input check; on failure the reordered string stays empty.
    if preprocess.checkInput(search_string):
        reordered_search_string = preprocess.reorderString(search_string)  # alphabetize tiles

    t1 = time.time()    # diagnostics
    if option == 0:
        # NOTE(review): the message says "exiting" but execution continues.
        print("ERROR: No option chosen, exiting.")
    elif option == 1:
        print("Searching for words...\n")
        permutation_list = process.stringPermutations(reordered_search_string)
        filtered_content = process.collectDictionarySegments(reordered_search_string)
        sorted_results = process.findWords(permutation_list, filtered_content)
        final_results = points.associatePointScore(sorted_results)
    elif option == 2:
        # Same output as the comma-separated print statement this replaces.
        print("Searching for words starting with:  " + search_string + " \n")
        # Only the dictionary segment for the first letter is needed.
        filtered_content = process.collectDictionarySegments(search_string[0])
        sorted_results = process.findWordsContaining(search_string, filtered_content, option)
        final_results = points.associatePointScore(sorted_results)
    elif option == 3:
        print("Searching for words ending in:  " + search_string + " \n")
        filtered_content = process.collectDictionarySegments(alphabet)
        sorted_results = process.findWordsContaining(search_string, filtered_content, option)
        final_results = points.associatePointScore(sorted_results)
    elif option == 4:
        print("Searching for words containing:  " + search_string + " \n")
        filtered_content = process.collectDictionarySegments(alphabet)
        sorted_results = process.findWordsContaining(search_string, filtered_content, option)
        final_results = points.associatePointScore(sorted_results)
    elif option == 5:
        print("Searching with blank tiles...\n")
        blank_permutation_list = process.blankTileProcessing(reordered_search_string)
        filtered_content = process.collectDictionarySegments(alphabet)
        # The union removes duplicate permutations at the cost of extra time.
        for blank_permutation_string in blank_permutation_list:
            temp_permutation_set = set(process.stringPermutations(blank_permutation_string))
            permutation_set = permutation_set.union(temp_permutation_set)
        permutation_list = list(permutation_set)
        sorted_results = process.findWords(permutation_list, filtered_content)
        final_results = points.associatePointScore(sorted_results)
    else:
        # This `else:` was missing, so the error was printed after every
        # option-5 search.
        print("ERROR: Please choose an option between 1-5")
    t2 = time.time() - t1   # diagnostics

    print("Results found and processed. Sort results by...\n")
    print("1. Points - lowest to highest")
    print("2. Points - highest to lowest")
    print("3. Length - longest to shortest")
    print("4. Length - shortest to longest")
    sorted_option = int(read_line("choose option: "))
    print("Option " + str(sorted_option) + " chosen")
    # NOTE(review): menu entries 1 and 2 say the opposite of the messages
    # printed in their branches; confirm which direction each
    # postprocess method actually sorts in.
    if sorted_option == 1:
        print("Sorting results by points, highest to lowest\n")
        sorted_final_results = postprocess.resultsByPoints(final_results)
    elif sorted_option == 2:
        print("Sorting results by points, lowest to highest\n")
        sorted_final_results = postprocess.resultsByPointsReverse(final_results)
    elif sorted_option == 3:
        print("Sorting results by length, longest to shortest\n")
        sorted_final_results = postprocess.resultsByLength(final_results)
    elif sorted_option == 4:
        print("Sorting results by length, shortest to longest\n")
        sorted_final_results = postprocess.resultsByLengthReverse(final_results)
    else:
        # This `else:` was missing, so the option-4 result was always
        # replaced by the unsorted list.
        print("Option 1-4 not chosen, outputting results by default order")
        sorted_final_results = final_results
    Output(sorted_final_results, t2)
Exemplo n.º 14
# `os` is used below for the data paths but was not imported.
import os

import numpy as np
import rbm_rm
import rbm_cm

import matplotlib.pyplot as plt
import utils
import Preprocess

# The MNIST training array is expected under $DATA_HOME/mnist.
mnist_dir = os.path.join(os.environ['DATA_HOME'], 'mnist')
mnist_train_path = os.path.join(mnist_dir, 'MNISTTrainData.npy')

data_rm = np.load(mnist_train_path)
[normed, meanv, stdv] = Preprocess.mean_zero_unit_variance(data_rm)
#Look, I didn't actually use the normalized data because it broke everything

# Row-wise split: rows from 30000 onwards train, the first 30000 validate.
train_rm = data_rm[30000:, :]
valid_rm = data_rm[:30000, :]

# The same split on the transposed array, where samples run along axis 1.
data_cm = data_rm.transpose()
train_cm = data_cm[:,30000:]
valid_cm = data_cm[:,:30000]

nHidden = 100
ViewDimensions = (10, 10)   # Should multiply to nHidden
TP = rbm_rm.RBMTrainParams()
TP.maxepoch = 15

rm_learner = rbm_rm.GV_RBM(nHidden, train_rm.shape[1])
Exemplo n.º 15
# Train a gradient-descent linear regression on the housing data and print
# the train/test MSE. Training can be cut short with Ctrl-C.

# training parameter
result_path = 'results/housingLiRGD_1.mse'
model_name = 'housing'
lamda = 0.0001  # 0.000015
is_batch = False
# normalization = Preprocess.zero_mean_unit_var
normalization = Preprocess.shift_and_scale
term_fun = util.mse_less_than
term_thresh = 25
cols_not_norm = [0,7]

# load and preprocess training data
training_data = loader.load_dataset('data/housing_train.txt')
testing_data = loader.load_dataset('data/housing_test.txt')
Preprocess.normalize_features_all(normalization, training_data[0], testing_data[0], not_norm=cols_not_norm)

# start training
model = gd.LinearRegressionGD()
# The `except` clause had no `try:` in front of it (a syntax error). The
# evaluation now sits in `finally:` so it runs whether training finished
# or was interrupted.
# NOTE(review): the exact original layout is an assumption -- confirm.
try:
    model.build(training_data[0], training_data[1], lamda, term_fun, term_thresh, is_batch)
except KeyboardInterrupt:
    print('Interrupted')
finally:
    training_mse = model.test(training_data[0], training_data[1], util.mse)
    testing_mse = model.test(testing_data[0], testing_data[1], util.mse)
    print('Error for training data is:')
    print(training_mse)
    print('Error for testing data is:')
    print(testing_mse)
Exemplo n.º 16
def main():
    """Run k-fold boosting with decision stumps on the 'crx' or 'vote' data.

    For every fold a ``Boosting`` instance is grown for ``round_limit``
    rounds; after each round the running train/test error is printed. A
    summary dict is written to ``result_path`` at the end.
    """
    # Run settings.
    target = 'crx'
    # target = 'vote'
    k = 10  # number of folds
    round_limit = 150

    if target == 'crx':
        result_path = 'results/crxBoosting_final_1.acc'
        model_name = 'crx_' + str(k) + 'fold'
        threshes_path = 'data/crx.threshes'
        data_path = 'data/crx_parsed.data'
    else:
        # This `else:` was missing, so the 'vote' settings always replaced
        # the 'crx' ones.
        result_path = 'results/voteBoosting_final.acc'
        model_name = 'vote_' + str(k) + 'fold'
        threshes_path = 'data/vote.threshes'
        data_path = 'data/vote_parsed.data'

    # Data set and the pickled thresholds.
    training_data = loader.load_pickle_file(data_path)
    threshes = loader.load_pickle_file(threshes_path)

    # Result accumulators.
    # NOTE(review): nothing below appends to training_errs / testing_errs or
    # assigns the *_1st_boost values, so the summary reports empty lists,
    # None and a NaN mean -- the code that filled them appears to be missing.
    training_errs = []
    testing_errs = []
    round_err_1st_boost = None
    tr_errs_1st_boost = None
    te_errs_1st_boost = None
    te_auc_1st_boost = None
    roc = []
    auc = 0.0
    k_folds = Preprocess.prepare_k_folds(training_data, k)

    for i in range(k):
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])
        # Initial distribution over the training rows.
        d = util.init_distribution(len(tr_data[0]))
        # Precomputed threshold "cheat sheet" for this fold.
        thresh_cs = util.pre_compute_threshes_uci(tr_data[0], tr_data[1], threshes)
        boost = b.Boosting(d)
        # Running predictions, one zero-initialised float per row.
        testing_predict = np.zeros((1, te_n)).tolist()[0]
        training_predict = np.zeros((1, tr_n)).tolist()[0]
        round_tr_err = []
        round_te_err = []
        round_model_err = []
        round_te_auc = []
        converged = False
        tol = 1e-5
        te_auc = 2.
        # Named round_idx so the builtin round() is not shadowed.
        round_idx = 0
        while round_idx < round_limit: # and not converged:
            round_idx += 1
            boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
            boost.update_predict(tr_data[0], training_predict)
            boost.update_predict(te_data[0], testing_predict)
            c_model_err = boost.model[-1].w_err
            c_f_ind = boost.model[-1].f_ind
            c_thresh = boost.model[-1].thresh
            c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
            c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
            # TODO calculate the AUC for testing results
            # c_te_auc = util.get_auc_from_predict(testing_predict, te_data[1])
            # round_tr_err.append(c_tr_err)
            # round_te_err.append(c_te_err)
            # round_te_auc.append(c_te_auc)
            print('Round: {} Feature: {} Threshold: {} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f}'.format(round_idx, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err))
            # converged =  abs(c_te_auc - te_auc) / te_auc <= tol
            # te_auc = c_te_auc

        # if k == 0:
        #     round_err_1st_boost = round_model_err
        #     tr_errs_1st_boost = round_tr_err
        #     te_errs_1st_boost = round_te_err
            # te_auc_1st_boost = round_te_auc

        # break      # for testing

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)

    print(str(k) + '-fold validation done. Training errs are:')
    print('Mean training err is:')
    print('Testing errs are:')
    print('Mean testing err is:')

    result = {}
    result['Fold'] = str(k)
    result['Trainingerrs'] = str(training_errs)
    result['MeanTrainingAcc'] = str(mean_training_err)
    result['Testingerrs'] = str(testing_errs)
    result['MeanTestingAcc'] = str(mean_testing_err)
    result['1stBoostTrainingError'] = str(tr_errs_1st_boost)
    result['1stBoostTestingError'] = str(te_errs_1st_boost)
    result['1stBoostModelError'] = str(round_err_1st_boost)
    result['1stBoostTestingAUC'] = str(te_auc_1st_boost)

    # result['ROC'] = str(roc)
    result['AUC'] = str(auc)

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result)
Exemplo n.º 17
def GenerateMake(inputDir, outputDir, toolsRoot, linkerStage='elf'):
    # Collects the .c and .s files in inputDir, runs the preprocessor rewrite
    # on each .c file, and fills the Makefile template for outputDir.
    # NOTE(review): this listing is incomplete and does not parse as shown:
    # the `m` dict is never closed, the flashTarget substitution call has no
    # arguments or closing parenthesis, `replaceMap` has values but no keys
    # or closing brace, and the final `with` has no body. The code is left
    # untouched; restore the missing lines from the original source.
    def MakeFileList(s, outDir, extension):
        # One "<outDir>\<stem>.<extension>" string per path object in `s`.
        return list(
            map(lambda x: "{0}\\{1}.{2}".format(outDir, x.stem, extension), s))

    def FillCompilerTemplate(dest, source):
        # Fills compileSrcTemplate for one (object file, source file) pair.
        print("compile {0} => {1}".format(source, dest))
        langSpec = ""
        lang = ""
        sourcePath = pathlib.Path(source)
        # Only .c sources get explicit language flags; others keep them empty.
        if sourcePath.suffix == '.c':
            langSpec = "-std=gnu99"
            lang = "-x c"
        m = {
            'lang': lang,
            'langSpec': langSpec,
            'objectFile': str(dest),
            'sourceFile': str(source)
        t = string.Template(compileSrcTemplate)
        return t.safe_substitute(m)

    p = pathlib.Path(inputDir)
    sources = [x for x in p.iterdir() if x.suffix == ('.c')]
    Preprocess.InitPreprocessor([], [])
    # Each C source is passed through Preprocess.Rewrite together with outputDir.
    for s in sources:
        Preprocess.Rewrite(str(s), outputDir)
    asmSources = [x for x in p.iterdir() if x.suffix == ('.s')]
    asmSourceList = list(map(str, asmSources))
    # C sources are referenced by their path under outputDir, assembly
    # sources by their original path.
    ppSourceList = MakeFileList(sources, outputDir, 'c') + asmSourceList
    objectList = MakeFileList(sources + asmSources, outputDir, 'o')
    flashTarget = ''

    if linkerStage == 'elf':  #otherwise we should not generate a flash target!
        flashTarget = string.Template(flashTargetTemplate).safe_substitute(

    device = targetConfigs['device']
    # Default define is derived from the device name; 'm328p' gets a fixed one.
    AvrFamily = '__AVR_DEV_LIB_NAME__={0}'.format(device)
    if device == 'm328p':
        AvrFamily = '__AVR_ATmega328P__=1'
    replaceMap = {
        " ".join(map(str, sources)),
        " ".join(ppSourceList),
        " ".join(objectList),
        " ".join(MakeFileList(sources, outputDir, 'd')),
        "\n".join(list(map(FillCompilerTemplate, objectList, ppSourceList))),

    ppSourceList = None
    objectList = None
    temp = string.Template(makeFileTemplate)
    makeFile = temp.safe_substitute(replaceMap)
    # Opens <outputDir>/Makefile for writing; the write itself is not shown.
    with open(pathlib.Path(outputDir) / "Makefile", "w") as f:
Exemplo n.º 18
Exemplo n.º 19
Exemplo n.º 20
def main():
    # Entry point: parses the command-line options, selects an input source
    # (calibration image, video file, still image or capture device 0),
    # loads the KNN character model and runs licence-plate reading on it.
    # NOTE(review): this listing is incomplete and does not parse as shown.
    # Missing pieces include the body of `if args.get("video") and not
    # grabbed:`, the `else:` branches, and the call heads in front of the
    # dangling argument lists near the end of the loop. The code is left
    # untouched; restore the missing lines from the original source.
    # argument for input video/image/calibration
    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video", help="Path to video file")
    ap.add_argument("-i", "--image", help="Path to the image")
    ap.add_argument("-c", "--calibration", help="image or video or camera")
    args = vars(ap.parse_args())

    img_original_scene = None
    loop = None
    camera = None

    # if -c assigned, calibrate the angle of camera or video
    if args.get("calibration", True):
        img_original_scene = cv2.imread(args["calibration"])
        if img_original_scene is None:
            print("Please check again the path of image or argument !")
        img_original_scene = imutils.resize(img_original_scene, width=720)
    else:  # run video / image / cam
        if args.get("video", True):
            camera = cv2.VideoCapture(args["video"])
            if camera is None:
                print("Please check again the path of video or argument !")
            loop = True

        elif args.get("image", True):
            img_original_scene = cv2.imread(args["image"])
            if img_original_scene is None:
                print("Please check again the path of image or argument !")
                loop = False
            # NOTE(review): as shown, the image branch also opens device 0
            # and sets loop = True; an `else:` in front of the next two
            # lines appears to be missing.
            camera = cv2.VideoCapture(0)
            loop = True

    # Load and check KNN Model
    assert DetectChars.loadKNNDataAndTrainKNN(), "KNN can't be loaded !"

    save_number = 0
    prev_license = ""
    licenses_verify = []

    # Looping for Video
    while loop:
        # grab the current frame
        (grabbed, frame) = camera.read()
        # NOTE(review): the body of this `if` is missing (presumably it left
        # the loop when a video file ran out of frames).
        if args.get("video") and not grabbed:

        # resize the frame and preprocess
        img_original_scene = imutils.resize(frame, width=620)
        _, img_thresh = pp.preprocess(img_original_scene)

        # Show the preprocess result
        cv2.imshow("threshold", img_thresh)

        # Get the license in frame
        img_original_scene = imutils.transform(img_original_scene)
        img_original_scene, new_license = searching(img_original_scene, loop)

        # only save 5 same license each time (verification)
        # NOTE(review): the branch structure below is incomplete; the
        # indentation suggests `else:` lines and a `print(` head were lost.
        if new_license == "":
            print("no characters were detected\n")
            if len(licenses_verify) == N_VERIFY and len(
                    set(licenses_verify)) == 1:
                if prev_license == new_license:
                    print(f"still = {prev_license}\n")
                    # show and save verified plate
                        f"A new license plate read from image = {new_license} \n"
                    cv2.imshow(new_license, img_original_scene)
                    file_name = f"hasil/{new_license}.png"
                    cv2.imwrite(file_name, img_original_scene)
                    prev_license = new_license
                    licenses_verify = []
                if len(licenses_verify) == N_VERIFY:
                    # drop first if reach the N_VERIFY
                    licenses_verify = licenses_verify[1:]

        # add text and rectangle, just for information and bordering
        # NOTE(review): the lines that open these two calls are missing; only
        # their argument lists remain.
            "Press 's' to save frame to be 'save.png', for calibrating",
            (10, 30),
            0.5, (255, 255, 255),
                      ((img_original_scene.shape[1] // 2 - 230),
                       (img_original_scene.shape[0] // 2 - 80)),
                      ((img_original_scene.shape[1] // 2 + 230),
                       (img_original_scene.shape[0] // 2 + 80)), SCALAR_GREEN,
        cv2.imshow("imgOriginalScene", img_original_scene)

        key = cv2.waitKey(5) & 0xFF
        # if 's' key pressed save the image
        if key == ord('s'):
            # save_number is turned into a string for the file names and
            # back into an int afterwards so it can be incremented.
            save_number = str(save_number)
            savefileimg = "calib_knn/img_" + save_number + ".png"
            savefileThr = "calib_knn/Thr_" + save_number + ".png"
            # cv2.saveimage("save.png", imgOriginalScene)
            cv2.imwrite(savefileimg, frame)
            cv2.imwrite(savefileThr, img_thresh)
            print("image save !")
            save_number = int(save_number)
            save_number = save_number + 1
        if key == 27:  # key code 27 is ESC (not 'q'); no loop exit is visible here
            camera.release()  # release the capture device

    # For image only
    if not loop:
        img_original_scene = imutils.resize(img_original_scene, width=720)
        cv2.imshow("original", img_original_scene)
        imgGrayscale, img_thresh = pp.preprocess(img_original_scene)
        cv2.imshow("threshold", img_thresh)
        img_original_scene = imutils.transform(img_original_scene)
        img_original_scene, new_license = searching(img_original_scene, loop)
        print(f"license plate read from image = {new_license} \n")
Exemplo n.º 21
# k-fold evaluation of the Bernoulli naive Bayes model on spambase.
# NOTE(review): the listing stops inside the fold loop; the code that uses
# the two test results and the accumulators below is not shown.
k = 10  # fold
result_path = 'results/spamNBBern_1.acc'
model_name = 'spam_' + str(k) + 'fold_' + 'SS'

# load and preprocess training data
training_data = loader.load_dataset('data/spambase.data')

# start training
# Per-fold accumulators; nothing in the visible code appends to them.
training_accs = []
training_cms = []
testing_accs = []
testing_cms = []
roc = []
auc = 0.0
k_folds = Preprocess.prepare_k_folds(training_data, k)

# Loaded once and handed to every fold's model.
means = loader.load_spam_mean('data/spam_mean')

for i in range(k):
    tr_data, te_data = Preprocess.get_i_fold(k_folds, i)

    # A fresh model is built on the training part of each fold.
    model = m.NBBernoulli(means)
    model.build(tr_data[0], tr_data[1])

    # The same evaluation function is applied to the train and test parts.
    training_test_res = model.test(tr_data[0], tr_data[1], util.compute_acc_confusion_matrix)
    testing_test_res = model.test(te_data[0], te_data[1], util.compute_acc_confusion_matrix)
Exemplo n.º 22
            f.write(str(j) + " ")
    for row in r:
        for j in row:
            f.write(str(j) + " ")

# For each learning rate, run P.main on the three data sets and write the
# first three values of each result to `z`.
# NOTE(review): the listing is cut off here -- the write for SPECTF and the
# update of `lrate` are not visible, and `lrate` / `z` are defined elsewhere.
while lrate < 1.0:
    z.write("FOR LRATE: " + str(lrate) + "\n")
    temp = P.main("Datasets/IRIS.csv", lrate)
    z.write(str(temp[0]) + " " + str(temp[1]) + " " + str(temp[2]) + "\n")
    temp = P.main("Datasets/SPECT.csv", lrate)
    z.write(str(temp[0]) + " " + str(temp[1]) + " " + str(temp[2]) + "\n")
    temp = P.main("Datasets/SPECTF.csv", lrate)
Exemplo n.º 23
def main():
    """Run licence-plate recognition on a video/camera stream or a still image.

    Parses the -v/--video, -i/--image and -c/--calibration options, trains the
    KNN character model through DetectChars, then either loops over frames
    (``loop`` true) or processes a single image (``loop`` false).

    NOTE(review): this listing has lost lines (several ``for``/``if`` headers
    have no body), so it does not parse as written. The comments below flag
    each gap instead of guessing at the missing code.
    """
    # argument for input video/image/calibration
    ap = argparse.ArgumentParser()
    ap.add_argument("-v", "--video",
        help = "path to video file")

    ap.add_argument("-i", "--image",
        help = "Path to the image")

    ap.add_argument("-c", "--calibration",
        help = "image or video or camera")
    args = vars(ap.parse_args())

    # NOTE(review): after parse_args() every option key should already be
    # present (None when not given), so the `True` default passed to .get()
    # here and below is presumably never used - confirm.
    if args.get("calibration", True):
        imgOriginalScene = cv2.imread(args["calibration"])
        if imgOriginalScene is None:
    	    print("Please check again the path of image or argument !")

        # NOTE(review): there is no early exit after the message above, so
        # this resize still runs when the image failed to load.
        imgOriginalScene  = imutils.resize(imgOriginalScene, width = 640)

    if args.get("video", True):
        camera = cv2.VideoCapture(args["video"])
        # NOTE(review): a VideoCapture object is returned here regardless of
        # whether the source opened; camera.isOpened() is probably the
        # intended check - confirm against the OpenCV docs.
        if camera is None:
            print("   Please check again the path of video or argument !")
        loop = True

    elif args.get("image", True):
        # The command-line image path is ignored in favour of a hard-coded file.
        #imgOriginalScene = cv2.imread(args["image"])
        imgOriginalScene = cv2.imread("media/gspeintercon/GSPE1/GSPE/OCR/plat4.JPG")
        if imgOriginalScene is None:
            print("   Please check again the path of image or argument !")
        loop = False
        # NOTE(review): the lines below sit inside the same `elif` and set
        # `loop` back to True with camera 0; an `else:` header may be missing.
        #camera = cv2.VideoCapture("rtsp://")
        #camera = cv2.VideoCapture("rtsp://*****:*****@")
        camera = cv2.VideoCapture(0)
        #camera = cv2.VideoCapture(0)
        loop = True

    # add knn library for detect chars
    blnKNNTrainingSuccessful = DetectChars.loadKNNDataAndTrainKNN()             # attempt KNN training

    if blnKNNTrainingSuccessful == False:                                       # if KNN training was not successful
        print("\nerror: KNN traning was not successful\n")                      # show error message
    count = 0
    # `license` holds the most recent plate readings; it is indexed by `count`
    # in the frame loop below.
    license = []
    VER = np.zeros(VERIF)
    # NOTE(review): the body of this loop is missing from the listing;
    # presumably it pre-fills `license` so the indexed writes below are valid.
    for x in VER:
    numlicense = ""
    knn = 0

    # Looping for Video
    while (loop):
        # grab the current frame
        (grabbed, frame) = camera.read()
        #frame = camera.read()
        # NOTE(review): the body of this `if` is missing from the listing
        # (presumably a `break` at the end of the video - confirm).
        if args.get("video") and not grabbed:
        # resize the frame and convert it to grayscale
        imgOriginalScene  = imutils.resize(frame, width = 640)
        imgGrayscale, imgThresh = pp.preprocess(imgOriginalScene)
        cv2.imshow("threshold", imgThresh)
        #imgOriginalScene = imutils.transform (imgOriginalScene)
        imgOriginalScene, licenses = searching(imgOriginalScene,loop)

        # Count consecutive identical readings; the plate is processed only
        # once `count` reaches VERIF-1.
        license[count+1] = licenses
        nums = license[VERIF-1]
        if (license[count] == license[count+1]):
            count = count + 1
        elif (license[count] != license[count+1]):
            # A different reading restarts the run with the new value in slot 0.
            coba = license[count+1]
            count = 0
            license[count] = coba
        if count == (VERIF-1):

            plateAlloc = "       "
            numstring = ""
            numbers = sum(c.isdigit() for c in nums)
            words   = sum(c.isalpha() for c in nums)

            # NOTE(review): the body of this loop is missing from the listing.
            for c in nums:

            # `plat` is a module-level, fixed-width buffer filled in below.
            global plat
            plat = "         "
            plat = list(plat)
            numstring = ""
            numstring = list(numstring)
            alphastring = ""
            alphastring = list(alphastring)
            numbers = sum(c.isdigit() for c in nums)
            words   = sum(c.isalpha() for c in nums)

            # NOTE(review): both branch bodies below contain only comments in
            # this listing; the statements that filled `alphastring` and
            # `numstring` appear to be missing.
            for i in nums:
                #nums = np.array(nums)
                #nums = list(nums)
                if i.isalpha():
                    #nums[i] = np.array(nums[i])
                elif i.isdigit():
                    #nums[i] = np.array(nums[i])


            #add numbers

            # NOTE(review): `a` is not advanced in the visible code, so every
            # digit would be written to plat[2]; an increment line is probably
            # missing. The same applies to `c` and `e` in the loops below.
            a = 2
            for b in numstring:
                plat[a] = b

            #add front letter(s)

            c = 0
            sumfront = sum(c.isalpha() for c in nums[0:2])
            if (sumfront == 1):
                for d in nums[0:1]:
                    plat[c] = d
            elif (sumfront == 2):
                for d in nums[0:2]:
                    plat[c] = d

            #add back letter(s)

            e = -3
            sumback = sum(e.isalpha() for e in nums[-3:])
            if (sumback == 1):
                for f in nums[-1:]:
                    plat[e] = f
            elif (sumback == 2):
                for f in nums[-2:]:
                    plat[e] = f
            elif (sumback == 3):
                for f in nums[-3:]:
                    plat[e] = f

            plat = ''.join(plat)

            if (license[VERIF-1] == ""):
                print("no characters were detected\n")
                # NOTE(review): as indented, everything below runs only when
                # the reading is empty; an `else:` header may be missing.
                #if number license same, not be saved

                if (numlicense == license[VERIF-1]):
                    print("still = " + numlicense + "\n")
                elif (len(nums) <= 9 and nums[0] >= 'A' and nums[0] <= 'Z' and numbers <= 4 and words <= 5):

                    numlicense = license[VERIF-1]
                    #print("A new license plate read from image = " + license[VERIF-1] + "\n")
                    print("A new license plate read from image = " + plat + "\n")
                    #cv2.imshow(license[VERIF-1], imgOriginalScene)
                    cv2.imshow(plat, imgOriginalScene)
                    insertdata= updatetime(numlicense)
                    if check(numlicense):
                        # Date and time strings used in the saved file name.
                        ts = time.time()
                        timestamp = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d')
                        timestamp2 = datetime.datetime.fromtimestamp(ts).strftime('%H:%M:%S')

                        # Change this path to match your own machine.
                        namefile = "/var/www/html/MonitoringDashboard/hasil_parksystem/"+ license[VERIF-1] + timestamp + timestamp2 + ".png"
                        cv2.imwrite(namefile, imgOriginalScene)

                        # Remove this part to skip the sensor and the MQTT message.
                        client1= paho.Client("control1")                           #create client object
                        client1.connect(broker,port)                                 #establish connection
                        ret= client1.publish("xiaomi/to/write",'{"cmd": "write",  "model": "plug",  "sid": "158d0002365abb",  "data": {"status": "on"}}')
            count = 0

        #determine plate regions

        # Look up the region from the first character of `plat`.
        global plateRegion
        plateRegion = ""
        plateDic = {'B':"Jakarta", 'D':"Bandung", 'L':"Surabaya", 'A':"Banten", 'E':"Cirebon", 'G':"Pekalongan", 'H':"Semarang"}
        for i, j in plateDic.items():
            if (plat[0] == i):
                plateRegion = j

        #and nums[0] >= 'A' and nums[0] <= 'Z' and nums[-1] >= 'A' and nums[-1] <= 'Z'

        # NOTE(review): this chain repeats the lookup above using `nums`. The
        # 'D'/'L' branches after the first pair duplicate earlier conditions
        # and can never be taken, and 'E' maps to "Bandung" here but to
        # "Cirebon" in plateDic - confirm which is intended.
        global plateRegion
        if (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'B'):
            plateRegion = "Jakarta"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'D'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'E'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'D'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'D'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'D'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'D'):
            plateRegion = "Bandung"
        elif (len(nums) >= 5 and len(nums) <= 9 and nums[0] == 'L'):
            plateRegion = "Surabaya"

            # NOTE(review): as indented this fallback belongs to the last
            # `elif`; an `else:` header is probably missing above it.
            plateRegion = "Who knows?"


        # re-show scene image
        #imgOriginalScene = cv2.blur(imgOriginalScene,(12,12))
        cv2.putText(imgOriginalScene,"Press 's' to save frame to be 'save.png', for calibrating",(10,30),cv2.FONT_HERSHEY_SIMPLEX, 0.5,(255,255,255),1,bottomLeftOrigin = False)
        #drawRedRectangleAroundPlate(imgOriginalScene, imgOriginalScene)


        cv2.imshow("imgOriginalScene", imgOriginalScene)
        #cv2.imshow("ori", frame)

        key = cv2.waitKey(5) & 0xFF
        if key == ord('s'):
            # Save the raw frame and its threshold image under a running
            # index; `knn` is turned into a string for the file names and
            # back into an int for the increment.
            knn = str(knn)
            savefileimg = "calib_knn/img_"+ knn +".png"
            savefileThr = "calib_knn/Thr_"+ knn +".png"
            #cv2.saveimage("save.png", imgOriginalScene)
            cv2.imwrite(savefileimg, frame)
            cv2.imwrite(savefileThr, imgThresh)
            print("image save !")
            knn = int(knn)
            knn = knn + 1
        # Key code 27 is Esc (not 'q').
        # NOTE(review): only the camera is released here; no statement that
        # leaves the loop or closes the windows is visible.
        if key == 27:
            camera.release()

    # For image only
    if (loop == False):
        imgOriginalScene  = imutils.resize(imgOriginalScene, width = 850)
        imgGrayscale, imgThresh = pp.preprocess(imgOriginalScene)
        #imgOriginalScene = imutils.transform (imgOriginalScene)
        imgOriginalScene,license = searching(imgOriginalScene,loop)
        #imgOriginalScene = imutils.detransform(imgOriginalScene)

Exemplo n.º 25
    wt = pre.time_weight(Dc, N=n_code, L_n=P.shape[1])
    for i in range(Dc.shape[1]):
        X_i = Dc[:, i]
        X_l, X_selected, X_h = pre.dct_segment_generate(X_i,
        Y = eb.watermark_embed(X_selected, P, wmbits, N=n_code, weight=wt[i])
        Y_i = pre.dct_reconstruct(X_l, Y, X_h)
        signal_wmd[:, i] = Y_i
    embeded = tf.istt_my(signal_wmd, length=y.shape[0])

    return embeded, ns

# Script: walk every .wav file under `path` and load it for watermark
# embedding.
path = 'F:/audio_wm/audio/'
all_file = pre.filter_file(path, 'wav')
#all_file = ['./audio/batman-5min.wav']

n_dt = 8192
L_n = 32
n_code = 32  # number of bits divided by 4; the bit count here is 128

# Generate the seed and the PN code, and save the seed to disk.
p0 = eb.seed_generate(L_n)
P = eb.pn_code_generate(16, p0)
np.save('F:/audio_wm/data/p0.npy', p0)
for filepath in all_file:
    print("Embedding in " + filepath)
    # File name without directory and without anything after the first dot.
    aFullFilename = os.path.split(filepath)[-1]
    filename = aFullFilename.split('.')[0]
    # Load at 44.1 kHz without down-mixing to mono.
    # NOTE(review): the embedding step itself is not visible in this excerpt.
    audio, sr = librosa.load(filepath, sr=44100, mono=False)
Exemplo n.º 26
Exemplo n.º 27
Exemplo n.º 28
def detectPlatesInScene(imgOriginalScene):
    """Return the list of possible plates found in ``imgOriginalScene``.

    The scene is preprocessed into grayscale and threshold images, candidate
    characters are collected and grouped, and each group is passed to
    ``extractPlate``; results whose ``imgPlate`` is not None are kept.  When
    ``Main.showSteps`` is true the intermediate images are shown with cv2.

    NOTE(review): this is Python 2 code (``print`` statements), and two
    ``for`` loops below have lost their bodies in this listing.
    """
    listOfPossiblePlates = []                   # this will be the return value

    height, width, numChannels = imgOriginalScene.shape

    # Placeholder images; the first two are overwritten by preprocess() below.
    imgGrayscaleScene = np.zeros((height, width, 1), np.uint8)
    imgThreshScene = np.zeros((height, width, 1), np.uint8)
    imgContours = np.zeros((height, width, 3), np.uint8)


    if Main.showSteps == True: # show steps #######################################################
        cv2.imshow("0", imgOriginalScene)
    # end if # show steps #########################################################################

    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)         # preprocess to get grayscale and threshold images

    if Main.showSteps == True: # show steps #######################################################
        cv2.imshow("1a", imgGrayscaleScene)
        cv2.imshow("1b", imgThreshScene)
    # end if # show steps #########################################################################

            # find all possible chars in the scene,
            # this function first finds all contours, then only includes contours that could be chars (without comparison to other chars yet)
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if Main.showSteps == True: # show steps #######################################################
        print "step 2 - len(listOfPossibleCharsInScene) = " + str(len(listOfPossibleCharsInScene))         # 131 with MCLRNF1 image

        imgContours = np.zeros((height, width, 3), np.uint8)

        contours = []

        # NOTE(review): loop body missing from the listing; presumably it
        # collected each char's contour into `contours` - confirm.
        for possibleChar in listOfPossibleCharsInScene:
        # end for

        cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)
    # end if # show steps #########################################################################

            # given a list of all possible chars, find groups of matching chars
            # in the next steps each group of matching chars will attempt to be recognized as a plate
    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if Main.showSteps == True: # show steps #######################################################
        print "step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(len(listOfListsOfMatchingCharsInScene))    # 13 with MCLRNF1 image

        imgContours = np.zeros((height, width, 3), np.uint8)

        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            # One random colour per group of matching chars.
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = []

            # NOTE(review): loop body missing from the listing (see above).
            for matchingChar in listOfMatchingChars:
            # end for

            cv2.drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))
        # end for

        cv2.imshow("3", imgContours)
    # end if # show steps #########################################################################

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:                   # for each group of matching chars
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)         # attempt to extract plate

        if possiblePlate.imgPlate is not None:                          # if plate was found
            listOfPossiblePlates.append(possiblePlate)                  # add to list of possible plates
        # end if
    # end for

    print "\n" + str(len(listOfPossiblePlates)) + " possible plates found"          # 13 with MCLRNF1 image

    if Main.showSteps == True: # show steps #######################################################
        print "\n"
        cv2.imshow("4a", imgContours)

        for i in range(0, len(listOfPossiblePlates)):
            # Corner points of the plate's rotated rectangle, outlined in red
            # with four line segments.
            p2fRectPoints = cv2.boxPoints(listOfPossiblePlates[i].rrLocationOfPlateInScene)

            cv2.line(imgContours, tuple(p2fRectPoints[0]), tuple(p2fRectPoints[1]), Main.SCALAR_RED, 2)
            cv2.line(imgContours, tuple(p2fRectPoints[1]), tuple(p2fRectPoints[2]), Main.SCALAR_RED, 2)
            cv2.line(imgContours, tuple(p2fRectPoints[2]), tuple(p2fRectPoints[3]), Main.SCALAR_RED, 2)
            cv2.line(imgContours, tuple(p2fRectPoints[3]), tuple(p2fRectPoints[0]), Main.SCALAR_RED, 2)

            cv2.imshow("4a", imgContours)

            print "possible plate " + str(i) + ", click on any image and press a key to continue . . ."

            # NOTE(review): the message asks for a key press, but no
            # cv2.waitKey call is visible in this listing.
            cv2.imshow("4b", listOfPossiblePlates[i].imgPlate)
        # end for

        print "\nplate detection complete, click on any image and press a key to begin char recognition . . .\n"
    # end if # show steps #########################################################################

    return listOfPossiblePlates
Exemplo n.º 29
import os

def extract(y, n_dt, P, wmbits, n_code):
    """Run watermark extraction over every frame of ``y``; return ``wmbits``.

    ``y`` is transformed with ``tf.stt_my`` and each column of the result is
    segmented and handed to ``et.extract`` together with ``P``.

    NOTE(review): the per-frame extraction result is discarded and ``wmbits``
    is returned exactly as passed in; the statements that combine the two
    look to be missing from this listing.
    """
    frames = tf.stt_my(y, n_dt=n_dt)
    n_frames = frames.shape[1]
    for col in range(n_frames):
        # Only the middle (selected) segment is passed to the extractor.
        _low, selected, _high = pre.dct_segment_generate(
            frames[:, col], N=n_code, L_n=P.shape[1])
        et.extract(selected, P)
    return wmbits

# Script: load every .wav file under `path` for watermark extraction.
path = 'F:/audio_wm/result/'
all_file = pre.filter_file(path, 'wav')
# Rebuild the PN code from the saved seed file.
p0 = np.load('F:/audio_wm/data/p0.npy')
P = et.pn_reconstruct(16, p0)

for filepath in all_file:
    # File name without directory and without anything after the first dot.
    aFilename = os.path.split(filepath)[-1]
    filename = aFilename.split('.')[0]
    # Load at 44.1 kHz without down-mixing to mono.
    audio, sr = librosa.load(filepath, sr=44100, mono=False)
    n_dt = 8192
    n_code = 32
    # Disabled alternative: a per-file seed instead of the shared one above.
    #p0 = np.load('./data/' + filename +'.npy')
    #P = et.pn_reconstruct(16, p0)
    # NOTE(review): the call that performs the extraction is not visible in
    # this excerpt.
Exemplo n.º 31
def main():
    """Train and evaluate a decision tree on spambase and log the results.

    The data set is split into ``k`` folds, a ``dt.DecisionTree`` limited by
    ``layer_thresh`` is built on the training part and scored with
    ``util.acc`` on both parts.  The per-fold scores and their means are
    printed and written to ``result_path`` via ``util.write_result_to_file``.
    """
    # training parameter
    k = 10  # fold
    layer_thresh = 2
    result_path = 'results/spamDT_final.acc'
    model_name = 'spam_' + str(k) + 'fold'
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    roc = []
    auc = 0.0
    k_folds = Preprocess.prepare_k_folds(training_data, k)

    # NOTE(review): only fold 0 is evaluated although k folds are prepared;
    # kept as in the original - confirm whether range(k) was intended.
    for i in range(1):
        st = time.time()
        tr_data, te_data = Preprocess.get_i_fold(k_folds, i)
        t = dt.DecisionTree()
        t.build(tr_data[0], tr_data[1], threshes, layer_thresh)
        # score the tree on the training and the held-out part of this fold
        training_errs.append(t.test(tr_data[0], tr_data[1], util.acc))
        testing_errs.append(t.test(te_data[0], te_data[1], util.acc))
        print('Round {} finishes, time used: {}'.format(i, time.time() - st))

    mean_training_err = np.mean(training_errs)
    mean_testing_err = np.mean(testing_errs)

    # Each label is followed by its value; previously only the labels were
    # printed.
    print(str(k) + '-fold validation done. Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    # Keys are kept exactly as before so existing result files stay
    # comparable.
    result = {
        'Fold': k,
        'Trainingerrs': training_errs,
        'MeanTrainingAcc': mean_training_err,
        'Testingerrs': testing_errs,
        'MeanTestingAcc': mean_testing_err,
        'ROC': roc,
        'AUC': auc,
    }

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
def preprocess():
    """Read the 'datasetfile' form field and render main.html with result=0."""
    # The submitted value is looked up but not used any further here.
    dataset_file = request.form['datasetfile']
    return render_template('main.html', result=0)
Exemplo n.º 33
def main():
    """Boost decision stumps on random subsets of spambase and log the errors.

    The data is split into 5 folds; fold 1 gives the test set and the pool of
    training data.  For every percentage in ``data_rates`` a random subset of
    the pool is drawn, ``round_limit`` stumps are boosted on it, and the
    train/test error after the final round is recorded.  The per-rate errors
    and their means are printed and written to ``result_path``.
    """
    # training parameter
    round_limit = 50
    result_path = 'results/spamActive_random_final_1.acc'
    model_name = 'spam_active'
    threshes_path = 'data/spambase.threshes'

    # load and preprocess training data
    training_data = loader.load_dataset('data/spambase.data')
    # TODO convert labels from {0, 1} to {-1, 1}

    # load thresholds
    threshes = loader.load_pickle_file(threshes_path)

    # start training
    training_errs = []
    testing_errs = []
    auc = 0.0
    k_folds = Preprocess.prepare_k_folds(training_data, 5)
    tr_data_pool, te_data = Preprocess.get_i_fold(k_folds, 1)
    data_set = DataSet.DataSet(tr_data_pool)
    data_rates = (5, 10, 15, 20, 30, 50)
    for rate in data_rates:
        tr_data = data_set.random_pick(rate, False)
        tr_n, _ = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])
        # TODO prepare distribution
        d = util.init_distribution(len(tr_data[0]))
        # TODO compute thresholds cheat sheet
        thresh_cs = util.pre_compute_threshes(tr_data[0], tr_data[1], threshes)
        boost = b.Boosting(d)
        # running ensemble predictions, updated in place after every round
        testing_predict = [0.0] * te_n
        training_predict = [0.0] * tr_n
        c_tr_err = c_te_err = None
        # `round_no` replaces the former local `round`, which shadowed the
        # builtin of the same name.
        for round_no in range(1, round_limit + 1):
            boost.add_model(ds.DecisionStump, tr_data[0], tr_data[1], threshes, thresh_cs)
            boost.update_predict(tr_data[0], training_predict)
            boost.update_predict(te_data[0], testing_predict)
            latest = boost.model[-1]
            c_model_err = latest.w_err
            c_f_ind = latest.f_ind
            c_thresh = latest.thresh
            c_tr_err = util.get_err_from_predict(training_predict, tr_data[1])
            c_te_err = util.get_err_from_predict(testing_predict, te_data[1])
            # TODO calculate the AUC for testing results
            print('Data {}% Round: {} Feature: {} Threshold: {:.3f} Round_err: {:.12f} Train_err: {:.12f} Test_err {:.12f} AUC {}'.format(rate, round_no, c_f_ind, c_thresh, c_model_err, c_tr_err, c_te_err, 0))

        # Record the final-round errors for this data rate.  Previously the
        # two lists were never filled, so the means below were taken over
        # empty lists (NaN plus a RuntimeWarning) and nothing useful was
        # logged.
        if c_tr_err is not None:
            training_errs.append(c_tr_err)
            testing_errs.append(c_te_err)

    # Guard the means so an empty run (round_limit == 0) still reports NaN
    # without triggering numpy's empty-slice warning.
    mean_training_err = np.mean(training_errs) if training_errs else float('nan')
    mean_testing_err = np.mean(testing_errs) if testing_errs else float('nan')

    print('Training errs are:')
    print(training_errs)
    print('Mean training err is:')
    print(mean_training_err)
    print('Testing errs are:')
    print(testing_errs)
    print('Mean testing err is:')
    print(mean_testing_err)

    # Keys are kept exactly as before so existing result files stay
    # comparable.
    result = {
        'Trainingerrs': training_errs,
        'MeanTrainingAcc': mean_training_err,
        'Testingerrs': testing_errs,
        'MeanTestingAcc': mean_testing_err,
        'AUC': auc,
    }

    # log the training result to file
    util.write_result_to_file(result_path, model_name, result, True)
Exemplo n.º 34
def train(Para,num_epochs,path,validation_A_dir,output_dir,model_name):
    """Build a Conv1D + bidirectional-LSTM model, fit it, and convert audio.

    ``Para`` is indexed by string keys for the normalised coded spectra and
    the log-f0 / spectral mean and std values used below.

    NOTE(review): this listing has lost lines (unclosed calls, ``if``/``with``
    headers without bodies, names that are never assigned), so it does not
    parse as written.  The comments below flag each gap.
    """

    n_frames = 128
    # NOTE(review): the closing parenthesis of this call is missing.
    dataset_A, dataset_B = pre.sample_data(dataset_A = Para["coded_sps_A_norm"], 
                                               dataset_B = Para["coded_sps_B_norm"],
                                               frames_per_sample = n_frames,
    n_samples = dataset_A.shape[0]

    # build model
    # model = Sequential()
    #Inputs = Input(shape=(80,None))
    # Three stride-2 convolutions, two bidirectional LSTMs, then three more
    # stride-2 convolutions ending in an 80-channel linear output.
    Inputs = Input(shape=(None,80))
    a= Conv1D(128,kernel_size=5, strides=2,activation='relu')(Inputs)
    b= Conv1D(256,kernel_size=5, strides=2,activation='relu')(a)
    c= Conv1D(512,kernel_size=5, strides=2,activation='relu')(b)
    x = Bidirectional(LSTM(512, activation='relu', recurrent_activation='hard_sigmoid', return_sequences=True, return_state=False, stateful=False))(c)
    y = Bidirectional(LSTM(512, activation='relu', recurrent_activation='hard_sigmoid', return_sequences=True, return_state=False, stateful=False))(x)
    d= Conv1D(1024,kernel_size=5, strides=2,activation='relu')(y)
    e= Conv1D(512,kernel_size=5, strides=2,activation='relu')(d)
    f= Conv1D(80,kernel_size=5, strides=2,activation='linear')(e)
    #   Outputs = Dense(80, activation = 'linear')(d)

    #   model = Model(inputs=Inputs, outputs=Outputs)
    model = Model(inputs=Inputs, outputs=f)
    # model.add(Bidirectional(LSTM(128,input_dim=(None, return_sequences=True)))
    # model.add(Bidirectional(LSTM(64, return_sequences=True)))
    # Candidate optimizers. NOTE(review): which one is used cannot be told
    # from this listing because the compile call is missing.
    sgd=SGD(lr=0.01, momentum=0.0, decay=0.0, nesterov=False);
    #sgd = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
    rmsprop=RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0);
    adagrad=Adagrad(lr=0.01, epsilon=1e-08, decay=0.0);
    adadelta=Adadelta(lr=1.0, rho=0.95, epsilon=1e-08, decay=0.0);
    adam=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0);
    adamax=Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0);
    nadam=Nadam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004);


    # NOTE(review): the next line is an orphaned keyword argument (presumably
    # of a missing model.compile call), and the ModelCheckpoint call after it
    # is never closed.
	loss='mean_squared_error',   #mean_squared_error
    checkpointer = ModelCheckpoint(

    StartTime= time.time()

    # NOTE(review): `data_A` and `data_B` are not assigned anywhere in the
    # visible code; only `dataset_A` / `dataset_B` are.
    Hist=model.fit(data_A, data_B, batch_size=1, epochs=num_epochs,verbose=1,callbacks=[checkpointer],validation_split=0.1,shuffle=False)
    #do waveform reconstruction
    sampling_rate = 16000
    num_mcep = 80 #24
    frame_period = 5.0
    if validation_A_dir is not None:
        validation_A_output_dir = os.path.join(output_dir, 'converted_A')
        # NOTE(review): the bodies of this `if` and of the `if` at the top of
        # the loop below are missing.
        if not os.path.exists(validation_A_output_dir):
    # NOTE(review): `Eva_list_A`, `outpath`, `data_Tes_new` and
    # `coded_sp_converted_norm_new` are not assigned in the visible code.
    for filepath in tqdm(Eva_list_A,desc='Generating'):
        if not os.path.exists(outpath):
        # Load, pad and decompose the source audio, then convert pitch using
        # the A/B log-f0 statistics.
        wav, _ = librosa.load(filepath, sr = sampling_rate, mono = True)
        wav = pre.wav_padding(wav = wav, sr = sampling_rate, frame_period = frame_period, multiple = 4)
        f0, timeaxis, sp, ap = pre.world_decompose(wav = wav, fs = sampling_rate, frame_period = frame_period)
        f0_converted = pre.pitch_conversion(f0 = f0, mean_log_src = Para["log_f0s_mean_A"], std_log_src = Para["log_f0s_std_A"], mean_log_target = Para["log_f0s_mean_B"], std_log_target = Para["log_f0s_std_B"])
        # Encode the spectral envelope and normalise it with speaker A's
        # statistics.
        coded_sp = pre.world_encode_spectral_envelop(sp = sp, fs = sampling_rate, dim = num_mcep)
        coded_sp_transposed = coded_sp.T
        coded_sp_norm = (coded_sp_transposed - Para["coded_sps_A_mean"]) / Para["coded_sps_A_std"]
        data_Ans= model.predict(data_Tes_new, batch_size=1, verbose=1, steps=None)
        #data_Ans = model.test(inputs = data_Tes, direction = 'A2B')[0]
        coded_sp_converted_norm = data_Ans
        # De-normalise with speaker B's statistics, then decode and
        # synthesise the converted waveform.
        coded_sp_converted = coded_sp_converted_norm_new * Para["coded_sps_B_std"] + Para["coded_sps_B_mean"]
        coded_sp_converted = coded_sp_converted.T
        coded_sp_converted = np.ascontiguousarray(coded_sp_converted)
        decoded_sp_converted = pre.world_decode_spectral_envelop(coded_sp = coded_sp_converted, fs = sampling_rate)
        wav_transformed = pre.world_speech_synthesis(f0 = f0_converted, decoded_sp = decoded_sp_converted, ap = ap, fs = sampling_rate, frame_period = frame_period)
        librosa.output.write_wav(os.path.join(outpath,os.path.basename(filepath)),wav_transformed, sampling_rate)
    # NOTE(review): the body of this `with` is missing, so what gets written
    # to the .json file cannot be told from this listing.
    with open(path+model_name+".json", "w") as f:

    EndTime = time.time()
    print('time is {} sec'.format(EndTime-StartTime))
def detectCharsInPlates(listOfPossiblePlates):
    """Fill in ``strChars`` for every plate in ``listOfPossiblePlates``.

    Each plate image is preprocessed and re-thresholded, candidate characters
    are found and grouped, and the longest group is passed to
    ``recognizeCharsInPlate``.  Plates with no matching group get an empty
    ``strChars``.  The same list is returned.  When ``Main.showSteps`` is
    true, intermediate images are shown with cv2.

    NOTE(review): this listing has lost lines (unclosed calls, loops without
    bodies, missing ``print(`` openers), so it does not parse as written.
    The comments below flag each gap.
    """
    intPlateCounter = 0
    imgContours = None
    contours = []

    if len(listOfPossiblePlates) == 0:  # if list of possible plates is empty
        return listOfPossiblePlates  # return
    # end if

    # at this point we can be sure the list of possible plates has at least one plate

    for possiblePlate in listOfPossiblePlates:  # for each possible plate, this is a big for loop that takes up most of the function

        # NOTE(review): the argument of this call is missing from the listing.
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess(
        )  # preprocess to get grayscale and threshold images

        if Main.showSteps == True:  # show steps ###################################################
            cv2.imshow("5a", possiblePlate.imgPlate)
            cv2.imshow("5b", possiblePlate.imgGrayscale)
            cv2.imshow("5c", possiblePlate.imgThresh)
        # end if # show steps #####################################################################

        # increase size of plate image for easier viewing and char detection
        # NOTE(review): the scale arguments and closing parenthesis of this
        # call are missing.
        possiblePlate.imgThresh = cv2.resize(possiblePlate.imgThresh, (0, 0),

        # threshold again to eliminate any gray areas
        thresholdValue, possiblePlate.imgThresh = cv2.threshold(
            possiblePlate.imgThresh, 0.0, 255.0,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        if Main.showSteps == True:  # show steps ###################################################
            cv2.imshow("5d", possiblePlate.imgThresh)
        # end if # show steps #####################################################################

        # find all possible chars in the plate,
        # this function first finds all contours, then only includes contours that could be chars (without comparison to other chars yet)
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        if Main.showSteps == True:  # show steps ###################################################
            height, width, numChannels = possiblePlate.imgPlate.shape
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]  # clear the contours list

            # NOTE(review): loop body missing; presumably it collected each
            # char's contour into `contours`.  The same applies to the other
            # empty contour loops further down.
            for possibleChar in listOfPossibleCharsInPlate:
            # end for

            cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)

            cv2.imshow("6", imgContours)
        # end if # show steps #####################################################################

        # given a list of all possible chars, find groups of matching chars within the plate
        # NOTE(review): the argument and closing parenthesis of this call are
        # missing.
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(

        if Main.showSteps == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                # One random colour per group of matching chars.
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                for matchingChar in listOfMatchingChars:
                # end for
                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            # end for
            cv2.imshow("7", imgContours)
        # end if # show steps #####################################################################

        if (len(listOfListsOfMatchingCharsInPlate) == 0
            ):  # if no groups of matching chars were found in the plate

            if Main.showSteps == True:  # show steps ###############################################
                # NOTE(review): the opening `print(` and closing `)` around
                # the two string lines below are missing.
                    "chars found in plate number " + str(intPlateCounter) +
                    " = (none), click on any image and press a key to continue . . ."
                intPlateCounter = intPlateCounter + 1
            # end if # show steps #################################################################

            possiblePlate.strChars = ""
            continue  # go back to top of for loop
        # end if

        # NOTE(review): the start of the sort call (its receiver and `.sort(`)
        # and the argument of removeInnerOverlappingChars are missing below.
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)
                       ):  # within each list of matching chars
                key=lambda matchingChar: matchingChar.intCenterX
            )  # sort chars from left to right
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(
            )  # and remove inner overlapping chars
        # end for

        if Main.showSteps == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                del contours[:]

                for matchingChar in listOfMatchingChars:
                # end for

                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            # end for
            cv2.imshow("8", imgContours)
        # end if # show steps #####################################################################

        # within each possible plate, suppose the longest list of potential matching chars is the actual list of chars
        intLenOfLongestListOfChars = 0
        intIndexOfLongestListOfChars = 0

        # loop through all the vectors of matching chars, get the index of the one with the most chars
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            if len(listOfListsOfMatchingCharsInPlate[i]
                   ) > intLenOfLongestListOfChars:
                # NOTE(review): the argument and closing parenthesis of this
                # len( call are missing.
                intLenOfLongestListOfChars = len(
                intIndexOfLongestListOfChars = i
            # end if
        # end for

        # suppose that the longest list of matching chars within the plate is the actual list of chars
        # NOTE(review): the index and closing bracket are missing (presumably
        # intIndexOfLongestListOfChars - confirm).
        longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[

        if Main.showSteps == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            for matchingChar in longestListOfMatchingCharsInPlate:
            # end for

            cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)

            cv2.imshow("9", imgContours)
        # end if # show steps #####################################################################

        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, longestListOfMatchingCharsInPlate)

        if Main.showSteps == True:  # show steps ###################################################
            print("chars found in plate number " + str(intPlateCounter) +
                  " = " + possiblePlate.strChars +
                  ", click on any image and press a key to continue . . .")
            intPlateCounter = intPlateCounter + 1
        # end if # show steps #####################################################################

    # end of big for loop that takes up most of the function

    if Main.showSteps == True:
            # NOTE(review): the `print(` wrapper around this string is missing.
            "\nchar detection complete, click on any image and press a key to continue . . .\n"
    # end if

    return listOfPossiblePlates
Exemplo n.º 36
def before_train(train_A_dir, train_B_dir, model_dir, output_dir, tensorboard_log_dir):
    """Preprocess both training corpora and return the values needed for training.

    For each of the two corpora the wav files are loaded, encoded with
    ``pre.world_encode_data``, summarised with ``pre.logf0_statistics`` and
    normalized with ``pre.coded_sps_normalization_fit_transoform``. The log-F0
    and MCEP normalization statistics are written to ``model_dir`` as
    ``logf0s_normalization.npz`` and ``mcep_normalization.npz``.

    Args:
        train_A_dir: directory of corpus A; wav files are looked up one
            directory level below it (``<dir>/*/*.wav``).
        train_B_dir: directory of corpus B, same layout as ``train_A_dir``.
        model_dir: directory receiving the ``.npz`` statistics; created when
            it does not exist.
        output_dir: not used by this function.
        tensorboard_log_dir: not used by this function.

    Returns:
        dict mapping each name listed in ``Para_name`` to the value computed
        here under that name.
    """
    import glob  # local import: only this function expands the wav patterns

    sampling_rate = 16000
    num_mcep = 80 #24
    frame_period = 5.0

    print('Preprocessing Data...')

    start_time = time.time()

    # NOTE(review): the statements defining the wav lists were missing, which
    # left `tta`/`ttb` undefined (NameError). They are reconstructed here as a
    # glob over "<dir>/*/*.wav", following the pattern built for corpus A -
    # confirm corpus B uses the same layout.
    # os.path.join keeps the pattern portable; a hard-coded '\*\*.wav' suffix
    # only matches on Windows.
    tta = glob.glob(os.path.join(str(train_A_dir), '*', '*.wav'))
    ttb = glob.glob(os.path.join(str(train_B_dir), '*', '*.wav'))
    wavs_A = pre.load_wavs(wav_list = tta, sr = sampling_rate)
    wavs_B = pre.load_wavs(wav_list = ttb, sr = sampling_rate)

    f0s_A, timeaxes_A, sps_A, aps_A, coded_sps_A = pre.world_encode_data(wavs = wavs_A, fs = sampling_rate, frame_period = frame_period, coded_dim = num_mcep)
    log_f0s_mean_A, log_f0s_std_A = pre.logf0_statistics(f0s_A)
    print('Log Pitch A')
    print('Mean: %f, Std: %f' %(log_f0s_mean_A, log_f0s_std_A))
    f0s_B, timeaxes_B, sps_B, aps_B, coded_sps_B = pre.world_encode_data(wavs = wavs_B, fs = sampling_rate, frame_period = frame_period, coded_dim = num_mcep)
    log_f0s_mean_B, log_f0s_std_B = pre.logf0_statistics(f0s_B)
    print('Log Pitch B')
    print('Mean: %f, Std: %f' %(log_f0s_mean_B, log_f0s_std_B))

    coded_sps_A_transposed = pre.transpose_in_list(lst = coded_sps_A)
    coded_sps_B_transposed = pre.transpose_in_list(lst = coded_sps_B)
    coded_sps_A_norm, coded_sps_A_mean, coded_sps_A_std = pre.coded_sps_normalization_fit_transoform(coded_sps = coded_sps_A_transposed)
    print("Input data fixed.")
    coded_sps_B_norm, coded_sps_B_mean, coded_sps_B_std = pre.coded_sps_normalization_fit_transoform(coded_sps = coded_sps_B_transposed)

    # np.savez does not create missing directories, so make sure it exists.
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    np.savez(os.path.join(model_dir, 'logf0s_normalization.npz'), mean_A = log_f0s_mean_A, std_A = log_f0s_std_A, mean_B = log_f0s_mean_B, std_B = log_f0s_std_B)
    np.savez(os.path.join(model_dir, 'mcep_normalization.npz'), mean_A = coded_sps_A_mean, std_A = coded_sps_A_std, mean_B = coded_sps_B_mean, std_B = coded_sps_B_std)

    end_time = time.time()
    time_elapsed = end_time - start_time

    # Names of the local values handed back to the caller.
    Para_name=['sampling_rate', 'num_mcep', 'frame_period',
               'coded_sps_A_norm', 'coded_sps_B_norm', 'coded_sps_A', 'coded_sps_B',
               'coded_sps_A_mean', 'coded_sps_A_std', 'coded_sps_B_mean', 'coded_sps_B_std',
               'log_f0s_mean_A', 'log_f0s_std_A', 'log_f0s_mean_B', 'log_f0s_std_B']
    # Snapshot the locals once so the lookup cannot hit an undefined name.
    local_values = locals()
    Para = dict((name, local_values[name]) for name in Para_name)
    print('Preprocessing Done.')
    print('Time Elapsed for Data Preprocessing: %02d:%02d:%02d' % (time_elapsed // 3600, (time_elapsed % 3600 // 60), (time_elapsed % 60 // 1)))
    return Para
Exemplo n.º 37
# Script fragment: k-fold evaluation of a ridge model on the spambase data.
# The snippet is cut off inside the fold loop, so the accumulators declared
# below are never filled in the visible part.

# training parameters
k = 50  # fold
result_path = 'results/spamRidge_4.acc'
# NOTE(review): the name says 'shift_scale' but the active normalization below
# is zero_mean_unit_var - confirm which one the saved model should be named after.
model_name = 'spam_' + str(k) + 'fold_' + 'shift_scale'
normalization = Preprocess.zero_mean_unit_var
# normalization = Preprocess.shift_and_scale
# cols_not_norm = [i for i in range(54)]
# cols_not_norm = [i for i in range(48, 54)]
cols_not_norm = ()
lamda = 2

# load and preprocess training data
training_data = loader.load_dataset('data/spambase.data')
# Called for its side effect only: the return value is ignored here.
Preprocess.normalize_features_all(normalization, training_data[0], not_norm=cols_not_norm)

# start training
training_accs = []
training_cms = []
testing_accs = []
testing_cms = []
roc = []
auc = 0.0
for i in range(k):
    # prepare_k_fold_data is given a 1-based fold index (i + 1).
    (tr_data, te_data) = Preprocess.prepare_k_fold_data(training_data, k, i + 1)

    model = rm.Ridge()
    model.build(tr_data[0], tr_data[1], lamda)
def detectCharsInPlates(listOfPossiblePlates, filePath):
    """Detect and recognize characters on each candidate plate.

    For every plate the image is preprocessed and adaptively thresholded,
    possible characters are found and sorted by ``intCenterX``, grouped with
    ``findListOfListsOfMatchingChars``, filtered by ``getEqualHeightList``,
    ``removeDistanceChar`` and ``removeInnerChars``, and - when at least six
    characters remain - the plate is flagged with ``isPlate = True`` and its
    ``strChars`` is set from ``recognizeCharsInPlate``.

    Returns the input list unchanged when it is empty, otherwise the plates
    whose ``isPlate`` attribute is truthy.

    NOTE(review): several statements in this snippet are truncated (unclosed
    calls, empty ``if`` bodies) and the function does not parse as written;
    each gap is flagged inline. ``filePath`` is not referenced in the visible
    code.
    """
    intPlateCounter = 0
    imgContours = None
    contours = []

    if len(listOfPossiblePlates) == 0:  # if list of possible plates is empty
        return listOfPossiblePlates  # return
    # end if

    # at this point we can be sure the list of possible plates has at least one plate

    # NOTE(review): the argument of enumerate() is missing - presumably
    # listOfPossiblePlates; confirm.
    for index, possiblePlate in enumerate(
    ):  # for each possible plate, this is a big for loop that takes up most of the function
        # NOTE(review): the argument of Preprocess.preprocess() is missing -
        # presumably possiblePlate.imgPlate; confirm.
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess(
        )  # preprocess to get grayscale and threshold images

        # NOTE(review): cv2.adaptiveThreshold takes an adaptive-method argument
        # between the max value and the threshold type; it is missing here.
        adaptivePlate = cv2.adaptiveThreshold(possiblePlate.imgGrayscale, 255,
                                              cv2.THRESH_BINARY, 11, 2)
        # blurPlate / processedPlate are only displayed below; the character
        # search runs on adaptivePlate.
        blurPlate = cv2.GaussianBlur(adaptivePlate, (5, 5), 0)
        ret, processedPlate = cv2.threshold(
            blurPlate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        if Main.showSteps == True:  # show steps ###################################################
            cv2.imshow("5a", possiblePlate.imgPlate)
            cv2.imshow("5b", possiblePlate.imgGrayscale)
            cv2.imshow("5c.adaptive", adaptivePlate)
            cv2.imshow("5d.blur", blurPlate)
            cv2.imshow("5e.otsu", processedPlate)

        # end if # show steps #####################################################################

        # increase size of plate image for easier viewing and char detection
        # NOTE(review): the resize call is left unclosed - the scale factors
        # (fx/fy) and the closing parenthesis are missing.
        possiblePlate.imgThresh = cv2.resize(possiblePlate.imgThresh, (0, 0),

        # find all possible chars in the plate,
        # this function first finds all contours, then only includes contours that could be chars (without comparison to other chars yet)

        listOfPossibleCharsInPlate = findPossibleCharsInPlate(adaptivePlate)
        # order the candidates left to right
        listOfPossibleCharsInPlate.sort(key=lambda Char: Char.intCenterX)

        if Main.showSteps == True:  # show steps ###################################################
            showContours(possiblePlate, listOfPossibleCharsInPlate)

        listOfListsOfChars = findListOfListsOfMatchingChars(
            listOfPossibleCharsInPlate, minChars=3, maxAngle=10)
        # NOTE(review): the body of this `if` is missing - presumably
        # `continue` to skip plates without any character group; confirm.
        if len(listOfListsOfChars) == 0:
        # find chars that have same heights
        # NOTE(review): the three list comprehensions below are missing their
        # closing brackets.
        listOfListsOfChars1 = [
            getEqualHeightList(x) for x in listOfListsOfChars
        listOfListsOfChars2 = getEqualHeightList(listOfListsOfChars1, mode=1)
        # remove Distance Char
        listOfListsOfChars3 = [
            removeDistanceChar(x) for x in listOfListsOfChars2
        # flatten list
        listOfCharsInPlate = [
            char for listChars in listOfListsOfChars3 for char in listChars
        # remove inner Chars
        listOfCharsInPlate = removeInnerChars(listOfCharsInPlate)

        # a plate needs at least 6 characters to be accepted
        if len(listOfCharsInPlate) >= 6:
            possiblePlate.isPlate = True
            if Main.showSteps == True:  # show steps #######################################################
                showListOfLists(possiblePlate, listOfCharsInPlate)

            # NOTE(review): the recognizeCharsInPlate call is left unclosed -
            # the trailing argument(s) after `index` are missing.
            possiblePlate.strChars = recognizeCharsInPlate(
                possiblePlate.imgGrayscale, listOfCharsInPlate, index,
            print("predict: ", possiblePlate.strChars)

    # keep only the plates flagged above
    # NOTE(review): reads isPlate on every plate, so the attribute must already
    # exist on plates that were not flagged - confirm against the plate class.
    listOfPlates = [plate for plate in listOfPossiblePlates if plate.isPlate]

    # NOTE(review): the string below is a bare expression - the print( call
    # that wrapped it appears to be missing.
    if Main.showSteps == True:
            "\nchar detection complete, click on any image and press a key to continue . . .\n"
    # end if

    return listOfPlates
Exemplo n.º 39
def detectPlatesInScene(imgOriginalScene):
    """Find candidate license plates in a scene image.

    The scene is preprocessed into grayscale and threshold images, possible
    characters are located in the threshold image and grouped into lists of
    matching characters, and ``extractPlate`` is called once per group.

    Args:
        imgOriginalScene: image array whose ``shape`` unpacks into
            (height, width, channels).

    Returns:
        list of the objects returned by ``extractPlate`` whose ``imgPlate``
        is not ``None``.

    When ``Main.showSteps`` is set, each intermediate stage is shown in an
    OpenCV window and progress is printed.
    """
    listOfPossiblePlates = []

    height, width, _ = imgOriginalScene.shape

    # Read the flag once so every debug section sees the same value.
    showSteps = Main.showSteps

    if showSteps:
        cv2.imshow("0", imgOriginalScene)

    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    if showSteps:
        cv2.imshow("1a", imgGrayscaleScene)
        cv2.imshow("1b", imgThreshScene)

    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if showSteps:
        print("step 2 - len(listOfPossibleCharsInScene) = " + str(len(listOfPossibleCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)
        # NOTE(review): assumes each possible char exposes its outline as
        # `.contour` - confirm against the possible-char class.
        contours = [possibleChar.contour for possibleChar in listOfPossibleCharsInScene]

        cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)

    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        # Draw every group of matching chars in its own random colour.
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = [matchingChar.contour for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))

        cv2.imshow("3", imgContours)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)

        # Keep only the groups for which a plate image could be extracted.
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    if showSteps:
        print("\n")
        cv2.imshow("4a", imgContours)

        for i, plate in enumerate(listOfPossiblePlates):
            # cv2.line needs integer pixel coordinates; boxPoints returns floats.
            corners = [tuple(int(v) for v in point)
                       for point in cv2.boxPoints(plate.rrLocationOfPlateInScene)]

            # Outline the rotated rectangle: connect each corner to the next, wrapping around.
            for start, end in zip(corners, corners[1:] + corners[:1]):
                cv2.line(imgContours, start, end, Main.SCALAR_RED, 2)

            cv2.imshow("4a", imgContours)

            print("possible plate " + str(i) + ", click on any image and press a key to continue . . .")

            cv2.imshow("4b", plate.imgPlate)
            # The prompt above asks for a key press; waitKey also lets HighGUI draw the windows.
            cv2.waitKey(0)

        print("\nplate detection complete, click on any image and press a key to begin char recognition . . .\n")
        cv2.waitKey(0)

    return listOfPossiblePlates
Exemplo n.º 40
from Custom_keras_Encoder_Decoder import Encoder_Decoder, load_Encoder_Decoder
import Encoder_Decoder_for_dummies
import Preprocess
from read_xlsx import get_trivial
from prep_data import preprocess_data

MAX_SIZE = 1000

# Load the trivia data once; the question and answer inputs are elements 1 and
# 2 of the same result, so a second identical get_trivial call is unnecessary.
# NOTE(review): assumes get_trivial returns the same data for identical
# arguments - confirm.
_trivial = get_trivial(["_", "\'", "&", "\"", ":", "(", ")"], [], MAX_SIZE)
questions, answers = preprocess_data(_trivial[1], _trivial[2])

# NOTE(review): the 10000 cap is larger than MAX_SIZE; if MAX_SIZE bounds the
# number of loaded rows these slices never truncate anything - confirm.
preprocess = Preprocess.Preprocess(questions[:10000], answers[:10000])

def str_to_tokens(sentence : str):
	"""Convert a sentence into a padded token sequence for the encoder.

	The sentence is lower-cased and split on whitespace; words that are not
	keys of ``preprocess.que_word_dict`` are dropped. The remaining tokens are
	padded at the end to ``preprocess.max_input_length``.

	Returns the result of ``pad_sequences`` for a batch containing this one
	sentence.
	"""
	vocabulary = preprocess.que_word_dict
	# NOTE(review): assumes que_word_dict maps each word to its integer token
	# id - confirm against the Preprocess class.
	tokens_list = [vocabulary[word] for word in sentence.lower().split() if word in vocabulary]
	return preprocessing.sequence.pad_sequences( [tokens_list] , maxlen=preprocess.max_input_length , padding='post')

# Build the encoder-decoder wrapper from the prepared question/answer data.
ed = Encoder_Decoder_for_dummies.ED_dummies(preprocess)

# Train at import time with a fixed schedule (500 epochs, batches of 1000).
ed.train(epochs=500, batch_size=1000)
Exemplo n.º 41
 def __get_outer_data(self, outer_data_dir, data_szie):
     """Return whatever ``Preprocess.process_all_articles`` yields for the given directory and size.

     Both arguments are forwarded unchanged; ``self`` is not used.
     """
     outer_data = Preprocess.process_all_articles(outer_data_dir, data_szie)
     return outer_data
Exemplo n.º 42
def detectPlatesInScene(imgOriginalScene):
    """Find candidate license plates in a scene image.

    The original image is always shown in the "Original Image" window. The
    scene is then preprocessed, possible characters are located and grouped
    into lists of matching characters, and ``extractPlate`` is called once per
    group; every extracted plate image is shown in the 'The plates' window.

    Args:
        imgOriginalScene: image array whose ``shape`` unpacks into
            (height, width, channels).

    Returns:
        list of the objects returned by ``extractPlate`` whose ``imgPlate``
        is not ``None``.
    """
    listOfPossiblePlates = []                   # this will be the return value

    height, width, _ = imgOriginalScene.shape

    # Read the flag once so every debug section sees the same value.
    showSteps = Main.showSteps

    cv2.imshow("Original Image", imgOriginalScene)

    # preprocess to get grayscale and threshold images
    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if showSteps:
        imgContours = np.zeros((height, width, 3), np.uint8)
        # NOTE(review): assumes each possible char exposes its outline as
        # `.contour` - confirm against the possible-char class.
        contours = [possibleChar.contour for possibleChar in listOfPossibleCharsInScene]

        # Display of this step is currently disabled:
        # cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        # cv2.imshow("2b", imgContours)
        # cv2.waitKey(0)

    listOfListsOfMatchingCharsInScene = CharacterDetection.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(len(listOfListsOfMatchingCharsInScene)))
        imgContours = np.zeros((height, width, 3), np.uint8)

        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            # imgContours accumulates every group; imgContours2 holds only this one.
            imgContours2 = np.zeros((height, width, 3), np.uint8)

            contours = [matchingChar.contour for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))
            cv2.drawContours(imgContours2, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))

        # Display of this step is currently disabled:
        # cv2.imshow("3", imgContours)
        # cv2.waitKey(0)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)

        if possiblePlate.imgPlate is not None:
            # Without this append the function always returned an empty list.
            listOfPossiblePlates.append(possiblePlate)
            cv2.imshow('The plates',possiblePlate.imgPlate)

    if showSteps:
        print("\n" + str(len(listOfPossiblePlates)) + " possible plates found")

    return listOfPossiblePlates
Exemplo n.º 43
def retieveEventsAtDate(date, url):
    """Scrape the event listing at ``url`` and build one dict per event.

    The listing page is fetched and its ``div.item.event_item.vevent``
    elements are selected; each detail page in ``toVisit`` is then fetched and
    parsed for title, time (resolved through ``timePreprocess(date, ...)``),
    location (geocoded with ``Preprocess.decodeAddressToCoordinates``),
    description and image. Returns ``result``.

    Python 2 code (``urllib.urlopen``, print statements).

    NOTE(review): this snippet is missing lines and does not parse as written:
    the loop that fills ``toVisit`` has no body, the "discard this event"
    messages have lost their ``else:``/``continue`` lines, and ``tmp`` is
    never appended to ``result`` in the visible code. Each gap is flagged
    inline.
    """
    # root website

    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html, "html.parser")

    briefs = soup.select("div.item.event_item.vevent")

    # retrieve the URLs of all event detail pages.
    print 'collecting', len(briefs), 'events from', url
    toVisit = []
    # NOTE(review): loop body missing - presumably it appended each brief's
    # detail-page URL to toVisit; confirm against the page markup.
    for brief in briefs:

    result = []

    # get information from each event's detail page.
    for item in toVisit:
        tmp = {}
        url = item
        html = urllib.urlopen(url).read()
        soup = BeautifulSoup(html, "html.parser")
        # the event information resides here.
        event = soup.select("div.box_header.vevent")[0]

        # extract title from h1 tag's text
        title = event.h1.span.get_text().strip()

        # extract time from h2 tag's abbr field
        # NOTE(review): the local name `time` would shadow a module-level
        # `time` import inside this function, if the file has one.
        time = ""
        for abbr in event.h2.findAll('abbr'):
            time += abbr.get_text().strip() + " "
        print time
        time = timePreprocess(date, time)
        print time
        if time is None:
            # NOTE(review): nothing actually discards the event here -
            # presumably a `continue` followed this message; confirm.
            print("can't resolve time, discard this event")
        display_location = event.h3.a
        real_location = event.h3.small

        if display_location is not None and real_location is not None and len(display_location) > 0 and len(real_location) > 0:
            display_location = display_location.get_text().strip()
            real_location = real_location.get_text().strip()
            # discard badly formatted events
            # NOTE(review): as written this message prints on the success
            # path - an `else:` branch (and `continue`) appears to be missing.
            print("can't resolve location, discard this event")

        if "Cornell University" in real_location:
            real_location = real_location.replace("Cornell University", "")
            print "real_location becomes: " + real_location 

        # Fall back to the display name plus the fixed Ithaca suffix when the
        # address does not already contain that suffix.
        if "Ithaca, NY 14853, USA" not in real_location:
            real_location = display_location + ", Ithaca, NY 14853, USA"

        latlng = Preprocess.decodeAddressToCoordinates(real_location)
        print "latlng: %s" % latlng
        lat = latlng['lat']
        lng = latlng['lng']

        # street = event.h3.small
        # if street is not None:
        #     street = street.get_text().strip()
        # else:
        #     street = "Cornell University"

        description = event.select("div.description")
        if len(description) > 0:
            description = description[0].get_text()
            # discard badly formatted events
            # NOTE(review): same missing `else:`/`continue` as for the location.
            print("can't resolve description, discard this event")
        image = soup.select("div.box_image")
        if len(image) > 0:
            image = image[0].a.img['src']
            # discard badly formatted events
            # NOTE(review): same missing `else:`/`continue` as for the location.
            print("can't resolve image, discard this event")
        tmp['title'] = title
        tmp['time'] = time
        tmp['location'] = display_location
        tmp['description'] = description
        tmp['image'] = image
        tmp['lat'] = lat
        tmp['lng'] = lng
        tmp['secondaryTag'] = ['Cornell Sponsored']

        # Tag the event once for every keyword found in its description.
        for free_food_keyword in free_food_keywords:
            if free_food_keyword in tmp['description'].lower():
                tmp['secondaryTag'].append('Free Food')

        print 'retrieved:', tmp['title']
        # NOTE(review): tmp is never added to result in the visible code, so
        # the function returns an empty list - presumably result.append(tmp)
        # belongs here; confirm.

    # return all event dictionaries in a list.
    return result
Exemplo n.º 44
def detectCharsInPlates(listOfPossiblePlates):
    """Recognize the characters on every candidate plate, in place.

    For each plate the image is preprocessed, the threshold image is enlarged
    and re-thresholded, possible characters are found and grouped into lists
    of matching characters, each group is sorted left to right and stripped of
    inner overlapping characters, and the longest group is passed to
    ``recognizeCharsInPlate``. The result is stored in ``strChars`` (an empty
    string when no group of matching characters was found).

    Args:
        listOfPossiblePlates: list of plate objects providing ``imgPlate``;
            ``imgGrayscale``, ``imgThresh`` and ``strChars`` are set here.

    Returns:
        The same list that was passed in.

    When ``Main.showSteps`` is set, each stage is shown in an OpenCV window
    and the function waits for a key press after every plate.
    """
    intPlateCounter = 0

    # Read the flag once so every debug section sees the same value
    # (height/width below are only defined when it is set).
    showSteps = Main.showSteps

    if not listOfPossiblePlates:  # nothing to do for an empty list
        return listOfPossiblePlates

    for possiblePlate in listOfPossiblePlates:

        # preprocess to get grayscale and threshold images
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess(
            possiblePlate.imgPlate)

        if showSteps:
            cv2.imshow("5a", possiblePlate.imgPlate)
            cv2.imshow("5b", possiblePlate.imgGrayscale)
            cv2.imshow("5c", possiblePlate.imgThresh)

        # increase size of plate image for easier viewing and char detection
        # NOTE(review): the scale factors were missing from the snippet; 1.6
        # is an assumed value - confirm.
        possiblePlate.imgThresh = cv2.resize(possiblePlate.imgThresh, (0, 0),
                                             fx=1.6, fy=1.6)

        # threshold again to eliminate the gray areas introduced by resizing
        thresholdValue, possiblePlate.imgThresh = cv2.threshold(
            possiblePlate.imgThresh, 0.0, 255.0,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        if showSteps:
            cv2.imshow("5d", possiblePlate.imgThresh)

        # find all possible chars in the plate: this first finds all contours,
        # then keeps only those that could be chars
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        if showSteps:
            height, width, numChannels = possiblePlate.imgPlate.shape
            imgContours = np.zeros((height, width, 3), np.uint8)
            # NOTE(review): assumes each possible char exposes its outline as
            # `.contour` - confirm against the possible-char class.
            contours = [possibleChar.contour for possibleChar in listOfPossibleCharsInPlate]

            cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)

            cv2.imshow("6", imgContours)

        # given the list of all possible chars, find groups of matching chars within the plate
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(
            listOfPossibleCharsInPlate)

        if showSteps:
            imgContours = np.zeros((height, width, 3), np.uint8)

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                # Rebuilt per group so each group keeps its own colour instead
                # of earlier groups being repainted.
                contours = [matchingChar.contour for matchingChar in listOfMatchingChars]
                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            cv2.imshow("7", imgContours)

        if not listOfListsOfMatchingCharsInPlate:  # no groups of matching chars were found

            if showSteps:
                print(
                    "chars found in plate number " + str(intPlateCounter) +
                    " = (none), click on any image and press a key to continue . . ."
                )
                intPlateCounter = intPlateCounter + 1
                # The prompt above asks for a key press.
                cv2.waitKey(0)

            possiblePlate.strChars = ""
            continue  # go back to the top of the for loop

        for i, listOfMatchingChars in enumerate(listOfListsOfMatchingCharsInPlate):
            # sort chars from left to right
            listOfMatchingChars.sort(
                key=lambda matchingChar: matchingChar.intCenterX)
            # and remove inner overlapping chars
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(
                listOfMatchingChars)

        if showSteps:
            imgContours = np.zeros((height, width, 3), np.uint8)

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                contours = [matchingChar.contour for matchingChar in listOfMatchingChars]

                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            cv2.imshow("8", imgContours)

        # assume the longest group of matching chars is the actual plate text;
        # max() returns the first group of maximal length, like a strict ">" scan
        longestListOfMatchingCharsInPlate = max(
            listOfListsOfMatchingCharsInPlate, key=len)

        if showSteps:
            imgContours = np.zeros((height, width, 3), np.uint8)
            contours = [matchingChar.contour for matchingChar in longestListOfMatchingCharsInPlate]

            cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)

            cv2.imshow("9", imgContours)

        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, longestListOfMatchingCharsInPlate)

        if showSteps:
            print("chars found in plate number " + str(intPlateCounter) +
                  " = " + possiblePlate.strChars +
                  ", click on any image and press a key to continue . . .")
            intPlateCounter = intPlateCounter + 1
            cv2.waitKey(0)

    if showSteps:
        print(
            "\nchar detection complete, click on any image and press a key to continue . . .\n"
        )
        cv2.waitKey(0)

    return listOfPossiblePlates
Exemplo n.º 45
    # NOTE(review): these two assignments are indented, so they belonged to an
    # enclosing block whose header is missing from this snippet.
    threshes_path = "data/vote.threshes"
    data_path = "data/vote_parsed.data"

# Script fragment: for every fold, train on increasing percentages of the
# training data. The snippet is cut off inside the inner loop.
# NOTE(review): `k` is not defined in the visible part of the snippet.

# load and preprocess training data
training_data = loader.load_pickle_file(data_path)
print("total data points: {}".format(len(training_data[0])))
# load thresholds
threshes = loader.load_pickle_file(threshes_path)

# start training
# per-percentage result lists, keyed by the values in percent_list
training_errs_by_percent = {}
testing_errs_by_percent = {}
auc_by_percent = {}
roc = []
auc = 0.0
k_folds = Preprocess.prepare_k_folds(training_data, k)
percent_list = (5, 10, 15, 20, 30, 50, 80)
for i in range(k):
    tr_data_all, te_data = Preprocess.get_i_fold(k_folds, i)

    # NOTE(review): the loop variable `c` would shadow a module alias of the
    # same name (other snippets use `import Consts as c`) - confirm there is
    # no clash in this file.
    for c in percent_list:
        # create the result lists the first time a percentage is seen
        if c not in training_errs_by_percent.keys():
            training_errs_by_percent[c] = []
            testing_errs_by_percent[c] = []
            auc_by_percent[c] = []

        tr_data = Preprocess.get_c_percent(c, tr_data_all)

        # tr_data[0] must be 2-D and te_data[1] 1-D for these unpackings to work
        tr_n, f_d = np.shape(tr_data[0])
        te_n, = np.shape(te_data[1])
        # TODO prepare distribution
def detectPlatesInScene(img):
    """Find candidate license plates in a scene image.

    The scene is preprocessed into grayscale and threshold images, possible
    characters are located in the threshold image and grouped into lists of
    matching characters, and ``extractPlate`` is called once per group. The
    number of plates found is always printed.

    Args:
        img: image array whose ``shape`` unpacks into (height, width, channels).

    Returns:
        list of the objects returned by ``extractPlate`` whose ``imgPlate``
        is not ``None``.

    When ``Main.showSteps`` is set, each intermediate stage is shown in an
    OpenCV window.
    """
    listOfPossiblePlates = []

    height, width, _ = img.shape

    # Read the flag once so every debug section sees the same value.
    showSteps = Main.showSteps

    if showSteps:
        cv2.imshow("0", img)

    # preprocess to get grayscale and threshold images
    imgGray, imgThresh = Preprocess.preprocess(img)

    if showSteps:
        cv2.imshow("Gray SCale Image", imgGray)
        cv2.imshow("Image threshold", imgThresh)

    # find all possible chars in the scene: this first finds all contours, then
    # keeps only those that could be chars (no comparison to other chars yet)
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThresh)

    if showSteps:
        print("step 2 - len(listOfPossibleCharsInScene) = " + str(len(listOfPossibleCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)
        # NOTE(review): assumes each possible char exposes its outline as
        # `.contour` - confirm against the possible-char class.
        contours = [possibleChar.contour for possibleChar in listOfPossibleCharsInScene]

        cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)

    # given the list of all possible chars, find groups of matching chars;
    # each group is then tried as a plate
    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str( len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        # Draw every group of matching chars in its own random colour.
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            RandomBlue = random.randint(0, 255)
            RandomGreen = random.randint(0, 255)
            RandomRed = random.randint(0, 255)

            contours = [matchingChar.contour for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1, (RandomBlue, RandomGreen, RandomRed))
        cv2.imshow("Image Contours", imgContours)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(img, listOfMatchingChars)

        # Keep only the groups for which a plate image could be extracted.
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    print("\n" + str(len(listOfPossiblePlates)) + " possible plates found")

    if showSteps:
        cv2.imshow("Image Contours", imgContours)

        for i, plate in enumerate(listOfPossiblePlates):
            # cv2.line needs integer pixel coordinates; boxPoints returns floats.
            corners = [tuple(int(v) for v in point)
                       for point in cv2.boxPoints(plate.rrLocationOfPlateInScene)]

            # Outline the rotated rectangle: connect each corner to the next, wrapping around.
            for start, end in zip(corners, corners[1:] + corners[:1]):
                cv2.line(imgContours, start, end, Main.SCALAR_RED, 2)

            # Same window name as above, so the outlines update the existing
            # window instead of opening a second, misspelled one.
            cv2.imshow("Image Contours", imgContours)

            print("possible plate " + str(i) + ", click on any image and press a key to continue . . .")

            cv2.imshow("List of Possible plates", plate.imgPlate)
            # The prompt above asks for a key press; waitKey also lets HighGUI draw the windows.
            cv2.waitKey(0)

        print("\nplate detection complete, click on any image and press a key to begin char recognition . . .\n")
        cv2.waitKey(0)
    return listOfPossiblePlates
# Script fragment: continue training a saved logistic-regression model with
# gradient descent and report train/test accuracy.
# NOTE(review): `st`, `train_data_path`, `test_data_path` and `model_path` are
# used below but not defined in the visible part of the snippet.

# params
lamda = 0.5
tol = 0.92
normalize_method = prep.zero_mean_unit_var
term_method = util.acc_higher_than_ridge

# load and preprocess training data
tr_data = loader.load_pickle_file(train_data_path)
te_data = loader.load_pickle_file(test_data_path)
print("{:.2f} Data loaded!".format(time.time() - st))

# convert the feature containers to plain lists before normalizing
tr_data[0] = tr_data[0].tolist()
te_data[0] = te_data[0].tolist()

# normalize features (train and test are passed together in one call)
prep.normalize_features_all(normalize_method, tr_data[0], te_data[0])
print("{:.2f} Features normalized!".format(time.time() - st))

saved_model = loader.load_pickle_file(model_path)  # load the model
# start from the saved parameters rather than a fresh initialization
theta = saved_model.theta
is_batch = True
penalty = "l2"  # l2 for RIDGE
alpha = 0.05
model = gd.LogisticRegressionGD(theta, penalty, alpha)
# model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch)
# NOTE(review): the test set is handed to build() together with a termination
# method named acc_higher_than_ridge; if it is used for the stopping decision,
# the test accuracy reported below is optimistic - confirm.
model.build(tr_data[0], tr_data[1], lamda, term_method, tol, is_batch, te_data[0], te_data[1])
training_acc = model.test(tr_data[0], tr_data[1], util.acc)
testing_acc = model.test(te_data[0], te_data[1], util.acc)

print("{} Final results. Train acc: {}, Test acc: {}".format(time.time() - st, training_acc, testing_acc))
Exemplo n.º 48
def detectPlatesInScene(imgOriginalScene):
    """Find candidate license plates in a scene image.

    The scene is preprocessed into grayscale and threshold images, possible
    characters are located in the threshold image and grouped into lists of
    matching characters, and ``extractPlate`` is called once per group. The
    number of plates found is always printed.

    Args:
        imgOriginalScene: image array whose ``shape`` unpacks into
            (height, width, channels).

    Returns:
        list of the objects returned by ``extractPlate`` whose ``imgPlate``
        is not ``None``.

    When ``main.showSteps`` is set, each intermediate stage is shown in an
    OpenCV window.
    """
    listOfPossiblePlates = []                   # this will be the return value

    height, width, _ = imgOriginalScene.shape

    # Read the flag once so every debug section sees the same value.
    showSteps = main.showSteps

    if showSteps:
        cv2.imshow("0", imgOriginalScene)

    # preprocess to get grayscale and threshold images
    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    if showSteps:
        cv2.imshow("1a", imgGrayscaleScene)
        cv2.imshow("1b", imgThreshScene)

    # find all possible chars in the scene: this first finds all contours, then
    # keeps only those that could be chars (no comparison to other chars yet)
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if showSteps:
        # print() with one string argument behaves the same on Python 2 and 3
        print("step 2 - len(listOfPossibleCharsInScene) = " + str(len(listOfPossibleCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)
        # NOTE(review): assumes each possible char exposes its outline as
        # `.contour` - confirm against the possible-char class.
        contours = [possibleChar.contour for possibleChar in listOfPossibleCharsInScene]

        cv2.drawContours(imgContours, contours, -1, main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)

    # given the list of all possible chars, find groups of matching chars;
    # each group is then tried as a plate
    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(listOfPossibleCharsInScene)

    if showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        # Draw every group of matching chars in its own random colour.
        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = [matchingChar.contour for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1, (intRandomBlue, intRandomGreen, intRandomRed))

        cv2.imshow("3", imgContours)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:                   # for each group of matching chars
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)         # attempt to extract plate

        if possiblePlate.imgPlate is not None:                          # if plate was found
            listOfPossiblePlates.append(possiblePlate)                  # add to list of possible plates

    print("\n" + str(len(listOfPossiblePlates)) + " possible plates found")

    if showSteps:
        print("\n")
        cv2.imshow("4a", imgContours)

        for i, plate in enumerate(listOfPossiblePlates):
            # cv2.line needs integer pixel coordinates; boxPoints returns floats.
            corners = [tuple(int(v) for v in point)
                       for point in cv2.boxPoints(plate.rrLocationOfPlateInScene)]

            # Outline the rotated rectangle: connect each corner to the next, wrapping around.
            for start, end in zip(corners, corners[1:] + corners[:1]):
                cv2.line(imgContours, start, end, main.SCALAR_RED, 2)

            cv2.imshow("4a", imgContours)

            print("possible plate " + str(i) + ", click on any image and press a key to continue . . .")

            cv2.imshow("4b", plate.imgPlate)
            # The prompt above asks for a key press; waitKey also lets HighGUI draw the windows.
            cv2.waitKey(0)

        print("\nplate detection complete, click on any image and press a key to begin char recognition . . .\n")
        cv2.waitKey(0)

    return listOfPossiblePlates
Exemplo n.º 49
from perceptron_dual import PerceptronDual
import csv
import Utilities as util
import numpy as np
import Consts as c
import Preprocess

data_file = 'data/twoSpirals.txt'

# Load the tab-separated data set: in every row the last column is the class
# label and all preceding columns are features.
features = []
labels = []
with open(data_file) as f:
    for line in csv.reader(f, delimiter='\t'):
        if not line:
            # csv.reader yields [] for blank lines (e.g. a trailing newline)
            continue
        cur_l = int(float(line[-1]))
        cur_f = [float(value) for value in line[:-1]]
        # Both containers must actually be filled; left empty, the `[0]`
        # indexing of the label array below raises IndexError.
        features.append(cur_f)
        # one-element rows, so the transpose below yields a single row of labels
        labels.append([cur_l])
features = np.array(features)
# presumably normalises `features` in place (the return value is not used)
Preprocess.normalize_features_all(Preprocess.zero_mean_unit_var, features)
# Preprocess.normalize_features_all(Preprocess.shift_and_scale, features)
labels = np.array(labels).transpose()[0]
# create perceptron
# kernel = c.LINEAR
kernel = c.GAUSSIAN
model = PerceptronDual(kernel_fun=kernel)
model.fit(features, labels)
Exemplo n.º 50
def detectPlatesInScene(imgOriginalScene):
    """Locate candidate licence plates in a scene image.

    Pipeline: preprocess the scene, collect the contours that could be
    characters, group them into lists of matching characters and attempt to
    extract one plate per group.  When ``Main.showSteps`` is set, the
    intermediate images are displayed and progress is printed.

    Args:
        imgOriginalScene: scene image whose ``shape`` unpacks into
            ``(height, width, numChannels)``.

    Returns:
        list: the results of ``extractPlate`` whose ``imgPlate`` is not
        ``None``.
    """
    listOfPossiblePlates = []

    height, width, numChannels = imgOriginalScene.shape

    # canvas for the debug drawings; re-created before each drawing step
    imgContours = np.zeros((height, width, 3), np.uint8)

    if Main.showSteps:
        cv2.imshow("0", imgOriginalScene)

    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    if Main.showSteps:
        cv2.imshow("1a", imgGrayscaleScene)
        cv2.imshow("1b", imgThreshScene)

    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if Main.showSteps:
        print("step 2 - len(listOfPossibleCharsInScene) = " +
              str(len(listOfPossibleCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        # NOTE(review): assumes each possible char exposes its OpenCV contour
        # as `.contour` -- confirm against the PossibleChar class.
        contours = [possibleChar.contour
                    for possibleChar in listOfPossibleCharsInScene]

        cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        cv2.imshow("2b", imgContours)

    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(
        listOfPossibleCharsInScene)

    if Main.showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " +
              str(len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            # one random colour per group so the groups can be told apart
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = [matchingChar.contour
                        for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1,
                             (intRandomBlue, intRandomGreen, intRandomRed))

        cv2.imshow("3", imgContours)

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)

        # keep only the groups for which a plate image could be extracted
        if possiblePlate.imgPlate is not None:
            listOfPossiblePlates.append(possiblePlate)

    if Main.showSteps:
        print("\n")
        cv2.imshow("4a", imgContours)

        for i, possiblePlate in enumerate(listOfPossiblePlates):
            p2fRectPoints = cv2.boxPoints(possiblePlate.rrLocationOfPlateInScene)

            # outline the rotated rectangle: join each corner to the next one
            for corner in range(4):
                cv2.line(imgContours,
                         tuple(p2fRectPoints[corner]),
                         tuple(p2fRectPoints[(corner + 1) % 4]),
                         Main.SCALAR_RED, 2)

            cv2.imshow("4a", imgContours)

            print("possible plate " + str(i) +
                  ", click on any image and press a key to continue . . .")

            cv2.imshow("4b", possiblePlate.imgPlate)
            # imshow windows are only drawn while waitKey runs; this is also
            # the pause that the message above announces
            cv2.waitKey(0)

        print("\nplate detection complete, click on any image and press a key to begin char recognition . . .\n")
        cv2.waitKey(0)

    return listOfPossiblePlates
Exemplo n.º 51
def detectCharsInPlates(listOfPossiblePlates):
    # Runs character detection on every candidate plate and stores the
    # recognised text on the plate's `strChars` attribute.
    # NOTE(review): this listing has lost lines (several calls are left
    # unterminated and some loops/branches have no body), so it does not parse
    # as shown; the notes below mark the gaps.
    intPlateCounter = 0
    imgContours = None
    contours = []

    # nothing to process: hand the (empty) input back unchanged
    if len(listOfPossiblePlates) == 0:
        return listOfPossiblePlates

    # NOTE(review): nothing visible ever appends to this list although it is
    # the return value -- the append is presumably among the lost lines.
    listOfPossiblePlates_refined = []
    for possiblePlate in listOfPossiblePlates:
        #  grayscale and threshold images by preprocess
        # NOTE(review): the argument list of this call is missing.
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess1(

        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        if Main.showSteps == True:
            height, width, numChannels = possiblePlate.imgPlate.shape
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            # NOTE(review): loop body missing.
            for possibleChar in listOfPossibleCharsInPlate:

        #  find groups of matching chars within the plate in list
        # NOTE(review): call arguments missing.
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(
        if (len(listOfListsOfMatchingCharsInPlate) == 0):

            # no group found: the plate gets an empty string.
            # NOTE(review): the code below indexes the list of groups, so a
            # `continue` presumably followed here -- confirm.
            possiblePlate.strChars = ""

        # within each list of matching chars
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            # sort chars from left to right
            # NOTE(review): the start of the sort call is missing.
                key=lambda matchingChar: matchingChar.intCenterX)
            # and remove inner overlapping chars
            # NOTE(review): call arguments missing.
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(

        # within each possible plate, suppose the longest list of potential matching chars is the actual list of chars
        intLenOfLongestListOfChars = 0
        intIndexOfLongestListOfChars = 0

        # loop through all the vectors of matching chars, get the index of the one with the most chars
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            if len(listOfListsOfMatchingCharsInPlate[i]
                   ) > intLenOfLongestListOfChars:
                # NOTE(review): argument of len( missing.
                intLenOfLongestListOfChars = len(
                intIndexOfLongestListOfChars = i

        # suppose that the longest list of matching chars within the plate is the actual list of chars
        # NOTE(review): the index expression is missing.
        longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[

        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, longestListOfMatchingCharsInPlate)

    return listOfPossiblePlates_refined
Exemplo n.º 52
# Script fragment: loads a training and a test CSV, drops incomplete rows and
# passes every sentence through a project `Preprocess` helper.
# NOTE(review): `Y_data`, `tokenizer` and `maxlen` are used below but are not
# defined in this fragment -- they must come from code outside it.
Y_test = []
# both files are '|'-separated and read as UTF-8
train_dataset = pd.read_csv("dataset.csv", encoding='utf-8',sep='|')
test_dataset = pd.read_csv("testset.csv", encoding='utf-8',sep='|')
# discard every row that contains a missing value
train_dataset = train_dataset.dropna()
test_dataset = test_dataset.dropna()

# NOTE(review): these containers are never filled in the visible code; the
# names suggest per-sentence token ids / attention masks / segment ids.
X_data_ids = []
X_data_attn = []
X_data_seg = []
X_test_ids = []
X_test_attn = []
X_test_seg = []

import Preprocess as pr
p = pr.Preprocess(tokenizer,maxlen)

# NOTE(review): each iteration overwrites the previous result, so only the
# value for the last sentence survives the loop.
for sentence in train_dataset['Sentence']:
    X_dataset = p.work(sentence)

for sentence in test_dataset['Sentence']:
    X_testset = p.work(sentence)

# labels are produced by the project helper from the full data frames
Y_data = p.labeling(train_dataset, Y_data)
Y_test = p.labeling(test_dataset, Y_test)
Exemplo n.º 53
__author__ = 'Mounica'
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import SGDClassifier
import pandas as pd
import Preprocess
from sklearn.externals import joblib
import sys

# Command-line arguments, in order: PATH, INPUTPATH, FILE_NAME.
PATH = sys.argv[1]
INPUTPATH = sys.argv[2]
FILE_NAME = sys.argv[3]


# NOTE(review): FILE_PATH is not defined anywhere in this fragment, so this
# line raises NameError as shown; it is presumably meant to be built from the
# arguments above -- confirm.
dataset = Preprocess.process(FILE_PATH)
userId = dataset['userId']
status = dataset['status']
gender = dataset['gender']
age = dataset['age']
# Re-assemble the four columns into one frame of (userId, status, gender, age).
# NOTE(review): on Python 3 `zip` returns a one-shot iterator; wrapping it in
# list() may be needed depending on the pandas version -- verify.
datasetList = zip(userId, status, gender, age)
ds = pd.DataFrame(data=datasetList, columns=['userId', 'status', 'gender', 'age'])

# word 1- to 3-grams, keeping terms that occur in at least one document
ngram_vectorizer = CountVectorizer(ngram_range=(1,3), min_df=1)
tfidf_transformer = TfidfTransformer()

# the status text is the model input; gender and age are the two targets
X_train = ds['status']
y_train_gender = ds['gender']
y_train_age = ds['age']

#Traindata vectorization
Exemplo n.º 54
def detectPlatesInScene(imgOriginalScene):
    """Locate candidate licence plates in a scene image.

    The scene is preprocessed, contours that could be characters are
    collected and grouped into lists of matching characters, and a plate
    extraction is attempted for every group.  When ``Main.showSteps`` is set
    the function additionally draws debug images, prints progress and pauses
    on ``input()`` between steps.

    Args:
        imgOriginalScene: scene image whose ``shape`` unpacks into
            ``(height, width, numChannels)``.

    Returns:
        list: the results of ``extractPlate`` whose ``imgPlate`` is not
        ``None``.
    """
    listOfPossiblePlates = []  # this will be the return value

    height, width, numChannels = imgOriginalScene.shape

    # canvas for the debug drawings; re-created before each drawing step
    imgContours = np.zeros((height, width, 3), np.uint8)

    if Main.showSteps:
        input('Press any key to continue...')

    # preprocess to get grayscale and threshold images
    imgGrayscaleScene, imgThreshScene = Preprocess.preprocess(imgOriginalScene)

    # All contours in the thresholded image that may be characters
    # (no comparison to other chars yet).
    listOfPossibleCharsInScene = findPossibleCharsInScene(imgThreshScene)

    if Main.showSteps:
        imgContours = np.zeros((height, width, 3), np.uint8)

        # NOTE(review): assumes each possible char exposes its OpenCV contour
        # as `.contour` -- confirm against the PossibleChar class.
        contours = [possibleChar.contour
                    for possibleChar in listOfPossibleCharsInScene]

        cv2.drawContours(imgContours, contours, -1, Main.SCALAR_WHITE)
        input('Press any key to continue...')

        # box every candidate character
        for possibleChar in listOfPossibleCharsInScene:
            topLeft = (possibleChar.intBoundingRectX,
                       possibleChar.intBoundingRectY)
            bottomRight = (
                possibleChar.intBoundingRectX + possibleChar.intBoundingRectWidth,
                possibleChar.intBoundingRectY + possibleChar.intBoundingRectHeight)
            cv2.rectangle(imgContours, topLeft, bottomRight,
                          (0.0, 255.0, 255.0), 1)

    # Given all possible chars, find groups of matching chars; each group is
    # then tried as a plate below.
    listOfListsOfMatchingCharsInScene = DetectChars.findListOfListsOfMatchingChars(
        listOfPossibleCharsInScene)

    if Main.showSteps:
        print("step 3 - listOfListsOfMatchingCharsInScene.Count = " + str(
            len(listOfListsOfMatchingCharsInScene)))

        imgContours = np.zeros((height, width, 3), np.uint8)

        for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
            # one random colour per group so the groups can be told apart
            intRandomBlue = random.randint(0, 255)
            intRandomGreen = random.randint(0, 255)
            intRandomRed = random.randint(0, 255)

            contours = [matchingChar.contour
                        for matchingChar in listOfMatchingChars]

            cv2.drawContours(imgContours, contours, -1,
                             (intRandomBlue, intRandomGreen, intRandomRed))

    for listOfMatchingChars in listOfListsOfMatchingCharsInScene:
        # attempt to extract a plate from this group of matching chars
        possiblePlate = extractPlate(imgOriginalScene, listOfMatchingChars)

        if possiblePlate.imgPlate is not None:  # a plate was found
            listOfPossiblePlates.append(possiblePlate)

    if Main.showSteps:
        print("\n" + str(len(listOfPossiblePlates)) + " possible plates found")

        Image.fromarray(imgContours, 'RGB').show()
        input('Press any key to continue...')

        for i, possiblePlate in enumerate(listOfPossiblePlates):
            p2fRectPoints = cv2.boxPoints(possiblePlate.rrLocationOfPlateInScene)

            # outline the rotated rectangle: join each corner to the next one
            for corner in range(4):
                cv2.line(imgContours,
                         tuple(p2fRectPoints[corner]),
                         tuple(p2fRectPoints[(corner + 1) % 4]),
                         Main.SCALAR_RED, 2)

            print("possible plate " + str(i) +
                  ", click on any image and press a key to continue . . .")

        print(
            "\nplate detection complete, press a key to begin char recognition . . .\n"
        )

    return listOfPossiblePlates
Exemplo n.º 55
def templateMatching(im,elements, isPhoto):
    """Snap unmatched circuit elements onto resistor template matches.

    Every element whose entry at index 4 is ``'o'`` is a candidate.  Each
    resistor template is matched against ``im`` at several scales and at
    decreasing score thresholds; a candidate is paired with the closest
    template hit whose distance (``Postprocess.distance_resistor``) lies
    strictly between 7 and 20 and improves on the candidate's previous best.
    Matched candidates get their first four entries replaced by the hit's
    ``x, y, w, h``; a rectangle is drawn on ``im`` for each of them.

    Args:
        im: image searched with ``cv2.matchTemplate``; it is drawn on.
        elements: sequence of mutable element records; matched records are
            updated in place.
        isPhoto: unused, kept for interface compatibility.

    Returns:
        ``elements`` (the same object that was passed in).
    """
    templates = [
        'images/templates/resistorT4.PNG',
        'images/templates/resistorT2.PNG',
        'images/templates/resistorT2_0degree.PNG',
        'images/templates/resistorT2_45degree.PNG',
        'images/templates/resistorT2_135degree.PNG',
        'images/templates/resistorT4_0degree.PNG',
    ]
    unmatched_resistors = [elem for elem in elements if elem[4] == 'o']

    # OpenCV 3+ renamed CV_LOAD_IMAGE_COLOR to IMREAD_COLOR; support both.
    color_flag = getattr(cv2, 'IMREAD_COLOR', None)
    if color_flag is None:
        color_flag = cv2.CV_LOAD_IMAGE_COLOR

    matched_resistors = {}      # candidate index -> relocated copy of the record
    matched_resistor_key = {}   # candidate index -> distance of its best match
    for threshold in [1, .9, .8, .7, .6, .5]:
        for restt in range(2, 15):
            scale = (20 - restt) / 10.
            for t in templates:
                templ = cv2.imread(t, color_flag)
                template = cv2.resize(templ, dsize=(0, 0), fx=scale, fy=scale,
                                      interpolation=cv2.INTER_CUBIC)
                template, _ = Preprocess.getImageToSendToContour(template, False)
                w, h = template.shape[::-1]

                res = cv2.matchTemplate(im, template, cv2.TM_CCOEFF_NORMED)

                # np.where yields (rows, cols); reverse to get (x, y) points
                loc = np.where(res >= threshold)
                pts = [[pt[0], pt[1], w, h, 'r'] for pt in zip(*loc[::-1])]

                # a hit may be claimed by only one candidate per template pass
                claimed_points = set()
                for i in range(len(unmatched_resistors)):
                    ii = -1
                    minDistance = 1000000
                    # a new match must beat the previous best of this candidate
                    limit = matched_resistor_key.get(i, 10000) * (threshold * 1.1)
                    for ifindmin in range(len(pts)):
                        dist = Postprocess.distance_resistor(
                            unmatched_resistors[i][0:5], pts[ifindmin])
                        if (dist < minDistance
                                and ifindmin not in claimed_points
                                and 7 < dist < 20
                                and dist < limit):
                            ii = ifindmin
                            minDistance = dist
                    if ii == -1:
                        # no acceptable hit for this candidate in this pass
                        continue
                    matchresistor = unmatched_resistors[i][:]
                    # take on location and size of the template hit
                    for field in range(4):
                        matchresistor[field] = pts[ii][field]
                    claimed_points.add(ii)
                    matched_resistors[i] = matchresistor
                    # Record the distance of the chosen hit. The loop variable
                    # `dist` holds the last point tested, not the best one.
                    matched_resistor_key[i] = minDistance

    print(matched_resistors)
    print(unmatched_resistors)
    for i in matched_resistors:
        pt = matched_resistors[i]
        cv2.rectangle(im, (pt[0], pt[1]), (pt[0] + pt[2], pt[1] + pt[3]), (0,0,0), 2)
        # The records in unmatched_resistors are the same objects as in
        # `elements`, so this updates the returned data in place.
        matchresistor = unmatched_resistors[i]
        for field in range(4):
            matchresistor[field] = pt[field]
    cv2.imshow('resistors', im)

    return elements
Exemplo n.º 56
import Tree

# Script fragment: k-fold training/evaluation of a decision tree on the
# spambase data set.
# NOTE(review): `c`, `loader` and `Preprocess` are not imported in the visible
# part of this fragment; they must come from lines outside it.

# training parameters
k = 10  # number of folds
term_con = c.LAYER
term_thresh = 7
result_path = 'results/spamDT_10.acc'
model_name = 'spam_' + str(k) + 'fold_' + term_con + '_' + str(term_thresh)

# load training data
training_data = loader.load_dataset('data/spambase.data')
# load threshold data
# threshs = loader.load_pickle_file('config/spam_threshold')
threshs = Preprocess.generate_thresholds(training_data[0], 'config/spam_thresh_path')

# start training
training_accs = []
training_cms = []
testing_accs = []
testing_cms = []
for i in range(k):
    # fold numbers passed to the helper are 1-based (i + 1)
    (tr_data, te_data) = Preprocess.prepare_k_fold_data(training_data, k, i + 1)

    tree = Tree.DecisionTree()
    # NOTE(review): the next line is the tail of a call whose first line is
    # missing (presumably the tree's training call on tr_data[0]); the
    # fragment does not parse as shown.
               tr_data[1], threshs, term_con, term_thresh)

    # evaluate on the training fold itself
    training_test_res = tree.test(tr_data[0], tr_data[1])
Exemplo n.º 57
def detectCharsInPlates(listOfPossiblePlates, type, cameraName):
    """Recognise the characters on every candidate plate.

    For each plate the image is preprocessed and re-thresholded, possible
    characters are found and grouped into lists of matching characters, each
    group is sorted left to right and stripped of inner overlapping
    characters, and the longest group is passed to ``recognizeCharsInPlate``.
    The result is stored on the plate as ``strChars`` (``""`` when no group
    was found).

    Args:
        listOfPossiblePlates: list of candidate plate objects; updated in
            place.
        type: forwarded unchanged to ``Preprocess.preprocess``.
        cameraName: unused, kept for interface compatibility.

    Returns:
        The same ``listOfPossiblePlates`` object.
    """
    if len(listOfPossiblePlates) == 0:  # nothing to process
        return listOfPossiblePlates

    # from here on the list holds at least one plate
    for possiblePlate in listOfPossiblePlates:
        # preprocess to get grayscale and threshold images
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess(
            possiblePlate.imgPlate, type)

        # threshold again (Otsu) to eliminate any gray areas
        thresholdValue, possiblePlate.imgThresh = cv2.threshold(
            possiblePlate.imgThresh, 0.0, 255.0,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        # Find all possible chars in the plate: contours that could be
        # characters, without comparison to other chars yet.
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        # group the possible chars into lists of matching chars
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(
            listOfPossibleCharsInPlate)

        if len(listOfListsOfMatchingCharsInPlate) == 0:
            # no group of matching chars in this plate
            possiblePlate.strChars = ""
            continue

        for i in range(len(listOfListsOfMatchingCharsInPlate)):
            # sort chars from left to right
            listOfListsOfMatchingCharsInPlate[i].sort(
                key=lambda matchingChar: matchingChar.intCenterX)
            # and remove inner overlapping chars
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(
                listOfListsOfMatchingCharsInPlate[i])

        # Assume the longest group is the actual plate text; max() returns the
        # first of equally long groups, like the strict '>' scan it replaces.
        longestListOfMatchingCharsInPlate = max(
            listOfListsOfMatchingCharsInPlate, key=len)

        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, longestListOfMatchingCharsInPlate)

    return listOfPossiblePlates
Exemplo n.º 58
def predict_nii_multi(net, img1, img2, gpu, name, is_test=False):
    """Predict a label volume slice by slice and save it as a NIfTI file.

    ``img1`` is clipped to [-200, 250], rescaled to 0..255 and re-oriented
    slice by slice.  Patches are cut with ``pp.make_multi_patch``, every slice
    in the selected range is predicted with ``predict_layer_multi`` and the
    result is written to ``mPath.DataPath_Volume_Predict + name + '.nii'``.

    Args:
        net: model forwarded to ``predict_layer_multi``.
        img1: source volume, indexed ``[row, col, slice]``.  Slices must be
            square, because each slice is rotated in place.
        img2: reference volume used to pick the slice range via
            ``pp.getRangImageDepth``; ignored when ``is_test`` is true.
        gpu: forwarded to ``predict_layer_multi``.
        name: base name of the output file, also used in progress messages.
        is_test: when true, predict slices ``1 .. depth - 2`` instead of
            deriving the range from ``img2``.

    Returns:
        None.  The prediction is written to disk.
    """
    img1 = np.array(img1, dtype='float64')

    # clip the intensity window and rescale it to 0..255
    img1 = np.clip(img1, -200, 250)
    img1 = ((img1 + 200) * 255 // 450)
    img1 = np.array(img1, dtype='uint8')

    # re-orient every slice: vertical flip followed by three 90-degree turns
    for i in range(img1.shape[2]):
        img1[:, :, i] = np.rot90(np.flip(img1[:, :, i], 0), 3)

    if is_test:
        # no reference volume: use every slice except the first and last
        startposition = 1
        endposition = img1.shape[2] - 2
    else:
        img2 = np.array(img2, dtype='uint8')
        startposition, endposition = pp.getRangImageDepth(img2)

    sub_srcimages = pp.make_multi_patch(img1, (128, 128), 5, 3,
                                        startposition, endposition) / 255

    layers = img1.shape[2]
    # slices outside [startposition, endposition] stay zero
    save_nii = np.zeros((img1.shape[0], img1.shape[1], layers),
                        dtype='uint8')

    for ind in range(startposition, endposition + 1):
        # NOTE(review): each slice owns a run of 25 consecutive entries on the
        # last axis of the patch array -- presumably 25 patches per slice;
        # confirm against pp.make_multi_patch.
        first = (ind - startposition) * 25
        layer_input = sub_srcimages[:, :, :, first:first + 25]

        im = predict_layer_multi(net, layer_input, gpu)
        # same flip-and-rotate that was applied to the input slices
        im = np.rot90(np.flip(im, 0), 3)

        save_nii[:, :, ind] = im
        print("Predicting {}-{}".format(name, ind))

    save_nii = np.array(save_nii, dtype='uint8')
    new_img = nib.Nifti1Image(save_nii, affine=np.eye(4))
    nib.save(new_img, mPath.DataPath_Volume_Predict + name + '.nii')
Exemplo n.º 59
from sklearn.ensemble import RandomForestRegressor
import sklearn.metrics as mc
import Preprocess as load
import time as time
import numpy as np

# Script fragment: fits a random-forest regressor and scores it on the test
# split returned by the project loader.

# Load data
train_data, train_label, test_data, test_label = load.read_data()

# NOTE(review): time.clock() was removed in Python 3.8 (time.perf_counter()
# is the replacement); `start` is not read again in the visible code.
start = time.clock()

# Create model
# NOTE(review): the remaining constructor arguments and the closing
# parenthesis are missing, so the fragment does not parse as shown.
rf = RandomForestRegressor(max_depth=10,

# Training model: fit on the training split, then predict the test split
pre_rf = rf.fit(train_data, train_label).predict(test_data)

# Calculate score
score_rf = rf.score(test_data, test_label)
# column vector, so it lines up with test_label in the subtraction below
pre_rf = np.reshape(pre_rf, [-1, 1])
[m,n] = np.shape(test_label)

# coefficient of determination computed by hand: 1 - SS_res / SS_tot
rsquare =1- (((pre_rf- test_label) ** 2).sum()) / (((test_label - test_label.mean()) ** 2).sum())
prepro = rf.get_params()

se = mc.mean_squared_error(test_label, pre_rf)

# note: this prints the estimator's own score, not `rsquare`
print("R-square:" + str(score_rf))
Exemplo n.º 60
# NOTE(review): this listing has lost many lines -- the end of the signature,
# the docstring quotes, the tails of several calls and some loop/branch bodies
# -- so it does not parse as shown.  `showSteps` is read throughout but is not
# among the visible parameters; it was presumably declared on the missing
# signature line.  The notes below mark the gaps.
def detectCharsInPlates(listOfPossiblePlates, save_intermediate, output_folder,
    This function processes the list of number plates and returns a lsit of processed number plates
    with their text.
    @input: List of images of number plates
    @output:A list of number plates with all the information encapsulated

    # Initialize the variables
    intPlateCounter = 0
    imgContours = None
    contours = []

    if len(listOfPossiblePlates) == 0:  # if list of possible plates is empty
        l = []
        # NOTE(review): list.append returns None, so `l` is None after this
        # line and the function returns None for empty input.
        l = l.append(listOfPossiblePlates)
        print('No Plates found')
        return l  # return
    # end if

    # at this point we can be sure the list of possible plates has at least one plate
    # NOTE(review): nothing visible ever appends to this list although it is
    # the return value -- the append is presumably among the lost lines.
    listOfPossiblePlates_refined = []
    for possiblePlate in listOfPossiblePlates:  # for each possible plate, this is a big for loop that takes up most of the function
        #possiblePlate.imgPlate = cv2.fastNlMeansDenoisingColored(possiblePlate.imgPlate,None,15,15,7,21)
        #possiblePlate.imgPlate = cv2.equalizeHist(possiblePlate.imgPlate)

        # preprocess to get grayscale and threshold images
        # NOTE(review): the tail of this call is missing.
        possiblePlate.imgGrayscale, possiblePlate.imgThresh = Preprocess.preprocess(
            possiblePlate.imgPlate, save_intermediate, output_folder,

        if showSteps == True:  # show steps ###################################################
            cv2.imshow("imgPlate", possiblePlate.imgPlate)
            cv2.imshow("imgGrayscale", possiblePlate.imgGrayscale)
            cv2.imshow("imgThresh", possiblePlate.imgThresh)
            # increase size of plate image for easier viewing and char detection
        # NOTE(review): the scale arguments of this resize call are missing.
        possiblePlate.imgThresh = cv2.resize(possiblePlate.imgThresh, (0, 0),

        # threshold again to eliminate any gray areas
        thresholdValue, possiblePlate.imgThresh = cv2.threshold(
            possiblePlate.imgThresh, 0.0, 255.0,
            cv2.THRESH_BINARY | cv2.THRESH_OTSU)

        # This clears the image more removing all the unknown noise from it.
        if showSteps == True:  # show steps ###################################################
            cv2.imshow("imgThresh_gray_remover", possiblePlate.imgThresh)

        if save_intermediate == True:  # show steps ###################################################
            # NOTE(review): the image argument of this imwrite call is missing.
            cv2.imwrite("%s/imgThresh_gray_remover.png" % (output_folder),

        # end if # show steps #####################################################################

        # find all possible chars in the plate,
        # this function first finds all contours, then only includes contours that could be chars (without
        # comparison to other chars yet)
        listOfPossibleCharsInPlate = findPossibleCharsInPlate(
            possiblePlate.imgGrayscale, possiblePlate.imgThresh)

        if showSteps == True or save_intermediate == True:
            # the thresholded image is single-channel here (2-value shape)
            height, width = possiblePlate.imgThresh.shape
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]  # clear the contours list

            # NOTE(review): loop body missing.
            for possibleChar in listOfPossibleCharsInPlate:
            # end for

            cv2.drawContours(imgContours, contours, -1, (255, 255, 255))
            #print('These are the possible characters in the plate :')

        if showSteps == True:  # show steps ###################################################
            cv2.imshow("Possible_chars_in_plate", imgContours)

        if save_intermediate == True:  # show steps ###################################################
            # NOTE(review): the image argument of this imwrite call is missing.
            cv2.imwrite("%s/Possible_chars_in_plate.png" % (output_folder),

        # end if # show steps #####################################################################

        # given a list of all possible chars, find groups of matching chars within the plate
        # NOTE(review): call arguments missing.
        listOfListsOfMatchingCharsInPlate = findListOfListsOfMatchingChars(
        if (len(listOfListsOfMatchingCharsInPlate) == 0
            ):  # if no groups of matching chars were found in the plate
            #print('\nNo matching characters found:')
            if showSteps == True:  # show steps ###############################################
                # NOTE(review): the print( that wrapped this message is missing.
                    "chars found in plate number " + str(intPlateCounter) +
                    " = (none), click on any image and press a key to continue . . ."
                intPlateCounter = intPlateCounter + 1

            # end if # show steps #################################################################

            possiblePlate.strChars = ""
            continue  # go back to top of for loop
        # end if

        if showSteps == True or save_intermediate == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                # one random colour per group of matching chars
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                # NOTE(review): loop body missing.
                for matchingChar in listOfMatchingChars:
                # end for
                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            # end for

        if showSteps == True:
            cv2.imshow("A_Complete_plate", imgContours)
        # end if # show steps #####################################################################

        if save_intermediate == True:  # show steps ###################################################
            # NOTE(review): the image argument of this imwrite call is missing.
            cv2.imwrite("%s/A_Complete_plate.png" % (output_folder),

        # within each list of matching chars
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            # sort chars from left to right
            # NOTE(review): the start of the sort call is missing.
                key=lambda matchingChar: matchingChar.intCenterX)

            # and remove inner overlapping chars
            # NOTE(review): call arguments missing.
            listOfListsOfMatchingCharsInPlate[i] = removeInnerOverlappingChars(
        # end for

        if showSteps == True or save_intermediate == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)

            for listOfMatchingChars in listOfListsOfMatchingCharsInPlate:
                intRandomBlue = random.randint(0, 255)
                intRandomGreen = random.randint(0, 255)
                intRandomRed = random.randint(0, 255)

                del contours[:]

                # NOTE(review): loop body missing.
                for matchingChar in listOfMatchingChars:
                # end for

                cv2.drawContours(imgContours, contours, -1,
                                 (intRandomBlue, intRandomGreen, intRandomRed))
            # end for

        if showSteps == True:
            cv2.imshow("Remove_Overlapping", imgContours)

        if save_intermediate == True:  # show steps ###################################################
            # NOTE(review): the image argument of this imwrite call is missing.
            cv2.imwrite("%s/Remove_Overlapping.png" % (output_folder),

        # end if # show steps #####################################################################
        # within each possible plate, suppose the longest list of potential matching chars is the actual list of chars
        intLenOfLongestListOfChars = 0
        intIndexOfLongestListOfChars = 0

                # loop through all the vectors of matching chars, get the index of the one with the most chars
        # NOTE(review): the index computed here is not read again in the
        # visible code; every group is passed to recognizeCharsInPlate below.
        for i in range(0, len(listOfListsOfMatchingCharsInPlate)):
            if len(listOfListsOfMatchingCharsInPlate[i]) > intLenOfLongestListOfChars:
                intLenOfLongestListOfChars = len(listOfListsOfMatchingCharsInPlate[i])
                intIndexOfLongestListOfChars = i
            # end if
        # end for

        # suppose that the longest list of matching chars within the plate is the actual list of chars
        #longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate[intIndexOfLongestListOfChars]

        # All the groups left at this point are eligible to be part of a number plate
        #longestListOfMatchingCharsInPlate = listOfListsOfMatchingCharsInPlate

        if showSteps == True or save_intermediate == True:  # show steps ###################################################
            imgContours = np.zeros((height, width, 3), np.uint8)
            del contours[:]

            for longestListOfMatchingCharsInPlate in listOfListsOfMatchingCharsInPlate:
                # NOTE(review): loop body missing.
                for matchingChar in longestListOfMatchingCharsInPlate:
            # end for

            cv2.drawContours(imgContours, contours, -1, (255, 255, 255))

        if showSteps == True:
            cv2.imshow("The_Longest_list_of_matching_chars", imgContours)
        # end if # show steps #####################################################################

        if save_intermediate == True:  # show steps ###################################################
            # NOTE(review): the start and the tail of this call are missing.
                "%s/The_Longest_list_of_matching_chars.png" % (output_folder),

        # the full list of groups (not only the longest one) goes to recognition
        possiblePlate.strChars = recognizeCharsInPlate(
            possiblePlate.imgThresh, listOfListsOfMatchingCharsInPlate,
            save_intermediate, output_folder, showSteps)
        # NOTE(review): branch body missing.
        if showSteps == True:

        if showSteps == True:  # show steps ###################################################
            print("chars found in plate number " + str(intPlateCounter) +
                  " = " + possiblePlate.strChars +
                  ", click on any image and press a key to continue . . .")
            intPlateCounter = intPlateCounter + 1
        # end if # show steps #####################################################################

    # end of big for loop that takes up most of the function

    if showSteps == True:
        # NOTE(review): the print( that wrapped this message is missing.
            "\nchar detection complete, click on any image and press a key to continue . . .\n"
    # end if

    return listOfPossiblePlates_refined  # we return the list of plates with the probable plate number of each plate.