Example #1
def test_hard_clf(fname, data, labels):
    clf = loaddata.load_pkl(fname)
    data = norm_data(data)

    data = torch.tensor(data).float()
    pred_y = clf.predict(data)

    # Two scores within eps of each other are considered too close to call the class.
    eps = 0.00005
    pred_labels = []
    for ele in pred_y:
        max_val, temp_label = ele.max(0)
        for i in range(len(ele)):
            if temp_label != i and abs(max_val - ele[i]) <= eps:
                temp_label = torch.tensor(-2)
        pred_labels.append(temp_label.item() + 1)

    # If labels are provided, compute the model's accuracy.
    pred_acc = -1.0
    if labels is not None:
        pred_acc = np.sum(np.asarray(pred_labels) == np.asarray(labels)) / len(labels)

    return pred_labels, pred_acc
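The per-element tie check above can also be written with tensor operations. A minimal vectorized sketch, assuming pred_y is a 2-D float tensor of per-class scores; the -2 sentinel and +1 offset mirror the loop above:

import torch

def tie_aware_labels(pred_y, eps=5e-5):
    # Highest score and its index for every row.
    max_vals, argmaxes = pred_y.max(dim=1)
    # Count how many scores sit within eps of the row maximum; the maximum
    # itself always counts, so a count above one means an ambiguous class.
    near_max = (max_vals.unsqueeze(1) - pred_y).abs() <= eps
    ambiguous = near_max.sum(dim=1) > 1
    labels = torch.where(ambiguous, torch.full_like(argmaxes, -2), argmaxes)
    return (labels + 1).tolist()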
Example #2
    def prepare_data(self, data=None, labels=None):

        if data is None:
            data = loaddata.load_pkl("train_data.pkl")
            labels = np.load("finalLabelsTrain.npy")

        data = np.array(
            [np.array(data[i], dtype=bool) for i in range(len(data))])
        data_labelled = [[data[i], labels[i]] for i in range(len(data))]

        data_labelled_standardized = self.standardize_data(data_labelled)
        # Shuffling before the split is disabled, so the 80/20 split below is sequential.
        # np.random.shuffle(data_labelled_standardized)

        self.training_data_loader = torch.utils.data.DataLoader(
            data_labelled_standardized[:int(
                len(data_labelled_standardized) * 0.80)],
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=0)

        self.test_data_loader = torch.utils.data.DataLoader(
            data_labelled_standardized[
                int(len(data_labelled_standardized) * 0.80):],
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=0)
        return None
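As an alternative to slicing a manually ordered list, torch.utils.data.random_split draws a random 80/20 partition. A minimal sketch, assuming dataset is any indexable sequence of (image, label) pairs:

import torch

def make_loaders(dataset, batch_size):
    n_train = int(len(dataset) * 0.80)
    # random_split randomizes which samples land in each subset.
    train_set, test_set = torch.utils.data.random_split(
        dataset, [n_train, len(dataset) - n_train])
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=0)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False, num_workers=0)
    return train_loader, test_loader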
Example #3
def test_easy_clf(fname, data, labels):
    clf = loaddata.load_pkl(fname)
    data = norm_data(data)
    pred_y = clf.predict(data)

    pred_acc = -1.0
    if labels is not None:
        pred_acc = sum(pred_y == labels) / len(labels)

    return pred_y, pred_acc
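The same accuracy can be computed with scikit-learn, shown here as a sketch that assumes pred_y and labels are equal-length array-likes:

from sklearn.metrics import accuracy_score

pred_acc = accuracy_score(labels, pred_y)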
Example #4
def predict():
    """ Load Test data. """
    input_data = loaddata.load_pkl(sys.argv[2])
    np.save('input_pickle_data.npy', input_data)
    test_data = np.load('input_pickle_data.npy', allow_pickle=True)
    """Network type"""
    if sys.argv[1] == 'AB':
        networkType = 'AB'
    else:
        networkType = 'All'

    def data_process(train_data):
        resized_data = []
        for item in train_data:
            resized_data.append(skt.resize(np.float32(item), (100, 100)))
        for i in range(len(resized_data)):
            resized_data[i] = resized_data[i].flatten()
        resized_data = np.asarray(resized_data)
        return resized_data

    """ Obtain all images of 'a' and 'b'. Create a dataset with those instances."""

    def data_split(resized_data, train_labels):
        set_X, set_Y = [], []
        for i in (np.unique(train_labels)):
            items = list(np.where(train_labels == i)[0])
            set_X.append(resized_data[items])
            set_Y.append(train_labels[items])

        if networkType == "AB":
            x = np.concatenate((np.asarray(set_X[0]), np.asarray(set_X[1])))
            y = np.concatenate((np.asarray(set_Y[0]), np.asarray(set_Y[1])))
        else:
            x = np.concatenate((np.asarray(set_X[0]), np.asarray(set_X[1]),
                                np.asarray(set_X[2]), np.asarray(set_X[3]),
                                np.asarray(set_X[4]), np.asarray(set_X[5]),
                                np.asarray(set_X[6]), np.asarray(set_X[7])))
            y = np.concatenate((np.asarray(set_Y[0]), np.asarray(set_Y[1]),
                                np.asarray(set_Y[2]), np.asarray(set_Y[3]),
                                np.asarray(set_Y[4]), np.asarray(set_Y[5]),
                                np.asarray(set_Y[6]), np.asarray(set_Y[7])))
        return x, y

    """Resize the test data"""
    test_resize_data = data_process(test_data)
    """ Load trained network"""
    networkFile = 'networkAB.pickle' if networkType == "AB" else "networkAll.pickle"
    with open(networkFile, 'rb') as handle:
        network = pickle.load(handle)
    load_lr_model = pickle.load(open(networkFile, 'rb'))
    """Forward the test data into network"""
    y_predicted = load_lr_model.predict(test_resize_data)
    """ Saving predicted values in an output file"""
    np.save(sys.argv[3], y_predicted)
    return y_predicted
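Assuming the surrounding script keeps the argv layout used above (network type, input pickle, output path), a run would look like the following; the script name predict.py is hypothetical:

python predict.py AB test_data.pkl predictions.npy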
Example #5
def load_data_All_Classes(data, labels):
    # Both arguments are file paths: data points at a .pkl of images,
    # labels at a .npy of class labels.
    data1 = load_pkl(data)
    data1 = np.asarray(data1)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])
    labels = np.load(labels)
    for i in range(len(data1)):
        data1[i] = skt.resize(data1[i], (50, 50), anti_aliasing=True)

    return data1, labels
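Note that skimage's resize returns floats scaled to [0, 1] regardless of the integer 0/1 input, so downstream code sees grayscale intensities rather than a binary mask. A one-image sketch with a hypothetical glyph:

from skimage import transform as skt
import numpy as np

img = np.zeros((37, 52), dtype=np.uint8)  # hypothetical variable-size glyph
resized = skt.resize(img, (50, 50), anti_aliasing=True)
print(resized.dtype, resized.min(), resized.max())  # float64, values in [0.0, 1.0]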
Example #6
def test(data=None):
    weights_file_path = "./pre_trained_cnn_weights.weights"
    network = CNN(lr=0.001, batch_size=8, epochs=15, number_of_classes=9)
    weights = torch.load(weights_file_path)
    network.load_state_dict(weights)

    if data is None:
        data = loaddata.load_pkl("train_data.pkl")

    data = [[np.array(data[i], dtype=bool)] for i in range(len(data))]
    data = network.standardize_data(data)
    data = torch.stack([data[i][0] for i in range(len(data))])

    return network.predict(data)
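A hedged note on the torch.load call above: weights saved from a GPU session will fail to load on a CPU-only machine unless a map_location is given, e.g.:

weights = torch.load("./pre_trained_cnn_weights.weights",
                     map_location=torch.device("cpu"))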
Example #7
def load_data_AB(data, labels):
    data_X = []
    label_X = []
    # Both arguments are file paths: data points at a .pkl of images,
    # labels at a .npy of class labels.
    data1 = load_pkl(data)
    data1 = np.asarray(data1)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])
    labels = np.load(labels)

    # Keep only the 'a' (1) and 'b' (2) instances, resized to 50x50.
    for i in range(len(data1)):
        if labels[i] == 1 or labels[i] == 2:
            data_X.append(skt.resize(data1[i], (50, 50), anti_aliasing=True))
            label_X.append(labels[i])
    data_X = np.array(data_X)
    label_X = np.array(label_X)

    return data_X, label_X
Example #8
def load_data():

    data = load_pkl('train_data(1).pkl')
    data1 = np.asarray(data)
    for i in range(len(data1)):
        data1[i] = np.asarray(data1[i])

    # Shift the labels from 1..8 down to 0..7 (zero-based classes).
    labels = np.load('finalLabelsTrain.npy')
    labels = labels - 1

    # Resize each image to 50x50 and add a leading channel axis.
    data_new = []
    for i in range(len(data)):
        data[i] = skt.resize(data[i], (50, 50), anti_aliasing=True)
        data[i] = np.expand_dims(data[i], axis=0)
        data_new.append(data[i])
    data1 = np.array(data_new)

    return data1, labels
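The label shift above makes the classes zero-based, which is what torch.nn.CrossEntropyLoss expects. A minimal sketch with a hypothetical batch of four samples and eight classes:

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 8)            # hypothetical model outputs
targets = torch.tensor([0, 3, 7, 1])  # zero-based labels after the shift
loss = criterion(logits, targets)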
Example #9
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import pickle
from skimage import transform as skt
from PIL import Image
from loaddata import load_pkl
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms



#with open('/home/aritrab97/GNV_OCR/data2/train_data.pkl', 'rb') as f:
#	data = pickle.load(f)
data = load_pkl('train_data(1).pkl')
data1 = np.asarray(data)
for i in range(len(data1)):
    data1[i] = np.asarray(data1[i])

#pil_img = Image.fromarray(data[1000])
#pil_img.save('lena_square_save.png')	
#print(data1[1000].shape)
#plt.imshow(data1[1000])
#plt.show()
train_path = "/home/aritrab97/GNV_OCR/data1/train/"
test_path = "/home/aritrab97/GNV_OCR/data1/test/"
MODEL_STORE_PATH = "/home/aritrab97/GNV_OCR/data2/"

#transformations = transforms.Compose([transforms.ToPILImage(),transforms.Grayscale(num_output_channels=1),transforms.ToTensor()])
transformations = transforms.Compose([transforms.ToPILImage(),
                                      transforms.Grayscale(num_output_channels=1)])
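A minimal usage sketch for the pipeline above, assuming each binarized image is a 2-D 0/1 array: ToPILImage accepts an H x W x 1 uint8 array and yields a mode-'L' image, which Grayscale then passes through unchanged.

sample = np.uint8(data1[0] * 255)[:, :, None]
pil_image = transformations(sample)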
Example #10
import sys

import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
import skimage
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

import loaddata

# Loading in the train data
train_data = loaddata.load_pkl('train_data.pkl')

# Loading in the labels
train_labels = np.load('finalLabelsTrain.npy')

# When attempting to classify only a and b, restrict to the reduced set.
# train_data is a Python list, so convert it to an object array before
# boolean-mask indexing.
train_data = np.asarray(train_data, dtype=object)
ab_mask = np.logical_or(train_labels == 1, train_labels == 2)
ab_train_data = train_data[ab_mask]
ab_train_labels = train_labels[ab_mask]

# In the original data, the letter appears to switch after every run of nine samples.

# List of a/b data points that would need rotation (currently unused).

rot_list = []
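np.isin gives an equivalent, more compact mask for the a/b filter above; a sketch with identical behavior:

ab_mask = np.isin(train_labels, (1, 2))
ab_train_data = train_data[ab_mask]
ab_train_labels = train_labels[ab_mask]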
Example #11
def view_data(file_path, view=False):
    '''
        Opens and (optionally) views the data in the .npy, .pkl data objects
    '''
    label_dict = {
        1: 'a',
        2: 'b',
        3: 'c',
        4: 'd',
        5: 'h',
        6: 'i',
        7: 'j',
        8: 'k'
    }
    # Acquire all possible data.npy and label.npy files from file paths given
    data_files = []
    for dir_path in file_path:
        dir_listing = os.listdir(dir_path)
        file_pair = []

        # Find the data .pkl/.npy files
        for f in dir_listing:
            match = re.match(r'(.*)data(.*)', f)
            if match is not None:
                print("Got {} from directory {}".format(f, dir_path))
                if f.endswith('.pkl') or f.endswith('.npy'):
                    file_pair.append(os.path.join(dir_path, f))

        # Find the label .npy files
        for f in dir_listing:
            match = re.match(r'(.*)[lL]abel(.*).npy', f)
            if match is not None:
                print("Got {} from directory {}".format(f, dir_path))
                file_pair.append(os.path.join(dir_path, f))

        data_files.append(file_pair)
        if view:
            print(data_files)

    data_files = np.array(data_files)

    # No files found
    if np.min(data_files.shape) == 0:
        print(
            "No '.npy' files found in the given directories; quitting data visualization"
        )
        return

    data = []
    labels = []
    for pair in data_files:
        d = pair[0]
        if d.endswith('.pkl'):
            data.extend(loaddata.load_pkl(d))
        else:
            data.extend(np.load(d, allow_pickle=True))
        if len(pair) > 1:
            lbl = pair[1]
            labels.extend(np.load(lbl, allow_pickle=True))

    # If we want to see the binarized data and their respective labels...
    if view:
        # data and labels were flattened above, so each element of data is a
        # single image paired with one label.
        for d, lbl in zip(data, labels):
            print("Label:", label_dict[lbl])
            fig = plt.figure()
            plt.imshow(d, cmap="Greys")
            plt.pause(0.75)
            plt.close(fig)

    return data, labels
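A hedged usage sketch (the directory name is hypothetical; view_data expects a list of directories containing matching data and label files):

data, labels = view_data(["./train_data_dir"], view=True)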
Example #12
def test(data=None):
    weights_file_path = "./pre_trained_cnn_weights.weights"
    network = CNN(lr=0.001, batch_size=8, epochs=15, number_of_classes=9)
    weights = torch.load(weights_file_path)
    network.load_state_dict(weights)

    if data is None:
        data = loaddata.load_pkl("train_data.pkl")

    data = [[np.array(data[i], dtype=bool)] for i in range(len(data))]
    data = network.standardize_data(data)
    data = torch.stack([data[i][0] for i in range(len(data))])

    return network.predict(data)


# Driver for convenience:
# reads the data filename from sys.argv, where data_filename = sys.argv[1]
import sys

# Since this is an optional, for-convenience driver, no guarantees are
# extended for invalid arguments.
if __name__ == "__main__":
    data = None

    if (len(sys.argv) > 1):
        data = loaddata.load_pkl(sys.argv[1])

    test(data)
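Assuming the module is saved as test.py (a hypothetical name), the driver would be invoked as:

python test.py train_data.pkl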
Example #13
                temp_label = torch.tensor(-2)
        pred_labels.append(temp_label.item() + 1)

    # If labels are provided, compute the model's accuracy.
    pred_acc = -1.0
    if labels is not None:
        pred_acc = np.sum(np.asarray(pred_labels) == np.asarray(labels)) / len(labels)

    return pred_labels, pred_acc


if __name__ == '__main__':
    args = make_parser()

    # Load the test data set
    test_data = loaddata.load_pkl(args.data_file[0])

    # Extract Features
    print("Grabbing test data set features...")
    test_data = feat_extraction.pad_data(test_data)
    test_feat = np.array(feat_extraction.feature_ext(test_data, debug=False))
    test_sums = np.array(feat_extraction.extract_sums(test_data))
    test_feat = np.hstack((test_feat, test_sums))
    print("Done with feature extraction.")

    # Save extracted features into .npy files for future loading
    print("Size of test data set:", test_feat.shape)

    test_labels = None
    # if args.labels:
    #     test_labels = np.load('test_data/test_feat/test_labels.npy')
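The feature assembly above concatenates two per-sample feature blocks column-wise; a shape-level sketch with hypothetical dimensions:

import numpy as np

test_feat = np.zeros((100, 24))   # hypothetical: 100 samples, 24 shape features
test_sums = np.zeros((100, 12))   # hypothetical: 12 row/column-sum features
combined = np.hstack((test_feat, test_sums))
print(combined.shape)             # (100, 36)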