# Directory that holds the digit-recognizer CSV files (no trailing slash).
resources = '../../../resources/digit-recognizer'

# Test CSV in use; the alternative file name is kept for quick switching.
# test_csv = 'test.csv'
test_csv = 'jason_test_1000.csv'

# Training CSV in use; the alternative file names are kept for quick switching.
# train_csv = 'train.csv'
# train_csv = 'jason_train_10000.csv'
train_csv = 'jason_train_5000.csv'
# train_csv = 'jason_train_4000.csv'
# train_csv = 'jason_train_2000.csv'
# train_csv = 'jason_train_1000.csv'

# Join directory and file name with an explicit '/' separator.  Plain string
# concatenation glued the file name onto the directory name
# ('.../digit-recognizerjason_train_5000.csv') because `resources` has no
# trailing slash.
train_csv_filename = '%s/%s' % (resources, train_csv)
test_csv_filename = '%s/%s' % (resources, test_csv)

# Build the train/test split from the training CSV via the project's
# DisplayImage helper: 80% of the rows are requested for training, with a
# fixed random_state.
image_mod = DisplayImage()
x_train, x_test, y_train, y_test = image_mod.train_test_set(
    train_file=train_csv_filename, train_size=.8, random_state=10)
# Alternative kept for reference: train on the whole training file and take
# the test set from a separate test file.
# x_train, x_test, y_train, y_test = DisplayImage().train_test_set(train_file=train_csv_filename,
#                                                                  train_size=1.0,
#                                                                  test_file=test_csv_filename)

# Fit the scaler on the training split only, then apply that same fitted
# scaler to the test split.
model_scaler = StandardScaler()
x_train_v2 = model_scaler.fit_transform(x_train)
x_test_v2 = model_scaler.transform(x_test)

# Convert the training labels with to_categorical
# (presumably Keras one-hot encoding -- confirm against the file's imports).
y_train_v2 = to_categorical(y_train)

# learning_rates = [.0001, 0.01, 1]
# for lr in learning_rates:
# Create the model: model
from sklearn.metrics import accuracy_score
from image_test_space import DisplayImage
from sklearn.linear_model import LogisticRegression

# Directory containing the digit-recognizer CSV files.
resources = '../../../resources/digit-recognizer'

# Active training file.  Other file names that have been used here:
#   'train.csv', 'jason_train_10000.csv', 'jason_train_4000.csv',
#   'jason_train_2000.csv', 'jason_train_1000.csv'
train_csv = 'jason_train_5000.csv'

# Full path of the active training file: directory and name joined by '/'.
csv_filename = '/'.join((resources, train_csv))

# read training info
# NOTE: a pd.read_csv(csv_filename) call used to precede this; its result was
# overwritten by get_all_info() before ever being read, so the redundant file
# read was removed.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# separate training info into samples and target
# Column 1 is taken as the samples and column 0 as the (integer) target.
samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0].astype(int)

# Flatten every sample to 1-D, then let the DisplayImage helper combine the
# flattened and original samples into the final feature set.
samples_v2 = [np.reshape(v, -1) for v in samples_v1]
samples_v3 = image_info.circle_info_arr(samples_v2, samples_v1)

x_train, x_test_before, y_train, y_test = train_test_split(samples_v3,
# Toggle: the training data below is only prepared when this is True.
new_model = False

# Gather images to review
large_resources = '../../../../image-data-train-test-large-data/Coccidia/img/'
base_train = "0"
# glob.glob() returns matches in arbitrary, filesystem-dependent order.  Sort
# them so the fixed random_state below selects the same train/test images on
# every run and every machine.
# NOTE(review): a model saved before this change may have been trained on a
# differently ordered split -- retrain if test images must stay unseen.
images_to_review = sorted(
    glob.glob(large_resources + base_train + "*" + ".jpg"))

# Only the image paths need splitting; for a given random_state the selected
# indices depend on the number of samples only, so no dummy second array is
# required.
train_images, test_images = train_test_split(images_to_review,
                                             test_size=0.2,
                                             random_state=10)

# Geometry passed to the DisplayImage helper.
screen_size = 32
img_size = 512
# All-zero single-channel mask with the same height/width as img_size.
zeros_mask = np.zeros((img_size, img_size, 1), dtype=np.uint8)
img_mod = DisplayImage(img_size=img_size, screen_size=screen_size)

# File name of the saved model; it embeds the image-name prefix in use.
model_save_name = "model_save_v2_%sx.h5" % base_train
# Prepare the training arrays only when a new model is requested.
if new_model:
    # Get training data
    # The DisplayImage helper returns the samples and their targets for the
    # training image paths.
    x_values, y_values = img_mod.get_training_values(train_images)

    x_train = np.array(x_values)
    y_train = y_values

    # Cleanup data for CNN
    # Work on a float32 copy and divide by 255
    # (assumes 8-bit pixel values, giving a 0..1 range -- TODO confirm).
    x_train_v3 = x_train.astype(np.float32)
    x_train_v3 /= 255

    # Report how many training samples were collected.
    y_train_array = np.array(y_train)
    print("Total samples:\t\t" + str(y_train_array.shape[0]))
# Example #4
# 0
from sklearn.metrics import accuracy_score
from image_test_space import DisplayImage
from sklearn.linear_model import LogisticRegression

# Directory containing the digit-recognizer CSV files.
resources = '../../../../resources/digit-recognizer'

# Active training file.  Other file names that have been used here:
#   'train.csv', 'jason_train_5000.csv', 'jason_train_4000.csv',
#   'jason_train_2000.csv', 'jason_train_1000.csv'
train_csv = 'jason_train_10000.csv'

# Full path of the active training file: directory and name joined by '/'.
csv_filename = '/'.join((resources, train_csv))

# read training info
# NOTE: a pd.read_csv(csv_filename) call used to precede this; its result was
# overwritten by get_all_info() before ever being read, so the redundant file
# read was removed.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# separate training info into samples and target
# Column 1 is taken as the samples and column 0 as the (integer) target.
samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0].astype(int)

# Reshape every sample to a 28x28 single-channel image, stack the results
# into one array and store it as unsigned 8-bit values.
samples_v2 = np.array(
    [np.reshape(v, (28, 28, 1)) for v in samples_v1]).astype(np.uint8)
# Example #5
# 0
import numpy as np
import cv2 as cv
import operator
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from image_test_space import DisplayImage

csv_filename = '../../resources/digit-recognizer/jason_train_2000.csv'
# csv_filename = '../../resources/digit-recognizer/train.csv'

# NOTE: a pd.read_csv(csv_filename) call used to precede this; its result was
# overwritten by get_all_info() before ever being read, so the redundant file
# read was removed.
image_info = DisplayImage(csv_filename)
digit_train_set = image_info.get_all_info()

# Preprocess the image column (column 1) in three passes, writing the result
# back after each pass:
#   1. dilate with a 1x3 (one row, three columns) all-ones kernel,
#   2. zero out pixels at or below 100 (THRESH_TOZERO keeps the rest as is),
#   3. rotate each image upright with the DisplayImage helper.
dilate_kernel = np.ones((1, 3), dtype=np.uint8)  # built once, reused per image
digit_train_set[:, 1] = [
    cv.dilate(v, dilate_kernel) for v in digit_train_set[:, 1]]
digit_train_set[:, 1] = [
    cv.threshold(v, 100, 255, cv.THRESH_TOZERO)[1]
    for v in digit_train_set[:, 1]]
digit_train_set[:, 1] = [
    image_info.rotate_to_upright(v) for v in digit_train_set[:, 1]]

# Features are the flattened (1-D) images; targets are the integer labels
# from column 0.
x = [np.reshape(v, -1) for v in digit_train_set[:, 1]]
y = digit_train_set[:, 0].astype('int')

# Container for scores (filled by code that follows this section).
k_scores = {}
# Example #6
# 0
# Load the training CSV (the full 'train.csv' variant is kept for reference).
# digit_train_set = pd.read_csv('../../resources/digit-recognizer/train.csv')
digit_train_set = pd.read_csv(
    '../../resources/digit-recognizer/jason_train_4000.csv')

# The first column is the dependent (target) column; every remaining column
# is an independent (feature) column.
independent_columns = digit_train_set.columns[1:]
dependent_column = digit_train_set.columns[0:1]

# x holds the feature values.  y is selected through a one-column slice, so
# it stays 2-D (n, 1); it is flattened with ravel() where it is used below.
x = digit_train_set.loc[:, independent_columns].values
y = digit_train_set.loc[:, dependent_column].values

# Hold out 20% of the rows for testing, with a fixed random_state.
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=91)

# DisplayImage helper built over the test samples
# (presumably used to show misclassified digits -- confirm against its use).
display_image = DisplayImage(data_set=x_test)

# Container for scores.
scores = {}
# creating loop for neighbor_itr for later
# 3 is best, though 1-10 are very similar at about 93.5%
for neighbor_itr in range(3, 4):
    knn = KNeighborsClassifier(n_neighbors=neighbor_itr)
    knn.fit(x_train, y_train.ravel())
    pred = knn.predict(x_test)
    y_test_raveled = y_test.ravel()
    score = accuracy_score(y_test_raveled, pred)
    tf_result = y_test_raveled == pred
    print(str(neighbor_itr) + ":" + str(score))
    for idx, tf in enumerate(tf_result):
        if not tf:
            print("Actual:" + str(y_test_raveled[idx]) + "::Guess:" +
# Training files
train_csv = 'train.csv'
# train_csv = 'jason_train_10000.csv'
# train_csv = 'jason_train_5000.csv'
# train_csv = 'jason_train_4000.csv'
# train_csv = 'jason_train_2000.csv'
# train_csv = 'jason_train_1000.csv'

# `resources` and `test_csv` are defined before this section.
train_csv_filename = '%s/%s' % (resources, train_csv)
test_csv_filename = '%s/%s' % (resources, test_csv)


# read training info
# NOTE: a pd.read_csv(train_csv_filename) call used to precede this; its
# result was overwritten by get_all_info() before ever being read, so the
# redundant file read was removed.
image_info = DisplayImage(train_csv_filename)
digit_train_set = image_info.get_all_info()
# read testing info
# The test file is read directly with pandas and kept as a plain value array.
digit_test_set = pd.read_csv(test_csv_filename)
test_samples_v1 = digit_test_set.values

# separate training info into samples and target
# Column 1 is taken as the samples and column 0 as the (integer) target.
train_samples_v1 = digit_train_set[:, 1]
target = digit_train_set[:, 0].astype(int)





# Example #8
# 0
from image_test_space import DisplayImage

# Geometry shared by the DisplayImage helper and the checks below.
screen_size = 16
img_size = 512
img_mod = DisplayImage(img_size=img_size, screen_size=screen_size)

# Mid-point of the image along one axis (256 for a 512-pixel image).
half = img_size // 2

# Sanity checks for DisplayImage.in_corner, using screen_size-wide windows.
# NOTE(review): the argument order is presumably two start coordinates
# followed by two end coordinates -- confirm against in_corner's definition.

# A window in the middle of the image is not in a corner.
assert not img_mod.in_corner(half, half,
                             half + screen_size, half + screen_size)
# A window anchored at the origin is in a corner.
assert img_mod.in_corner(0, 0, screen_size, screen_size)
# A window at zero on one axis and at the far end of the other is in a corner.
assert img_mod.in_corner(0, img_size - screen_size, screen_size, img_size)
# A window at zero on one axis but mid-way along the other is not.
assert not img_mod.in_corner(0, half, screen_size, half + screen_size)