示例#1
0
def recognize_whole(model, lb, charList):
    """Recognize the text spelled by a sequence of character images.

    Args:
        model: classifier exposing ``predict``; it is called once per
            character on a batch holding a single image.
        lb: label encoder exposing ``inverse_transform``, used to map the
            raw prediction back to its class label.
        charList: iterable of BGR character images, in reading order.

    Returns:
        str: the predicted labels concatenated in order, or ``''`` when
        ``charList`` is empty.
    """
    labels = []
    for char in charList:
        gray = cv.cvtColor(char, cv.COLOR_BGR2GRAY)
        gray = captchahelper.preprocess(gray, config.INPUT_SIZE,
                                        config.INPUT_SIZE)
        # add a leading batch axis and divide the pixel values by 255.0
        batch = np.expand_dims(img_to_array(gray), axis=0) / 255.0
        label = lb.inverse_transform(model.predict(batch))[0]
        # str() keeps the join valid even when class labels are not strings
        labels.append(str(label))

    return ''.join(labels)
示例#2
0
def read_data_labels(path):
    """Load every image found under ``path`` together with its class label.

    Each image is read as grayscale, passed through
    ``captchahelper.preprocess`` with ``config.INPUT_SIZE`` for both
    dimensions and converted with ``img_to_array``. The label of an image is
    the name of the directory that directly contains it.

    Returns:
        tuple: ``(data, labels)`` as two parallel lists.
    """
    samples = []
    targets = []

    for image_file in paths.list_images(path):
        gray = cv.imread(image_file, cv.IMREAD_GRAYSCALE)
        resized = captchahelper.preprocess(gray, config.INPUT_SIZE,
                                           config.INPUT_SIZE)
        samples.append(img_to_array(resized))

        # second-to-last path component == parent directory == class name
        path_parts = image_file.split(os.path.sep)
        targets.append(path_parts[-2])

    return samples, targets
    # NOTE(review): as placed, these statements follow a `return` and use
    # `image` and `model`, neither of which is defined in the visible code --
    # presumably the body of a separate evaluation routine whose header is
    # missing; confirm before relying on it.

    # convert to grayscale and pad 20 px on every side by replicating the
    # border pixels
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

    # inverse binary threshold with Otsu's method; [1] selects the
    # thresholded image from the returned pair
    thresh = cv2.threshold(gray, 0, 255,
                           cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

    # find external contours, keep the four with the largest area, then
    # reorder them with contours.sort_contours
    # NOTE(review): the two-value unpacking matches the OpenCV 2.4 / 4.x
    # return signature; OpenCV 3.x returns three values -- confirm the
    # targeted version.
    cnts, _ = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                               cv2.CHAIN_APPROX_SIMPLE)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
    cnts = contours.sort_contours(cnts)[0]

    # 3-channel copy of the grayscale image to draw on, plus the list of
    # predicted characters
    output = cv2.merge([gray] * 3)
    predictions = []

    for c in cnts:
        # bounding box of the contour, grown by 5 px on every side
        # NOTE(review): y - 5 / x - 5 become negative for boxes closer than
        # 5 px to the top/left edge, which makes the slice wrap around.
        (x, y, w, h) = cv2.boundingRect(c)
        roi = gray[y - 5:y + h + 5, x - 5:x + w + 5]

        # preprocess to 28x28, add a batch axis, divide by 255.0 and classify;
        # the +1 shifts the winning class index before it is stored
        roi = preprocess(roi, 28, 28)
        roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
        pred = model.predict(roi).argmax(axis=1)[0] + 1
        predictions.append(str(pred))

        # draw the box and the predicted value on the output image
        cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4),
                      (0, 255, 0), 1)
        cv2.putText(output, str(pred), (x - 5, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

    # print the joined predictions and show the image until a key is pressed
    print("[INFO] captcha: {}".format("".join(predictions)))
    cv2.imshow("Output", output)
    cv2.waitKey()
示例#4
0
# register the two required command-line options and parse them into a dict
# NOTE(review): `ap` is created before the visible part of this script --
# presumably an argparse.ArgumentParser; confirm.
ap.add_argument("-d", "--dataset", required=True,
	help="path to input dataset")
ap.add_argument("-m", "--model", required=True,
	help="path to output model")
args = vars(ap.parse_args())

# initialize the data and labels
data = []
labels = []

# loop over the input images
for imagePath in paths.list_images(args["dataset"]):
	# load the image, convert it to grayscale, pre-process it to 28x28,
	# and store it in the data list
	image = cv2.imread(imagePath)
	image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	image = preprocess(image, 28, 28)
	image = img_to_array(image)
	data.append(image)

	# extract the class label from the image path (the name of the
	# directory that directly contains the image) and update the
	# labels list
	label = imagePath.split(os.path.sep)[-2]
	labels.append(label)

# convert to a float array and divide the pixel values by 255.0
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)

# partition the data into training and testing splits using 75% of
# the data for training and the remaining 25% for testing
(trainX, testX, trainY, testY) = train_test_split(data,
示例#5
0
# command-line interface: dataset directory in, model path out
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True,
                help='path to dataset directory')
ap.add_argument('-m', '--model', required=True,
                help='path to output model')
args = vars(ap.parse_args())

data, labels = [], []

# read every image as grayscale, pre-process it to 28x28 and remember the
# name of its parent directory as the class label
for imagePath in paths.list_images(args['dataset']):
    image = cv2.cvtColor(cv2.imread(imagePath), cv2.COLOR_BGR2GRAY)
    image = img_to_array(preprocess(image, width=28, height=28))
    data.append(image)

    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# float array with the pixel values divided by 255.0
data = np.array(data, dtype='float') / 255.0
labels = np.array(labels)

# hold out 25% of the samples for testing, with a fixed seed
(trainX, testX, trainY, testY) = train_test_split(
    data, labels, test_size=0.25, random_state=42)

# fit the label binarizer on the training labels and encode them
lb = LabelBinarizer()
trainY = lb.fit_transform(trainY)
示例#6
0
def main():
    """Classify the digits of a few randomly sampled captcha images.

    Reads ``--input`` (directory of captcha images) and ``--model`` (path to
    a trained network) from the command line, samples up to ten images,
    segments each one into at most four digit regions, classifies every
    region, prints the predicted captcha and shows the annotated image until
    a key is pressed.
    """
    # construct the argument parser and parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--input",
                        required=True,
                        help="path to input directory of images")
    parser.add_argument("-m",
                        "--model",
                        required=True,
                        help="path to input model")
    args = vars(parser.parse_args())

    # load the pre-trained network
    print("[INFO] loading pre-trained network...")
    model = load_model(args["model"])

    # randomly sample a few of the input images; sampling without
    # replacement cannot draw more items than exist, so cap the sample size
    image_paths = list(paths.list_images(args["input"]))
    if not image_paths:
        print("[INFO] no images found in {}".format(args["input"]))
        return
    sample_size = min(10, len(image_paths))
    image_paths = np.random.choice(image_paths,
                                   size=(sample_size, ),
                                   replace=False)

    # loop over the image paths
    for image_path in image_paths:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip such files rather than crash inside cvtColor
        image = cv2.imread(image_path)
        if image is None:
            print("[INFO] skipping unreadable image: {}".format(image_path))
            continue

        # convert the image to grayscale, then pad it so that digits touching
        # the border of the image are retained
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.copyMakeBorder(gray, 20, 20, 20, 20, cv2.BORDER_REPLICATE)

        # threshold the image to reveal the digits
        thresh = cv2.threshold(gray, 0, 255,
                               cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]

        # find contours in the image, keeping only the four largest ones,
        # then sort them from left-to-right
        cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
        cnts = imutils.grab_contours(cnts)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:4]
        if cnts:
            # sort_contours cannot handle an empty list
            cnts = contours.sort_contours(cnts)[0]

        # initialize the output image as a "grayscale" image with 3
        # channels along with the output predictions
        output = cv2.merge([gray] * 3)
        predictions = []

        # loop over the contours
        for contour in cnts:
            # compute the bounding box for the contour then extract the digit
            # with a 5 px margin; clamp the top/left edge at 0 because a
            # negative slice start would wrap around and yield a wrong ROI
            (x, y, w, h) = cv2.boundingRect(contour)
            top = max(y - 5, 0)
            left = max(x - 5, 0)
            roi = gray[top:y + h + 5, left:x + w + 5]

            # pre-process the ROI and then classify it; the +1 shifts the
            # winning class index before it is stored
            roi = preprocess(roi, 28, 28)
            roi = np.expand_dims(img_to_array(roi), axis=0) / 255.0
            pred = model.predict(roi).argmax(axis=1)[0] + 1
            predictions.append(str(pred))

            # draw the prediction on the output image
            cv2.rectangle(output, (x - 2, y - 2), (x + w + 4, y + h + 4),
                          (0, 255, 0), 1)
            cv2.putText(output, str(pred), (x - 5, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

        # show the output image
        print("[INFO] captcha: {}".format("".join(predictions)))
        cv2.imshow("Output", output)
        cv2.waitKey()
示例#7
0
def main():
    """Train LeNet model on the image captcha dataset.

    Reads ``--dataset`` (directory whose sub-directories are named after the
    class of the images they contain) and ``--model`` (output path) from the
    command line, trains the network, prints a classification report, saves
    the model and plots the training curves.
    """
    # number of training epochs; also drives the x-axis of the final plot
    epochs = 15

    # construct the argument parser and parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("-d",
                        "--dataset",
                        required=True,
                        help="path to input dataset")
    parser.add_argument("-m",
                        "--model",
                        required=True,
                        help="path to output model")
    args = vars(parser.parse_args())

    # initialize the data and labels
    data = []
    labels = []

    # loop over the input images
    for image_path in paths.list_images(args["dataset"]):
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip such files rather than crash inside cvtColor
        image = cv2.imread(image_path)
        if image is None:
            print("[INFO] skipping unreadable image: {}".format(image_path))
            continue

        # pre-process the image and store it in the data list
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = preprocess(image, 28, 28)
        image = img_to_array(image)
        data.append(image)

        # extract the class label from the image path (the name of the
        # directory that directly contains the image)
        label = image_path.split(os.path.sep)[-2]
        labels.append(label)

    # convert to a float array and divide the pixel values by 255.0
    data = np.array(data, dtype="float") / 255.0
    labels = np.array(labels)

    # partition the data into training and testing splits using 75% of
    # the data for training and the remaining 25% for testing
    (train_x, test_x, train_y, test_y) = train_test_split(data,
                                                          labels,
                                                          test_size=0.25,
                                                          random_state=42)
    # convert the labels to one-hot vectors
    label_binarizer = LabelBinarizer().fit(train_y)
    train_y = label_binarizer.transform(train_y)
    test_y = label_binarizer.transform(test_y)

    # initialize the model
    # NOTE(review): `lr` is the legacy spelling of the SGD learning-rate
    # argument; newer Keras releases call it `learning_rate` -- confirm the
    # targeted version before changing it.
    print("[INFO] compiling model...")
    model = LeNet.build(width=28, height=28, depth=1, classes=9)
    opt = SGD(lr=0.01)
    model.compile(loss="categorical_crossentropy",
                  optimizer=opt,
                  metrics=["accuracy"])

    # train the network
    print("[INFO] training network...")
    model_fit = model.fit(train_x,
                          train_y,
                          validation_data=(test_x, test_y),
                          batch_size=32,
                          epochs=epochs,
                          verbose=1)

    # evaluate the network
    print("[INFO] evaluating network...")
    predictions = model.predict(test_x, batch_size=32)
    print(
        classification_report(test_y.argmax(axis=1),
                              predictions.argmax(axis=1),
                              target_names=label_binarizer.classes_))
    # save the model to disk
    print("[INFO] serializing network...")
    model.save(args["model"])

    # the accuracy metric is recorded as "acc"/"val_acc" by older Keras
    # releases and as "accuracy"/"val_accuracy" by newer ones; use whichever
    # key is present instead of raising KeyError
    history = model_fit.history
    acc_key = "acc" if "acc" in history else "accuracy"
    val_acc_key = "val_" + acc_key
    epoch_axis = np.arange(0, len(history["loss"]))

    # plot the training + testing loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    plt.plot(epoch_axis, history["loss"], label="train_loss")
    plt.plot(epoch_axis, history["val_loss"], label="val_loss")
    plt.plot(epoch_axis, history[acc_key], label="acc")
    plt.plot(epoch_axis, history[val_acc_key], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend()
    plt.show()