示例#1
0
def create_mnist_dataset():
    """Build a digit data set from the raw MNIST data.

    The samples labelled ``0`` are removed, every 20th remaining sample
    (starting at index 3) is kept, the kept images are passed through
    ``blobify`` and EFD features are extracted from the result.

    As a best-effort side effect the processed images are written as JPEG
    files named ``<label>_<index>.jpg`` to ``~/sudokuextract``. A failure
    while writing is printed and otherwise ignored.

    Returns:
        tuple: ``(images, labels, X, y)`` where

        * ``images`` is the image collection returned by ``blobify``,
        * ``labels`` are the subsampled labels, *not* filtered by the mask
          returned from ``blobify`` (kept as-is for backward compatibility),
        * ``X`` is the array of EFD features, one entry per image,
        * ``y`` are the ``int8`` labels filtered by the ``blobify`` mask,
          i.e. the targets that pair with ``X``.
    """

    def label_counts(values):
        # (label, number of occurrences) pairs for the labels 0..9.
        return list(zip(_range(10), np.bincount(values)))

    images, labels = get_mnist_raw_data()

    # Drop all samples labelled zero.
    mask = labels != 0
    print("Pre-zero removal:  Label / N : {0}".format(label_counts(labels)))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    # Subsample: keep every 20th sample, starting with index 3.
    images = images[3::20]
    labels = labels[3::20]

    print("Pre-blobify:  Label / N : {0}".format(label_counts(labels)))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format(label_counts(y)))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    output_dir = os.path.expanduser('~/sudokuextract')
    try:
        os.makedirs(output_dir)
    except OSError:
        # Most commonly the directory already exists. Any other problem
        # resurfaces (and is reported) in the save loop below.
        pass

    try:
        # Pair the images with ``y`` rather than ``labels``: ``labels`` has
        # not been filtered by the blobify mask, so it goes out of step with
        # ``images`` as soon as blobify rejects a sample.
        for i, (img, lbl) in enumerate(zip(images, y)):
            file_path = os.path.join(
                output_dir, '{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary; text mode breaks the write on Python 3.
            with open(file_path, 'wb') as f:
                Image.fromarray(img, 'L').save(f, 'JPEG')
    except Exception as e:
        # Saving the images is a debugging aid only; never fail because of it.
        print(e)

    return images, labels, X, y
示例#2
0
def create_mnist_dataset():
    """Build a digit data set from the raw MNIST data.

    The samples labelled ``0`` are removed, every 20th remaining sample
    (starting at index 3) is kept, the kept images are passed through
    ``blobify`` and EFD features are extracted from the result.

    As a best-effort side effect the processed images are written as JPEG
    files named ``<label>_<index>.jpg`` to ``~/sudokuextract``. A failure
    while writing is printed and otherwise ignored.

    Returns:
        tuple: ``(images, labels, X, y)`` where

        * ``images`` is the image collection returned by ``blobify``,
        * ``labels`` are the subsampled labels, *not* filtered by the mask
          returned from ``blobify`` (kept as-is for backward compatibility),
        * ``X`` is the array of EFD features, one entry per image,
        * ``y`` are the ``int8`` labels filtered by the ``blobify`` mask,
          i.e. the targets that pair with ``X``.
    """

    def label_counts(values):
        # (label, number of occurrences) pairs for the labels 0..9.
        return list(zip(_range(10), np.bincount(values)))

    images, labels = get_mnist_raw_data()

    # Drop all samples labelled zero.
    mask = labels != 0
    print("Pre-zero removal:  Label / N : {0}".format(label_counts(labels)))
    images = list(itertools.compress(images, mask))
    labels = labels[mask]

    # Subsample: keep every 20th sample, starting with index 3.
    images = images[3::20]
    labels = labels[3::20]

    print("Pre-blobify:  Label / N : {0}".format(label_counts(labels)))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format(label_counts(y)))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    output_dir = os.path.expanduser('~/sudokuextract')
    try:
        os.makedirs(output_dir)
    except OSError:
        # Most commonly the directory already exists. Any other problem
        # resurfaces (and is reported) in the save loop below.
        pass

    try:
        # Pair the images with ``y`` rather than ``labels``: ``labels`` has
        # not been filtered by the blobify mask, so it goes out of step with
        # ``images`` as soon as blobify rejects a sample.
        for i, (img, lbl) in enumerate(zip(images, y)):
            file_path = os.path.join(
                output_dir, '{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary; text mode breaks the write on Python 3.
            with open(file_path, 'wb') as f:
                Image.fromarray(img, 'L').save(f, 'JPEG')
    except Exception as e:
        # Saving the images is a debugging aid only; never fail because of it.
        print(e)

    return images, labels, X, y
示例#3
0
def create_data_set_from_images(path_to_data_dir, force=False):
    """Build a digit data set from labelled Sudoku photographs.

    Every ``.jpg``, ``.png`` or ``.bmp`` file directly inside
    ``path_to_data_dir`` that has a text file with the same base name next
    to it is used. The text file is read as lines; character ``kk`` of line
    ``k`` is converted with ``int`` and used as the label of cell
    ``(k, kk)``.

    Each image is run through ``_extraction_iterator_map`` twice, once with
    default arguments and once with ``use_local_thresholding=True``. In each
    pass at most one extracted Sudoku is accepted and its cells are added to
    the data set.

    As a best-effort side effect the collected cell images are written as
    JPEG files named ``<label>_<index>.jpg`` to ``~/sudokuextract``. A
    failure while writing is printed and otherwise ignored.

    Args:
        path_to_data_dir: Directory holding the images and their reference
            text files. ``~`` is expanded.
        force: If true, the first extracted Sudoku of each pass is accepted
            without asking. Otherwise each candidate is plotted and the user
            is prompted: ``a`` aborts the pass, ``n`` moves on to the next
            candidate and any other answer accepts the candidate.

    Returns:
        tuple or None: ``(images, labels, X, y)`` where ``images`` is the
        image collection returned by ``blobify``, ``labels`` is the list of
        all collected labels (not filtered by the ``blobify`` mask), ``X`` is
        the array of EFD features and ``y`` are the ``int8`` labels filtered
        by the ``blobify`` mask. ``None`` is returned if matplotlib cannot be
        imported (regardless of ``force``).
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print("This method requires matplotlib installed...")
        return

    # ``raw_input`` only exists on Python 2 (or as a module-level shim);
    # fall back to the Python 3 builtin otherwise.
    try:
        ask = raw_input
    except NameError:
        ask = input

    images = []
    labels = []

    def store_cells(sudoku, parsed_img):
        # Add a copy of every cell image and its reference label to the set.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                images.append(cell.copy())
                labels.append(int(parsed_img[k][kk]))

    def show_cells(sudoku, parsed_img):
        # Plot every cell on a 9x9 grid, titled with its reference label.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                ax = plt.subplot2grid((9, 9), (k, kk))
                ax.imshow(cell, plt.cm.gray)
                ax.set_title(str(parsed_img[k][kk]))
                ax.axis('off')
        plt.show()

    def collect_first_accepted(candidates, parsed_img):
        # Store the cells of the first candidate that is accepted, if any.
        for sudoku, _ in candidates:
            if not force:
                show_cells(sudoku, parsed_img)
                ok = ask("Is this OK (Y/n/a)? ").lower()
                if ok == 'a':
                    # Abort: give up on this pass without storing anything.
                    break
                if ok == 'n':
                    # Rejected: try the next candidate.
                    continue
            store_cells(sudoku, parsed_img)
            break

    path_to_data_dir = os.path.abspath(os.path.expanduser(path_to_data_dir))
    _, _, files = next(os.walk(path_to_data_dir))
    available = set(files)
    for f in files:
        file_name, file_ext = os.path.splitext(f)
        reference_name = "{0}.txt".format(file_name)
        if file_ext not in ('.jpg', '.png', '.bmp'):
            continue
        if reference_name not in available:
            continue

        # The current file is an image and it has a corresponding text file
        # as reference. Use it as data.
        print("Handling {0}...".format(f))
        image = Image.open(os.path.join(path_to_data_dir, f))
        with open(os.path.join(path_to_data_dir, reference_name),
                  'rt') as reference_file:
            parsed_img = reference_file.read().strip().split('\n')

        # First pass: default extraction settings.
        collect_first_accepted(
            _extraction_iterator_map(np.array(image.convert('L'))),
            parsed_img)
        # Second pass: same image, extracted with local thresholding.
        collect_first_accepted(
            _extraction_iterator_map(
                np.array(image.convert('L')), use_local_thresholding=True),
            parsed_img)

    output_dir = os.path.expanduser('~/sudokuextract')
    try:
        os.makedirs(output_dir)
    except OSError:
        # Most commonly the directory already exists. Any other problem
        # resurfaces (and is reported) in the save loop below.
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            file_path = os.path.join(
                output_dir, '{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary; text mode breaks the write on Python 3.
            with open(file_path, 'wb') as out_file:
                Image.fromarray(img, 'L').save(out_file, 'JPEG')
    except Exception as e:
        # Saving the images is a debugging aid only; never fail because of it.
        print(e)

    print("Pre-blobify:  Label / N : {0}".format(
        list(zip(_range(10), np.bincount(labels)))))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format(
        list(zip(_range(10), np.bincount(y)))))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    return images, labels, X, y
示例#4
0
def create_data_set_from_images(path_to_data_dir, force=False):
    """Build a digit data set from labelled Sudoku photographs.

    Every ``.jpg``, ``.png`` or ``.bmp`` file directly inside
    ``path_to_data_dir`` that has a text file with the same base name next
    to it is used. The text file is read as lines; character ``kk`` of line
    ``k`` is converted with ``int`` and used as the label of cell
    ``(k, kk)``.

    Each image is run through ``_extraction_iterator_map`` twice, once with
    default arguments and once with ``use_local_thresholding=True``. In each
    pass at most one extracted Sudoku is accepted and its cells are added to
    the data set.

    As a best-effort side effect the collected cell images are written as
    JPEG files named ``<label>_<index>.jpg`` to ``~/sudokuextract``. A
    failure while writing is printed and otherwise ignored.

    Args:
        path_to_data_dir: Directory holding the images and their reference
            text files. ``~`` is expanded.
        force: If true, the first extracted Sudoku of each pass is accepted
            without asking. Otherwise each candidate is plotted and the user
            is prompted: ``a`` aborts the pass, ``n`` moves on to the next
            candidate and any other answer accepts the candidate.

    Returns:
        tuple or None: ``(images, labels, X, y)`` where ``images`` is the
        image collection returned by ``blobify``, ``labels`` is the list of
        all collected labels (not filtered by the ``blobify`` mask), ``X`` is
        the array of EFD features and ``y`` are the ``int8`` labels filtered
        by the ``blobify`` mask. ``None`` is returned if matplotlib cannot be
        imported (regardless of ``force``).
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        print("This method requires matplotlib installed...")
        return

    # ``raw_input`` only exists on Python 2 (or as a module-level shim);
    # fall back to the Python 3 builtin otherwise.
    try:
        ask = raw_input
    except NameError:
        ask = input

    images = []
    labels = []

    def store_cells(sudoku, parsed_img):
        # Add a copy of every cell image and its reference label to the set.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                images.append(cell.copy())
                labels.append(int(parsed_img[k][kk]))

    def show_cells(sudoku, parsed_img):
        # Plot every cell on a 9x9 grid, titled with its reference label.
        for k, row in enumerate(sudoku):
            for kk, cell in enumerate(row):
                ax = plt.subplot2grid((9, 9), (k, kk))
                ax.imshow(cell, plt.cm.gray)
                ax.set_title(str(parsed_img[k][kk]))
                ax.axis('off')
        plt.show()

    def collect_first_accepted(candidates, parsed_img):
        # Store the cells of the first candidate that is accepted, if any.
        for sudoku, _ in candidates:
            if not force:
                show_cells(sudoku, parsed_img)
                ok = ask("Is this OK (Y/n/a)? ").lower()
                if ok == 'a':
                    # Abort: give up on this pass without storing anything.
                    break
                if ok == 'n':
                    # Rejected: try the next candidate.
                    continue
            store_cells(sudoku, parsed_img)
            break

    path_to_data_dir = os.path.abspath(os.path.expanduser(path_to_data_dir))
    _, _, files = next(os.walk(path_to_data_dir))
    available = set(files)
    for f in files:
        file_name, file_ext = os.path.splitext(f)
        reference_name = "{0}.txt".format(file_name)
        if file_ext not in ('.jpg', '.png', '.bmp'):
            continue
        if reference_name not in available:
            continue

        # The current file is an image and it has a corresponding text file
        # as reference. Use it as data.
        print("Handling {0}...".format(f))
        image = Image.open(os.path.join(path_to_data_dir, f))
        with open(os.path.join(path_to_data_dir, reference_name),
                  'rt') as reference_file:
            parsed_img = reference_file.read().strip().split('\n')

        # First pass: default extraction settings.
        collect_first_accepted(
            _extraction_iterator_map(np.array(image.convert('L'))),
            parsed_img)
        # Second pass: same image, extracted with local thresholding.
        collect_first_accepted(
            _extraction_iterator_map(
                np.array(image.convert('L')), use_local_thresholding=True),
            parsed_img)

    output_dir = os.path.expanduser('~/sudokuextract')
    try:
        os.makedirs(output_dir)
    except OSError:
        # Most commonly the directory already exists. Any other problem
        # resurfaces (and is reported) in the save loop below.
        pass

    try:
        for i, (img, lbl) in enumerate(zip(images, labels)):
            file_path = os.path.join(
                output_dir, '{1}_{0:04d}.jpg'.format(i + 1, lbl))
            # JPEG data is binary; text mode breaks the write on Python 3.
            with open(file_path, 'wb') as out_file:
                Image.fromarray(img, 'L').save(out_file, 'JPEG')
    except Exception as e:
        # Saving the images is a debugging aid only; never fail because of it.
        print(e)

    print("Pre-blobify:  Label / N : {0}".format(
        list(zip(_range(10), np.bincount(labels)))))
    y = np.array(labels, 'int8')
    images, mask = blobify(images)
    y = y[mask]
    print("Post-blobify:  Label / N : {0}".format(
        list(zip(_range(10), np.bincount(y)))))

    print("Extract features...")
    X = np.array([extract_efd_features(img) for img in images])

    return images, labels, X, y