# Example 1
# 0
    def test_dataset_to_pen(self):
        """Round-trip a dataset through pen positions and a save/load cycle.

        Every sample is converted to pen positions, rebuilt into a new
        dataset, saved to a temporary .npz file, reloaded, and compared
        against the original samples, labels, texts, and alphabet.
        """
        inputDataset = TestData.getDataset()

        outputDataset = ConvertedDataset(inputDataset)

        for sampleId, rawSample in enumerate(inputDataset.samples):
            sample = inputDataset.undo_preprocess(rawSample)
            penPositions = sample_to_penpositions(
                sample, inputDataset.char_labels[sampleId],
                inputDataset.eoc_labels[sampleId],
                inputDataset.bow_labels[sampleId])
            outputDataset.addSample(penPositions, inputDataset.texts[sampleId])

        outputDataset.applyPreProcessing()

        with tempfile.TemporaryDirectory() as tmpDir:
            tmpFile = os.path.join(tmpDir, 'dataset.npz')
            outputDataset.save(tmpFile)

            verificationDataset = HandWritingDatasetConditional(tmpFile)

            for sampleId, _ in enumerate(inputDataset.samples):
                # BUG FIX: index with sampleId, not a hard-coded 0 — the
                # original loop only ever compared the first sample.
                inputSample = inputDataset.samples[
                    sampleId] * inputDataset.norm_std + inputDataset.norm_mean
                inputSample *= inputDataset.scale_max - inputDataset.scale_min
                verificationSample = (verificationDataset.samples[sampleId] *
                                      verificationDataset.norm_std +
                                      verificationDataset.norm_mean)
                verificationSample *= verificationDataset.scale_max - verificationDataset.scale_min

                # Column 2 (pen-up flag) is not normalized; copy it through
                # unchanged so the almost-equal comparison only tests x/y.
                inputSample[:, 2] = inputDataset.samples[sampleId][:, 2]
                verificationSample[:, 2] = verificationDataset.samples[sampleId][:, 2]

                np.testing.assert_array_almost_equal(inputSample,
                                                     verificationSample,
                                                     decimal=4)
                np.testing.assert_array_equal(
                    inputDataset.char_labels[sampleId],
                    verificationDataset.char_labels[sampleId])
                np.testing.assert_array_almost_equal(
                    inputDataset.eoc_labels[sampleId],
                    verificationDataset.eoc_labels[sampleId])
                np.testing.assert_array_almost_equal(
                    inputDataset.bow_labels[sampleId],
                    verificationDataset.bow_labels[sampleId])
                self.assertEqual(inputDataset.texts[sampleId],
                                 verificationDataset.texts[sampleId])

            np.testing.assert_array_equal(inputDataset.alphabet,
                                          verificationDataset.alphabet)
# Example 2
# 0
def renderDataset(inputDataset, outputFolder):
    """Render every dataset sample as a skeleton PNG plus a text transcript.

    For each sample, writes ``<sampleId>.png`` (black-on-white skeleton
    image) and ``<sampleId>.txt`` (decoded text) into *outputFolder*.

    Parameters
    ----------
    inputDataset : dataset providing samples, labels, texts,
        ``undo_preprocess`` and scaling attributes.
    outputFolder : str — destination directory, created if missing.
    """
    # exist_ok avoids the race between an existence check and creation.
    os.makedirs(outputFolder, exist_ok=True)

    preprocessingIsIncorrect = find_scaling_errors(inputDataset)

    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Progress indicator: a header line every 50 samples, dots otherwise.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Rendering sample ' + str(sampleId) + ' / ' +
                             str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()

        sample = inputDataset.undo_preprocess(rawSample)

        # Some samples were stored with a wrong scale; repair before rendering.
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max,
                                       inputDataset.scale_min)

        penPositions = sample_to_penpositions(
            sample, inputDataset.char_labels[sampleId],
            inputDataset.eoc_labels[sampleId],
            inputDataset.bow_labels[sampleId])

        # Only the base skeleton image is rendered; the label images and
        # metadata returned alongside it are unused here.
        skeletonImage, _, _, _, _ = penpositions_to_skeletonimages(
            penPositions)
        # Invert so skeleton pixels are black on a white background.
        img = Image.fromarray(255 - skeletonImage.astype('uint8') * 255,
                              mode='L')

        img.save(os.path.join(outputFolder, str(sampleId) + '.png'), 'PNG')
        text = penpositions_to_text(penPositions)
        with open(os.path.join(outputFolder,
                               str(sampleId) + '.txt'), 'w') as fil:
            fil.write(text)
# Example 3
# 0
    def test_pen_to_strokes(self):
        """Verify pen positions survive a round trip through strokes.

        Converts each sample's pen positions to strokes and back, then
        checks positions, pen-up flags, and the char/eoc/bow labels match.
        """
        inputDataset = TestData.getDataset()

        outputDataset = ConvertedDataset(inputDataset)

        for sampleId, rawSample in enumerate(inputDataset.samples):
            sample = inputDataset.undo_preprocess(rawSample)
            penPositions = sample_to_penpositions(
                sample, inputDataset.char_labels[sampleId],
                inputDataset.eoc_labels[sampleId],
                inputDataset.bow_labels[sampleId])
            strokes = penpositions_to_strokes(penPositions)
            penPositions2 = strokes_to_penpositions(strokes)

            self.assertEqual(len(penPositions), len(penPositions2))

            penPos1 = [pos1.pos for pos1 in penPositions]
            penPos2 = [pos2.pos for pos2 in penPositions2]
            np.testing.assert_array_almost_equal(penPos1, penPos2)

            # Don't test the last penUp flag; it appears to be random.
            penUp1 = [pos1.penUp for pos1 in penPositions[:-1]]
            penUp2 = [pos2.penUp for pos2 in penPositions2[:-1]]
            np.testing.assert_array_almost_equal(penUp1, penUp2)

            # Idiom cleanup: the original iterated over pointless full
            # copies (penPositions[:]); iterating the list directly is
            # equivalent.
            penChar1 = [pos1.charLabel for pos1 in penPositions]
            penChar2 = [pos2.charLabel for pos2 in penPositions2]
            np.testing.assert_array_equal(penChar1, penChar2)

            penEoc1 = [pos1.eocLabel for pos1 in penPositions]
            penEoc2 = [pos2.eocLabel for pos2 in penPositions2]
            np.testing.assert_array_almost_equal(penEoc1, penEoc2)

            penBow1 = [pos1.bowLabel for pos1 in penPositions]
            penBow2 = [pos2.bowLabel for pos2 in penPositions2]
            np.testing.assert_array_almost_equal(penBow1, penBow2)
def convertDataset(inputDataset, DRAW_STEPS=False):
    """Convert *inputDataset* into a dataset of smoothed fake pen positions.

    Pipeline per sample: render pen positions to skeleton images, thin the
    skeleton, trace it into a graph, resolve the graph into ordered strokes,
    annotate the strokes from the label images, smooth/resample them, and
    convert back to pen positions that are added to the output dataset.

    Parameters
    ----------
    inputDataset : dataset providing samples, labels, texts,
        ``undo_preprocess`` and scaling attributes.
    DRAW_STEPS : when True, plots the intermediate stages for the first
        sample and then terminates the process with ``exit(1)``
        (debugging aid).

    Returns
    -------
    The populated output dataset created by ``createOutputDataset``.
    """
    outputDataset = createOutputDataset(inputDataset)

    preprocessingIsIncorrect = find_scaling_errors(inputDataset)

    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Progress indicator: a header line every 50 samples, dots otherwise.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Adding sample ' + str(sampleId) + ' / ' +
                             str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()

        sample = inputDataset.undo_preprocess(rawSample)

        # Some samples were stored with a wrong scale; repair before use.
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max,
                                       inputDataset.scale_min)

        penPositions = sample_to_penpositions(
            sample, inputDataset.char_labels[sampleId],
            inputDataset.eoc_labels[sampleId],
            inputDataset.bow_labels[sampleId])

        # The skeleton metadata returned last is unused here.
        skeletonImage, skeletonCharImage, skeletonEocImage, skeletonBowImage, _ = penpositions_to_skeletonimages(
            penPositions)

        # NOTE: three unreachable `if False:` debug-plotting blocks from the
        # original were removed here; the DRAW_STEPS path below is kept.

        thinnedImage = skeletonize(skeletonImage)

        graph = skeleton_to_graph(thinnedImage)

        if DRAW_STEPS:
            print("Drawing ...")
            plt.figure("Graphs")
            plt.subplot(4, 1, 1)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            graph.plot()

        resolve_strokes(graph)

        strokes = graph_to_strokes(graph)

        strokes.sort()
        annotateStrokes(strokes, skeletonCharImage, skeletonEocImage,
                        skeletonBowImage)

        smoothStrokes = resample_strokes_smooth(strokes)

        if DRAW_STEPS:
            plt.subplot(4, 1, 2)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            graph.plot()
            plt.subplot(4, 1, 3)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            strokes.plot()
            plt.subplot(4, 1, 4)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            smoothStrokes.plot()
            plt.show()
            # Deliberately aborts after the first sample when debugging.
            exit(1)

        fakePenPositions = strokes_to_penpositions(smoothStrokes)

        addSampleToDataset(outputDataset, fakePenPositions,
                           inputDataset.texts[sampleId])

    print()

    return outputDataset
def convertDataset(inputDataset, resample=False):
    """Convert *inputDataset* samples into smoothed fake pen positions.

    Each sample is converted to pen positions, turned into strokes,
    smoothed/resampled, converted back to pen positions, and added to a
    ``ConvertedDataset``.

    NOTE(review): the ``if True:`` block below plots the first sample and
    then calls ``exit(1)``, so as written the process terminates after one
    sample and the return statement is never reached — this looks like
    leftover debugging; confirm before relying on the return value. The
    ``resample`` parameter is currently unused in this body.

    Parameters
    ----------
    inputDataset : dataset providing samples, labels, texts,
        ``undo_preprocess`` and scaling attributes.
    resample : unused (kept for interface compatibility).
    """
    outputDataset = ConvertedDataset(inputDataset)

    preprocessingIsIncorrect = find_scaling_errors(inputDataset)

    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Progress indicator: a header line every 50 samples, dots otherwise.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Adding sample ' + str(sampleId) + ' / ' +
                             str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()

        sample = inputDataset.undo_preprocess(rawSample)

        # Some samples were stored with a wrong scale; repair before use.
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max,
                                       inputDataset.scale_min)

        penPositions = sample_to_penpositions(
            sample, inputDataset.char_labels[sampleId],
            inputDataset.eoc_labels[sampleId],
            inputDataset.bow_labels[sampleId])

        strokes = penpositions_to_strokes(penPositions)
        # Only the base skeleton image is needed (for the debug plots below).
        skeletonImage, _, _, _, _ = penpositions_to_skeletonimages(
            penPositions)

        # NOTE: an unreachable `if False:` stroke-acceleration histogram
        # block from the original was removed here.

        smoothStrokes = resample_strokes_smooth(strokes)

        fakePenPositions = strokes_to_penpositions(smoothStrokes)

        outputDataset.addSample(fakePenPositions, inputDataset.texts[sampleId])

        if True:  # leftover debug visualization; aborts after first sample
            plt.figure('PenPositionsImages')
            plt.subplot(3, 1, 1)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            strokes.plot()
            plt.subplot(3, 1, 2)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            smoothStrokes.plot()
            plt.subplot(3, 1, 3)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            currentStrokeX = list()
            currentStrokeY = list()
            for penPosition in fakePenPositions:
                currentStrokeX.append(penPosition.pos[0])
                currentStrokeY.append(penPosition.pos[1])
                if penPosition.penUp:
                    plt.plot(currentStrokeX, currentStrokeY, '.-')
                    currentStrokeX = list()
                    currentStrokeY = list()
            # Flush the trailing stroke if the last position had no pen-up.
            if currentStrokeX:
                plt.plot(currentStrokeX, currentStrokeY, '.-')
            plt.show()
            exit(1)

    print()

    return outputDataset