def test_dataset_to_pen(self):
    """Round-trip test: dataset -> pen positions -> new dataset -> saved .npz -> reloaded dataset."""
    inputDataset = TestData.getDataset()
    outputDataset = ConvertedDataset(inputDataset)
    for sampleId, rawSample in enumerate(inputDataset.samples):
        sample = inputDataset.undo_preprocess(rawSample)
        penPositions = sample_to_penpositions(sample,
                                              inputDataset.char_labels[sampleId],
                                              inputDataset.eoc_labels[sampleId],
                                              inputDataset.bow_labels[sampleId])
        outputDataset.addSample(penPositions, inputDataset.texts[sampleId])
    outputDataset.applyPreProcessing()

    with tempfile.TemporaryDirectory() as tmpDir:
        tmpFile = os.path.join(tmpDir, 'dataset.npz')
        outputDataset.save(tmpFile)
        verificationDataset = HandWritingDatasetConditional(tmpFile)

        for sampleId, _ in enumerate(inputDataset.samples):
            # Undo normalization and scaling so both datasets are compared in raw coordinates.
            inputSample = (inputDataset.samples[sampleId] * inputDataset.norm_std
                           + inputDataset.norm_mean)
            inputSample *= inputDataset.scale_max - inputDataset.scale_min
            verificationSample = (verificationDataset.samples[sampleId] * verificationDataset.norm_std
                                  + verificationDataset.norm_mean)
            verificationSample *= verificationDataset.scale_max - verificationDataset.scale_min
            # The pen-up column is not affected by normalization; copy it over unchanged.
            inputSample[:, 2] = inputDataset.samples[sampleId][:, 2]
            verificationSample[:, 2] = verificationDataset.samples[sampleId][:, 2]
            np.testing.assert_array_almost_equal(inputSample, verificationSample, decimal=4)
            np.testing.assert_array_equal(inputDataset.char_labels[sampleId],
                                          verificationDataset.char_labels[sampleId])
            np.testing.assert_array_almost_equal(inputDataset.eoc_labels[sampleId],
                                                 verificationDataset.eoc_labels[sampleId])
            np.testing.assert_array_almost_equal(inputDataset.bow_labels[sampleId],
                                                 verificationDataset.bow_labels[sampleId])
            self.assertEqual(inputDataset.texts[sampleId], verificationDataset.texts[sampleId])
        np.testing.assert_array_equal(inputDataset.alphabet, verificationDataset.alphabet)
def renderDataset(inputDataset, outputFolder):
    """Render every sample of the dataset as a skeleton PNG plus a .txt transcription."""
    if not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    preprocessingIsIncorrect = find_scaling_errors(inputDataset)
    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Simple textual progress indicator.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Rendering sample ' + str(sampleId) + ' / '
                             + str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()
        sample = inputDataset.undo_preprocess(rawSample)
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max, inputDataset.scale_min)
        penPositions = sample_to_penpositions(sample,
                                              inputDataset.char_labels[sampleId],
                                              inputDataset.eoc_labels[sampleId],
                                              inputDataset.bow_labels[sampleId])
        skeletonImage, skeletonCharImage, skeletonEocImage, skeletonBowImage, skeletonMetadata = \
            penpositions_to_skeletonimages(penPositions)
        # Invert the binary skeleton so strokes are black on white, then save it.
        img = Image.fromarray(255 - skeletonImage.astype('uint8') * 255, mode='L')
        img.save(os.path.join(outputFolder, str(sampleId) + '.png'), 'PNG')
        text = penpositions_to_text(penPositions)
        with open(os.path.join(outputFolder, str(sampleId) + '.txt'), 'w') as fil:
            fil.write(text)
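# Minimal usage sketch for renderDataset (hypothetical paths; assumes the dataset
# file is a deepwriting-style .npz readable by HandWritingDatasetConditional):
#
#     dataset = HandWritingDatasetConditional('data/deepwriting_training.npz')
#     renderDataset(dataset, 'rendered_samples/')
#
# This writes one '<sampleId>.png' skeleton image and one '<sampleId>.txt'
# transcription per sample into 'rendered_samples/'.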
def test_pen_to_strokes(self):
    """Round-trip test: pen positions -> strokes -> pen positions."""
    inputDataset = TestData.getDataset()
    for sampleId, rawSample in enumerate(inputDataset.samples):
        sample = inputDataset.undo_preprocess(rawSample)
        penPositions = sample_to_penpositions(sample,
                                              inputDataset.char_labels[sampleId],
                                              inputDataset.eoc_labels[sampleId],
                                              inputDataset.bow_labels[sampleId])
        strokes = penpositions_to_strokes(penPositions)
        penPositions2 = strokes_to_penpositions(strokes)
        self.assertEqual(len(penPositions), len(penPositions2))
        penPos1 = [pos1.pos for pos1 in penPositions]
        penPos2 = [pos2.pos for pos2 in penPositions2]
        np.testing.assert_array_almost_equal(penPos1, penPos2)
        # Don't test the last penUp flag; it seems to be arbitrary.
        penUp1 = [pos1.penUp for pos1 in penPositions[:-1]]
        penUp2 = [pos2.penUp for pos2 in penPositions2[:-1]]
        np.testing.assert_array_almost_equal(penUp1, penUp2)
        penChar1 = [pos1.charLabel for pos1 in penPositions]
        penChar2 = [pos2.charLabel for pos2 in penPositions2]
        np.testing.assert_array_equal(penChar1, penChar2)
        penEoc1 = [pos1.eocLabel for pos1 in penPositions]
        penEoc2 = [pos2.eocLabel for pos2 in penPositions2]
        np.testing.assert_array_almost_equal(penEoc1, penEoc2)
        penBow1 = [pos1.bowLabel for pos1 in penPositions]
        penBow2 = [pos2.bowLabel for pos2 in penPositions2]
        np.testing.assert_array_almost_equal(penBow1, penBow2)
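# The tests above access .pos, .penUp, .charLabel, .eocLabel, and .bowLabel on the
# objects returned by sample_to_penpositions / strokes_to_penpositions. The real
# PenPosition class lives elsewhere in the repo; the sketch below only illustrates
# the shape these round-trip tests assume (names and types are inferred, not the
# actual definition).
from dataclasses import dataclass

import numpy as np


@dataclass
class PenPositionSketch:
    pos: np.ndarray   # (x, y) pen coordinate
    penUp: float      # 1.0 when the pen is lifted after this point, else 0.0
    charLabel: int    # index of the character being written
    eocLabel: float   # end-of-character flag
    bowLabel: float   # beginning-of-word flag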
def convertDataset(inputDataset, DRAW_STEPS=False):
    """Convert a dataset by rendering each sample to a skeleton image, extracting a
    stroke graph from the thinned skeleton, and resampling the strokes into new
    pen positions."""
    outputDataset = createOutputDataset(inputDataset)
    preprocessingIsIncorrect = find_scaling_errors(inputDataset)
    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Simple textual progress indicator.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Adding sample ' + str(sampleId) + ' / '
                             + str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()
        sample = inputDataset.undo_preprocess(rawSample)
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max, inputDataset.scale_min)
        penPositions = sample_to_penpositions(sample,
                                              inputDataset.char_labels[sampleId],
                                              inputDataset.eoc_labels[sampleId],
                                              inputDataset.bow_labels[sampleId])
        skeletonImage, skeletonCharImage, skeletonEocImage, skeletonBowImage, skeletonMetadata = \
            penpositions_to_skeletonimages(penPositions)
        if False:  # Debug: visualize the skeleton and label images, then abort.
            plt.figure('SkeletonImages')
            plt.subplot(4, 1, 1)
            plt.imshow(skeletonImage)
            plt.subplot(4, 1, 2)
            plt.imshow(skeletonCharImage, cmap='nipy_spectral', vmin=40, vmax=70)
            plt.subplot(4, 1, 3)
            plt.imshow(skeletonEocImage, cmap='nipy_spectral')
            plt.subplot(4, 1, 4)
            plt.imshow(skeletonBowImage, cmap='nipy_spectral')
            plt.show()
            exit(1)
        # Thin the rendered skeleton to one-pixel width and turn it into a graph.
        thinnedImage = skeletonize(skeletonImage)
        graph = skeleton_to_graph(thinnedImage)
        if DRAW_STEPS:
            print("Drawing ...")
            plt.figure("Graphs")
            plt.subplot(4, 1, 1)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            graph.plot()
        # Extract strokes from the graph, order them, and attach the labels.
        resolve_strokes(graph)
        strokes = graph_to_strokes(graph)
        strokes.sort()
        annotateStrokes(strokes, skeletonCharImage, skeletonEocImage, skeletonBowImage)
        smoothStrokes = resample_strokes_smooth(strokes)
        if DRAW_STEPS:
            plt.subplot(4, 1, 2)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            graph.plot()
            plt.subplot(4, 1, 3)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            strokes.plot()
            plt.subplot(4, 1, 4)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            smoothStrokes.plot()
            plt.show()
            exit(1)
        fakePenPositions = strokes_to_penpositions(smoothStrokes)
        addSampleToDataset(outputDataset, fakePenPositions, inputDataset.texts[sampleId])
        if False:  # Debug: compare smoothed strokes with the reconstructed pen positions, then abort.
            plt.figure('PenPositionsImages')
            plt.subplot(2, 1, 1)
            plt.imshow(thinnedImage, cmap='binary', vmax=10)
            smoothStrokes.plot()
            plt.subplot(2, 1, 2)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            currentStrokeX = []
            currentStrokeY = []
            for penPosition in fakePenPositions:
                currentStrokeX.append(penPosition.pos[0])
                currentStrokeY.append(penPosition.pos[1])
                if penPosition.penUp:
                    plt.plot(currentStrokeX, currentStrokeY, '.-')
                    currentStrokeX = []
                    currentStrokeY = []
            plt.show()
            exit(1)
        if False:  # Debug: re-render the first converted sample as skeleton images, then abort.
            sample = outputDataset.get('samples')[0]
            penPositions = sample_to_penpositions(sample,
                                                  outputDataset.get('char_labels')[sampleId],
                                                  outputDataset.get('eoc_labels')[sampleId],
                                                  outputDataset.get('sow_labels')[sampleId])
            skeletonImage, skeletonCharImage, skeletonEocImage, skeletonBowImage, skeletonMetadata = \
                penpositions_to_skeletonimages(penPositions)
            plt.figure('OutputSkeletonImages')
            plt.subplot(4, 1, 1)
            plt.imshow(skeletonImage)
            plt.subplot(4, 1, 2)
            plt.imshow(skeletonCharImage, cmap='nipy_spectral', vmin=40, vmax=70)
            plt.subplot(4, 1, 3)
            plt.imshow(skeletonEocImage, cmap='nipy_spectral')
            plt.subplot(4, 1, 4)
            plt.imshow(skeletonBowImage, cmap='nipy_spectral')
            plt.show()
            exit(1)
    print()
    return outputDataset
def convertDataset(inputDataset, resample=False):
    """Convert a dataset by turning each sample's pen positions into strokes and
    resampling them smoothly into new pen positions.

    Note: the `resample` parameter is currently unused; smooth resampling is
    always applied."""
    outputDataset = ConvertedDataset(inputDataset)
    preprocessingIsIncorrect = find_scaling_errors(inputDataset)
    for sampleId, rawSample in enumerate(inputDataset.samples):
        # Simple textual progress indicator.
        if sampleId % 50 == 0:
            print()
            sys.stdout.write('Adding sample ' + str(sampleId) + ' / '
                             + str(len(inputDataset.samples)) + ' ')
        else:
            sys.stdout.write('.')
        sys.stdout.flush()
        sample = inputDataset.undo_preprocess(rawSample)
        if preprocessingIsIncorrect[sampleId]:
            sample = fix_scaling_error(sample, inputDataset.scale_max, inputDataset.scale_min)
        penPositions = sample_to_penpositions(sample,
                                              inputDataset.char_labels[sampleId],
                                              inputDataset.eoc_labels[sampleId],
                                              inputDataset.bow_labels[sampleId])
        strokes = penpositions_to_strokes(penPositions)
        skeletonImage, _, _, _, _ = penpositions_to_skeletonimages(penPositions)
        if False:  # Debug: histogram of per-stroke accelerations.
            strokeAccelerations = analyse_strokes_acceleration(strokes)
            plt.figure('StrokeAccelerations')
            plt.hist(strokeAccelerations, bins=30)
            plt.show()
            # exit(1)
        smoothStrokes = resample_strokes_smooth(strokes)
        fakePenPositions = strokes_to_penpositions(smoothStrokes)
        outputDataset.addSample(fakePenPositions, inputDataset.texts[sampleId])
        if True:  # Debug: plot original, smoothed, and reconstructed strokes, then abort after the first sample.
            plt.figure('PenPositionsImages')
            plt.subplot(3, 1, 1)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            strokes.plot()
            plt.subplot(3, 1, 2)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            smoothStrokes.plot()
            plt.subplot(3, 1, 3)
            plt.imshow(skeletonImage, cmap='binary', vmax=10)
            currentStrokeX = []
            currentStrokeY = []
            for penPosition in fakePenPositions:
                currentStrokeX.append(penPosition.pos[0])
                currentStrokeY.append(penPosition.pos[1])
                if penPosition.penUp:
                    plt.plot(currentStrokeX, currentStrokeY, '.-')
                    currentStrokeX = []
                    currentStrokeY = []
            # Plot any trailing stroke that was not terminated by a penUp flag.
            if currentStrokeX:
                plt.plot(currentStrokeX, currentStrokeY, '.-')
            plt.show()
            exit(1)
    print()
    return outputDataset
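# End-to-end usage sketch (hypothetical file names; assumes the input .npz is a
# deepwriting-style dataset and that ConvertedDataset exposes applyPreProcessing()
# and save() as used in the tests above):
#
#     inputDataset = HandWritingDatasetConditional('data/deepwriting_training.npz')
#     outputDataset = convertDataset(inputDataset)
#     outputDataset.applyPreProcessing()
#     outputDataset.save('data/deepwriting_converted.npz')
#
# Note: with the debug visualization above left enabled (if True: ... exit(1)),
# convertDataset aborts after plotting the first sample; disable it for a full run.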