def export_csv(dir, overwrite_existing, template):
    """Export a CSV file for each Inselect document found directly in `dir`.

    dir                -- directory searched (non-recursively) for files whose
                          name ends with InselectDocument.EXTENSION.
    overwrite_existing -- when falsy, documents whose CSV file already exists
                          are skipped.
    template           -- passed to UserTemplate.load() when truthy; otherwise
                          the DWC template is used.

    Documents with validation problems are reported and not exported. Any
    exception raised while processing a document is printed together with its
    traceback and processing moves on to the next document.
    """
    dir = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    pattern = '*' + InselectDocument.EXTENSION

    for doc_path in dir.glob(pattern):
        try:
            debug_print('Loading [{0}]'.format(doc_path))
            document = InselectDocument.load(doc_path)
            problems = export.validation_problems(document)
            target = export.csv_path(document)

            if problems.any_problems:
                print(
                    'Not exporting metadata for [{0}] because there are '
                    'validation problems'.format(doc_path)
                )
                for line in format_validation_problems(problems):
                    print(line)
                continue

            # Existence of the target is only examined when overwriting
            # has not been requested.
            if overwrite_existing or not target.is_file():
                print('Writing CSV for [{0}]'.format(doc_path))
                export.export_csv(document)
            else:
                print('CSV file [{0}] exists - skipping'.format(target))
        except KeyboardInterrupt:
            # Let the user abort the whole batch
            raise
        except Exception:
            print('Error saving CSV from [{0}]'.format(doc_path))
            traceback.print_exc()
def test_cancel_save_crops(self):
    """Cancelling save_crops from the progress callback changes nothing."""
    with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                   TESTDATA / 'shapes.png') as tempdir:
        doc = InselectDocument.load(tempdir / 'shapes.inselect')

        # Pre-populate the crops directory so that we can detect whether
        # the cancelled export touched it
        doc.crops_dir.mkdir()
        existing = doc.crops_dir.joinpath('a_file')
        with existing.open('w') as outfile:
            outfile.write('Some data\n')

        class CancelExport(Exception):
            pass

        def progress(msg):
            "Progress callback that aborts the export by raising"
            raise CancelExport()

        exporter = DocumentExport(self.TEMPLATE)
        with self.assertRaises(CancelExport):
            exporter.save_crops(doc, progress=progress)

        # The temp directory and the crops directory are both as we left them
        names_in_tempdir = sorted(p.name for p in tempdir.iterdir())
        self.assertEqual(
            ['shapes.inselect', 'shapes.png', doc.crops_dir.name],
            names_in_tempdir
        )
        names_in_crops_dir = [p.name for p in doc.crops_dir.iterdir()]
        self.assertEqual(['a_file'], names_in_crops_dir)
def save_crops(dir, overwrite_existing, template):
    """Save cropped images for each Inselect document found directly in `dir`.

    dir                -- directory searched (non-recursively) for files whose
                          name ends with InselectDocument.EXTENSION.
    overwrite_existing -- when falsy, documents whose crops directory already
                          exists are skipped.
    template           -- passed to UserTemplate.load() when truthy; otherwise
                          the DWC template is used.

    Documents with validation problems are reported and their crops are not
    saved. Any exception raised while processing a document is printed together
    with its traceback and processing moves on to the next document.
    """
    dir = Path(dir)
    export = DocumentExport(UserTemplate.load(template) if template else DWC)
    pattern = '*' + InselectDocument.EXTENSION

    for doc_path in dir.glob(pattern):
        try:
            debug_print('Loading [{0}]'.format(doc_path))
            document = InselectDocument.load(doc_path)
            problems = export.validation_problems(document)

            if problems.any_problems:
                print(
                    'Not saving crops for [{0}] because there are validation '
                    'problems'.format(doc_path)
                )
                for line in format_validation_problems(problems):
                    print(line)
                continue

            # Existence of the crops directory is only examined when
            # overwriting has not been requested.
            if overwrite_existing or not document.crops_dir.is_dir():
                print('Will save crops for [{0}] to [{1}]'.format(
                    doc_path, document.crops_dir
                ))

                debug_print('Loading full-resolution scanned image')
                # Value deliberately discarded - presumably the attribute
                # access loads the image up-front; confirm against
                # InselectDocument
                document.scanned.array

                debug_print('Saving crops')
                export.save_crops(document)
            else:
                print('Crops dir [{0}] exists - skipping'.format(
                    document.crops_dir
                ))
        except KeyboardInterrupt:
            # Let the user abort the whole batch
            raise
        except Exception:
            print('Error saving crops from [{0}]'.format(doc_path))
            traceback.print_exc()
def test_csv_export(self):
    """The exported CSV has the expected path, header row and data rows."""
    expected_headers = [
        'Cropped_image_name', 'ItemNumber',
        'NormalisedLeft', 'NormalisedTop',
        'NormalisedRight', 'NormalisedBottom',
        'ThumbnailLeft', 'ThumbnailTop',
        'ThumbnailRight', 'ThumbnailBottom',
        'OriginalLeft', 'OriginalTop',
        'OriginalRight', 'OriginalBottom',
        'catalogNumber', 'Department',
        'scientificName', 'scientificName-value',
    ]

    # Only the metadata columns and the 'original' coordinates columns are
    # compared: thumbnail coordinates don't exist and normalised
    # coordinates are floating point
    metadata_cols = itemgetter(0, 1, 10, 11, 12, 13, 14, 15, 16, 17)
    expected_rows = [
        ('01_1.png', '1', '0', '0', '189', '189',
         '1', 'Entomology', 'A', '1'),
        ('02_2.png', '2', '271', '0', '459', '189',
         '2', 'Entomology', 'B', '2'),
        ('03_10.png', '3', '194', '196', '257', '232',
         '3', 'Entomology', 'インセクト', '10'),
        ('04_3.png', '4', '0', '248', '189', '437',
         '4', 'Entomology', 'Elsinoë', '3'),
        ('05_4.png', '5', '271', '248', '459', '437',
         '5', 'Entomology', 'D', '4'),
    ]

    with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                   TESTDATA / 'shapes.png') as tempdir:
        doc = InselectDocument.load(tempdir / 'shapes.inselect')

        csv_path = DocumentExport(self.TEMPLATE).export_csv(doc)
        self.assertEqual(csv_path, tempdir / 'shapes.csv')

        # Check CSV contents
        with csv_path.open('rb') as infile:
            rows = unicodecsv.reader(infile, encoding='utf8')

            self.assertEqual(expected_headers, next(rows))

            for expected in expected_rows:
                self.assertEqual(expected, metadata_cols(next(rows)))

            # No rows should remain
            self.assertIsNone(next(rows, None))
def test_fname_collison(self):
    """Repeated crop file names are given '-N' suffixes."""
    class FakeDocument(object):
        pass

    # One item per scientific name, several of which are repeated
    names = ['A', 'A', 'A', 'D', 'B', 'D', 'A']

    document = FakeDocument()
    document.items = [
        {"fields": {"scientificName": name}} for name in names
    ]

    expected = [
        'A.png', 'A-1.png', 'A-2.png',
        'D.png',
        'B.png',
        'D-1.png',
        'A-3.png',
    ]
    exporter = DocumentExport(self.TEMPLATE)
    fnames = list(exporter.crop_fnames(document))
    self.assertEqual(expected, fnames)
def test_save_crops(self):
    """Crops are saved to the crops dir with the expected names and pixels."""
    with temp_directory_with_files(TESTDATA / 'shapes.inselect',
                                   TESTDATA / 'shapes.png') as tempdir:
        doc = InselectDocument.load(tempdir / 'shapes.inselect')

        crops_dir = DocumentExport(self.TEMPLATE).save_crops(doc)

        self.assertTrue(crops_dir.is_dir())
        self.assertEqual(crops_dir, doc.crops_dir)

        crop_paths = sorted(crops_dir.glob('*.png'))
        self.assertEqual(
            ['01_1.png', '02_2.png', '03_10.png', '04_3.png', '05_4.png'],
            [path.name for path in crop_paths]
        )

        # Each saved file should hold the same pixels as the matching
        # region of the scanned image
        boxes = doc.scanned.from_normalised(i['rect'] for i in doc.items)
        for box, path in zip(boxes, crop_paths):
            left, top, right, bottom = box.coordinates
            expected = doc.scanned.array[top:bottom, left:right]
            written = cv2.imread(str(path))
            self.assertTrue(np.all(expected == written))