def main():
    """Delete every percept carrying a given tag, optionally removing data files.

    Command-line tool: connects to the named database, finds all percepts
    joined to the given tag, and deletes them.  With --keep-percept-data only
    the database rows are removed; otherwise PerceptOps.destroy also removes
    the percept's data file.  With -n/--dryrun nothing is deleted and the
    candidates are merely listed.
    """
    parser = argparse.ArgumentParser(description='Deletes percepts from the database by tag and deletes the percept data files.')
    parser.add_argument('database', help='Name of database to use')
    parser.add_argument('tag', help='Percepts with this tag will be deleted')
    parser.add_argument('-c', '--config', type=str, default='~/.rigor.ini', help='Path to .rigor.ini config file. Default: ~/.rigor.ini')
    parser.add_argument('--keep-percept-data', action='store_true', default=False, help="Don't remove the percept data files")
    parser.add_argument('-n', '--dryrun', action='store_true', default=False, help="Don't actually delete anything")
    args = parser.parse_args()
    config = RigorDefaultConfiguration(args.config)
    db = Database(args.database, config)
    ops = PerceptOps(config)
    if args.dryrun:
        print('DRY RUN')
    with db.get_session() as session:
        percepts = session.query(Percept).join(PerceptTag).filter(PerceptTag.name == args.tag).all()
        if len(percepts) == 0:
            print('No percepts have the tag "{}"'.format(args.tag))
        # FIX: count from 1 so progress reads "1 of N" .. "N of N" (was 0-based)
        for ii, percept in enumerate(percepts, 1):
            print('{} of {}: deleting percept with id {}'.format(ii, len(percepts), percept.id))
            if args.dryrun:
                continue
            if args.keep_percept_data:
                # only delete database entry, not percept file
                session.delete(percept)
            else:
                # delete percept data file as well as database entries
                ops.destroy(percept, session)
    if args.dryrun:
        print('DRY RUN')
def cmd_clone(args):
    """Clone the source database to the destination and leave it unlocked."""
    if not args.quiet:
        print("Cloning database {0} to {1}".format(args.source, args.destination))
    if args.dry_run:
        return
    Database.cls().clone(args.source, args.destination)
    database = Database.instance(args.destination)
    mapper = DatabaseMapper(database)
    mapper.set_destroy_lock(False)  # new databases are always unlocked
def cmd_destroy(args):
    """Drop a database, exiting with status 2 if its destroy lock is set."""
    if not args.quiet:
        print("Destroying database {0}".format(args.database))
    database = Database.instance(args.database)
    mapper = DatabaseMapper(database)
    is_locked = mapper.get_destroy_lock()
    if is_locked:
        sys.stderr.write("Error: database is locked\n")
        sys.exit(2)
    # release our handles to the database before dropping it
    mapper = None
    database = None
    if not args.dry_run:
        Database.cls().drop(args.database)
def main():
    """Delete every percept carrying a given tag, optionally removing data files.

    Command-line tool: connects to the named database, finds all percepts
    joined to the given tag, and deletes them.  With --keep-percept-data only
    the database rows are removed; otherwise PerceptOps.destroy also removes
    the percept's data file.  With -n/--dryrun nothing is deleted and the
    candidates are merely listed.
    """
    parser = argparse.ArgumentParser(
        description='Deletes percepts from the database by tag and deletes the percept data files.')
    parser.add_argument('database', help='Name of database to use')
    parser.add_argument('tag', help='Percepts with this tag will be deleted')
    parser.add_argument('-c', '--config', type=str, default='~/.rigor.ini',
                        help='Path to .rigor.ini config file. Default: ~/.rigor.ini')
    parser.add_argument('--keep-percept-data', action='store_true', default=False,
                        help="Don't remove the percept data files")
    parser.add_argument('-n', '--dryrun', action='store_true', default=False,
                        help="Don't actually delete anything")
    args = parser.parse_args()
    config = RigorDefaultConfiguration(args.config)
    db = Database(args.database, config)
    ops = PerceptOps(config)
    if args.dryrun:
        print('DRY RUN')
    with db.get_session() as session:
        percepts = session.query(Percept).join(PerceptTag).filter(
            PerceptTag.name == args.tag).all()
        if len(percepts) == 0:
            print('No percepts have the tag "{}"'.format(args.tag))
        # FIX: count from 1 so progress reads "1 of N" .. "N of N" (was 0-based)
        for ii, percept in enumerate(percepts, 1):
            print('{} of {}: deleting percept with id {}'.format(
                ii, len(percepts), percept.id))
            if args.dryrun:
                continue
            if args.keep_percept_data:
                # only delete database entry, not percept file
                session.delete(percept)
            else:
                # delete percept data file as well as database entries
                ops.destroy(percept, session)
    if args.dryrun:
        print('DRY RUN')
def main():
    """Run the text detector over database images and write annotated copies.

    Each result is drawn onto its source image — expected boundaries in
    green, detected boundaries in yellow — and saved as "<id>.<format>"
    in the current directory.
    """
    parser = argparse.ArgumentParser(description='Runs text detector on relevant images')
    parser.add_argument('classifier_file', help='Path to classifier CLF')
    parser.add_argument('-l', '--limit', type=int, metavar='COUNT', required=False,
                        help='Maximum number of images to use')
    parser.add_argument('-r', '--random', action="store_true", default=False, required=False,
                        help='Fetch images ordered randomly if limit is active')
    parser.add_argument('database', help='Database to use')
    args = parser.parse_args()
    parameters["classifier_file"] = args.classifier_file
    runner = rigor.runner.Runner('text', parameters, limit=args.limit, random=args.random)
    database_mapper = DatabaseMapper(Database.instance(args.database))
    for result in runner.run():
        image = database_mapper.get_image_by_id(result[0])
        cv_image = rigor.imageops.fetch(image)
        # expected boundaries in green, detected in yellow
        cv2.polylines(cv_image, result[2], True, cv2.RGB(0, 255, 0))
        cv2.polylines(cv_image, result[1], True, cv2.RGB(255, 255, 0))
        cv2.imwrite(".".join((str(image["id"]), image["format"])), cv_image)
def cmd_lock(args):
    """Set the destroy lock on a database so cmd_destroy will refuse to drop it."""
    if not args.quiet:
        print("Locking database {0}".format(args.database))
    if args.dry_run:
        return
    mapper = DatabaseMapper(Database.instance(args.database))
    mapper.set_destroy_lock(True)
def cmd_unlock(args):
    """Clear the destroy lock on a database so it may be dropped again."""
    if not args.quiet:
        print("Unlocking database {0}".format(args.database))
    if args.dry_run:
        return
    mapper = DatabaseMapper(Database.instance(args.database))
    mapper.set_destroy_lock(False)
def main():
    """Split blur/noblur images into training and testing file lists.

    Queries the database for images in the blur domain, partitions them by
    their first annotation's model ('blur' vs. anything else), shuffles each
    partition, and writes tab-separated "<path>\t<model>" lines to
    training.txt and testing.txt according to -p/--percent_training.
    """
    parser = argparse.ArgumentParser(description='generates training/testing files for blur')
    parser.add_argument('-l', '--limit', type=int, metavar='COUNT', required=False, help='Maximum number of images to use')
    parser.add_argument('-r', '--random', action="store_true", default=False, required=False, help='Fetch images ordered randomly if limit is active')
    parser.add_argument('--tag_require', action='append', dest='tags_require', default=None, required=False, help='Tag that must be present on selected images')
    parser.add_argument('--tag_exclude', action='append', dest='tags_exclude', default=None, required=False, help='Tag that must not be present on selected images')
    parser.add_argument('-p', '--percent_training', dest='percent', default=0.25, required=False, help='Tag indicating what percent of images for training')
    parser.add_argument('database', help='Name of database to use')
    args = parser.parse_args()
    db = Database.instance(args.database)
    db_mapper = DatabaseMapper(db)
    images = db_mapper.get_images_for_analysis(kDomain, limit=args.limit, random=args.random, tags_require=args.tags_require, tags_exclude=args.tags_exclude)
    blur_images = list()
    noblur_images = list()
    for image in images:
        # classify by the model of the image's first annotation
        if image['annotations'][0]['model'] == 'blur':
            blur_images.append(image)
        else:
            noblur_images.append(image)
    random.shuffle(blur_images)
    random.shuffle(noblur_images)
    # percent may arrive as a string from argparse (no type= given)
    blur_training_len = int(len(blur_images) * float(args.percent))
    noblur_training_len = int(len(noblur_images) * float(args.percent))
    training_images = {'blur': blur_images[:blur_training_len],
                       'noblur': noblur_images[:noblur_training_len]}
    testing_images = {'blur': blur_images[blur_training_len:],
                      'noblur': noblur_images[noblur_training_len:]}
    # FIX: the output files were opened at function start and never closed;
    # use context managers so data is flushed even on error.  Also renamed the
    # loop variable to avoid shadowing the builtin 'file'.
    with open('training.txt', 'w') as training_file, open('testing.txt', 'w') as testing_file:
        for out_file, image_dict in ((training_file, training_images), (testing_file, testing_images)):
            for model in image_dict.keys():
                for image in image_dict[model]:
                    out_file.write('{}\t{}\n'.format(rigor.imageops.find(image), model))
def cmd_patch(args):
    """Apply schema patches from the database's current level up to args.level (or all)."""
    database = Database.instance(args.database)
    mapper = DatabaseMapper(database)
    # resume one level past whatever has already been applied
    start_level = mapper.get_patch_level() + 1
    stop_level = args.level if args.level else None
    patch(mapper, args.patch_dir, start_level, stop_level, args.dry_run, args.quiet)
def __init__(self, algorithm, config, database_name, parameters=None, checkpoint=None):
    """Runner wired to a Rigor database and percept storage.

    Initializes the base Runner with the algorithm, parameters, and
    checkpoint, then keeps the configuration plus handles derived from it.
    """
    Runner.__init__(self, algorithm, parameters, checkpoint)
    self._config = config
    self._perceptops = PerceptOps(config)
    self._database = Database(database_name, config)
def cmd_create(args):
    """Create a new database and patch it up to the requested (or latest) level.

    If patching fails, the freshly created database is dropped on a
    best-effort basis and the original exception is re-raised with its
    traceback.
    """
    if not args.quiet:
        print("Creating database {0}".format(args.database))
    if not args.dry_run:
        Database.cls().create(args.database)
    stop_level = None
    if args.level:
        stop_level = args.level
    try:
        database = Database.instance(args.database)
        mapper = DatabaseMapper(database)
        patch(mapper, args.patch_dir, 0, stop_level, args.dry_run, True)
    except Exception:
        # Save exception for later, then clean up the half-created database
        exc_info = sys.exc_info()
        try:
            Database.cls().drop(args.database)
        except Exception:
            pass  # best effort: prefer reporting the original failure
        # FIX: 'raise t, v, tb' is Python 2-only syntax (SyntaxError on
        # Python 3); re-raise the original exception with its traceback.
        raise exc_info[1].with_traceback(exc_info[2])
def main():
    """Sweep parameter sets for the money domain, recording results per set.

    For every parameter set, writes a timestamped .params file containing the
    JSON-encoded parameters and a matching .results file with one
    tab-separated line per processed image.
    """
    rigor.domain.money.init(parameters)
    logger = rigor.logger.getLogger(__file__)
    database_mapper = DatabaseMapper(Database.instance(kDatabase))
    logger.debug('Fetching image IDs from database')
    images = database_mapper.get_images_for_analysis(kDomain, kLimit, False)
    for parameter_set in get_parameters():
        # the timestamp pairs each .params file with its .results file
        stamp_format = "{0}-%Y%m%d_%H%M%S%f".format(kDomain)
        timestamp = datetime.utcnow().strftime(stamp_format)
        with open("{0}.params".format(timestamp), "w") as parameter_file:
            json.dump(parameter_set, parameter_file)
            parameter_file.write("\n")
        with open("{0}.results".format(timestamp), "w") as result_file:
            image_config = partial(rigor.domain.money.run, parameters=parameter_set)
            logger.debug('Processing {0} images'.format(len(images)))
            for result in map(image_config, images):
                result_file.write("\t".join(str(x) for x in result))
                result_file.write("\n")
def main():
    """Run the text detector over database images and write annotated copies.

    Each result is drawn onto its source image — expected boundaries in
    green, detected boundaries in yellow — and saved as "<id>.<format>"
    in the current directory.
    """
    parser = argparse.ArgumentParser(description='Runs text detector on relevant images')
    parser.add_argument('classifier_file', help='Path to classifier CLF')
    parser.add_argument('-l', '--limit', type=int, metavar='COUNT', required=False,
                        help='Maximum number of images to use')
    parser.add_argument('-r', '--random', action="store_true", default=False, required=False,
                        help='Fetch images ordered randomly if limit is active')
    parser.add_argument('database', help='Database to use')
    args = parser.parse_args()
    parameters["classifier_file"] = args.classifier_file
    runner = rigor.runner.Runner('text', parameters, limit=args.limit, random=args.random)
    database_mapper = DatabaseMapper(Database.instance(args.database))
    for result in runner.run():
        image = database_mapper.get_image_by_id(result[0])
        cv_image = rigor.imageops.fetch(image)
        # expected boundaries in green, detected in yellow
        cv2.polylines(cv_image, result[2], True, cv2.RGB(0, 255, 0))
        cv2.polylines(cv_image, result[1], True, cv2.RGB(255, 255, 0))
        cv2.imwrite(".".join((str(image["id"]), image["format"])), cv_image)
def get_database():
    """Copy the fixture database over the test database file and open it."""
    shutil.copyfile(constants.kFixtureFile, constants.kTestFile)
    return Database(constants.kTestFile, kConfig)
def main():
    """Split blur/noblur images into training and testing file lists.

    Queries the database for images in the blur domain, partitions them by
    their first annotation's model ('blur' vs. anything else), shuffles each
    partition, and writes tab-separated "<path>\t<model>" lines to
    training.txt and testing.txt according to -p/--percent_training.
    """
    parser = argparse.ArgumentParser(
        description='generates training/testing files for blur')
    parser.add_argument('-l', '--limit', type=int, metavar='COUNT', required=False,
                        help='Maximum number of images to use')
    parser.add_argument('-r', '--random', action="store_true", default=False, required=False,
                        help='Fetch images ordered randomly if limit is active')
    parser.add_argument('--tag_require', action='append', dest='tags_require', default=None,
                        required=False, help='Tag that must be present on selected images')
    parser.add_argument('--tag_exclude', action='append', dest='tags_exclude', default=None,
                        required=False, help='Tag that must not be present on selected images')
    parser.add_argument('-p', '--percent_training', dest='percent', default=0.25, required=False,
                        help='Tag indicating what percent of images for training')
    parser.add_argument('database', help='Name of database to use')
    args = parser.parse_args()
    db = Database.instance(args.database)
    db_mapper = DatabaseMapper(db)
    images = db_mapper.get_images_for_analysis(kDomain, limit=args.limit, random=args.random,
                                               tags_require=args.tags_require,
                                               tags_exclude=args.tags_exclude)
    blur_images = list()
    noblur_images = list()
    for image in images:
        # classify by the model of the image's first annotation
        if image['annotations'][0]['model'] == 'blur':
            blur_images.append(image)
        else:
            noblur_images.append(image)
    random.shuffle(blur_images)
    random.shuffle(noblur_images)
    # percent may arrive as a string from argparse (no type= given)
    blur_training_len = int(len(blur_images) * float(args.percent))
    noblur_training_len = int(len(noblur_images) * float(args.percent))
    training_images = {'blur': blur_images[:blur_training_len],
                       'noblur': noblur_images[:noblur_training_len]}
    testing_images = {'blur': blur_images[blur_training_len:],
                      'noblur': noblur_images[noblur_training_len:]}
    # FIX: the output files were opened at function start and never closed;
    # use context managers so data is flushed even on error.  Also renamed the
    # loop variable to avoid shadowing the builtin 'file'.
    with open('training.txt', 'w') as training_file, open('testing.txt', 'w') as testing_file:
        for out_file, image_dict in ((training_file, training_images),
                                     (testing_file, testing_images)):
            for model in image_dict.keys():
                for image in image_dict[model]:
                    out_file.write('{}\t{}\n'.format(rigor.imageops.find(image), model))
def importToRigor(rowStream, configFn, dryrun=False, verbose=False, overwrite=False, numRows=None):
    """Import percept and annotation rows into a Rigor database.

    Generator: consumes dict-like rows from rowStream, inserts each as either
    a Percept or an Annotation (chosen by the row's '_kind' column), and
    yields each row back after it has been processed.  Rows whose percept
    hash / annotation uid already exists in the database are skipped
    (overwrite=True is not implemented and raises NotImplementedError).

    Parameters:
        rowStream -- iterable of row objects with .data (dict), .filename,
                     and .lineNumber attributes
        configFn  -- path to a rigor configuration file
        dryrun    -- when True, the session is never committed (rolled back)
        verbose   -- when True, print each built record before insert
        overwrite -- unimplemented; True raises on duplicate rows
        numRows   -- optional total row count, used only for progress/ETA

    Raises:
        Exception for malformed rows (unknown _kind, bad percept reference),
        NotImplementedError for overwrite / locator lookup paths.
    """
    # set up database
    kConfig = rigor.config.RigorDefaultConfiguration(configFn)
    kDbName = 'YOUR_DB_NAME_HERE'  # placeholder: edit before real use
    db = Database(kDbName, kConfig)
    print('Connecting to database...')
    # session commits on exit unless this is a dry run
    with db.get_session(commit=not dryrun) as session:
        print('...connected')
        print('Importing rows...')
        # memo of the most recently resolved percept, so consecutive
        # annotations for the same percept skip the database lookup
        lastHash = None
        lastLocator = None
        lastPerceptId = None
        start = time.time()  # for the rows-per-minute / ETA progress report
        for ii, row in enumerate(rowStream):
            # cast everything to a string, in case we're getting live Python objects
            # instead of reading from a file
            # TODO: move this to csvtoolkit
            for k, v in row.data.items():
                if v is None:
                    v = ''  # CSV module saves None as ''
                row.data[k] = str(v)
            if row.data['_kind'] == 'percept':
                #---------------------------------------- PERCEPT
                # check for existing percept with same 'hash'
                hash = row.data['p:hash']
                if hash:
                    sqlExistingPercept = session.query(rigor.types.Percept).filter(rigor.types.Percept.hash == hash).first()
                    if sqlExistingPercept:
                        if overwrite:
                            print('TODO: overwrite')
                            raise NotImplementedError
                        else:
                            # skip this row
                            #print('WARNING: {} line {}: the percept hash "{}" is already in the Rigor database. Skipping this percept.'.format(row.filename, row.lineNumber, hash))
                            continue
                # build percept
                byte_count = row.data['p:byte_count']
                if byte_count == '':
                    byte_count = 0
                percept = rigor.types.Percept()
                session.add(percept)
                percept.byte_count = byte_count
                percept.credentials = row.data['p:credentials']
                percept.device_id = row.data['p:device_id']
                percept.format = row.data['p:format']
                percept.hash = row.data['p:hash']
                percept.locator = row.data['p:locator']
                # parse ISO-formatted UTC timestamp ('Z' suffix)
                percept.stamp = datetime.datetime.strptime(row.data['p:stamp'], "%Y-%m-%dT%H:%M:%SZ" )
                percept.x_size = row.data['p:x_size'] or None  # '' becomes None
                percept.y_size = row.data['p:y_size'] or None
                # sample_count, sample_rate, sensors, collections
                # tags, properties
                # tags
                tags = row.data['p:tags']
                if tags:
                    tags = tags.split(',')
                    for tag in tags:
                        percept.tags.append(rigor.types.PerceptTag(tag))
                # properties: columns named 'p:property:<name>' with non-empty values
                for fieldName, value in row.data.items():
                    if not fieldName.startswith('p:property:'):
                        continue
                    propertyName = fieldName.replace('p:property:', '')
                    if propertyName == '' or value == '':
                        continue
                    percept.properties[propertyName] = rigor.types.PerceptProperty(name=propertyName, value=value)
                if verbose:
                    print('percept ready for the database:')
                    print(pprint.pformat(percept.serialize()))
                #---------------------------------------- END PERCEPT
            elif row.data['_kind'] == 'annotation':
                #---------------------------------------- ANNOTATION
                # check for existing annotation with same 'uid' property
                uid = row.data['a:property:uid']
                if uid:
                    sqlExistingAnnotation = session.query(rigor.types.AnnotationProperty).filter(rigor.types.AnnotationProperty.name == 'uid', rigor.types.AnnotationProperty.value == uid).first()
                    if sqlExistingAnnotation:
                        if overwrite:
                            print('TODO: overwrite')
                            raise NotImplementedError
                        else:
                            # skip this row
                            # print('WARNING: {} line {}: the annotation uid "{}" is already in the Rigor database. Skipping this annotation.'.format(row.filename, row.lineNumber, uid))
                            continue
                # find the percept id to attach this annotation to
                perceptId = None
                hash = row.data['a:percept_hash']
                locator = row.data['a:percept_locator']
                # exactly one of hash / locator must be provided
                if bool(hash) == bool(locator):
                    raise Exception('{}: line {}: annotation must have either a:percept_hash or a:percept_locator, but not both or neither.'.format(row.filename, row.lineNumber))
                if hash:
                    if lastHash == hash:
                        # same percept as the previous annotation: reuse cached id
                        perceptId = lastPerceptId
                    else:
                        try:
                            sqlPercept = session.query(rigor.types.Percept).filter(rigor.types.Percept.hash == hash).one()
                            perceptId = sqlPercept.id
                            lastHash = hash
                            lastPerceptId = perceptId
                            lastLocator = None
                        except sqlalchemy.orm.exc.NoResultFound:
                            raise Exception('ERROR: {} line {}: percept hash from CSV not found in Rigor db: {}'.format(row.filename, row.lineNumber, hash))
                elif locator:
                    print('TODO: look up percept id from a:percept_locator')
                    raise NotImplementedError
                # build annotation
                annotation = rigor.types.Annotation()
                session.add(annotation)
                if row.data['a:boundary'] != '':
                    boundary = json.loads(row.data['a:boundary'])
                    if boundary:
                        annotation.boundary = boundary
                if row.data['a:confidence'] != '':
                    annotation.confidence = int(row.data['a:confidence'])
                annotation.domain = row.data['a:domain']
                annotation.percept_id = perceptId
                annotation.model = row.data['a:model']
                # parse iso6801 formatted timestamp in UTC ('Z' suffix)
                annotation.stamp = datetime.datetime.strptime(row.data['a:stamp'], "%Y-%m-%dT%H:%M:%SZ" )
                # tags
                tags = row.data['a:tags']
                if tags:
                    tags = tags.split(',')
                    for tag in tags:
                        annotation.tags.append(rigor.types.AnnotationTag(tag))
                # properties: columns named 'a:property:<name>' with non-empty values
                for fieldName, value in row.data.items():
                    if not fieldName.startswith('a:property:'):
                        continue
                    propertyName = fieldName.replace('a:property:', '')
                    if propertyName == '' or value == '':
                        continue
                    annotation.properties[propertyName] = rigor.types.AnnotationProperty(name=propertyName, value=value)
                if verbose:
                    print('annotation ready for the database:')
                    print(pprint.pformat(annotation.serialize()))
                #---------------------------------------- ANNOTATION
            else:
                # _kind is neither annotation nor percept
                raise Exception('{}: line {}: Unknown value in "_kind" column: "{}". Should be "percept" or "annotation".'.format(row.filename, row.lineNumber, row.data['_kind']))
            # flush the session occasionally
            # this prevents a very large sqlalchemy session from building up locally, filling up memory
            if (ii+1) % 10 == 0:
                if numRows:
                    # progress report with ETA, based on elapsed time so far
                    seconds = time.time() - start
                    secondsPerRow = seconds/(ii+1)
                    remainingRows = numRows - (ii+1)
                    secondsLeft = remainingRows * secondsPerRow
                    rowsPerMinute = 60 / max(secondsPerRow, 0.001)  # clamp avoids division by ~0
                    print('{}: {} of {} complete. {} minutes left. {:0.1f} rows per minute.'.format(row.filename, ii+1, numRows, int(secondsLeft/60), rowsPerMinute))
                else:
                    print('{} complete'.format(ii+1))
                session.flush()
                if not dryrun:
                    session.commit()
            yield row
        # done consuming rows from the stream.
        # finished adding all the rows. now flush the session.
        if dryrun:
            print('Rolling back (DRY RUN)...')
        else:
            print('Flushing one more time, then committing...')
        session.flush()
    # with statement is over; the session context manager performed the
    # final commit (or rollback on dry run) per commit=not dryrun above
    if dryrun:
        print('Rolled back transaction.')
    else:
        print('Transaction was committed.')
def importdb():
    """Copy the fixture database into the import location, clear its percepts, and return it."""
    shutil.copyfile(constants.kFixtureFile, constants.kImportDatabase)
    database = Database(constants.kImportDatabase, kConfig)
    # start from an empty percept table so imports can be verified exactly
    with database.get_session() as session:
        session.query(Percept).delete()
    return database
"""" Script to delete ground truth (image, thumbnail, and all!) """ import argparse import rigor.imageops from rigor.dbmapper import DatabaseMapper from rigor.database import Database parser = argparse.ArgumentParser(description='Deletes ground truth (image, thumbnail, and all!)') parser.add_argument('database', help='Name of database to use') parser.add_argument('delete_ids', metavar='delete_id', nargs='+', type=int, help='ID(s) of images to delete') args = parser.parse_args() db = Database.instance(args.database) db_mapper = DatabaseMapper(db) for image_id in args.delete_ids: image = db_mapper.get_image_by_id(image_id) print("OBLITERATING {}".format(image['id'])) rigor.imageops.destroy_image(db, image)
def __init__(self, database):
    """Bind this object to a mapper for the named database."""
    instance = Database.instance(database)
    self._dbmapper = DatabaseMapper(instance)
"""" Script to delete ground truth (image, thumbnail, and all!) """ import argparse import rigor.imageops from rigor.dbmapper import DatabaseMapper from rigor.database import Database parser = argparse.ArgumentParser( description='Deletes ground truth (image, thumbnail, and all!)') parser.add_argument('database', help='Name of database to use') parser.add_argument('delete_ids', metavar='delete_id', nargs='+', type=int, help='ID(s) of images to delete') args = parser.parse_args() db = Database.instance(args.database) db_mapper = DatabaseMapper(db) for image_id in args.delete_ids: image = db_mapper.get_image_by_id(image_id) print("OBLITERATING {}".format(image['id'])) rigor.imageops.destroy_image(db, image)