Пример #1
0
 def __init__(self, algorithm, config, database_name,
              parameters=None, checkpoint=None):
     """Initialise the base Runner and the database helpers for this runner.

     ``algorithm``, ``parameters`` and ``checkpoint`` are forwarded unchanged
     to ``Runner.__init__``.  ``config`` is stored on the instance and is
     also used to construct the ``Database`` (together with
     ``database_name``) and the ``PerceptOps`` helper.
     """
     # Base-class initialisation comes first, with the same positional order
     # the base class is given in the original code.
     Runner.__init__(self, algorithm, parameters, checkpoint)

     # Keep the configuration and the objects built from it on the instance.
     self._config = config
     self._database = Database(database_name, config)
     self._perceptops = PerceptOps(config)
Пример #2
0
def main():
    """Command-line entry point: delete all percepts that carry a given tag.

    Positional arguments are the database name and the tag.  Options:

    * ``-c/--config``: path to the rigor config file (default ``~/.rigor.ini``).
    * ``--keep-percept-data``: only delete the database entries via
      ``session.delete``; otherwise ``PerceptOps.destroy`` is used, which is
      expected to remove the percept data file as well.
    * ``-n/--dryrun``: list what would be deleted without deleting anything.

    Progress is reported as ``"<i> of <total>"`` with ``i`` counted from 1.
    """
    parser = argparse.ArgumentParser(
        description=
        'Deletes percepts from the database by tag and deletes the percept data files.'
    )
    parser.add_argument('database', help='Name of database to use')
    parser.add_argument('tag', help='Percepts with this tag will be deleted')
    parser.add_argument(
        '-c',
        '--config',
        type=str,
        default='~/.rigor.ini',
        help='Path to .rigor.ini config file.  Default: ~/.rigor.ini')
    parser.add_argument('--keep-percept-data',
                        action='store_true',
                        default=False,
                        help="Don't remove the percept data files")
    parser.add_argument('-n',
                        '--dryrun',
                        action='store_true',
                        default=False,
                        help="Don't actually delete anything")
    args = parser.parse_args()
    config = RigorDefaultConfiguration(args.config)

    db = Database(args.database, config)
    ops = PerceptOps(config)

    # The dry-run banner is printed both before and after the listing so it
    # is visible regardless of how much output scrolls past.
    if args.dryrun:
        print('DRY RUN')

    with db.get_session() as session:
        percepts = session.query(Percept).join(PerceptTag).filter(
            PerceptTag.name == args.tag).all()
        if not percepts:
            print('No percepts have the tag "{}"'.format(args.tag))

        total = len(percepts)
        # Count from 1 so the final line reads "N of N" rather than "N-1 of N".
        for position, percept in enumerate(percepts, start=1):
            print('{} of {}: deleting percept with id {}'.format(
                position, total, percept.id))
            if args.dryrun:
                continue
            if args.keep_percept_data:
                # only delete database entry, not percept file
                session.delete(percept)
            else:
                # delete percept data file as well as database entries
                ops.destroy(percept, session)

    if args.dryrun:
        print('DRY RUN')
Пример #3
0
def importdb():
    """Create the import database from the fixture file, emptied of percepts.

    Copies ``constants.kFixtureFile`` over ``constants.kImportDatabase``,
    opens it as a ``Database`` using ``kConfig``, issues a bulk delete on the
    ``Percept`` query, and returns the ``Database`` object.
    """
    # Start from a fresh copy of the fixture every time.
    shutil.copyfile(constants.kFixtureFile, constants.kImportDatabase)
    db = Database(constants.kImportDatabase, kConfig)

    # Remove the percepts that came with the fixture.
    with db.get_session() as session:
        session.query(Percept).delete()

    return db
Пример #4
0
def importToRigor(rowStream, configFn, dryrun=False, verbose=False, overwrite=False, numRows=None):
	"""Import percept and annotation rows from ``rowStream`` into a Rigor database.

	This is a generator: nothing happens until it is iterated.  Every row that
	is added to the session is yielded back to the caller, which allows this
	function to sit in the middle of a row-processing pipeline.  Rows that are
	skipped because they already exist in the database are NOT yielded.

	Args:
		rowStream: iterable of row objects.  Each row is accessed through
			``row.data`` (a dict of column name -> value, including a
			``'_kind'`` key that must be ``'percept'`` or ``'annotation'``),
			``row.filename`` and ``row.lineNumber`` (used in messages).
		configFn: path handed to ``rigor.config.RigorDefaultConfiguration``.
		dryrun: when true the session is opened with ``commit=False`` and the
			periodic ``session.commit()`` calls are skipped.
		verbose: when true, pretty-print each serialized percept/annotation
			after it has been built.
		overwrite: when true and a matching percept hash / annotation uid is
			already in the database, ``NotImplementedError`` is raised
			(overwriting is not implemented).  When false such rows are
			silently skipped.
		numRows: optional total row count; only used to print an estimate of
			the remaining time in the progress output.

	Yields:
		Each row that was imported, after all of its ``row.data`` values have
		been converted to ``str`` in place.

	Raises:
		NotImplementedError: ``overwrite`` was requested for an existing
			record, or an annotation identifies its percept by
			``a:percept_locator`` (lookup by locator is not implemented).
		Exception: an annotation has both or neither of ``a:percept_hash`` /
			``a:percept_locator``, an annotation's percept hash is not in the
			database, or ``_kind`` has an unknown value.
	"""
	# set up database
	kConfig = rigor.config.RigorDefaultConfiguration(configFn)
	# NOTE(review): placeholder database name -- must be replaced before use.
	kDbName = 'YOUR_DB_NAME_HERE'
	db = Database(kDbName, kConfig)

	print('Connecting to database...')
	with db.get_session(commit=not dryrun) as session:
		print('...connected')
		print('Importing rows...')

		# One-entry cache of the most recent percept lookup by hash, so that
		# consecutive annotations on the same percept need only one query.
		lastHash = None
		lastLocator = None
		lastPerceptId = None
		start = time.time()
		for ii, row in enumerate(rowStream):

			# cast everything to a string, in case we're getting live Python objects
			# instead of reading from a file
			# TODO: move this to csvtoolkit
			for k, v in row.data.items():
				if v is None: v = '' # CSV module saves None as ''
				row.data[k] = str(v)

			if row.data['_kind'] == 'percept':
				#---------------------------------------- PERCEPT

				# check for existing percept with same 'hash'
				hash = row.data['p:hash']
				if hash:
					sqlExistingPercept = session.query(rigor.types.Percept).filter(rigor.types.Percept.hash == hash).first()
					if sqlExistingPercept:
						if overwrite:
							print('TODO: overwrite')
							raise NotImplementedError
						else:
							# skip this row
							# (the `continue` also bypasses the periodic flush/progress
							# block and the `yield` at the bottom of the loop)
							#print('WARNING: {} line {}: the percept hash "{}" is already in the Rigor database.  Skipping this percept.'.format(row.filename, row.lineNumber, hash))
							continue

				# build percept
				# NOTE(review): a non-empty byte_count is still a str at this point
				# (everything was cast to str above); presumably the column type
				# coerces it -- confirm.
				byte_count = row.data['p:byte_count']
				if byte_count == '': byte_count = 0

				percept = rigor.types.Percept()
				session.add(percept)
				percept.byte_count = byte_count
				percept.credentials = row.data['p:credentials']
				percept.device_id = row.data['p:device_id']
				percept.format = row.data['p:format']
				percept.hash = row.data['p:hash']
				percept.locator = row.data['p:locator']
				# timestamp must match YYYY-MM-DDTHH:MM:SSZ exactly; strptime raises
				# ValueError otherwise
				percept.stamp = datetime.datetime.strptime(row.data['p:stamp'], "%Y-%m-%dT%H:%M:%SZ" )
				# empty strings become None for the size columns
				percept.x_size = row.data['p:x_size'] or None
				percept.y_size = row.data['p:y_size'] or None
				# not imported: sample_count, sample_rate, sensors, collections
				# imported below: tags, properties

				# tags: comma-separated list; individual tags are not stripped
				tags = row.data['p:tags']
				if tags:
					tags = tags.split(',')
					for tag in tags:
						percept.tags.append(rigor.types.PerceptTag(tag))

				# properties: every non-empty 'p:property:<name>' column becomes a
				# PerceptProperty keyed by <name>
				for fieldName, value in row.data.items():
					if not fieldName.startswith('p:property:'):
						continue
					propertyName = fieldName.replace('p:property:', '')
					if propertyName == '' or value == '':
						continue
					percept.properties[propertyName] = rigor.types.PerceptProperty(name=propertyName, value=value)
				if verbose:
					print('percept ready for the database:')
					print(pprint.pformat(percept.serialize()))

				#---------------------------------------- END PERCEPT
			elif row.data['_kind'] == 'annotation':
				#---------------------------------------- ANNOTATION

				# check for existing annotation with same 'uid' property
				uid = row.data['a:property:uid']
				if uid:
					sqlExistingAnnotation = session.query(rigor.types.AnnotationProperty).filter(rigor.types.AnnotationProperty.name == 'uid', rigor.types.AnnotationProperty.value == uid).first()
					if sqlExistingAnnotation:
						if overwrite:
							print('TODO: overwrite')
							raise NotImplementedError
						else:
							# skip this row
							# (the `continue` also bypasses the periodic flush/progress
							# block and the `yield` at the bottom of the loop)
							# print('WARNING: {} line {}: the annotation uid "{}" is already in the Rigor database.  Skipping this annotation.'.format(row.filename, row.lineNumber, uid))
							continue

				# find the percept id to attach this annotation to
				perceptId = None
				hash = row.data['a:percept_hash']
				locator = row.data['a:percept_locator']
				# exactly one of hash / locator must be non-empty
				if bool(hash) == bool(locator):
					raise Exception('{}: line {}: annotation must have either a:percept_hash or a:percept_locator, but not both or neither.'.format(row.filename, row.lineNumber))
				if hash:
					if lastHash == hash:
						# same percept as the previous lookup: reuse the cached id
						perceptId = lastPerceptId
					else:
						try:
							# .one() requires exactly one match; only the "no match"
							# case is translated into a friendlier error below
							sqlPercept = session.query(rigor.types.Percept).filter(rigor.types.Percept.hash == hash).one()
							perceptId = sqlPercept.id
							lastHash = hash
							lastPerceptId = perceptId
							lastLocator = None
						except sqlalchemy.orm.exc.NoResultFound:
							raise Exception('ERROR: {} line {}: percept hash from CSV not found in Rigor db: {}'.format(row.filename, row.lineNumber, hash))
				elif locator:
					print('TODO: look up percept id from a:percept_locator')
					raise NotImplementedError

				# build annotation
				annotation = rigor.types.Annotation()
				session.add(annotation)
				# boundary is JSON-encoded; empty or falsy values leave the
				# attribute unset
				if row.data['a:boundary'] != '':
					boundary = json.loads(row.data['a:boundary'])
					if boundary:
						annotation.boundary = boundary
				if row.data['a:confidence'] != '':
					annotation.confidence = int(row.data['a:confidence'])
				annotation.domain = row.data['a:domain']
				annotation.percept_id = perceptId
				annotation.model = row.data['a:model']

				# parse ISO 8601 formatted timestamp in UTC ('Z' suffix)
				annotation.stamp = datetime.datetime.strptime(row.data['a:stamp'], "%Y-%m-%dT%H:%M:%SZ" )

				# tags: comma-separated list; individual tags are not stripped
				tags = row.data['a:tags']
				if tags:
					tags = tags.split(',')
					for tag in tags:
						annotation.tags.append(rigor.types.AnnotationTag(tag))

				# properties: every non-empty 'a:property:<name>' column becomes an
				# AnnotationProperty keyed by <name>
				for fieldName, value in row.data.items():
					if not fieldName.startswith('a:property:'):
						continue
					propertyName = fieldName.replace('a:property:', '')
					if propertyName == '' or value == '':
						continue
					annotation.properties[propertyName] = rigor.types.AnnotationProperty(name=propertyName, value=value)
				if verbose:
					print('annotation ready for the database:')
					print(pprint.pformat(annotation.serialize()))

				#---------------------------------------- END ANNOTATION
			else: # _kind is neither annotation nor percept
				raise Exception('{}: line {}: Unknown value in "_kind" column: "{}".  Should be "percept" or "annotation".'.format(row.filename, row.lineNumber, row.data['_kind']))

			# flush the session occasionally
			# this prevents a very large sqlalchemy session from building up locally, filling up memory
			# (runs when the 1-based row index is a multiple of 10; rows skipped
			# via `continue` never reach this point)
			if (ii+1) % 10 == 0:
				if numRows:
					# estimate the remaining time from the average time per row so far
					seconds = time.time() - start
					secondsPerRow = seconds/(ii+1)
					remainingRows = numRows - (ii+1)
					secondsLeft = remainingRows * secondsPerRow
					# clamp to avoid dividing by zero when rows are processed very fast
					rowsPerMinute = 60 / max(secondsPerRow, 0.001)
					print('{}: {} of {} complete.  {} minutes left.  {:0.1f} rows per minute.'.format(row.filename, ii+1, numRows, int(secondsLeft/60), rowsPerMinute))
				else:
					print('{} complete'.format(ii+1))
				session.flush()
				if not dryrun:
					session.commit()

			# hand the imported row on to the consumer of this generator
			yield row

		# done consuming rows from the stream.
		# finished adding all the rows.  now flush the session.
		# NOTE(review): the final commit / rollback itself is presumably done by
		# the get_session() context manager according to its `commit` argument --
		# confirm against Database.get_session.
		if dryrun:
			print('Rolling back (DRY RUN)...')
		else:
			print('Flushing one more time, then committing...')
			session.flush()


	# with statement is over
	if dryrun:
		print('Rolled back transaction.')
	else:
		print('Transaction was committed.')
Пример #5
0
def get_database():
    """Return a ``Database`` opened on a fresh copy of the fixture file.

    ``constants.kFixtureFile`` is copied to ``constants.kTestFile`` on every
    call, and the returned ``Database`` is constructed on that copy using
    ``kConfig``.
    """
    # Copy first so the Database is opened on the new file.
    shutil.copyfile(constants.kFixtureFile, constants.kTestFile)
    return Database(constants.kTestFile, kConfig)