def __init__(self, db_string, context='cnf', jobs=1, tlim=5000, mlim=2000, flim=1000, separator=" ", join_type="LEFT", verbose=False):
    """Split the ``os.pathsep``-separated *db_string* and open the databases.

    tlim/mlim/flim are the limits carried along for jobs: time (seconds),
    memory (megabytes) and file size (megabytes).
    """
    self.databases = db_string.split(os.pathsep)
    self.context = context
    self.jobs = jobs
    # resource limits: time (seconds), memory (MB), file size (MB)
    self.tlim, self.mlim, self.flim = tlim, mlim, flim
    # query/output configuration
    self.separator = separator
    self.join_type = join_type
    self.verbose = verbose
    # connection spanning all listed database files
    self.database = Database(self.databases, self.verbose)
def test_create_db(self):
    """A freshly created database file is a valid, empty GBD database."""
    os.remove(self.TDB)
    with Database([self.TDB], verbose=True) as db:
        assert Schema.is_database(self.TDB)
        assert len(db.get_databases()) == 1
        assert not db.get_features()
        assert not db.get_tables()
    # reopen: the empty schema must have been persisted
    with Database([self.TDB], verbose=True) as db:
        assert db.dpath(self.TDBN) == self.TDB
        assert db.dmain(self.TDBN)
        assert not db.dcontexts(self.TDBN)
        assert not db.dtables(self.TDBN)
        assert not db.dviews(self.TDBN)
def test_create_unique_feature(self):
    """A feature created with a default value lands in the shared 'features' table."""
    os.remove(self.TDB)
    feat = "featB"
    expected_table = self.TDBN + ".features"
    with Database([self.TDB], verbose=True) as db:
        db.create_feature(feat, "empty")
        assert feat in db.get_features()
        assert "features" in db.get_tables()
    # reopen and verify the persisted feature metadata
    with Database([self.TDB], verbose=True) as db:
        assert db.ftable(feat) == expected_table
        assert db.fcolumn(feat) == feat
        assert db.fdefault(feat) == "empty"
        assert not db.fvirtual(feat)
        assert db.fcontext(feat) == "cnf"
        assert db.fdatabase(feat) == self.TDBN
def test_create_feature(self):
    """A feature created without a default gets its own table and no default."""
    os.remove(self.TDB)
    FEAT = "featA"
    NAME = self.TDBN + "." + FEAT
    with Database([self.TDB], verbose=True) as db:
        db.create_feature(FEAT)
        assert FEAT in db.get_features()
        assert FEAT in db.get_tables()
    # reopen and verify the persisted feature metadata
    with Database([self.TDB], verbose=True) as db:
        assert db.ftable(FEAT) == NAME
        assert db.fcolumn(FEAT) == "value"
        # FIX: identity comparison for None (PEP 8), was '== None'
        assert db.fdefault(FEAT) is None
        assert not db.fvirtual(FEAT)
        assert db.fcontext(FEAT) == "cnf"
        assert db.fdatabase(FEAT) == self.TDBN
def get_features(self, path=None):
    """Return all feature names (tables and views).

    With *path* None, all attached databases are consulted; with *path*
    naming one of the attached database files, only that file is consulted;
    any other path yields an empty list.
    """
    # FIX: 'is None' instead of '== None' (PEP 8 singleton comparison)
    if path is None:
        return self.database.tables_and_views()
    elif path in self.databases:
        # open a fresh single-database connection scoped to this call
        with Database([path]) as db:
            return db.tables_and_views()
    else:
        return []
def get_virtual_features(self, path=None):
    """Return all virtual feature names (views only).

    Same dispatch as get_features: None means all attached databases,
    a known path means that database only, anything else is empty.
    """
    # FIX: 'is None' instead of '== None' (PEP 8 singleton comparison)
    if path is None:
        return self.database.views()
    elif path in self.databases:
        # open a fresh single-database connection scoped to this call
        with Database([path]) as db:
            return db.views()
    else:
        return []
def get_feature_info(self, name, path=None):
    """Return merged system and meta records for feature *name*.

    Meta-record keys override system-record keys on collision (dict-merge
    order). With *path* given, a dedicated connection to that database is
    opened for the lookup.
    """
    if path is None:
        system_record = self.database.system_record(name)
        meta_record = self.database.meta_record(name)
        return {**system_record, **meta_record}
    else:
        # FIX: Database takes a list of paths — siblings use Database([path]);
        # the original passed the bare string here.
        with Database([path]) as db:
            system_record = db.system_record(name)
            meta_record = db.meta_record(name)
            return {**system_record, **meta_record}
def set_attributes_locked(self, hash, attributes):
    """Write (cmd, feature-name, value) triples for *hash* under the mutex.

    A fresh Database connection is opened because sqlite cursors cannot be
    shared across threads (cursor initialization issue).
    """
    # FIX: lock held via context manager — release is guaranteed without
    # the manual acquire/try/finally/release dance.
    with self.mutex:
        with Database(self.databases) as db:
            for attr in attributes:
                cmd, name, value = attr[0], attr[1], attr[2]
                # NOTE(review): values are interpolated into the SQL string;
                # acceptable only for trusted internal data — parameterized
                # queries would be safer if any input is external.
                db.submit('{} INTO {} (hash, value) VALUES ("{}", "{}")'.format(cmd, name, hash, value))
def test_insert_values(self):
    """set_values stores several values per hash; queries resolve all of them."""
    os.remove(self.TDB)
    FEAT = "letter"
    # FIX: removed unused local NAME (was assigned but never referenced)
    with Database([self.TDB], verbose=True) as db:
        db.create_feature(FEAT, "empty")
        db.set_values(FEAT, "a", ['1', '2', '3'])
        db.set_values(FEAT, "b", ['4', '5', '6'])
        q = GBDQuery(db)
        r = db.query(q.build_query(resolve=[FEAT]))
        eprint(r)
        assert r == [('1', 'a'), ('2', 'a'), ('3', 'a'), ('4', 'b'), ('5', 'b'), ('6', 'b')]
        # restricting the query to one value still resolves all of that hash's values
        r = db.query(q.build_query("{}=a".format(FEAT), resolve=[FEAT]))
        assert r == [('1', 'a'), ('2', 'a'), ('3', 'a')]
def __enter__(self):
    # Open the (multi-file) database connection when entering the context.
    self.database = Database(self.databases, self.verbose)
    # ExitStack.pop_all() transfers ownership of the entered context out of
    # the 'with' block, so the database stays open after this method returns
    # and is closed later by __exit__ via self._stack.
    with ExitStack() as stack:
        stack.enter_context(self.database)
        self._stack = stack.pop_all()
    return self
class GbdApi:
    """Facade over one or more GBD benchmark databases (':'-separated paths)."""

    # Create a new GbdApi object which operates on the given databases
    def __init__(self, db_string, jobs=1, separator=" ", join_type="LEFT", verbose=False):
        self.databases = db_string.split(":")
        self.jobs = jobs
        # guards multi-threaded writes, see set_attributes_locked()
        self.mutex = multiprocessing.Lock()
        self.separator = separator
        self.join_type = join_type
        self.verbose = verbose

    def __enter__(self):
        self.database = Database(self.databases, self.verbose)
        # pop_all() keeps the entered context alive beyond this 'with';
        # __exit__ closes it via self._stack
        with ExitStack() as stack:
            stack.enter_context(self.database)
            self._stack = stack.pop_all()
        return self

    def __exit__(self, exc_type, exc, traceback):
        self._stack.__exit__(exc_type, exc, traceback)

    # Calculate GBD hash
    @staticmethod
    def hash_file(path):
        return gbd_hash(path)

    # Import data from CSV file
    def import_file(self, path, key, source, target):
        if not self.feature_exists(target):
            print("Feature {} does not exist. Import canceled.".format(target))
            # FIX: original fell through and imported into the missing
            # feature anyway; honor the cancellation.
            return
        with open(path, newline='') as csvfile:
            csvreader = csv.DictReader(csvfile, delimiter=self.separator, quotechar='\'')
            # keep only rows with a non-blank source value
            lst = [(row[key].strip(), row[source].strip()) for row in csvreader if row[source] and row[source].strip()]
            print("Inserting {} values into group {}".format(len(lst), target))
            self.database.bulk_insert(target, lst)

    # Initialize table 'local' with instances found under given path
    def init_database(self, path=None):
        eprint('Initializing local path entries {} using {} cores'.format(path, self.jobs))
        if self.jobs == 1 and multiprocessing.cpu_count() > 1:
            eprint("Activate parallel initialization using --jobs={}".format(multiprocessing.cpu_count()))
        benchmark_administration.remove_benchmarks(self.database)
        benchmark_administration.register_benchmarks(self, self.database, path, self.jobs)

    def bootstrap(self, named_algo, hashes):
        bootstrap.bootstrap(self, self.database, named_algo, hashes, self.jobs)

    def get_databases(self):
        return self.databases

    # Get all features (or those of given db)
    def get_features(self, path=None):
        # FIX: 'is None' instead of '== None' (PEP 8)
        if path is None:
            return self.database.tables_and_views()
        elif path in self.databases:
            with Database([path]) as db:
                return db.tables_and_views()
        else:
            return []

    # Get all material features (or those of given db)
    def get_material_features(self, path=None):
        if path is None:
            return self.database.tables()
        elif path in self.databases:
            with Database([path]) as db:
                return db.tables()
        else:
            return []

    # Get all virtual features (or those of given db)
    def get_virtual_features(self, path=None):
        if path is None:
            return self.database.views()
        elif path in self.databases:
            with Database([path]) as db:
                return db.views()
        else:
            return []

    # Check for existence of given feature
    def feature_exists(self, name):
        return name in self.database.tables()

    # Creates the given feature
    def create_feature(self, name, default_value=None):
        self.database.create_table(name, default_value)

    # Removes the given feature
    def remove_feature(self, name):
        self.database.delete_table(name)

    # Rename the given feature
    def rename_feature(self, old_name, new_name):
        self.database.rename_table(old_name, new_name)

    def get_feature_size(self, name):
        if name not in self.get_features():
            raise ValueError("Attribute '{}' is not available".format(name))
        return self.database.table_size(name)

    # Retrieve information about a specific feature
    def get_feature_info(self, name, path=None):
        if path is None:
            system_record = self.database.system_record(name)
            meta_record = self.database.meta_record(name)
            return {**system_record, **meta_record}
        else:
            # FIX: Database takes a list of paths (cf. get_features);
            # original passed the bare string.
            with Database([path]) as db:
                system_record = db.system_record(name)
                meta_record = db.meta_record(name)
                return {**system_record, **meta_record}

    def meta_set(self, feature, meta_feature, value):
        self.database.meta_set(feature, meta_feature, value)

    # clears specified meta-features of feature,
    # or clears all meta-features if meta_feature is not specified
    def meta_clear(self, feature, meta_feature=None):
        self.database.meta_clear(feature, meta_feature)

    def callback_set_attributes_locked(self, arg):
        self.set_attributes_locked(arg['hashvalue'], arg['attributes'])

    def set_attributes_locked(self, hash, attributes):
        # FIX: lock held via context manager instead of manual
        # acquire/try/finally/release.
        with self.mutex:
            # create new connection due to limitations of multi-threaded use
            # (cursor initialization issue)
            with Database(self.databases) as db:
                for attr in attributes:
                    cmd, name, value = attr[0], attr[1], attr[2]
                    # NOTE(review): string-built SQL; fine for trusted
                    # internal data, parameterize if inputs can be external.
                    db.submit('{} INTO {} (hash, value) VALUES ("{}", "{}")'.format(cmd, name, hash, value))

    # Set the attribute value for the given hashes
    def set_attribute(self, feature, value, hash_list, force):
        if feature not in self.get_material_features():
            raise ValueError("Attribute '{}' is not available (or virtual)".format(feature))
        values = ', '.join(['("{}", "{}")'.format(h, value) for h in hash_list])
        if self.database.table_unique(feature):
            if force:
                self.database.submit('DELETE FROM {} WHERE hash IN ("{}")'.format(feature, '", "'.join(hash_list)))
            try:
                self.database.submit('REPLACE INTO {} (hash, value) VALUES {}'.format(feature, values))
            except sqlite3.IntegrityError as err:
                # thrown if existing value is not the default value or equal
                # to the value to be set; requires the unique on
                # insert-triggers introduced in version 3.0.9
                eprint(str(err) + ": Use the force!")
        else:
            try:
                self.database.submit('INSERT INTO {} (hash, value) VALUES {}'.format(feature, values))
            except Exception as err:
                # thrown if hash+value combination is already set;
                # requires the unique constraint introduced in version 3.0.9
                eprint(err)

    # Remove the attribute value for the given hashes
    def remove_attributes(self, feature, hash_list):
        if feature not in self.get_material_features():
            raise ValueError("Attribute '{}' is not available (or virtual)".format(feature))
        self.database.submit("DELETE FROM {} WHERE hash IN ('{}')".format(feature, "', '".join(hash_list)))

    def set_tag(self, tag_feature, tag_value, hash_list):
        self.database.set_tag(tag_feature, tag_value, hash_list)

    def search(self, feature, hashvalue):
        if feature not in self.get_features():
            raise ValueError("Attribute '{}' is not available".format(feature))
        return self.database.value_query("SELECT value FROM {} WHERE hash = '{}'".format(feature, hashvalue))

    # FIX: None defaults instead of shared mutable default lists
    def query_search(self, query=None, hashes=None, resolve=None, collapse="GROUP_CONCAT", group_by="hash"):
        try:
            sql = search.build_query(query, hashes or [], resolve or [], collapse, group_by or "hash", self.join_type)
            return self.database.query(sql)
        except sqlite3.OperationalError as err:
            raise ValueError("Query error in database '{}': {}".format(self.databases, err))
        except tatsu.exceptions.FailedParse as err:
            raise ValueError("Query error in parser: {}.".format(err.message))

    def calculate_par2_score(self, query, feature):
        """Print the PAR-2 score (timeouts count double) for *feature* over *query*."""
        info = self.database.meta_record(feature)
        if "timeout" not in info:
            eprint("Time-limit 'timeout' missing in meta-record of table '{}'.".format(feature))
            eprint("Unable to calculate score.")
            return
        if "memout" not in info:
            eprint("Memory-limit 'memout' missing in meta-record of table '{}'.".format(feature))
        if "machine" not in info:
            eprint("Machine-id 'machine' missing in meta-record of table '{}'.".format(feature))
        timeout = int(info["timeout"])
        times = self.query_search(query, [], [feature])
        # FIX: guard against division by zero on an empty result set
        if not times:
            eprint("No results for query. Unable to calculate score.")
            return
        score = 0
        penalized = set()
        for time in times:
            if is_number(time[1]):
                score += int(time[1])
            else:
                # non-numeric value means no solution within limits: PAR-2 penalty
                score += 2 * timeout
                penalized.add(time[1])
        print(score / len(times))
        print(penalized)
class GBD:
    """Facade over one or more GBD databases (os.pathsep-separated paths)."""

    # Create a new GBD object which operates on the given databases
    def __init__(self, db_string, context='cnf', jobs=1, tlim=5000, mlim=2000, flim=1000, separator=" ", join_type="LEFT", verbose=False):
        self.databases = db_string.split(os.pathsep)
        self.context = context
        self.jobs = jobs
        self.tlim = tlim  # time limit (seconds)
        self.mlim = mlim  # memory limit (mega bytes)
        self.flim = flim  # file size limit (mega bytes)
        self.separator = separator
        self.join_type = join_type
        self.verbose = verbose
        self.database = Database(self.databases, self.verbose)

    def __enter__(self):
        # pop_all() keeps the entered context alive beyond this 'with';
        # __exit__ closes it via self._stack
        with ExitStack() as stack:
            stack.enter_context(self.database)
            self._stack = stack.pop_all()
        return self

    def __exit__(self, exc_type, exc, traceback):
        self._stack.__exit__(exc_type, exc, traceback)

    # FIX: annotation was '-> dict()' (an empty-dict *instance*); the type is 'dict'
    def get_limits(self) -> dict:
        return {'tlim': self.tlim, 'mlim': self.mlim, 'flim': self.flim}

    def get_databases(self):
        return list(self.database.get_databases())

    def get_database_path(self, dbname):
        return self.database.dpath(dbname)

    # Get all features
    def get_features(self, dbname=None):
        return self.database.get_features(tables=True, views=True, database=dbname)

    # Get all material features
    def get_material_features(self, dbname=None):
        return self.database.get_features(tables=True, views=False, database=dbname)

    # Get all virtual features
    def get_virtual_features(self, dbname=None):
        return self.database.get_features(tables=False, views=True, database=dbname)

    # Check for existence of given feature
    def feature_exists(self, name):
        return name in self.get_features()

    # Creates the given feature
    def create_feature(self, name, default_value=None):
        if not self.feature_exists(name):
            self.database.create_feature(name, default_value)
        else:
            raise GBDException("Feature '{}' does already exist".format(name))

    # Removes the given feature
    def delete_feature(self, name):
        if self.feature_exists(name):
            self.database.delete_feature(name)
        else:
            raise GBDException("Feature '{}' does not exist or is virtual".format(name))

    # Rename the given feature
    def rename_feature(self, old_name, new_name):
        if not self.feature_exists(old_name):
            raise GBDException("Feature '{}' does not exist or is virtual".format(old_name))
        elif self.feature_exists(new_name):
            raise GBDException("Feature '{}' does already exist".format(new_name))
        else:
            self.database.rename_feature(old_name, new_name)

    # Retrieve information about a specific feature
    def get_feature_info(self, name):
        return self.database.feature_info(name)

    # Set the attribute value for the given hashes
    # FIX: None default instead of a shared mutable default list
    def set_attribute(self, feature, value, query, hashes=None, force=False):
        if feature not in self.get_material_features():
            raise GBDException("Feature '{}' missing or virtual".format(feature))
        hash_list = hashes if hashes is not None else []
        if query:
            # a query overrides the explicit hash list
            hash_list = [row[0] for row in self.query_search(query, hash_list)]
        try:
            self.database.set_values(feature, value, hash_list)
        except Exception as err:
            # FIX: chain the cause so the original traceback is preserved
            raise GBDException(str(err)) from err

    # Remove the attribute value for the given hashes
    def remove_attributes(self, feature, hash_list):
        if feature not in self.get_material_features():
            raise GBDException("Feature '{}' not found".format(feature))
        self.database.delete_hashes(feature, hash_list)

    # FIX: None defaults instead of shared mutable default lists
    def query_search(self, gbd_query=None, hashes=None, resolve=None, collapse="GROUP_CONCAT", group_by="hash"):
        try:
            query_builder = GBDQuery(self.database, self.join_type, collapse)
            sql = query_builder.build_query(gbd_query, hashes or [], resolve or [], group_by or "hash")
            return self.database.query(sql)
        except sqlite3.OperationalError as err:
            raise GBDException("Database Operational Error: {}".format(str(err))) from err
        except tatsu.exceptions.FailedParse as err:
            raise GBDException("Parser Error: {}".format(str(err))) from err