Пример #1
0
 def __init__(self, db_string, context='cnf', jobs=1, tlim=5000, mlim=2000, flim=1000, separator=" ", join_type="LEFT", verbose=False):
     """Store the configuration and create the Database handle.

     ``db_string`` is split on ``os.pathsep`` into the list of database
     paths. ``tlim`` is the time limit in seconds, ``mlim`` the memory
     limit and ``flim`` the file size limit, both in megabytes. All other
     arguments are stored unchanged on the instance.
     """
     paths = db_string.split(os.pathsep)
     self.databases = paths
     self.context, self.jobs = context, jobs
     # Resource limits: seconds, megabytes, megabytes.
     self.tlim, self.mlim, self.flim = tlim, mlim, flim
     self.separator, self.join_type = separator, join_type
     self.verbose = verbose
     # The handle is built from the same path list stored above.
     self.database = Database(paths, verbose)
Пример #2
0
 def test_create_db(self):
     """Open a just-removed database path twice and check it is empty.

     The first connection checks the top-level listings, the second one
     checks the per-database accessors for the same path.
     """
     os.remove(self.TDB)
     # First connection: one database, no features, no tables.
     with Database([self.TDB], verbose=True) as first:
         assert Schema.is_database(self.TDB)
         assert len(first.get_databases()) == 1
         assert len(first.get_features()) == 0
         assert len(first.get_tables()) == 0
     # Second connection: per-database metadata for the test database.
     with Database([self.TDB], verbose=True) as second:
         assert second.dpath(self.TDBN) == self.TDB
         assert second.dmain(self.TDBN)
         assert len(second.dcontexts(self.TDBN)) == 0
         assert len(second.dtables(self.TDBN)) == 0
         assert len(second.dviews(self.TDBN)) == 0
Пример #3
0
 def test_create_unique_feature(self):
     """Create a feature with a default value and verify its metadata.

     The feature is created with the default "empty"; the assertions
     expect it to live in the "features" table of the test database, in
     a column named after the feature.
     """
     os.remove(self.TDB)
     feature = "featB"
     table = self.TDBN + ".features"
     with Database([self.TDB], verbose=True) as writer:
         writer.create_feature(feature, "empty")
         assert feature in writer.get_features()
         assert "features" in writer.get_tables()
     # Re-open the database and inspect what was recorded for the feature.
     with Database([self.TDB], verbose=True) as reader:
         assert reader.ftable(feature) == table
         assert reader.fcolumn(feature) == feature
         assert reader.fdefault(feature) == "empty"
         assert not reader.fvirtual(feature)
         assert reader.fcontext(feature) == "cnf"
         assert reader.fdatabase(feature) == self.TDBN
Пример #4
0
 def test_create_feature(self):
     """Create a feature without a default value and verify its metadata.

     The assertions expect the feature to get a table of its own (named
     after the feature) with a "value" column and no default.
     """
     os.remove(self.TDB)
     FEAT = "featA"
     NAME = self.TDBN + "." + FEAT
     with Database([self.TDB], verbose=True) as db:
         db.create_feature(FEAT)
         assert FEAT in db.get_features()
         assert FEAT in db.get_tables()
     # Re-open the database and inspect what was recorded for the feature.
     with Database([self.TDB], verbose=True) as db:
         assert db.ftable(FEAT) == NAME
         assert db.fcolumn(FEAT) == "value"
         # Identity check: the default must be the None singleton itself.
         assert db.fdefault(FEAT) is None
         assert not db.fvirtual(FEAT)
         assert db.fcontext(FEAT) == "cnf"
         assert db.fdatabase(FEAT) == self.TDBN
Пример #5
0
 def get_features(self, path=None):
     """Return all features (tables and views), optionally of one database.

     Args:
         path: If None, the shared connection ``self.database`` is used.
             Otherwise the path must be one of ``self.databases``.

     Returns:
         The result of ``tables_and_views()``, or an empty list when
         ``path`` is not one of the registered databases.
     """
     if path is None:
         return self.database.tables_and_views()
     if path not in self.databases:
         return []
     # A dedicated connection restricted to the requested database.
     with Database([path]) as db:
         return db.tables_and_views()
Пример #6
0
 def get_virtual_features(self, path=None):
     """Return all virtual features (views), optionally of one database.

     Args:
         path: If None, the shared connection ``self.database`` is used.
             Otherwise the path must be one of ``self.databases``.

     Returns:
         The result of ``views()``, or an empty list when ``path`` is not
         one of the registered databases.
     """
     if path is None:
         return self.database.views()
     if path not in self.databases:
         return []
     # A dedicated connection restricted to the requested database.
     with Database([path]) as db:
         return db.views()
Пример #7
0
 def get_feature_info(self, name, path=None):
     """Return the merged system and meta record of a feature.

     Args:
         name: Feature name passed to ``system_record`` and ``meta_record``.
         path: If None, the shared connection ``self.database`` is used.
             Otherwise a database path (or a list of paths) for which a
             dedicated connection is opened.

     Returns:
         A dict holding the system record updated with the meta record;
         meta entries win on key collisions.
     """
     def merged(db):
         return {**db.system_record(name), **db.meta_record(name)}

     if path is None:
         return merged(self.database)
     # Every other call site hands Database a list of paths; wrap a single
     # path string so it is not treated as a sequence of characters.
     paths = [path] if isinstance(path, str) else path
     with Database(paths) as db:
         return merged(db)
Пример #8
0
 def set_attributes_locked(self, hash, attributes):
     """Submit one statement per attribute while holding ``self.mutex``.

     Each entry of ``attributes`` is indexed as (command, table name,
     value); the given hash is written into the ``hash`` column.
     """
     # NOTE(review): the statement is assembled by string formatting, so
     # the values are not escaped - confirm all inputs are trusted.
     statement = '{} INTO {} (hash, value) VALUES ("{}", "{}")'
     with self.mutex:
         # create new connection due to limitations of multi-threaded use (cursor initialization issue)
         with Database(self.databases) as connection:
             for entry in attributes:
                 command, table, content = entry[0], entry[1], entry[2]
                 connection.submit(statement.format(command, table, hash, content))
Пример #9
0
 def test_insert_values(self):
     """Insert values for two groups of hashes and query them back.

     Hashes '1'-'3' get the value "a" and '4'-'6' the value "b"; the test
     then checks an unfiltered query and one filtered on the value "a".
     """
     os.remove(self.TDB)
     feature = "letter"
     with Database([self.TDB], verbose=True) as db:
         db.create_feature(feature, "empty")
         db.set_values(feature, "a", ['1', '2', '3'])
         db.set_values(feature, "b", ['4', '5', '6'])
         builder = GBDQuery(db)
         # Unfiltered query: every hash together with its resolved value.
         rows = db.query(builder.build_query(resolve=[feature]))
         eprint(rows)
         assert rows == [('1', 'a'), ('2', 'a'), ('3', 'a'),
                         ('4', 'b'), ('5', 'b'), ('6', 'b')]
         # Filtered query: only the hashes whose value is "a".
         rows = db.query(builder.build_query("{}=a".format(feature), resolve=[feature]))
         assert rows == [('1', 'a'), ('2', 'a'), ('3', 'a')]
Пример #10
0
 def __enter__(self):
     """Open the database connection and return this object.

     The connection is entered through a temporary ExitStack whose
     callbacks are moved to ``self._stack`` via ``pop_all()``, so leaving
     the ``with`` block below does not close the connection again.
     """
     connection = Database(self.databases, self.verbose)
     self.database = connection
     with ExitStack() as guard:
         guard.enter_context(connection)
         # Hand the cleanup over to a stack that outlives this method.
         self._stack = guard.pop_all()
     return self
Пример #11
0
class GbdApi:
    """API facade operating on one or more GBD databases.

    Use as a context manager: ``__enter__`` opens the shared ``Database``
    connection (``self.database``) that most methods rely on.
    """

    def __init__(self,
                 db_string,
                 jobs=1,
                 separator=" ",
                 join_type="LEFT",
                 verbose=False):
        """Create a new GbdApi object which operates on the given databases.

        Args:
            db_string: Database paths joined by ":".
            jobs: Job count handed to initialization and bootstrapping.
            separator: Column delimiter used when importing CSV files.
            join_type: Join type handed to the query builder.
            verbose: Passed on to the ``Database`` constructor.
        """
        self.databases = db_string.split(":")
        self.jobs = jobs
        self.mutex = multiprocessing.Lock()
        self.separator = separator
        self.join_type = join_type
        self.verbose = verbose

    def __enter__(self):
        """Open the shared database connection and return this object."""
        self.database = Database(self.databases, self.verbose)
        with ExitStack() as stack:
            stack.enter_context(self.database)
            # pop_all() moves the cleanup to a stack that outlives this
            # method, so the connection stays open until __exit__.
            self._stack = stack.pop_all()
        return self

    def __exit__(self, exc_type, exc, traceback):
        """Close the resources registered in ``__enter__``."""
        self._stack.__exit__(exc_type, exc, traceback)

    @staticmethod
    def hash_file(path):
        """Calculate the GBD hash of the file at ``path``."""
        return gbd_hash(path)

    def import_file(self, path, key, source, target):
        """Import data from a CSV file into the feature ``target``.

        Args:
            path: CSV file; columns are delimited by ``self.separator`` and
                quoted with single quotes.
            key: Name of the column holding the row key.
            source: Name of the column holding the value to import.
            target: Existing feature that receives the (key, value) pairs.

        Rows whose source value is empty or whitespace-only are skipped.
        If ``target`` does not exist, a message is printed and nothing is
        imported.
        """
        if not self.feature_exists(target):
            print("Feature {} does not exist. Import canceled.".format(target))
            # Actually cancel: previously the import went on regardless.
            return
        with open(path, newline='') as csvfile:
            csvreader = csv.DictReader(csvfile,
                                       delimiter=self.separator,
                                       quotechar='\'')
            lst = [(row[key].strip(), row[source].strip()) for row in csvreader
                   if row[source] and row[source].strip()]
            print("Inserting {} values into group {}".format(len(lst), target))
            self.database.bulk_insert(target, lst)

    def init_database(self, path=None):
        """Initialize table 'local' with instances found under given path.

        Existing benchmark entries are removed before the new ones are
        registered using ``self.jobs`` jobs.
        """
        eprint('Initializing local path entries {} using {} cores'.format(
            path, self.jobs))
        if self.jobs == 1 and multiprocessing.cpu_count() > 1:
            eprint("Activate parallel initialization using --jobs={}".format(
                multiprocessing.cpu_count()))
        benchmark_administration.remove_benchmarks(self.database)
        benchmark_administration.register_benchmarks(self, self.database, path,
                                                     self.jobs)

    def bootstrap(self, named_algo, hashes):
        """Delegate to ``bootstrap.bootstrap`` with this API's database and job count."""
        bootstrap.bootstrap(self, self.database, named_algo, hashes, self.jobs)

    def get_databases(self):
        """Return the list of database paths this object operates on."""
        return self.databases

    def get_features(self, path=None):
        """Get all features (or those of the given database).

        Returns an empty list if ``path`` is not a registered database.
        """
        if path is None:
            return self.database.tables_and_views()
        if path not in self.databases:
            return []
        with Database([path]) as db:
            return db.tables_and_views()

    def get_material_features(self, path=None):
        """Get all material features (or those of the given database).

        Returns an empty list if ``path`` is not a registered database.
        """
        if path is None:
            return self.database.tables()
        if path not in self.databases:
            return []
        with Database([path]) as db:
            return db.tables()

    def get_virtual_features(self, path=None):
        """Get all virtual features (or those of the given database).

        Returns an empty list if ``path`` is not a registered database.
        """
        if path is None:
            return self.database.views()
        if path not in self.databases:
            return []
        with Database([path]) as db:
            return db.views()

    def feature_exists(self, name):
        """Check whether ``name`` is among the tables of the database."""
        return name in self.database.tables()

    def create_feature(self, name, default_value=None):
        """Create the given feature, optionally with a default value."""
        self.database.create_table(name, default_value)

    def remove_feature(self, name):
        """Remove the given feature."""
        self.database.delete_table(name)

    def rename_feature(self, old_name, new_name):
        """Rename the given feature."""
        self.database.rename_table(old_name, new_name)

    def get_feature_size(self, name):
        """Return the table size of a feature.

        Raises:
            ValueError: If ``name`` is not a known feature.
        """
        if name not in self.get_features():
            raise ValueError("Attribute '{}' is not available".format(name))
        return self.database.table_size(name)

    def get_feature_info(self, name, path=None):
        """Retrieve information about a specific feature.

        Returns the system record merged with the meta record (meta entries
        win on key collisions). With ``path`` given, a dedicated connection
        to that database is used instead of the shared one.
        """
        def merged(db):
            return {**db.system_record(name), **db.meta_record(name)}

        if path is None:
            return merged(self.database)
        # Database is handed a list of paths everywhere else; wrap a single
        # path string so it is not treated as a sequence of characters.
        paths = [path] if isinstance(path, str) else path
        with Database(paths) as db:
            return merged(db)

    def meta_set(self, feature, meta_feature, value):
        """Set the meta-feature ``meta_feature`` of ``feature`` to ``value``."""
        self.database.meta_set(feature, meta_feature, value)

    def meta_clear(self, feature, meta_feature=None):
        """Clear the specified meta-feature of ``feature``.

        Clears all meta-features if ``meta_feature`` is not specified.
        """
        self.database.meta_clear(feature, meta_feature)

    def callback_set_attributes_locked(self, arg):
        """Unpack a result dict ('hashvalue', 'attributes') and store it."""
        self.set_attributes_locked(arg['hashvalue'], arg['attributes'])

    def set_attributes_locked(self, hash, attributes):
        """Submit one statement per attribute while holding ``self.mutex``.

        Each entry of ``attributes`` is indexed as (command, table name,
        value); the given hash is written into the ``hash`` column.
        """
        # NOTE(review): statements are assembled by string formatting, so
        # values are not escaped - confirm all inputs are trusted.
        with self.mutex:
            # create new connection due to limitations of multi-threaded use (cursor initialization issue)
            with Database(self.databases) as db:
                for attr in attributes:
                    cmd, name, value = attr[0], attr[1], attr[2]
                    db.submit(
                        '{} INTO {} (hash, value) VALUES ("{}", "{}")'.format(
                            cmd, name, hash, value))

    def set_attribute(self, feature, value, hash_list, force):
        """Set the attribute value for the given hashes.

        Args:
            feature: Material feature to write to.
            value: Value stored for every hash.
            hash_list: Hashes to update; nothing is submitted if empty.
            force: For unique features, delete existing entries first.

        Raises:
            ValueError: If ``feature`` is not a material feature.

        Database errors on insertion are reported via ``eprint`` and not
        propagated.
        """
        if feature not in self.get_material_features():
            raise ValueError(
                "Attribute '{}' is not available (or virtual)".format(feature))
        if not hash_list:
            # An empty VALUES clause would be invalid SQL.
            return
        # NOTE(review): statements are assembled by string formatting, so
        # values are not escaped - confirm all inputs are trusted.
        values = ', '.join(
            ['("{}", "{}")'.format(h, value) for h in hash_list])
        if self.database.table_unique(feature):
            if force:
                self.database.submit(
                    'DELETE FROM {} WHERE hash IN ("{}")'.format(
                        feature, '", "'.join(hash_list)))
            try:
                self.database.submit(
                    'REPLACE INTO {} (hash, value) VALUES {}'.format(
                        feature, values))
            except sqlite3.IntegrityError as err:
                # thrown if existing value is not the default value or equal to the value to be set
                # requires the unique on insert-triggers introduced in version 3.0.9
                eprint(str(err) + ": Use the force!")
        else:
            try:
                self.database.submit(
                    'INSERT INTO {} (hash, value) VALUES {}'.format(
                        feature, values))
            except Exception as err:
                # thrown if hash+value combination is already set
                # requires the unique constraint introduced in version 3.0.9
                eprint(err)

    def remove_attributes(self, feature, hash_list):
        """Remove the attribute value for the given hashes.

        Raises:
            ValueError: If ``feature`` is not a material feature.
        """
        if feature not in self.get_material_features():
            raise ValueError(
                "Attribute '{}' is not available (or virtual)".format(feature))
        self.database.submit("DELETE FROM {} WHERE hash IN ('{}')".format(
            feature, "', '".join(hash_list)))

    def set_tag(self, tag_feature, tag_value, hash_list):
        """Delegate to ``Database.set_tag`` for the given hashes."""
        self.database.set_tag(tag_feature, tag_value, hash_list)

    def search(self, feature, hashvalue):
        """Return the value(s) of ``feature`` stored for ``hashvalue``.

        Raises:
            ValueError: If ``feature`` is not a known feature.
        """
        if feature not in self.get_features():
            raise ValueError("Attribute '{}' is not available".format(feature))
        return self.database.value_query(
            "SELECT value FROM {} WHERE hash = '{}'".format(
                feature, hashvalue))

    def query_search(self,
                     query=None,
                     hashes=None,
                     resolve=None,
                     collapse="GROUP_CONCAT",
                     group_by="hash"):
        """Build an SQL statement from a query and run it.

        Args:
            query: Query string handed to the query builder.
            hashes: Hashes handed to the query builder (default: none).
            resolve: Features to resolve (default: none).
            collapse: Collapse function handed to the query builder.
            group_by: Grouping feature; falls back to "hash" if falsy.

        Raises:
            ValueError: If the database reports an operational error or
                the query cannot be parsed.
        """
        # None defaults avoid sharing one mutable list between calls.
        hashes = [] if hashes is None else hashes
        try:
            sql = search.build_query(query, hashes, resolve or [], collapse,
                                     group_by or "hash", self.join_type)
            return self.database.query(sql)
        except sqlite3.OperationalError as err:
            raise ValueError("Query error in database '{}': {}".format(
                self.databases, err)) from err
        except tatsu.exceptions.FailedParse as err:
            raise ValueError(
                "Query error in parser: {}.".format(err.message)) from err

    def calculate_par2_score(self, query, feature):
        """Print the average score over the query results and the penalized values.

        Numeric results count with their value; every other result counts
        as twice the 'timeout' from the feature's meta-record. Nothing is
        calculated if that meta entry is missing or the query is empty.
        """
        info = self.database.meta_record(feature)
        if "timeout" not in info:
            eprint(
                "Time-limit 'timeout' missing in meta-record of table '{}'.".
                format(feature))
            eprint("Unable to calculate score.")
            return
        if "memout" not in info:
            eprint(
                "Memory-limit 'memout' missing in meta-record of table '{}'.".
                format(feature))
        if "machine" not in info:
            eprint(
                "Machine-id 'machine' missing in meta-record of table '{}'.".
                format(feature))
        timeout = int(info["timeout"])
        times = self.query_search(query, [], [feature])
        if not times:
            # Avoid a division by zero when the query matches nothing.
            eprint("Query returned no results. Unable to calculate score.")
            return
        score = 0
        penalized = set()
        for time in times:
            if is_number(time[1]):
                score += int(time[1])
            else:
                score += 2 * timeout
                penalized.add(time[1])
        print(score / len(times))
        print(penalized)
Пример #12
0
class GBD:
    """API facade operating on one or more GBD databases.

    The ``Database`` handle is created in the constructor and entered when
    the object is used as a context manager.
    """

    def __init__(self, db_string, context='cnf', jobs=1, tlim=5000, mlim=2000, flim=1000, separator=" ", join_type="LEFT", verbose=False):
        """Create a new GBD object which operates on the given databases.

        Args:
            db_string: Database paths joined by ``os.pathsep``.
            context: Stored on the instance unchanged.
            jobs: Stored on the instance unchanged.
            tlim: Time limit (seconds).
            mlim: Memory limit (megabytes).
            flim: File size limit (megabytes).
            separator: Stored on the instance unchanged.
            join_type: Join type handed to the query builder.
            verbose: Passed on to the ``Database`` constructor.
        """
        self.databases = db_string.split(os.pathsep)
        self.context = context
        self.jobs = jobs
        self.tlim = tlim  # time limit (seconds)
        self.mlim = mlim  # memory limit (mega bytes)
        self.flim = flim  # file size limit (mega bytes)
        self.separator = separator
        self.join_type = join_type
        self.verbose = verbose
        self.database = Database(self.databases, self.verbose)

    def __enter__(self):
        """Enter the database context and return this object."""
        with ExitStack() as stack:
            stack.enter_context(self.database)
            # pop_all() moves the cleanup to a stack that outlives this
            # method, so the database stays entered until __exit__.
            self._stack = stack.pop_all()
        return self

    def __exit__(self, exc_type, exc, traceback):
        """Close the resources registered in ``__enter__``."""
        self._stack.__exit__(exc_type, exc, traceback)

    def get_limits(self) -> dict:
        """Return the limits as a dict with keys 'tlim', 'mlim' and 'flim'."""
        return {'tlim': self.tlim, 'mlim': self.mlim, 'flim': self.flim}

    def get_databases(self):
        """Return the databases known to the connection as a list."""
        return list(self.database.get_databases())

    def get_database_path(self, dbname):
        """Return the path of the database named ``dbname``."""
        return self.database.dpath(dbname)

    def get_features(self, dbname=None):
        """Get all features, optionally restricted to database ``dbname``."""
        return self.database.get_features(tables=True, views=True, database=dbname)

    def get_material_features(self, dbname=None):
        """Get all material features, optionally restricted to ``dbname``."""
        return self.database.get_features(tables=True, views=False, database=dbname)

    def get_virtual_features(self, dbname=None):
        """Get all virtual features, optionally restricted to ``dbname``."""
        return self.database.get_features(tables=False, views=True, database=dbname)

    def feature_exists(self, name):
        """Check for existence of the given feature (material or virtual)."""
        return name in self.get_features()

    def create_feature(self, name, default_value=None):
        """Create the given feature.

        Raises:
            GBDException: If a feature of that name already exists.
        """
        if self.feature_exists(name):
            raise GBDException("Feature '{}' does already exist".format(name))
        self.database.create_feature(name, default_value)

    def delete_feature(self, name):
        """Remove the given feature.

        Raises:
            GBDException: If ``feature_exists`` reports no such feature.
        """
        # NOTE(review): the message mentions virtual features, but the
        # check below accepts them too - confirm which one is intended.
        if not self.feature_exists(name):
            raise GBDException("Feature '{}' does not exist or is virtual".format(name))
        self.database.delete_feature(name)

    def rename_feature(self, old_name, new_name):
        """Rename the given feature.

        Raises:
            GBDException: If ``old_name`` does not exist or ``new_name`` is
                already taken.
        """
        if not self.feature_exists(old_name):
            raise GBDException("Feature '{}' does not exist or is virtual".format(old_name))
        if self.feature_exists(new_name):
            raise GBDException("Feature '{}' does already exist".format(new_name))
        self.database.rename_feature(old_name, new_name)

    def get_feature_info(self, name):
        """Retrieve information about a specific feature."""
        return self.database.feature_info(name)

    def set_attribute(self, feature, value, query, hashes=None, force=False):
        """Set the attribute value for the given hashes.

        Args:
            feature: Material feature to write to.
            value: Value stored for every selected hash.
            query: If truthy, the hashes are taken from the first column
                of ``query_search(query, hashes)`` instead of ``hashes``.
            hashes: Hashes to update (default: none).
            force: Accepted for interface compatibility; not used here.

        Raises:
            GBDException: If the feature is missing or virtual, or if
                storing the values fails.
        """
        if feature not in self.get_material_features():
            raise GBDException("Feature '{}' missing or virtual".format(feature))
        # None default avoids sharing one mutable list between calls.
        hash_list = [] if hashes is None else hashes
        if query:
            hash_list = [row[0] for row in self.query_search(query, hash_list)]
        try:
            self.database.set_values(feature, value, hash_list)
        except Exception as err:
            raise GBDException(str(err)) from err

    def remove_attributes(self, feature, hash_list):
        """Remove the attribute value for the given hashes.

        Raises:
            GBDException: If ``feature`` is not a material feature.
        """
        if feature not in self.get_material_features():
            raise GBDException("Feature '{}' not found".format(feature))
        self.database.delete_hashes(feature, hash_list)

    def query_search(self, gbd_query=None, hashes=None, resolve=None, collapse="GROUP_CONCAT", group_by="hash"):
        """Build an SQL statement from a GBD query and run it.

        Args:
            gbd_query: Query string handed to the query builder.
            hashes: Hashes handed to the query builder (default: none).
            resolve: Features to resolve (default: none).
            collapse: Collapse function handed to ``GBDQuery``.
            group_by: Grouping feature; falls back to "hash" if falsy.

        Raises:
            GBDException: If the database reports an operational error or
                the query cannot be parsed.
        """
        # None defaults avoid sharing one mutable list between calls.
        hashes = [] if hashes is None else hashes
        try:
            query_builder = GBDQuery(self.database, self.join_type, collapse)
            sql = query_builder.build_query(gbd_query, hashes, resolve or [], group_by or "hash")
            return self.database.query(sql)
        except sqlite3.OperationalError as err:
            raise GBDException("Database Operational Error: {}".format(str(err))) from err
        except tatsu.exceptions.FailedParse as err:
            raise GBDException("Parser Error: {}".format(str(err))) from err