示例#1
0
    def test_merging(self):
        """Check that two single-table databases merge into one file."""

        table = "mytable"
        cols = ["id", "property", "value"]
        col_types = [int, str, float]
        input_files = ["file1.db", "file2.pdb"]
        db = database.Database2()
        # Build two identical small databases to merge.
        for name in input_files:
            db.create(name, True)
            db.connect(name)
            # Speed up the test by skipping disk syncs.
            db.cursor.execute('PRAGMA synchronous=OFF')
            db.create_table(table, cols, col_types)
            db.store_data(table, [(0, "width", 5.6), (1, "length", "34")])
            db.close()
        merged_file = "file3.db"
        database.merge_databases(input_files, merged_file, table)
        db.connect(merged_file)
        rows = db.get_table(table)
        # Two rows per input file -> four rows after the merge.
        self.assertEqual(len(rows), 4)
        db.close()
        for name in input_files:
            os.remove(name)
        os.remove(merged_file)
示例#2
0
 def make_db_with_data(self, fname, tbl, data):
     """Create a database file with one (assignment, em2d) table and fill it."""
     handle = database.Database2()
     handle.create(fname, overwrite=True)
     handle.connect(fname)
     handle.create_table(tbl, ["assignment", "em2d"], [str, float])
     handle.store_data(tbl, data)
     handle.close()
示例#3
0
def gather_solution_results(fns, fn_output, raisef=0.1):
    """
       Reads a set of database files and puts them in a single file
       Makes sure to reorder all column names if necessary before merging
       @param fns List of database files
       @param fn_output Name of the output database
       @param raisef See help for gather_best_solution_results()
    """
    tbl = "results"
    # Get names and types of the columns from first database file
    db = database.Database2()
    db.connect(fns[0])
    names = db.get_table_column_names(tbl)
    types = db.get_table_types(tbl)
    db.close()
    indices = get_sorting_indices(names)
    sorted_names = [names[i] for i in indices]
    sorted_types = [types[i] for i in indices]
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    out_db.create_table(tbl, sorted_names, sorted_types)

    n_problems = 0
    for fn in fns:
        try:
            log.info("Reading %s", fn)
            db.connect(fn)
            # Column order may differ between files; select in sorted order
            # so rows line up with the output table's sorted columns.
            names = sorted(db.get_table_column_names(tbl))
            they_are_sorted = field_delim.join(names)
            log.debug("Retrieving %s", they_are_sorted)
            sql_command = "SELECT %s FROM %s" % (they_are_sorted, tbl)
            data = db.retrieve_data(sql_command)
            out_db.store_data(tbl, data)
        except Exception as e:
            log.error("Error for file %s: %s", fn, e)
            n_problems += 1
        finally:
            # Close even after a failure so the connection is not leaked
            # (the original only closed on the success path).
            try:
                db.close()
            except Exception:
                pass
    # Close the output before possibly raising, so it is never leaked.
    out_db.close()
    ratio = float(n_problems) / float(len(fns))
    if ratio > raisef:
        raise IOError("There are %8.1f %s of the database "
                      "files to merge with problems! " % (ratio * 100, "%"))
示例#4
0
 def setUp(self):
     """Create a temporary database file holding two empty test tables."""
     IMP.test.TestCase.setUp(self)
     self.fn = IMP.base.create_temporary_file_name('test.db')
     self.db = database.Database2()
     self.db.create(self.fn, True)
     self.db.connect(self.fn)
     self.column_names = ["id", "property", "value"]
     self.column_types = [int, str, float]
     self.tables = ["mytable1", "mytable2"]
     for name in self.tables:
         self.db.create_table(name, self.column_names, self.column_types)
示例#5
0
    def test_sampling_schema(self):
        """Store random reference frames and check SamplingSchema reads
           them back unchanged."""
        subunits = ["subunitA", "subunitB", "subunitC", "subunitD"]
        anchored = [False] * 4
        fixed = [False] * 4

        n_transformations = 50
        fn = 'temp.db'
        db = database.Database2()
        db.create(fn, overwrite=True)
        db.connect(fn)

        table_name = "results"
        db.create_table(table_name, ["reference_frames"], [str])

        # One random transformation per subunit, per solution.
        transformations = []
        for _ in range(n_transformations):
            frame = []
            for _ in subunits:
                origin = alg.Vector3D(0, 0, 0)
                frame.append(alg.Transformation3D(
                    alg.get_random_rotation_3d(),
                    alg.get_random_vector_in(alg.Sphere3D(origin, 34))))
            transformations.append(frame)

        # Serialize each solution as a "/"-joined text field.
        data = []
        for frame in transformations:
            texts = [io.Transformation3DToText(T).get_text() for T in frame]
            data.append(["/".join(texts)])
        db.store_data(table_name, data)
        db.close()

        sch = sampling.SamplingSchema(4, fixed, anchored)
        sch.read_from_database(fn)

        # Compare translations and quaternions component by component.
        for i, frame in enumerate(transformations):
            for j in range(len(subunits)):
                expected = frame[j]
                t = expected.get_translation()
                q = expected.get_rotation().get_quaternion()

                loaded = sch.transformations[j][i]
                pos = loaded.get_translation()
                ori = loaded.get_rotation().get_quaternion()
                for k in range(3):
                    self.assertAlmostEqual(pos[k], t[k])
                for k in range(4):
                    self.assertAlmostEqual(q[k], ori[k])

        os.remove(fn)
示例#6
0
 def setUp(self):
     """Create a temporary database file holding two empty test tables."""
     IMP.test.TestCase.setUp(self)
     self.fn = IMP.create_temporary_file_name('test.db')
     self.db = database.Database2()
     self.db.create(self.fn, True)
     self.db.connect(self.fn)
     # Speed up tests by not waiting for the disk
     self.db.cursor.execute('PRAGMA synchronous=OFF')
     self.column_names = ["id", "property", "value"]
     self.column_types = [int, str, float]
     self.tables = ["mytable1", "mytable2"]
     for name in self.tables:
         self.db.create_table(name, self.column_names, self.column_types)
示例#7
0
def gather_best_solution_results(fns,
                                 fn_output,
                                 max_number=50000,
                                 raisef=0.1,
                                 orderby="em2d"):
    """
       Reads a set of database files and merge them into a single file.

       @param fns List of files with databases
       @param fn_output The database to create
       @param max_number Maximum number of records to keep, sorted according
            to orderby
       @param raisef Ratio of problematic database files tolerated before
            raising an error. This option is to tolerate some files
            of the databases being broken because the cluster fails,
            fill the disks, etc
       @param orderby Criterium used to sort the the records
       NOTE:
       Makes sure to reorder all column names if necessary before merging
       The record for the native solution is only added once (from first file).
    """
    tbl = "results"
    # Get names and types of the columns from first database file
    db = database.Database2()
    db.connect(fns[0])
    names = db.get_table_column_names(tbl)
    types = db.get_table_types(tbl)
    indices = get_sorting_indices(names)
    sorted_names = [names[i] for i in indices]
    sorted_types = [types[i] for i in indices]

    names.sort()
    # Position of the sort criterium within the sorted column list
    ind = names.index(orderby)
    they_are_sorted = field_delim.join(names)
    # Get the native structure data from the first database
    sql_command = """SELECT %s FROM %s
                  WHERE assignment="native" LIMIT 1 """ % (they_are_sorted,
                                                           tbl)
    native_data = db.retrieve_data(sql_command)
    db.close()
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    out_db.create_table(tbl, sorted_names, sorted_types)

    best_records = []
    n_problems = 0
    for fn in fns:
        try:
            log.info("Reading %s", fn)
            db.connect(fn)
            sql_command = """SELECT %s FROM %s
                             WHERE assignment<>"native"
                             ORDER BY %s ASC LIMIT %s """ % (
                they_are_sorted, tbl, orderby, max_number)
            data = db.retrieve_data(sql_command)
            log.info("%s records read from %s", len(data), fn)
            # Fill heap, keeping only the max_number best records
            for d in data:
                a = HeapRecord(d, ind)
                if len(best_records) < max_number:
                    heapq.heappush(best_records, a)
                # remember that < here compares for greater em2d value,
                # as a HeapRecord is used
                elif best_records[0] < a:
                    heapq.heapreplace(best_records, a)
        except Exception as e:
            log.error("Error for %s: %s", fn, e)
            n_problems += 1
        finally:
            # Close even after a failure so the connection is not leaked
            # (the original only closed on the success path).
            try:
                db.close()
            except Exception:
                pass

    # If the number of problematic files is too high, report that something
    # big is going on. Otherwise tolerate some errors from some tasks that
    # failed (memory errors, locks, writing errors ...)
    ratio = float(n_problems) / float(len(fns))
    if ratio > raisef:
        out_db.close()  # do not leak the output connection when bailing out
        raise IOError("There are %8.1f %s of the database "
                      "files to merge with problems! " % (ratio * 100, "%"))
    # append the native data to the best_records. The first file may lack a
    # native record; guard against an IndexError in that case.
    if native_data:
        heapq.heappush(best_records, native_data[0])
    else:
        log.warning("No native record found in %s", fns[0])
    out_db.store_data(tbl, best_records)
    out_db.close()
示例#8
0
 def make_db_with_table(self, fname, tbl):
     """Create a database file containing one empty (foo, bar) table."""
     handle = database.Database2()
     handle.create(fname, overwrite=True)
     handle.connect(fname)
     handle.create_table(tbl, ["foo", "bar"], [int, float])
     handle.close()