def test_merging(self):
    """ Test merging of databases """
    db = database.Database2()
    my_table = "mytable"
    column_names = ["id", "property", "value"]
    column_types = [int, str, float]
    fns = ["file1.db", "file2.db"]
    for fn in fns:
        db.create(fn, True)
        db.connect(fn)
        # Speed up the test by not waiting for the disk
        db.cursor.execute('PRAGMA synchronous=OFF')
        db.create_table(my_table, column_names, column_types)
        data = [(0, "width", 5.6), (1, "length", 34.0)]
        db.store_data(my_table, data)
        db.close()
    fn_output = "file3.db"
    database.merge_databases(fns, fn_output, my_table)
    db.connect(fn_output)
    data = db.get_table(my_table)
    # 2 records from each of the 2 input databases
    self.assertEqual(len(data), 4)
    db.close()
    for fn in fns:
        os.remove(fn)
    os.remove(fn_output)
def make_db_with_data(self, fname, tbl, data):
    db = database.Database2()
    db.create(fname, overwrite=True)
    db.connect(fname)
    db.create_table(tbl, ["assignment", "em2d"], [str, float])
    db.store_data(tbl, data)
    db.close()
def gather_solution_results(fns, fn_output, raisef=0.1):
    """ Reads a set of database files and puts them in a single file
        Makes sure to reorder all column names if necessary before merging
        @param fns List of database files
        @param fn_output Name of the output database
        @param raisef See help for gather_best_solution_results()
    """
    tbl = "results"
    # Get names and types of the columns from the first database file
    db = database.Database2()
    db.connect(fns[0])
    names = db.get_table_column_names(tbl)
    types = db.get_table_types(tbl)
    indices = get_sorting_indices(names)
    sorted_names = [names[i] for i in indices]
    sorted_types = [types[i] for i in indices]
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    out_db.create_table(tbl, sorted_names, sorted_types)
    n_problems = 0
    for fn in fns:
        try:
            log.info("Reading %s", fn)
            db.connect(fn)
            names = sorted(db.get_table_column_names(tbl))
            they_are_sorted = field_delim.join(names)
            log.debug("Retrieving %s", they_are_sorted)
            sql_command = "SELECT %s FROM %s" % (they_are_sorted, tbl)
            data = db.retrieve_data(sql_command)
            out_db.store_data(tbl, data)
            db.close()
        except Exception as e:
            log.error("Error for file %s: %s", fn, e)
            n_problems += 1
    # Tolerate a small fraction of broken files; fail loudly otherwise
    ratio = float(n_problems) / float(len(fns))
    if ratio > raisef:
        raise IOError("There are %8.1f %s of the database "
                      "files to merge with problems! " % (ratio * 100, "%"))
    out_db.close()
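# gather_solution_results() (and gather_best_solution_results() below) rely
# on two module-level helpers that are not part of this excerpt: field_delim,
# the delimiter used to join column names into a SELECT clause, and
# get_sorting_indices(). The definitions below are a minimal sketch that is
# consistent with how they are used here, not necessarily the module's
# actual code.

field_delim = ","  # assumed: yields "col1,col2,..." in the SELECTs above


def get_sorting_indices(values):
    """Return the permutation of indices that sorts `values` ascending,
    so that [values[i] for i in get_sorting_indices(values)] is sorted."""
    return sorted(range(len(values)), key=lambda i: values[i])

# Hedged usage sketch for gather_solution_results(); the file names are
# hypothetical placeholders for the per-task databases written on a cluster:
#
#   fns = ["results_task_%d.db" % i for i in range(8)]
#   gather_solution_results(fns, "all_results.db", raisef=0.1)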
def setUp(self):
    IMP.test.TestCase.setUp(self)
    self.db = database.Database2()
    self.fn = IMP.base.create_temporary_file_name('test.db')
    self.db.create(self.fn, True)
    self.db.connect(self.fn)
    self.tables = ["mytable1", "mytable2"]
    self.column_names = ["id", "property", "value"]
    self.column_types = [int, str, float]
    for t in self.tables:
        self.db.create_table(t, self.column_names, self.column_types)
def test_sampling_schema(self):
    """ Test reading a sampling schema from a database """
    subunits = ["subunitA", "subunitB", "subunitC", "subunitD"]
    anchored = [False, False, False, False]
    fixed = [False, False, False, False]
    n_transformations = 50
    db = database.Database2()
    fn = 'temp.db'
    db.create(fn, overwrite=True)
    db.connect(fn)
    transformations = []
    table_name = "results"
    db.create_table(table_name, ["reference_frames"], [str])
    for i in range(n_transformations):
        Ts = []
        for j in range(len(subunits)):
            center = alg.Vector3D(0, 0, 0)
            T = alg.Transformation3D(
                alg.get_random_rotation_3d(),
                alg.get_random_vector_in(alg.Sphere3D(center, 34)))
            Ts.append(T)
        transformations.append(Ts)
    # Store each set of transformations as a single text record, with
    # the per-subunit transformations separated by "/"
    data = []
    for Ts in transformations:
        text = [io.Transformation3DToText(T).get_text() for T in Ts]
        text = "/".join(text)
        data.append([text, ])
    db.store_data(table_name, data)
    db.close()
    sch = sampling.SamplingSchema(4, fixed, anchored)
    sch.read_from_database(fn)
    # Check that the recovered transformations match the stored ones
    for i in range(len(transformations)):
        for j in range(len(subunits)):
            T = transformations[i][j]
            t = T.get_translation()
            q = T.get_rotation().get_quaternion()
            pos = sch.transformations[j][i].get_translation()
            ori = sch.transformations[j][i].get_rotation().get_quaternion()
            for k in range(3):
                self.assertAlmostEqual(pos[k], t[k])
            for k in range(4):
                self.assertAlmostEqual(q[k], ori[k])
    os.remove(fn)
def setUp(self):
    IMP.test.TestCase.setUp(self)
    self.db = database.Database2()
    self.fn = IMP.create_temporary_file_name('test.db')
    self.db.create(self.fn, True)
    self.db.connect(self.fn)
    # Speed up tests by not waiting for the disk
    self.db.cursor.execute('PRAGMA synchronous=OFF')
    self.tables = ["mytable1", "mytable2"]
    self.column_names = ["id", "property", "value"]
    self.column_types = [int, str, float]
    for t in self.tables:
        self.db.create_table(t, self.column_names, self.column_types)
def gather_best_solution_results(fns, fn_output, max_number=50000,
                                 raisef=0.1, orderby="em2d"):
    """ Reads a set of database files and merges them into a single file.
        @param fns List of files with databases
        @param fn_output The database to create
        @param max_number Maximum number of records to keep, sorted
            according to orderby
        @param raisef Ratio of problematic database files tolerated before
            raising an error. This option is to tolerate some of the
            database files being broken because the cluster fails, fills
            the disks, etc.
        @param orderby Criterion used to sort the records
        NOTE: Makes sure to reorder all column names if necessary before
        merging. The record for the native solution is only added once
        (from the first file).
    """
    tbl = "results"
    # Get names and types of the columns from the first database file
    db = database.Database2()
    db.connect(fns[0])
    names = db.get_table_column_names(tbl)
    types = db.get_table_types(tbl)
    indices = get_sorting_indices(names)
    sorted_names = [names[i] for i in indices]
    sorted_types = [types[i] for i in indices]
    names.sort()
    ind = names.index(orderby)
    they_are_sorted = field_delim.join(names)
    # Get the native structure data from the first database
    sql_command = """SELECT %s FROM %s WHERE assignment="native"
                     LIMIT 1 """ % (they_are_sorted, tbl)
    native_data = db.retrieve_data(sql_command)
    db.close()
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    out_db.create_table(tbl, sorted_names, sorted_types)
    best_records = []
    n_problems = 0
    for fn in fns:
        try:
            log.info("Reading %s", fn)
            db.connect(fn)
            # log.debug("Retrieving %s", they_are_sorted)
            sql_command = """SELECT %s FROM %s WHERE assignment<>"native"
                             ORDER BY %s ASC LIMIT %s """ % (
                they_are_sorted, tbl, orderby, max_number)
            data = db.retrieve_data(sql_command)
            log.info("%s records read from %s", len(data), fn)
            db.close()
            # Fill the heap
            for d in data:
                a = HeapRecord(d, ind)
                if len(best_records) < max_number:
                    heapq.heappush(best_records, a)
                else:
                    # remember that < here compares for greater em2d value,
                    # as a HeapRecord is used
                    if best_records[0] < a:
                        heapq.heapreplace(best_records, a)
        except Exception as e:
            log.error("Error for %s: %s", fn, e)
            n_problems += 1
    # If the number of problematic files is too high, report that something
    # big is going on. Otherwise tolerate some errors from some tasks that
    # failed (memory errors, locks, writing errors ...)
    ratio = float(n_problems) / float(len(fns))
    if ratio > raisef:
        raise IOError("There are %8.1f %s of the database "
                      "files to merge with problems! " % (ratio * 100, "%"))
    # append the native data to the best_records
    heapq.heappush(best_records, native_data[0])
    out_db.store_data(tbl, best_records)
    out_db.close()
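# HeapRecord, used above, is not defined in this excerpt. The sketch below
# assumes only what the code above implies: it wraps a database record
# together with the index of the column to sort by, and inverts "<" on that
# column so that heapq's min-heap keeps the max_number records with the
# *lowest* em2d values (best_records[0] is always the worst record kept).
# This is an illustration, not the module's actual definition.


class HeapRecord(tuple):

    def __new__(cls, record, index):
        obj = tuple.__new__(cls, record)
        obj._index = index  # position of the sort column (e.g. "em2d")
        return obj

    def __lt__(self, other):
        # Inverted on purpose: "smaller" means a *larger* value in the
        # sort column, so the heap root is the next candidate to evict.
        # heapq and the comparison in the loop above only need __lt__.
        return self[self._index] > other[self._index]

# With such a wrapper, keeping the best max_number out of millions of
# records costs O(log max_number) per record instead of sorting everything.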
def make_db_with_table(self, fname, tbl):
    db = database.Database2()
    db.create(fname, overwrite=True)
    db.connect(fname)
    db.create_table(tbl, ["foo", "bar"], [int, float])
    db.close()