def test_database_from_dataframes():
    """Build a database from two dataframes and check both tables' contents.

    Table "A" gets a primary key ("numbers") and a composite index on
    ("numbers", "strings"); table "B" gets neither.  Verifies that every
    row of both input dataframes round-trips through the database.

    NOTE: this test was previously defined twice in this file with only
    whitespace differences; the second definition silently shadowed the
    first, so only one copy ever ran.  Collapsed into a single definition.
    """
    with NamedTemporaryFile(suffix="test.db") as f:
        db = db_from_dataframes(
            db_filename=f.name,
            dataframes={"A": dfA, "B": dfB},
            primary_keys={"A": "numbers"},
            indices={"A": [("numbers", "strings")]},
            subdir="test_datacache")
        cursor_A = db.execute("SELECT * FROM A")
        results_A = cursor_A.fetchall()
        eq_(results_A, [(1, "a"), (2, "b"), (3, "c")])
        cursor_B = db.execute("SELECT * FROM B")
        results_B = cursor_B.fetchall()
        eq_(results_B, [("nuzzle",), ("ruzzle",)])
def _create_database(self, force=False):
    """Create the local SQLite database backing this annotation set.

    Splits the single GTF dataframe into one table per distinct value of
    the "feature" column, choosing a primary key (when the feature has an
    ID column) and a set of indices for each table, then delegates the
    actual database construction to ``datacache.db_from_dataframes``.

    Parameters
    ----------
    force : bool
        When True, overwrite any previously cached database file.

    Returns the resulting database connection (also stored on
    ``self._connection``).
    """
    print("Creating database: %s" % self.local_db_path())
    db_filename = self.local_db_filename()
    full_df = self.gtf.dataframe()
    candidate_index_groups = self._all_possible_indices(full_df.columns)

    # Build the per-feature table dict along with each table's indices
    # and (optional) primary key.
    tables = {}
    table_indices = {}
    table_primary_keys = {}
    for feature_name in full_df["feature"].unique():
        subset = full_df[full_df.feature == feature_name]
        tables[feature_name] = subset
        key = self._get_primary_key(feature_name, subset)
        if key:
            table_primary_keys[feature_name] = key
        table_indices[feature_name] = self._feature_indices(
            candidate_index_groups, key, subset)

    self._connection = datacache.db_from_dataframes(
        db_filename=db_filename,
        dataframes=tables,
        indices=table_indices,
        primary_keys=table_primary_keys,
        subdir=CACHE_SUBDIR,
        overwrite=force,
        version=DATABASE_SCHEMA_VERSION,
    )
    return self._connection