def test_import_flatflit(self):
    """Import a tab-separated file whose second data row holds a stray tab.

    The test writes 10000 random ``(name, number)`` rows plus one extra
    row whose name is ``" one\\ttab"``, strips every double quote from the
    file, imports the result into table ``example`` and checks that:

    * the table contains 10001 rows (one per written record);
    * the reported columns are ``[('name', str), ('number', int)]``.

    Raises:
        AssertionError: if the row count or the column list differs from
            the expected values.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    fold = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(fold, "temp_db_bug")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp, exist_ok=True)

    text = [
        "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "ten",
    ]
    data = [
        {"name": random.choice(text), "number": random.randint(0, 99)}
        for _ in range(10000)
    ]

    # The extra row's name contains the separator itself. pandas quotes such
    # a field when writing; the quotes are removed below, so that line ends
    # up with one more tab-separated field than the header declares.
    datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]

    filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
    stripped = filename + ".2.txt"

    df = pandas.DataFrame(datatab)
    df.to_csv(filename, sep="\t", encoding="utf8", header=True, index=False)

    with open(filename, "r", encoding="utf8") as f:
        content = f.read()
    content = content.replace('"', '')
    with open(stripped, "w", encoding="utf8") as f:
        f.write(content)

    # Start from a fresh database so rows from a previous run are not counted.
    dbfile = os.path.join(fold, "out_db.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    import_flatfile_into_database(dbfile, stripped, table="example",
                                  fLOG=fLOG)

    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        count = db.get_table_nb_lines("example")
        sch = db.get_table_columns("example")
        values = db.execute_view("SELECT * FROM example")
    finally:
        # Release the connection even when one of the queries fails.
        db.close()

    if count != 10001:
        rows = [str(v) for v in values][:10]
        mes = "\n".join(rows)
        fLOG(datatab[:3])
        # AssertionError (still an Exception) makes unittest report a test
        # failure rather than an unexpected error.
        raise AssertionError(
            "expect:10001 not {0}\nROWS:\n{1}".format(count, mes))

    exp = [('name', str), ('number', int)]
    if sch != exp:
        raise AssertionError(
            "{0}!={1} ({2})".format(sch, exp, len(datatab)))
def test_import_index(self):
    """Import ``data/ACA.PA.txt`` twice, compare the tables, then index one.

    The same flat file is loaded into a fresh SQLite file through
    ``import_flatfile_into_database`` and through
    ``import_flatfile_into_database_pandas`` (table ``pandas_table``).
    Both resulting tables must have the same shape, column names and
    dtypes.  The test then creates index ``index1`` on ``ACAPA(Date)``,
    checks the index list, and re-inserts / updates the first row of
    ``ACAPA`` to exercise ``insert`` and ``update``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    flat_file = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database_index.db3")
    # Start from a fresh database so tables from a previous run do not clash.
    if os.path.exists(dbf):
        os.remove(dbf)

    fLOG("import flat file")
    ntbl2 = import_flatfile_into_database(dbf, flat_file, fLOG=fLOG)
    fLOG("import flat file pandas")
    ntbl3 = import_flatfile_into_database_pandas(
        dbf, flat_file, fLOG=fLOG, table="pandas_table", chunksize=500,
        sep="\t")
    fLOG("-----------------")

    db = Database(dbf, LOG=fLOG)
    db.connect()
    try:
        tbl1 = pandas.read_sql("SELECT * FROM " + ntbl2, db._connection)
        tbl2 = pandas.read_sql("SELECT * FROM " + ntbl3, db._connection)
    finally:
        # Release the connection even when one of the reads fails.
        db.close()

    self.assertEqual(tbl1.shape, tbl2.shape)
    self.assertEqual(list(tbl1.columns), list(tbl2.columns))
    self.assertEqual(list(tbl1.dtypes), list(tbl2.dtypes))

    # assertTrue instead of a bare assert: the check survives ``python -O``.
    self.assertTrue(os.path.exists(dbf))

    db = Database(dbf, LOG=fLOG)
    db.connect()
    try:
        # NOTE(review): "ACAPA" is presumably the table name returned in
        # ntbl2 (derived from the file name) -- confirm against the importer.
        db.create_index("index1", "ACAPA", "Date")
        li = db.get_index_list()
        self.assertEqual(
            li,
            [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)',
              ('Date',))])

        first_row = db.get_table_nfirst_lines("ACAPA")[0]
        col = [c[0] for c in db.get_table_columns("ACAPA")]
        # Map column name -> value for the first row of the table.
        add = dict(zip(col, first_row))

        db.insert("ACAPA", add)
        db.commit()
        db.update("ACAPA", "Date", add["Date"], add)
        db.commit()
    finally:
        # Close the connection even when an assertion above fails.
        db.close()
def test_import_index(self):
    """Import ``data/ACA.PA.txt`` twice, compare the tables, then index one.

    The same flat file is loaded into a fresh SQLite file through
    ``import_flatfile_into_database`` and through
    ``import_flatfile_into_database_pandas`` (table ``pandas_table``).
    Both resulting tables must have the same shape, column names and
    dtypes.  The test then creates index ``index1`` on ``ACAPA(Date)``,
    checks the index list, and re-inserts / updates the first row of
    ``ACAPA`` to exercise ``insert`` and ``update``.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    here = os.path.abspath(os.path.split(__file__)[0])
    flat_file = os.path.join(here, "data", "ACA.PA.txt")
    dbf = os.path.join(here, "temp_database_index.db3")
    # Start from a fresh database so tables from a previous run do not clash.
    if os.path.exists(dbf):
        os.remove(dbf)

    fLOG("import flat file")
    ntbl2 = import_flatfile_into_database(dbf, flat_file, fLOG=fLOG)
    fLOG("import flat file pandas")
    ntbl3 = import_flatfile_into_database_pandas(
        dbf, flat_file, fLOG=fLOG, table="pandas_table", chunksize=500,
        sep="\t")
    fLOG("-----------------")

    db = Database(dbf, LOG=fLOG)
    db.connect()
    try:
        tbl1 = pandas.read_sql("SELECT * FROM " + ntbl2, db._connection)
        tbl2 = pandas.read_sql("SELECT * FROM " + ntbl3, db._connection)
    finally:
        # Release the connection even when one of the reads fails.
        db.close()

    self.assertEqual(tbl1.shape, tbl2.shape)
    self.assertEqual(list(tbl1.columns), list(tbl2.columns))
    self.assertEqual(list(tbl1.dtypes), list(tbl2.dtypes))

    # assertTrue instead of a bare assert: the check survives ``python -O``.
    self.assertTrue(os.path.exists(dbf))

    db = Database(dbf, LOG=fLOG)
    db.connect()
    try:
        # NOTE(review): "ACAPA" is presumably the table name returned in
        # ntbl2 (derived from the file name) -- confirm against the importer.
        db.create_index("index1", "ACAPA", "Date")
        li = db.get_index_list()
        self.assertEqual(
            li,
            [('index1', 'ACAPA', 'CREATE INDEX index1 ON ACAPA (Date)',
              ('Date',))])

        first_row = db.get_table_nfirst_lines("ACAPA")[0]
        col = [c[0] for c in db.get_table_columns("ACAPA")]
        # Map column name -> value for the first row of the table.
        add = dict(zip(col, first_row))

        db.insert("ACAPA", add)
        db.commit()
        db.update("ACAPA", "Date", add["Date"], add)
        db.commit()
    finally:
        # Close the connection even when an assertion above fails.
        db.close()
def test_import_flatflit(self):
    """Import a tab-separated file whose second data row holds a stray tab.

    The test writes 10000 random ``(name, number)`` rows plus one extra
    row whose name is ``" one\\ttab"``, strips every double quote from the
    file, imports the result into table ``example`` and checks that:

    * the table contains 10001 rows (one per written record);
    * the reported columns are ``[('name', str), ('number', int)]``.

    Raises:
        AssertionError: if the row count or the column list differs from
            the expected values.
    """
    fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")

    fold = os.path.abspath(os.path.split(__file__)[0])
    temp = os.path.join(fold, "temp_db_bug")
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(temp, exist_ok=True)

    text = [
        "one", "two", "three", "four", "five",
        "six", "seven", "eight", "nine", "ten",
    ]
    data = [
        {"name": random.choice(text), "number": random.randint(0, 99)}
        for _ in range(10000)
    ]

    # The extra row's name contains the separator itself. pandas quotes such
    # a field when writing; the quotes are removed below, so that line ends
    # up with one more tab-separated field than the header declares.
    datatab = data[:1] + [{"name": " one\ttab", "number": 100}] + data[1:]

    filename = os.path.join(temp, "out_flatfile_tab_pos2.txt")
    stripped = filename + ".2.txt"

    df = pandas.DataFrame(datatab)
    df.to_csv(filename, sep="\t", encoding="utf8", header=True, index=False)

    with open(filename, "r", encoding="utf8") as f:
        content = f.read()
    content = content.replace('"', '')
    with open(stripped, "w", encoding="utf8") as f:
        f.write(content)

    # Start from a fresh database so rows from a previous run are not counted.
    dbfile = os.path.join(fold, "out_db.db3")
    if os.path.exists(dbfile):
        os.remove(dbfile)

    import_flatfile_into_database(dbfile, stripped, table="example",
                                  fLOG=fLOG)

    db = Database(dbfile, LOG=fLOG)
    db.connect()
    try:
        count = db.get_table_nb_lines("example")
        sch = db.get_table_columns("example")
        values = db.execute_view("SELECT * FROM example")
    finally:
        # Release the connection even when one of the queries fails.
        db.close()

    if count != 10001:
        rows = [str(v) for v in values][:10]
        mes = "\n".join(rows)
        fLOG(datatab[:3])
        # AssertionError (still an Exception) makes unittest report a test
        # failure rather than an unexpected error.
        raise AssertionError(
            "expect:10001 not {0}\nROWS:\n{1}".format(count, mes))

    exp = [('name', str), ('number', int)]
    if sch != exp:
        raise AssertionError(
            "{0}!={1} ({2})".format(sch, exp, len(datatab)))