def dates(args):
    """Run the date sub-command selected by ``args.action``.

    ``args.action`` is compared, in order, with ``print_max_datetimes``,
    ``print_min_datetimes``, ``print_start_end_datetimes`` and ``plot_dates``;
    the first match opens the database at ``args.database_file`` and calls the
    corresponding ``DatasetDatabase`` / ``DatasetPlotter`` method. An action
    that matches none of them does nothing.

    Args:
        args: Object exposing ``action``, ``database_file`` and ``range``.
            ``threshold`` is read by the start/end and plot actions;
            ``use_file`` and ``all`` are read by the plot action only
            (presumably an argparse namespace -- confirm against the caller).

    Note:
        ``args.range`` is rewritten in place: a truthy value is split on
        ``"--"`` into a list before it is handed to the database.
        NOTE(review): no branch calls ``disconnect()`` on the connection it
        opens -- confirm that this is intended.
    """
    if args.range:
        args.range = args.range.split("--")

    def first_plus_last(points):
        # Sort key: the first and last element of each entry, combined with "+".
        return points[0] + points[-1]

    selected = args.action

    if selected == print_max_datetimes:
        DatasetDatabase(args.database_file).connect().print_max_date_times()
        return

    if selected == print_min_datetimes:
        DatasetDatabase(args.database_file).connect().print_min_date_times()
        return

    if selected == print_start_end_datetimes:
        DatasetDatabase(args.database_file).connect().print_start_end_points(
            range=args.range,
            point_threshold=args.threshold,
        )
        return

    if selected != plot_dates:
        return

    if args.all:
        # Plot every point of every entry returned by the database.
        fetched = DatasetDatabase(args.database_file).connect().get_all_points(
            range=args.range,
            use_file=args.use_file,
            point_threshold=args.threshold,
        )
        all_points = [entry for _, entry in fetched.items()]
        DatasetPlotter.plot_all_points(sorted(all_points, key=first_plus_last))
    else:
        # Plot only the start/end points of each entry.
        fetched = DatasetDatabase(args.database_file).connect().get_start_end_points(
            range=args.range,
            use_file=args.use_file,
            point_threshold=args.threshold,
        )
        start_end_pairs = [entry for _, entry in fetched.items()]
        DatasetPlotter.plot_start_end_points(sorted(start_end_pairs, key=first_plus_last))
def dates(args):
    """Dispatch on ``args.action`` to print or plot date information.

    The database at ``args.database_file`` is opened only inside the branch
    whose action constant equals ``args.action``; when no constant matches,
    the function returns without touching the database.

    Args:
        args: Object exposing ``action``, ``database_file`` and ``range``,
            plus ``threshold`` (start/end and plot actions) and ``use_file``
            and ``all`` (plot action). Presumably an argparse namespace --
            verify against the caller.

    Note:
        A truthy ``args.range`` is replaced in place by the list obtained from
        splitting it on ``"--"``.
        NOTE(review): the connection opened in each branch is never
        explicitly disconnected here -- confirm that this is intended.
    """
    if args.range:
        args.range = args.range.split("--")

    if args.action == print_max_datetimes:
        database = DatasetDatabase(args.database_file).connect()
        database.print_max_date_times()
    elif args.action == print_min_datetimes:
        database = DatasetDatabase(args.database_file).connect()
        database.print_min_date_times()
    elif args.action == print_start_end_datetimes:
        database = DatasetDatabase(args.database_file).connect()
        database.print_start_end_points(range=args.range, point_threshold=args.threshold)
    elif args.action == plot_dates:
        if args.all:
            database = DatasetDatabase(args.database_file).connect()
            by_name = database.get_all_points(
                range=args.range,
                use_file=args.use_file,
                point_threshold=args.threshold,
            )
            # Only the mapping's values are plotted; the keys are dropped.
            series = [item for _key, item in by_name.items()]
            series.sort(key=lambda item: item[0] + item[-1])
            DatasetPlotter.plot_all_points(series)
        else:
            database = DatasetDatabase(args.database_file).connect()
            by_name = database.get_start_end_points(
                range=args.range,
                use_file=args.use_file,
                point_threshold=args.threshold,
            )
            # Only the mapping's values are plotted; the keys are dropped.
            pairs = [item for _key, item in by_name.items()]
            pairs.sort(key=lambda item: item[0] + item[-1])
            DatasetPlotter.plot_start_end_points(pairs)
def test(testfiles):
    """Convert the ``data10000`` fixture to SQLite, then to HDF5, and check lengths.

    The expected length is the number of whole seconds between the first and
    the last date-time reported by the SQLite database, plus one. Every
    top-level entry of the HDF5 file must have exactly that length.

    Args:
        testfiles: Mapping that provides the ``"data10000"`` dataset
            (presumably a pytest fixture -- confirm in the test setup).
    """
    raw_dataset = testfiles["data10000"]
    sqlite_path = "dataset10000.db"
    hdf5_path = "h510000.db"

    # Stage 1: raw dataset -> SQLite; stage 2: SQLite -> HDF5.
    DatasetConverter(raw_dataset, sqlite_path).convert()
    DatasetDB2HDF5(sqlite_path, hdf5_path).convert()

    database = DatasetDatabase(sqlite_path)
    database.connect()
    start = dt.datetime.strptime(database.get_first_datetime(None), DATE_FORMAT)
    end = dt.datetime.strptime(database.get_last_datetime(None), DATE_FORMAT)
    span = end - start
    # Whole seconds covered by the span, counting both end points.
    expected_len = span.days * 3600 * 24 + span.seconds + 1
    database.disconnect()

    with h5py.File(hdf5_path, 'r') as h5_file:
        for entry_name in h5_file.keys():
            assert h5_file[entry_name].len() == expected_len
def calc(args):
    """Print the covered time span and the estimated interpolated dataset size.

    Reads the first and last date-time and the distinct time-series names from
    the database at ``args.database_file``, then prints:

    * the first and last date-time,
    * their difference,
    * the number of points per time series (whole seconds in the span, plus
      one),
    * the total number of points over all series, and
    * the estimated size in MB at 4 bytes per point.

    The connection is released in a ``finally`` clause, so a failing query or
    an unparsable timestamp no longer leaves it open.

    Args:
        args: Object exposing ``database_file`` (presumably an argparse
            namespace -- confirm against the caller).
    """
    db = DatasetDatabase(args.database_file)
    db.connect()
    try:
        first_datetime = dt.datetime.strptime(db.get_first_datetime(None), DATE_FORMAT)
        last_datetime = dt.datetime.strptime(db.get_last_datetime(None), DATE_FORMAT)
        ts_names = db.get_distinct_names()

        delta = last_datetime - first_datetime
        # Whole seconds in the span, counting both end points.
        pnum = delta.days * 3600 * 24 + delta.seconds + 1
        total_points = pnum * len(ts_names)

        print(first_datetime.strftime("%m/%d/%Y-%H:%M:%S") + " - " +
              last_datetime.strftime("%m/%d/%Y-%H:%M:%S"))
        print("delta: " + str(delta))
        print("points per time series: %d" % pnum)
        print("total points in interpolated dataset: " + str(total_points))
        print("Estimated size (4 bytes per point): %f MB" %
              (total_points * 4.0 / 1024.0 / 1024.0))
    finally:
        # Runs on success and on error alike; on success it is still the last
        # step, exactly as before.
        db.disconnect()
def calc(args):
    """Report the time span of the database and the interpolated dataset size.

    Queries the database at ``args.database_file`` for its first and last
    date-time and its distinct time-series names, then prints the span, the
    point count per series (whole seconds in the span, plus one), the total
    point count and the estimated size in MB at 4 bytes per point.

    ``db.disconnect()`` is called from a ``finally`` clause so the connection
    is closed even if a query or ``strptime`` raises.

    Args:
        args: Object exposing ``database_file`` (presumably an argparse
            namespace -- confirm against the caller).
    """
    span_format = "%m/%d/%Y-%H:%M:%S"

    db = DatasetDatabase(args.database_file)
    db.connect()
    try:
        first_datetime = dt.datetime.strptime(db.get_first_datetime(None), DATE_FORMAT)
        last_datetime = dt.datetime.strptime(db.get_last_datetime(None), DATE_FORMAT)
        ts_names = db.get_distinct_names()

        delta = last_datetime - first_datetime
        # Both end points are counted, hence the "+ 1".
        pnum = delta.days * 3600 * 24 + delta.seconds + 1
        total_points = pnum * len(ts_names)
        size_mb = total_points * 4.0 / 1024.0 / 1024.0

        print(first_datetime.strftime(span_format) + " - " +
              last_datetime.strftime(span_format))
        print("delta: " + str(delta))
        print("points per time series: %d" % pnum)
        print("total points in interpolated dataset: " + str(total_points))
        print("Estimated size (4 bytes per point): %f MB" % size_mb)
    finally:
        # Keeps the original ordering on success while also covering errors.
        db.disconnect()
def test_converter(testfiles):
    """Convert the ``data100`` fixture to SQLite and check one stored series.

    The rows of the ``Forex·EURSEK·NoExpiry`` time series are read back and
    compared with the three expected tuples.

    The database is disconnected and its file deleted in ``finally`` clauses,
    so a failed conversion or assertion cannot leave ``./test_database.db``
    behind for the next run.

    Args:
        testfiles: Mapping that provides the ``"data100"`` dataset
            (presumably a pytest fixture -- confirm in the test setup).
    """
    db_path = "./test_database.db"
    try:
        dc = DatasetConverter(testfiles["data100"], db_path)
        dc.convert()

        db = DatasetDatabase(db_path)
        db.connect()
        try:
            # Materialise the rows while the connection is still open.
            rows = db.get_time_series("Forex·EURSEK·NoExpiry").fetchall()
        finally:
            db.disconnect()

        assert rows == [
            ("07/08/2015", "00:05:12", "9.37086666666667", "1.0"),
            ("07/08/2015", "00:05:13", "9.3714", "1.0"),
            ("07/08/2015", "00:05:14", "9.3713", "1.0"),
        ]
    finally:
        # The file may not exist if the conversion failed before creating it.
        if os.path.exists(db_path):
            os.remove(db_path)
def test_converter(testfiles):
    """Check that converting the ``data100`` fixture stores the expected rows.

    After the conversion, the ``Forex·EURSEK·NoExpiry`` time series is fetched
    from the generated SQLite database and compared with three expected
    tuples.

    Teardown (disconnect, then delete ``./test_database.db``) runs from
    ``finally`` clauses so it also happens when the test fails; otherwise a
    stale database file would survive into the next run.

    Args:
        testfiles: Mapping that provides the ``"data100"`` dataset
            (presumably a pytest fixture -- confirm in the test setup).
    """
    db_path = "./test_database.db"
    expected_rows = [
        ("07/08/2015", "00:05:12", "9.37086666666667", "1.0"),
        ("07/08/2015", "00:05:13", "9.3714", "1.0"),
        ("07/08/2015", "00:05:14", "9.3713", "1.0"),
    ]

    try:
        DatasetConverter(testfiles["data100"], db_path).convert()

        db = DatasetDatabase(db_path)
        db.connect()
        try:
            ts = db.get_time_series("Forex·EURSEK·NoExpiry")
            stored_rows = ts.fetchall()
        finally:
            db.disconnect()

        assert stored_rows == expected_rows
    finally:
        # Guarded so a failed conversion is not masked by a missing-file error.
        if os.path.exists(db_path):
            os.remove(db_path)
def test_DatasetDatabase():
    """Exercise single-row and multi-row storage in ``DatasetDatabase``.

    Part 1 stores one row with ``store_data`` and reads it back twice: through
    a raw cursor on ``db.conn`` and through ``get_time_series``.
    Part 2 stores six rows with ``store_multiple_data`` and reads them back
    through ``get_time_series``.

    Each part works on a fresh ``test_db`` file. Teardown runs from a
    ``finally`` clause, so a failing assertion no longer leaves an open
    connection or a stale database file that could affect later runs.
    """
    test_db_filename = "test_db"

    def discard(database):
        """Disconnect *database* if it is still open, then delete the file."""
        if getattr(database, "conn", None) is not None:
            database.disconnect()
        if os.path.exists(test_db_filename):
            os.remove(test_db_filename)

    #
    # Part 1 (insert 1 row)
    #
    db = DatasetDatabase(test_db_filename)
    try:
        db.connect()
        db.store_data("time-series1", 0, "11-11-2015", "19:12:00", 123.4, 1)

        assert isinstance(db.conn, sql.Connection)
        c = db.conn.cursor()
        assert isinstance(c, sql.Cursor)

        c.execute("SELECT * from dataset")
        # The numeric arguments 123.4 and 1 are expected back as strings.
        assert c.fetchone() == ("time-series1", 0, "11-11-2015", "19:12:00", "123.4", "1")

        iterator = db.get_time_series("time-series1")
        assert iterator is not None
        for row in iterator:
            assert row == ("11-11-2015", "19:12:00", "123.4", "1")

        db.disconnect()
        assert db.conn is None
    finally:
        discard(db)

    #
    # Part 2 (insert multiple rows)
    #
    db = DatasetDatabase(test_db_filename)
    try:
        db.connect()
        data = [
            ("time-series1", 1, "11-11-2015", "19:12:01", "123.5", "1"),
            ("time-series1", 2, "11-11-2015", "19:12:02", "123.6", "1"),
            ("time-series1", 3, "11-11-2015", "19:12:03", "123.7", "1"),
            ("time-series1", 4, "11-11-2015", "19:12:04", "123.8", "1"),
            ("time-series1", 5, "11-11-2015", "19:12:05", "123.9", "1"),
            ("time-series1", 6, "11-11-2015", "19:12:06", "123.5", "1"),
        ]
        db.store_multiple_data(data)

        iterator = db.get_time_series("time-series1")
        assert iterator.fetchall() == [
            ("11-11-2015", "19:12:01", "123.5", "1"),
            ("11-11-2015", "19:12:02", "123.6", "1"),
            ("11-11-2015", "19:12:03", "123.7", "1"),
            ("11-11-2015", "19:12:04", "123.8", "1"),
            ("11-11-2015", "19:12:05", "123.9", "1"),
            ("11-11-2015", "19:12:06", "123.5", "1"),
        ]

        db.disconnect()
        assert db.conn is None
    finally:
        discard(db)