def test_basic_query(self):
    """Run a plain SELECT and compare the result to a known structured array.

    Connects with ``self.conn_str``, asserts that the connection reports
    ``psql_optimized``, then checks that ``SELECT * FROM employees`` yields
    exactly the three expected employee records.
    """
    connection = read.connect_sql(self.conn_str)
    self.assertTrue(connection.psql_optimized)
    result = connection.execute('SELECT * FROM employees')

    # Expected contents of the employees table, one tuple per row.
    employee_dtype = [
        ('id', '<i8'),
        ('last_name', 'O'),
        ('first_name', 'O'),
        ('salary', '<f8'),
        ('height', '<f8'),
        ('usefulness', '<i8'),
    ]
    employee_rows = [
        (1, u'Arthur', u'King', 40000.0, 2.1, 10),
        (2, u'Jones', u'James', 1000000.0, 1.9, 2),
        (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6),
    ]
    expected = np.array(employee_rows, dtype=employee_dtype)

    self.assertTrue(np.array_equal(result, expected))
def test_connect_sql(self):
    """Query the bundled ``small.db`` with and without ``allow_caching``.

    The same full-table query is checked on a default connection and on a
    connection opened with ``allow_caching=True``; the caching connection is
    additionally asked for a single column and then for the full table again.
    """
    db_path = utils_for_tests.path_of_data('small.db')
    conn_str = 'sqlite:///{}'.format(db_path)

    select_all = 'SELECT * FROM employees'
    select_ids = 'SELECT id FROM employees'

    full_dtype = [
        ('id', '<i8'),
        ('last_name', 'O'),
        ('first_name', 'O'),
        ('salary', '<f8'),
        ('height', '<f8'),
        ('usefulness', '<i8'),
    ]
    expected_all = np.array(
        [
            (1, u'Arthur', u'King', 40000.0, 2.1, 10),
            (2, u'Jones', u'James', 1000000.0, 1.9, 2),
            (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6),
        ],
        dtype=full_dtype,
    )

    # Default connection.
    plain_conn = read.connect_sql(conn_str)
    rows = plain_conn.execute(select_all)
    self.assertTrue(np.array_equal(rows, expected_all))

    # Connection with caching enabled.
    caching_conn = read.connect_sql(conn_str, allow_caching=True)
    rows = caching_conn.execute(select_all)
    self.assertTrue(np.array_equal(rows, expected_all))

    rows = caching_conn.execute(select_ids)
    expected_ids = np.array([(1,), (2,), (3,)], dtype=[('id', '<i8')])
    self.assertTrue(np.array_equal(rows, expected_ids))

    # Same query as the first one on this connection; presumably served
    # from the cache -- confirm against connect_sql.
    rows = caching_conn.execute(select_all)
    self.assertTrue(np.array_equal(rows, expected_all))
def test_connect_sql(self):
    """Check query results from ``small.db`` on plain and caching connections.

    A default connection is checked against the full employees table. A
    second connection opened with ``allow_caching=True`` is then driven
    through three queries in order: full table, ``id`` column only, and the
    full table once more.
    """
    conn_str = 'sqlite:///{}'.format(utils_for_tests.path_of_data('small.db'))

    employees = np.array(
        [
            (1, u'Arthur', u'King', 40000.0, 2.1, 10),
            (2, u'Jones', u'James', 1000000.0, 1.9, 2),
            (3, u'The Moabite', u'Ruth', 50000.0, 1.8, 6),
        ],
        dtype=[
            ('id', '<i8'),
            ('last_name', 'O'),
            ('first_name', 'O'),
            ('salary', '<f8'),
            ('height', '<f8'),
            ('usefulness', '<i8'),
        ],
    )
    employee_ids = np.array([(1,), (2,), (3,)], dtype=[('id', '<i8')])

    # Default connection: one full-table query.
    uncached = read.connect_sql(conn_str)
    fetched = uncached.execute('SELECT * FROM employees')
    self.assertTrue(np.array_equal(fetched, employees))

    # Caching connection: the full-table query is deliberately issued twice
    # with a different query in between.
    cached = read.connect_sql(conn_str, allow_caching=True)
    scenarios = (
        ('SELECT * FROM employees', employees),
        ('SELECT id FROM employees', employee_ids),
        ('SELECT * FROM employees', employees),
    )
    for query, expected in scenarios:
        fetched = cached.execute(query)
        self.assertTrue(np.array_equal(fetched, expected))
def get_rg_from_csv(
    self,
    csv_file_path,
    parse_datetimes=None,
    unit_id_col=None,
    start_time_col=None,
    stop_time_col=None,
    feature_col=None,
    val_col=None,
):
    """
    Get an RG-formatted table from a CSV file.

    The CSV is loaded into an in-memory SQLite database and a copy of this
    emitter is pointed at the resulting table.

    Parameters
    ----------
    csv_file_path : str
        Path of the csv file to import table from
    parse_datetimes : list of col names or None
        Columns that should be interpreted as datetimes. None (the
        default) is treated as an empty list.
    unit_id_col : str or None
        The name of the column containing unique unit IDs. For example,
        in Table 1, this is 'student_id'. If None, ArrayEmitter will
        pick the first otherwise unspecified column
    start_time_col : str or None
        The name of the column containing start time. In Table 1,
        this is 'start_year'. If None, ArrayEmitter will pick the second
        otherwise unspecified column.
    stop_time_col : str or None
        The name of the column containing the stop time. In Table 1,
        this is 'end_year'. If None, ArrayEmitter will pick the third
        otherwise unspecified column.
    feature_col : str or None
        The name of the column containing the feature name. In Table 1,
        this is 'feature'. If None, ArrayEmitter will pick the fourth
        otherwise unspecified column.
    val_col : str or None
        The name of the column containing the value for the given
        feature for the given user at the given time. In Table 1,
        this is 'value'. If None, ArrayEmitter will pick the fifth
        otherwise unspecified column.

    Returns
    -------
    ArrayEmitter
        Copy of this ArrayEmitter which has rg_table specified

    Examples
    --------
    >>> ae = ArrayEmitter()
    >>> ae = ae.get_rg_from_csv('table_1.csv')
    """
    # Resolve the sentinel to a fresh list on every call so that no list
    # object is shared between calls (as a `[]` default would be).
    datetime_cols = [] if parse_datetimes is None else parse_datetimes

    cp = self.__copy()

    # in-memory db
    conn = connect_sql("sqlite://")
    cp.__rg_table_name = utils.csv_to_sql(
        conn, csv_file_path, parse_datetimes=datetime_cols
    )
    cp.__conn = conn

    cp.__col_specs["unit_id"] = unit_id_col
    cp.__col_specs["start_time"] = start_time_col
    cp.__col_specs["stop_time"] = stop_time_col
    cp.__col_specs["feature"] = feature_col
    cp.__col_specs["val"] = val_col
    cp.__resolve_cols()

    # SQLite doesn't really have datetimes, so we transparently translate
    # to unix times.
    cp.__convert_to_unix_time = True
    return cp
def get_rg_from_sql(
    self,
    conn_str,
    table_name,
    unit_id_col=None,
    start_time_col=None,
    stop_time_col=None,
    feature_col=None,
    val_col=None,
):
    """
    Get an RG-formatted table from a table in a SQL database.

    Parameters
    ----------
    conn_str : str
        SQLAlchemy connection string to connect to the database and run
        the query.
    table_name : str
        The name of the RG-formatted table in the database
    unit_id_col : str or None
        The name of the column containing unique unit IDs. For example,
        in Table 1, this is 'student_id'. If None, ArrayEmitter will
        pick the first otherwise unspecified column
    start_time_col : str or None
        The name of the column containing start time. In Table 1,
        this is 'start_year'. If None, ArrayEmitter will pick the second
        otherwise unspecified column.
    stop_time_col : str or None
        The name of the column containing the stop time. In Table 1,
        this is 'end_year'. If None, ArrayEmitter will pick the third
        otherwise unspecified column.
    feature_col : str or None
        The name of the column containing the feature name. In Table 1,
        this is 'feature'. If None, ArrayEmitter will pick the fourth
        otherwise unspecified column.
    val_col : str or None
        The name of the column containing the value for the given
        feature for the given user at the given time. In Table 1,
        this is 'value'. If None, ArrayEmitter will pick the fifth
        otherwise unspecified column.

    Returns
    -------
    ArrayEmitter
        Copy of this ArrayEmitter which has rg_table specified

    Examples
    --------
    >>> conn_str = ...
    >>> ae = ArrayEmitter()
    >>> ae = ae.get_rg_from_sql(conn_str, 'table_1',
    ...                         unit_id_col='student_id')
    """
    emitter = self.__copy()

    # Open the connection with the pgres copy optimization allowed.
    emitter.__conn = connect_sql(conn_str, allow_pgres_copy_optimization=True)
    emitter.__rg_table_name = table_name

    # Record the caller's column choices, in the same order as the
    # signature; unspecified (None) entries are settled by __resolve_cols.
    requested_cols = (
        ("unit_id", unit_id_col),
        ("start_time", start_time_col),
        ("stop_time", stop_time_col),
        ("feature", feature_col),
        ("val", val_col),
    )
    for spec_key, col_name in requested_cols:
        emitter.__col_specs[spec_key] = col_name

    emitter.__resolve_cols()
    return emitter
from diogenes.array_emitter import M_to_rg from diogenes.read import connect_sql from settings import conn_str to_table = 'vancouver._Z_test_rg' conn = connect_sql(conn_str) sql = 'DROP TABLE ' + to_table conn.execute(sql) M_to_rg(conn_str, 'vancouver.test', 'vancouver._Z_test_rg', 'student_id', start_time_col='sat_date', feature_cols=('sat_score_math', 'sat_score_reading', 'sat_score_writing')) M_to_rg(conn_str, 'vancouver.test', 'vancouver._Z_test_rg', 'student_id', start_time_col='act_date', feature_cols=('act_score_composite', 'act_score_english', 'act_score_math', 'act_score_reading', 'act_score_science')) M_to_rg(conn_str, 'vancouver.student', 'vancouver._Z_test_rg',
from diogenes.array_emitter import M_to_rg from diogenes.read import connect_sql from settings import conn_str to_table = 'vancouver._Z_test_rg' conn = connect_sql(conn_str) sql = 'DROP TABLE ' + to_table conn.execute(sql) M_to_rg( conn_str, 'vancouver.test', 'vancouver._Z_test_rg', 'student_id', start_time_col='sat_date', feature_cols=('sat_score_math', 'sat_score_reading', 'sat_score_writing')) M_to_rg( conn_str, 'vancouver.test', 'vancouver._Z_test_rg', 'student_id', start_time_col='act_date', feature_cols=('act_score_composite', 'act_score_english', 'act_score_math', 'act_score_reading', 'act_score_science')) M_to_rg( conn_str, 'vancouver.student', 'vancouver._Z_test_rg',