def test_clickhouse_ds():
    """Integration test for ClickhouseDS.

    Requires a local ClickHouse server listening on the HTTP interface
    (port 8123). Builds a small fixture table, queries the top-2 rows
    through ClickhouseDS, and runs dataset analysis on the result.
    """
    HOST = 'localhost'
    PORT = 8123
    clickhouse_url = f'http://{HOST}:{PORT}'

    def _run(statement):
        # Fail fast on any setup statement: the original ignored the HTTP
        # response, so a failed DDL/insert only surfaced as a confusing
        # assertion failure much later.
        response = requests.post(clickhouse_url, data=statement)
        response.raise_for_status()
        return response

    _run('CREATE DATABASE IF NOT EXISTS test')
    _run('DROP TABLE IF EXISTS test.mock')
    _run("""CREATE TABLE test.mock( col1 String ,col2 Int64 ,col3 Array(UInt8) ) ENGINE=Memory""")
    # Three rows; col3 holds permutations of [1, 2, 3].
    _run("""INSERT INTO test.mock VALUES ('a',1,[1,2,3])""")
    _run("""INSERT INTO test.mock VALUES ('b',2,[2,3,1])""")
    _run("""INSERT INTO test.mock VALUES ('c',3,[3,1,2])""")

    clickhouse_ds = ClickhouseDS(
        'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2',
        host=HOST, port=PORT)

    # Top-2 rows by col2 descending are col2 = 3 and 2, so the sum is 5.
    assert (len(clickhouse_ds.df) == 2)
    assert (sum(map(int, clickhouse_ds.df['col2'])) == 5)
    assert (len(list(clickhouse_ds.df['col3'][1])) == 3)
    assert (set(clickhouse_ds.df.columns) == set(['col1', 'col2', 'col3']))

    mdb = Predictor(name='analyse_dataset_test_predictor')
    mdb.analyse_dataset(from_data=clickhouse_ds)
def test_mysql_ds():
    """Integration test for MySqlDS.

    Requires a local MySQL server on port 3306. Creates a 200-row fixture
    table, loads it through MySqlDS, and runs dataset analysis on it.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 3306

    con = mysql.connector.connect(host=HOST, port=PORT, user=USER,
                                  password=PASSWORD, database=DATABASE)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute(
            'CREATE TABLE test_mindsdb(col_1 Text, col_2 BIGINT, col_3 BOOL)')
        # Parameterized executemany instead of 200 string-built INSERTs:
        # same stored values (bools become 1/0 either way), no SQL built
        # by string interpolation, and a single round of statement handling.
        rows = [(f'This is string number {i}', i, i % 2 == 0)
                for i in range(200)]
        cur.executemany('INSERT INTO test_mindsdb VALUES (%s, %s, %s)', rows)
        con.commit()
        cur.close()
    finally:
        # Ensure the connection is released even if setup raises.
        con.close()

    mysql_ds = MySqlDS(table='test_mindsdb', host=HOST, user=USER,
                       password=PASSWORD, database=DATABASE, port=PORT)
    assert (len(mysql_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=mysql_ds)
def test_postgres_ds():
    """Integration test for PostgresDS (via pg8000).

    Requires a local PostgreSQL server on port 5432. Creates a 200-row
    fixture table (including Date and Int[] columns), loads it through
    PostgresDS, and runs dataset analysis on it.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DBNAME = 'postgres'
    PORT = 5432

    con = pg8000.connect(database=DBNAME, user=USER, password=PASSWORD,
                         host=HOST, port=PORT)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute(
            'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean, col_4 Date, col_5 Int [])'
        )
        for i in range(200):
            # One date per row, stepping one day back per index.
            dt = datetime.datetime.now() - datetime.timedelta(days=i)
            dt_str = dt.strftime('%Y-%m-%d')
            cur.execute(
                f'INSERT INTO test_mindsdb VALUES (\'String {i}\', {i}, {i % 2 == 0}, \'{dt_str}\', ARRAY [1, 2, {i}])'
            )
        con.commit()
    finally:
        # Ensure the connection is released even if setup raises.
        con.close()

    # Renamed from the original's misleading `mysql_ds`: this is a Postgres
    # datasource (purely a local variable, no interface change).
    postgres_ds = PostgresDS(table='test_mindsdb', host=HOST, user=USER,
                             password=PASSWORD, database=DBNAME, port=PORT)
    assert (len(postgres_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    mdb.analyse_dataset(from_data=postgres_ds)
# NOTE(review): fragment — HOST, USER and PASSWORD are referenced below but
# are not defined in this chunk; presumably assigned earlier in the full
# file (TODO confirm against the complete source).
DBNAME = 'postgres'
PORT = 5432

# (Re)create the fixture table on a local Postgres via psycopg2 and load
# 200 rows of (text, int, bool) test data.
con = psycopg2.connect(dbname=DBNAME, user=USER, password=PASSWORD,
                       host=HOST, port=PORT)
cur = con.cursor()
cur.execute('DROP TABLE IF EXISTS test_mindsdb')
cur.execute(
    'CREATE TABLE test_mindsdb(col_1 Text, col_2 Int, col_3 Boolean)')
for i in range(0, 200):
    # NOTE(review): "tring" is a typo in the inserted data, kept as-is here
    # since it is a runtime string.
    cur.execute(
        f'INSERT INTO test_mindsdb VALUES (\'This is tring number {i}\', {i}, {i % 2 == 0})'
    )
con.commit()
con.close()

# NOTE(review): the local is named `mysql_ds` although it wraps a
# PostgresDS — confusing, but left unchanged in this doc-only pass.
mysql_ds = PostgresDS(table='test_mindsdb', host=HOST, user=USER,
                      password=PASSWORD, database=DBNAME, port=PORT)
assert (len(mysql_ds._df) == 200)

# Run dataset analysis on the freshly loaded table.
mdb = Predictor(name='analyse_dataset_test_predictor')
mdb.analyse_dataset(from_data=mysql_ds)
# NOTE(review): this chunk begins mid-expression — the `data=...` line below
# is the tail of a requests.post(...) call whose opening (the URL argument)
# is outside the visible region.
data='CREATE DATABASE IF NOT EXISTS test')
# Reset the fixture table on a local ClickHouse (HTTP interface, port 8123).
requests.post('http://localhost:8123', data='DROP TABLE IF EXISTS test.mock')
requests.post('http://localhost:8123', data="""CREATE TABLE test.mock( col1 String ,col2 Int64 ,col3 Array(UInt8) ) ENGINE=Memory""")
# Three rows; col3 holds permutations of [1, 2, 3].
requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('a',1,[1,2,3])""")
requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('b',2,[2,3,1])""")
requests.post('http://localhost:8123', data="""INSERT INTO test.mock VALUES ('c',3,[3,1,2])""")
log.info('Querying data')
# ClickhouseDS is constructed with query only — host/port fall back to its
# defaults here (presumably localhost:8123; TODO confirm in ClickhouseDS).
clickhouse_ds = ClickhouseDS(
    'SELECT * FROM test.mock ORDER BY col2 DESC LIMIT 2')
log.info('Validating data integrity')
# Top-2 rows by col2 descending are col2 = 3 and 2, so the sum is 5.
assert (len(clickhouse_ds.df) == 2)
assert (sum(map(int, clickhouse_ds.df['col2'])) == 5)
assert (len(list(clickhouse_ds.df['col3'][1])) == 3)
assert (set(clickhouse_ds.df.columns) == set(['col1', 'col2', 'col3']))
mdb = Predictor(name='analyse_dataset_test_predictor')
mdb.analyse_dataset(from_data=clickhouse_ds)
log.info('Finished running ClickhouseDS tests successfully !')
import os

from mindsdb import Predictor, S3DS

# Build a datasource from the example CSV hosted on S3, then run
# dataset analysis on it.
datasource = S3DS(bucket_name='mindsdb-example-data',
                  file_path='home_rentals.csv')
predictor = Predictor(name='analyse_dataset_test_predictor')
predictor.analyse_dataset(from_data=datasource)
from mindsdb import Predictor

# Analyse the remote example dataset directly from its public URL.
DATA_URL = "https://s3.eu-west-2.amazonaws.com/mindsdb-example-data/home_rentals.csv"

predictor = Predictor(name='analyse_dataset_test_predictor')
predictor.analyse_dataset(from_data=DATA_URL)
def test_maria_ds():
    """Integration test for MariaDS against a local MariaDB server (port 4306).

    Builds a fixture table covering every column type the analyser should
    recognise (int, float, categorical, bool, text, date, datetime,
    timestamp, time), loads 200 rows, then checks that analyse_dataset
    classifies each column with the expected type and subtype.
    """
    HOST = 'localhost'
    USER = '******'
    PASSWORD = ''
    DATABASE = 'mysql'
    PORT = 4306

    con = mysql.connector.connect(host=HOST, port=PORT, user=USER,
                                  password=PASSWORD, database=DATABASE)
    try:
        cur = con.cursor()
        cur.execute('DROP TABLE IF EXISTS test_mindsdb')
        cur.execute("""CREATE TABLE test_mindsdb ( col_int BIGINT, col_float FLOAT, col_categorical Text, col_bool BOOL, col_text Text, col_date DATE, col_datetime DATETIME, col_timestamp TIMESTAMP, col_time TIME ) """)
        # Hoisted out of the loop (it was rebuilt — needlessly as an
        # f-string with no interpolation — on every iteration) and the 200
        # per-row execute() calls collapsed into a single executemany().
        query = """INSERT INTO test_mindsdb (col_int, col_float, col_categorical, col_bool, col_text, col_date, col_datetime, col_timestamp, col_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s) """
        rows = []
        for i in range(200):
            # One timestamp per row, stepping one day back per index.
            dt = datetime.datetime.now() - datetime.timedelta(days=i)
            rows.append((i, i + 0.01, f"Cat {i}", i % 2 == 0,
                         f"long long long text {i}", dt.date(), dt,
                         dt.strftime('%Y-%m-%d %H:%M:%S.%f'),
                         dt.strftime('%H:%M:%S.%f')))
        cur.executemany(query, rows)
        con.commit()
        cur.close()
    finally:
        # Ensure the connection is released even if setup raises.
        con.close()

    maria_ds = MariaDS(table='test_mindsdb', host=HOST, user=USER,
                       password=PASSWORD, database=DATABASE, port=PORT)
    assert (len(maria_ds._df) == 200)

    mdb = Predictor(name='analyse_dataset_test_predictor',
                    log_level=logging.ERROR)
    model_data = mdb.analyse_dataset(from_data=maria_ds)
    analysis = model_data['data_analysis_v2']
    assert model_data
    assert analysis

    def assert_expected_type(column_typing, expected_type, expected_subtype):
        # Each column must be classified with the expected type/subtype.
        # The distributions are asserted at 199 (not 200) — presumably the
        # analyser holds out one row; kept exactly as in the original.
        assert column_typing['data_type'] == expected_type
        assert column_typing['data_subtype'] == expected_subtype
        assert column_typing['data_type_dist'][expected_type] == 199
        assert column_typing['data_subtype_dist'][expected_subtype] == 199

    assert_expected_type(analysis['col_categorical']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.MULTIPLE)
    assert_expected_type(analysis['col_bool']['typing'],
                         DATA_TYPES.CATEGORICAL, DATA_SUBTYPES.SINGLE)
    assert_expected_type(analysis['col_int']['typing'],
                         DATA_TYPES.NUMERIC, DATA_SUBTYPES.INT)
    assert_expected_type(analysis['col_float']['typing'],
                         DATA_TYPES.NUMERIC, DATA_SUBTYPES.FLOAT)
    assert_expected_type(analysis['col_date']['typing'],
                         DATA_TYPES.DATE, DATA_SUBTYPES.DATE)
    assert_expected_type(analysis['col_datetime']['typing'],
                         DATA_TYPES.DATE, DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_timestamp']['typing'],
                         DATA_TYPES.DATE, DATA_SUBTYPES.TIMESTAMP)
    assert_expected_type(analysis['col_text']['typing'],
                         DATA_TYPES.SEQUENTIAL, DATA_SUBTYPES.TEXT)