def test_clickhouse_ds(self):
    """Check ClickhouseDS filtering, then analyse a deliberately broken copy.

    Loads at most ``LIMIT`` rows of ``home_rentals`` from the configured
    database, verifies three ``filter`` calls, replaces the dataframe with
    the output of ``break_dataset`` and runs ``F.analyse_dataset`` on it.
    """
    from mindsdb_datasources import ClickhouseDS

    LIMIT = 100
    query = 'SELECT * FROM {}.{} LIMIT {}'.format(
        self.DATABASE, 'home_rentals', LIMIT
    )
    ds = ClickhouseDS(
        host=self.HOST,
        port=self.PORT,
        user=self.USER,
        password=self.PASSWORD,
        query=query,
    )

    # Filtering: every location matched by the 'ood' pattern must be 'good'.
    matched_locations = ds.filter([['location', 'like', 'ood']])['location']
    for location in matched_locations:
        assert location == 'good'

    # The second argument (3) bounds how many rows come back.
    pricey = ds.filter([['rental_price', '>', 2500]], 3)
    assert len(pricey) == 3

    # No row is expected to have a negative initial price.
    negative = ds.filter([['initial_price', '<', 0]], 3)
    assert len(negative) == 0

    # Mess with the values inside the dataframe, then try to analyse it.
    ds.df = break_dataset(ds.df)
    assert len(ds) <= LIMIT

    F.analyse_dataset(from_data=ds)
def test_mysql_ds(self):
    """Check that MySqlDS can analyse and filter the result of a UNION query.

    The query unions two ``LIMIT // 2``-row slices of ``self.TABLE``. The
    dataframe is replaced with the output of ``break_dataset`` before the
    analysis and the filter checks run.
    """
    from mindsdb_datasources import MySqlDS

    LIMIT = 400
    half = LIMIT // 2
    template = (
        ' (SELECT * FROM (SELECT * FROM {table} LIMIT {limit}) as t1)'
        ' UNION ALL'
        ' (SELECT * FROM (SELECT * FROM {table} LIMIT {limit}) as t1)'
    )
    ds = MySqlDS(
        host=self.HOST,
        user=self.USER,
        password=self.PASSWORD,
        database=self.DATABASE,
        port=self.PORT,
        query=template.format(table=self.TABLE, limit=half),
    )

    ds.df = break_dataset(ds.df)
    assert len(ds) <= LIMIT

    F.analyse_dataset(ds)

    # SQL parsing fails for this query, so these checks confirm that
    # filtering still works through the dataframe fallback.
    sexes = ds.filter([['sex', 'like', 'fem']])['sex']
    for sex in sexes:
        assert sex == 'female'

    over_twenty = ds.filter([['age', '>', 20]], 12)
    assert len(over_twenty) == 12

    exactly_sixty = ds.filter([['age', '=', 60]], 1)
    assert len(exactly_sixty) == 1

    impossible_age = ds.filter([['age', '>', 150]], 11)
    assert len(impossible_age) == 0
def test_snowflake_ds(self):
    """Check that SnowflakeDS loads HEALTHCARE_COSTS and can be analysed.

    On Windows (``os.name == 'nt'``) the test prints a notice and returns
    without doing anything. Otherwise it builds the datasource from the
    ``DB_CREDENTIALS['snowflake']`` entries, replaces the dataframe with the
    output of ``break_dataset`` and expects the analysis to report 7 columns.
    """
    if os.name == 'nt':
        print('Snowflake datasource (SnowflakeDS) can\'t be used on windows at the moment due to the connector not working')
        return

    from mindsdb_datasources import SnowflakeDS

    # Create the datasource from the stored Snowflake credentials.
    creds = DB_CREDENTIALS['snowflake']
    ds = SnowflakeDS(
        query='SELECT * FROM HEALTHCARE_COSTS',
        host=creds['host'],
        user=creds['user'],
        password=creds['password'],
        account=creds['account'],
        warehouse=creds['warehouse'],
        database=creds['database'],
        schema=creds['schema'],
        protocol=creds['protocol'],
        port=creds['port'],
    )

    ds.df = break_dataset(ds.df)

    # Make sure the datasource is usable for a basic analysis on a 5% sample.
    sampling = {'sample_percentage': 5}
    analysis = mindsdb_native.F.analyse_dataset(ds, sample_settings=sampling)
    assert len(analysis['columns']) == 7
def test_impala_ds(self):
    """Check that ImpalaDS returns exactly ``LIMIT`` rows from ``test_data``.

    The dataframe is replaced with the output of ``break_dataset`` before
    the length check.
    """
    from mindsdb_datasources import ImpalaDS

    LIMIT = 100
    query = 'SELECT * FROM {}.{} LIMIT {}'.format('test_data', self.TABLE, LIMIT)
    ds = ImpalaDS(
        host=self.HOST,
        port=self.PORT,
        database=self.DATABASE,
        query=query,
    )

    ds.df = break_dataset(ds.df)

    assert len(ds) == LIMIT
def test_scylla_ds(self):
    """Check that ScyllaDS returns exactly ``LIMIT`` rows from ``test_data``.

    ``self.KEYSPACE`` is passed as the ``database`` argument. The dataframe
    is replaced with the output of ``break_dataset`` before the length check.
    """
    from mindsdb_datasources import ScyllaDS

    LIMIT = 100
    query = 'SELECT * FROM {}.{} LIMIT {}'.format('test_data', self.TABLE, LIMIT)
    ds = ScyllaDS(
        host=self.HOST,
        user=self.USER,
        password=self.PASSWORD,
        database=self.KEYSPACE,
        port=self.PORT,
        query=query,
    )

    ds.df = break_dataset(ds.df)

    assert len(ds) == LIMIT
def test_postgres_ds(self):
    """Check that PostgresDS returns ``LIMIT`` rows and can be analysed.

    The dataframe is replaced with the output of ``break_dataset`` before
    the length check and the ``F.analyse_dataset`` call.
    """
    from mindsdb_datasources import PostgresDS

    LIMIT = 100
    query = 'SELECT * FROM {}.{} LIMIT {}'.format('test_data', self.TABLE, LIMIT)
    ds = PostgresDS(
        host=self.HOST,
        user=self.USER,
        password=self.PASSWORD,
        database=self.DATABASE,
        port=self.PORT,
        query=query,
    )

    ds.df = break_dataset(ds.df)

    assert len(ds) == LIMIT

    F.analyse_dataset(ds)
def test_phoenix_ds(self):
    """Check that PhoenixDS returns exactly ``LIMIT`` rows from ``test_data``.

    The datasource is built from ``self.URL`` and ``self.AUTHENTICATION``
    rather than a host and port. The dataframe is replaced with the output
    of ``break_dataset`` before the length check.
    """
    from mindsdb_datasources import PhoenixDS

    LIMIT = 100
    query = 'SELECT * FROM {}.{} LIMIT {}'.format('test_data', self.TABLE, LIMIT)
    ds = PhoenixDS(
        url=self.URL,
        authentication=self.AUTHENTICATION,
        user=self.USER,
        password=self.PASSWORD,
        query=query,
    )

    ds.df = break_dataset(ds.df)

    assert len(ds) == LIMIT
def test_maria_ds(self):
    """Check that MariaDS can still filter after its stored query is broken.

    Loads up to ``LIMIT`` rows of ``self.TABLE``, replaces the dataframe with
    the output of ``break_dataset``, then rewrites ``_query`` to name a
    non-existent table before running the filter checks.
    """
    from mindsdb_datasources import MariaDS

    LIMIT = 200
    query = 'SELECT * FROM `{}` LIMIT {}'.format(self.TABLE, LIMIT)
    ds = MariaDS(
        host=self.HOST,
        user=self.USER,
        password=self.PASSWORD,
        database=self.DATABASE,
        port=self.PORT,
        query=query,
    )

    ds.df = break_dataset(ds.df)
    assert len(ds) <= LIMIT

    # SQL parsing succeeds here but the query itself fails, so these checks
    # confirm that filtering still works through the dataframe fallback.
    broken_query = ds._query.replace(self.TABLE, 'wrongly_named_table')
    ds._query = broken_query

    small_population = ds.filter([['Population', '<', 33098932]], 8)
    assert len(small_population) == 8

    other_index = ds.filter([['Development_Index', '!=', 3]], 12)
    assert len(other_index) == 12