Python break_dataset示例，common.break_dataset Python示例

示例#1

0

显示文件

    def test_clickhouse_ds(self):
        """Check ClickhouseDS filtering, then analyse a broken copy of its data.

        Pulls up to ``row_cap`` rows of ``home_rentals`` from ``self.DATABASE``
        using the connection settings stored on the test case.
        """
        from mindsdb_datasources import ClickhouseDS

        row_cap = 100
        sql = 'SELECT * FROM {}.{} LIMIT {}'.format(
            self.DATABASE, 'home_rentals', row_cap)

        ds = ClickhouseDS(
            host=self.HOST,
            port=self.PORT,
            user=self.USER,
            password=self.PASSWORD,
            query=sql,
        )

        # Filtering: a 'like' match on "ood" must only yield 'good' locations.
        locations = ds.filter([['location', 'like', 'ood']])['location']
        assert all(val == 'good' for val in locations)

        # The second argument (3) presumably caps the returned rows -- confirm.
        expensive = ds.filter([['rental_price', '>', 2500]], 3)
        assert len(expensive) == 3
        below_zero = ds.filter([['initial_price', '<', 0]], 3)
        assert len(below_zero) == 0

        # Mess with the values in the dataframe, then make sure the
        # datasource can still be analysed.
        ds.df = break_dataset(ds.df)
        assert len(ds) <= row_cap
        F.analyse_dataset(from_data=ds)

示例#2

0

显示文件

文件： test_mysql_ds.py 项目： StpMax/datasources

    def test_mysql_ds(self):
        """Load a UNION ALL query through MySqlDS, break it, analyse and filter it.

        The query concatenates two sub-selects of ``row_cap / 2`` rows each
        from ``self.TABLE``.
        """
        from mindsdb_datasources import MySqlDS

        row_cap = 400
        union_sql = (
            ' (SELECT * FROM (SELECT * FROM {table} LIMIT {limit}) as t1) UNION ALL (SELECT * FROM (SELECT * FROM {table} LIMIT {limit}) as t1)'
        ).format(table=self.TABLE, limit=int(row_cap / 2))

        ds = MySqlDS(
            host=self.HOST,
            user=self.USER,
            password=self.PASSWORD,
            database=self.DATABASE,
            port=self.PORT,
            query=union_sql,
        )

        ds.df = break_dataset(ds.df)
        assert len(ds) <= row_cap

        F.analyse_dataset(ds)

        # Our SQL parsing fails on this query, so this checks that filtering
        # still works through the dataframe fallback.
        sexes = ds.filter([['sex', 'like', 'fem']])['sex']
        assert all(val == 'female' for val in sexes)

        over_twenty = ds.filter([['age', '>', 20]], 12)
        assert len(over_twenty) == 12
        exactly_sixty = ds.filter([['age', '=', 60]], 1)
        assert len(exactly_sixty) == 1
        impossible_age = ds.filter([['age', '>', 150]], 11)
        assert len(impossible_age) == 0

示例#3

0

显示文件

    def test_snowflake_ds(self):
        """Build a SnowflakeDS from DB_CREDENTIALS, break it and analyse it.

        Returns early (after printing a notice) when running on Windows.
        """
        if os.name == 'nt':
            print("Snowflake datasource (SnowflakeDS) can't be used on windows at the moment due to the connector not working")
            return

        from mindsdb_datasources import SnowflakeDS

        # All connection settings live under the 'snowflake' credentials entry.
        creds = DB_CREDENTIALS['snowflake']

        ds = SnowflakeDS(
            query='SELECT * FROM HEALTHCARE_COSTS',
            host=creds['host'],
            user=creds['user'],
            password=creds['password'],
            account=creds['account'],
            warehouse=creds['warehouse'],
            database=creds['database'],
            schema=creds['schema'],
            protocol=creds['protocol'],
            port=creds['port'],
        )

        ds.df = break_dataset(ds.df)

        # Make sure the datasource is still usable for a basic analysis run.
        analysis = mindsdb_native.F.analyse_dataset(
            ds,
            sample_settings={'sample_percentage': 5},
        )

        column_count = len(analysis['columns'])
        assert column_count == 7

示例#4

0

显示文件

    def test_impala_ds(self):
        """Load ``row_cap`` rows through ImpalaDS and check the length after breaking."""
        from mindsdb_datasources import ImpalaDS

        row_cap = 100
        sql = 'SELECT * FROM {}.{} LIMIT {}'.format(
            'test_data', self.TABLE, row_cap)

        ds = ImpalaDS(
            host=self.HOST,
            port=self.PORT,
            database=self.DATABASE,
            query=sql,
        )

        # Replace the dataframe with the output of break_dataset; the row
        # count is expected to stay exactly at the query limit.
        ds.df = break_dataset(ds.df)
        assert len(ds) == row_cap

示例#5

0

显示文件

文件： test_scylla.py 项目： mindsdb/datasources

    def test_scylla_ds(self):
        """Load ``row_cap`` rows through ScyllaDS and check the length after breaking."""
        from mindsdb_datasources import ScyllaDS

        row_cap = 100
        sql = 'SELECT * FROM {}.{} LIMIT {}'.format(
            'test_data', self.TABLE, row_cap)

        ds = ScyllaDS(
            host=self.HOST,
            user=self.USER,
            password=self.PASSWORD,
            database=self.KEYSPACE,  # the keyspace is passed as the database
            port=self.PORT,
            query=sql,
        )

        # Replace the dataframe with the output of break_dataset; the row
        # count is expected to stay exactly at the query limit.
        ds.df = break_dataset(ds.df)
        assert len(ds) == row_cap

示例#6

0

显示文件

文件： test_postgres_ds.py 项目： StpMax/datasources

    def test_postgres_ds(self):
        """Load ``row_cap`` rows through PostgresDS, break them and analyse the result."""
        from mindsdb_datasources import PostgresDS

        row_cap = 100
        sql = 'SELECT * FROM {}.{} LIMIT {}'.format(
            'test_data', self.TABLE, row_cap)

        ds = PostgresDS(
            host=self.HOST,
            user=self.USER,
            password=self.PASSWORD,
            database=self.DATABASE,
            port=self.PORT,
            query=sql,
        )

        # Replace the dataframe with the output of break_dataset; the row
        # count is expected to stay exactly at the query limit.
        ds.df = break_dataset(ds.df)
        assert len(ds) == row_cap

        # The broken datasource must still go through dataset analysis.
        F.analyse_dataset(ds)

示例#7

0

显示文件

文件： test_phoenix_ds.py 项目： mindsdb/datasources

    def test_phoenix_ds(self):
        """Load ``row_cap`` rows through PhoenixDS and check the length after breaking."""
        from mindsdb_datasources import PhoenixDS

        row_cap = 100
        sql = 'SELECT * FROM {}.{} LIMIT {}'.format(
            'test_data', self.TABLE, row_cap)

        ds = PhoenixDS(
            url=self.URL,
            authentication=self.AUTHENTICATION,
            user=self.USER,
            password=self.PASSWORD,
            query=sql,
        )

        # Replace the dataframe with the output of break_dataset; the row
        # count is expected to stay exactly at the query limit.
        ds.df = break_dataset(ds.df)
        assert len(ds) == row_cap

示例#8

0

显示文件

文件： test_maria_ds.py 项目： mindsdb/datasources

    def test_maria_ds(self):
        """Load rows through MariaDS, break them, then filter with a bad query.

        After the stored query is pointed at a non-existent table, filtering
        is expected to keep working through the dataframe fallback.
        """
        from mindsdb_datasources import MariaDS

        row_cap = 200
        sql = 'SELECT * FROM `{}` LIMIT {}'.format(self.TABLE, row_cap)

        ds = MariaDS(
            host=self.HOST,
            user=self.USER,
            password=self.PASSWORD,
            database=self.DATABASE,
            port=self.PORT,
            query=sql,
        )

        ds.df = break_dataset(ds.df)
        assert len(ds) <= row_cap

        # Our SQL parsing succeeds here, but the query itself fails because
        # the table name is swapped out; test that we can still filter via
        # the dataframe fallback.
        broken_query = ds._query.replace(self.TABLE, 'wrongly_named_table')
        ds._query = broken_query

        small_population = ds.filter([['Population', '<', 33098932]], 8)
        assert len(small_population) == 8
        other_index = ds.filter([['Development_Index', '!=', 3]], 12)
        assert len(other_index) == 12