Python BlazingContext.sql примеры использования

Язык программирования: Python

Пространство имен/Пакет: blazingsql

Класс/Тип: BlazingContext

Метод/Функция: sql

Примеров на hotexamples.com: 6

Python BlazingContext.sql — найдено 6 примеров. Это лучшие примеры кода на Python для blazingsql.BlazingContext.sql, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

BlazingContext(20)

create_table(15)

sql(6)

describe_table(2)

drop_table(2)

dors_client(1)

execute(1)

explain(1)

hdfs(1)

s3(1)

Пример #1

Показать файл

Файл: utilityHive.py Проект: gcca/blazingsql

def create_hive_partition_data(input, file_format, table_name, partitions, output, num_files):
	"""Collect the requested partition values of a table and save the partition files.

	Registers ``input`` as ``table_name`` in a new ``BlazingContext`` created
	without a Dask client, queries the distinct values of every requested
	partition column that exists in the table, keeps only the values the
	caller asked for, and passes the result to ``_save_partition_files``.
	Requested columns that are missing from the table are reported with
	``print`` and skipped.

	:param input: Data source handed to ``BlazingContext.create_table``.
	:param file_format: ``'psv'`` registers the input as pipe-delimited CSV
		with dtypes and column names looked up by table name; any other
		value registers the input with ``create_table`` defaults. The value
		is also forwarded to ``_save_partition_files``.
	:param table_name: Name under which the table is registered and queried.
	:param partitions: Mapping of partition column name to the iterable of
		values wanted for that column.
	:param output: Output directory; created (with parents) when missing.
	:param num_files: Forwarded unchanged to ``_save_partition_files``.
	"""
	# exist_ok=True removes the check-then-create race of the exists()/makedirs() pair.
	os.makedirs(output, exist_ok=True)

	bc = BlazingContext(dask_client=None)
	if file_format == 'psv':
		bc.create_table(
			table_name,
			input,
			file_format='csv',
			delimiter="|",
			dtype=get_dtypes(table_name),
			names=get_column_names(table_name),
		)
	else:
		bc.create_table(table_name, input)

	columns = bc.describe_table(table_name)
	data_partition_array_dict = []
	for partition in partitions:
		if partition not in columns:
			print('Column "' + partition + '" not exist')
			continue

		result = bc.sql(f'select distinct({partition}) from {table_name}')

		# A distributed result has to be materialized before converting to pandas.
		if isinstance(result, dask_cudf.core.DataFrame):
			result = result.compute()

		values_partition = result.to_pandas().to_dict()
		# Keep only the values that are both present in the table and requested.
		final_values = list(set(values_partition[partition].values()) & set(partitions[partition]))
		# Re-key the surviving values 0..n-1, mirroring the {index: value} layout of to_dict().
		values_partition[partition] = dict(enumerate(final_values))
		data_partition_array_dict.append(values_partition)

	_save_partition_files(bc, table_name, data_partition_array_dict, output, file_format, num_files)

Пример #2

Показать файл

Файл: utilityHive.py Проект: tspannhw/blazingsql

def create_hive_partition_data(input, table_name, partitions, output, num_files_per_parquet):
	"""Collect the distinct partition values of a table and save the partition files.

	Registers ``input`` as ``table_name`` in a new ``BlazingContext``, queries
	the distinct values of every requested partition column that exists in
	the table, and passes the collected values to ``_save_partition_files``.
	Requested columns that are missing from the table are reported with
	``print`` and skipped.

	:param input: Data source handed to ``BlazingContext.create_table``.
	:param table_name: Name under which the table is registered and queried.
	:param partitions: Iterable of partition column names.
	:param output: Output directory; created (with parents) when missing.
	:param num_files_per_parquet: Forwarded unchanged to ``_save_partition_files``.
	"""
	# exist_ok=True removes the check-then-create race of the exists()/makedirs() pair.
	os.makedirs(output, exist_ok=True)

	bc = BlazingContext()
	bc.create_table(table_name, input)

	columns = bc.describe_table(table_name)
	data_partition_array_dict = []
	for partition in partitions:
		if partition not in columns:
			print('Column "' + partition + '" not exist')
			continue

		values = bc.sql(f'select distinct({partition}) from {table_name}')
		data_partition_array_dict.append(values.to_pandas().to_dict())

	_save_partition_files(bc, table_name, data_partition_array_dict, output, num_files_per_parquet)

Пример #3

Показать файл

Файл: blazingsql_helper.py Проект: bsuryadevara/clx

class BlazingSQLHelper:
    """Thin wrapper that registers tables and runs queries on a BlazingContext."""

    def __init__(self):
        """Create a BlazingContext backed by a Dask client on a LocalCUDACluster."""
        cluster = LocalCUDACluster()
        client = Client(cluster)
        self._bc = BlazingContext(dask_client=client, network_interface='lo')

    def run_query(self, config):
        """This function runs blazingSQL query.

        Each entry of ``config["tables"]`` is registered with ``create_table``
        using its ``table_name`` and ``input_path``; every other key of the
        entry is forwarded as a keyword argument. ``config["sql"]`` is then
        executed and the result is materialized with ``compute()``.

        :param config: Query related tables configuration.
        :type config: dict
        :return: Query results.
        :rtype: cudf.DataFrame
        :raises KeyError: If a table entry lacks ``table_name`` or
            ``input_path``, or ``config`` lacks ``tables`` or ``sql``.
        """
        for table in config["tables"]:
            # Work on a copy so the caller's configuration is left untouched.
            kwargs = table.copy()
            table_name = kwargs.pop("table_name")
            file_path = kwargs.pop("input_path")
            self._bc.create_table(table_name, file_path, **kwargs)
        sql = config["sql"]
        # Lazy %-args: the message is only formatted when DEBUG is enabled.
        log.debug("Executing query: %s", sql)
        result = self._bc.sql(sql)
        return result.compute()

    def drop_table(self, table_names):
        """This function drops blazingSQL tables.

        :param table_names: List of table names to drop.
        :type table_names: List
        """
        for table_name in table_names:
            log.debug("Drop table: %s", table_name)
            self._bc.drop_table(table_name)

Пример #4

Показать файл

class BlazingSQLHelper:
    """Thin wrapper that registers tables and runs queries on a BlazingContext."""

    def __init__(self, pool=False):
        """Create the underlying BlazingContext.

        :param pool: Forwarded to ``BlazingContext``. Setting ``pool=True``
            allocates half the GPU memory.
        :type pool: bool
        """
        self._bc = BlazingContext(pool=pool)

    def run_query(self, config):
        """This function runs blazingSQL query.

        Each entry of ``config["tables"]`` is registered with ``create_table``
        using its ``table_name`` and ``input_path``; every other key of the
        entry is forwarded as a keyword argument. ``config["sql"]`` is then
        executed and its result returned as-is.

        :param config: Query related tables configuration.
        :type config: dict
        :return: Query results.
        :rtype: cudf.DataFrame
        :raises KeyError: If a table entry lacks ``table_name`` or
            ``input_path``, or ``config`` lacks ``tables`` or ``sql``.
        """
        for table in config["tables"]:
            # Work on a copy so the caller's configuration is left untouched.
            kwargs = table.copy()
            table_name = kwargs.pop("table_name")
            file_path = kwargs.pop("input_path")
            self._bc.create_table(table_name, file_path, **kwargs)
        sql = config["sql"]
        # Lazy %-args: the message is only formatted when DEBUG is enabled.
        log.debug("Executing query: %s", sql)
        result = self._bc.sql(sql)
        # NOTE(review): has_data is only ever assigned here; its consumer is
        # not visible in this file - confirm it is still needed.
        self.has_data = False
        return result

    def drop_table(self, table_names):
        """This function drops blazingSQL tables.

        :param table_names: List of table names to drop.
        :type table_names: List
        """
        for table_name in table_names:
            log.debug("Drop table: %s", table_name)
            self._bc.drop_table(table_name)

Пример #5

Показать файл

import subprocess

# client = Client('127.0.0.1:8786')
# client.restart()
# bc = BlazingContext(dask_client=client, network_interface="lo")

bc = BlazingContext()

# Connection settings for the HDFS filesystem registered below.
hdfs_host = 'localhost'
hdfs_port = 54310
hdfs_driver = 'libhdfs'
authority = 'localhost:54310'

result, error_msg, fs = bc.hdfs(
    authority,
    host=hdfs_host,
    port=hdfs_port,
    user='******',
    driver=hdfs_driver,
)

# The Hive cursor is passed to create_table in place of a file path.
cursor = hive.connect('localhost').cursor()
table = bc.create_table('ptransactions', cursor, file_format='parquet')

# One query per company id 0..10, each limited to ten rows.
query_template = "SELECT * FROM ptransactions where t_year=2017 and t_company_id={t_company_id} LIMIT 10"
for company_id in range(11):
    query = query_template.format(t_company_id=company_id)
    ddf = bc.sql(query)
    print(query)

    # Anything that is not already a cudf.DataFrame is materialized via compute().
    print(ddf if isinstance(ddf, cudf.DataFrame) else ddf.compute())

Пример #6

Показать файл

Файл: csv_to_parquet.py Проект: schernolyas/bsql-demos

from dask.distributed import Client
from blazingsql import BlazingContext
from dask_cuda import LocalCUDACluster

# Initialize BlazingContext with a Dask client for the local GPUs so that
# query execution is distributed.
cluster = LocalCUDACluster()
client = Client(cluster)
bc = BlazingContext(dask_client=client, network_interface='lo')

# Register the public AWS S3 bucket.
bc.s3('blazingsql-colab', bucket_name='blazingsql-colab')

# Create a table from a CSV file in that bucket, supplying the column names.
col_names = ['key', 'fare', 'pickup_x', 'pickup_y', 'dropoff_x', 'dropoff_y', 'passenger_count']
bc.create_table('taxi', 's3://blazingsql-colab/taxi_data/taxi_00.csv', names=col_names)

# Query the whole table, then write the result locally as parquet.
taxi_rows = bc.sql('SELECT * FROM taxi')
taxi_rows.to_parquet('../../data/yellow_cab')