Exemplo n.º 1
0
def test_autocommit_switching(dsn, configuration):
    table_name = unique_table_name()

    connection = connect(dsn, **get_credentials(configuration))
    connection.autocommit = True  # <---
    connection.cursor().execute(f"CREATE TABLE {table_name} (a INTEGER)")
    connection.close()

    options = make_options(autocommit=True)
    connection = connect(
        dsn, turbodbc_options=options, **get_credentials(configuration)
    )
    connection.autocommit = False  # <---
    connection.cursor().execute(f"INSERT INTO {table_name} VALUES (?)", [42])
    connection.close()

    # table is there, but data was not persisted
    connection = connect(dsn, **get_credentials(configuration))
    cursor = connection.cursor()
    cursor.execute(f"SELECT * FROM {table_name}")
    results = cursor.fetchall()
    assert results == []

    cursor.execute(f"DROP TABLE {table_name}")
    connection.commit()
def create_connection_string_turbo(server, database):
    options = turbodbc.make_options(prefer_unicode=True)
    constr = 'Driver={ODBC Driver 13 for SQL Server};Server=' + \
        server + ';Database=' + database + ';Trusted_Connection=yes;'
    con = turbodbc.connect(connection_string=constr, turbodbc_options=options)

    return con
Exemplo n.º 3
0
def test_options_with_overrides():
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           prefer_unicode=True,
                           use_async_io=True)

    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    assert options.prefer_unicode == True
    assert options.use_async_io == True
Exemplo n.º 4
0
    def __init__(self):
        """
        :param config:
        """
        try:
            #[].[dbo].[big_table]
            self.database = 'AdventureWorks2016'
            #[AdventureWorks2016].[Person].[Person]
            user = '******'
            pwd = 'trusted'

            server = 'CRPC015162\SQLDEV'
            driver = 'ODBC+Driver+17+for+SQL+Server'
            driver = "{" + driver.replace("+", " ") + "}"

            options = make_options(read_buffer_size=Megabytes(100),
                                   parameter_sets_to_buffer=1000,
                                   varchar_max_character_limit=10000,
                                   use_async_io=True,
                                   prefer_unicode=True,
                                   autocommit=True,
                                   large_decimals_as_64_bit_types=True,
                                   limit_varchar_results_to_max=True)
            if pwd == 'trusted':
                self.cursor = turbodbc.connect(
                    driver=driver,
                    server=server,
                    database=self.database,
                    uid=user,
                    Trusted_Connection='yes',
                    turbodbc_options=options).cursor()
            else:
                self.cursor = turbodbc.connect(
                    driver=driver,
                    server=server,
                    database=self.database,
                    uid=user,
                    pwd=pwd,
                    turbodbc_options=options).cursor()

            con_string = f"DRIVER={driver};SERVER={server};DATABASE={self.database};Trusted_Connection=yes;" \
                if pwd == 'trusted' \
                else f"DRIVER={driver};" \
                     f"SERVER={server};" \
                     f"DATABASE={self.database};" \
                     f"UID={user};" \
                     f"PWD={pwd}"

            quoted = urllib.parse.quote_plus(con_string)
            self.engine = sa.create_engine(
                'mssql+pyodbc:///?odbc_connect={}'.format(quoted))

        except turbodbc.exceptions.DatabaseError as ex:
            logging.exception(
                f"[Exception][database_connector][init][{str(ex)}]")
Exemplo n.º 5
0
 def execute(self, query):
     """Execute SQL query on Teradata database.
     
     Parameters:
         query (str): SQL query
     """
     con = turbodbc.connect(
         dsn=self.dsn, turbodbc_options=turbodbc.make_options(prefer_unicode=True, autocommit=True)
     )
     cur = con.cursor()
     cur.execute(query)
     con.close()
Exemplo n.º 6
0
def exasol_connection(db_uri, commit=True):
    db_params = urlparse(db_uri)
    options = make_options(read_buffer_size=Megabytes(100))
    connection = connect(
        driver="/Library/ODBC/EXASolution_ODBC.bundle/Contents/MacOS/libexaodbc-io418sys.dylib",
        EXAUID=db_params.username,
        EXAPWD=db_params.password,
        EXAHOST=db_params.hostname,
        turbodbc_options=options,
    )
    yield connection
    if commit:
        connection.commit()
Exemplo n.º 7
0
 def connect_to_db(self):
     options = make_options(prefer_unicode=True,
                            autocommit=True,
                            use_async_io=True)
     cnxn = connect(DRIVER="SQL SERVER",
                    SERVER=self.server,
                    DATABASE=self.database,
                    UID=self.uid,
                    PWD=self.pwd,
                    turbodbc_options=options)
     self.conn = cnxn
     self.cursor = cnxn.cursor()
     self.engine = create_engine(self.engine)
Exemplo n.º 8
0
def td_load_df(df, dsn, table_name, index=None):  #This is the main function
    """
    Load pandas dataframe to teradata (using turbodbc)
    This function's intended use is to quickly load small to relatively large dataframes 
    into the database without thinking about it
    
    The function is NOT recommended for use with very large dataframes (over 10 million cells)
    
    Parameters
    ----------
    df: a pandas dataframe, must only used allowed dtypes
    dsn: datasource name, should be == 'Teradata' unless it has a different name on your PC
    table_name: Teradata tablename (where to load)
    index (optional): string column name of the index column, if None attempts to guess
    
    Examples:
    ---------
    >>>test_1 = pd.DataFrame({'ints_1':[0, 1, 2, 3]
                          ,'ints_2':[0, 3, 4, 5]
                          ,'ints_3':[-1, -2, -3, -4]})
    >>>td_load_df(test_1, dsn='Teradata', table_name='ar_test_turbodbc_1')
    
    Current limitations:
    --------------------
    0. Largely untested
    1. String column length is equal to the length of the largest string column
    2. Datetime columns are loaded as several integer columns (year, month, day)
	3. Does not support NA values at this time (throws an error)
    """
    if df.isnull().values.any():
        raise ValueError("""This function currently does not support NA values,
    please fill them before loading""")
    df.columns = [col.upper() for col in df.columns]
    cat_columns = df.select_dtypes(['category']).columns
    for col in cat_columns:
        df[col] = df[col].astype(str)
    df_datetime_to_text(df)
    table_name_clean = 'UAT_DM.' + table_name.upper().replace('UAT_DM.', '')
    options = make_options(autocommit=True)
    connection = connect(dsn=dsn, turbodbc_options=options)
    cursor = connection.cursor()
    try:
        cursor.execute('DROP TABLE ' + table_name_clean)
    except DatabaseError:
        pass
    cursor.execute(
        sql_create_statement(df, table_name_clean, guess_index(df, index)))
    cursor.executemanycolumns(sql_insert_statement(df, table_name_clean),
                              [df[col].values for col in df.columns])
    connection.close()
    print("Loaded your dataframe successfully")
Exemplo n.º 9
0
def test_autocommit_enabled_at_start(dsn, configuration):
    table_name = unique_table_name()
    options = make_options(autocommit=True)
    connection = connect(dsn, turbodbc_options=options, **get_credentials(configuration))

    connection.cursor().execute('CREATE TABLE {} (a INTEGER)'.format(table_name))
    connection.close()

    connection = connect(dsn, **get_credentials(configuration))
    cursor = connection.cursor()
    cursor.execute('SELECT * FROM {}'.format(table_name))
    results = cursor.fetchall()
    assert results == []

    cursor.execute('DROP TABLE {}'.format(table_name))
    connection.commit()
Exemplo n.º 10
0
def open_connection(configuration,
                    rows_to_buffer=None,
                    parameter_sets_to_buffer=100,
                    **turbodbc_options):
    dsn = configuration['data_source_name']
    prefer_unicode = configuration.get('prefer_unicode', False)
    read_buffer_size = turbodbc.Rows(rows_to_buffer) if rows_to_buffer else turbodbc.Megabytes(1)

    options = turbodbc.make_options(read_buffer_size=read_buffer_size,
                                    parameter_sets_to_buffer=parameter_sets_to_buffer,
                                    prefer_unicode=prefer_unicode,
                                    **turbodbc_options)
    connection = turbodbc.connect(dsn, turbodbc_options=options, **get_credentials(configuration))

    yield connection
    connection.close()
Exemplo n.º 11
0
 def read(self, query):
     """Read data from Teradata database.
     
     Parameters:
         query (str): SQL query
         
     Returns:
         pandas.DataFrame with results of query
     """
     con = turbodbc.connect(
         dsn=self.dsn, turbodbc_options=turbodbc.make_options(prefer_unicode=True, autocommit=True)
     )
     cur = con.cursor()
     cur.execute(query)
     df = pd.DataFrame(cur.fetchallnumpy())
     df.columns = df.columns.str.lower()
     con.close()
     return df      
Exemplo n.º 12
0
def test_options_with_overrides():
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           varchar_max_character_limit=42,
                           prefer_unicode=True,
                           use_async_io=True,
                           autocommit=True,
                           large_decimals_as_64_bit_types=True,
                           limit_varchar_results_to_max=True)

    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    assert options.varchar_max_character_limit == 42
    assert options.prefer_unicode == True
    assert options.use_async_io == True
    assert options.autocommit == True
    assert options.large_decimals_as_64_bit_types == True
    assert options.limit_varchar_results_to_max == True
Exemplo n.º 13
0
def open_connection(configuration,
                    rows_to_buffer=None,
                    parameter_sets_to_buffer=100,
                    **turbodbc_options):
    dsn = configuration['data_source_name']
    prefer_unicode = configuration.get('prefer_unicode', False)
    read_buffer_size = turbodbc.Rows(
        rows_to_buffer) if rows_to_buffer else turbodbc.Megabytes(1)

    options = turbodbc.make_options(
        read_buffer_size=read_buffer_size,
        parameter_sets_to_buffer=parameter_sets_to_buffer,
        prefer_unicode=prefer_unicode,
        **turbodbc_options)
    connection = turbodbc.connect(dsn,
                                  turbodbc_options=options,
                                  **get_credentials(configuration))

    yield connection
    connection.close()
 def __init__(self,
              table_name,
              idx_name,
              dsn,
              extra_joins='',
              var_list=None):
     '''
     table_name(string): name of SQL table that stores records
     idx_name(string): column name that stores unique identifier for a record (row from table specified)
     dsn(string): dsn ODBC string to connect to SQL through turbodbc driver
     extra_joins(string): extra SQL code the end of get_record{s}(). Can be used to create extra joins for getting
     data from secondary tables or for more WHERE filters
     '''
     self.var_list = var_list
     self.table_name = table_name
     self.idx_name = '[' + idx_name + ']'
     options = make_options()
     self.conn = turbodbc.connect(dsn=dsn, turbodbc_options=options)
     self.cursor = self.conn.cursor()
     self.extra_joins = extra_joins
     """
Exemplo n.º 15
0
def test_options_with_overrides():
    options = make_options(read_buffer_size=Rows(123),
                           parameter_sets_to_buffer=2500,
                           varchar_max_character_limit=42,
                           prefer_unicode=True,
                           use_async_io=True,
                           autocommit=True,
                           large_decimals_as_64_bit_types=True,
                           limit_varchar_results_to_max=True,
                           force_extra_capacity_for_unicode=True,
                           fetch_wchar_as_char=True)

    assert options.read_buffer_size.rows == 123
    assert options.parameter_sets_to_buffer == 2500
    assert options.varchar_max_character_limit == 42
    assert options.prefer_unicode == True
    assert options.use_async_io == True
    assert options.autocommit == True
    assert options.large_decimals_as_64_bit_types == True
    assert options.limit_varchar_results_to_max == True
    assert options.force_extra_capacity_for_unicode == True
    assert options.fetch_wchar_as_char == True
Exemplo n.º 16
0
def test_autocommit_switching(dsn, configuration):
    table_name = unique_table_name()

    connection = connect(dsn, **get_credentials(configuration))
    connection.autocommit = True   # <---
    connection.cursor().execute('CREATE TABLE {} (a INTEGER)'.format(table_name))
    connection.close()

    options = make_options(autocommit=True)
    connection = connect(dsn, turbodbc_options=options, **get_credentials(configuration))
    connection.autocommit = False  # <---
    connection.cursor().execute('INSERT INTO {} VALUES (?)'.format(table_name), [42])
    connection.close()

    # table is there, but data was not persisted
    connection = connect(dsn, **get_credentials(configuration))
    cursor = connection.cursor()
    cursor.execute('SELECT * FROM {}'.format(table_name))
    results = cursor.fetchall()
    assert results == []

    cursor.execute('DROP TABLE {}'.format(table_name))
    connection.commit()
Exemplo n.º 17
0
def test_options_without_parameters():
    options = make_options()
    # one of the default parameters tested in the C++ part
    assert options.parameter_sets_to_buffer == 1000
Exemplo n.º 18
0
 def load(self, df, table):
     """Load pandas.DataFrame to Teradata table.
     
     Parameters:
         df (pandas.DataFrame): DataFrame for loading
         table (str): table name
     """
     con = turbodbc.connect(
         dsn=self.dsn, 
         turbodbc_options=turbodbc.make_options(prefer_unicode=True, autocommit=True)
     )
     cur = con.cursor()
     
     try:
         drop_sql = 'drop table {}.{}'.format(self.database, table)
         cur.execute(drop_sql)
     except turbodbc.DatabaseError:
         pass
         
     teradata_types = {
         'int8': 'byteint', 'int16': 'smallint', 'int32': 'integer', 'int64': 'bigint',
         'float16': 'float', 'float32': 'float', 'float64': 'double', 'object': 'varchar',
         'bool': 'byteint'
     }
     
     query = 'create multiset table {}.{} ('.format(self.database, table)
     for idx, dtype in zip(df.dtypes.index, df.dtypes.values):
         dtype = str(dtype)
         td_type = teradata_types[dtype] + \
                     ('' if dtype != 'object' else '({})'.format(df[idx].str.len().max()))
         query += '{} {}, '.format(idx, td_type)
     query = query[:-2] + ') no primary index'
     
     cur.execute(query)
     con.close()
     
     if not os.path.exists(self.temp_folder):
         os.makedirs(self.temp_folder)
     df.to_csv('{}/df.csv'.format(self.temp_folder), sep=',', decimal='.', index=False)
     
     script = \
     'set session charset "UTF8";\n' + \
     f'logon {self.teradata_ip}/{self.login}, {self.password};\n' + \
     f'database {self.database};\n' + \
     f'begin loading {self.table}\n' + \
     f'errorfiles {self.fastload_err1}, {self.fastload_err2}\n' + \
     'checkpoint 1000000;\n' + \
     'set record vartext "," nostop;\n' + \
     'record 2;\n' + \
     'define\n' + \
     ',\n'.join(
         ['{} (varchar({}))'.format(col, df[col].astype(str).str.len().max()) for col in df.columns]
     ) + '\n' + \
     'file = {};\n'.format(os.getcwd() + '\\{}\\df.csv'.format(self.temp_folder)) + \
     f'insert into {table}\n' + \
     'values(\n' + \
     ',\n'.join([':' + col for col in df.columns]) + \
     ');\n' + \
     'end loading;\n' + \
     'logoff;\n' + \
     'quit;'
     script_file = open(f'{self.temp_folder}/fastload_script.txt', "w+")
     script_file.write(script)
     script_file.close()
     command = f'cd {self.fastload_path} | fastload.exe < ' + \
               f'{os.getcwd()}\\{self.temp_folder}\\fastload_script.txt'
     flg = os.system(command)
     shutil.rmtree(self.temp_folder)
Exemplo n.º 19
0
def test_options_without_parameters():
    options = make_options()
    # one of the default parameters tested in the C++ part
    assert options.parameter_sets_to_buffer == 1000
Exemplo n.º 20
0
from turbodbc import connect, make_options, Megabytes
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
options = make_options(read_buffer_size=Megabytes(20), autocommit=True)

conn = connect(dsn='drill', turbodbc_options=options)
sql = "select * from dfs.`/test.csv`"
cursor = conn.cursor()
cursor.execute(sql)
table = cursor.fetchallarrow()
data = table.to_pandas()
print(data)
Exemplo n.º 21
0
df_workcases = pd.read_sql_query(
    """select left(hubs.hub_name,2) as Country,lwc.id as LWCid, u.name as 'Action_by',c.id as CustomerID, lp.id as LoanPortfolioID, lwc.closed_at, 
lwc.activity_category_id
from loan_workout_cases lwc
inner join customers c on c.id=lwc.customer_id
inner join loan_portfolios lp on lp.customer_id=c.id
inner join payment_accounts pa on pa.loan_portfolio_id=lp.id
inner join payments p on p.payment_account_id=pa.id
inner join users u on lwc.user_id = u.id
inner join hubs on c.hub_id=hubs.id
where lwc.could_reach_customer=1 and lwc.activity_category_id IN ("5252","5253") and lwc.closed_at >= "2018-12-10" 
group by lwc.id 
limit 10;""",
    con=conSolar)

options = make_options(parameter_sets_to_buffer=1000)
conn = connect(driver='{ODBC Driver 17 for SQL Server}',
               server='mbslbiserver.database.windows.net',
               database='mbsldwh_dev',
               turbodbc_options=options)

test_query = '''DELETE FROM EXTR_WORKCASES


                INSERT INTO Extr_workcases ([Country],[LWCid]      ,[Action_by]      ,[CustomerID]      ,[LoanPortfolioID]      ,[closed_at]      ,[activity_category_id])
                VALUES (?,?,?,?,?,?,?) '''

cursor.executemanycolumns(test_query, [
    df_workcases['Country'].values, df_workcases['LWCid'].values,
    df_workcases['Action_by'].values, df_workcases['CustomerID'].values,
    df_workcases['activity_category_id'].values,
Exemplo n.º 22
0
    def create_connect_args(self, url):
        """Create the connect args for Turbodbc.

        Some code adapted from the Pyodbc connector in the SQLAlchemy
        codebase.
        """

        options = url.translate_connect_args(username='******')

        query = url.query
        options.update(query)

        connect_args = {}

        # first get the Turbodbc specific options
        turbodbc_options = {}
        for param in ('read_buffer_size', 'parameter_sets_to_buffer',
                      'use_async_io', 'autocommit',
                      'large_decimals_as_64_bit_types'):
            if param in options:
                raw = options.pop(param)
                if param in ('use_async_io', 'autocommit',
                             'large_decimals_as_64_bit_types'):
                    value = util.asbool(raw)
                else:
                    value = util.asint(raw)
                turbodbc_options[param] = value

        # we always need to set prefer_unicode=True for MSSQL + Turbodbc
        connect_args['turbodbc_options'] = make_options(prefer_unicode=True,
                                                        **turbodbc_options)
        for param in ('ansi', 'unicode_results'):
            if param in options:
                connect_args[param] = util.asbool(options.pop(param))

        dsn_connection = 'dsn' in options or \
            ('host' in options and 'database' not in options)
        if dsn_connection:
            dsn = [options.pop('host', '') or options.pop('dsn', '')]
        else:
            dsn = []
            port = ''
            if 'port' in options and 'port' not in query:
                port = int(options.pop('port'))

            driver = options.pop('driver', None)
            if driver is None:
                util.warn(
                    "No driver name specified; "
                    "this is expected by ODBC when using "
                    "DSN-less connections")
            else:
                connect_args['driver'] = driver

            connect_args.update(
                server=(options.pop('host', '')),
                port=port,
                database=options.pop('database', '')
            )

        user = options.pop('user', None)
        if user:
            connect_args.update(
                uid=user,
                pwd=options.pop('password', '')
            )
        else:
            connect_args['trusted_connection'] = 'Yes'

        return [dsn, connect_args]