def load_csv_to_table(table, schema_file, csv_file, server, database, config,
                      cred_file='config/dblogin.config', skipfirstrow=1):
    """Load a csv file into a SQL Server table.

    Reads the username/password for the named configuration from a JSON
    credentials file, opens a connection with mssql_connect, and inserts
    every data row into the target table.

    Args:
        table: table name where csv data will be written
        schema_file: schema file that has all column names and data type names
        csv_file: path to the csv data being loaded
        server: sql server host name
        database: database name to connect to
        config: which configuration name to pull username and password credentials
        cred_file: location of db login config file
        skipfirstrow (optional): if 1 then skip the first row of data (exclude headers)

    Returns:
        None
    """
    from files import loop_csv_file
    from files import get_schema_file
    with open(cred_file, 'rb') as cred:
        db_info = json.loads(cred.read())
    username = db_info[config]['username']
    password = db_info[config]['password']
    data_list = loop_csv_file(csv_file)
    connection = mssql_connect(server, database, username, password)
    schema_list = get_schema_file(schema_file)
    # skip the first value of data_list, which is the header row
    data_list = iter(data_list)
    if skipfirstrow == 1:
        next(data_list)
    process_datarow_to_list(data_list, schema_list, connection, table)
def load_delimited_file_to_table(connection, table, source_file, schema_file,
                                 skipfirstrow=1, delimiter=','):
    """Load a delimited text file into a database table.

    Args:
        connection: open database connection used for the inserts
        table: table name where the file's data will be written
        source_file: path to the delimited data file being loaded
        schema_file: schema file that has all column names and data type names
        skipfirstrow (optional): if 1 then skip the first row of data (exclude headers)
        delimiter (optional): field separator used in source_file (default ',')

    Returns:
        None
    """
    data_list = loop_delimited_file(source_file, delimiter=delimiter)
    schema_list = get_schema_file(schema_file)
    # skip the first value of data_list, which is the header row
    data_list = iter(data_list)
    if skipfirstrow == 1:
        next(data_list)
    insert_datarows_to_table(data_list, schema_list, connection, table)
def create_table(connection, table_name, schema_file):
    """Create a SQL Server table from a csv schema file, if it does not exist.

    Args:
        connection: pyodbc.connect() object, Connection to use when running Sql
        table_name: string, Table name including db schema (ex: my_schema.my_table)
        schema_file: string, Path to csv schema file with each row as col_name, data_type

    Returns:
        cursor object, Results of the call to pyodb.connection().cursor().execute(query)
    """
    cursor = connection.cursor()
    schema_list = get_schema_file(schema_file)
    table_split = table_name.split('.')
    # sys.tables stores the bare table name, without the db/schema prefix
    table = table_split[-1]
    use_db = ""
    if len(table_split) > 1:
        use_db = "USE {0}; ".format(table_split[0])
    # BUG FIX: the existence check must compare against the bare table name.
    # The original formatted the fully qualified table_name into the
    # sys.tables lookup, so the guard never matched and CREATE TABLE always ran.
    ddl = use_db + """IF NOT EXISTS (
        SELECT [name] FROM sys.tables WHERE [name] = '{0}'
    ) CREATE TABLE {1} (""".format(table, table_name)
    for col, dt in schema_list:
        ddl = ddl + col + ' ' + dt + ' NULL, '
    ddl = ddl[:-2] + ');'
    try:
        log.debug(ddl)
        cursor.execute(ddl.encode('utf-8'))
    except UnicodeDecodeError:
        cursor.execute(ddl)
    return cursor
def cursor_to_json(cursor, dest_file, dest_schema_file=None, source_schema_file=None):
    """Extract cursor rows to a JSON-lines file, optionally writing a schema file.

    Args:
        cursor: cursor object with data to extract to file
        dest_file: string, path and file name to save data
        dest_schema_file: string (optional), path for the generated csv schema file
        source_schema_file: string (optional), existing schema file to load instead
            of deriving the schema from cursor.description

    Returns:
        None
    """
    if source_schema_file is not None:
        from files import get_schema_file
        schema = get_schema_file(source_schema_file)
    else:
        schema = [[desc[0], str(desc[1])] for desc in cursor.description]
    if dest_schema_file is not None:
        # first matching substring wins; 'bigint' must be tested before 'int'
        type_map = [('date', 'timestamp'),
                    ('list', 'list'),
                    ('bigint', 'bigint'),
                    ('int', 'integer'),
                    ('long', 'integer'),
                    ('float', 'float'),
                    ('bool', 'boolean'),
                    ('str', 'string')]
        with open(dest_schema_file, 'wb') as schemafile:
            for col, dbtype in schema:
                datatype = 'string'
                for marker, mapped in type_map:
                    if marker in dbtype:
                        datatype = mapped
                        break
                schemafile.write("%s\n" % (col + ',' + datatype))
    with open(dest_file, 'wb') as outfile:
        for row in cursor:
            result_dct = process_data_row(row, schema)
            outfile.write("%s\n" % json.dumps(result_dct, default=_defaultencode))
def cursor_to_json(cursor, dest_file, dest_schema_file=None, source_schema_file=None):
    """Write cursor rows to a JSON-lines file, optionally emitting a csv schema file.

    NOTE(review): this re-definition shadows the earlier cursor_to_json in this
    module; this variant formats rows with process_postgres_data_row.

    Args:
        cursor: cursor object with data to extract to file
        dest_file: string, path and file name to save data
        dest_schema_file: string (optional), where to write the derived schema
        source_schema_file: string (optional), schema file to load instead of
            deriving column info from cursor.description

    Returns:
        None
    """
    if source_schema_file is None:
        schema = [[column[0], str(column[1])] for column in cursor.description]
    else:
        schema = get_schema_file(source_schema_file)

    def _json_type(dbtype):
        # first matching substring decides the output type; default is string
        if 'date' in dbtype:
            return 'timestamp'
        if 'list' in dbtype:
            return 'list'
        if 'int' in dbtype or 'long' in dbtype:
            return 'integer'
        if 'float' in dbtype:
            return 'float'
        if 'bool' in dbtype:
            return 'boolean'
        return 'string'

    if dest_schema_file is not None:
        with open(dest_schema_file, 'wb') as schemafile:
            for entry in schema:
                schemafile.write("%s\n" % (entry[0] + ',' + _json_type(entry[1])))
    with open(dest_file, 'wb') as outfile:
        for record in cursor:
            outfile.write("%s\n" % json.dumps(
                process_postgres_data_row(record, schema),
                default=_defaultencode))
def load_json_file_to_table(connection, table, source_file, schema_file):
    """Load a JSON data file into a database table.

    Args:
        connection: open database connection used for the inserts
        table: table name where the file's data will be written
        source_file: path to the JSON data file being loaded
        schema_file: schema file that has all column names and data type names

    Returns:
        None
    """
    data_list = loop_json_file(source_file)
    schema_list = get_schema_file(schema_file)
    insert_datarows_dct_to_table(data_list, schema_list, connection, table)
def load_csv_to_table(table, schema_file, csv_file, server, database, config,
                      cred_file='config/dblogin.config', skipfirstrow=1):
    """Insert the rows of a csv file into a SQL Server table.

    Credentials for the given configuration name are read from a JSON config
    file, a connection is opened with mssql_connect, and each data row is
    pushed to the target table.

    Args:
        table: destination table name
        schema_file: csv schema file listing column names and data types
        csv_file: csv file containing the data to load
        server: sql server host name
        database: database to connect to
        config: configuration name holding the username/password entry
        cred_file: location of the db login config file
        skipfirstrow (optional): if 1, drop the first row (the header)

    Returns:
        None
    """
    from files import loop_csv_file
    from files import get_schema_file
    with open(cred_file, 'rb') as cred:
        db_info = json.loads(cred.read())
    login = db_info[config]
    rows = loop_csv_file(csv_file)
    connection = mssql_connect(server, database,
                               login['username'], login['password'])
    columns = get_schema_file(schema_file)
    rows = iter(rows)
    # drop the header row before inserting
    if skipfirstrow == 1:
        next(rows)
    process_datarow_to_list(rows, columns, connection, table)
def create_table(connection, table_name, schema_file, index):
    """Create a Postgres table from a csv schema file, plus an optional index.

    NOTE(review): this re-definition shadows the earlier 3-argument
    create_table in this module.

    Args:
        connection: pyodbc.connect() object, Connection to use when running Sql
        table_name: string, Table name including db schema (ex: my_schema.my_table)
        schema_file: string, Path to csv schema file with each row as col_name, data_type
        index: string, Column name of index (can put multiple columns comma
            delimited if desired); None skips index creation

    Returns:
        cursor object, Results of the call to pyodb.connection().cursor().execute(query)
    """
    cursor = connection.cursor()
    schema_list = get_schema_file(schema_file)
    ddl = 'CREATE TABLE IF NOT EXISTS ' + table_name + '('
    for col, dt in schema_list:
        ddl = ddl + col + ' ' + dt + ', '
    ddl = ddl[:-2] + ');'
    try:
        cursor.execute(ddl.encode('utf-8'))
    except UnicodeDecodeError:
        cursor.execute(ddl)
    if index is not None:
        # the index lives in the table's schema, so probe the qualified name
        idx_name = table_name + '_idx'
        exists = run_sql(connection, "SELECT to_regclass('{0}')".format(idx_name))
        # BUG FIX: to_regclass returns NULL when the object is missing, and when
        # found it may return an unqualified name; the original string
        # comparison could wrongly attempt to re-create an existing index.
        if exists.fetchone()[0] is None:
            index_name = table_name.split('.')[-1] + '_idx'
            ddl2 = 'CREATE INDEX {0} ON {1}({2});'.format(index_name, table_name, index)
            try:
                cursor.execute(ddl2.encode('utf-8'))
            except UnicodeDecodeError:
                cursor.execute(ddl2)
    connection.commit()
    return cursor
def load_csv_to_table(table, schema_file, csv_file, connection, skipfirstrow=1):
    """Load a csv file into a table using an existing database connection.

    NOTE(review): this re-definition shadows the earlier load_csv_to_table
    variants in this module.

    Args:
        table: table name where csv data will be written
        schema_file: schema file that has all column names and data type names
        csv_file: path to the csv data being loaded
        connection: open database connection used for the inserts
        skipfirstrow (optional): if 1 then skip the first row of data (exclude headers)

    Returns:
        None
    """
    data_list = loop_csv_file(csv_file)
    schema_list = get_schema_file(schema_file)
    # skip the first value of data_list, which is the header row
    data_list = iter(data_list)
    if skipfirstrow == 1:
        next(data_list)
    insert_datarows_to_table(data_list, schema_list, connection, table)