Пример #1
0
def process_all():
    """Call ``process`` once for every etl_file row, in ascending etl_file_id order."""
    connection = ConnectionHelper().get_named_connection("current")
    id_cursor = connection.cursor()
    id_cursor.execute("select etl_file_id from etl_file order by etl_file_id")
    for record in id_cursor:
        # Each row carries a single column: the file id.
        file_id = record[0]
        logging.info("about to process %s", file_id)
        process(file_id)
Пример #2
0
 def test_sqlite(self):
     """The "sqlite3" named connection and its cursor both report the SQLite dialect."""
     helper = ConnectionHelper(ConnectionHelperTest.yaml_file_path)
     sqlite_conn = helper.get_named_connection("sqlite3")

     # Dialect detection must work when handed the connection itself ...
     conn_dialect = ConnectionHelper.get_dialect(sqlite_conn)
     self.assertEqual(conn_dialect, dialects.DIALECT_SQLITE)

     # ... and when handed a cursor created from it.
     sqlite_cursor = sqlite_conn.cursor()
     cursor_dialect = ConnectionHelper.get_dialect(sqlite_cursor)
     self.assertEqual(cursor_dialect, dialects.DIALECT_SQLITE)
Пример #3
0
    def test_memory_sqlite(self):
        """Smoke-test the "sqlite3" and "sqlite3_mem" named connections."""
        helper = ConnectionHelper(ConnectionHelperTest.yaml_file_path)

        # "sqlite3" only has to be able to run a trivial query.
        first_conn = helper.get_named_connection("sqlite3")
        first_conn.cursor().execute("select 'x'")

        # "sqlite3_mem": its configured url must split into sqlite / :memory:.
        mem_conn = helper.get_named_connection("sqlite3_mem")
        mem_url = helper.connections["sqlite3_mem"]["url"]
        dialect, dburl = helper.get_components(mem_url)
        self.assertEqual(dialect, "sqlite")
        self.assertEqual(dburl, ":memory:")
        mem_conn.cursor().execute("select 'x'")
Пример #4
0
    def test_sqlite_returning(self):
        """An insert run through CursorHelper with ``returning`` must yield 1 as the new key."""
        # Not used below; kept so the named logger is still created.
        logger = logging.getLogger(__name__ + ":test_sqlite_returning")

        helper = ConnectionHelper(ConnectionHelperTest.yaml_file_path)
        mem_conn = helper.get_named_connection("sqlite3_mem")
        mem_cursor = CursorHelper(mem_conn.cursor())

        # Recreate table "a" before inserting into it.
        for ddl in ("drop table if exists a",
                    "create table a (b serial primary key, c numeric)"):
            mem_cursor.execute(ddl)

        new_id = mem_cursor.execute("insert into a (c) values (%(c)s)",
                                    {"c": 3},
                                    returning="returning b")
        self.assertEqual(1, new_id)
Пример #5
0
    def test_all(self):
        """Insert one UtProcess row through the "test" connection, then roll it back."""
        test_conn = ConnectionHelper().get_named_connection("test")
        ut_process = UtProcess(test_conn)
        process_row = dict(
            schema_nm="test",
            process_nm="UtProcess_test",
            thread_nm=None,
            process_run_nbr=1,  # TODO
            status_msg="testing",
            status_id=None,
            status_ts=None,  # a fixed 2017-04-06 timestamp was sketched here
            ignore_flg="N",
        )
        ut_process.insert_process(process_row)

        # Discard the inserted row.
        test_conn.rollback()
Пример #6
0
 def test_yaml_reader(self):
     """The test yaml file must define exactly the two sqlite connections.

     Also checks that the first "sql" entry of "sqlite3_mem" is the
     foreign-keys pragma.
     """
     connections = ConnectionHelper.get_connections_yaml(
         ConnectionHelperTest.yaml_file_path)
     self.assertEqual(len(connections), 2)
     # assertIn shows the container's contents when the check fails,
     # whereas assertTrue(x in y) only reports "False is not true".
     self.assertIn("sqlite3", connections)
     self.assertIn("sqlite3_mem", connections)
     first_sql = connections["sqlite3_mem"]["sql"][0]
     self.assertEqual(first_sql, "PRAGMA foreign_keys = ON")
Пример #7
0
 def get_postgres_connection(cls):  # TODO externalize use - test schema to initialize
     """Connect to the local postgres database and switch to the integration_test schema.

     Issues ``create schema`` followed by ``set schema`` for "integration_test".
     The connection is stored on ``cls.connection`` and also returned.
     """
     db_url = "postgres:host='localhost' dbname='sales_reporting' user='******' password='******'"  # TODO externalize
     cls.connection = ConnectionHelper.get_connection(db_url)
     schema = "integration_test"
     setup_cursor = cls.connection.cursor()
     for statement_template in ("create schema %s", "set schema '%s'"):
         setup_cursor.execute(statement_template % schema)
     return cls.connection
Пример #8
0
 def test_dataset_from_sql(self):
     """Time Dataset.read_sql for one etl_file_id and log the elapsed time and row count."""
     connection = ConnectionHelper().get_named_connection("current")
     query = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s"
     query_params = {"ETL_FILE_ID": 201723}

     # NOTE(review): the call shape mirrors pandas.read_sql(sql, con, ..., params=...) — confirm
     # against Dataset.read_sql.
     started = datetime.datetime.now()
     sale_dataframe = Dataset.read_sql(query, connection, params=query_params)
     elapsed = datetime.datetime.now() - started
     logger.debug("test_dataset_from_sql elapsed %s " % elapsed)
     logger.debug("rowcount %s" % len(sale_dataframe.rows))
    def test_fetch_or_insert(self):
        """fetch_or_insert must return an id for the TEST_COND rule; the work is committed."""
        connection = ConnectionHelper().get_named_connection("test")
        cursors = Cursors(connection)
        # NOTE(review): "sql_text" ends in %(ETL_FILE_ID) with no trailing "s" and
        # "format_str" contains a bare "% " — both look like typos; confirm before
        # relying on them. They are stored as data here, not executed by this test.
        rule_definition = {
            "rule_name": "TEST_COND",
            "table_name": "etl_sale",
            "msg": "I don't know",
            "sql_text":
            "select id from etl_sale where etl_file_id = %(ETL_FILE_ID)",
            "narrative": "huh",
            "severity": 3,
            "format_str": "id % is %s",
            # "CORRECTIVE_ACTION" : "Fix it"
        }

        condition_id = UtConditionPersistence.fetch_or_insert(cursors,
                                                              rule_definition)
        connection.commit()

        assert condition_id is not None
Пример #10
0
    def main():
        """
        Command-line entry point: run a SQL script file through SqlRunner.

        SqlRunner.py [-h] --connection_name CONNECTION_NAME
                          --infile_name INFILE_NAME
                          [--continue_on_error CONTINUE_ON_ERROR]
                          [--print_sql]
                          [--commit]
                          [--interactive]

        optional arguments:
          -h, --help            show this help message and exit
          --connection_name CONNECTION_NAME
                                name of database connection
          --infile_name INFILE_NAME
                                name of sql script file
          --continue_on_error CONTINUE_ON_ERROR
                                continue running statements after an error
          --print_sql           print each executed SQL statement
          --commit              commit at end
          --interactive         run an interactive session instead of the script

        :return: None
        """
        logging.basicConfig(level=logging.INFO)
        parser = argparse.ArgumentParser()
        parser.add_argument("--connection_name", required=True, default="test",
                            help="name of database connection")

        parser.add_argument("--infile_name", required=True,
                            help="name of sql script file")

        # NOTE(review): this option takes a value, so any non-empty string
        # (even "false") is truthy — confirm whether a flag was intended.
        parser.add_argument("--continue_on_error",
                            help="continue running statements after an error")

        parser.add_argument("--print_sql", action="store_true",
                            help="print each executed SQL statement")

        parser.add_argument("--commit", action="store_true",
                            help="commit at end")

        parser.add_argument("--interactive", action="store_true",
                            help="Interactive seesion")

        parser.set_defaults(print_sql=False, continue_on_error=False)

        args = parser.parse_args()
        connection = ConnectionHelper(None).get_named_connection(
            args.connection_name)
        # Honour --commit: it used to be parsed and then overridden by a
        # hard-coded commit=True, so every run committed.
        runner = SqlRunner(infile_name=args.infile_name,
                           conn=connection,
                           continue_on_error=args.continue_on_error,
                           print_sql=args.print_sql,
                           commit=args.commit)
        if args.interactive:
            runner.interactive()
        else:
            runner.process()
Пример #11
0
def sales_pivot_from_sql():
    """Print the etl_sale rows of one etl_file_id, then a pivot of cases_shipped by ship_dt."""
    from pdsutil.DbUtil import ConnectionHelper

    query = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s"
    query_params = {"ETL_FILE_ID": 201723}
    connection = ConnectionHelper().get_named_connection("current")
    sales = pandas.read_sql(query, connection, params=query_params)
    print(sales)

    # One row per (customer, product), one column per ship date.
    pivot = pandas.pivot_table(sales,
                               index=['ship_to_cust_id', 'product_descr'],
                               columns='ship_dt',
                               values='cases_shipped')
    print("pivot %s\n" % pivot)
def main():
    """Run CdsDataloadConditions on the "test" connection for ETL_FILE_ID 20."""
    # TODO add args: the file id is hard-coded until --etl_file_id / --rerun
    # command-line options are wired in through argparse.
    logging.basicConfig(level=logging.INFO)
    test_conn = ConnectionHelper(None).get_named_connection("test")
    conditions = CdsDataloadConditions()
    conditions.process(test_conn, {"ETL_FILE_ID": 20})
def main():
    """Load /tmp/customers.cds for distributor EXOTICTX through the "it" connection."""
    logging.basicConfig(level=logging.INFO)
    # TODO: input file, connection name and distributor are hard-coded; an
    # argparse front end (--inputfile, --connection, --distributor) was
    # sketched but is not wired in. A sqlite3 connection to /tmp/scratch.dbf
    # was also tried in place of the named connection.
    input_file, distributor = "/tmp/customers.cds", "EXOTICTX"
    it_conn = ConnectionHelper().get_named_connection("it")
    CdsDataloader().process(input_file, it_conn, distributor, False)
Пример #14
0
    def setUp(self):
        """Open a connection for ``self.dialect`` and run the schema and data initialisers.

        Raises:
            Exception: if ``self.dialect`` is neither the postgres nor the sqlite dialect.
        """
        # db_url = os.getenv(self.POSTGRES_TEST_URL)
        active_dialect = self.dialect
        if active_dialect == dialects.DIALECT_POSTGRES:
            # TODO externalize
            db_url = "postgres:host='localhost' dbname='sales_reporting_db' user='******' password='******'"
            test_schema = self.TEST_SCHEMA
        elif active_dialect == dialects.DIALECT_SQLITE:
            db_url, test_schema = "sqlite::memory:", None
        else:
            raise Exception("unsupported dialect %s" % active_dialect)

        # NOTE(review): both branches above assign a non-empty db_url, so this
        # skip path looks unreachable as written — confirm.
        message = "Skipping test as %s environment variable not set" % self.POSTGRES_TEST_URL
        if not db_url:
            logging.warning(message)
            self.skipTest(message)

        connection = ConnectionHelper.get_connection(db_url)
        self.connection = connection
        # Schema first, then the condition tables, then the sales-reporting data.
        SchemaInitter(connection, test_schema).process()
        ut_condition_init(connection).process()
        sr_data_init(connection).process()
Пример #15
0
def sales_pivot_from_sql_by_month():
    """Pivot monthly cases shipped per customer/product and time each export path.

    Reads ``etl_cust_product_month_mv`` through the "current" connection and
    pivots ``sum_cases_shipped`` by ``ship_month``. The pivot is then exported
    four ways, each timed and reported through ``log_time``:

    * pandas' xlsxwriter-backed ExcelWriter -> /tmp/all_sales_pivot.xlsx
    * ``DataFrame.to_csv`` into an in-memory buffer
    * ``DataFrame.to_records``
    * the ``to_excel`` helper on an xlsxwriter workbook -> /tmp/crosstab.xslx

    :return: None
    """
    full_begin = time.time()

    from pdsutil.DbUtil import ConnectionHelper
    sql = "select * from etl_cust_product_month_mv where sum_cases_shipped > 0"
    # NOTE(review): the statement has no placeholders, yet binds are still
    # passed to read_sql — confirm whether they are needed.
    parms = {"ETL_FILE_ID": 201723}
    connection = ConnectionHelper().get_named_connection("current")
    before_query = time.time()
    df = pandas.read_sql(sql, connection, params=parms)
    after_query = time.time()

    pivot_df = pandas.pivot_table(df,
                                  columns='ship_month',
                                  values='sum_cases_shipped',
                                  index=['ship_to_cust_id', 'product_descr'])
    print("pivot_df index %s" % pivot_df.index.name)
    after_pivot = time.time()

    #  TODO http://stackoverflow.com/questions/17241004/pandas-how-to-get-the-data-frame-index-as-an-array set index_columns

    before_to_excel = time.time()
    # Create a Pandas Excel writer using XlsxWriter as the engine.
    writer = pandas.ExcelWriter('/tmp/all_sales_pivot.xlsx',
                                engine='xlsxwriter')

    # Convert the dataframe to an XlsxWriter Excel object.
    sheet_name = "All Sales"
    pivot_df.to_excel(writer, sheet_name=sheet_name)
    # TODO: set column widths on writer.sheets[sheet_name]

    to_csv_begin = time.time()
    import io
    csv_buffer = io.StringIO()
    pivot_df.to_csv(csv_buffer)
    to_csv_end = time.time()

    to_record_begin = time.time()
    records = pivot_df.to_records()
    to_record_end = time.time()

    to_excel_internal_start = time.time()
    import xlsxwriter
    # The with-block closes the file handle, which was previously left open.
    with open("/tmp/crosstab.xslx", "wb") as xlsx_file:
        workbook = xlsxwriter.Workbook(xlsx_file)
        to_excel(workbook, "wank", records)
        workbook.close()
    to_excel_internal_end = time.time()

    # ExcelWriter.save() was removed in pandas 2.0; close() writes the file
    # and is available in older releases as well.
    writer.close()
    after_excel = time.time()
    full_end = time.time()

    log_time("query", before_query, after_query)
    log_time("pivot", after_query, after_pivot)
    log_time("to_excel", before_to_excel, after_excel)
    log_time("to_csv", to_csv_begin, to_csv_end)
    log_time("to_records", to_record_begin, to_record_end)
    log_time("internal to_excel", to_excel_internal_start,
             to_excel_internal_end)
    log_time("full time", full_begin, full_end)
Пример #16
0
from pdsutil.DbUtil import ConnectionHelper

# Re-key etl_customer.ship_to_cust_id: the i-th distinct customer id is
# replaced by the i-th distinct id found in etl_sale (both lists are sorted).
# NOTE(review): zip() pairs purely by position and stops at the shorter list,
# and an earlier update can produce a value that a later update matches
# again — confirm this is acceptable for the data being fixed up.
connection = ConnectionHelper().get_named_connection("it")
cursor = connection.cursor()
cursor.execute(
    "select distinct ship_to_cust_id from etl_sale order by ship_to_cust_id")
sale_ship_to_ids = cursor.fetchall()
print(sale_ship_to_ids)
cursor.execute(
    "select distinct ship_to_cust_id from etl_customer order by ship_to_cust_id"
)
cust_ship_to_ids = cursor.fetchall()
print(cust_ship_to_ids)
for sale_id_t, cust_id_t in zip(sale_ship_to_ids, cust_ship_to_ids):
    binds = {"to_id": sale_id_t[0], "cust_id": cust_id_t[0]}
    # Print each value under its own label: cust_id is the value being
    # replaced, to_id is the replacement (same meaning as in binds).
    print("cust_id %s to_id %s" % (binds["cust_id"], binds["to_id"]))
    cursor.execute(
        "update etl_customer set ship_to_cust_id = %(to_id)s where ship_to_cust_id = %(cust_id)s",
        binds)
connection.commit()
Пример #17
0
        :param etl_file_id: the etl_file.etl_file_id to extract
        :param file_name: output file name
        :param by_line_number: boolean - much slower but records in original order

        :return: None
        """

        if by_line_number:
            self.process_by_line_number(etl_file_id, file_name)
        else:
            self.process_by_table(etl_file_id, file_name)

    def unload_all(self):
        """Unload every etl_file row to /tmp/python/<etl_file_id>.cds via ``process``."""
        id_cursor = CursorHelper(self.connection.cursor())
        id_rows = id_cursor.execute("select etl_file_id from etl_file")

        for id_row in id_rows:
            file_id = id_row[0]
            # Alternative target once used: "../pdssr_testdata/%s.cds" % file_id
            target = "/tmp/python/%s.cds" % file_id
            # Third argument False: do not unload by line number.
            self.process(file_id, target, False)


if __name__ == "__main__":
    # Script entry point: unload every etl_file through the "it" connection.
    myconn = ConnectionHelper().get_named_connection("it")
    unloader = CdsUnload(myconn)
    # TODO: single-file variant -> unloader.process(30, "/tmp/201502.cds", False)
    unloader.unload_all()
Пример #18
0
def process(etl_file_id):
    """Run ``Post.process`` for *etl_file_id* on the "current" named connection."""
    connection = ConnectionHelper().get_named_connection("current")
    poster = Post(connection)
    poster.process(etl_file_id)
Пример #19
0
from pdsutil.DbUtil import ConnectionHelper, CursorHelper
import sys
import csv


if __name__ == "__main__":
    # sys.argv[0] is the script path; the connection name is the first real
    # command-line argument.
    if len(sys.argv) < 2:
        sys.exit("usage: %s CONNECTION_NAME" % sys.argv[0])
    connection_name = sys.argv[1]
    connection = ConnectionHelper().get_named_connection(connection_name)
    cursor = CursorHelper(connection.cursor())
    # NOTE(review): sql and binds are not defined in the visible part of this
    # file — confirm where they are supposed to come from.
    rows = cursor.execute(sql, binds)
    quoting_types = [csv.QUOTE_NONNUMERIC, csv.QUOTE_ALL, csv.QUOTE_MINIMAL, csv.QUOTE_NONE]
    def to_csv(self, file, emit_headers: bool = True, dialect: str = "excel", delimiter: str = ",",
               quotechar: str = '"', quoting: int = csv.QUOTE_NONNUMERIC):
        """Write ``self.rows`` to *file* as CSV.

        The formatting arguments are passed through to ``csv.writer``. The
        defaults reproduce the output this method has always written (excel
        dialect, comma-separated, double-quoted, non-numeric fields quoted).

        :param file: writable text file object
        :param emit_headers: when true, write ``self.column_names`` as the first row
        :param dialect: name of the csv dialect to start from
        :param delimiter: field separator
        :param quotechar: character used to quote fields
        :param quoting: one of the ``csv.QUOTE_*`` constants
        :return: None
        """
        writer = csv.writer(file, dialect=dialect,
                            delimiter=delimiter, quotechar=quotechar,
                            quoting=quoting)
        if emit_headers:
            writer.writerow(self.column_names)
        writer.writerows(self.rows)
Пример #20
0
import sys
from  pdsutil.DbUtil import ConnectionHelper, CursorHelper
from pdsutil.Dataset import Dataset

import datetime
import logging

# Demo script: load one etl_file's sales into a Dataset, export it to CSV,
# then copy it into a sqlite table and print that table's row count.
logging.basicConfig(level=logging.INFO)
sql = "select * from etl_sale where etl_file_id = %(ETL_FILE_ID)s"
binds = {"ETL_FILE_ID": 201723}
connection = ConnectionHelper().get_named_connection("current")
cursor = CursorHelper(connection.cursor())


sales = Dataset.from_sql(connection, sql, binds)
# sales.to_csv(sys.stdout)

# to_csv: the with-block closes (and therefore flushes) the output file,
# which was previously left open for the rest of the run.
with open("/tmp/sales.csv", "w") as out_file:
    sales.to_csv(out_file)

# to_sqlite
sales.set_column_meta("curr_cd", str, 3)
sales.set_column_meta("org_customer_id", str, 10)
db = sales.to_sqlite("etl_sale", verbose=False)
cursor = CursorHelper(db.cursor())
rows = cursor.execute("select count(*) from etl_sale")
for row in rows:
    print(row)
Пример #21
0
 def connect(self, name: str):
     """Open the named connection and wrap a fresh cursor from it in a CursorHelper.

     :param name: connection name handed to ``get_named_connection``
     """
     helper = ConnectionHelper()
     self.connection = helper.get_named_connection(name)
     raw_cursor = self.connection.cursor()
     self.cursor = CursorHelper(raw_cursor)
Пример #22
0
class isql:
    """Small interactive helper for running named SQL statements.

    Typical use: ``connect`` to a named connection, ``load`` a statements
    file, collect bind values with ``bind`` / ``bind_date``, pick a statement
    with ``use`` / ``use_statement`` and run it with ``run``, ``execute`` or
    ``to_csv``.
    """

    def __init__(self):
        self.statements = SqlStatements.from_statement_list([])
        print(self.statements)
        # Currently selected statement. use() and run() store the statement
        # itself, use_statement() stores its name.
        self.statement = None
        self.connection = None
        self.cursor = None
        self.binds = {}

    def dump_statements(self):
        """Print the loaded statements as yaml."""
        print("about to dump")
        print(yaml.dump(self.statements))

    def connect(self, name: str):
        """Open the named connection and wrap a cursor from it in a CursorHelper."""
        self.connection = ConnectionHelper().get_named_connection(name)
        self.cursor = CursorHelper(self.connection.cursor())

    def load(self, filename: str):
        """Replace the statements with those read from the yaml file, then list them."""
        self.statements = SqlStatements.from_yaml(filename).statements
        self.list_statements()

    def list_statements(self, verbose=False):
        """Print each statement key with its position; *verbose* is currently unused."""
        for i, v in enumerate(self.statements):
            print("%s %s" % (i, v))

    def bind_date(self, name, year, month, day):
        """Bind *name* to a ``datetime.datetime`` built from year, month and day."""
        self.binds[name] = datetime.datetime(year, month, day)

    def use(self, number: int):
        """Select the statement at position *number* in the statements' iteration order."""
        for i, k in enumerate(self.statements):
            if i == number:
                self.statement = self.statements[k]
                break

    def use_statement(self, name: str):
        """Select a statement by name; the name itself is stored."""
        self.statement = name

    def run(self, index=None):
        """Execute the selected statement's "sql" entry and print every row.

        :param index: optional position of the statement to select first
        """
        if index is None:
            print("stmt: %s" % self.statement)
        else:
            for i, k in enumerate(self.statements):
                if i == index:
                    self.statement = self.statements[k]
                    # Show the sql of the statement just selected; indexing
                    # self.statements with "sql" looked up a statement *named*
                    # "sql" instead.
                    print("k: %s sql: %s" % (k, self.statement["sql"]))
                    break
        rows = self.cursor.execute(self.statement["sql"], self.binds)
        for row in rows:
            print(row)

    def bind(self, name, value):
        """Bind *name* to *value* for subsequent executions."""
        self.binds[name] = value

    def execute(self, statement_name: str = None):
        """Execute a statement with the current binds and return the cursor's result.

        :param statement_name: statement to run; when omitted, the statement
            chosen earlier with ``use`` / ``use_statement`` is used
        :return: whatever ``self.cursor.execute`` returns, or None when there
            is no connection or no statement to run (a hint is printed)
        """
        if self.connection is None:
            print("use connect before use")
            return None
        if statement_name:
            sql = self.statements[statement_name]
        elif self.statement is not None:
            selected = self.statement
            # use_statement() stores a name while use()/run() store the
            # statement itself, so only names are looked up here.
            # NOTE(review): assumes statements are never plain strings — confirm.
            sql = self.statements[selected] if isinstance(selected, str) else selected
        else:
            print("pass statement or call use_statement() before execute")
            return None
        return self.cursor.execute(sql, self.binds)

    import sys

    def to_csv(self, headers=True, outfile=sys.stdout):
        """Execute the selected statement and write its rows to *outfile* as CSV.

        :param headers: when true, write the cursor's column names first
        :param outfile: writable text file object (default: standard output)
        """
        import csv
        rows = self.execute()
        if rows is None:
            # execute() already printed why nothing was run.
            return
        # First item of every cursor.description entry is the column name.
        column_names = [column[0] for column in self.cursor.description]
        print(column_names)

        writer = csv.writer(outfile,
                            dialect="excel",
                            delimiter=',',
                            quotechar='"',
                            quoting=csv.QUOTE_NONNUMERIC)
        if headers:
            writer.writerow(column_names)
        for row in rows:
            writer.writerow(row)

    @staticmethod
    def help():
        """Print a short list of the available commands."""
        print("connect()")
        print("list_statements()")
        print("execute(statement_name)")
        print("bind_date(name,yr,month,day)")
        print("load('filename')")
        print("use statement(statement_name")
        print("use")
def get_test_postgres_connection():
    """Return a connection to the local sales_reporting_db postgres database."""
    # TODO externalize
    return ConnectionHelper.get_connection(
        "postgres:host='localhost' dbname='sales_reporting_db' user='******' password='******'")