def initUI(self):
    """Build the progress window and run the subtitle pipeline.

    Creates the status label, progress bar and timer, shows the window,
    then runs the three ``utilities`` steps (hash, download, write) in
    order.  The first step that reports a failure code sets the matching
    status text, restarts the timer and ends the method.  The last return
    code seen is kept in ``self.responce``.
    """
    # --- widgets -------------------------------------------------------
    self.status = QtGui.QLabel('Generating Hash', self)
    self.pbar = QtGui.QProgressBar(self)
    self.pbar.setGeometry(30, 40, 200, 25)

    # --- progress timer ------------------------------------------------
    # NOTE(review): step/final are presumably consumed by timerEvent,
    # which is not visible here -- confirm.
    self.timer = QtCore.QBasicTimer()
    self.step = 0
    self.final = 20
    self.timer.start(20, self)

    # --- window --------------------------------------------------------
    self.setGeometry(300, 300, 280, 170)
    self.setWindowTitle('Subtitle Downloader')
    self.show()
    self.final = 100

    # --- pipeline: hash -> download -> write ---------------------------
    worker = utilities.utilities(sys.argv[1])

    self.responce = worker.get_hash()
    if self.responce == 1:
        self.status.setText('IO error')
        self.timer.start(20, self)
        return

    self.responce = worker.get_subtitle()
    if self.responce == 2:
        self.status.setText('Not found')
        self.timer.start(20, self)
        return

    self.responce = worker.write_subtitle()
    if self.responce != 0:
        # Any non-zero code from the write step is shown as a permission
        # error.
        self.timer.start(20, self)
        self.status.setText('Permission Err')
        return

    self.status.setText('Done')
    self.timer.start(0, self)
def initUI(self):
    """Build the progress window and run the subtitle pipeline.

    Creates the status label, progress bar and timer, shows the window,
    then runs the three ``utilities`` steps (hash, download, write) in
    order.  The first step that reports a failure code sets the matching
    status text, restarts the timer and ends the method.  The last return
    code seen is kept in ``self.responce``.
    """
    # --- widgets -------------------------------------------------------
    self.status = QtGui.QLabel('Generating Hash', self)
    self.pbar = QtGui.QProgressBar(self)
    self.pbar.setGeometry(30, 40, 200, 25)

    # --- progress timer ------------------------------------------------
    # NOTE(review): step/final are presumably consumed by timerEvent,
    # which is not visible here -- confirm.
    self.timer = QtCore.QBasicTimer()
    self.step = 0
    self.final = 20
    self.timer.start(20, self)

    # --- window --------------------------------------------------------
    self.setGeometry(300, 300, 280, 170)
    self.setWindowTitle('Subtitle Downloader')
    self.show()
    self.final = 100

    # --- pipeline: hash -> download -> write ---------------------------
    worker = utilities.utilities(sys.argv[1])

    self.responce = worker.get_hash()
    if self.responce == 1:
        self.status.setText('IO error')
        self.timer.start(20, self)
        return

    self.responce = worker.get_subtitle()
    if self.responce == 2:
        self.status.setText('Not found')
        self.timer.start(20, self)
        return

    self.responce = worker.write_subtitle()
    if self.responce != 0:
        # Any non-zero code from the write step is shown as a permission
        # error.
        self.timer.start(20, self)
        self.status.setText('Permission Err')
        return

    self.status.setText('Done')
    self.timer.start(0, self)
def __init__(self, name, parent=None):
    """Wire this module object to its tab widget and load its state.

    Args:
        name: Suffix of the tab attribute on ``parent.ui``; the widget
            ``parent.ui.tab_<name>`` becomes the parent of ``self.util``.
        parent: Owner object exposing a ``ui`` attribute.  Although it
            defaults to ``None``, the tab lookup requires a real parent.

    Raises:
        AttributeError: If ``parent`` has no ``ui`` or no ``tab_<name>``.
    """
    self.parent = parent
    self.name = name

    # Resolve the tab by attribute name.  The previous implementation
    # assembled a Python statement as a string and ran it through exec(),
    # which executes arbitrary code if ``name`` is ever not a plain
    # identifier and hides the assignment from tooling.
    tab_widget = getattr(self.parent.ui, 'tab_' + name)
    self.util = utilities(parent=tab_widget)

    self.setupXYComboBox()
    self.modUtil = utilities(parent=self)

    # Cache the child widgets of the tab, grouped by widget class.
    self._lineEdits = self.util.returnChildrenDictionary(QtGui.QLineEdit)
    self._spinBoxes = self.util.returnChildrenDictionary(QtGui.QSpinBox)
    self._checkBoxes = self.util.returnChildrenDictionary(QtGui.QCheckBox)
    self._comboBoxes = self.util.returnChildrenDictionary(QtGui.QComboBox)
    self._setupText = self.util.returnChildrenDictionary(
        QtGui.QTextBrowser, searchString='setupText')

    self.modUtil.setAllLineEditValidator2Double()
    self.loadModuleEntries()
    self.loadSetupHTML()
    self._findClimbConventionalCheckboxes()
def __init__(self):
    """Create the helper objects and start with empty path/name fields."""
    # Helper objects (the original note mentions DUT, MFC and GOLDEN).
    self.dataSup = data_parsing()
    self.UTIL = utilities()

    # No data file, test or setup file has been selected yet.
    for field in ("data_path", "dataFileName", "testName",
                  "setupFilePath", "setupFileName"):
        setattr(self, field, "")
def __init__(self, Table_Offset = (0,0,0), Table_Datum = (0,0,0), parent = None):
    """Store the table geometry and the G-code vocabulary used for output.

    Args:
        Table_Offset: Three-element offset, stored as ``_Table_Offset``.
        Table_Datum: Three-element datum, stored as ``_Table_Datum``.
        parent: Optional owner object, stored as ``parent``.

    The preamble and postamble are read from ``Preamble.txt`` and
    ``Postamble.txt`` through ``utilities.readTextFile``.
    """
    self.parent = parent
    self.util = utilities()

    # Table geometry.
    self._Table_Offset, self._Table_Datum = Table_Offset, Table_Datum

    # Motion words and line terminator.
    self._g_rapid, self._g_feed = 'G0', 'G1'
    self._new_line = '\n'

    # Axis and parameter letters.
    self._x, self._y, self._z = 'X', 'Y', 'Z'
    self._feed, self._speed = 'F', 'S'

    # Program header and footer come from text files.
    read_text = self.util.readTextFile
    self._preamble = read_text('Preamble.txt')
    self._postamble = read_text('Postamble.txt')

    # Dwell and spindle codes.
    self._pause = 'G04P'
    self._spindleOnCW, self._spindleOnCCW, self._spindleOff = 'M03', 'M04', 'M05'
def __init__(self, Table_Offset = (0,0,0), Table_Datum = (0,0,0), parent = None):
    """Store the table geometry and the G-code vocabulary used for output.

    Args:
        Table_Offset: Three-element offset, stored as ``_Table_Offset``.
        Table_Datum: Three-element datum, stored as ``_Table_Datum``.
        parent: Optional owner object, stored as ``parent``.

    The preamble and postamble are read from ``Preamble.txt`` and
    ``Postamble.txt`` through ``utilities.readTextFile``.
    """
    self.parent = parent
    self.util = utilities()

    # Table geometry.
    self._Table_Offset, self._Table_Datum = Table_Offset, Table_Datum

    # Motion words and line terminator.
    self._g_rapid, self._g_feed = 'G0', 'G1'
    self._new_line = '\n'

    # Axis and parameter letters.
    self._x, self._y, self._z = 'X', 'Y', 'Z'
    self._feed, self._speed = 'F', 'S'

    # Program header and footer come from text files.
    read_text = self.util.readTextFile
    self._preamble = read_text('Preamble.txt')
    self._postamble = read_text('Postamble.txt')

    # Dwell and spindle codes.
    self._pause = 'G04P'
    self._spindleOnCW, self._spindleOnCCW, self._spindleOff = 'M03', 'M04', 'M05'
class DataProcessor:
    """Load raw loan CSV files into staging and promote them to the fact table.

    ``read_and_load_files`` inserts every well-formed CSV row into the
    staging table and archives the file.  ``process_staging_data`` resolves
    the dimension keys for each unprocessed staging row, inserts the fact
    row and flags the staging row as processed.
    """

    NUMBER_OF_FIELDS_STAGING = 33
    NUMBER_OF_FIELDS_FACT = 29
    NUMBER_OF_ROWS = 0
    dictDate = {}

    # Shared helpers, created once when the class body is executed.
    utils = utilities()
    db_utils = DBUtilities()
    logger = utils.formatLogger("STAGING_ETL")
    staging_tuples_placeholder = utils.generatePlaceholderTuple(NUMBER_OF_FIELDS_STAGING)
    fact_tuples_placeholder = utils.generatePlaceholderTuple(NUMBER_OF_FIELDS_FACT)

    end_of_period = None
    raw_file_path = None
    processed_file_path = None

    def __init__(self, end_of_period, raw_file_path, processed_file_path):
        """Remember the reporting period and the input/archive locations.

        Args:
            end_of_period: Period end date; passed through
                ``utilities.removeTimeStamp`` before being stored.
            raw_file_path: Directory that holds the raw CSV files.
            processed_file_path: Archive location handed to
                ``utilities.createDirectory``.
        """
        self.end_of_period = self.utils.removeTimeStamp(end_of_period)
        self.raw_file_path = raw_file_path
        self.processed_file_path = processed_file_path

    def read_and_load_files(self):
        """Insert every CSV row from ``raw_file_path`` into staging.

        Rows whose first cell is ``'End of Period'`` are treated as headers
        and skipped.  Rows that do not have exactly
        ``NUMBER_OF_FIELDS_STAGING`` cells are logged and skipped.  Each
        CSV file is copied to the archive directory after it is read, and
        the database resources are released at the end.
        """
        # The previous code called os.chdir() on an undefined name; full
        # paths are used instead so the working directory is left alone.
        source_dir = self.raw_file_path
        for file_name in os.listdir(source_dir):
            source_file = os.path.join(source_dir, file_name)
            if not (os.path.isfile(source_file) and file_name.endswith(".csv")):
                continue

            with open(source_file) as records:
                for row in csv.reader(records, delimiter=','):
                    if not row:
                        # Blank line: nothing to index or insert.
                        continue
                    if row[0] == 'End of Period':
                        continue
                    if len(row) == self.NUMBER_OF_FIELDS_STAGING:
                        self.db_utils.insert_staging_data(
                            tuple(row), self.staging_tuples_placeholder)
                    else:
                        self.logger.warning("Row Missing Some Columns")

            # Keep a copy of the raw file in the archive directory.
            # NOTE(review): createDirectory presumably returns the archive
            # directory path without a trailing separator -- confirm.
            archive_dir = self.utils.createDirectory(
                self.raw_file_path, self.processed_file_path)
            archive_file = os.path.join(archive_dir, file_name)
            dest = shutil.copy(source_file, archive_file)
            if dest == archive_file:
                self.logger.info("RAW DATA FILE SUCCESSFULLY ARCHIVED")

        self.db_utils.release_db_resources()

    def _date_key(self, value):
        """Return the time-dimension key for ``value``, or None if it is empty."""
        if value:
            return self.utils.generateTimeDimensionKey(value)
        return None

    def process_staging_data(self, etl = 0, end_of_period = '2011-04-30'):
        """Promote unprocessed staging rows into the fact table.

        Dimension entries (time, region, country, borrower, guarantor, loan
        type, loan status, currency, project) are looked up or created
        before the fact row is inserted.

        Args:
            etl: ETL flag value selecting the staging rows to process.
            end_of_period: Period end date selecting the staging rows.

        The process exits when there is nothing to process.
        """
        records = self.db_utils.getUnProcessedStagingData(etl, end_of_period)
        if len(records) == 0:
            self.logger.info("NOT DATA TO PROCESS")
            exit()

        db = self.db_utils
        log = self.logger.info

        for record in records:
            def text(index, record=record):
                # Staging columns are decoded from bytes before use.
                return record[index].decode("utf-8")

            staging_id = record[0]

            end_of_period_key = db._checkupdateTimeDimension(record[2])
            log("END_OF_PERIOD_KEY SET TO -> " + str(end_of_period_key))

            loan_number = text(3)

            # Region
            region_key = db._checkUpdateRegionDimension(text(4))
            log("REGION_KEY SET TO -> " + str(region_key))

            # Country
            country_key = db._checkupdateCountryDimension(text(5), text(6), region_key)
            log("COUNTRY_KEY SET TO -> " + str(country_key))

            # Borrower
            borrower_key = db._checkupdateBorrowerDimension(text(7))
            log("BORROWER_KEY SET TO -> " + str(borrower_key))

            # Guarantor (its country must be resolved first)
            guarantor_country_key = db._getCountryKey(text(8))
            guarantor_key = db._checkupdateGuarantorDimension(text(9), guarantor_country_key)
            log("GUARANTOR_KEY SET TO -> " + str(guarantor_key))

            # Loan type
            loan_type_key = db._checkupdateLoanTypeDimension(text(10))
            log("LOAN_TYPE SET TO -> " + str(loan_type_key))

            # Loan status
            loan_status_key = db._checkupdateLoanStatusDimension(text(11))
            log("LOAN_STATUS KEY SET TO -> " + str(loan_status_key))

            interest_rate = text(12)

            # Currency
            currency_commitment_key = db._checkupdateCurrencyDimension(text(13))
            log("CURRENCY KEY SET TO -> " + str(currency_commitment_key))

            # Project
            project_key = db._checkupdateProjectDimension(text(14), text(15))
            log("PROJECT KEY SET TO -> " + str(project_key))

            # Columns 16-27: original principal, cancelled, undisbursed,
            # disbursed, repaid to IBRD, due to IBRD, exchange adjustment,
            # borrower's obligation, sold/repaid/due third party, loans held.
            amounts = tuple(text(index) for index in range(16, 28))

            # Columns 28-34: first repayment, last repayment, agreement
            # signing, board approval, effective, closed, last disbursement.
            date_keys = tuple(self._date_key(record[index]) for index in range(28, 35))

            fct_tuple = (
                end_of_period_key, loan_number, loan_status_key, loan_type_key,
                project_key, borrower_key, country_key, currency_commitment_key,
                guarantor_key, interest_rate,
            ) + amounts + date_keys

            affected_rows = db.insert_fct_data(fct_tuple, self.fact_tuples_placeholder)
            # insert_fct_data returns None when the insert failed, so test
            # for that before comparing.
            if affected_rows and affected_rows > 0:
                # Flag the staging row so it is not processed again.
                records_count = db._setETLFlag(staging_id)
                if records_count and records_count > 0:
                    log("==================================================================")
else: print >> sys.stderr, '\n*** ERROR: must specify precisely 2 arg input, output***' self.printHelp() if '-f' in opts: self.fname = True def printHelp(self): help = __doc__.replace('<PROGNAME>',sys.argv[0],1) print >> sys.stderr, help exit() if __name__ == "__main__": config = CommandLine() util = utilities() N = 100000 #print re.escape("name="kgm"/>") for i, chunk in enumerate(util.read_in_chunks(config.inputFile, N)): SentenceList = [] for item in chunk: #print item item = item.encode('utf-8') result = util.SentenceTokenize(item) #quick fix for treebank treat " as '' result = [re.sub(r'\`\`|\'\'','\"',word) for word in result] SentenceList.append(result) #resText = " ".join(result)
__author__ = 'Joeri Nicolaes' __author_email__ = '*****@*****.**' from datetime import datetime from decimal import Decimal import sys, unittest import db_logic import pytz import sys import unittest #sys.path.append('~/pycharm/parking-plaza') import utilities util = utilities.utilities() parkingDb = db_logic.db_logic("Benares-dev", "localhost", "5432", "ParkingPlaza", "BENARES") parkingDb.ConnectToDb() class convert_timedeltaTestCases(unittest.TestCase): def testDaysOfDifference(self): # Test normal case with day of difference slot = { 'startTime': datetime(2016, 2, 25, 10, 0, 0, tzinfo=pytz.utc), 'endTime': datetime(2016, 2, 26, 10, 0, 0, tzinfo=pytz.utc) } result = util.convert_timedelta(slot['endTime'] - slot['startTime']) self.assertEqual(result['days'], 1, "difference in days is not 1") self.assertEqual(result['hours'], 24, "difference in hours is not 24") self.assertEqual(result['minutes'], 0,
class Tests(unittest.TestCase):
    # Unit tests for the utilities helpers and two DBUtilities queries.
    # The DB-backed tests need a reachable database, and
    # test_createDirectory needs WORKING_DIRECTORY to exist.

    utils = utilities()
    db_utils = DBUtilities()

    TUPLE_VALUE = '(row[0],row[1])'
    EXPECTED_TUPLE_PLACEHOLDER_VALUE = '(%s,%s)'
    EXPECTED_NON_ALPHA_VALUE = 'Administracin Nacional de Electricidad.'
    EXPECTED_KEY = '20110430'
    EXPECTED_TIME_DIM = {
        'YEAR_NUMBER': '2020',
        'QUARTER_NUMBER': 'Q1',
        'DAY_OF_MONTH': '01',
        'DAY_OF_WEEK': 'Wed',
        'MONTH_NAME': 'Jan',
        'MONTH_NUMBER': '01',
        'CALENDER_DATE': '2020-01-01',
        'WEEK_NUMBER': '00',
    }
    record = None
    currency_name = 1
    EXPECTED_TIME_KEY = "20110430"
    EXPECTED_DATE_NO_TIMESTAMP = "2011-04-30"

    # Raw strings: "\p", "\w" and "\s" are not valid escape sequences, so
    # the plain literals only worked by accident and trigger warnings on
    # newer interpreters.  The resulting values are unchanged.
    WORKING_DIRECTORY = r"D:\personal\wb\stagging"
    NEW_WORKING_DIRECTORY = r"D:\personal\wb\stagging\processed"
    PROCESSED_DATA_DIR = "processed"

    def test_generateTuple(self):
        # A two-field tuple expression is rendered as source text.
        tt = self.utils.generateTuple(2)
        self.assertEqual(tt, self.TUPLE_VALUE)

    def test_generatePlaceholderTuple(self):
        # Two fields give two SQL placeholders.
        tt = self.utils.generatePlaceholderTuple(2)
        self.assertEqual(tt, self.EXPECTED_TUPLE_PLACEHOLDER_VALUE)

    def test_generateTimeDimensionKey(self, dateParsed='2011/04/30'):
        # A slash-separated date becomes a YYYYMMDD key.
        dtKey = self.utils.generateTimeDimensionKey(dateParsed)
        self.assertEqual(dtKey, self.EXPECTED_KEY)

    def test_removeNonAlphanumericExcept(
            self, stringPassed='Administraci????n Nacional de Electricidad*.'):
        # The '?' and '*' characters are stripped; the full stop is kept.
        clean_string = self.utils.removeNonAlphanumericExcept(stringPassed)
        self.assertEqual(clean_string, self.EXPECTED_NON_ALPHA_VALUE)

    def test_genTimeDimensionAttributes(self, valueDate='2020-01-01'):
        resp = self.utils.genTimeDimensionAttributes(valueDate)
        self.assertDictEqual(resp, self.EXPECTED_TIME_DIM)

    def test_getTimeKey(self, value="20110430"):
        # Needs a dim_time row with this key in the database.
        record = self.db_utils._getTimeKey(value)
        self.assertEqual(record[0], self.EXPECTED_TIME_KEY)

    def test_setETLFlag(self, id=1):
        # Needs a staging row with this id in the database.
        row_count = self.db_utils._setETLFlag(id)
        self.assertEqual(row_count, 1)

    def test_removeTimeStamp(self, dateValue="2011-04-30 00:00:00"):
        newDate = self.utils.removeTimeStamp(dateValue)
        self.assertEqual(newDate, self.EXPECTED_DATE_NO_TIMESTAMP)

    def test_createDirectory(self):
        os.chdir(self.WORKING_DIRECTORY)
        newDir = self.utils.createDirectory(self.WORKING_DIRECTORY,
                                            self.PROCESSED_DATA_DIR)
        self.assertEqual(newDir, self.NEW_WORKING_DIRECTORY)
def __init__(self):
    """Copy every file listed in ``files.txt``.

    ``files.txt`` is read from the current working directory, one path per
    line.  The raw lines are kept in ``self.files``; each one is stripped
    of surrounding whitespace and handed to ``utilities.copy_file``.

    Raises:
        IOError / OSError: If ``files.txt`` cannot be opened.
    """
    self.u = utilities.utilities()

    # The context manager closes the handle; it was previously left open,
    # and its name was then reused as the loop variable.
    with open('files.txt', 'r') as listing:
        self.files = listing.readlines()

    for entry in self.files:
        self.u.copy_file(entry.strip())
from utilities import utilities from data_processor import DataProcessor import sys import time from datetime import datetime, timedelta utils = utilities() logger = utils.formatLogger("BEGIN ETL PROCESS") logger.info("BEGINNING ETL PROCESS") end_of_period = None if len(sys.argv) > 3: end_of_period = datetime.strptime(sys.argv[1], '%Y-%m-%d') logger.info("SETTING END OF PERIOD DATE - " + str(end_of_period)) time.sleep(2) raw_file_path = sys.argv[2] logger.info("SETTING RAW DATA FILE PATH TO - " + str(raw_file_path)) time.sleep(2) processed_file_path = sys.argv[3] logger.info("SETTING PROCESSED DATA FILE PATH TO - " + str(processed_file_path)) time.sleep(2) else: logger.error("ENTER END-OF-PERIOD & DATA FILE PATH") exit() data_processor = DataProcessor(end_of_period, raw_file_path,
class DBUtilities(object):
    """MySQL access layer for the loans staging, dimension and fact tables.

    One prepared-statement cursor is opened in ``__init__`` and shared by
    all methods.  The ``_check*Dimension`` methods return the key of an
    existing dimension row, inserting the row first when it is missing.
    Database errors are logged; the affected method then returns ``None``.
    """

    conn = None
    cursor = None
    utils = utilities()
    logger = utils.formatLogger("DB_CONNECT")

    def __init__(self):
        """Open the connection and the shared prepared-statement cursor."""
        try:
            self.conn = mysql.connector.connect(host='localhost',
                                                database="wb",
                                                user="******",
                                                password="",
                                                charset='utf8')
            self.cursor = self.conn.cursor(prepared=True)
            if self.conn:
                self.logger.info('DB CONNECTION SUCCESSFULL !')
            else:
                print("No Connect")
        except mysql.connector.Error as error:
            self.logger.error("Error : {} ".format(error))

    # ------------------------------------------------------------------
    # Staging / fact inserts
    # ------------------------------------------------------------------
    def insert_staging_data(self, insert_tuple, tuples_placeholder):
        """Insert one row into ``stg_loans``.

        Args:
            insert_tuple: Values in the order given by ``staging_columns``.
            tuples_placeholder: Matching ``(%s,...)`` placeholder text.
        """
        columns = self.staging_columns()
        sql = """ INSERT INTO stg_loans (""" + str(
            columns) + """) VALUES """ + tuples_placeholder
        try:
            self.cursor.execute(sql, insert_tuple)
            self.conn.commit()
            self.logger.info("STAGING COMMIT SUCCESSFUL !!")
        except mysql.connector.Error as error:
            # Logged like every other failure in this class (was print()).
            self.logger.error("Error {}".format(error))

    def insert_fct_data(self, insert_tuple, tuples_placeholder):
        """Insert one row into ``fct_loans``.

        Returns:
            The cursor's row count, or ``None`` when the insert failed.
        """
        row_count = None
        columns = self.fct_columns()
        sql = """ INSERT INTO fct_loans (""" + str(
            columns) + """) VALUES """ + tuples_placeholder
        try:
            self.cursor.execute(sql, insert_tuple)
            self.conn.commit()
            self.logger.info("FACT INSERT - ROWS AFFECTED = {}".format(
                self.cursor.rowcount))
            row_count = self.cursor.rowcount
        except mysql.connector.Error as error:
            self.logger.error("Error {}".format(error))
        return row_count

    def release_db_resources(self):
        """Close the shared cursor and connection if still connected."""
        if self.conn.is_connected():
            self.cursor.close()
            self.conn.close()
            self.logger.info("DB RESOURCES RELEASED !")

    def staging_columns(self):
        """Return the comma-separated column list of ``stg_loans``."""
        return """END_OF_PERIOD,LOAN_NUMBER,REGION,COUNTRY,COUNTRY_CODE,BORROWER,GUARANTOR_COUNTRY_CODE,GUARANTOR,LOAN_TYPE,
        LOAN_STATUS,INTEREST_RATE,CURRENCY_OF_COMMITMENT,PROJECT_ID,PROJECT_NAME,ORIGINAL_PRICINCIPAL_AMOUNT,
        CANCELLED_AMOUNT,UNDISBURSED_AMOUNT,DISBURSED_AMOUNT,REPAID_TO_IBRD,DUE_TO_IBRD,EXCHANGE_ADJUSTMENT,
        BORROWERS_OBLIGATION,SOLD_THIRD_PARTY,REPAID_THIRD_PARTY,DUE_THIRD_PARTY,LOANS_HELD,FIRST_REPAYMENT_DATE,
        LAST_REPAYMENT_DATE,AGREEMENT_SIGNING_DATE,BOARD_APPROVAL_DATE,EFFECTIVE_DATE,CLOSED_DATE,LAST_DISBURSEMENT_DATE"""

    def fct_columns(self):
        """Return the comma-separated column list of ``fct_loans``."""
        return """END_OF_PERIOD_KEY,LOAN_CREDIT_NUMBER,LOAN_STATUS_KEY,LOAN_TYPE_KEY,PROJECT_KEY,BORROWER_KEY,COUNTRY_KEY,CURRENCY_KEY,
        GUARANTOR_KEY,INTEREST_RATE,ORIGINAL_PRICINCIPAL_AMOUNT, CANCELLED_AMOUNT, UNDISBURSED_AMOUNT, DISBURSED_AMOUNT,REPAID_TO_IBRD,
        DUE_TO_IBRD,EXCHANGE_ADJUSTMENT,BORROWERS_OBLIGATION,SOLD_THIRD_PARTY,REPAID_THIRD_PARTY,DUE_THIRD_PARTY,LOANS_HELD,FIRST_REPAYMENT_DATE,
        LAST_REPAYMENT_DATE,AGREEMENT_SIGNING_DATE,BOARD_APPROVAL_DATE,EFFECTIVE_DATE,CLOSED_DATE,LAST_DISBURSEMENT_DATE"""

    def getUnProcessedStagingData(self, etl, end_of_period):
        """Return all ``stg_loans`` rows with the given ETL flag and period."""
        sql = """ SELECT * FROM stg_loans WHERE etl = %s AND end_of_period = %s """
        self.cursor.execute(sql, (etl, end_of_period))
        return self.cursor.fetchall()

    # ------------------------------------------------------------------
    # Dimension lookups
    # ------------------------------------------------------------------
    def _lookup_or_insert(self, sql_check, lookup_value, sql_insert,
                          build_insert_tuple, inserted_message, error_format):
        """Return the key of a dimension row, inserting the row if missing.

        Args:
            sql_check: SELECT with one placeholder, matched against the
                lower-cased ``lookup_value``.
            lookup_value: Natural key of the dimension row.
            sql_insert: INSERT statement used when no row matches.
            build_insert_tuple: Zero-argument callable producing the insert
                values; only called when an insert is needed.
            inserted_message: Info message logged after a successful insert.
            error_format: ``str.format`` template for database errors.

        Returns:
            The first column of the matching row, or ``None`` when nothing
            matched and ``lookup_value`` is empty, or when the database
            reported an error.
        """
        key = None
        try:
            self.cursor.execute(sql_check, (lookup_value.lower(), ))
            record = self.cursor.fetchone()
            if record is None and len(lookup_value) > 0:
                self.cursor.execute(sql_insert, build_insert_tuple())
                self.conn.commit()
                self.logger.info(inserted_message)
                # Read the row back to pick up the generated key.
                self.cursor.execute(sql_check, (lookup_value.lower(), ))
                record = self.cursor.fetchone()
            # An empty name with no match used to raise TypeError on
            # record[0]; it now yields None.
            if record is not None:
                key = record[0]
        except mysql.connector.Error as error:
            self.logger.error(error_format.format(error))
        return key

    def _checkUpdateRegionDimension(self, region_name):
        """Return the ``dim_region`` key for ``region_name``, creating it if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_region WHERE lower(region_name) = %s """,
            region_name,
            """ INSERT INTO dim_region ( region_key, region_name ) VALUES ( %s, %s )""",
            lambda: ("", region_name.upper()),
            "DIM UPDATED WITH REGION : -> " + str(region_name),
            "Region Check and Update {} ")

    def _checkupdateLoanStatusDimension(self, loan_status_name):
        """Return the ``dim_loan_status`` key, creating the row if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_loan_status WHERE lower(loan_status) = %s """,
            loan_status_name,
            """ INSERT INTO dim_loan_status ( loan_status_key, loan_status ) VALUES ( %s, %s )""",
            lambda: ("", loan_status_name.upper()),
            "DIM UPDATE WITH NEW LOAN : -> " + str(loan_status_name).upper(),
            "LOAN STATUS Check and Update {} ")

    def _checkupdateLoanTypeDimension(self, loan_type_name):
        """Return the ``dim_loan_type`` key, creating the row if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_loan_type WHERE lower(loan_type_name) = %s """,
            loan_type_name,
            """ INSERT INTO dim_loan_type ( loan_type_key, loan_type_name ) VALUES ( %s, %s )""",
            lambda: ("", loan_type_name.upper()),
            "DIM UPDATED WITH NEW LOAN TYPE : -> " + str(loan_type_name).upper(),
            "LOAN TYPE Check and Update {} ")

    def _checkupdateCountryDimension(self, country_code, country_name,
                                     region_key):
        """Return the ``dim_country`` key for ``country_code``, creating it if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_country WHERE lower(country_code) = %s """,
            country_code,
            """ INSERT INTO dim_country ( country_key, country_code , country_name , region_key ) VALUES ( %s, %s , %s , %s )""",
            lambda: ("", country_code.upper(), country_name.upper(), region_key),
            "DIM UPDATED WITH NEW COUNTRY : -> " + str(country_code).upper() +
            " - " + str(country_name).upper() + " - " + str(region_key),
            "COUNTRY Check and Update {} ")

    def _checkupdateProjectDimension(self, project_id, project_name):
        """Return the ``dim_project`` key for ``project_id``, creating it if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_project WHERE lower(project_id) = %s """,
            project_id,
            """ INSERT INTO dim_project ( project_key, project_id , project_name ) VALUES ( %s, %s , %s )""",
            lambda: ("", project_id.upper(), project_name.upper()),
            "DIM UPDATED WITH NEW PROJECT : -> " + str(project_id).upper() +
            "-" + str(project_name).upper(),
            "PROJECT Check and Update {} ")

    def _checkupdateBorrowerDimension(self, borrower_name):
        """Return the ``dim_borrower`` key, creating the row if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_borrower WHERE lower(borrower_name) = %s """,
            borrower_name,
            """ INSERT INTO dim_borrower ( borrower_key, borrower_name ) VALUES ( %s, %s )""",
            lambda: ("", borrower_name.upper()),
            "DIM UPDATED WITH NEW BORROWER : -> " + str(borrower_name).upper(),
            "BORROWER Check and Update {} ")

    def _checkupdateCurrencyDimension(self, currency_name):
        """Return the ``dim_currency`` key, creating the row if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_currency WHERE lower(currency_name) = %s """,
            currency_name,
            """ INSERT INTO dim_currency ( currency_key, currency_name ) VALUES ( %s, %s )""",
            lambda: ("", currency_name.upper()),
            "DIM UPDATED WITH NEW CURRENCY : -> " + str(currency_name).upper(),
            "CURRENCY Check and Update {} ")

    def _getCountryKey(self, country_code):
        """Return the ``dim_country`` key for ``country_code`` without inserting.

        Returns ``None`` (and logs a note) when the code is unknown.
        """
        country_key = None
        try:
            sql_check = """ SELECT * FROM dim_country WHERE lower(country_code) = %s """
            self.cursor.execute(sql_check, (country_code.lower(), ))
            record = self.cursor.fetchone()
            if record is None:
                self.logger.info(
                    "NOT MATCHING COUNTRY CODE FOUND FOR GUARANTOR - FIX LATER : -> "
                    + str(country_code).upper())
            else:
                country_key = record[0]
        except mysql.connector.Error as error:
            self.logger.error(
                "COUNTRY Check for GUARANTOR update {} ".format(error))
        return country_key

    def _checkupdateGuarantorDimension(self, guarantor_name, country_key):
        """Return the ``dim_guarantor`` key, creating the row if needed."""
        return self._lookup_or_insert(
            """ SELECT * FROM dim_guarantor WHERE lower(guarantor_name) = %s """,
            guarantor_name,
            """ INSERT INTO dim_guarantor ( guarantor_key, guarantor_name, guarantor_country_key ) VALUES ( %s, %s , %s)""",
            lambda: ("", guarantor_name.upper(), country_key),
            "DIM UPDATED WITH NEW GUARANTOR : -> " + str(guarantor_name).upper(),
            "GUARANTOR Check and Update {} ")

    def _checkupdateTimeDimension(self, end_of_period):
        """Return the ``dim_time`` key for ``end_of_period``, creating it if needed.

        The existence check goes through ``_getTimeKey`` (a separate
        connection) because, per the original author's note, the prepared
        cursor raised "IndexError: bytearray index out of range" when the
        key was used in the WHERE clause.

        The process exits on a database error during the insert.
        """
        end_of_period_key = None
        try:
            end_of_period_key = self.utils.generateTimeDimensionKey(
                end_of_period)
            record = self._getTimeKey(end_of_period_key)
            if record is None and end_of_period is not None:
                sql_insert_query = """ INSERT INTO dim_time ( time_key, year_number, quarter_number , month_number, month_name,
                                       day_of_month, week_number, day_of_week, calender_date )
                                       VALUES ( %s, %s , %s , %s , %s , %s , %s , %s , %s) """
                timeDimAttrs = self.utils.genTimeDimensionAttributes(
                    end_of_period)
                insert_tuple = (end_of_period_key,
                                timeDimAttrs['YEAR_NUMBER'],
                                timeDimAttrs['QUARTER_NUMBER'],
                                timeDimAttrs['MONTH_NUMBER'],
                                timeDimAttrs['MONTH_NAME'],
                                timeDimAttrs['DAY_OF_MONTH'],
                                timeDimAttrs['WEEK_NUMBER'],
                                timeDimAttrs['DAY_OF_WEEK'],
                                timeDimAttrs['CALENDER_DATE'])
                self.cursor.execute(sql_insert_query, insert_tuple)
                self.conn.commit()
                self.logger.info("DIM UPDATED WITH NEW TIME ATTR : -> " +
                                 str(end_of_period_key))
                # Re-read through the same helper.  The old code executed
                # an undefined ``sql_check`` here and raised NameError.
                record = self._getTimeKey(end_of_period_key)
            if record is not None:
                end_of_period_key = record[0]
        except mysql.connector.Error as error:
            self.logger.error("TIME ATTR CHECK & UPDATE {} ".format(error))
            exit()
        return end_of_period_key

    def _getTimeKey(self, end_of_period_key):
        """Fetch the ``dim_time`` row for a key over a temporary connection.

        Returns:
            The row, or ``None`` when it is missing or the query failed.
        """
        record = None
        db_conn_tmp = None
        cursor_tmp = None
        try:
            db_conn_tmp = MySQLdb.connect("localhost", "root", "", "wb")
            cursor_tmp = db_conn_tmp.cursor()
            sql_check = """ SELECT * FROM dim_time WHERE time_key = %s """
            cursor_tmp.execute(sql_check, (end_of_period_key, ))
            record = cursor_tmp.fetchone()
        except (MySQLdb.Error, MySQLdb.Warning) as error:
            self.logger.error("FETCHING TIME KEY {} ".format(error))
        finally:
            # Either object may be missing if connect() or cursor() failed.
            if cursor_tmp is not None:
                cursor_tmp.close()
            if db_conn_tmp is not None:
                db_conn_tmp.close()
        return record

    def _setETLFlag(self, id):
        """Mark the staging row ``id`` as processed (``ETL = 1``).

        Returns:
            The number of rows updated, or ``None`` on a database error.
        """
        row_count = None
        try:
            sql_update = """ UPDATE stg_loans SET ETL = 1 WHERE id = %s """
            # Must be a 1-tuple: a bare string is treated as a sequence of
            # characters, so ids with more than one digit did not match the
            # single placeholder.
            self.cursor.execute(sql_update, (str(id), ))
            self.conn.commit()
            row_count = self.cursor.rowcount
        except mysql.connector.Error as error:
            self.logger.error("ETL FLAG UPDATE {} ".format(error))
        return row_count
class customGP():
    """Genetic-programming operators and evolutionary loop built on DEAP.

    The selection, variation, generation, mutation and crossover routines
    here follow the DEAP ``algorithms``/``gp`` originals but work on a
    primitive set that additionally exposes ``decorators``, ``conditions``
    and ``actions`` node lists keyed by return type.

    All syntax is valid on both Python 2 and Python 3.
    """

    params = eaParams()
    utils = utilities()

    # Separator line printed around the logbook stream in verbose mode.
    _RULE = "-----------------------------------------------------------------------------------"

    def __init__(self):
        """Seed the global ``random`` generator from ``params.deapSeed``."""
        # This was spelled ``__init`` before, so it never ran on construction
        # and the configured seed was silently ignored.
        random.seed(self.params.deapSeed)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _evaluateInvalid(self, individuals, toolbox):
        """Evaluate every individual with an invalid fitness; return the count."""
        invalid_ind = [ind for ind in individuals if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        return len(invalid_ind)

    def _chooseTerminal(self, candidates, type_):
        """Pick a random entry of ``candidates``, instantiating it if a class.

        Raises IndexError with the DEAP-style message when ``candidates`` is
        empty.
        """
        try:
            term = random.choice(candidates)
        except IndexError:
            # Python 3 chains the original error automatically; the
            # three-argument ``raise`` used before is Python 2-only syntax.
            raise IndexError("The gp.generate function tried to add "
                             "a terminal of type '%s', but there is "
                             "none available." % (type_,))
        if gp.isclass(term):
            term = term()
        return term

    def _innerOrLeafNodes(self, pset, type_):
        """Return decorators+primitives 90% of the time, else actions+conditions."""
        if random.random() < 0.9:
            return pset.decorators[type_] + pset.primitives[type_]
        return pset.actions[type_] + pset.conditions[type_]

    def _pickIndex(self, individual, nodeSet, type_):
        """Choose an index from ``nodeSet``, else any non-root node returning ``type_``.

        Returns None when neither list has a candidate.
        """
        if len(nodeSet) > 0:
            return random.choice(nodeSet)
        fallback = [i for i, node in enumerate(individual[1:], 1)
                    if node.ret == type_]
        if len(fallback) == 0:
            return None
        return random.choice(fallback)

    def _shrinkCandidates(self, individual, pset):
        """List ``(index, node)`` for non-root primitives/decorators that take
        an argument of their own return type."""
        found = []
        for i, node in enumerate(individual[1:], 1):
            internal = pset.primitives[node.ret] + pset.decorators[node.ret]
            if node in internal and node.ret in node.args:
                found.append((i, node))
        return found

    def _replaceNode(self, individual, pset, index, node, type_):
        """Swap the node at ``index`` for a random node with the same children.

        If ``node`` is not a primitive, decorator, condition or action of
        ``type_`` (e.g. an ephemeral constant), up to 20 other random
        positions are tried. Constant arguments of the replacement are
        regenerated; child subtrees are carried over unchanged.

        Returns True when ``individual`` was modified in place, else False.
        """
        def realNodes(t):
            return (pset.primitives[t] + pset.decorators[t] +
                    pset.conditions[t] + pset.actions[t])

        # make sure we have a real node and not an ephemeral constant
        attempts = 0
        while attempts < 20 and node not in realNodes(type_):
            attempts += 1
            index = random.randrange(0, len(individual))
            node = individual[index]
            type_ = node.ret
        if node not in realNodes(type_):
            return False

        # choose a replacement node at random from the same family
        internal = pset.primitives[node.ret] + pset.decorators[node.ret]
        if node in internal:
            pool = internal
        else:
            pool = pset.conditions[node.ret] + pset.actions[node.ret]
        prim = random.choice([p for p in pool if p.children == node.children])

        # build the expression that replaces the selected node's subtree
        expr = [prim]
        if prim.arity > 0:
            for arg in prim.args:
                if arg not in prim.children:
                    # this argument is a constant so generate a new one
                    term = random.choice(pset.terminals[arg])
                    if gp.isclass(term):
                        term = term()
                    expr.append(term)
                else:
                    # this argument is a child node so keep it intact
                    childSlice = individual.searchSubtree(index + len(expr))
                    expr = expr + individual[childSlice]
        individual[individual.searchSubtree(index)] = expr
        return True

    # ------------------------------------------------------------------
    # selection and variation
    # ------------------------------------------------------------------

    def selTournament(self, individuals, k, tournsize, fit_attr="fitness"):
        """Run ``k`` tournaments of ``tournsize`` random aspirants each.

        The winner of each tournament is ``self.utils.getBest(aspirants)``.
        ``fit_attr`` is accepted for signature compatibility and is unused.
        """
        return [self.utils.getBest(tools.selRandom(individuals, tournsize))
                for _ in range(k)]

    def varAnd(self, population, toolbox):
        """Clone ``population`` and apply crossover, then four mutation passes.

        Each operator fires with its probability from ``self.params`` and
        invalidates the fitness of the individuals it touched. Returns the
        list of offspring.
        """
        offspring = [toolbox.clone(ind) for ind in population]

        # crossover on consecutive pairs
        for i in range(1, len(offspring), 2):
            if random.random() < self.params.crossoverProbability:
                offspring[i - 1], offspring[i] = toolbox.mate(offspring[i - 1],
                                                              offspring[i])
                del offspring[i - 1].fitness.values, offspring[i].fitness.values

        # Mutation passes in their original order. Attributes are resolved
        # lazily, exactly when the original code would have touched them.
        mutationPasses = (
            ("mutSRProbability", "mutSubtreeReplace"),    # subtree replacement
            ("mutSSProbability", "mutSubtreeShrink"),     # subtree shrink
            ("mutNRProbability", "mutNodeReplace"),       # node replacement
            ("mutECRProbability", "mutConstantReplace"),  # ephemeral constant
        )
        for probabilityName, operatorName in mutationPasses:
            for i in range(len(offspring)):
                if random.random() < getattr(self.params, probabilityName):
                    offspring[i], = getattr(toolbox, operatorName)(offspring[i])
                    del offspring[i].fitness.values

        return offspring

    # ------------------------------------------------------------------
    # evolutionary run
    # ------------------------------------------------------------------

    def eaInit(self, population, toolbox, ngen, stats=None, halloffame=None, verbose=__debug__):
        """Evaluate the initial population, evolve it and post-process the best.

        Preliminary steps are copied largely from ``algorithms.eaSimple``.
        After ``eaLoop`` finishes, the best individual is logged, tested on
        unseen seeds, saved and played back through ``self.utils``.

        Returns ``(population, logbook)``.
        """
        logbook = tools.Logbook()
        logbook.header = ['gen', 'nevals'] + (stats.fields if stats else [])
        logbook.chapters["fitness"].header = "min", "avg", "max"
        logbook.chapters["size"].header = "min", "avg", "max"

        # Evaluate the individuals with an invalid fitness
        nevals = self._evaluateInvalid(population, toolbox)

        if halloffame is not None:
            halloffame.update(population)

        for ind in population:
            print("%.2f" % ind.fitness.values[0] + " " + str(ind))

        record = stats.compile(population) if stats else {}
        logbook.record(gen=0, nevals=nevals, **record)
        if verbose:
            print("")
            print(logbook.stream)
            print(self._RULE)

        self.utils.logFirst()
        self.utils.logFitness(self.utils.getBest(population))

        # begin evolution; ``verbose`` is forwarded so one flag controls both
        # phases (it used to be dropped here)
        self.eaLoop(logbook, population, toolbox, ngen, stats,
                    halloffame=halloffame, verbose=verbose)

        # get the best individual at the end of the evolutionary run
        best = self.utils.getBest(population)

        # log chromosome and test performance in different environments
        self.utils.unseenSeeds(best)
        self.utils.logChromosome(best)

        # save and run simulation
        self.utils.saveOutput()
        self.utils.playbackBest(best)

        return population, logbook

    def eaLoop(self, logbook, population, toolbox, ngen, stats=None, halloffame=None, verbose=__debug__):
        """Run ``ngen`` generations with elitism, updating ``population`` in place.

        The loop is copied largely from ``algorithms.eaSimple``. Each
        generation keeps ``params.eliteSize`` elites, selects and varies the
        rest, evaluates invalid individuals, records statistics and logs the
        best individual.
        """
        for gen in range(1, ngen + 1):
            # pause to free up CPU
            time.sleep(self.params.genSleep)

            # create the next generation
            elites = tools.selBest(population, self.params.eliteSize)
            offspring = toolbox.select(population,
                                       len(population) - self.params.eliteSize)

            # Vary the pool of individuals
            offspring = self.varAnd(offspring, toolbox)

            # assign to a new population
            newPop = elites + offspring

            # Evaluate the individuals with an invalid fitness
            nevals = self._evaluateInvalid(newPop, toolbox)

            # print each fitness score and chromosome
            for ind in newPop:
                print("%.2f" % ind.fitness.values[0] + " " +
                      self.utils.printTree(ind))

            # Update the hall of fame with the generated individuals
            if halloffame is not None:
                halloffame.update(newPop)

            # Replace the current population by the offspring
            population[:] = newPop

            # Append the current generation statistics to the logbook
            record = stats.compile(population) if stats else {}
            logbook.record(gen=gen, nevals=nevals, **record)
            if verbose:
                print(self._RULE)
                print(logbook.stream)
                print(self._RULE)

            # print the best chromosome
            best = self.utils.getBest(population)
            self.utils.logFitness(best)
            print(self.utils.printTree(best))

    # ------------------------------------------------------------------
    # tree generation
    # ------------------------------------------------------------------

    def genFull(self, pset, min_, max_, type_=None):
        """Generate an expression whose leaves all sit at the drawn height.

        Copied from the DEAP ``gp`` module; delegates to ``generate``.
        """
        def condition(height, depth):
            return depth == height
        return self.generate(pset, min_, max_, condition, type_)

    def generate(self, pset, min_, max_, condition, type_=None):
        """Build a random expression as a prefix-ordered list of nodes.

        A height is drawn from ``[min_, max_]``. Where
        ``condition(height, depth)`` holds, a leaf is drawn from terminals,
        conditions and actions. Elsewhere an inner node is drawn from
        primitives and decorators, falling back to a leaf when the type has
        no inner node.

        Raises IndexError when a leaf is needed but none exists for the type.
        """
        if type_ is None:
            type_ = pset.ret
        expr = []
        height = random.randint(min_, max_)
        stack = [(0, type_)]
        # NOTE(review): fixed 0.2 s pause on every call, kept from the
        # original; its purpose is not evident from this code.
        time.sleep(0.2)
        while len(stack) != 0:
            depth, type_ = stack.pop()

            prim = None
            if not condition(height, depth):
                try:
                    prim = random.choice(pset.primitives[type_] +
                                         pset.decorators[type_])
                except IndexError:
                    # no inner node of this type: use a leaf instead
                    prim = None

            if prim is not None:
                expr.append(prim)
                for arg in reversed(prim.args):
                    stack.append((depth + 1, arg))
            else:
                term = self._chooseTerminal(pset.terminals[type_] +
                                            pset.conditions[type_] +
                                            pset.actions[type_], type_)
                # conditions/actions may take constant arguments of their own
                if term.arity > 0:
                    for arg in reversed(term.args):
                        stack.append((depth + 1, arg))
                expr.append(term)
        return expr

    def mutGenerate(self, pset, node):
        """Return ``[node]`` followed by one fresh terminal per argument of ``node``.

        Raises IndexError when an argument type has no terminal.
        """
        expr = [node]
        argTypes = node.args if node.arity > 0 else []
        for type_ in argTypes:
            expr.append(self._chooseTerminal(pset.terminals[type_], type_))
        return expr

    # ------------------------------------------------------------------
    # mutation and crossover operators
    # ------------------------------------------------------------------

    def mutNodeReplacement(self, individual, pset):
        """Replace one randomly chosen node with another of the same family.

        Individuals shorter than two nodes are returned untouched. Returns a
        one-element tuple holding the (possibly modified) individual.
        """
        if len(individual) < 2:
            return individual,

        # choose existing node at random
        index = random.randrange(0, len(individual))
        node = individual[index]
        self._replaceNode(individual, pset, index, node, node.ret)
        return individual,

    def mutShrinkToChild(self, individual, pset):
        """Replace a random inner node by one of its same-typed child subtrees.

        Returns a one-element tuple holding the (possibly modified) individual.
        """
        if len(individual) < 3 or individual.height <= 1:
            return individual,

        iprims = self._shrinkCandidates(individual, pset)
        if len(iprims) != 0:
            index, prim = random.choice(iprims)
            arg_idx = random.choice([i for i, type_ in enumerate(prim.args)
                                     if type_ == prim.ret])
            # walk over the children until the chosen argument's subtree
            rindex = index + 1
            for _ in range(arg_idx + 1):
                rslice = individual.searchSubtree(rindex)
                subtree = individual[rslice]
                rindex += len(subtree)

            slice_ = individual.searchSubtree(index)
            individual[slice_] = subtree

        return individual,

    def mutShrinkToTerminal(self, individual, pset):
        """Replace a random inner node by an action/condition from its subtree.

        The individual is left unchanged when the chosen subtree holds no
        action or condition of the node's return type. Returns a one-element
        tuple holding the individual.
        """
        if len(individual) < 3 or individual.height <= 1:
            return individual,

        iprims = self._shrinkCandidates(individual, pset)
        if len(iprims) != 0:
            primIndex, prim = random.choice(iprims)
            primSlice = individual.searchSubtree(primIndex)
            leafNodes = pset.actions[prim.ret] + pset.conditions[prim.ret]
            # offsets are 1-based relative to primIndex, hence the -1 below
            iterms = [(i, node)
                      for i, node in enumerate(individual[primSlice], 1)
                      if node in leafNodes]
            if len(iterms) == 0:
                # nothing to shrink to; the original raised IndexError here
                return individual,

            termIndex, term = random.choice(iterms)
            termIndex += primIndex
            termSlice = individual.searchSubtree(termIndex - 1)
            subtree = individual[termSlice]

            primSlice = individual.searchSubtree(primIndex)
            individual[primSlice] = subtree

        return individual,

    def cxOnePoint(self, ind1, ind2):
        """Swap one random subtree between two individuals.

        Crossover points are non-root nodes whose return type equals that of
        ``ind1``'s root. Returns ``(ind1, ind2)``, unchanged when either tree
        has no such node.
        """
        if len(ind1) < 2 or len(ind2) < 2:
            return ind1, ind2

        type_ = ind1.root.ret
        selection1 = [i for i, node in enumerate(ind1[1:], 1) if node.ret == type_]
        selection2 = [i for i, node in enumerate(ind2[1:], 1) if node.ret == type_]
        if len(selection1) == 0 or len(selection2) == 0:
            return ind1, ind2

        index1 = random.choice(selection1)
        index2 = random.choice(selection2)

        slice1 = ind1.searchSubtree(index1)
        slice2 = ind2.searchSubtree(index2)
        ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]

        return ind1, ind2

    def mutEphemeral(self, individual, pset):
        """Mutate one randomly chosen ephemeral constant of ``individual``.

        Blackboard read/write indexes are regenerated from their class.
        Repetition counts move by +/-1 or +/-2 and are clamped to 1..9.
        Any other constant is perturbed via ``self.utils.gaussian(value, .05)``.
        Returns a one-element tuple holding the individual.
        """
        ephemerals_idx = [index for index, node in enumerate(individual)
                          if isinstance(node, Ephemeral)]
        if len(ephemerals_idx) == 0:
            return individual,

        i = random.choice(ephemerals_idx)
        nodeType = type(individual[i])

        if nodeType in pset.bbReadIndexes or nodeType in pset.bbWriteIndexes:
            individual[i] = nodeType()

        elif nodeType in pset.repetitions:
            print("=========================== reps ===========================")
            print(individual)

            magnitude = 2 if random.random() < .33 else 1
            direction = -1 if random.random() < .5 else 1

            newValue = individual[i].value + (magnitude * direction)
            if newValue > 9:
                newValue = 9
            elif newValue < 1:
                newValue = 1
            print(newValue)

            individual[i].value = newValue
            print(individual)
            print("")

        else:
            print("=========================== constant ===========================")
            print(individual)
            print(individual[i].value)
            newValue = self.utils.gaussian(individual[i].value, .05)
            print(newValue)
            individual[i].value = newValue
            print(individual)
            print("")

        return individual,

    def mutUniformInner(self, individual, expr, pset):
        """Replace a random subtree with ``expr(pset=pset, type_=...)``.

        The mutation point favours decorators/primitives (90%) over
        actions/conditions. The individual is returned unchanged when it has
        no eligible non-root node. Returns a one-element tuple.
        """
        type_ = individual.root.ret
        psets = self._innerOrLeafNodes(pset, type_)
        nodeSet = [i for i, node in enumerate(individual[1:], 1) if node in psets]

        index = self._pickIndex(individual, nodeSet, type_)
        if index is None:
            # no candidate at all; the original raised IndexError here
            return individual,

        print(individual)
        print(index)
        print(individual[index].name)

        slice_ = individual.searchSubtree(index)
        type_ = individual[index].ret
        individual[slice_] = expr(pset=pset, type_=type_)

        print(individual)
        print("")
        return individual,

    def mutNodeReplacementInner(self, individual, pset):
        """Node replacement with the mutation point biased towards inner nodes.

        The point favours decorators/primitives (90%) over actions/conditions;
        the replacement itself works as in ``mutNodeReplacement``. Returns a
        one-element tuple holding the (possibly modified) individual.
        """
        if len(individual) < 2:
            return individual,

        # choose existing node at random
        type_ = individual.root.ret
        psets = self._innerOrLeafNodes(pset, type_)
        nodeSet = [i for i, node in enumerate(individual[1:], 1) if node in psets]

        index = self._pickIndex(individual, nodeSet, type_)
        if index is None:
            # no candidate at all; the original raised IndexError here
            return individual,
        node = individual[index]

        print(individual)
        print(index)
        print(individual[index].name)

        if self._replaceNode(individual, pset, index, node, type_):
            print(individual)
            print("")

        return individual,

    def cxOnePointInner(self, ind1, ind2, pset):
        """One-point crossover with points biased towards inner nodes.

        Both points are drawn from the same node family (decorators/primitives
        90% of the time, otherwise actions/conditions), falling back to any
        non-root node with the root's return type. Returns ``(ind1, ind2)``,
        unchanged when either tree has no eligible node.
        """
        if len(ind1) < 2 or len(ind2) < 2:
            return ind1, ind2

        print(self.utils.printTree(ind1))
        print(self.utils.printTree(ind2))

        type_ = ind1.root.ret
        psets = self._innerOrLeafNodes(pset, type_)

        set1 = [i for i, node in enumerate(ind1[1:], 1) if node in psets]
        set2 = [i for i, node in enumerate(ind2[1:], 1) if node in psets]

        print(set1)
        print(set2)

        index1 = self._pickIndex(ind1, set1, type_)
        index2 = self._pickIndex(ind2, set2, type_)
        if index1 is None or index2 is None:
            # mirror cxOnePoint: without a point in both trees, do nothing
            return ind1, ind2

        print(ind1[index1].name)
        print(ind2[index2].name)

        slice1 = ind1.searchSubtree(index1)
        slice2 = ind2.searchSubtree(index2)
        ind1[slice1], ind2[slice2] = ind2[slice2], ind1[slice1]

        print(self.utils.printTree(ind1))
        print(self.utils.printTree(ind2))
        print("")

        return ind1, ind2