class CopyDb(threading.Thread): ''' ''' def __init__(self, source, target, progress_title): super().__init__() self.source = source self.target = target self.progress_title = progress_title self.title = f'{constants.APP_NAME} - {self.progress_title} Progress' self.db_name = ntpath.basename(self.source) def run(self): self.Progress = Progress(self, self.title, abort=False) con = sql.connect(self.source) with sql.connect(self.target) as bck: con.backup(bck, pages=1, progress=self._update_progress) con.close() self.Progress.destroy() def _update_progress(self, status, remaining, total): progress = (total - remaining) / total * 100 message = 'Copied {:,} of {:,} pages from {}'.format( total - remaining, total, self.db_name) self.Progress.update_progress(progress, message)
class DataController(threading.Thread): ''' ''' def __init__(self, process): super().__init__() self.process = process self.main = self.process.main self.selections = self.process.selections self.files = deepcopy(self.process.get_files()) self.file_count = len(self.files) self.counter = 0 self.progress = Progress(self, self.process.title) self.abort = False self.con = None def run(self): try: self._process_files() except Exception: msg.showerror( self.process.title, f'Unhandled exception occurred:\n\n{traceback.format_exc()}', parent=self.process.gui) finally: self.progress.destroy() def _process_files(self): for i, item in enumerate(self.files.items(), start=1): if self.main.end_processes or self.abort: break self.counter = i name, data = item jurisidiction, df = data load_detail = LoadDetail(self, df, name) load_detail.load(jurisidiction) def update_progress(self, progress, message): self.progress.update_progress( progress, message, self.counter, self.file_count )
class Grid(tk.Frame): def __init__(self, parent): tk.Frame.__init__(self, parent) tk.Grid.rowconfigure(self, 0, weight=1) tk.Grid.rowconfigure(self, 1, weight=1) tk.Grid.columnconfigure(self, 0, weight=1) tk.Grid.columnconfigure(self, 1, weight=1) tk.Grid.rowconfigure(self, 2, weight=1) self.configure(width=600, height=400) self.progress = None self.btn1 = tk.Button(self, text='Show Progress') self.btn1.grid(row=0, column=0, sticky='news') self.btn2 = tk.Button(self, text='Sleep') self.btn2.grid(row=0, column=1, sticky='news') self.btn3 = tk.Button(self, text='mq sleep') self.btn3.grid(row=1, column=0, sticky='news') self.btn4 = tk.Button(self, text='thread sleep') self.btn4.grid(row=1, column=1, sticky='news') self._run = 0 self.running_time = tk.StringVar() self.lbl_running_time = tk.Label(self, textvariable=self.running_time) self.lbl_running_time.grid(row=2, column=0, columnspan=2) self.btn1.configure(command=self.show_progress) self.btn2.configure(command=self.sleep) self.btn3.configure(command=self.mq_sleep) self.btn4.configure(command=self.thread_sleep) self.mq = Queue.Queue() self.main_show() def show_progress(self): if self.progress: self.hide_progress() self.progress = Progress(self) x = self.winfo_width() / 2 y = self.winfo_height() / 2 self.progress.place(relx=0.0, rely=0.0, x=x, y=y, anchor='center') def hide_progress(self): if self.progress: self.progress.destroy() self.progress = None def sleep(self): import time; time.sleep(5) def mq_sleep(self): self.mq.put('sleep') def thread_sleep(self): def sleep(): import time;time.sleep(5) self._run = 0 t = threading.Thread(target=sleep) t.setDaemon(True) t.start() def main_show(self): self.running_time.set(str(self._run)) try: m = self.mq.get_nowait() if m == 'sleep': import time; time.sleep(5) except Queue.Empty, e: pass self._run += 1 self.after(1000, self.main_show)
class RollupTotals(threading.Thread): ''' Parent class for SegmentTotals(), SegmentTotalsRegion(), CategoryTotals() and CategoryTotalsRegion(). Creates a new table with the data fetched using the query set in each of the child classes. ''' FIRST_QUARTER_COLUMN = 3 def __init__( self, is_addon=False, is_category=True, is_region=False, is_business_code_totals=False ): super().__init__() self.is_addon = is_addon self.is_category = is_category self.is_region = is_region self.is_business_code_totals = is_business_code_totals self.title = constants.APP_NAME self.input_table_name = constants.BUSINESS_CODE_TOTALS_TABLE if self.is_addon: self.input_table_name += constants.ADDON_SUFFIX self.df = None self.query = '' self.rollup_id_name = '' self.rollup_table_name = '' def run(self): if self.is_addon: self.rollup_table_name += constants.ADDON_SUFFIX self.progress = Progress(self, self.title, abort=False) self.progress.update_progress(0, 'Fetching business code totals.') self._set_df() progress = 90 if self.is_region else 70 self.progress.update_progress(progress, 'Preparing data.') if self.df is not None: # drops the old id column self.df.drop(constants.ID_COLUMN_NAME, axis=1, inplace=True) if not self.is_business_code_totals: # drops the business code id column self.df.drop( constants.BUSINESS_CODE_ID_COLUMN_NAME, axis=1, inplace=True ) self._update_column_names() self._set_region_id_column() column_names = list(self.df) juri_column = ( constants.REGION_ID_COLUMN_NAME if self.is_region else constants.TAC_COLUMN_NAME ) # sets the columns that will be used in the table in the order # that they will be in new_column_names = [ constants.ID_COLUMN_NAME, juri_column, self.rollup_id_name ] + column_names[self.FIRST_QUARTER_COLUMN:] self.df = self.df[new_column_names] self._group_by_new_id() progress = 95 if self.is_region else 85 self.progress.update_progress(progress, 'Creating table.') self._create_table() self.progress.update_progress(100, 'Build complete.') self.progress.destroy() def _set_df(self): sql_code = 'ATTACH DATABASE ? AS ?' args = (str(constants.DB_PATHS[constants.STARS_DB]), constants.STARS_DB) con = sql.connect( constants.DB_PATHS[constants.STATEWIDE_DATASETS_DB], uri=True, timeout=constants.DB_TIMEOUT ) db_attached = utilities.execute_sql( sql_code=sql_code, args=args, open_con=con, dontfetch=True ) if db_attached: results = utilities.execute_sql( sql_code=self.query, open_con=con, getcursor=True ) if results: column_names = [i[0] for i in results.description] data = results.fetchall() self.df = pd.DataFrame(data, columns=column_names) con.close() def _update_column_names(self): column_names = list(self.df) # changes column to "id" from "new_id" column_names[0] = constants.ID_COLUMN_NAME if self.is_region: tac_index = 1 if self.is_business_code_totals else 2 # changes column to "region_id" from "tac" column_names[tac_index] = constants.REGION_ID_COLUMN_NAME # updates the column names in the dataframe self.df.columns = column_names def _set_region_id_column(self): # gets the regions id's from the id column region_id_column = self.df[ constants.ID_COLUMN_NAME ].apply(lambda x: x.split('-')[0]) self.df[constants.REGION_ID_COLUMN_NAME] = region_id_column def _group_by_new_id(self): column_names = list(self.df) group_columns = column_names[:self.FIRST_QUARTER_COLUMN] sum_columns = column_names[self.FIRST_QUARTER_COLUMN:] self.df = self.df.groupby( group_columns, as_index=False, sort=False )[sum_columns].sum() def _create_table(self): con = sql.connect( constants.DB_PATHS[constants.STATEWIDE_DATASETS_DB], timeout=constants.DB_TIMEOUT ) try: with con: self.df.to_sql( self.rollup_table_name, con, if_exists='replace', index=False ) except sql.OperationalError as e: msg.showerror(self.title, e) con.close()
class BusinessDetailJoin(threading.Thread): ''' ''' title = f'{constants.APP_NAME} - Business Detail (Join)' def __init__(self, jurisdiction, files, selections): super().__init__() self.jurisdiction = jurisdiction self.files = files self.selections = selections self.file_count = len(self.files) self.name = '' self.abort = False self.con = None self.df = None self.other_columns = [] self.table_names = [] self.period_columns = {} def run(self): self.progress = Progress(self, self.title) self.con = sql.connect(':memory:') try: progress = 0 for i, item in enumerate(self.files.items()): name, data = item df = data[1] if not self.name: self.name = name self.update_progress( progress, f'Creating SQL table for {name}.' ) self._process_file(df, i) progress += 45 / self.file_count self.update_progress(45, 'Joining SQL tables.') self._join_tables() # drops the id column self.df.drop(constants.ID_COLUMN_NAME, axis=1, inplace=True) self._revert_column_names() self._sort_quarters() self.update_progress(55, 'Filling in NULLS.') utilities.FillNa.fill_na(self.df) self.df.to_csv('test.csv') if self.df is not None: load_detail = LoadDetail(self, self.df, self.name) load_detail.load(self.jurisdiction) except Exception: msg.showerror( self.title, 'Unhandled exception occurred attempting to join ' f'{self.file_count} for {self.jurisdiction.id}:\n\n{traceback.format_exc()}' ) finally: self.progress.destroy() self.con.close() return self.df def _process_file(self, df, count): table_name = f't{count}' self.table_names.append(table_name) self._prepare_column_names(df) column_names = list(df) if not self.other_columns: self.other_columns = column_names[:constants.FIRST_QUARTER_COLUMN] self.other_columns.insert(0, constants.ID_COLUMN_NAME) self.period_columns[table_name] = column_names[constants.FIRST_QUARTER_COLUMN:] self._insert_id_column(df) # creates the table in the in memory database df.to_sql(table_name, self.con, index=False) def _prepare_column_names(self, df): ''' Prepares the column names so that they will be accepted by sql. ''' names = list(df) fixed_names = [] for name in names: name = name.replace(':', '').replace(' ', '_') if name[0].isdigit(): name = 'c_' + name fixed_names.append(name) df.columns = fixed_names def _insert_id_column(self, df): ''' Gets a unique sub number for each location to use as part of the key for that location. ''' # inserts an empty column into the dataframe for the row ids df.insert(0, constants.ID_COLUMN_NAME, '') row_ids = set() for i, row in enumerate(df.itertuples(index=False)): permit = str(row[constants.PERMIT_COLUMN + 1]) # the currently assigned value to the permit for this location # which may be a blank sub = row[constants.SUB_COLUMN + 1] # unique id to use as the join row_id = self._row_id(row_ids, permit, sub) # sets the row id in the dataframe df.at[i, constants.ID_COLUMN_NAME] = row_id # adds the row id to the set of already created row ids row_ids.add(row_id) def _row_id(self, keys, permit, sub): # the key is made up of the permit number, sub number and a third # integer separated by "-" key = f'{permit}-{sub}-0' # if the key is already in the list of keys, which will # occur when there exists duplicate subs for a permit # the last part of the key will be incremented until it is # no longer in the list of keys while key in keys: sub_id = int(key[-1]) + 1 key = f'{key[:-1]}{sub_id}' return key def _join_tables(self,): table_one = self.table_names[0] joined_table = 'joined' create_columns = deepcopy(self.other_columns) left_select_columns = [ f'{table_one}.{column}' for column in self.other_columns ] create_columns += self.period_columns[table_one] left_select_columns += self.period_columns[table_one] right_period_columns = self.period_columns[table_one] for name in self.table_names[1:]: right_period_columns += self.period_columns[name] right_select_columns = [ f'{name}.{column}' for column in self.other_columns ] + right_period_columns create_columns += self.period_columns[name] left_select_columns += self.period_columns[name] create_columns_string = ','.join(create_columns) left_select_columns_string = ','.join(left_select_columns) right_select_columns_string = ','.join(right_select_columns) # creates a new table "joined" to insert the results of the # emulated full outer join into it # then drops table one and the table that was joined and renames # the joined results to the name of table one to join with the # next table sql_script = f''' CREATE TABLE {joined_table} ({create_columns_string}); INSERT INTO {joined_table} ({create_columns_string}) SELECT {left_select_columns_string} FROM {table_one} LEFT JOIN {name} USING({constants.ID_COLUMN_NAME}) UNION ALL SELECT {right_select_columns_string} FROM {name} LEFT JOIN {table_one} USING({constants.ID_COLUMN_NAME}) WHERE {table_one}.{constants.ID_COLUMN_NAME} IS NULL; DROP TABLE {table_one}; DROP TABLE {name}; ALTER TABLE {joined_table} RENAME TO {table_one}; ''' self.con.executescript(sql_script) query = f''' SELECT * FROM {table_one} ''' self.df = pd.read_sql(query, self.con) def _revert_column_names(self): ''' Reverts the column names so that they are accepted by the load process. ''' revert_names = [] column_names = list(self.df) for name in column_names: name = name.replace('c_', '').replace('_', ' ') revert_names.append(name) self.df.columns = revert_names def _sort_quarters(self): column_names = list(self.df) quarter_columns = column_names[constants.FIRST_QUARTER_COLUMN:] sort_keys = [(q, q[-4:] + q[0]) for q in quarter_columns] quarter_columns = [ q[0] for q in sorted(sort_keys, key=itemgetter(1), reverse=True) ] column_names[constants.FIRST_QUARTER_COLUMN:] = quarter_columns # replaces the dataframe with the one with the quarters sorted self.df = self.df[column_names] def update_progress(self, progress, message): self.progress.update_progress( progress, message, 0, self.selections.jurisdiction_count )