示例#1
0
class CopyDb(threading.Thread):
    '''
    '''
    def __init__(self, source, target, progress_title):
        super().__init__()

        self.source = source
        self.target = target
        self.progress_title = progress_title

        self.title = f'{constants.APP_NAME} - {self.progress_title} Progress'

        self.db_name = ntpath.basename(self.source)

    def run(self):
        self.Progress = Progress(self, self.title, abort=False)

        con = sql.connect(self.source)

        with sql.connect(self.target) as bck:
            con.backup(bck, pages=1, progress=self._update_progress)

        con.close()

        self.Progress.destroy()

    def _update_progress(self, status, remaining, total):
        progress = (total - remaining) / total * 100

        message = 'Copied {:,} of {:,} pages from {}'.format(
            total - remaining, total, self.db_name)

        self.Progress.update_progress(progress, message)
示例#2
0
class DataController(threading.Thread):
    '''
    '''
    
    
    def __init__(self, process):
        super().__init__()
        
        self.process = process
        
        self.main = self.process.main
        self.selections = self.process.selections
        
        self.files = deepcopy(self.process.get_files())
        self.file_count = len(self.files)
        
        self.counter = 0
        
        self.progress = Progress(self, self.process.title)
        
        self.abort = False
        self.con = None
        
        
    def run(self):
        try:
            self._process_files()
            
        except Exception:
            msg.showerror(
                self.process.title, 
                f'Unhandled exception occurred:\n\n{traceback.format_exc()}',
                parent=self.process.gui)
            
        finally:
            self.progress.destroy()
                    
                    
    def _process_files(self):
        for i, item in enumerate(self.files.items(), start=1):
            if self.main.end_processes or self.abort:
                break
            
            self.counter = i
            
            name, data = item
            
            jurisidiction, df = data
            
            load_detail = LoadDetail(self, df, name)
               
            load_detail.load(jurisidiction)
            
        
    def update_progress(self, progress, message):
        self.progress.update_progress(
            progress, message, self.counter, self.file_count
            )
示例#3
0
class Grid(tk.Frame):

    def __init__(self, parent):
        tk.Frame.__init__(self, parent)
        tk.Grid.rowconfigure(self, 0, weight=1)
        tk.Grid.rowconfigure(self, 1, weight=1)
        tk.Grid.columnconfigure(self, 0, weight=1)
        tk.Grid.columnconfigure(self, 1, weight=1)
        tk.Grid.rowconfigure(self, 2, weight=1)

        self.configure(width=600, height=400)

        self.progress = None

        self.btn1 = tk.Button(self, text='Show Progress')
        self.btn1.grid(row=0, column=0, sticky='news')
        self.btn2 = tk.Button(self, text='Sleep')
        self.btn2.grid(row=0, column=1, sticky='news')
        self.btn3 = tk.Button(self, text='mq sleep')
        self.btn3.grid(row=1, column=0, sticky='news')
        self.btn4 = tk.Button(self, text='thread sleep')
        self.btn4.grid(row=1, column=1, sticky='news')

        self._run = 0
        self.running_time = tk.StringVar()

        self.lbl_running_time = tk.Label(self, textvariable=self.running_time)
        self.lbl_running_time.grid(row=2, column=0, columnspan=2)

        self.btn1.configure(command=self.show_progress)
        self.btn2.configure(command=self.sleep)
        self.btn3.configure(command=self.mq_sleep)
        self.btn4.configure(command=self.thread_sleep)

        self.mq = Queue.Queue()

        self.main_show()

    def show_progress(self):
        if self.progress:
            self.hide_progress()

        self.progress = Progress(self)

        x = self.winfo_width() / 2
        y = self.winfo_height() / 2

        self.progress.place(relx=0.0, rely=0.0, x=x, y=y, anchor='center')


    def hide_progress(self):
        if self.progress:
            self.progress.destroy()
            self.progress = None

    def sleep(self):
        import time; time.sleep(5)

    def mq_sleep(self):
        self.mq.put('sleep')

    def thread_sleep(self):
        def sleep():
            import time;time.sleep(5)
            self._run = 0

        t = threading.Thread(target=sleep)
        t.setDaemon(True)
        t.start()

    def main_show(self):
        self.running_time.set(str(self._run))
        try:
            m = self.mq.get_nowait()
            if m == 'sleep':
                import time; time.sleep(5)
        except Queue.Empty, e:
            pass
        self._run += 1
        self.after(1000, self.main_show)
示例#4
0
class RollupTotals(threading.Thread):
    '''
        Parent class for SegmentTotals(), SegmentTotalsRegion(), 
        CategoryTotals() and CategoryTotalsRegion().
        
        Creates a new table with the data fetched using the query 
        set in each of the child classes.
    '''
    
    
    FIRST_QUARTER_COLUMN = 3
    

    def __init__(
            self, is_addon=False, is_category=True, is_region=False,
            is_business_code_totals=False
            ):
        super().__init__()
        
        self.is_addon = is_addon
        self.is_category = is_category
        self.is_region = is_region
        self.is_business_code_totals = is_business_code_totals
        
        self.title = constants.APP_NAME
        
        self.input_table_name = constants.BUSINESS_CODE_TOTALS_TABLE
        
        if self.is_addon:
            self.input_table_name += constants.ADDON_SUFFIX
        
        self.df = None
        
        self.query = ''
        self.rollup_id_name = ''
        self.rollup_table_name = ''
        
        
    def run(self):
        if self.is_addon:
            self.rollup_table_name += constants.ADDON_SUFFIX
         
        self.progress = Progress(self, self.title, abort=False)
         
        self.progress.update_progress(0, 'Fetching business code totals.')
         
        self._set_df()
         
        progress = 90 if self.is_region else 70
         
        self.progress.update_progress(progress, 'Preparing data.')
         
        if self.df is not None:
            # drops the old id column
            self.df.drop(constants.ID_COLUMN_NAME, axis=1, inplace=True)
            
            if not self.is_business_code_totals:
                # drops the business code id column
                self.df.drop(
                    constants.BUSINESS_CODE_ID_COLUMN_NAME, axis=1, inplace=True
                    )
              
            self._update_column_names()
              
            self._set_region_id_column()
              
            column_names = list(self.df)
              
            juri_column = (
                constants.REGION_ID_COLUMN_NAME if self.is_region 
                else constants.TAC_COLUMN_NAME
                )
                       
            # sets the columns that will be used in the table in the order 
            # that they will be in
            new_column_names = [
                constants.ID_COLUMN_NAME, juri_column, self.rollup_id_name
                ] + column_names[self.FIRST_QUARTER_COLUMN:]
            
            self.df = self.df[new_column_names]
                
            self._group_by_new_id()
              
            progress = 95 if self.is_region else 85
              
            self.progress.update_progress(progress, 'Creating table.')
                
            self._create_table()
              
            self.progress.update_progress(100, 'Build complete.')
            
        self.progress.destroy()
        
        
    def _set_df(self):
        sql_code = 'ATTACH DATABASE ? AS ?'
        
        args = (str(constants.DB_PATHS[constants.STARS_DB]), constants.STARS_DB)
        
        con = sql.connect(
            constants.DB_PATHS[constants.STATEWIDE_DATASETS_DB], uri=True,
            timeout=constants.DB_TIMEOUT
            )
           
        db_attached = utilities.execute_sql(
            sql_code=sql_code, args=args, open_con=con, dontfetch=True
            )
        
        if db_attached:
            results = utilities.execute_sql(
                sql_code=self.query, open_con=con, getcursor=True
                )
            
            if results:
                column_names = [i[0] for i in results.description]
                
                data = results.fetchall()
                
                self.df = pd.DataFrame(data, columns=column_names)
                
        con.close()
        
        
    def _update_column_names(self):
        column_names = list(self.df)
        
        # changes column to "id" from "new_id"
        column_names[0] = constants.ID_COLUMN_NAME
        
        if self.is_region:
            tac_index = 1 if self.is_business_code_totals else 2
            
            # changes column to "region_id" from "tac"
            column_names[tac_index] = constants.REGION_ID_COLUMN_NAME
        
        # updates the column names in the dataframe
        self.df.columns = column_names
        
        
    def _set_region_id_column(self):
        # gets the regions id's from the id column
        region_id_column = self.df[
            constants.ID_COLUMN_NAME
            ].apply(lambda x: x.split('-')[0])
            
        self.df[constants.REGION_ID_COLUMN_NAME] = region_id_column
        

    def _group_by_new_id(self):
        column_names = list(self.df)
        
        group_columns = column_names[:self.FIRST_QUARTER_COLUMN]
        
        sum_columns = column_names[self.FIRST_QUARTER_COLUMN:]
        
        self.df = self.df.groupby(
            group_columns, as_index=False, sort=False
            )[sum_columns].sum()
            
            
    def _create_table(self):
        con = sql.connect(
            constants.DB_PATHS[constants.STATEWIDE_DATASETS_DB], 
            timeout=constants.DB_TIMEOUT
            )
        
        try:
            with con:
                self.df.to_sql(
                    self.rollup_table_name, con, if_exists='replace', 
                    index=False
                    )
                
        except sql.OperationalError as e:
            msg.showerror(self.title, e)
            
        con.close()
示例#5
0
class BusinessDetailJoin(threading.Thread):
    '''
    '''
    
    
    title = f'{constants.APP_NAME} - Business Detail (Join)'
    
    
    def __init__(self, jurisdiction, files, selections):
        super().__init__()
        
        self.jurisdiction = jurisdiction
        self.files = files
        self.selections = selections
        
        self.file_count = len(self.files)
        
        self.name = ''
        
        self.abort = False
        self.con = None
        
        self.df = None
        
        self.other_columns = []
        self.table_names = []
        
        self.period_columns = {}
        
        
    def run(self):
        self.progress = Progress(self, self.title)

        self.con = sql.connect(':memory:')
        
        try:
            progress = 0
        
            for i, item in enumerate(self.files.items()):
                name, data = item
                df = data[1]
                
                if not self.name:
                    self.name = name
                    
                self.update_progress(
                    progress, f'Creating SQL table for {name}.'
                    )
                
                self._process_file(df, i)
                
                progress += 45 / self.file_count
                
            self.update_progress(45, 'Joining SQL tables.')
            
            self._join_tables()
            
            # drops the id column
            self.df.drop(constants.ID_COLUMN_NAME, axis=1, inplace=True)
            
            self._revert_column_names()
            
            self._sort_quarters()
            
            self.update_progress(55, 'Filling in NULLS.')
            
            utilities.FillNa.fill_na(self.df)
            self.df.to_csv('test.csv')
            if self.df is not None:
                load_detail = LoadDetail(self, self.df, self.name)
                
                load_detail.load(self.jurisdiction)
                
        except Exception:
            msg.showerror(
                self.title, 
                'Unhandled exception occurred attempting to join '
                f'{self.file_count} for {self.jurisdiction.id}:\n\n{traceback.format_exc()}'
                )
        
        finally:
            self.progress.destroy()
            self.con.close()
    
        return self.df
       
        
    def _process_file(self, df, count):
        table_name = f't{count}'
        self.table_names.append(table_name)
         
        self._prepare_column_names(df)
        
        column_names = list(df)
        
        if not self.other_columns:
            self.other_columns = column_names[:constants.FIRST_QUARTER_COLUMN]
            
            self.other_columns.insert(0, constants.ID_COLUMN_NAME)
        
        self.period_columns[table_name] = column_names[constants.FIRST_QUARTER_COLUMN:]
        
        self._insert_id_column(df)
        
        # creates the table in the in memory database
        df.to_sql(table_name, self.con, index=False)
        
        
    def _prepare_column_names(self, df):
        '''
            Prepares the column names so that they will be accepted by sql.
        '''
        names = list(df)
        
        fixed_names = []
        
        for name in names:
            name = name.replace(':', '').replace(' ', '_')
            
            if name[0].isdigit():
                name = 'c_' + name
                
            fixed_names.append(name)
                
        df.columns = fixed_names
        
        
    def _insert_id_column(self, df):
        '''
            Gets a unique sub number for each location to use as part of the 
            key for that location.
        '''
        # inserts an empty column into the dataframe for the row ids
        df.insert(0, constants.ID_COLUMN_NAME, '')
        
        row_ids = set()
        
        for i, row in enumerate(df.itertuples(index=False)):
            permit = str(row[constants.PERMIT_COLUMN + 1])
            
            # the currently assigned value to the permit for this location
            # which may be a blank
            sub = row[constants.SUB_COLUMN + 1]
     
            # unique id to use as the join
            row_id = self._row_id(row_ids, permit, sub)
            
            # sets the row id in the dataframe
            df.at[i, constants.ID_COLUMN_NAME] = row_id
            
            # adds the row id to the set of already created row ids
            row_ids.add(row_id)
    
    
    def _row_id(self, keys, permit, sub):
        # the key is made up of the permit number, sub number and a third 
        # integer separated by "-"
        key = f'{permit}-{sub}-0'
        
        # if the key is already in the list of keys, which will
        # occur when there exists duplicate subs for a permit
        # the last part of the key will be incremented until it is
        # no longer in the list of keys
        while key in keys:
            sub_id = int(key[-1]) + 1
            key = f'{key[:-1]}{sub_id}'
                
        return key
         

    def _join_tables(self,):
        table_one = self.table_names[0]
        joined_table = 'joined'
        
        create_columns = deepcopy(self.other_columns)
        left_select_columns = [
            f'{table_one}.{column}' for column in self.other_columns
            ]
        
        create_columns += self.period_columns[table_one]
        left_select_columns += self.period_columns[table_one]
        
        right_period_columns = self.period_columns[table_one]
        
        for name in self.table_names[1:]:
            right_period_columns += self.period_columns[name]
            
            right_select_columns = [
                f'{name}.{column}' for column in self.other_columns
                ] + right_period_columns
            
            create_columns += self.period_columns[name]
            left_select_columns += self.period_columns[name]
            
            create_columns_string = ','.join(create_columns)
            left_select_columns_string = ','.join(left_select_columns)
            right_select_columns_string = ','.join(right_select_columns)
            
            # creates a new table "joined" to insert the results of the 
            # emulated full outer join into it
            # then drops table one and the table that was joined and renames
            # the joined results to the name of table one to join with the 
            # next table
            sql_script = f'''
                CREATE TABLE {joined_table} ({create_columns_string});
                
                INSERT INTO {joined_table} ({create_columns_string})
                    SELECT {left_select_columns_string}
                    FROM {table_one} 
                    LEFT JOIN {name} USING({constants.ID_COLUMN_NAME})
                    
                    UNION ALL
                    
                    SELECT {right_select_columns_string}
                    FROM {name} 
                    LEFT JOIN {table_one} USING({constants.ID_COLUMN_NAME})
                    WHERE {table_one}.{constants.ID_COLUMN_NAME} IS NULL;
                    
                DROP TABLE {table_one};
                
                DROP TABLE {name};
                
                ALTER TABLE {joined_table} RENAME TO {table_one}; 
                '''

            self.con.executescript(sql_script)
            
        query = f'''
            SELECT *
            FROM {table_one}
            '''
             
        self.df = pd.read_sql(query, self.con)
        
            
    def _revert_column_names(self):
        '''
            Reverts the column names so that they are accepted by the load
            process.
        '''
        revert_names = []
        
        column_names = list(self.df)
        
        for name in column_names:
            name = name.replace('c_', '').replace('_', ' ')
            
            revert_names.append(name)
            
        self.df.columns = revert_names


    def _sort_quarters(self):
        column_names = list(self.df)
        
        quarter_columns = column_names[constants.FIRST_QUARTER_COLUMN:]
            
        sort_keys = [(q, q[-4:] + q[0]) for q in quarter_columns]
        
        quarter_columns = [
            q[0] for q in sorted(sort_keys, key=itemgetter(1), reverse=True)
            ]
        
        column_names[constants.FIRST_QUARTER_COLUMN:] = quarter_columns
            
        # replaces the dataframe with the one with the quarters sorted 
        self.df = self.df[column_names]
        
    
    def update_progress(self, progress, message):
        self.progress.update_progress(
            progress, message, 0, self.selections.jurisdiction_count
            )