def db_setup_as_string():
    """Return the database layout as a string.

    For every table name returned by ``db_utils.get_tables()``, taken in
    sorted order, two entries are collected: a one-element tuple holding the
    table name, followed by whatever ``db_utils.get_table_info()`` returns
    for that table. The collected list is then converted with
    ``utils.anything_to_string_representation()``.
    """
    setup = []
    for table_name in sorted(db_utils.get_tables()):
        # Table name first (as a 1-tuple), then the info for that table.
        setup.extend([(table_name, ), db_utils.get_table_info(table_name)])
    return utils.anything_to_string_representation(setup)
Пример #2
0
def db_setup_as_string():
    """Build a string describing every table in the database.

    The tables from ``db_utils.get_tables()`` are visited in sorted order.
    Each one contributes a ``(table,)`` tuple and then the result of
    ``db_utils.get_table_info(table)`` to a flat list, which is finally
    rendered by ``utils.anything_to_string_representation()``.
    """
    layout = [
        entry
        for table in sorted(db_utils.get_tables())
        # Header tuple first, then the table info, one pair per table.
        for entry in ((table,), db_utils.get_table_info(table))
    ]
    return utils.anything_to_string_representation(layout)
Пример #3
0
    def get_tables(self, line):
        """IPython extension function for finding tables.

        The argument string ``line`` is parsed with ``parse_argstring`` and an
        engine is obtained from ``self.get_engine``. The lookup itself is
        delegated to ``dbu.get_tables``, which is asked to print its result
        unless ``as_frame`` was requested.

        Returns the value from ``dbu.get_tables`` when ``as_frame`` is set,
        otherwise ``None``. A ``sa.exc.DatabaseError`` is printed rather than
        raised, and ``None`` is returned in that case.
        """
        parsed = parse_argstring(self.get_tables, line)
        connection_engine = self.get_engine(parsed)

        try:
            tables_frame = dbu.get_tables(
                parsed.table_name,
                connection_engine,
                print_result=not parsed.as_frame,
                exact_match=parsed.exact_match,
            )
        except sa.exc.DatabaseError as e:
            print(f'{e}')
            return None

        # Only hand the result back when the caller asked for it as a frame.
        return tables_frame if parsed.as_frame else None
Пример #4
0
def build_datasets():
    """Build one wide 'total_<table>' dataset per 'month_' table.

    Tables of the 'public' schema whose name contains 'month_' are processed
    one by one; tables without a 'region' column are skipped. For every
    combination of object-dtype columns that includes 'region', the table is
    grouped by month start of 'fecha' plus those columns (ordered as in
    ``keys_order``), aggregated with ``tables_agg[table]`` and pivoted once
    per target in ``tables_vars[table]``. Pivot columns are named
    '<key values joined by $>$<TARGET>'. All pivots are concatenated
    column-wise and written both to the database (replacing any existing
    'total_<table>') and to '../results/total_<table>.csv'.
    """
    schema = 'public'
    df_tables = get_tables(schema=schema)
    tables = [name for name in df_tables['table_name'] if 'month_' in name]
    print(tables)

    for table in tables:
        df_cols = get_table_cols(table)
        df_total = pd.DataFrame()

        if df_cols['column_name'].isin(['region']).any():
            print(f"\n{table}")
            query = f"""
            SELECT *
            FROM {schema}.{table}
            """

            df_table = pd.read_sql(query, con=conn_pg, parse_dates=['fecha'])
            # Object-dtype columns are the candidate grouping keys.
            object_cols = df_table.dtypes[df_table.dtypes == 'object'].index

            for size in range(1, len(object_cols) + 1):
                for comb in combinations(object_cols, size):
                    # Only combinations that contain 'region' are aggregated.
                    if 'region' not in comb:
                        continue

                    group_keys = [key for key in keys_order if key in comb]
                    monthly = df_table.groupby(
                        [pd.Grouper(key='fecha', freq='MS')] + group_keys)
                    df = monthly.agg(tables_agg[table]).reset_index()
                    # One label per row: the key values joined with '$'.
                    df['columns'] = df.apply(
                        lambda row: '$'.join(row[group_keys]), axis=1)

                    for target in tables_vars[table]:
                        suffix = '$' + target.upper()
                        df_pivot = df.pivot(index='fecha',
                                            columns='columns',
                                            values=target)
                        df_pivot.columns = [
                            label + suffix for label in df_pivot.columns
                        ]
                        df_total = pd.concat([df_total, df_pivot], axis=1)

            output_name = 'total_{}'.format(table)
            df_total.to_sql(output_name,
                            con=conn_pg,
                            schema=schema,
                            if_exists='replace')
            df_total.to_csv('../results/total_{}.csv'.format(table))
    def get_table_rows_with_differences(self):
        """
        Counts rows for all tables in the exported and the source database and returns those that differ.

        self.dest_dbconnection is reported under the alias 'exported_db' and
        self.source_dbconnection under the alias 'source_db'. The table list of
        each connection comes from db_utils.get_tables with views skipped.
        A table that has no count for one of the aliases (it was not listed for
        that connection, or its count query failed and was logged) is shown as
        'table_missing' for that alias.

        :return: a printable string: a header row followed by one row for each
                 table whose row count differs between the two databases
        """
        db_aliases_and_connections = [('exported_db', self.dest_dbconnection), ('source_db', self.source_dbconnection)]

        #Collect {tablename: {alias: nr_of_rows as string}}
        results = {}
        for alias, dbconnection in db_aliases_and_connections:
            tablenames = db_utils.get_tables(dbconnection, skip_views=True)
            for tablename in tablenames:
                sql = """SELECT count(*) FROM %s"""%(tablename)
                try:
                    nr_of_rows = dbconnection.execute_and_fetchall(sql)[0][0]
                except Exception:
                    #Best effort: log the failing sql and continue with the next table.
                    #KeyboardInterrupt and SystemExit are not caught here.
                    utils.MessagebarAndLog.warning(log_msg=ru(QCoreApplication.translate('ExportData', 'Sql failed while getting table row differences: %s'))%sql)
                else:
                    results.setdefault(tablename, {})[alias] = str(nr_of_rows)

        #Create header
        db_aliases = sorted(_x[0] for _x in db_aliases_and_connections)
        printable_results = [['tablename'] + db_aliases]

        #Create table result rows, keeping only tables where the two counts differ
        for tablename, dbdict in sorted(results.items()):
            vals = [tablename]
            vals.extend([str(dbdict.get(alias, 'table_missing')) for alias in db_aliases])
            if vals[1] != vals[2]:
                printable_results.append(vals)

        printable_msg = '\n'.join(['{0:40}{1:15}{2:15}'.format(result_row[0], result_row[1], result_row[2]) for result_row in printable_results])
        return printable_msg
Пример #6
0
    def add_metadata_to_about_db(self, dbconnection, created_tables_sqls=None):
        """
        Inserts one row per table and one row per column into the table about_db.

        For every table (views skipped, sorted by name) the CREATE TABLE
        statement is scanned for a /* */ comment, used as the table
        description, and for "--" comments, used as column descriptions.
        Column properties come from db_utils.get_table_info and foreign keys
        from db_utils.get_foreign_keys.

        :param dbconnection: connection used through execute() and
                             execute_and_fetchall() and passed on to the db_utils helpers
        :param created_tables_sqls: optional mapping {tablename: CREATE TABLE statement}.
                                    When None, the statement is read from sqlite_master.
        :return: None
        :raises: any exception from the column insert is re-raised after the
                 failing sql has been printed
        """
        tables = sorted(db_utils.get_tables(dbconnection=dbconnection, skip_views=True))

        #Matches comment inside /* */
        #create_table_sql CREATE TABLE meteo /*meteorological observations*/(
        table_descr_reg = re.compile(r'/\*(.+)\*/', re.MULTILINE)
        #Matches comment after --:
        # strata text NOT NULL --clay etc
        #, color_mplot text NOT NULL --color codes for matplotlib plots
        column_descr_reg = re.compile(r'([A-Za-z_]+)[ ]+[A-Za-z ]*--(.+)', re.MULTILINE)

        for table in tables:

            #Get table and column comments
            if created_tables_sqls is None:
                table_descr_sql = ("SELECT name, sql from sqlite_master WHERE name = '%s';"%table)
                create_table_sql = dbconnection.execute_and_fetchall(table_descr_sql)[0][1]
            else:
                create_table_sql = created_tables_sqls[table]
            table_descr = table_descr_reg.findall(create_table_sql)
            try:
                table_descr = table_descr[0]
            except IndexError:
                #No /* */ comment in the CREATE TABLE statement
                table_descr = None
            else:
                #Strip trailing line breaks and escape single quotes for the sql literal
                table_descr = table_descr.rstrip('\n').rstrip('\r').replace("'", "''")

            #{column name: text after "--"}
            columns_descr = dict(column_descr_reg.findall(create_table_sql))

            table_info = db_utils.get_table_info(table, dbconnection)

            #{column in this table: (referenced table, referenced column)}
            #NOTE(review): only the first pair of each referenced table is used;
            #confirm whether get_foreign_keys can return more than one pair per table.
            foreign_keys_dict = {}
            foreign_keys = db_utils.get_foreign_keys(table, dbconnection)
            for _table, _from_to in foreign_keys.items():
                _from = _from_to[0][0]
                _to = _from_to[0][1]
                foreign_keys_dict[_from] = (_table, _to)

            #Table row: columnname is '*' and all column properties are empty strings.
            #NOTE(review): every value is embedded as a quoted literal, so the CASE
            #condition looks always true and a None value is written as the text
            #'None' rather than NULL. Confirm whether that is intended.
            sql = r"""INSERT INTO about_db (tablename, columnname, description, data_type, not_null, default_value, primary_key, foreign_key) VALUES """
            sql +=  r'({});'.format(', '.join(["""(CASE WHEN '%s' != '' or '%s' != ' ' or '%s' IS NOT NULL THEN '%s' else NULL END)"""%(col, col, col, col) for col in [table, r'*', table_descr, r'', r'', r'', r'', r'']]))
            dbconnection.execute(sql)

            #Column rows
            for column in table_info:
                colname = column[1]
                data_type = column[2]
                #Only a set flag is stored; anything else becomes an empty string
                not_null = str(column[3]) if str(column[3]) == '1' else ''
                default_value = column[4] if column[4] else ''
                primary_key = str(column[5]) if str(column[5]) != '0' else ''
                _foreign_keys = ''
                if colname in foreign_keys_dict:
                    #Formatted as referenced_table(referenced_column)
                    _foreign_keys = '%s(%s)'%(foreign_keys_dict[colname])
                column_descr = columns_descr.get(colname, None)
                if column_descr:
                    column_descr = column_descr.rstrip('\n').rstrip('\r').replace("'", "''")
                sql = 'INSERT INTO about_db (tablename, columnname, data_type, not_null, default_value, primary_key, foreign_key, description) VALUES '
                sql += '({});'.format(', '.join(["""CASE WHEN '%s' != '' or '%s' != ' ' or '%s' IS NOT NULL THEN '%s' else NULL END"""%(col, col, col, col) for col in [table, colname, data_type, not_null, default_value, primary_key, _foreign_keys, column_descr]]))
                try:
                    dbconnection.execute(sql)
                except Exception:
                    #Show the failing statement for debugging, then re-raise.
                    try:
                        print(sql)
                    except Exception:
                        #Printing is best effort and must not hide the original error.
                        pass
                    raise