示例#1
0
    def load(self, *args, **kwargs):

        with get_postgres() as conn:
            with conn.cursor() as cursor:
                try:
                    with open(constants.LOCAL_STORAGE + self.file,
                              "r") as upload_file:
                        # cursor.copy_from(upload_file,self.table_name,sep='\t')
                        # conn.commit()
                        cursor.copy_expert(
                            "COPY {} from stdin with csv header delimiter '\t'"
                            .format(self.table_name), upload_file)
                        conn.commit()

                    upload_error = False
                    for msg in conn.notices:
                        if '{}'.format(self.table_name) not in msg:
                            continue

                        upload_error = True if 'stl_load_errors' in msg else upload_error
                        if 'record' in msg:
                            try:
                                rows_cnt = int(re.findall(r'\d+', msg)[0])
                            except (IndexError, ValueError):
                                pass

                    if upload_error:
                        self.show_redshift_error(conn, rollback=False)
                        return

                except psycopg2.InternalError as ex:
                    raise ex
示例#2
0
    def _get_parameters(self):

        with get_postgres() as conn:
            with conn.cursor(
                    cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
                cursor.execute("""
                SELECT * FROM public.indeed_etl_jobs where is_active = TRUE 
                """)
                for query in cursor.fetchall():
                    yield query
示例#3
0
    def _get_open_jobs(self):

        with get_postgres() as conn:
            with conn.cursor(
                    cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
                cursor.execute("""
                select distinct job_key 
                from public.indeed_jobs_duration
                    where job_expired = '1900-01-01'
                    and job_no_api = '1900-01-01'
                """)
                for query in cursor.fetchall():
                    yield query
示例#4
0
    def get_tables_list(self, scheme='public'):
        with get_postgres() as conn:
            with conn.cursor(
                    cursor_factory=psycopg2.extras.DictCursor) as cursor:
                cursor.execute(
                    """
                select
                  distinct("tablename") as tablename 
                from
                  pg_table_def
                where
                  "schemaname"=%s;
                """, (scheme, ))

                return [row['tablename'] for row in cursor.fetchall()]
示例#5
0
    def exec_analyze_table(self, table_name, scheme='public'):
        """
        Perform the ANALYZE operation on a given table
        """
        # Detect if scheme is present in a table name
        if '.' not in table_name:
            table_name = '"{}"."{}"'.format(scheme, table_name)

        analyze_query = 'analyze {}'.format(table_name)
        with get_postgres() as conn:
            with conn.cursor() as cursor:
                return self.exec_query(
                    cursor,
                    analyze_query,
                    query_description='ANALYZE {}'.format(table_name))
示例#6
0
    def table_exists(self, table_name, scheme='public'):
        with get_postgres() as conn:
            with conn.cursor(
                    cursor_factory=psycopg2.extras.DictCursor) as cursor:
                cursor.execute(
                    """
                select
                  distinct("tablename") as tablename 
                from
                  pg_table_def
                where
                  "tablename"=%s and
                  "schemaname"=%s;
                """, (table_name, scheme))

                return len(cursor.fetchall()) > 0
示例#7
0
    def rebuild_stage_table(self, recreate_table=True):
        """
        Rebuild appropriate ETL stage table
        """
        with get_postgres() as conn:
            with conn.cursor() as cursor:
                if recreate_table:
                    cursor.execute("drop table if exists {} cascade".format(
                        self.table_name))
                    cursor.execute(self.table_ddl)
                    conn.commit()
                    return

                if not runner.table_exists(self.table_name):
                    cursor.execute(self.table_ddl)
                    conn.commit()
                    return
示例#8
0
 def exec_sql_scripts(self, paths, show_error=True, explicit_commit=False):
     """
     Execute multiple SQL scripts one by one
     """
     affected_rows = 0
     with get_postgres() as conn:
         for path in paths:
             sql_query = self.read_query(path)
             with conn.cursor(
                     cursor_factory=psycopg2.extras.DictCursor) as cursor:
                 affected_rows += self.exec_query(cursor,
                                                  sql_query,
                                                  show_error=show_error,
                                                  query_description=path)
         if explicit_commit:
             conn.commit()
     return affected_rows
示例#9
0
    def exec_analyze_tables(self, table_names, scheme='public'):
        """
        Perform the ANALYZE operation on multiple tables
        """

        analyze_query = 'analyze {}'
        affected_rows = 0
        with get_postgres() as conn:
            with conn.cursor() as cursor:
                for table_name in table_names:
                    # Detect if scheme is present in a table name
                    if '.' not in table_name:
                        table_name = '"{}"."{}"'.format(scheme, table_name)

                    affected_rows += self.exec_query(
                        cursor,
                        analyze_query.format(table_name),
                        query_description='ANALYZE {}'.format(table_name))
        return affected_rows
示例#10
0
 def exec_sql_script(self,
                     path,
                     params=None,
                     show_error=True,
                     explicit_commit=False):
     """
     Execute a single SQL script
     """
     sql_query = self.read_query(path)
     with get_postgres() as conn:
         with conn.cursor(
                 cursor_factory=psycopg2.extras.DictCursor) as cursor:
             result = self.exec_query(cursor,
                                      sql_query,
                                      params,
                                      show_error=show_error,
                                      query_description=path)
             if explicit_commit:
                 conn.commit()
             return result
示例#11
0
def main(*args, **kwargs):

    extractor = IndeedDuration()

    with get_postgres() as conn:
        with conn.cursor(
                cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
            for job_key, expired, api_active in extractor.extract():
                print(job_key, expired, api_active)
                if expired:
                    cursor.execute(f"""
                        update public.indeed_jobs_duration
                        set job_expired = current_date
                        where job_key = '{job_key}'
                    """)
                elif not api_active:
                    cursor.execute(f"""
                        update public.indeed_jobs_duration
                        set job_no_api = current_date
                        where job_key = '{job_key}'
                    """)
            conn.commit()