Exemplo n.º 1
0
def process_song_file(cur: psycopg2.connect, filepath: str) -> None:
    """Extract and load data for song and artist from a song file.

    Parameters
    ----------
    cur: psycopg2.connect
        Psycopg2 database cursor for inserting data.

    filepath: str
        Path for the song file (line-delimited JSON).

    """

    # open song file (one JSON object per line)
    df = pd.read_json(filepath, lines=True)

    # insert song record -- only the first row of the file is used
    # (fixed: the original had a duplicated `song_data = song_data = ...`)
    song_columns = ['song_id', 'title', 'artist_id', 'year', 'duration']
    song_data = df[song_columns].values[0].tolist()
    cur.execute(song_table_insert, song_data)

    # insert artist record
    artist_columns = [
        'artist_id',
        'artist_name',
        'artist_location',
        'artist_latitude',
        'artist_longitude',
    ]
    artist_data = df[artist_columns].values[0].tolist()

    cur.execute(artist_table_insert, artist_data)
Exemplo n.º 2
0
def upsert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str, pkeys: list) -> None:
    """Bulk-upsert a dataframe into *table* with INSERT ... ON CONFLICT.

    Uses cursor.mogrify() to build the VALUES list so every value is
    properly quoted/escaped by psycopg2 (the previous str(tuple) approach
    broke on strings containing quotes, on None, and on dates).

    Parameters
    ----------
    conn : psycopg2 connection
    df : pd.DataFrame
        Rows to upsert; its columns must match the table's columns.
    table : str
        Target table name (trusted input -- interpolated into the SQL).
    pkeys : list
        Primary-key column names used for conflict detection.

    Returns ``None`` on success, ``1`` after a rolled-back failure
    (legacy error signal kept for backward compatibility).
    """
    cursor = conn.cursor()

    # One "(%s, %s, ...)" group per row, escaped by the driver itself.
    row_template = '(' + ', '.join(['%s'] * len(df.columns)) + ')'
    values_clause = b','.join(
        cursor.mogrify(row_template, tuple(row)) for row in df.to_numpy()
    ).decode()

    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))

    insert_statement = "INSERT INTO %s(%s) VALUES %s" % (table, cols, values_clause)
    on_conflict_statement = 'ON CONFLICT (' + ', '.join(map(str, pkeys)) + ')'
    do_update_statement = _create_update_set_statement(list(df.columns))

    # Full SQL query to execute
    query = insert_statement + ' ' + on_conflict_statement + ' ' + do_update_statement

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    cursor.close()
Exemplo n.º 3
0
def insert_mood(conn: psycopg2.connect, update: Update,
                user_data: Dict[str, Union[str, List[str]]]) -> None:
    """Persist one mood entry to moodtracker: one row per selected reason,
    or a single row with reason=NULL when no reasons were given."""
    sql = """
    INSERT INTO moodtracker 
        (message_id, chat_id, mood, reason, note, date) VALUES(%s, %s, %s, %s, %s, %s)
    """

    cur = conn.cursor()
    chat_id = update.message.chat_id
    now = datetime.now()
    # message_id combines the chat id with a microsecond timestamp so rows
    # belonging to the same entry share one identifier.
    message_id = str(chat_id) + now.strftime("%Y%m%d%H%M%S%f")

    mood = user_data['mood']
    note = user_data['note']
    reasons = user_data['reasons']

    if reasons:
        for reason in reasons:
            cur.execute(sql, (message_id, chat_id, mood, reason, note, now))
    else:
        cur.execute(sql, (message_id, chat_id, mood, None, note, now))

    conn.commit()
    cur.close()
Exemplo n.º 4
0
def insert_data(conn: psycopg2.connect, df: pd.DataFrame) -> None:
    '''
    Bulk insert dataframe into advertisementdata table.

    This function was inspired by Naysan Saran's article "Pandas to PostgreSQL using Psycopg2: Bulk Insert Performance
    Benchmark", in which the author chose a variety of bulk insert methods and compared their execution time. Saving the
    dataframe to a StringIO object and then copying this to the database proved to be the most efficient when dealing
    with millions of records.

    Source: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/

    On failure the transaction is rolled back and the error logged; no
    exception is propagated (best-effort behavior preserved).
    '''

    set_index(conn, df)

    # Serialize the dataframe to an in-memory CSV buffer for COPY.
    buffer = StringIO()
    df.to_csv(buffer, index_label='id', header=False)
    buffer.seek(0)
    cursor = conn.cursor()

    try:
        cursor.copy_from(buffer, 'advertisementdata', sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Error inserting data: {error}")
        conn.rollback()
    finally:
        # Close exactly once, on success and failure alike (the original
        # closed the cursor twice on the error path).
        cursor.close()
Exemplo n.º 5
0
def drop_tables(cur: psycopg2.connect) -> None:
    """Drop all Northwind-style tables; names are lower-cased before use."""
    for name in ("Employees", "OrderDetails", "Categories", "Customers",
                 "Orders", "Products", "Shippers", "Suppliers"):
        cur.execute(f"DROP TABLE {name.lower()};")
def create_table_teams(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''Create the columns and relationships of the TEAMS table.

    Parameters
    ----------
    conn : psycopg2 connection
    overwrite : bool
        When True, drop any existing TEAMS table before creating it.
    '''

    table_name = 'TEAMS'

    # Prepend a DROP only when the caller explicitly asks to overwrite.
    if overwrite:
        drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};'''
    else:
        drop_table_statement = ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , TEAM_ID SMALLINT
            , MANAGER_ID VARCHAR(50)
            , TEAM_NAME VARCHAR(50)
            , MANAGER_NAME VARCHAR(50)
            , ESPN_NAME VARCHAR(50)

            , CONSTRAINT TEAMS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, TEAM_ID)
        );
        '''

    cursor = conn.cursor()
    cursor.execute(create_table_statement)
    conn.commit()

    cursor.close()
Exemplo n.º 7
0
def init_tables(cur: psycopg2.connect) -> None:
    """Execute every SQL fixture found under FIXTURES_PATH/init."""
    init_dir = os.path.join(FIXTURES_PATH, "init")
    for entry in listdir(init_dir):
        with open(os.path.join(init_dir, entry), 'r') as handle:
            cur.execute(handle.read().strip())
def create_table_weeks(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''Create the columns and relationships of the WEEKS table.

    Parameters
    ----------
    conn : psycopg2 connection
    overwrite : bool
        When True, drop any existing WEEKS table before creating it.
    '''

    table_name = 'WEEKS'

    # Prepend a DROP only when the caller explicitly asks to overwrite.
    if overwrite:
        drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};'''
    else:
        drop_table_statement = ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER SMALLINT
            , MATCHUP_PERIOD SMALLINT
            , REG_SEASON_FLAG SMALLINT

            , CONSTRAINT WEEKS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER)
        );
        '''

    cursor = conn.cursor()
    cursor.execute(create_table_statement)
    conn.commit()

    cursor.close()
Exemplo n.º 9
0
def table_columns(conn: psycopg2.connect, table: str) -> tuple:
    """Return the column names of *table* (public schema) as a list.

    Parameters
    ----------
    conn : psycopg2 connection
    table : str
        Table name; lower-cased before the lookup.

    Returns a list of column-name strings on success, or ``1`` after a
    rolled-back database error (legacy error signal kept for callers).
    """

    # Bind the table name as a query parameter instead of interpolating it
    # into the SQL text (avoids quoting bugs / injection).
    query = '''
        SELECT COLUMN_NAME
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = 'public'
            AND TABLE_NAME = %s
    '''

    cursor = conn.cursor()
    try:
        cursor.execute(query, (table.lower(),))

        cols = [col[0] for col in cursor.fetchall()]

        cursor.close()

        return cols

    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()

        return 1
Exemplo n.º 10
0
    def bootstrap( self ):
        """
              Initialize our database connection after our fork() has occurred;
            this is due to the nature of the psycopg2 library when used with
            asynchronous connections.

            http://initd.org/psycopg/docs/usage.html#thread-and-process-safety

            @params None
            @return None
        """
        # `async` became a reserved keyword in Python 3.7, so the original
        # `Database(..., async = True)` is now a SyntaxError; psycopg2 accepts
        # `async_` as the supported alias for the same flag.
        # NOTE(review): assumes Database forwards kwargs to psycopg2.connect
        # -- confirm against the Database wrapper's signature.
        sqlCon = Database( database = "SpfAudit", async_ = True )

        # Block until the asynchronous connection is ready to use.
        self.block( sqlCon )
        sql = sqlCon.cursor()

        # Ensure schema intact before processing 
        sql.execute( self.query[ 'ctable' ] )
        self.block( sql.connection )

        # Propagate our master connection and cursor objects into
        # state structure. 
        self.state.update( {
            'sqlCon' : sqlCon,
            'sql'    : sql,
        } )
Exemplo n.º 11
0
def clear_tables(cur: psycopg2.connect) -> None:
    """Delete every row from each table; the tables themselves are kept."""
    for name in ("Employees", "OrderDetails", "Categories", "Customers",
                 "Orders", "Products", "Shippers", "Suppliers"):
        cur.execute(f"DELETE FROM {name.lower()} WHERE TRUE ;")
Exemplo n.º 12
0
def add_timer_to_db(conn: psycopg2.connect, cursor, user_id: int, chat_id: int,
                    time: datetime):
    """Add a timer row to the alerts table.

    The given time is shifted back 5 hours before storage
    (presumably a server-to-local timezone offset -- TODO confirm).
    """
    print('add_timer_to_db')
    time = time - timedelta(hours=5)
    # Parameterized query instead of f-string interpolation: avoids SQL
    # injection and lets the driver serialize the datetime correctly.
    cursor.execute(
        "INSERT INTO alerts (user_id, chat_id, time) VALUES (%s, %s, %s)",
        (user_id, chat_id, time),
    )
    conn.commit()
Exemplo n.º 13
0
def create_table_scores(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''Create the columns and relationships of the SCORES table.

    Parameters
    ----------
    conn : psycopg2 connection
    overwrite : bool
        When True, drop any existing SCORES table before creating it.
    '''

    table_name = 'SCORES'

    # Prepend a DROP only when the caller explicitly asks to overwrite.
    if overwrite:
        drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};'''
    else:
        drop_table_statement = ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER BIGINT
            , TEAM_ID SMALLINT
            , TEAM_ID_OPP SMALLINT
            , SCORE NUMERIC(5, 2)
            , SCORE_OPP NUMERIC(5, 2)
            , WLT_POINTS NUMERIC(2, 1)
            , WIN_IND SMALLINT
            , LOSS_IND SMALLINT
            , TIE_IND SMALLINT
            , ALL_PLAY_WLT_POINTS NUMERIC(3, 1)
            , ALL_PLAY_WINS SMALLINT
            , ALL_PLAY_LOSSES SMALLINT
            , ALL_PLAY_TIES SMALLINT
            , CUM_SCORE NUMERIC(6, 2)
            , CUM_SCORE_OPP NUMERIC(6, 2)
            , CUM_WLT_POINTS NUMERIC(3, 1)
            , CUM_WINS SMALLINT
            , CUM_LOSSES SMALLINT
            , CUM_TIES SMALLINT
            , CUM_ALL_PLAY_WLT_POINTS NUMERIC(4, 1)
            , CUM_ALL_PLAY_WINS SMALLINT
            , CUM_ALL_PLAY_LOSSES SMALLINT
            , CUM_ALL_PLAY_TIES SMALLINT
            , CUM_SCORE_PER_WEEK NUMERIC(5, 2)
            , CUM_SCORE_OPP_PER_WEEK NUMERIC(5, 2)
            , CUM_ALL_PLAY_WLT_POINTS_PER_WEEK NUMERIC(3, 1)
            , RECORD VARCHAR(10)
            , ALL_PLAY_RECORD VARCHAR(10)
            , STANDINGS SMALLINT
            , HOME_OR_AWAY VARCHAR(10)
            
            , CONSTRAINT WEEKLY_SCORES_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER, TEAM_ID)
        );
        '''

    cursor = conn.cursor()
    cursor.execute(create_table_statement)
    conn.commit()

    cursor.close()
Exemplo n.º 14
0
def execute_sql(cnx: connect, sql: str, values: tuple) -> list:
    """Run one or more ;-separated SQL statements and report on each.

    Parameters
    ----------
    cnx : connection
        Open database connection; it is ALWAYS closed before returning.
    sql : str
        Either SQL text, or (when it contains '.sql') a path to a script
        file whose contents are read and executed.
    values : tuple
        Bound parameters passed to every statement.

    Returns
    -------
    list
        One dict per executed statement holding the query text and either
        the fetched rows (SELECT) or a timing / row-count message.
    """
    start = time.time()
    cursor = cnx.cursor()

    # Treat the argument as a script path when it mentions '.sql'.
    if '.sql' in sql:
        # `with` guarantees the file handle is closed even if read() fails
        # (the original opened/closed it manually).
        with open(sql, 'r') as file:
            sql = file.read()

    result = []
    sql_command = sql.split(';')
    try:
        for sql in sql_command:
            # NOTE(review): this skips any statement that merely CONTAINS
            # '--' anywhere, not only pure comment lines -- confirm intent.
            if sql == '' or '--' in sql:
                continue
            try:
                cursor.execute(sql, values)

                if 'SELECT' in sql:
                    column_names = tuple(desc[0]
                                         for desc in cursor.description)
                    result.append({
                        "query": sql,
                        'column_names': column_names,
                        "data": cursor.fetchall()
                    })
                elif 'DELETE' in sql:
                    stop = time.time() - start
                    result.append({
                        "query":
                        sql,
                        "data":
                        'query executed in {:06.3f}s. {} rows affected'.format(
                            stop, cursor.rowcount)
                    })
                else:
                    stop = time.time() - start
                    result.append({
                        "query":
                        sql,
                        "data":
                        'query executed in {:06.3f}s'.format(stop)
                    })
            except ProgrammingError as error:
                print(error)
                print("query unsuccessful: {}".format(sql))
                result.append({
                    "query": sql,
                    "data": 'query unsuccessful: {}'.format(error)
                })
    finally:
        # The CONNECTION (not just the cursor) is closed here by design,
        # so the connection passed in cannot be reused afterwards.
        cursor.close()
        cnx.close()

    return result
def run(conn: connect):
    """Insert into fetched_records, then read the table back and print it.

    NOTE(review): the INSERT statement below has no column list or VALUES
    clause and looks truncated -- confirm against the intended schema.
    """
    with conn:
        with conn.cursor() as curs:
            curs.execute("insert into fetched_records")

    with conn:
        with conn.cursor() as curs:
            # cursor.execute() always returns None in psycopg2; the rows
            # must be fetched from the cursor explicitly (the original
            # printed the execute() return value, i.e. None).
            curs.execute("select * from fetched_records")
            rows = curs.fetchall()

    print(rows)
Exemplo n.º 16
0
def add_chat_to_db(conn: psycopg2.connect, cursor, chat_name: str,
                   chat_id: int):
    """Register the chat in the chats table unless it already exists."""
    # Parameterized queries instead of f-string interpolation: chat_name is
    # user-controlled text and previously allowed SQL injection.
    cursor.execute("SELECT * FROM chats WHERE chat_id = %s", (int(chat_id),))
    chat = cursor.fetchone()
    if not chat:
        cursor.execute(
            "INSERT INTO chats (chat_name, chat_id) VALUES (%s, %s)",
            (chat_name, int(chat_id)),
        )
        conn.commit()
Exemplo n.º 17
0
def copy_s3_to_staging(cur: psycopg2, conn: psycopg2.connect) -> None:
    """
    Load the S3 bucket contents into the staging tables.
    :param cur: PostgreSQL cursor
    :param conn: PostgreSQL connection object
    :return: None
    """
    # Commit after every COPY so each staged table is persisted on its own.
    for copy_query in copy_table_queries:
        logger.debug("Copying data to staging table as\n{}".format(copy_query))
        cur.execute(copy_query)
        conn.commit()
 def upload_comment(self, comment_data: Comment, conn: psycopg2.connect):
     """Insert a single chat-comment row and commit it."""
     row = (
         comment_data.message,
         comment_data.author_name,
         comment_data.thumbnails,
         comment_data.timestamp_msec,
         comment_data.timestamp_text,
         comment_data.purchase_amount,
         comment_data.movie_id,
     )
     cur = conn.cursor()
     cur.execute(
         "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);",
         row)
     conn.commit()
     cur.close()
Exemplo n.º 19
0
def create_staging_tables(cur: psycopg2, conn: psycopg2.connect) -> None:
    """
    Create the temporary tables used for staging.
    :param cur: PostgreSQL cursor
    :param conn: PostgreSQL connection object
    :return: None
    """
    # All CREATE statements run first; one commit persists them together.
    for create_query in insert_temp_table_queries:
        logger.debug("Creating staging table as \n{}".format(create_query))
        cur.execute(create_query)
    conn.commit()
Exemplo n.º 20
0
def execute_query(conn: psycopg2.connect, query: str) -> None:
    """Execute one query: commit on success, roll back (and log) on error.

    Errors are logged, not re-raised (best-effort behavior preserved).
    """
    cursor = conn.cursor()

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Unable to execute query. Error: {error}")
        conn.rollback()
    finally:
        # Close exactly once on every path (the original closed the cursor
        # twice when the query failed).
        cursor.close()
 def upload_comments(self, comment_datas: List[Comment],
                     conn: psycopg2.connect):
     """Bulk-insert chat comments via executemany and commit once."""
     rows = [
         (c.message, c.author_name, c.thumbnails, c.timestamp_msec,
          c.timestamp_text, c.purchase_amount, c.movie_id)
         for c in comment_datas
     ]
     cur = conn.cursor()
     cur.executemany(
         "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);",
         rows)
     conn.commit()
     cur.close()
Exemplo n.º 22
0
def access_control_db(con: psycopg2.connect) -> psycopg2.connect:
    # Intended to create two users (an admin to populate the db and a
    # query-only user for front-end authentication); currently it only
    # checks for the 'music_man' role and prints the match count.
    con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cur = con.cursor()
    try:
        # Build the role-count query with safely quoted identifiers.
        role_check = sql.SQL(
            "select COUNT(*) from {table} where {pkey} = %s").format(
                table=sql.Identifier('pg_roles'),
                pkey=sql.Identifier('rolname'))
        cur.execute(role_check, ('music_man', ))
        print(cur.fetchone()[0])
    except psycopg2.DatabaseError as e:
        print('Error in DB access control: {}'.format(e))
Exemplo n.º 23
0
def create_table(conn: psycopg2.connect) -> None:
    """Create the moodtracker table if it does not already exist."""
    ddl = """
        CREATE TABLE IF NOT EXISTS moodtracker (
            message_id      varchar(40)      NOT NULL,
            chat_id         varchar(10)      NOT NULL,
            mood            varchar(10),
            reason          varchar(25),
            note            varchar(255),
            date            timestamp 
        )
    """
    cur = conn.cursor()
    cur.execute(ddl)
    cur.close()
    conn.commit()
Exemplo n.º 24
0
def get_score(conn: psycopg2.connect) -> float:
    """Return the average mood score (1-5) over distinct entries, rounded
    to one decimal place.

    Returns 0.0 when the moodtracker table is empty: SQL AVG yields NULL
    then, and the original `round(None, 1)` raised a TypeError.
    """
    sql = """
    SELECT AVG(value)
        FROM (
            SELECT 
            DISTINCT message_id, mood, date, 
            CASE 
                WHEN mood = 'Awesome'  THEN 5
                WHEN mood = 'Good'     THEN 4
                WHEN mood = 'Okay'     THEN 3
                WHEN mood = 'Bad'      THEN 2
                WHEN mood = 'Terrible' THEN 1
            END AS value
        FROM moodtracker
        ORDER BY date ASC
    ) AS mean
    """
    cur = conn.cursor()

    cur.execute(sql)
    data = cur.fetchall()

    cur.close()

    average = data[0][0]
    return 0.0 if average is None else round(average, 1)
Exemplo n.º 25
0
def drop_tables(cur: psycopg2.connect, conn: psycopg2.connect) -> None:
    """
    Drop each table via the statements in `drop_table_queries`.

    Parameters
    ----------

    cur : psycopg2.cursor
        Cursor used to run the DROP statements.

    conn : psycopg2.connect
        Database connection; committed after every statement.
    """
    for drop_query in drop_table_queries:
        cur.execute(drop_query)
        conn.commit()
def record_exists(record: Record, conn: connect):
    """Return True when fetched_records already contains this record_id."""
    params = {'record_id': record.meta.record_id}
    with conn, conn.cursor() as curs:
        curs.execute(
            "SELECT record_id_updated_at FROM fetched_records WHERE record_id = %(record_id)s",
            params)
        row = curs.fetchone()
    # fetchone() gives None (no match) or a non-empty tuple (match).
    return bool(row)
Exemplo n.º 27
0
def drop_rows(conn: psycopg2.connect, table: str, where_condition: str) -> None:
    """Delete the rows of *table* that match *where_condition*.

    Commits on success; on database error prints the error, rolls back,
    and returns 1 (legacy error signal).
    """
    delete_query = f'''
        DELETE FROM {table}
        WHERE {where_condition}
    '''

    cursor = conn.cursor()
    try:
        cursor.execute(delete_query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    else:
        cursor.close()
Exemplo n.º 28
0
def run_sql_etl(sql: str, conn: pg.connect, table_name: str):
    """Execute an ETL SQL script on the given PostgreSQL connection and log
    how long it took.

    Args:
        sql (str): SQL script to run
        conn (psycopg2.connect): PostgreSQL database connection
        table_name (str): Table name for logging purposes
    """
    started = time()
    log.debug('Running SQL ETL for "%s" table', table_name)
    with conn.cursor() as cursor:
        cursor.execute(sql)
        conn.commit()
    elapsed = round(time() - started, 3)
    log.info(
        'SQL ETL for table "%s" completed in: %s seconds',
        table_name,
        elapsed,
    )
def fetch_local_s3_url(record: Record, query: Search, conn: connect):
    """Look up the cached S3 location of a record.

    Returns (True, (s3_bucket, s3_location)) on a hit, or the 1-tuple
    (False,) on a miss.
    """
    with conn, conn.cursor() as curs:
        curs.execute(
            "SELECT s3_bucket, s3_location FROM fetched_records where record_id = %(record_id)s limit 1",
            {'record_id': record.meta.record_id})
        row = curs.fetchone()
    if row is None:
        return False,
    return True, row
Exemplo n.º 30
0
def process_data(
    cur: psycopg2.connect,
    conn: psycopg2.connect,
    filepath: str,
    func: Callable,
) -> None:
    """
    Perform data processing for specific raw files.

    Parameters
    ----------
    cur : psycopg2.cursor
        Cursor for accessing database with psycopg.

    conn : psycopg2.connect
        Database connection instance.

    filepath : str
        Path for target file.

    func : Callable
        Function to use to process file.

    """

    # collect the absolute path of every *.json file under filepath
    all_files = []
    for root, _dirs, _files in os.walk(filepath):
        for match in glob.glob(os.path.join(root, '*.json')):
            all_files.append(os.path.abspath(match))

    # report how many files were discovered
    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # process each file, committing after every one
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
Exemplo n.º 31
0
def bulk_load_df(data: pd.DataFrame, table_name: str, conn: pg.connect):
    """Bulk-insert a pandas dataframe into a PostgreSQL table via COPY.

    Args:
        data (pandas.Dataframe): Data for insertion
        table_name (str): Table name for logging purposes
        conn (psycopg2.connect): PostgreSQL database connection
    """
    # Serialize to CSV in memory; StringIO(initial) starts positioned at 0.
    csv_payload = data.to_csv(index=None, header=None, na_rep='')
    buffer = StringIO(csv_payload)

    with conn.cursor() as cur:
        cur.copy_from(
            buffer,
            table_name,
            columns=data.columns,
            sep=',',
            null='',
        )
        conn.commit()