Exemplo n.º 1
0
def insert_data(conn: psycopg2.connect, df: pd.DataFrame) -> None:
    '''
    Bulk insert dataframe into advertisementdata table.

    This function was inspired by Naysan Saran's article "Pandas to PostgreSQL using Psycopg2: Bulk Insert Performance
    Benchmark", in which the author chose a variety of bulk insert methods and compared their execution time. Saving the
    dataframe to a StringIO object and then copying this to the database proved to be the most efficient when dealing
    with millions of records.

    Source: https://naysan.ca/2020/05/09/pandas-to-postgresql-using-psycopg2-bulk-insert-performance-benchmark/
    '''

    set_index(conn, df)

    # Serialize the frame to in-memory CSV, then stream it through COPY FROM.
    buffer = StringIO()
    df.to_csv(buffer, index_label='id', header=False)
    buffer.seek(0)
    cursor = conn.cursor()

    try:
        cursor.copy_from(buffer, 'advertisementdata', sep=",")
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Error inserting data: {error}")
        conn.rollback()
    finally:
        # Close exactly once on both paths (the original closed the cursor
        # twice after an error: once in the except block and once after it).
        cursor.close()
def create_table_weeks(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''
    Creates the columns and relationships of the WEEKS table.

    Parameters
    ----------
    conn : psycopg2 connection instance.
    overwrite : when True, drop any existing WEEKS table before creating it.
    '''

    table_name = 'WEEKS'

    # Optionally prepend a DROP so the CREATE below starts from a clean slate.
    drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};''' if overwrite else ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER SMALLINT
            , MATCHUP_PERIOD SMALLINT
            , REG_SEASON_FLAG SMALLINT

            , CONSTRAINT WEEKS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER)
        );
        '''

    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Always release the cursor, even when the DDL fails (the original
        # leaked it on error).
        cursor.close()
Exemplo n.º 3
0
def insert_mood(conn: psycopg2.connect, update: Update,
                user_data: Dict[str, Union[str, List[str]]]) -> None:
    """
    Insert one moodtracker row per selected reason, or a single row with a
    NULL reason when no reasons were given, then commit.
    """
    sql = """
    INSERT INTO moodtracker 
        (message_id, chat_id, mood, reason, note, date) VALUES(%s, %s, %s, %s, %s, %s)
    """

    chat_id = update.message.chat_id
    mood = user_data['mood']
    note = user_data['note']
    reasons = user_data['reasons']
    date = datetime.now()
    # chat id + microsecond timestamp gives a per-message unique key
    message_id = str(chat_id) + str(date.strftime("%Y%m%d%H%M%S%f"))

    cur = conn.cursor()
    try:
        if not reasons:
            # No reasons selected: store a single row with reason = NULL.
            cur.execute(sql, (message_id, chat_id, mood, None, note, date))
        else:
            # Iterate the local list (the original re-read user_data['reasons']).
            for reason in reasons:
                cur.execute(sql, (message_id, chat_id, mood, reason, note, date))
        conn.commit()
    finally:
        # The original leaked the cursor when execute() raised.
        cur.close()
def create_table_teams(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''
    Creates the columns and relationships of the TEAMS table.

    Parameters
    ----------
    conn : psycopg2 connection instance.
    overwrite : when True, drop any existing TEAMS table before creating it.
    '''

    table_name = 'TEAMS'

    # Optionally prepend a DROP so the CREATE below starts from a clean slate.
    drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};''' if overwrite else ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , TEAM_ID SMALLINT
            , MANAGER_ID VARCHAR(50)
            , TEAM_NAME VARCHAR(50)
            , MANAGER_NAME VARCHAR(50)
            , ESPN_NAME VARCHAR(50)

            , CONSTRAINT TEAMS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, TEAM_ID)
        );
        '''

    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Always release the cursor, even when the DDL fails (the original
        # leaked it on error).
        cursor.close()
Exemplo n.º 5
0
def upsert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str, pkeys: list) -> None:
    """
    Bulk upsert a dataframe into `table` with INSERT ... ON CONFLICT ... DO UPDATE.

    Values are escaped with cursor.mogrify() — matching this function's stated
    intent — instead of str(tuple), which produced invalid SQL for strings
    containing quotes / None values and was an SQL-injection vector.

    Returns None on success, 1 on failure (legacy error signal, kept for
    backward compatibility).
    """

    cursor = conn.cursor()

    # Unbox numpy scalars back to plain Python values so psycopg2 can adapt them.
    tuples = [
        tuple(v.item() if hasattr(v, 'item') else v for v in row)
        for row in df.to_numpy()
    ]

    # Build "(%s,%s,...)" once, then let mogrify render each row safely quoted.
    placeholders = '(' + ','.join(['%s'] * len(df.columns)) + ')'
    tuples_str = ', '.join(
        cursor.mogrify(placeholders, t).decode('utf-8') for t in tuples
    )

    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))

    insert_statement = "INSERT INTO %s(%s) VALUES %s" % (table, cols, tuples_str)
    on_conflict_statement = 'ON CONFLICT (' + ', '.join(map(str, pkeys)) + ')'
    do_update_statement = _create_update_set_statement(list(df.columns))

    # SQL query to execute
    query = insert_statement + ' ' + on_conflict_statement + ' ' + do_update_statement

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    cursor.close()
def create_table_scores(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''
    Creates the columns and relationships of the SCORES table (its primary
    key constraint is historically named WEEKLY_SCORES_PKEY).

    Parameters
    ----------
    conn : psycopg2 connection instance.
    overwrite : when True, drop any existing SCORES table before creating it.
    '''

    table_name = 'SCORES'

    # Optionally prepend a DROP so the CREATE below starts from a clean slate.
    drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};''' if overwrite else ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , WEEK_NUMBER BIGINT
            , TEAM_ID SMALLINT
            , TEAM_ID_OPP SMALLINT
            , SCORE NUMERIC(5, 2)
            , SCORE_OPP NUMERIC(5, 2)
            , WLT_POINTS NUMERIC(2, 1)
            , WIN_IND SMALLINT
            , LOSS_IND SMALLINT
            , TIE_IND SMALLINT
            , ALL_PLAY_WLT_POINTS NUMERIC(3, 1)
            , ALL_PLAY_WINS SMALLINT
            , ALL_PLAY_LOSSES SMALLINT
            , ALL_PLAY_TIES SMALLINT
            , CUM_SCORE NUMERIC(6, 2)
            , CUM_SCORE_OPP NUMERIC(6, 2)
            , CUM_WLT_POINTS NUMERIC(3, 1)
            , CUM_WINS SMALLINT
            , CUM_LOSSES SMALLINT
            , CUM_TIES SMALLINT
            , CUM_ALL_PLAY_WLT_POINTS NUMERIC(4, 1)
            , CUM_ALL_PLAY_WINS SMALLINT
            , CUM_ALL_PLAY_LOSSES SMALLINT
            , CUM_ALL_PLAY_TIES SMALLINT
            , CUM_SCORE_PER_WEEK NUMERIC(5, 2)
            , CUM_SCORE_OPP_PER_WEEK NUMERIC(5, 2)
            , CUM_ALL_PLAY_WLT_POINTS_PER_WEEK NUMERIC(3, 1)
            , RECORD VARCHAR(10)
            , ALL_PLAY_RECORD VARCHAR(10)
            , STANDINGS SMALLINT
            , HOME_OR_AWAY VARCHAR(10)
            
            , CONSTRAINT WEEKLY_SCORES_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID, WEEK_NUMBER, TEAM_ID)
        );
        '''

    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Always release the cursor, even when the DDL fails (the original
        # leaked it on error).
        cursor.close()
Exemplo n.º 7
0
def add_timer_to_db(conn: psycopg2.connect, cursor, user_id: int, chat_id: int,
                    time: datetime):
    """Add a timer row to the alerts table and commit."""
    print('add_timer_to_db')
    # Hard-coded UTC-5 shift; presumably converts server time to the bot's
    # local timezone — TODO confirm against the deployment environment.
    time = time - timedelta(hours=5)
    # Parameterized query: the original interpolated values straight into the
    # SQL string (f-string), which is SQL-injection-prone and fragile for
    # datetime formatting.
    cursor.execute(
        "INSERT INTO alerts (user_id, chat_id, time) VALUES (%s, %s, %s)",
        (user_id, chat_id, time),
    )
    conn.commit()
Exemplo n.º 8
0
def add_chat_to_db(conn: psycopg2.connect, cursor, chat_name: str,
                   chat_id: int):
    """Add the chat to the chat list; return False if it is already in the DB,
    True after inserting it.

    The original docstring promised a False return but the function always
    returned None; the bool return is backward compatible because None and
    False are both falsy.
    """
    # Parameterized queries replace the original f-string interpolation,
    # which was SQL-injection-prone (especially via chat_name).
    cursor.execute("SELECT * FROM chats WHERE chat_id = %s", (int(chat_id),))
    if cursor.fetchone():
        return False
    cursor.execute(
        "INSERT INTO chats (chat_name, chat_id) VALUES (%s, %s)",
        (chat_name, int(chat_id)),
    )
    conn.commit()
    return True
Exemplo n.º 9
0
def copy_s3_to_staging(cur: psycopg2, conn: psycopg2.connect) -> None:
    """
    Load the contents of the S3 buckets into the database staging tables.
    :param cur: PostgreSQL cursor
    :param conn: PostgreSQL connection object
    :return: None
    """
    # Commit after each COPY so every bucket load is durable on its own.
    for copy_sql in copy_table_queries:
        logger.debug("Copying data to staging table as\n{}".format(copy_sql))
        cur.execute(copy_sql)
        conn.commit()
Exemplo n.º 10
0
def create_staging_tables(cur: psycopg2, conn: psycopg2.connect) -> None:
    """
    Create the temporary tables used for staging.
    :param cur: PostgreSQL cursor
    :param conn: PostgreSQL connection object
    :return: None
    """
    # Run every DDL statement first, then commit them all in one transaction.
    for ddl_sql in insert_temp_table_queries:
        logger.debug("Creating staging table as \n{}".format(ddl_sql))
        cur.execute(ddl_sql)
    conn.commit()
 def upload_comment(self, comment_data: Comment, conn: psycopg2.connect):
     """Insert a single chat comment row into the comment table and commit."""
     cursor = conn.cursor()
     row = (comment_data.message, comment_data.author_name,
            comment_data.thumbnails, comment_data.timestamp_msec,
            comment_data.timestamp_text, comment_data.purchase_amount,
            comment_data.movie_id)
     cursor.execute(
         "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);",
         row)
     conn.commit()
     cursor.close()
Exemplo n.º 12
0
def execute_query(conn: psycopg2.connect, query: str) -> None:
    '''
    Execute an arbitrary SQL statement: commit on success, roll back and log
    on failure.
    '''
    cursor = conn.cursor()

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        logging.error(f"Unable to execute query. Error: {error}")
        conn.rollback()
    finally:
        # Close exactly once on both paths (the original closed the cursor
        # twice after an error).
        cursor.close()
 def upload_comments(self, comment_datas: List[Comment],
                     conn: psycopg2.connect):
     """Bulk-insert chat comment rows into the comment table and commit."""
     cursor = conn.cursor()
     rows = [(c.message, c.author_name, c.thumbnails, c.timestamp_msec,
              c.timestamp_text, c.purchase_amount, c.movie_id)
             for c in comment_datas]
     cursor.executemany(
         "INSERT INTO comment(message,author_name,thumbnails,timestamp_msec,timestamp_text,purchase_amount,movie_id) VALUES (%s,%s,%s,%s,%s,%s,%s);",
         rows)
     conn.commit()
     cursor.close()
Exemplo n.º 14
0
def create_table(conn: psycopg2.connect) -> None:
    """Create the moodtracker table; a no-op when it already exists."""
    ddl = """
        CREATE TABLE IF NOT EXISTS moodtracker (
            message_id      varchar(40)      NOT NULL,
            chat_id         varchar(10)      NOT NULL,
            mood            varchar(10),
            reason          varchar(25),
            note            varchar(255),
            date            timestamp 
        )
    """
    cur = conn.cursor()
    cur.execute(ddl)
    conn.commit()
    cur.close()
Exemplo n.º 15
0
def drop_tables(cur: psycopg2.connect, conn: psycopg2.connect) -> None:
    """
    Drop every table listed in `drop_table_queries`.

    Parameters
    ----------

    cur : psycopg2.cursor
        Cursor for accessing database with psycopg.

    conn : psycopg2.connect
        Database connection instance.
    """
    # Commit after each drop so earlier drops survive a later failure.
    for drop_sql in drop_table_queries:
        cur.execute(drop_sql)
        conn.commit()
Exemplo n.º 16
0
def drop_rows(conn: psycopg2.connect, table: str, where_condition: str) -> None:
    '''
    Drops rows from a table based on a set of conditions.

    Returns None on success, 1 on failure (legacy error signal).

    NOTE(review): `table` and `where_condition` are interpolated directly
    into the SQL text (identifiers/raw predicates cannot be parameterized),
    so callers must never pass untrusted input here.
    '''

    query = f'''
        DELETE FROM {table}
        WHERE {where_condition}
    '''

    cursor = conn.cursor()
    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Close exactly once on both paths (the original closed the cursor
        # twice after an error).
        cursor.close()
Exemplo n.º 17
0
def run_sql_etl(sql: str, conn: pg.connect, table_name: str):
    """Execute a SQL ETL script on the provided PostgreSQL connection.

    Args:
        sql (str): SQL script to run
        conn (psycopg2.connect): PostgreSQL database connection
        table_name (str): Table name for logging purposes
    """
    started = time()
    log.debug('Running SQL ETL for "%s" table', table_name)
    with conn.cursor() as cursor:
        cursor.execute(sql)
        conn.commit()
    elapsed = round(time() - started, 3)
    log.info(
        'SQL ETL for table "%s" completed in: %s seconds',
        table_name,
        elapsed,
    )
Exemplo n.º 18
0
def process_data(
    cur: psycopg2.connect,
    conn: psycopg2.connect,
    filepath: str,
    func: Callable,
) -> None:
    """
    Perform data processing for specific raw files.

    Walks `filepath` for *.json files and runs `func` on each, committing
    after every file.

    Parameters
    ----------
    cur : psycopg2.cursor
        Cursor for accessing database with psycopg.

    conn : psycopg2.connect
        Database connection instance.

    filepath : str
        Path for target file.

    func : Callable
        Function to use to process file.
    """

    # Absolute path of every .json file anywhere under filepath.
    all_files = [
        os.path.abspath(match)
        for root, _, _ in os.walk(filepath)
        for match in glob.glob(os.path.join(root, '*.json'))
    ]

    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # Process each file in turn, committing per file and reporting progress.
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
Exemplo n.º 19
0
def bulk_load_df(data: pd.DataFrame, table_name: str, conn: pg.connect):
    """Bulk inserts a pandas dataframe into a PostgreSQL table via COPY FROM.

    Args:
        data (pandas.Dataframe): Data for insertion
        table_name (str): Table name for logging purposes
        conn (psycopg2.connect): PostgreSQL database connection
    """
    # Serialize the frame to CSV once; StringIO(initial) starts positioned
    # at offset 0, so no explicit seek is needed.
    csv_payload = data.to_csv(index=None, header=None, na_rep='')
    buffer = StringIO(csv_payload)

    with conn.cursor() as cursor:
        cursor.copy_from(
            buffer,
            table_name,
            columns=data.columns,
            sep=',',
            null='',
        )
        conn.commit()
Exemplo n.º 20
0
def add_curator_to_chat(conn: psycopg2.connect, cursor, chat_id: int,
                        user_id: int, username: str):
    """Add a chat curator to the DB, skipping users already registered for
    that chat."""
    print('add_curator_to_chat')
    # Parameterized queries replace the original f-string interpolation,
    # which was SQL-injection-prone (especially via `username`).
    cursor.execute(
        "SELECT user_id FROM curators WHERE chat_id = %s", (chat_id,))
    rows = cursor.fetchall()
    # NOTE(review): rows are indexed by name — assumes a dict-like cursor
    # row factory, as in the original code.
    is_in_db = any(row['user_id'] == user_id for row in rows or [])
    if not is_in_db:
        cursor.execute(
            "INSERT INTO curators (chat_id, user_id, username) "
            "VALUES (%s, %s, %s)",
            (chat_id, user_id, username),
        )
        conn.commit()
Exemplo n.º 21
0
def insert_rows(conn: psycopg2.connect, df: pd.DataFrame, table: str) -> None:
    '''
    Inserts the df values into the DB table with one multi-row INSERT.

    Values are escaped with cursor.mogrify() instead of str(tuple), which
    produced invalid SQL for strings containing quotes / None values and was
    an SQL-injection vector.

    Returns None on success, 1 on failure (legacy error signal, kept for
    backward compatibility).
    '''

    cursor = conn.cursor()

    # Unbox numpy scalars back to plain Python values so psycopg2 can adapt them.
    tuples = [
        tuple(v.item() if hasattr(v, 'item') else v for v in row)
        for row in df.to_numpy()
    ]

    # Build "(%s,%s,...)" once, then let mogrify render each row safely quoted.
    placeholders = '(' + ','.join(['%s'] * len(df.columns)) + ')'
    tuples_str = ', '.join(
        cursor.mogrify(placeholders, t).decode('utf-8') for t in tuples
    )

    # Comma-separated dataframe columns
    cols = ','.join(list(df.columns))

    # SQL query to execute
    query = "INSERT INTO %s(%s) VALUES %s" % (table, cols, tuples_str)

    try:
        cursor.execute(query)
        conn.commit()
    except (Exception, psycopg2.DatabaseError) as error:
        print("Error: %s" % error)
        conn.rollback()
        cursor.close()
        return 1
    cursor.close()
Exemplo n.º 22
0
def create_table_settings(conn: psycopg2.connect, overwrite: bool=False) -> None:
    '''
    Creates the columns and relationships of the SETTINGS table.

    Parameters
    ----------
    conn : psycopg2 connection instance.
    overwrite : when True, drop any existing SETTINGS table before creating it.
    '''

    table_name = 'SETTINGS'

    # Optionally prepend a DROP so the CREATE below starts from a clean slate.
    drop_table_statement = f'''DROP TABLE IF EXISTS {table_name};''' if overwrite else ''

    create_table_statement = f'''
        {drop_table_statement}
        
        CREATE TABLE {table_name}
        (
            LEAGUE_ID BIGINT
            , SEASON_ID SMALLINT
            , PLAYOFF_SEEDING_RULE VARCHAR(100)
            , PLAYOFF_SEEDING_RULE_BY SMALLINT
            , NUM_PLAYOFF_TEAMS SMALLINT
            , FIRST_SCORING_PERIOD SMALLINT
            , FINAL_SCORING_PERIOD SMALLINT
            , PLAYOFF_WEEK_START SMALLINT
            , SCORING_TYPE VARCHAR(50)
            , REG_SEASON_MATCHUP_TIEBREAKER VARCHAR(50)
            , PLAYOFF_MATCHUP_TIEBREAKER VARCHAR(50)
            , HOME_TEAM_BONUS SMALLINT

            , CONSTRAINT SETTINGS_PKEY PRIMARY KEY(LEAGUE_ID, SEASON_ID)
        );
        '''

    cursor = conn.cursor()
    try:
        cursor.execute(create_table_statement)
        conn.commit()
    finally:
        # Always release the cursor, even when the DDL fails (the original
        # leaked it on error).
        cursor.close()
Exemplo n.º 23
0
def parse_and_insert(cur: psycopg2, conn: psycopg2.connect) -> None:
    """
    Take the data in the staging tables and distribute them into their proper Redshift analytic tables.
    :param cur: PostgreSQL cursor
    :param conn: PostgreSQL connection object
    :return:
    """
    # Songs and artists can be loaded with set-based inserts.
    logger.info("Fast inserting records into songs table.")
    cur.execute(song_table_fast_insert)
    logger.info("Fast inserting records into artists table.")
    cur.execute(artist_table_fast_insert)
    conn.commit()

    # The remaining tables are derived row-by-row from the staged events.
    cur.execute(events_select)
    records = cur.fetchall()
    event_columns = [
        'user_id', 'first_name', 'last_name', 'gender', 'level', 'ts',
        'artist_name', 'song_name', 'length', 'location', 'session_id',
        'user_agent'
    ]
    events_df = pd.DataFrame(records, columns=event_columns)
    logger.info(
        "Slow inserting records into time, users, and songplay tables.")
    _insert_df_to_time_user_songplay(cur, events_df)
    conn.commit()
Exemplo n.º 24
0
def commit(connection: psycopg2.connect) -> None:
    """Flush pending changes to the database.

    Thin convenience wrapper around psycopg2's connection.commit().
    """
    connection.commit()