def _checkmaps(conn: psycopg2.extensions.connection, col: str, tbl: str,
               file_itms: set):
    '''Resync the incoming data with what is currently in the db: check and remap.'''

    cur = conn.cursor()
    col = col.replace(" ", "_").lower()

    # pulling the 'key' column from the db
    stmt = f'''SELECT DISTINCT "{col}" from {tbl};'''
    LOG.debug(stmt)
    cur.execute(stmt)
    query = cur.fetchall()
    existing_items = {str(i[0]) for i in query}

    for each in file_itms:
        if str(each) not in existing_items:
            # clean up formatting problems
            if each is not None and isinstance(each, str):
                each = each.replace("'", "")
            # insert only the new items we didn't have before, passing the
            # value as a parameter so quoting is handled safely
            cur.execute(f'''INSERT INTO {tbl} ("{col}") VALUES (%s);''', (each,))
            LOG.debug(f"Inserted new item {col}: {each} into table {tbl}")

    conn.commit()
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):

    cursor = connection.cursor()
    df = df.copy()
    escaped = {'\\': '\\\\', '\n': r'\n', '\r': r'\r', '\t': r'\t'}
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for v, e in escaped.items():
                # literal (non-regex) replacement so backslashes are escaped correctly
                df[col] = df[col].str.replace(v, e, regex=False)
    try:
        for i in range(0, df.shape[0], chunk_size):
            f = io.StringIO()
            chunk = df.iloc[i:(i + chunk_size)]
            # The default separator is a tab, and NULLs are indicated by the two character-string '\N'
            chunk.to_csv(f,
                         index=False,
                         header=False,
                         sep='\t',
                         na_rep='\\N',
                         quoting=None)
            f.seek(0)
            cursor.copy_from(f, table, columns=list(df.columns))
            connection.commit()
    except psycopg2.Error:
        connection.rollback()
    finally:
        cursor.close()
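# A minimal usage sketch for the copy_from above; the connection string,
# table name, and data are hypothetical, and the target "prices" table must
# already exist with columns matching the DataFrame.
import pandas as pd
import psycopg2

conn = psycopg2.connect("dbname=demo user=demo")
frame = pd.DataFrame({"ticker": ["AAPL", "MSFT"], "price": [170.10, 310.50]})
copy_from(frame, "prices", conn, chunk_size=5000)
conn.close()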
Example No. 3
def __stock_tags_bulk_insert(df: pd.DataFrame, column_name: str,
                             conn: psycopg2.extensions.connection,
                             cursor: psycopg2.extensions.cursor) -> Any:
    """
        Inserts all tag rows of df in the DB of the relationship between stock and tags.
    :param df: df with three columns of stock and a column of tag.
    :param column_name: str. The name of tag column.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    for index, series in df.iterrows():
        if not pd.isnull(series[column_name]):
            # first searchs the stock id of row.
            stock_id_query = "SELECT id FROM stock " \
                             "WHERE stock_name = '{}' AND exchange = '{}' AND ticker = '{}'".format(
                series['stock_name'],
                series['exchange'],
                series['ticker'])
            cursor.execute(stock_id_query)
            stock_id = cursor.fetchone()[0]

            # next, searchs the tag id of row.
            tag_id_query = "SELECT id FROM tag WHERE tag_name = '{}' AND category = '{}'".format(
                series[column_name], column_name)
            cursor.execute(tag_id_query)
            tag_id = cursor.fetchone()[0]

            # inserts stock_tag relation using stock_id and tag_id
            query = "INSERT INTO stock_tag (stock_id, tag_id) " \
                    "VALUES ('{}', '{}') ON CONFLICT DO NOTHING".format(stock_id, tag_id)
            cursor.execute(query)
    conn.commit()
Example No. 4
def stocks_bulk_insert(df: pd.DataFrame, conn: psycopg2.extensions.connection,
                       cursor: psycopg2.extensions.cursor) -> Any:
    """
        Inserts all rows of df in the DB.
    :param df: pd.DataFrame. stocks data.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """

    if df.empty:
        return

    # iterate over the DataFrame rows
    for index, series in df.iterrows():
        query = ("INSERT INTO stock (ticker, stock_name, exchange) "
                 "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING")
        cursor.execute(query, (series["ticker"],
                               series["stock_name"],
                               series["exchange"]))
    conn.commit()

    print(">>> GETTING HISTORICAL DATA")
    ticker_list = list(df["ticker"])
    __get_historical_data(ticker_list)
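# Row-by-row execute() calls cost one round trip per stock; a hedged sketch
# of the same insert batched with psycopg2.extras.execute_values (assuming
# the same stock table as above) could look like this:
import pandas as pd
import psycopg2
from psycopg2 import extras as psql_extras

def stocks_bulk_insert_batched(df: pd.DataFrame,
                               conn: psycopg2.extensions.connection,
                               cursor: psycopg2.extensions.cursor) -> None:
    # collect all rows, then let execute_values expand the single %s placeholder
    rows = [(r["ticker"], r["stock_name"], r["exchange"])
            for _, r in df.iterrows()]
    psql_extras.execute_values(
        cursor,
        "INSERT INTO stock (ticker, stock_name, exchange) "
        "VALUES %s ON CONFLICT DO NOTHING",
        rows,
        page_size=1000)
    conn.commit()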
Example No. 5
def create_table(connection: psycopg2.extensions.connection,
                 cursor: psycopg2.extensions.cursor):
    """Function which creates the table if needed

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # SQL query to templates table
    create_table_query = '''CREATE TABLE IF NOT EXISTS templates
          (id                   SERIAL      PRIMARY KEY     NOT NULL,
          name                  TEXT                        NOT NULL,
          year                  INT                         NOT NULL,
          month                 INT                         NOT NULL,
          category              TEXT                        NOT NULL,
          uw_category           TEXT,
          wikibreak_category1   TEXT,
          wikibreak_category2   TEXT,
          wikibreak_subcategory TEXT,
          amount                INT,
          cumulative_amount     INT);
          '''
    # Execute a command
    cursor.execute(create_table_query)
    connection.commit()
    print("Table created successfully")
Example No. 6
def process_data(cur: psycopg2.extensions.cursor,
                 conn: psycopg2.extensions.connection, filepath: str, func):
    """
    Given a connection to a PostgresSQL database, a path to a directory on the
    local filesystem and a processing function, do the following:
        1. Load all *.json files found in filepath and its subdirectories
        2. Print the number of files found in step one
        3. Apply the processing function func to all files found in step one
        
    :param cur: Cursor
    :param conn: Connection to PostgreSQL database
    :param filepath: Path to JSON file
    :param func: (Python) function to process data
    """
    # get all files matching extension from directory
    all_files = []
    for root, _dirs, _files in os.walk(filepath):
        for f in glob.glob(os.path.join(root, '*.json')):
            all_files.append(os.path.abspath(f))

    # get total number of files found
    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # iterate over files and process
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
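# A hypothetical invocation of process_data; process_song_file stands in for
# any user-supplied function with signature (cursor, filepath).
conn = psycopg2.connect("dbname=sparkifydb")
cur = conn.cursor()
process_data(cur, conn, filepath='data/song_data', func=process_song_file)
conn.close()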
Example No. 7
def load_tables(config: list, connection: pg.extensions.connection):
    # Iterate and load
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        table_files = [
            filename for filename in os.listdir(downloads_path)
            if filename.startswith(table_name)
        ]
        table_files = check_for_fhv_2017_type(name=table_name,
                                              files=table_files)
        if not table_files:
            print("""No files to upload to {} table.""".format(table_name))
        else:
            for file in table_files:
                file_name = file.split('.')[0]
                table_source = downloads_path.joinpath(f"{file_name}.csv")
                print("""Started to load {} data to db from {}.""".format(
                    table_name, table_source))
                with open(table_source, 'r', encoding='utf-8') as f:
                    next(f)
                    cur.copy_expert(
                        f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
                connection.commit()
                print("""Completed loading file {} into {} table.""".format(
                    file, table_name))
Example No. 8
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    cursor = connection.cursor()
    df = df.copy()

    escaped = {
        '\\': '\\\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for v, e in escaped.items():
                # literal (non-regex) replacement so backslashes are escaped correctly
                df[col] = df[col].str.replace(v, e, regex=False)
    try:
        for i in tqdm(range(0, df.shape[0], chunk_size)):
            f = StringIO()
            chunk = df.iloc[i:(i + chunk_size)]

            chunk.to_csv(f,
                         index=False,
                         header=False,
                         sep='\t',
                         na_rep='\\N',
                         quoting=None)
            f.seek(0)
            cursor.copy_from(f, table, columns=[f'"{c}"' for c in df.columns])
            connection.commit()
    except psycopg2.Error as e:
        print(e)
        connection.rollback()
    cursor.close()
Example No. 9
def create_table(name: str, schema: str,
                 connection: psycopg2.extensions.connection):
    c = connection.cursor()
    ddl = f"""CREATE TABLE IF NOT EXISTS {name} ({schema})"""
    c.execute(ddl)
    connection.commit()
    c.close()
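# A short, hypothetical call (conn is an open psycopg2 connection); the
# schema argument is raw DDL, so the caller is trusted to pass valid SQL.
create_table(name="prices",
             schema="id SERIAL PRIMARY KEY, ticker TEXT NOT NULL, price NUMERIC",
             connection=conn)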
Example No. 10
def insert_DiscordUser_into_database(
        member_id: int, cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    try:
        cursor.execute(sql_insert_DiscordUser_into_database, (member_id, ))
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(f"Error while inserting into DiscordUser on database: {error}")
Example No. 11
def create_tables(config: list, connection: pg.extensions.connection):
    cur = connection.cursor()
    for table in config:
        name = table.get('name')
        schema = table.get('schema')
        ddl = f"""CREATE TABLE IF NOT EXISTS {name} ({schema})"""
        cur.execute(ddl)

    connection.commit()
def insert_new_row(table: str, columns: T.List[str], variables: T.List[T.Any],
                   conn: psycopg2.extensions.connection):
    columns_str = ', '.join(columns)
    placeholders_str = ', '.join(['%s'] * len(columns))
    dml = f"INSERT INTO {table} ({columns_str}) VALUES ({placeholders_str})"

    cur = conn.cursor()
    cur.execute(dml, variables)
    conn.commit()
    cur.close()
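# A hypothetical call (conn is an open psycopg2 connection); the stock table
# from the earlier examples is reused purely for illustration.
insert_new_row(table="stock",
               columns=["ticker", "stock_name", "exchange"],
               variables=["AAPL", "Apple Inc.", "NASDAQ"],
               conn=conn)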
Example No. 13
def create_index(connection: psycopg2.extensions.connection,
                 cursor: psycopg2.extensions.cursor):
    """Creates an hash index on the name column

    Args:
        connection (psycopg2.extensions.connection): [description]
        cursor (psycopg2.extensions.cursor): [description]
    """
    create_index_query = "CREATE INDEX template_name_index ON templates USING hash (name)"
    cursor.execute(create_index_query)
    connection.commit()
    print("Index on the column name created successfully")
Example No. 14
def insert_DiscordUserVoiceSession_into_database(
        member_id: int, channel_id: int, session_amount: float,
        cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    try:
        cursor.execute(sql_insert_DiscordUserVoiceSession_into_Database,
                       (member_id, channel_id, session_amount))
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(
            f"Error while inserting into DiscordUserVoiceSession on database: {error}"
        )
Example No. 15
def create_table(sql_query: str, conn: psycopg2.extensions.connection,
                 cur: psycopg2.extensions.cursor) -> None:
    try:
        # Execute the table creation query
        cur.execute(sql_query)
    except Exception as e:
        print(f"{type(e).__name__}: {e}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        # To take effect, changes need to be committed to the database
        conn.commit()
Example No. 16
def load_tables(config: list, connection: pg.extensions.connection):

    # iterate and load
    cur = connection.cursor()
    data_path = '../data/'

    for table in config:
        table_name = table.get('name')
        table_source = data_path + f"{table_name}.csv"
        with open(table_source, 'r') as f:
            next(f)
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Example No. 17
def drop_table(connection: psycopg2.extensions.connection,
               cursor: psycopg2.extensions.cursor):
    """Function which drops the table

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # SQL query to drop templates table
    drop_table_query = '''DROP TABLE templates;'''
    # Execute a command
    cursor.execute(drop_table_query)
    connection.commit()
    print("Table deleted successfully")
Example No. 18
def load_tables(config: list, connection: pg.extensions.connection):

    # iterate and load
    cur = connection.cursor()
    data_path = Path(os.environ['HOME'], 'Documents', 'data_science', 'ht_v2',
                     'data')

    for table in config:
        table_name = table.get('name')
        table_source = data_path.joinpath(f"{table_name}.csv")
        with open(table_source, 'r') as f:
            next(f)
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Example No. 19
def run_query(
    sql_query: str,
    conn: psycopg2.extensions.connection,
    cur: psycopg2.extensions.cursor,
) -> None:
    try:
        cur.execute(sql_query)
    except Exception as e:
        print(f"{type(e).__name__}: {e}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        conn.commit()
Example No. 20
def get_curr_rev_id(conn: psycopg2.extensions.connection) -> Union[str, None]:
    curs = conn.cursor()
    try:
        curs.execute('SELECT ver FROM migro_ver')
        return curs.fetchone()[0]
    except psycopg2.ProgrammingError:
        conn.rollback()
        curs.execute(
            'CREATE TABLE migro_ver (ver VARCHAR(12) PRIMARY KEY)')
        conn.commit()
        return None
    except TypeError:
        return None
    finally:
        curs.close()
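# Not part of the original: a hedged sketch of the matching writer for the
# migro_ver table, replacing the stored revision id with a new one.
def set_curr_rev_id(conn: psycopg2.extensions.connection, ver: str) -> None:
    with conn.cursor() as curs:
        # the table holds a single row, so clear it before writing
        curs.execute('DELETE FROM migro_ver')
        curs.execute('INSERT INTO migro_ver (ver) VALUES (%s)', (ver,))
    conn.commit()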
Example No. 21
def load_tables(config: list,
                connection: pg.extensions.connection,
                prefix: str = None):
    # Iterate and load
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        table_name_csv = table_name if not prefix else prefix + table_name
        table_source = data_path.joinpath(f"{table_name_csv}.csv")
        print("""Started to load {} data to db from {}.""".format(
            table_name, table_source))
        with open(table_source, 'r', encoding='utf-8') as f:
            next(f)
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
        print("""Completed loading {} table.""".format(table_name))
Example No. 22
def execute_query(
    connection: psycopg2.extensions.connection,
    query: str,
    query_parameters: tuple = None
) -> Tuple[Optional[List[dict]], Optional[Dict[str, Any]]]:
    """
        Functie om de database connecties uit connect_databases.py te gebruiken om informatie op te vragen of wijzigingen
        te doen in de database.
        Benodigde input is de database connectie van de bonvengenoemde functie, die in het voorbeeld in een dictionary staat
        en een query op de in de functie ingevoerde database. Optioneel zijn andere parameters
        die psycopg2 accepteerd, zie:
        http://initd.org/psycopg/docs/usage.html
        http://initd.org/psycopg/docs/cursor.html
        """
    if connection.closed:
        print(
            f"Reconnecting closed connection to {connection.get_dsn_parameters()['dbname']}"
        )
    try:
        # Following http://initd.org/psycopg/docs/faq.html#best-practices
        # And https://stackoverflow.com/questions/21158033/query-from-postgresql-using-python-as-dictionary#21158697
        with connection.cursor(cursor_factory=DictCursor) as cursor:
            print(f'Executing query {query} with {query_parameters}')
            cursor.execute(query, query_parameters)
            try:
                results = [dict(row) for row in cursor]
            # the query returned no rows
            except psycopg2.ProgrammingError:
                results = []
            connection.commit()
    except Exception as e:
        print(
            f"Error: {e}, \ndatabase: {connection.get_dsn_parameters()['dbname']}, \nquery: {query}, \nparameters: "
            f"{query_parameters}")
        connection.rollback()
        error = {
            'response_json':
            json.dumps({'error': {
                'response_code': '400',
                'reason': str(e)
            }}),
            'status_code':
            400
        }
        return None, error
    return results, None
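# Example call against the templates table defined earlier (connection is an
# open psycopg2 connection; the year filter is illustrative):
results, error = execute_query(
    connection,
    "SELECT id, name FROM templates WHERE year = %s",
    (2020,))
if error is None:
    for row in results:
        print(row["id"], row["name"])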
Example No. 23
def insert_dataframe(table: str, conn: psycopg2.extensions.connection,
                     data: pd.DataFrame):
    """

    :param table:
    :param conn:
    :param data:
    :return:
    """
    cursor = conn.cursor()
    records = StringIO(
        data.to_csv(header=False, sep="\t",
                    na_rep="\\N").replace("[", "{").replace("]", "}"))

    res = cursor.copy_from(records, table)
    conn.commit()
    return res
def import_reviews(tsv_filename: str, conn: psycopg2.extensions.connection,
                   cur: psycopg2.extras.RealDictCursor) -> None:
    """Import reviews into the database.

    Args:
        tsv_filename: Name of TSV file containing reviews.
    """
    # Bulk insert review file
    logger.info(f'Bulk inserting {tsv_filename}...')
    with open(tsv_filename, 'r') as f:
        next(f)
        cur.copy_from(f,
                      settings.REVIEW_TABLE,
                      sep='\t',
                      columns=('user_id', 'user_name', 'score', 'content',
                               'date'))
    conn.commit()
def _update_db_maps(conn: psycopg2.extensions.connection, data: pd.DataFrame):

    cur = conn.cursor()

    LOG.info("Doing mapping checks")
    mapping_check = {
        "HomeCenter": "center_map",
        "Computer Name": "device_map",
        "software_hash": "software_map",
        # "IP Address": "ip_map",
        "OS": "os_map",
    }

    for coln, tbl in mapping_check.items():
        col = COL_REPLACE[coln]
        _checkmaps(conn, col, tbl, set(data[coln].unique()))

    conn.commit()
Example No. 26
def __tags_bulk_insert(df: pd.DataFrame, column_name: str,
                       conn: psycopg2.extensions.connection,
                       cursor: psycopg2.extensions.cursor) -> Any:
    """
        Inserts all rows of df in the DB.
    :param df: df with three columns of stock and a column of tag.
    :param column_name: str. The name of tag column.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    for index, series in df.iterrows():
        if not pd.isnull(series[column_name]):
            query = "INSERT INTO tag (category, tag_name) " \
                    "VALUES ('{}', '{}') " \
                    "ON CONFLICT DO NOTHING".format(column_name, series[column_name])
            cursor.execute(query)
    conn.commit()
Example No. 27
    def main_ingest(df: pd.DataFrame,
                    table: str,
                    connection: psycopg2.extensions.connection,
                    chunk_size: int = 10000):
        """needs a table first"""
        print(connection)

        df = df.copy()

        escaped = {
            '\\': '\\\\',
            '\n': r'\n',
            '\r': r'\r',
            '\t': r'\t',
        }
        for col in df.columns:
            if df.dtypes[col] == 'object':
                for v, e in escaped.items():
                    # escape (rather than drop) special characters in string cells
                    df[col] = df[col].apply(
                        lambda x: x.replace(v, e) if isinstance(x, str) else x)
        cursor = connection.cursor()
        try:
            for i in tqdm(range(0, df.shape[0], chunk_size)):
                f = StringIO()
                chunk = df.iloc[i:(i + chunk_size)]

                chunk.to_csv(f,
                             index=False,
                             header=False,
                             sep='\t',
                             na_rep='\\N',
                             quoting=None)
                f.seek(0)
                cursor.copy_from(f,
                                 f'"{table}"',
                                 columns=[f'"{i}"' for i in df.columns])
                connection.commit()
        except psycopg2.Error as e:
            print(e)
            connection.rollback()
        cursor.close()
Example No. 28
def fetch_and_queue(*,
                    conn: psycopg2.extensions.connection,
                    student_id: str,
                    run: int,
                    queued_items: Set[Item],
                    blocked_items: Set[Item],
                    area_code_filter: Optional[str] = None) -> int:
    parsed_student, unparsed_json = get_student(student_id)

    count = 0

    with conn.cursor() as curs:
        for stnum, catalog, code in expand_student(student=parsed_student):
            # skip already-queued items in this loop to avoid postgres deadlocks
            if (stnum, code) in queued_items:
                continue

            # skip audits that have been blocked
            if (stnum, code) in blocked_items:
                continue

            # allow filtering batches of audits
            if area_code_filter is not None and area_code_filter != code:
                continue

            count += 1

            curs.execute(
                """
                INSERT INTO queue (priority, student_id, area_catalog, area_code, input_data, run)
                VALUES (1, %(stnum)s, %(catalog)s, %(code)s, cast(%(data)s as jsonb), %(run)s)
            """, {
                    "stnum": stnum,
                    "catalog": catalog,
                    "code": code,
                    "data": unparsed_json,
                    "run": run
                })

        conn.commit()

    return count
Example No. 29
def _get_personal_transport_internal(lat: float, lng: float, t: Union[int, List[int]], conn: psycopg2.extensions.connection,
        personal_transport_endpoint: str, timeout: int = 240) -> Union[Dict[str, Any], Dict[int, Dict[str, Any]]]:
    if isinstance(t, int):
        times = [t]
    else:
        times = t
    result: Union[Dict[str, Any], Dict[int, Dict[str, Any]]]
    if len(times) > 1:
        result = {} # type: ignore
    with conn.cursor() as cur:
        for t_cur in times:
            data = requests.post(personal_transport_endpoint, timeout=timeout, headers={'Accept-encoding': 'gzip,deflate'}, json=
                {
                    'source': [lng, lat],
                    'cost': t_cur * 60,
                    'day_time': 46800,
                    'mode_type': 'car_cost'
                }
            ).json()
            if 'features' not in data:
                log.error(f'Personal transport download ({lat}, {lng}, {t_cur}) failed: "features" is not found in data from transport model service'
                        f' ({personal_transport_endpoint})\ndata:\n{data}')
                geom = {'type': 'Polygon', 'coordinates': []}
            elif len(data['features']) == 0:
                log.warning(f'Personal transport download ({lat}, {lng}, {t_cur}): "features" is empty')
                geom = {'type': 'Polygon', 'coordinates': []}
            elif len(data['features']) > 1:
                log.warning(f'Personal transport availability has more than 1 ({len(data["features"])}) poly: ({lat}, {lng}, {t_cur})')
                cur.execute('SELECT ST_AsGeoJSON(ST_UNION(ARRAY[' + ',\n'.join(map(lambda x: f'ST_GeomFromGeoJSON(\'{json.dumps(x["geometry"])}\')',
                        data['features'])) + '])) LIMIT 1')
                geom = json.loads(cur.fetchone()[0])
            else:
                geom = data['features'][0]['geometry']
            if isinstance(t, int):
                result = geom
            else:
                result[t_cur] = geom # type: ignore
            if len(geom['coordinates']) != 0:
                cur.execute('INSERT INTO car (latitude, longitude, time, geometry) VALUES'
                        ' (%s, %s, %s, ST_SetSRID(ST_GeomFromGeoJSON(%s), 4326))'
                        ' ON CONFLICT (latitude, longitude, time) DO UPDATE SET geometry=excluded.geometry',
                        (lat, lng, t_cur, json.dumps(geom)))
        conn.commit()
    return result
Example No. 30
def insert_data(query: str, conn: psycopg2.extensions.connection,
                cur: psycopg2.extensions.cursor, df: pd.DataFrame,
                page_size: int) -> None:
    data_tuples = [tuple(row.to_numpy()) for index, row in df.iterrows()]

    try:
        psql_extras.execute_values(cur,
                                   query,
                                   data_tuples,
                                   page_size=page_size)
        print("Query:", cur.query)

    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print("Query:", cur.query)
        conn.rollback()
        cur.close()

    else:
        conn.commit()
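# A hypothetical call: execute_values expands the collected row tuples into
# the single %s placeholder of the VALUES clause; stocks_df is a placeholder
# DataFrame with ticker, stock_name and exchange columns.
insert_query = "INSERT INTO stock (ticker, stock_name, exchange) VALUES %s"
insert_data(insert_query, conn, conn.cursor(), stocks_df, page_size=500)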