Пример #1
0
def to_sql(df, name, schema, con, index, if_exists, mode='default', **kwargs):
    """
    Override the default `pandas.to_sql` method to allow for insertion of
    multiple rows of data at once. This is derived from the upstream patch at
    https://github.com/pandas-dev/pandas/pull/21401, and can be deprecated
    once it is merged and released in a new version of `pandas`.
    """
    assert mode in ('default',
                    'multi'), 'unexpected `to_sql` mode {}'.format(mode)
    if mode == 'default':
        return df.to_sql(name=name,
                         schema=schema,
                         con=con,
                         index=index,
                         if_exists=if_exists,
                         **kwargs)
    else:
        nrows = len(df)
        if nrows == 0:
            return

        chunksize = kwargs.get('chunksize', nrows)
        if chunksize == 0:
            raise ValueError('chunksize argument should be non-zero')
        chunks = int(nrows / chunksize) + 1

        pd_sql = SQLDatabase(con)
        pd_table = SQLTable(name,
                            pd_sql,
                            frame=df,
                            index=index,
                            if_exists=if_exists,
                            index_label=kwargs.get('insert_label'),
                            schema=schema,
                            dtype=kwargs.get('dtype'))
        pd_table.create()
        keys, data_list = pd_table.insert_data()

        with pd_sql.run_transaction() as conn:
            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, nrows)
                if start_i >= end_i:
                    break

                chunk_iter = zip(*[arr[start_i:end_i] for arr in data_list])
                data = [{k: v
                         for k, v in zip(keys, row)} for row in chunk_iter]
                conn.execute(pd_table.table.insert(data))  # multivalues insert
Пример #2
0
    def _get_sqa_table(self,
                       table_name: str,
                       table_type: str,
                       schema: str = None,
                       database: str = None,
                       dtype=None
                       ) -> None:
        """
        Method that extracts sqa table object
        """
        self._load_df_if_empty()
        pandas_sql = pandasSQL_builder(con=self._con)

        sqllite = False
        if not isinstance(pandas_sql, SQLDatabase):
            sqllite = True

        pd_db = SQLDatabase(engine=self._con)
        table_klass = SQLTable if not sqllite else SQLiteTable
        target_frame = (self._data if table_type == "staging"
                        else self._his_data)
        target_name = self._build_name(table=table_name, table_type=table_type)
        table = table_klass(
            name=target_name,
            pandas_sql_engine=pd_db,
            index=False,
            frame=target_frame
        )
        return table._create_table_setup()
Пример #3
0
def _create_table(schema: str, table_name: str, creds: SqlCreds,
                  df: pd.DataFrame, if_exists: str):
    """use pandas' own code to create the table and schema"""

    sql_db = SQLDatabase(engine=creds.engine, schema=schema)
    table = SQLTable(
        table_name,
        sql_db,
        frame=df,
        index=False,  # already set as new col earlier if index=True
        if_exists=if_exists,
        index_label=None,
        schema=schema,
        dtype=None,
    )
    table.create()
Пример #4
0
    def to_sql_set_primary_key_and_not_null(self,
                                            frame,
                                            name,
                                            con,
                                            keys,
                                            sql_table,
                                            schema=None,
                                            if_exists='fail',
                                            index=True,
                                            index_label=None,
                                            chunksize=None,
                                            dtype=None):
        # ref: https://github.com/pandas-dev/pandas/blob/master/pandas/io/sql.py#L437
        if if_exists not in ('fail', 'replace', 'append'):
            raise ValueError(
                "'{0}' is not valid for if_exists".format(if_exists))

        # ref: https://github.com/pandas-dev/pandas/blob/master/pandas/io/sql.py#L508
        pandas_sql = SQLDatabase(con, schema=schema)

        if isinstance(frame, pd.Series):
            frame = frame.to_frame()
        elif not isinstance(frame, pd.DataFrame):
            raise NotImplementedError(
                "'frame' argument should be either a Series or a DataFrame")

        if dtype is not None:
            from sqlalchemy.types import to_instance, TypeEngine
            for col, my_type in dtype.items():
                if not isinstance(to_instance(my_type), TypeEngine):
                    raise ValueError(
                        'The type of {} is not a SQLAlchemy type '.format(col))

        table = SQLTable(name,
                         pandas_sql,
                         frame=frame,
                         index=index,
                         if_exists=if_exists,
                         index_label=index_label,
                         schema=schema,
                         keys=keys,
                         dtype=dtype)
        table.table = sql_table
        table.create()
        table.insert(chunksize)
Пример #5
0
 def _sql_engine(self):
     return SQLDatabase(db.engine)
Пример #6
0
def read_sql_table(engine,
                   table_name,
                   index_col=None,
                   columns=None,
                   select_from=None,
                   limit=None,
                   order_by=None,
                   where=None,
                   coerce_types=None,
                   raise_on_missing=True):
    """ Load a table from a SQL database.
    
    Parameters
    ----------
    engine : SQLAlchemy engine
        The SQL database to load from.
    
    table_name : str
        The name of the table to load.
    
    index_col : str, optional
        Column name to use as index for the returned data frame.
    
    columns : sequence of str, optional
        Columns to select from the table. By default, all columns are selected.

    select_from : str or SQLAlchemy clause, optional
        A FROM clause to use for the select statement. Defaults to the
        table name.
    
    limit : int, optional
        Limit the number of rows selected.
    
    order_by : str or SQLAlchemy clause, optional
        An ORDER BY clause to sort the selected rows.
    
    where : str or SQLAlchemy clause, optional
        A WHERE clause used to filter the selected rows.
    
    coerce_types : dict(str : dtype or Python type), optional
        Override pandas type inference for specific columns.
    
    Returns
    -------
    A pandas DataFrame.
    """
    # Pandas does not expose many of these options, so we pull out some of
    # Pandas' internals.
    #
    # An alternative approach would be to use `pandas.read_sql_query` with an
    # appropriate (dialect-specific) query. However, this approach would not
    # utilize Pandas' logic for column type inference (performed by
    # `_harmonize_columns()` below), and would hence produce inferior results.

    from sqlalchemy.schema import MetaData
    from pandas.io.sql import SQLDatabase, SQLTable

    # From pandas.io.sql.read_sql_table
    # and  pandas.io.sql.SQLDatabase.read_table:
    meta = MetaData(engine)
    try:
        meta.reflect(only=[table_name])
    except sqlalchemy.exc.InvalidRequestError:
        if raise_on_missing:
            raise ValueError("Table %s not found" % table_name)
        else:
            return None

    pd_db = SQLDatabase(engine, meta=meta)
    pd_tbl = SQLTable(table_name, pd_db, index=None)

    # Adapted from pandas.io.SQLTable.read:
    if columns is not None and len(columns) > 0:
        if index_col is not None and index_col not in columns:
            columns = [index_col] + columns

        cols = [pd_tbl.table.c[n] for n in columns]
    else:
        cols = pd_tbl.table.c

    if pd_tbl.index is not None:
        [cols.insert(0, pd_tbl.table.c[idx]) for idx in pd_tbl.index[::-1]]

    # Strip the table name from each of the column names to allow for more
    # general FROM clauses.
    sql_select = sqlalchemy.select([
        sqlalchemy.column(str(c).replace('{}.'.format(table_name), '', 1))
        for c in cols
    ])

    if select_from is not None:
        sql_select = sql_select.select_from(select_from)
    else:
        sql_select = sql_select.select_from(sqlalchemy.table(table_name))

    if where is not None:
        if isinstance(where, basestring):
            where = sqlalchemy.text(where)
        sql_select = sql_select.where(where)
    if limit is not None:
        sql_select = sql_select.limit(limit)
    if order_by is not None:
        if isinstance(order_by, basestring):
            order_by = sqlalchemy.sql.column(order_by)
        sql_select = sql_select.order_by(order_by)

    result = pd_db.execute(sql_select)
    data = result.fetchall()
    column_names = result.keys()

    pd_tbl.frame = pandas.DataFrame.from_records(data,
                                                 index=index_col,
                                                 columns=column_names)

    # This line has caused issues with incorrect type inference -- add it
    # back with caution.
    # pd_tbl._harmonize_columns()

    # Added by me: coerce types
    if coerce_types:
        frame = pd_tbl.frame
        for col, dtype in coerce_types.iteritems():
            frame[col] = frame[col].astype(dtype, copy=False)

    return pd_tbl.frame