Example #1
def _create_table(
    df: pd.DataFrame,
    cursor: "pyodbc.Cursor",
    table: str,
    schema: str,
    mode: str,
    index: bool,
    dtype: Optional[Dict[str, str]],
    varchar_lengths: Optional[Dict[str, int]],
) -> None:
    if mode == "overwrite":
        _drop_table(cursor=cursor, schema=schema, table=table)
    elif _does_table_exist(cursor=cursor, schema=schema, table=table):
        return
    sqlserver_types: Dict[str, str] = _data_types.database_types_from_pandas(
        df=df,
        index=index,
        dtype=dtype,
        varchar_lengths_default="VARCHAR(MAX)",
        varchar_lengths=varchar_lengths,
        converter_func=_data_types.pyarrow2sqlserver,
    )
    cols_str: str = "".join([f"{k} {v},\n" for k, v in sqlserver_types.items()])[:-2]
    table_identifier = _get_table_identifier(schema, table)
    sql = (
        f"IF OBJECT_ID(N'{table_identifier}', N'U') IS NULL BEGIN CREATE TABLE {table_identifier} (\n{cols_str}); END;"
    )
    _logger.debug("Create table query:\n%s", sql)
    cursor.execute(sql)
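This helper drops or skips table creation depending on `mode`, maps pandas/pyarrow dtypes to SQL Server column types, and issues a guarded CREATE TABLE. Below is a minimal usage sketch, not part of the library source: it assumes the surrounding module (`_drop_table`, `_does_table_exist`, `_get_table_identifier`, `_data_types`, `_logger`) is available, and the DSN, schema, and table names are hypothetical placeholders.

# Usage sketch only; module helpers are assumed to be importable alongside _create_table.
import pandas as pd
import pyodbc

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
con = pyodbc.connect("DSN=my_sqlserver;UID=user;PWD=secret")  # hypothetical DSN
cursor = con.cursor()
_create_table(
    df=df,
    cursor=cursor,
    table="my_table",   # hypothetical table name
    schema="dbo",
    mode="overwrite",   # drops the table first, then recreates it
    index=False,
    dtype=None,
    varchar_lengths=None,
)
con.commit()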
Example #2
def _create_table(
    df: pd.DataFrame,
    cursor: pg8000.Cursor,
    table: str,
    schema: str,
    mode: str,
    index: bool,
    dtype: Optional[Dict[str, str]],
    varchar_lengths: Optional[Dict[str, int]],
) -> None:
    if mode == "overwrite":
        _drop_table(cursor=cursor, schema=schema, table=table)
    elif _does_table_exist(cursor=cursor, schema=schema, table=table):
        return
    postgresql_types: Dict[str, str] = _data_types.database_types_from_pandas(
        df=df,
        index=index,
        dtype=dtype,
        varchar_lengths_default="TEXT",
        varchar_lengths=varchar_lengths,
        converter_func=_data_types.pyarrow2postgresql,
    )
    cols_str: str = "".join(
        [f"{k} {v},\n" for k, v in postgresql_types.items()])[:-2]
    sql = f'CREATE TABLE IF NOT EXISTS "{schema}"."{table}" (\n{cols_str})'
    _logger.debug("Create table query:\n%s", sql)
    cursor.execute(sql)
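The PostgreSQL variant follows the same pattern but uses CREATE TABLE IF NOT EXISTS and quotes the schema and table identifiers. A hedged usage sketch, assuming the same module context and a reachable PostgreSQL server; credentials and names below are placeholders.

# Usage sketch only; _drop_table, _does_table_exist and _data_types are assumed present.
import pandas as pd
import pg8000

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
con = pg8000.connect(user="user", password="secret", host="localhost", database="mydb")
cursor = con.cursor()
_create_table(
    df=df,
    cursor=cursor,
    table="my_table",   # hypothetical table name
    schema="public",
    mode="append",      # keeps an existing table instead of dropping it
    index=False,
    dtype=None,
    varchar_lengths=None,
)
con.commit()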
Example #3
def _create_table(
    df: Optional[pd.DataFrame],
    path: Optional[Union[str, List[str]]],
    cursor: redshift_connector.Cursor,
    table: str,
    schema: str,
    mode: str,
    index: bool,
    dtype: Optional[Dict[str, str]],
    diststyle: str,
    sortstyle: str,
    distkey: Optional[str],
    sortkey: Optional[List[str]],
    primary_keys: Optional[List[str]],
    varchar_lengths_default: int,
    varchar_lengths: Optional[Dict[str, int]],
    parquet_infer_sampling: float = 1.0,
    use_threads: bool = True,
    boto3_session: Optional[boto3.Session] = None,
    s3_additional_kwargs: Optional[Dict[str, str]] = None,
) -> Tuple[str, Optional[str]]:
    if mode == "overwrite":
        _drop_table(cursor=cursor, schema=schema, table=table)
    elif _does_table_exist(cursor=cursor, schema=schema, table=table):
        if mode == "upsert":
            guid: str = uuid.uuid4().hex
            temp_table: str = f"temp_redshift_{guid}"
            sql: str = f"CREATE TEMPORARY TABLE {temp_table} (LIKE {schema}.{table})"
            _logger.debug(sql)
            cursor.execute(sql)
            return temp_table, None
        return table, schema
    diststyle = diststyle.upper() if diststyle else "AUTO"
    sortstyle = sortstyle.upper() if sortstyle else "COMPOUND"
    if df is not None:
        redshift_types: Dict[str, str] = _data_types.database_types_from_pandas(
            df=df,
            index=index,
            dtype=dtype,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            converter_func=_data_types.pyarrow2redshift,
        )
    elif path is not None:
        redshift_types = _redshift_types_from_path(
            path=path,
            varchar_lengths_default=varchar_lengths_default,
            varchar_lengths=varchar_lengths,
            parquet_infer_sampling=parquet_infer_sampling,
            use_threads=use_threads,
            boto3_session=boto3_session,
            s3_additional_kwargs=s3_additional_kwargs,
        )
    else:
        raise ValueError("df and path are None.You MUST pass at least one.")
    _validate_parameters(
        redshift_types=redshift_types,
        diststyle=diststyle,
        distkey=distkey,
        sortstyle=sortstyle,
        sortkey=sortkey,
    )
    cols_str: str = "".join([f"{k} {v},\n"
                             for k, v in redshift_types.items()])[:-2]
    primary_keys_str: str = f",\nPRIMARY KEY ({', '.join(primary_keys)})" if primary_keys else ""
    distkey_str: str = f"\nDISTKEY({distkey})" if distkey and diststyle == "KEY" else ""
    sortkey_str: str = f"\n{sortstyle} SORTKEY({','.join(sortkey)})" if sortkey else ""
    sql = (f"CREATE TABLE IF NOT EXISTS {schema}.{table} (\n"
           f"{cols_str}"
           f"{primary_keys_str}"
           f")\nDISTSTYLE {diststyle}"
           f"{distkey_str}"
           f"{sortkey_str}")
    _logger.debug("Create table query:\n%s", sql)
    cursor.execute(sql)
    return table, schema
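The Redshift variant adds column-type inference from Parquet files on S3, distribution and sort key handling, and an upsert path that stages data in a temporary table. A minimal sketch of a direct call, assuming the same module helpers (`_drop_table`, `_does_table_exist`, `_redshift_types_from_path`, `_validate_parameters`) and a reachable cluster; connection parameters and names are placeholders.

# Usage sketch only; cluster endpoint, credentials and table names are hypothetical.
import pandas as pd
import redshift_connector

df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
con = redshift_connector.connect(
    host="my-cluster.abc123.us-east-1.redshift.amazonaws.com",  # hypothetical endpoint
    database="dev",
    user="awsuser",
    password="secret",
)
cursor = con.cursor()
created_table, created_schema = _create_table(
    df=df,
    path=None,              # infer types from the DataFrame, not from Parquet on S3
    cursor=cursor,
    table="my_table",
    schema="public",
    mode="overwrite",
    index=False,
    dtype=None,
    diststyle="AUTO",
    sortstyle="COMPOUND",
    distkey=None,
    sortkey=None,
    primary_keys=None,
    varchar_lengths_default=256,
    varchar_lengths=None,
)
con.commit()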