import pandas as pd
from sqlsorcery import MSSQL


def test_this_works():
    config = {
        "server": "db",
        "db": "master",
        "user": "******",
        "pwd": "Just4TestingThis",
        "schema": "dbo",
    }
    sql = MSSQL(**config)
    data = [{"id": 1, "value": "test1"}, {"id": 2, "value": "test2"}]
    df = pd.DataFrame(data)
    sql.insert_into("test_table", df)
    results = pd.read_sql_table("test_table", con=sql.engine, schema=sql.schema)
    assert len(results) == 2
import pandas as pd
from sqlalchemy.exc import ProgrammingError
from sqlsorcery import MSSQL


def migrate_mssql():
    try:
        sql = MSSQL()
        # Tables
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationData_changehistory.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationData_raw_backup.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationData_raw.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationDataIndex_raw_backup.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationDataIndex_raw.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ApplicationStatuses.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_FactDailyStatus.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_lk_Enrollment.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_ProgressMonitoring.sql")
        sql.exec_cmd_from_file("sql/mssql/tables/schoolmint_SchoolCodes.sql")
        # Load lookup tables
        enrollments = pd.read_csv("sql/data/lk_enrollment.csv")
        sql.insert_into("schoolmint_lk_Enrollment", enrollments)
        application_statuses = pd.read_csv("sql/data/application_statuses.csv")
        sql.insert_into("schoolmint_ApplicationStatuses", application_statuses)
        # Views
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_AppStatusList.sql")
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_FactDailyStatus_InterimTargets.sql")
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_FactDailyStatus.sql")
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_ProgressMonitoring.sql")
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_FactProgressMonitoring.sql")
        sql.exec_cmd_from_file("sql/mssql/views/vw_schoolmint_Index_Demographics.sql")
        # Stored Procedures
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Create_ChangeTracking_Entries.sql")
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Create_FactDailyStatus.sql")
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Index_PostProcess.sql")
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Index_PrepareTables.sql")
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Raw_PostProcess.sql")
        sql.exec_cmd_from_file("sql/mssql/sprocs/sproc_schoolmint_Raw_PrepareTables.sql")
    except ProgrammingError as e:
        if "Cannot open database" in str(e):
            print("ERROR: First create your database and schema manually")
        else:
            raise  # don't silently swallow unrelated errors
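# A minimal entrypoint sketch (an assumption, not part of the original
# migration module): MSSQL() with no arguments reads its connection settings
# from environment variables, so once those are exported the migration can be
# run directly as a script.
if __name__ == "__main__":
    migrate_mssql()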
import urllib.request
import zipfile

import pandas as pd
from sqlsorcery import MSSQL

# Download 2019 SBAC Research Files
url = "http://caaspp-elpac.cde.ca.gov/caaspp/researchfiles/sb_ca2019_1_csv_v2.zip"
urllib.request.urlretrieve(url, "sb_ca2019_1_csv_v2.zip")

# Extract flat files from zip folder
with zipfile.ZipFile("sb_ca2019_1_csv_v2.zip", "r") as z:
    z.extractall()

# Read datasets into dataframes
entities = pd.read_csv("sb_ca2019entities_csv.txt", encoding="ISO-8859-1")
scores = pd.read_csv("sb_ca2019_1_csv_v2.txt", encoding="ISO-8859-1")

# Establish database connection
sql = MSSQL()

# Insert SBAC files into database tables
sql.insert_into("sbac_entities_2019", entities, chunksize=1000)
sql.insert_into("sbac_scores_2019", scores, chunksize=1000)

# Query district scores for Oakland and San Francisco
df = sql.query_from_file("query.sql")
print(df)
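# The contents of query.sql aren't shown above. As a hedged sketch, an inline
# equivalent via sql.query() might join the two tables loaded above on their
# district identifiers. The table names come from the inserts; the column and
# district names here are hypothetical and would need to match the actual
# SBAC research file layout.
df = sql.query(
    """
    SELECT s.*
    FROM sbac_scores_2019 s
    JOIN sbac_entities_2019 e ON e.[District Code] = s.[District Code]
    WHERE e.[District Name] IN ('Oakland Unified', 'San Francisco Unified')
    """
)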
import logging
import os
from datetime import datetime, timedelta
from fnmatch import fnmatch

import pandas as pd
from sqlsorcery import MSSQL

# FTP, Browser, and the data_reports mapping are defined elsewhere in the project.


class Connector:
    """ETL connector class"""

    def __init__(self):
        self.data_dir = "data"
        self.sql = MSSQL()
        self.ftp = FTP(self.data_dir)

    def sync_all_ftp_data(self):
        for table_name, directory_name in data_reports.items():
            self.ftp.download_files(directory_name)
            self._load_new_records_into_table(table_name, directory_name)

    def _load_new_records_into_table(self, table_name, report_name):
        """Find and insert new records into the data warehouse."""
        start_date = self._get_latest_date(table_name) + timedelta(days=1)
        yesterday = datetime.today() - timedelta(days=1)
        if start_date > yesterday:
            logging.info(f"Clever_{table_name} is up to date. No records inserted.")
            return
        else:
            file_names = self._generate_file_names(start_date, yesterday, report_name)
            df = self._read_and_concat_files(file_names)
            self.sql.insert_into(f"Clever_{table_name}", df, if_exists="append")
            logging.info(f"Inserted {len(df)} records into Clever_{table_name}.")

    def _get_latest_date(self, table_name):
        """Get the latest date record in this table."""
        date = self.sql.query(
            f"SELECT TOP(1) [date] FROM custom.Clever_{table_name} ORDER BY [date] DESC"
        )
        latest_date = date["date"][0]
        return datetime.strptime(latest_date, "%Y-%m-%d")

    def _generate_file_names(self, start_date, yesterday, report_name):
        file_names = []
        while start_date <= yesterday:  # loop day by day up through yesterday
            formatted_date = start_date.strftime("%Y-%m-%d")
            file_names.append(f"{formatted_date}-{report_name}-students.csv")
            start_date += timedelta(days=1)
        return file_names

    def _read_and_concat_files(self, file_names):
        dfs = []
        for file_name in file_names:
            df = pd.read_csv(f"{self.data_dir}/{file_name}")
            logging.info(f"Read {len(df)} records from '{file_name}'.")
            dfs.append(df)
        data = pd.concat(dfs)
        return data

    def sync_student_google_accounts(self):
        """Get student emails from Google Accounts Manager app."""
        browser = Browser(self.data_dir)
        browser.export_student_google_accounts()
        # Transform and load csv data into database table
        df = self._get_data_from_csv_by_name("Student_export")
        df.rename(columns={"ID": "SIS_ID"}, inplace=True)
        self.sql.insert_into("Clever_StudentGoogleAccounts", df, if_exists="replace")
        logging.info(f"Inserted {len(df)} new records into Clever_StudentGoogleAccounts.")

    def _get_data_from_csv_by_name(self, string_to_match):
        """Get the downloaded csv BY NAME and store it in a dataframe."""
        file_path = None
        for filename in os.listdir(self.data_dir):
            if fnmatch(filename, f"*{string_to_match}*"):
                file_path = f"{self.data_dir}/{filename}"
                break
        if file_path is None:
            # Guard against an unbound file_path when no download matched.
            raise FileNotFoundError(f"No downloaded file matches '{string_to_match}'.")
        df = pd.read_csv(file_path)
        logging.info(f"Loaded {len(df)} records from downloaded file.")
        return df
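# A hedged usage sketch (not part of the original module): running both sync
# jobs from a script entrypoint. This assumes MSSQL() and FTP() pick up their
# credentials from the environment, as in the other examples here.
if __name__ == "__main__":
    connector = Connector()
    connector.sync_all_ftp_data()
    connector.sync_student_google_accounts()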
import logging
import os

import numpy as np
import pandas as pd
from sqlsorcery import MSSQL

# FTP is a project helper that wraps the FTP connection.


class Connector:
    """
    Data connector for Extracting data, Transforming into dataframes,
    and Loading into a database.
    """

    def __init__(self):
        self.sql = MSSQL()
        self.ftp = FTP()
        self.localdir = "files"
        self.remotedir = "seis"
        self.table_prefix = "SEIS"
        self.schools = self.ftp.get_directory_names(self.remotedir)

    def remove_local_files(self):
        """Remove any leftover files from the local project directory."""
        filelist = [f for f in os.listdir(self.localdir)]
        for filename in filelist:
            if "gitkeep" not in filename:
                os.remove(os.path.join(self.localdir, filename))

    def get_files_from_ftp(self):
        """Loop through all sub-folders and download files from FTP."""
        self.remove_local_files()
        self.ftp.download_all(self.remotedir, self.localdir)
        self.filenames = [f for f in os.listdir(self.localdir) if f.endswith(".csv")]
        logging.info(f"{len(self.filenames)} files downloaded.")

    def read_files_into_df(self, file_name):
        """
        Given the file name (eg. Student or Service), read the files
        and concat into one DataFrame.

        Params:
            file_name (str): name of the file that you are trying to combine.

        Return:
            DataFrame: combined data from all files with the same name
            (ie. same type of data)
        """
        dfs = []
        for school in self.schools:
            path = os.path.join(self.localdir, f"{school}_{file_name}.csv")
            df = pd.read_csv(
                path, sep=",", quotechar='"', doublequote=True, dtype=str, header=0
            )
            dfs.append(df)
        merged = pd.concat(dfs)
        merged.replace(np.nan, "", regex=True, inplace=True)
        return merged

    def insert_df_into_db(self, df, table_name):
        """
        Insert DataFrame into database with given table name.

        Params:
            df (DataFrame): data to insert into the database.
            table_name (str): name of the database table that you want to update.

        Return:
            none
        """
        table = f"{self.table_prefix}_{table_name}"
        self.sql.insert_into(table, df, if_exists="replace")
        logging.info(f"Inserted {len(df)} records into {table}.")
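# A hedged usage sketch (an assumption, not shown in the original): pulling
# the SEIS exports and loading the Student file end to end. "Student" is one
# of the file names the docstring above mentions.
if __name__ == "__main__":
    connector = Connector()
    connector.get_files_from_ftp()
    students = connector.read_files_into_df("Student")
    connector.insert_df_into_db(students, "Student")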
import logging

from sqlsorcery import MSSQL


def insert_table(df, tablename):
    """Drop and replace database table with each run"""
    df_len = len(df.index)
    sql = MSSQL()
    sql.insert_into(tablename, df, if_exists="replace")
    logging.info(f"Loaded {df_len} students into {tablename} table")
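# A minimal usage sketch, assuming a students DataFrame built upstream;
# the column names and table name here are hypothetical.
import pandas as pd

students = pd.DataFrame([{"student_id": 1, "grade": 9}, {"student_id": 2, "grade": 10}])
insert_table(students, "enrolled_students")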
import logging
from datetime import datetime, timedelta

import pandas as pd
from sqlsorcery import MSSQL

# FTP and the data_reports mapping are defined elsewhere in the project.


class Connector:
    """ETL connector class"""

    def __init__(self):
        self.data_dir = "data"
        self.sql = MSSQL()
        self.ftp = FTP(self.data_dir)

    def sync_all_ftp_data(self):
        for table_name, directory_name in data_reports.items():
            self.ftp.download_files(directory_name)
            self._load_new_records_into_table(table_name, directory_name)

    def _load_new_records_into_table(self, table_name, report_name):
        """Find and insert new records into the data warehouse."""
        if report_name == "idm-reports":
            # This folder contains the student emails file, which has no datestamp in the file name.
            self._process_files_without_datestamp(table_name, report_name)
        else:
            self._process_files_with_datestamp(table_name, report_name)

    def _process_files_without_datestamp(self, table_name, report_name):
        # The Student Emails file doesn't contain a datestamp in the file name.
        # This table should be truncated and replaced.
        df = self._read_file(f"{self.data_dir}/google-student-emails.csv")
        self.sql.insert_into(f"Clever_{table_name}", df, if_exists="replace")
        logging.info(f"Inserted {len(df)} records into Clever_{table_name}.")

    def _process_files_with_datestamp(self, table_name, report_name):
        # Generate names for files with datestamps in the file name and process those files.
        # These tables should be appended to, not truncated.
        start_date = self._get_latest_date(table_name) + timedelta(days=1)
        yesterday = datetime.today() - timedelta(days=1)
        if start_date > yesterday:
            logging.info(f"Clever_{table_name} is up to date. No records inserted.")
            return
        else:
            file_names = self._generate_file_names(start_date, yesterday, report_name)
            df = self._read_and_concat_files(file_names)
            self.sql.insert_into(f"Clever_{table_name}", df, if_exists="append")
            logging.info(f"Inserted {len(df)} records into Clever_{table_name}.")

    def _get_latest_date(self, table_name):
        """Get the latest date record in this table."""
        date = self.sql.query(
            f"SELECT TOP(1) [date] FROM custom.Clever_{table_name} ORDER BY [date] DESC"
        )
        latest_date = date["date"][0]
        return datetime.strptime(latest_date, "%Y-%m-%d")

    def _generate_file_names(self, start_date, yesterday, report_name):
        file_names = []
        while start_date <= yesterday:  # loop day by day up through yesterday
            formatted_date = start_date.strftime("%Y-%m-%d")
            file_names.append(f"{formatted_date}-{report_name}-students.csv")
            start_date += timedelta(days=1)
        return file_names

    def _read_and_concat_files(self, file_names):
        dfs = []
        for file_name in file_names:
            df = pd.read_csv(f"{self.data_dir}/{file_name}")
            logging.info(f"Read {len(df)} records from '{file_name}'.")
            dfs.append(df)
        data = pd.concat(dfs)
        return data

    def _read_file(self, file_name):
        df = pd.read_csv(file_name)
        logging.info(f"Read {len(df)} records from '{file_name}'.")
        return df
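# Both connector classes above iterate over a data_reports mapping that isn't
# shown. As a hedged sketch, it is presumably a dict of warehouse table name
# to FTP directory name; "idm-reports" appears in the code above, while the
# other entry here is hypothetical.
data_reports = {
    "StudentGoogleAccounts": "idm-reports",
    "DailyParticipation": "daily-participation",
}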
import pandas as pd
from sqlsorcery import MSSQL

# file and table are expected to be defined elsewhere in the script;
# a hedged sketch follows below.


def main():
    df = pd.read_csv(file, sep=",")
    sql = MSSQL()
    sql.insert_into(table, df)
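# A hedged invocation sketch; the file path and table name are hypothetical
# stand-ins for whatever the surrounding script actually defines.
file = "data/students.csv"
table = "students"

if __name__ == "__main__":
    main()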