Пример #1
0
def _sync_without_cleanup(resource_df: DataFrame,
                          sync_db: sqlalchemy.engine.base.Engine) -> DataFrame:
    """
    Sync fetched Sections API data with the database, keyed on the "id" column.

    Tables are created when necessary, and temporary tables left over from an
    earlier run are tolerated. Temporary tables are not deleted afterwards.

    Parameters
    ----------
    resource_df: DataFrame
        the currently fetched Sections API data; it will be mutated, gaining
        Hash and CreateDate/LastModifiedDate
    sync_db: sqlalchemy.engine.base.Engine
        Engine instance used for creating database connections

    Returns
    -------
    DataFrame
        the current fetched data with reconciled CreateDate/LastModifiedDate
    """
    # All of the work is delegated to the shared sync routine; this wrapper
    # only pins the Sections resource name and its identity column.
    reconciled_df = sync_to_db_without_cleanup(
        sync_db=sync_db,
        resource_name=SECTIONS_RESOURCE_NAME,
        identity_columns=["id"],
        resource_df=resource_df,
    )
    return reconciled_df
    def test_db_after_sync(test_db_fixture):
        """
        Seed the Courses table with known rows, run a sync against it, and
        return the database handle so the resulting state can be inspected.

        Presumably registered as a pytest fixture; no decorator is visible in
        this part of the file -- TODO confirm.

        Parameters
        ----------
        test_db_fixture
            a connectable for the test database; it is used via ``connect()``
            and passed to ``DataFrame.to_sql``

        Returns
        -------
        the same ``test_db_fixture`` object, after the sync has run
        """
        # Function-scope import so the block does not depend on a module-level
        # pandas alias being available.
        from pandas import concat

        # arrange
        initial_course_data = [
            CHANGED_COURSE_BEFORE,
            UNCHANGED_COURSE,
            OMITTED_FROM_SYNC_COURSE,
        ]

        courses_initial_df = DataFrame(initial_course_data, columns=COLUMNS)
        courses_initial_df = add_hash_and_json_to(courses_initial_df)
        add_sourceid_to(courses_initial_df, IDENTITY_COLUMNS)

        # Fixed timestamp so the pre-existing rows have known date values.
        date_to_use = datetime(2020, 9, 14, 12, 0, 0)
        courses_initial_df["SyncNeeded"] = 0
        courses_initial_df["CreateDate"] = date_to_use
        courses_initial_df["LastModifiedDate"] = date_to_use
        courses_initial_df = courses_initial_df[SYNC_COLUMNS]

        courses_sync_df = DataFrame(SYNC_DATA, columns=COLUMNS)

        # Start from a freshly created Courses table.
        # NOTE(review): SQLAlchemy 2.0 no longer accepts raw SQL strings in
        # Connection.execute; wrap these in text() if the project upgrades.
        with test_db_fixture.connect() as con:
            con.execute("DROP TABLE IF EXISTS Courses")
            con.execute(f"""
                CREATE TABLE IF NOT EXISTS Courses (
                    {SYNC_COLUMNS_SQL}
                )
                """)

        courses_initial_df.to_sql("Courses",
                                  test_db_fixture,
                                  if_exists="append",
                                  index=False,
                                  chunksize=1000)

        # Duplicating a course to verify that duplicates will not be inserted.
        # DataFrame.append was removed in pandas 2.0; concatenating a one-row
        # slice adds the same first row (same index label) and keeps dtypes.
        courses_sync_df = concat([courses_sync_df, courses_sync_df.iloc[[0]]])

        # act
        sync_to_db_without_cleanup(courses_sync_df, IDENTITY_COLUMNS,
                                   "Courses", test_db_fixture)

        return test_db_fixture
Пример #3
0
def sync_resource(
    resource_name: str,
    db_engine: sqlalchemy.engine.base.Engine,
    data: List[Dict[str, Any]],
    id_column: str = "id",
) -> DataFrame:
    """
    Load fetched records into a DataFrame, sync them to the database, then
    run the post-sync cleanup.

    Parameters
    ----------
    resource_name: str
        name of the resource; passed to both the sync and the cleanup step
    db_engine: sqlalchemy.engine.base.Engine
        passed to the sync as ``sync_db`` and to the cleanup step
    data: List[Dict[str, Any]]
        the fetched records; when empty, neither sync nor cleanup is run
    id_column: str
        the single column used as the identity column for the sync

    Returns
    -------
    DataFrame
        the value returned by ``sync_to_db_without_cleanup``, or an empty
        DataFrame when ``data`` is empty
    """
    record_count = len(data)
    if record_count == 0:
        # Nothing was fetched: skip both the sync and the cleanup.
        return DataFrame()

    fetched_df: DataFrame = DataFrame(data)
    identity = [id_column]
    result_df = sync_to_db_without_cleanup(
        resource_df=fetched_df,
        identity_columns=identity,
        resource_name=resource_name,
        sync_db=db_engine,
    )

    # Cleanup only happens once the sync call has returned.
    cleanup_after_sync(resource_name, db_engine)
    return result_df