def main():
    """ starting import """

    # get argument
    parser = argparse.ArgumentParser()
    parser.add_argument("input_json", type=str, help="JSON file to process")
    args = parser.parse_args()

    # collect rows and build a dataframe
    subset_amsterdam = []
    for row in data_iter(args.input_json):
        subset_amsterdam.append(row)
    result = pd.DataFrame(subset_amsterdam)

    # lowercase all column names
    result.columns = result.columns.str.lower()

    # aggregate numbers per group
    result = result.groupby(
        ["date_of_publication", "municipality_code", "municipality_name", "province"]
    ).agg(
        total_reported=("total_reported", "sum"),
        hospital_admission=("hospital_admission", "sum"),
        deceased=("deceased", "sum"),
    ).reset_index()

    log.info(f"Starting import {args.input_json}")

    engine = get_engine()

    result.to_sql(
        "corona_gevallen_new",
        engine,
        dtype={
            "index": Integer(),
            "date_of_publication": Date(),
            "municipality_code": String(),
            "municipality_name": String(),
            "province": String(),
            "total_reported": Integer(),
            "hospital_admission": Integer(),
            "deceased": Integer(),
        },
    )
    log.info("Data loaded into DB")

    ProvenanceRenameOperator(
        task_id="provenance_rename",
        dataset_name="corona",
        prefix_table_name="corona_",
        postfix_table_name="_new",
        rename_indexes=False,
        pg_schema="public",
    ).execute()
    log.info("Renamed columns based on provenance.")
Example #2
def import_scooter_data(
    table_name: str,
    felyx_api_endpoint: str,
    felyx_api_header: Dict,
    ridecheck_token_endpoint: str,
    ridecheck_token_payload: Dict,
    ridecheck_token_header: Dict,
    ridecheck_data_endpoint: str,
    ridecheck_data_header: Dict,
) -> None:
    """Union resources felyx and RIDECHECK and imports data into database table

    Args:
        felyx_api_endpoint: an URL specification for the data service to call
        felyx_api_header: a dictionary of headers key value pairs used when calling
            the data endpoint
        ridecheck_token_endpoint: an URL specification for the token service to retreive a
            (time limited) access key
        ridecheck_token_payload: a dictionary of key value pairs elements used in the data body
            when calling the token service
        ridecheck_token_header: a dictionary of headers key value pairs used when calling
            the token endpoint
        ridecheck_data_endpoint: an URL specification of the data service to call
        ridecheck_data_header: a dictionary of headers key value pairs used when
            calling the data endpoint

    Executes:
        SQL insert statements of scooter data into database table

    """
    ridecheck = get_data_ridecheck(
        ridecheck_token_endpoint,
        ridecheck_token_payload,
        ridecheck_token_header,
        ridecheck_data_endpoint,
        ridecheck_data_header,
    )
    felyx = get_data_scooter_felyx(felyx_api_endpoint, felyx_api_header)
    scooter_dataset = chain(ridecheck, felyx)

    df = pd.DataFrame([vars(row) for row in scooter_dataset])
    df["geometrie"] = df["geometrie"].apply(lambda g: WKTElement(g.wkt, srid=4326))
    db_engine = get_engine()
    df.to_sql(
        table_name,
        db_engine,
        if_exists="replace",
        index=False,
        dtype={"geometrie": Geometry(geometry_type="POINT", srid=4326)},
    )
    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
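# The vars(row) call above implies each record is an object whose attributes
# map one-to-one onto table columns. A hedged sketch of such a record type;
# only id and geometrie are implied by the code, other fields would vary:
from dataclasses import dataclass

from shapely.geometry import Point


@dataclass
class ScooterRecord:
    id: str
    geometrie: Point  # shapely geometry; .wkt feeds the WKTElement above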
Example #3
def load_grex_from_dwh(table_name):
    """Load grex (land development) plan data from the Oracle DWH into Postgres."""
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")
    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            SELECT PLANNR as ID
                 , PLANNAAM
                 , STARTDATUM
                 , PLANSTATUS
                 , OPPERVLAKTE
                 , GEOMETRIE_WKT AS GEOMETRY
            FROM DMDATA.GREX_GV_PLANNEN_V2
        """,
            connection,
            index_col="id",
            coerce_float=True,
            parse_dates=["startdatum"],
        )
        df["geometry"] = df["geometry"].apply(wkt_loads_wrapped)
        grex_rapportage_dtype = {
            "id": Integer(),
            "plannaam": Text(),
            "startdatum": Date(),
            "planstatus": Text(),
            "oppervlakte": Float(),
            "geometry": Geometry(geometry_type="GEOMETRY", srid=4326),
        }
        df.to_sql(table_name,
                  db_engine,
                  if_exists="replace",
                  dtype=grex_rapportage_dtype)
        with db_engine.connect() as connection:
            connection.execute(
                f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
            connection.execute(f"""
                 UPDATE {table_name}
                 SET geometry = ST_CollectionExtract(ST_Makevalid(geometry), 3)
                 WHERE ST_IsValid(geometry) = False
                 OR ST_GeometryType(geometry) != 'ST_MultiPolygon';
                 COMMIT;
             """)
            connection.execute(f"""
                 ALTER TABLE {table_name}
                 ALTER COLUMN geometry TYPE geometry(MultiPolygon,28992)
                 USING ST_Transform(geometry,28992);
             """)
            connection.execute(
                f"DELETE FROM {table_name} WHERE geometry is NULL")
Example #4
def _load_geojson(postgres_conn_id):
    """As airflow executes tasks at different hosts,
    these tasks need to happen in a single call.

    Otherwise, the (large) file is downloaded by one host,
    and stored in the XCom table to be shared between tasks.
    """
    tmp_dir = Path(f"/tmp/{dag_id}")
    tmp_dir.mkdir(parents=True, exist_ok=True)

    # 1. download files
    files = {}
    for route in ROUTES:
        dest = f"{tmp_dir}/{route.name}.geojson"
        logger.info("Downloading %s to %s", route.url, dest)
        download_file(route.url, dest, http_conn_id=None)
        files[route.name] = dest

    # 2. generate schema ("schema introspect geojson *.geojson")
    schema = introspect_geojson_files(
        "gevaarlijke-routes", files=list(files.values())
    )
    schema = DatasetSchema.from_dict(schema)  # TODO: move to schema-tools?

    # XXX This is not running as one transaction atm, but autocommitting per chunk
    # 3. import data
    db_engine = get_engine()
    importer = GeoJSONImporter(schema, db_engine, logger=logger)
    for route in ROUTES:
        geojson_path = files[route.name]
        logger.info(
            "Importing %s into %s", route.name, route.tmp_db_table_name
        )
        importer.generate_db_objects(
            table_name=route.schema_table_name,
            db_table_name=route.tmp_db_table_name,
            truncate=True,  # when re-executing the same task
            ind_tables=True,
            ind_extra_index=False,
        )
        importer.load_file(geojson_path)
        if route.post_process:
            hook = PostgresHook(postgres_conn_id=postgres_conn_id)
            hook.run(route.post_process)
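# Because of the single-call constraint explained in the docstring, the whole
# function maps onto one Airflow task. A wiring sketch, assuming a module-level
# DAG object and a "postgres_default" connection id (both assumptions):
from airflow.operators.python import PythonOperator

load_geojson = PythonOperator(
    task_id="load_geojson",
    python_callable=_load_geojson,
    op_kwargs={"postgres_conn_id": "postgres_default"},
    dag=dag,
)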
Example #5
def load_grex(input_csv, table_name):
    """Load grex (land development) plan data from a CSV export into Postgres."""
    db_engine = get_engine()
    df = pd.read_csv(
        input_csv,
        delimiter=";",
        decimal=",",
        encoding="latin_1",
        parse_dates=["STARTDATUM"],
        dayfirst=True,
        index_col="PLANNR",
    )
    df.rename(columns=lambda x: "geometry" if x == "GEOMETRIE" else x.lower(),
              inplace=True)
    df.index.name = "id"
    df["geometry"] = df["geometry"].apply(wkt_loads_wrapped)

    grex_rapportage_dtype = {
        "id": Integer(),
        "plannaam": Text(),
        "planbeheercode": Text(),
        "startdatum": Date(),
        "planstatus": Text(),
        "oppervlakte": Float(),
        "geometry": Geometry(geometry_type="MULTIPOLYGON", srid=4326),
    }

    df.to_sql(table_name,
              db_engine,
              if_exists="replace",
              dtype=grex_rapportage_dtype)
    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
        connection.execute(f"""
            ALTER TABLE {table_name}
            ALTER COLUMN geometry TYPE geometry(MultiPolygon,28992)
            USING ST_Transform(geometry,28992)
        """)
        connection.execute(f"DELETE FROM {table_name} WHERE geometry is NULL")
        connection.execute(f"""
            UPDATE {table_name}
            SET geometry = ST_CollectionExtract(ST_Makevalid(geometry), 3)
            WHERE ST_IsValid(geometry) = False;
        """)
Example #6
def main():
    """Import corona enforcement (handhaving) figures from a CSV file."""
    parser = argparse.ArgumentParser()
    parser.add_argument("input_csv", type=str, help="CSV file to process")
    args = parser.parse_args()
    df = pd.read_csv(
        args.input_csv,
        sep=";",
        names=["organisatie", "type_interventie", "aantal", "week_nummer", "jaar"],
        converters={
            "organisatie": strip,
            "type_interventie": strip,
        },
        header=0,
    )
    df.index.name = "id"
    engine = get_engine()
    df.to_sql(
        "corona_handhaving_new",
        engine,
        dtype={"id": Integer(), "aantal": Integer(), "week_nummer": Integer(), "jaar": Integer()},
    )
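# The strip converter used above is not shown; presumably a one-liner that
# trims whitespace from a CSV cell (hypothetical sketch):
def strip(value: str) -> str:
    return value.strip()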
Example #7
def import_auto_data(
    table_name: str,
    mywheels_api_endpoint: str,
    mywheels_api_header: Dict,
    mywheels_api_payload: Dict,
) -> None:
    """Union resources MYWHEELS and [...] and imports data into database table

    Args:
        mywheels_api_endpoint: an URL specification for the data service to call
        mywheels_api_header: a dictionary of headers key value pairs used when calling
            the data endpoint
        mywheels_api_payload: a dictionary of key value pairs elements used in the data body
            when calling the token service

    Executes:
        SQL insert statements of auto data into database table

    """
    mywheels = get_data_auto_mywheels(
        mywheels_api_endpoint, mywheels_api_header, mywheels_api_payload
    )
    auto_dataset = list(mywheels)

    df = pd.DataFrame([vars(row) for row in auto_dataset])
    df["geometrie"] = df["geometrie"].apply(lambda g: WKTElement(g.wkt, srid=4326))
    db_engine = get_engine()
    df.to_sql(
        table_name,
        db_engine,
        if_exists="replace",
        index=False,
        dtype={"geometrie": Geometry(geometry_type="POINT", srid=4326)},
    )
    with db_engine.connect() as connection:
        connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
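# A hypothetical invocation; the table name, endpoint, header, and payload
# below are placeholders, not the real MYWHEELS configuration:
import_auto_data(
    table_name="auto_deelvoertuigen_new",
    mywheels_api_endpoint="https://api.example.com/mywheels/search",
    mywheels_api_header={"Content-Type": "application/json"},
    mywheels_api_payload={"method": "search", "params": {}},
)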
Example #8
def load_from_dwh(table_name):
    """Load horeca operating-permit data from the Oracle DWH into Postgres."""
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")
    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
select ZAAKNUMMER
    , ZAAKNAAM
    , ADRES
    , ZAAK_CATEGORIE
    , ZAAK_SPECIFICATIE
    , BEGINDATUM
    , EINDDATUM
    , OPENINGSTIJDEN_ZO_DO_VAN
    , OPENINGSTIJDEN_ZO_DO_TOT
    , OPENINGSTIJDEN_VR_ZA_VAN
    , OPENINGSTIJDEN_VR_ZA_TOT
    , O_TIJDEN_TERRAS_ZO_DO_VAN
    , O_TIJDEN_TERRAS_ZO_DO_TOT
    , O_TIJDEN_TERRAS_VR_ZA_VAN
    , O_TIJDEN_TERRAS_VR_ZA_TOT
    , LOCATIE_WKT AS LOCATIE
    , TERRASGEOMETRIE_WKT AS TERRASGEOMETRIE
    , POSTCODE
    , STATUS_VERGUNNING
    , STATUS_TIJDELIJK_TERRAS
    , TOESTEMMING_TIJDELIJK_TERRAS
    , PUBL_BESLUIT_TIJDELIJK_TERRAS
    , TIJDELIJK_TERRAS_DETAILS
    , STATUS_VERLENG_TIJDELK_TERRAS
    , VERLENG_TIJDELK_TERRAS_DETAIL
from DMDATA.HORECA_EXPLOITATIEVERGUNNING_V 
        """,
            connection,
            coerce_float=True,
            params=None,
            parse_dates=["begindatum", "einddatum"],
            columns=None,
            chunksize=None,
        )
        df["locatie"] = df["locatie"].apply(wkt_loads_wrapped)
        df["terrasgeometrie"] = df["terrasgeometrie"].apply(wkt_loads_wrapped)
        dtype = {
            "zaaknummer": Integer(),
            "zaaknaam": Text(),
            "zaakcategorie": Text(),
            "zaakspecificatie": Text(),
            "begindatum": Date(),
            "einddatum": Date(),
            "openingstijden_zo_do_van": Text(),
            "openingstijden_zo_do_tot": Text(),
            "openingstijden_vr_za_van": Text(),
            "openingstijden_vr_za_tot": Text(),
            "o_tijden_terras_zo_do_van": Text(),
            "o_tijden_terras_zo_do_tot": Text(),
            "o_tijden_terras_vr_za_van": Text(),
            "o_tijden_terras_vr_za_tot": Text(),
            "locatie": Geometry(geometry_type="POINT", srid=4326),
            "terrasgeometrie": Geometry(geometry_type="MULTIPOLYGON", srid=4326),
            "postcode": Text(),
            "status_vergunning": Text(),
            "status_tijdelijk_terras": Text(),
            "toestemming_tijdelijk_terras": Text(),
            "publ_besluit_tijdelijk_terras": Text(),
            "tijdelijk_terras_details": Text(),
            "status_verleng_tijdelk_terras": Text(),
            "verleng_tijdelk_terras_detail": Text(),
        }
        df.to_sql(
            table_name, db_engine, if_exists="replace", index_label="id", dtype=dtype
        )
        with db_engine.connect() as connection:
            connection.execute(f"ALTER TABLE {table_name} ADD PRIMARY KEY (id)")
            connection.execute(
                f"""
                 ALTER TABLE {table_name}
                 ALTER COLUMN terrasgeometrie TYPE geometry(MultiPolygon,28992)
                 USING ST_Transform(terrasgeometrie,28992)
             """
            )
            connection.execute(
                f"""
                 ALTER TABLE {table_name}
                 ALTER COLUMN locatie TYPE geometry(Point,28992)
                 USING ST_Transform(locatie,28992)
             """
            )
            # connection.execute(f"DELETE FROM {table_name} WHERE terrasgeometrie is NULL")
            connection.execute(
                f"""
                 UPDATE {table_name}
                 SET terrasgeometrie = ST_CollectionExtract(ST_Makevalid(terrasgeometrie), 3)
                 WHERE ST_IsValid(terrasgeometrie) = False;
             """
            )
            # Unquoted identifiers fold to lowercase in Postgres, so these
            # uppercase names match the lowercase columns created by to_sql.
            connection.execute(
                f"""
                 ALTER TABLE {table_name}
                 RENAME COLUMN VERLENG_TIJDELK_TERRAS_DETAIL to VERLENGING_TIJDELIJK_TERRAS_DETAILS;
                 ALTER TABLE {table_name}
                 RENAME COLUMN STATUS_VERLENG_TIJDELK_TERRAS to STATUS_VERLENGING_TIJDELIJK_TERRAS;
             """
            )
Example #9
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and post-processing:
        an ALTER statement to add a constraint.

    Note: The SQL processing is done with SQLAlchemy
    Note2: Data is filtered on unit OIS 'Onderzoek, Informatie en Statistiek' by code 380000

    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_ami")
    date_fmt = "%Y%d%m"
    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            SELECT
             gb.DATUM_BOEKING as "datum_boeking"
            ,gb.BEDRAG_BOEKING as "bedrag_boeking"
            ,gb.OMSCHRIJVING as "boeking_omschrijving"
            ,gb.OPMERKING as "boeking_opmerking"
            ,gbr.HOOFDREKENING_CODE as "hoofdrekening_code"
            ,gbr.REKENING_OMSCHRIJVING as "rekening_omschrijving"
            ,gbr.BEDRIJFSEENHEID_CODE as "bedrijfseenheid_code"
            ,gbr.BEDRIJFSEENHEID_OMSCHRIJVING as "bedrijfseenheid_omschrijving"
            ,gb.SUBGROOTBOEK as "subgrootboek"
            ,gb.SUBGROOTBOEKTYPE as "subgrootboektype"
            ,vpr.VPL_VERPLICHTINGNUMMER as "vpl_verplichtingnummer"
            ,vpr.VPL_OMSCHRIJVING as "vpl_omschrijving"
            ,vpr.VPL_JAAR as "vpl_jaar"
            ,vpr.VPL_INKOOPCONTRACTCODE as "vpl_inkoopcontractcode"
            ,vpr.VPL_STATUS as "vpl_status"
            ,vpr.VPR_DTM_LSTE_AFBOEKING as "vpr_datum_laatste_afboeking"
            ,vpr.VPR_BEDRAG_OORSPRONKELIJK as "vpr_bedrag_oorspronkelijk"
            ,vpr.VPR_BEDRAG_AFGEBOEKT as "vpr_bedrag_afgeboekt"
            ,vpr.VPR_BEDRAG_RESTANT as "vpr_bedrag_restant"
            ,vpr.VPR_REGELNUMMER as "vpr_regelnummer"
            ,vpr.VPR_OMSCHRIJVING as "vpr_omschrijving"
            ,vpr.VPR_OPMERKING as "vpr_opmerking"
            ,vpr.VPR_DATUM_GROOTBOEK as "vpr_datum_grootboek"
            ,vpr.VPR_DATUMVERPLICHTING as "vpr_datumverplichting"
            ,wor.CODE as "werkorder_code"
            ,wor.OMSCHRIJVING  as "werkorder_omschrijving"
            ,wor.STATUS as "werkorder_status"
            ,wor.STATUS_OPMERKING as "werkorder_status_opmerking"
            ,rel.code as "relatie_code"
            ,rel.naam as "relatie_naam"
            ,rel.IND_CREDITEUR as "indicatie_crediteur"
            ,rel.IND_DEBITEUR as "indicatie_debiteur"
            ,org.SUBTEAM_CODE as "subteam_code"
            ,org.SUBTEAM_NAAM as "subteam_naam"
            ,org.TEAM_CODE as "team_code"
            ,org.TEAM_NAAM as "team_naam"
            ,org.AFDELING_CODE as "afdeling_code"
            ,org.AFDELING_NAAM as "afdeling_naam"
            ,org.RVE_CODE as "rve_code"
            ,org.RVE_NAAM as "rve_naam"
            ,org.CLUSTER_CODE as "cluster_code"
            ,org.CLUSTER_NAAM as "cluster_naam"
            FROM DATAHUB.FI2_DHB_FCT_GROOTBOEK GB
            INNER JOIN DATAHUB.FI2_DHB_DIM_RELATIE REL ON REL.ID = GB.REL_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_ORGANISATIE org ON ORG.ID = GB.ORG_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_GROOTBOEKREKENING gbr ON gbr.ID = GB.gbr_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_VERPLICHTINGREGEL vpr ON vpr.ID = GB.VPR_ID
            INNER JOIN DATAHUB.FI2_DHB_DIM_WERKORDER wor ON wor.ID = GB.WOR_ID
            WHERE 1=1
            AND RVE_CODE = '380000'
            """,
            connection,
            coerce_float=True,
            parse_dates={
                "datum_boeking": date_fmt,
                "vpr_datum_grootboek": date_fmt,
                "vpr_datumverplichting": date_fmt,
                "vpr_datum_laatste_afboeking": date_fmt,
            },
        )
        dtype = {
            "datum_boeking": Date(),
            "bedrag_boeking": Numeric(),
            "boeking_omschrijving": Text(),
            "boeking_opmerking": Text(),
            "hoofdrekening_code": Text(),
            "rekening_omschrijving": Text(),
            "bedrijfseenheid_code": Text(),
            "bedrijfseenheid_omschrijving": Text(),
            "subgrootboek": Text(),
            "subgrootboektype": Text(),
            "vpl_verplichtingnummer": Text(),
            "vpl_omschrijving": Text(),
            "vpl_jaar": Integer(),
            "vpl_inkoopcontractcode": Text(),
            "vpl_status": Text(),
            "vpr_datum_laatste_afboeking": Date(),
            "vpr_bedrag_oorspronkelijk": Numeric(),
            "vpr_bedrag_afgeboekt": Numeric(),
            "vpr_bedrag_restant": Numeric(),
            "vpr_regelnummer": Integer(),
            "vpr_omschrijving": Text(),
            "vpr_opmerking": Text(),
            "vpr_datum_grootboek": Date(),
            "vpr_datumverplichting": Date(),
            "werkorder_code": Text(),
            "werkorder_omschrijving": Text(),
            "werkorder_status": Text(),
            "werkorder_status_opmerking": Text(),
            "relatie_code": Text(),
            "relatie_naam": Text(),
            "indicatie_crediteur": Boolean(),
            "indicatie_debiteur": Boolean(),
            "subteam_code": Text(),
            "subteam_naam": Text(),
            "team_code": Text(),
            "team_naam": Text(),
            "afdeling_code": Text(),
            "afdeling_naam": Text(),
            "rve_code": Text(),
            "rve_naam": Text(),
            "cluster_code": Text(),
            "cluster_naam": Text(),
        }
        df.to_sql(table_name,
                  db_engine,
                  if_exists="replace",
                  index_label="id",
                  index=True,
                  dtype=dtype)
        with db_engine.connect() as connection:
            connection.execute(
                f"ALTER TABLE {table_name} ADD PRIMARY KEY (ID)")
Example #10
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and post-processing:
        an ALTER statement to add a constraint.
        Note: The SQL processing is done with SQLAlchemy

    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")
    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
        select ADWH_VERSIE_ID
            , SOORT_WERKZAAMHEDEN
            , KENTEKEN
            , ADWH_KENTEKEN
            , CATEGORIE
            , ADWH_ACTIVITEIT
            , WERKZAAMHEDEN_CODE
            , WERKZAAMHEDEN_OMSCHRIJVING
            , WERKZAAMHEDEN_DATUM
            , WERKZAAMHEDEN_DATUM_REF_ID
            , WERKZAAMHEDEN_STARTTIJD
            , WERKZAAMHEDEN_EINDTIJD
            , WERKZAAMHEDEN_UREN_GEPLAND
            , PAUZE_STARTTIJD
            , PAUZE_EINDTIJD
            , PAUZE_UREN_GEPLAND
            , INHUUR
            , FASE
            , MEMO
            , TEAM
            , AANTAL_MEDEWERKERS
            , UREN_INZET_MEDEWERKER_INTERN
            , UREN_INZET_MEDEWERKER_INHUUR
            , UREN_INZET_VOERTUIG
            , AANTAL_MEDEWERKERS_INTERN
            , AANTAL_MEDEWERKERS_INHUUR
            , ADWH_LAATST_GEZIEN
            , ADWH_LAATST_GEZIEN_BRON
            from DMDATA.AFVAL_INZML_VOERTUIGPLAN_V2
        """,
            connection,
            coerce_float=True,
            parse_dates=[
                "WERKZAAMHEDEN_DATUM",
                "ADWH_LAATST_GEZIEN",
                "ADWH_LAATST_GEZIEN_BRON",
            ],
        )
        dtype = {
            "ADWH_VERSIE_ID": Integer(),
            "SOORT_WERKZAAMHEDEN": Text(),
            "KENTEKEN": Text(),
            "ADWH_KENTEKEN": Text(),
            "CATEGORIE": Text(),
            "ADWH_ACTIVITEIT": Text(),
            "WERKZAAMHEDEN_CODE": Text(),
            "WERKZAAMHEDEN_OMSCHRIJVING": Text(),
            "WERKZAAMHEDEN_DATUM": Date(),
            "WERKZAAMHEDEN_DATUM_REF_ID": Integer(),
            "WERKZAAMHEDEN_STARTTIJD": Text(),
            "WERKZAAMHEDEN_EINDTIJD": Text(),
            "WERKZAAMHEDEN_UREN_GEPLAND": Numeric(),
            "PAUZE_STARTTIJD": Text(),
            "PAUZE_EINDTIJD": Text(),
            "PAUZE_UREN_GEPLAND": Text(),
            "INHUUR": Text(),
            "FASE": Text(),
            "MEMO": Text(),
            "TEAM": Text(),
            "AANTAL_MEDEWERKERS": Text(),
            "UREN_INZET_MEDEWERKER_INTERN": Numeric(),
            "UREN_INZET_MEDEWERKER_INHUUR": Numeric(),
            "UREN_INZET_VOERTUIG": Numeric(),
            "AANTAL_MEDEWERKERS_INTERN": Numeric(),
            "AANTAL_MEDEWERKERS_INHUUR": Numeric(),
            "ADWH_LAATST_GEZIEN": DateTime(),
            "ADWH_LAATST_GEZIEN_BRON": DateTime(),
        }
        df.to_sql(table_name,
                  db_engine,
                  if_exists="replace",
                  dtype=dtype,
                  index=False)
        with db_engine.connect() as connection:
            connection.execute(
                f"ALTER TABLE {table_name} ADD PRIMARY KEY (ADWH_VERSIE_ID)")
Example #11
def load_from_dwh(table_name: str) -> None:
    """Loads data from an Oracle database source into a Postgres database

    Args:
        table_name: The target table where the source data will be stored

    Executes:
        SQL INSERT statements for the data and post-processing:
        an ALTER statement to add a constraint.
        Note: The SQL processing is done with SQLAlchemy

    """
    db_engine = get_engine()
    dwh_ora_engine = get_ora_engine("oracle_dwh_stadsdelen")
    with dwh_ora_engine.connect() as connection:
        df = pd.read_sql(
            """
            select "ID"
            ,"DATUM"
            ,"DAG_VAN_WEEK_NUMMER"
            ,"DAG_VAN_WEEK_NAAM"
            ,"DAG_VAN_WEEK_KNAAM"
            ,"VORIGE_DAG_VAN_WEEK_NAAM"
            ,"VORIGE_DAG_VAN_WEEK_KNAAM"
            ,"WEEKEND_IND"
            ,"FEESTDAG_IND"
            ,"FEESTDAG_ADAM_IND"
            ,"AANTAL_WERKDAGEN"
            ,"AANTAL_WERKDAGEN_ADAM"
            ,"SEIZOEN"
            ,"WEEK_IN_MAAND_NUMMER"
            ,"WEEK_IN_MAAND_START_DATUM"
            ,"WEEK_IN_MAAND_EINDE_DATUM"
            ,"WEEK_IN_JAAR_NUMMER"
            ,"ISO_WEEK_NUMMER"
            ,"JAAR_ISO_WEEKNR"
            ,"ISO_WEEK_START_DATUM"
            ,"ISO_WEEK_EINDE_DATUM"
            ,"DAG_VAN_MAAND_NUMMER"
            ,"MAAND_WAARDE"
            ,"MAAND_NAAM"
            ,"MAAND_KNAAM"
            ,"JAARMAAND"
            ,"MAAND_START_DATUM"
            ,"MAAND_EINDE_DATUM"
            ,"DAGEN_IN_MAAND"
            ,"LAATSTE_DAG_VAN_MAAND_IND"
            ,"DAG_VAN_KWARTAAL_NUMMER"
            ,"KWARTAAL_WAARDE"
            ,"KWARTAAL_NAAM"
            ,"JAARKWARTAAL"
            ,"KWARTAAL_START_DATUM"
            ,"KWARTAAL_EINDE_DATUM"
            ,"DAGEN_IN_KWARTAAL"
            ,"LAATSTE_DAG_VAN_KWARTAAL_IND"
            ,"TERTAAL"
            ,"JAAR_TERTAAL"
            ,"DAG_IN_TERTAAL_NUMMER"
            ,"DAGEN_IN_TERTAAL"
            ,"LAATSTE_DAG_VAN_TERTAAL_IND"
            ,"DAG_VAN_JAAR_NUMMER"
            ,"JAAR_WAARDE"
            ,"JAAR_NAAM"
            ,"JAAR_KNAAM"
            ,"JAAR_START_DATUM"
            ,"JAAR_EINDE_DATUM"
            ,"DAGEN_IN_JAAR"
            ,"LAATSTE_DAG_VAN_JAAR_IND"
            ,"JAARNAAM_KORT"
            ,"JULIAANSE_DATUM"
            ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_NOORD_IND"
            ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_MIDDEN_IND"
            ,CAST(NULL AS NUMBER) AS "SCHOOLVAKANTIE_NL_ZUID_IND"
            ,"NIVEAUCODE"
            from DMDATA.ALG_DIM_DATUM_V2
        """,
            connection,
            coerce_float=True,
            parse_dates=[
                "DATUM",
                "ISO_WEEK_START_DATUM",
                "ISO_WEEK_EINDE_DATUM",
                "WEEK_IN_MAAND_START_DATUM",
                "WEEK_IN_MAAND_EINDE_DATUM",
                "MAAND_START_DATUM",
                "MAAND_EINDE_DATUM",
                "KWARTAAL_START_DATUM",
                "KWARTAAL_EINDE_DATUM",
                "JAAR_START_DATUM",
                "JAAR_EINDE_DATUM",
            ],
        )
        dtype = {
            "ID": Integer(),
            "DATUM": Date(),
            "DAG_VAN_WEEK_NUMMER": Integer(),
            "DAG_VAN_WEEK_NAAM": Text(),
            "DAG_VAN_WEEK_KNAAM": Text(),
            "VORIGE_DAG_VAN_WEEK_NAAM": Text(),
            "VORIGE_DAG_VAN_WEEK_KNAAM": Text(),
            "WEEKEND_IND": Integer(),
            "FEESTDAG_IND": Integer(),
            "FEESTDAG_ADAM_IND": Integer(),
            "AANTAL_WERKDAGEN": Integer(),
            "AANTAL_WERKDAGEN_ADAM": Integer(),
            "SEIZOEN": Text(),
            "WEEK_IN_MAAND_NUMMER": Integer(),
            "WEEK_IN_MAAND_START_DATUM": Date(),
            "WEEK_IN_MAAND_EINDE_DATUM": Date(),
            "WEEK_IN_JAAR_NUMMER": Integer(),
            "ISO_WEEK_NUMMER": Integer(),
            "JAAR_ISO_WEEKNR": Integer(),
            "ISO_WEEK_START_DATUM": Date(),
            "ISO_WEEK_EINDE_DATUM": Date(),
            "DAG_VAN_MAAND_NUMMER": Integer(),
            "MAAND_WAARDE": Integer(),
            "MAAND_NAAM": Text(),
            "MAAND_KNAAM": Text(),
            "JAARMAAND": Integer(),
            "MAAND_START_DATUM": Text(),
            "MAAND_EINDE_DATUM": Text(),
            "DAGEN_IN_MAAND": Integer(),
            "LAATSTE_DAG_VAN_MAAND_IND": Integer(),
            "DAG_VAN_KWARTAAL_NUMMER": Integer(),
            "KWARTAAL_WAARDE": Integer(),
            "KWARTAAL_NAAM": Text(),
            "JAARKWARTAAL": Integer(),
            "KWARTAAL_START_DATUM": Date(),
            "KWARTAAL_EINDE_DATUM": Date(),
            "DAGEN_IN_KWARTAAL": Integer(),
            "LAATSTE_DAG_VAN_KWARTAAL_IND": Integer(),
            "TERTAAL": Integer(),
            "JAAR_TERTAAL": Text(),
            "DAG_IN_TERTAAL_NUMMER": Integer(),
            "DAGEN_IN_TERTAAL": Integer(),
            "LAATSTE_DAG_VAN_TERTAAL_IND": Integer(),
            "DAG_VAN_JAAR_NUMMER": Integer(),
            "JAAR_WAARDE": Integer(),
            "JAAR_NAAM": Text(),
            "JAAR_KNAAM": Text(),
            "JAAR_START_DATUM": Date(),
            "JAAR_EINDE_DATUM": Date(),
            "DAGEN_IN_JAAR": Integer(),
            "LAATSTE_DAG_VAN_JAAR_IND": Integer(),
            "JAARNAAM_KORT": Text(),
            "JULIAANSE_DATUM": Integer(),
            "SCHOOLVAKANTIE_NL_NOORD_IND": Integer(),
            "SCHOOLVAKANTIE_NL_MIDDEN_IND": Integer(),
            "SCHOOLVAKANTIE_NL_ZUID_IND": Integer(),
            "NIVEAUCODE": Text(),
        }
        df.to_sql(table_name,
                  db_engine,
                  if_exists="replace",
                  dtype=dtype,
                  index=False)
        with db_engine.connect() as connection:
            connection.execute(
                f"ALTER TABLE {table_name} ADD PRIMARY KEY (ID)")