def load_in_batches(dataframe, tableName, totalBatches):
    """Append ``dataframe`` to a Teradata table in consecutive row batches.

    A teradataml context is opened, the frame is cut into ``totalBatches``
    contiguous row ranges of (near) equal size, and each range is appended
    to ``tableName`` with ``copy_to_sql``.  The context is always removed
    afterwards, even when an insert fails.

    Args:
        dataframe: Frame to upload; must expose ``.index`` and positional
            row slicing through ``.iloc`` (i.e. a pandas DataFrame).
        tableName: Name of the target table; rows are appended to it.
        totalBatches: Number of slices the frame is divided into.
    """
    try:
        create_context(host='td........org',
                       user=credentials.short_name_usr_ntid,
                       password=credentials.user_ntid_pswrd,
                       database='user_dw',
                       logmech="LDAP")

        total_rows = len(dataframe.index)
        print('total_rows', total_rows)

        # Exclusive end position of every batch; the last one equals
        # total_rows, so the batches together cover the whole frame.
        boundaries = [total_rows * k // totalBatches
                      for k in range(1, totalBatches + 1)]

        start = 0
        for stop in boundaries:
            print('Inserting Dataframe', f'df[{start}:{stop}]')
            rows = dataframe.iloc[start:stop]
            copy_to_sql(rows, table_name=tableName, if_exists='append')
            # Slices exclude their end position, so the next batch starts
            # exactly at `stop`; starting at `stop + 1` would drop one row
            # at every batch boundary.
            start = stop

    finally:
        remove_context()  # if connection not closed the script will hang
# Example #2
# 0
def score(data_conf, model_conf, **kwargs):
    """Score a Teradata table with the stored model and save the predictions.

    Loads the model from ``artifacts/input/model.joblib``, connects to
    Teradata using the ``AOA_CONN_*`` environment variables, pulls
    ``data_conf["table"]`` into pandas, predicts on the columns listed in
    ``model.feature_names`` and writes a ``pred`` / ``PatientId`` table to
    ``data_conf["predictions"]``, replacing any existing table.

    Args:
        data_conf: Mapping with the keys ``"table"`` and ``"predictions"``
            and an optional ``"schema"``; a missing or empty schema means no
            explicit database is passed to ``create_context``.
        model_conf: Accepted for interface compatibility; not used here.
        **kwargs: Accepted for interface compatibility; not used here.
    """
    # Local import so the cleanup below does not depend on the module-level
    # import list, which may not include remove_context.
    from teradataml import remove_context

    model = joblib.load("artifacts/input/model.joblib")

    # A missing key and an empty string both mean "no explicit database".
    database = data_conf.get("schema")
    if database == "":
        database = None

    create_context(host=os.environ["AOA_CONN_HOST"],
                   username=os.environ["AOA_CONN_USERNAME"],
                   password=os.environ["AOA_CONN_PASSWORD"],
                   database=database)

    try:
        # convert to pandas to use locally
        predict_df = DataFrame(data_conf["table"]).to_pandas()

        print("Scoring")
        y_pred = model.predict(predict_df[model.feature_names])

        print("Finished Scoring")

        # create result dataframe and store in Teradata
        y_pred = pd.DataFrame(y_pred, columns=["pred"])
        y_pred["PatientId"] = predict_df["PatientId"].values
        copy_to_sql(df=y_pred,
                    table_name=data_conf["predictions"],
                    index=False,
                    if_exists="replace")
    finally:
        # Always release the connection, including when scoring or the
        # upload raises, so the session is not left open.
        remove_context()
#print("Apple Mobility Finished!  " + timestampStr)

#############################################################
# 2) Covid Cases
#############################################################

# Import the class once: the earlier `import datetime` / `from datetime import
# datetime` pair rebound the same name twice within this section.
from datetime import datetime

# Download the us-counties CSV from the nytimes/covid-19-data repository,
# stamp each row with the load time and stage it in Teradata.
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
df = pd.read_csv(url)
df['current_dttm'] = datetime.today()
df = df.rename(columns={'date': 'date_key'})

copy_to_sql(df=df,
            table_name="STG_covid19_stats",
            schema_name=params.SchemaName,
            primary_index=['date_key'],
            if_exists='replace')

# Take an aware UTC "now" directly (datetime.utcnow() is deprecated and
# returns a naive value) and convert it to US/Pacific for the log line.
dateTimeObj = datetime.now(pytz.utc).astimezone(pytz.timezone('US/Pacific'))
timestampStr = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")
print("Covid Cases Finished!  " + timestampStr)


#############################################################
# 3) Covid Projections
#############################################################
from urllib.request import urlopen
import requests
from pathlib import Path
# Location of the zip archive for the Covid Projections section; the code
# that consumes this URL is not visible in this part of the file.
url = 'https://ihmecovid19storage.blob.core.windows.net/latest/ihme-covid19.zip'
# Example #4
# 0
                     username=params.MyUser,
                     password=params.Password,
                     temp_database_name=params.SchemaName,
                     logmech=params.LogMech)

#############################################################
# Manual Loads
#############################################################

# Issue a DATABASE statement for the configured schema before the manual loads.
pda = pd.read_sql('DATABASE ' + params.SchemaName, con)

# DIM_GEO_LOCATION_T
# Stage the pipe-delimited file in a TEMP_ table, then empty the real
# dimension table and refill it from the staged copy.
url = 'https://raw.githubusercontent.com/golestm/RTN/master/data/DIM_GEO_LOCATION_T.txt'
df = pd.read_csv(url, encoding='latin-1', doublequote=True, sep="|")
copy_to_sql(df=df,
            schema_name=params.SchemaName,
            table_name="TEMP_DIM_GEO_LOCATION_T",
            if_exists='replace')
# The statements must run in exactly this order.
for statement in (
        'DATABASE ' + params.SchemaName,
        'DELETE FROM DIM_GEO_LOCATION_T;',
        'INSERT INTO DIM_GEO_LOCATION_T SELECT * FROM TEMP_DIM_GEO_LOCATION_T;',
):
    pd.read_sql(statement, con)
print("DIM_GEO_LOCATION_T Finished!")

# DIM_ZIP_COUNTY_MSA_MAP_RAW
url = 'https://raw.githubusercontent.com/golestm/RTN/master/data/DIM_ZIP_COUNTY_MSA_MAP_RAW.txt'
df = pd.read_csv(url,
                 sep="|",
                 doublequote=True,
                 encoding='latin-1',
                 dtype={