Example #1
def set_username():
    FLAGS.username = getpass.getuser()
    users_dataset = dsx_core_utils.get_remote_data_set_info(FLAGS.users_dataset)
    users_df = load_dataset(users_dataset)
    for i in range(0, len(users_df)):
        if str(users_df["USER_ID"][i]) == FLAGS.username:
            # print(users_df.iloc[i])
            FLAGS.username = extract_username(users_df["USERNAME"][i])
            FLAGS.name = extract_username(users_df["NAME"][i])
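Neither `load_dataset` nor `extract_username` is shown on this page. A minimal sketch of what `load_dataset` might look like, reusing the jaydebeapi/pandas pattern from Example #5 below (only the name and signature come from the call sites; the body is an assumption):

```python
import dsx_core_utils
import jaydebeapi
import pandas as pd

def load_dataset(data_set):
    # Hypothetical helper: open a JDBC connection to the data set's source
    # and pull the saved query (or the whole table) into a pandas DataFrame.
    data_source = dsx_core_utils.get_data_source_info(data_set['datasource'])
    conn = jaydebeapi.connect(data_source['driver_class'], data_source['URL'],
                              [data_source['user'], data_source['password']])
    if data_set.get('query'):
        query = data_set['query']
    else:
        schema = data_set['schema'].strip()
        prefix = '"' + schema + '".' if schema else ''
        query = 'SELECT * FROM ' + prefix + '"' + data_set['table'] + '"'
    return pd.read_sql(query, con=conn)
```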
Example #2
def init_steps():
    global dataSet
    global dataSource
    global conn
    dataSet = dsx_core_utils.get_remote_data_set_info('submissions')
    dataSource = dsx_core_utils.get_data_source_info(dataSet['datasource'])
    if sys.version_info >= (3, 0):
        conn = jaydebeapi.connect(dataSource['driver_class'], dataSource['URL'],
                                  [dataSource['user'], dataSource['password']])
    else:
        conn = jaydebeapi.connect(dataSource['driver_class'],
                                  [dataSource['URL'], dataSource['user'], dataSource['password']])
    load_dataset()
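The Python 2/3 branch presumably tracks the jaydebeapi versions paired with each runtime: newer releases take the JDBC URL as its own positional argument, while older ones expected it inside the driver-argument list. A hypothetical call site for the helper (a sketch, not from the source; assumes a non-empty schema):

```python
# Sketch: run the one-time setup, then reuse the module-level connection.
init_steps()
curs = conn.cursor()
curs.execute("SELECT COUNT(*) FROM " + dataSet['schema'] + "." + dataSet['table'])
print("submission rows:", curs.fetchone()[0])
curs.close()
```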
Example #3
def save_record(dataframe):
  print("IN save_record >>>>>>>>")
  for i in range(0, len(dataframe)):
      if dataframe["USERNAME"][i] == FLAGS.username:
          dataSet = dsx_core_utils.get_remote_data_set_info(FLAGS.submissions_dataset)
          insert_query = "INSERT INTO "+ (dataSet['schema'] + '.' if (len(dataSet['schema'].strip()) != 0) else '') + dataSet['table']
          insert_query = insert_query + "(USERNAME, NAME, ACCURACY, ENTRIES, LAST) VALUES ('{0}', '{1}', {2}, {3}, '{4}')"
          insert_query = insert_query.format(dataframe["USERNAME"][i], dataframe["NAME"][i],dataframe["ACCURACY"][i],dataframe["ENTRIES"][i],dataframe["LAST"][i])
          curs = conn.cursor()
          curs.execute(insert_query)
          # load_dataset(dataSet)
          curs.close()
          print("YOUR SUBMISSION SAVED SUCCESSFULLY >>>>>>>>>>>>")
Example #4
def update_record(dataframe):
  print("IN update_record >>>>>>>>")
  for i in range(0, len(dataframe)):
      if dataframe["USERNAME"][i] == FLAGS.username:
          # print(submissions_df.iloc[i])
          dataSet = dsx_core_utils.get_remote_data_set_info(FLAGS.submissions_dataset)
          update_query = "UPDATE "+ (dataSet['schema'] + '.' if (len(dataSet['schema'].strip()) != 0) else '') + dataSet['table']
          update_query = update_query +" SET ENTRIES = {0}, ACCURACY = {1}, LAST = '{2}' WHERE USERNAME = '******'"
          update_query = update_query.format(dataframe["ENTRIES"][i], dataframe["ACCURACY"][i], dataframe["LAST"][i], dataframe["USERNAME"][i])
          curs = conn.cursor()
          curs.execute(update_query)
          # load_dataset(dataSet)
          curs.close()
          print("YOUR SUBMISSION UPDATED SUCCESSFULLY >>>>>>>>>>>>")
Example #5
# #### Retrieve all of the user accounts
#
# ```SQL
# SELECT * FROM USER999.AWS_ACCOUNTS
# ```
#
# > Note: The USER999.AWS_ACCOUNTS dataset is a virtual table mapped to the STOCKS.AWS_ACCOUNTS table on a Db2 instance running on AWS

# In[1]:

import dsx_core_utils, requests, jaydebeapi, os, io, sys
from pyspark.sql import SparkSession
import pandas as pd

df = None
dataSet = dsx_core_utils.get_remote_data_set_info('USER999.AWS_ACCOUNTS')
dataSource = dsx_core_utils.get_data_source_info(dataSet['datasource'])
if (sys.version_info >= (3, 0)):
    conn = jaydebeapi.connect(dataSource['driver_class'], dataSource['URL'],
                              [dataSource['user'], dataSource['password']])
else:
    conn = jaydebeapi.connect(
        dataSource['driver_class'],
        [dataSource['URL'], dataSource['user'], dataSource['password']])
query = 'select * from "' + (dataSet['schema'] + '"."' if
                             (len(dataSet['schema'].strip()) != 0) else
                             '') + dataSet['table'] + '"'

if (dataSet['query']):
    query = dataSet['query']
df = pd.read_sql(query, con=conn)
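The cell above leaves the result in `df`; a quick follow-up cell to sanity-check it (hypothetical; the actual AWS_ACCOUNTS columns aren't shown in this snippet):

```python
# In[2]:

# Peek at the result set pulled over JDBC.
print(df.shape)
df.head()
```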
Example #6

import json
from pyspark.ml import PipelineModel

# If published, use the copied model path. (args, published_model_util,
# project_name, input_data and model_path are supplied by the surrounding
# DSX evaluation script and are not defined in this snippet.)
if args.get('published', '').lower() == 'true':
    copy_result = json.loads(published_model_util.copy_model(project_name, "Telco Churn Prediction Model 02"))
    if(copy_result['code'] == 200):
        model_path = copy_result['path'] + "/model"
        published_path = copy_result['path']
    else:
        raise Exception('Unable to evaluate published model: ' + copy_result['description'])

# create spark context
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# load the input data (the remote-dataset branch is disabled in this
# generated script; the CSV branch below is the one that runs)
if False:
    dataSet = dsx_core_utils.get_remote_data_set_info(input_data.get('dataset'))
    dataSource = dsx_core_utils.get_data_source_info(dataSet['datasource'])
    dbTableOrQuery = (dataSet['schema'] + '.' if len(dataSet['schema'].strip()) != 0 else '') + dataSet['table']
    dataframe = spark.read.format("jdbc").option("url", dataSource['URL']) \
        .option("dbtable", dbTableOrQuery) \
        .option("user", dataSource['user']) \
        .option("password", dataSource['password']).load()
else:
    dataframe = spark.read.csv(input_data, header='true', inferSchema='true')

# load the model from disk 
model_rf = PipelineModel.load(model_path)

# generate predictions
predictions = model_rf.transform(dataframe)

# Create Evaluation JSON
evaluation = dict()
evaluation["metrics"] = dict()
Example #7
def get_submissions_dataframe():
    submissions_dataset = dsx_core_utils.get_remote_data_set_info(FLAGS.submissions_dataset)
    submissions_df = load_dataset(submissions_dataset)
    return submissions_df