def run(request_body: dict):
    # Get agent/skill activation request body
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    connection_name = request_body["payload"]["connection_name"]
    query = request_body["payload"]["query"]

    try:
        # Create Cortex client
        client = Cortex.client(api_endpoint=api_endpoint, project=project, token=token)

        # Get connection and create mongo client
        connection = client.get_connection(connection_name)
        params = {p['name']: p['value'] for p in connection['params']}
        mongo = MongoClient(params["uri"])

        # Use connection
        database = params.get("database")
        collection = params.get("collection")

        if database and collection:
            result = list(mongo[database][collection].find(query))
        else:
            result = {"error": "collection, database and query must be provided"}
    except Exception as e:
        result = {"error": str(e)}
        logger.exception(e)

    # Return result
    return {'payload': result}
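# For illustration, a minimal sketch of the activation request body this skill
# expects; the endpoint, project, token, and connection name are placeholders,
# and the query assumes the target collection has a "status" field:
example_request = {
    "apiEndpoint": "https://api.<cortex-domain>",
    "projectId": "<project>",
    "token": "<jwt-token>",
    "payload": {
        "connection_name": "<mongo-connection>",
        "query": {"status": "active"},
    },
}
# result = run(example_request)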
def run(req: dict):
    payload = req["payload"]
    instances = payload["instances"]
    exp_name = payload["exp_name"]
    run_id = None
    if "run_id" in payload:
        run_id = payload["run_id"]

    # initialize the model once and cache it per experiment name
    if exp_name not in model_ctx:
        client = Cortex.client(api_endpoint=req["apiEndpoint"], project=req["projectId"], token=req["token"])
        model_ctx[exp_name] = init_model(exp_name, run_id, client, req["projectId"])

    # retrieve model from the context
    model_obj = model_ctx[exp_name]

    # using encoder from model object
    encoder = model_obj["encoder"]

    instances = np.array(instances, dtype=object)
    instances = instances if instances.ndim == 2 else np.reshape(instances, (1, -1))

    instances = encoder(instances)

    # predict
    predictions = model_obj["model"].predict(instances)
    scores = model_obj["model"].predict_proba(instances)
    labels = model_obj["model"].classes_
    return {
        "payload": {
            "predictions": predictions.tolist(),
            "scores": scores.tolist(),
            "labels": labels.tolist()
        }
    }
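# A possible shape for the `init_model` helper used above; this is a sketch
# only, assuming the experiment run stores separately logged "model" and
# "encoder" artifacts (the artifact keys are assumptions):
def init_model(exp_name, run_id, client, project):
    # project is already bound into the client
    experiment = client.experiment(exp_name)
    run = experiment.get_run(run_id) if run_id else experiment.last_run()
    return {
        "model": run.get_artifact("model"),
        "encoder": run.get_artifact("encoder"),
    }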
Example #3
def domain_controller(params: dict):
    # initialise the Cortex client
    api_endpoint = params.get('apiEndpoint')
    token = params.get('token')
    project_id = params.get('projectId')
    client = Cortex.client(api_endpoint=api_endpoint,
                           token=token,
                           project=project_id)

    # # get secrets keys
    # os.environ["AWS_SECRET_ACCESS_KEY"] = str(client.get_secret("awssecretkey"))
    # os.environ["AWS_ACCESS_KEY_ID"] = str(client.get_secret("awspublickey"))

    # just in case there are old environment variables for hadron
    for key in list(os.environ.keys()):
        if key.startswith('HADRON'):
            del os.environ[key]

    # extract the payload
    payload = params.get('payload', {})

    # get the domain contract repo from the payload
    uri_pm_repo = payload.get('domain_contract_repo')
    if not isinstance(uri_pm_repo, str):
        raise KeyError(
            "The message parameters passed do not have the mandatory 'domain_contract_repo' payload key"
        )

    # extract any extra kwargs
    hadron_kwargs = payload.get('hadron_kwargs', {})
    # export and pop any environment variable from the kwargs
    for key in hadron_kwargs.copy().keys():
        if str(key).isupper():
            os.environ[key] = hadron_kwargs.pop(key)
    # pop the run_controller attributes from the kwargs
    run_book = hadron_kwargs.pop('runbook', None)
    mod_tasks = hadron_kwargs.pop('mod_tasks', None)
    repeat = hadron_kwargs.pop('repeat', None)
    sleep = hadron_kwargs.pop('sleep', None)
    run_time = hadron_kwargs.pop('run_time', None)
    run_cycle_report = hadron_kwargs.pop('run_cycle_report', None)
    source_check_uri = hadron_kwargs.pop('source_check_uri', None)

    # instantiate the Controller passing any remaining kwargs
    controller = Controller.from_env(uri_pm_repo=uri_pm_repo,
                                     default_save=False,
                                     has_contract=True,
                                     **hadron_kwargs)
    # run the controller nano services.
    controller.run_controller(run_book=run_book,
                              mod_tasks=mod_tasks,
                              repeat=repeat,
                              sleep=sleep,
                              run_time=run_time,
                              source_check_uri=source_check_uri,
                              run_cycle_report=run_cycle_report)
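# A sketch of the message parameters the skill above accepts; the repository
# URI and the kwargs below are illustrative placeholders only:
example_params = {
    "apiEndpoint": "https://api.<cortex-domain>",
    "token": "<jwt-token>",
    "projectId": "<project>",
    "payload": {
        "domain_contract_repo": "s3://<bucket>/contracts/",
        "hadron_kwargs": {
            "HADRON_DEFAULT_PATH": "s3://<bucket>/data/",  # upper-case keys are exported as env vars
            "runbook": "<runbook-name>",
            "repeat": 1,
        },
    },
}
# domain_controller(example_params)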
Example #4
def delete(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"],
                           project=req["projectId"],
                           token=req["token"])

    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    result = SessionClient(client).delete_session(session_id, req["projectId"])
    return {"payload": result}
Example #5
def start(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"],
                           project=req["projectId"],
                           token=req["token"])
    ttl = None
    description = "No description given"
    if "ttl" in payload:
        ttl = payload["ttl"]
    if "description" in payload:
        description = payload["description"]
    session_client = SessionClient(client)
    session = session_client.start_session(ttl, description, req["projectId"])
    return {'payload': {"session_id": session}}
Example #6
def run(request_body: dict):
    # Get agent/skill activation request body
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    experiment_name = request_body["payload"]["experiment_name"]
    instance = request_body["payload"]["instance"]

    # Create Cortex client and get experiment
    client = Cortex.client(api_endpoint=api_endpoint, project=project, token=token)
    experiment = client.experiment(experiment_name)

    # Get model from last experiment run
    exp_run = experiment.last_run()
    model = exp_run.get_artifact('model')

    # Return model predict
    return {'payload': model.predict(instance).tolist()}
def process(params):
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'], token=params['token'])
    # get the agent payload
    payload = params.get('payload', {})
    # You can print logs to the console; these are collected by docker/k8s
    print(f'Got payload: {payload}')
    # use the `client` instance to use Cortex client libraries
    content_client = ManagedContentClient(client)
    if 'activationId' in params:
        file_name = f'jobchain-data-{params["activationId"]}'
    else:
        # fall back to a timestamp-based key when this is not run as part of an agent
        file_name = f'jobchain-data-{int(time.time())}'
    # Read `recordCount` from the payload; provide a default value or raise an exception when it is missing.
    record_count = payload.get('recordCount', 1000)
    # This is streaming the records to Cortex's managed content
    content_client.upload_streaming(file_name, datagen_stream(record_count), 'application/x-jsonlines')
    print(f'Wrote datafile to managed content key: {file_name}')
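# A sketch of what the `datagen_stream` helper used above might look like; it
# is assumed to produce a JSON-lines stream with a `color` field, which is what
# the companion job later in this listing reads back (field names are guesses):
import io
import json
import random

def datagen_stream(record_count):
    buf = io.StringIO()
    for i in range(record_count):
        buf.write(json.dumps({"id": i, "color": random.choice(["red", "green", "blue"])}) + "\n")
    return io.BytesIO(buf.getvalue().encode("utf-8"))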
Example #8
def put(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"],
                           project=req["projectId"],
                           token=req["token"])

    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    data = {}
    if "data" in payload:
        data = payload["data"]
    else:
        return {'payload': "data is required"}
    result = SessionClient(client).put_session_data(session_id, data,
                                                    req["projectId"])
    return {"payload": result}
Example #9
def get(req: dict):
    payload = req['payload']
    client = Cortex.client(api_endpoint=req["apiEndpoint"],
                           project=req["projectId"],
                           token=req["token"])

    session_id = None
    if "session_id" in payload:
        session_id = payload["session_id"]
    else:
        return {'payload': "session_id is required"}
    key = None
    if "key" in payload:
        key = payload["key"]
        if len(key) < 1:
            key = None
    session_client = SessionClient(client)
    session = session_client.get_session_data(session_id, key,
                                              req["projectId"])
    return {'payload': session}
Example #10
def process(params):
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'],
                           token=params['token'])
    # get the agent payload
    payload = params['payload']
    # You can print logs to the console; these are collected by docker/k8s
    print(f'Got payload: {payload}')
    if 'activationId' in params:
        content_key = f'jobchain-data-{params["activationId"]}'
    else:
        if 'datafileKey' not in payload:
            raise Exception("'datafileKey' is required in the payload")
        content_key = payload['datafileKey']
    print(f'Fetching datafile from managed content: {content_key}')
    # use the `client` instance to use Cortex client libraries
    content_client = ManagedContentClient(client)
    # Download the datafile from managed content (written by the upstream job
    # when this runs as part of an agent)
    content = content_client.download(content_key)
    df = pd.read_json(content, lines=True)
    counts = df['color'].value_counts()
    print(f'{counts.to_json()}')
def load_model(api_endpoint: str, token: str, project_id: str,
               experiment_name: str, run_id: str, artifact_key: str):
    global model

    if not experiment_name:
        raise ValueError(
            "experiment-name is required if a model is not initialized")

    # Initialize Cortex Client
    client = Cortex.client(api_endpoint=api_endpoint,
                           token=token,
                           project=project_id)

    # Load Model from the experiment run
    logging.info("Loading model artifacts from experiment run...")
    try:
        experiment = client.experiment(experiment_name)
        run = experiment.get_run(run_id) if run_id else experiment.last_run()
        model = run.get_artifact(artifact_key)
    except Exception as e:
        logging.error("Error: Failed to load model: {}".format(e))
        raise

    logging.info("Model Loaded!")
    # Save model
    pickle.dump(clf, open(local_pickle_file, "wb"))


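# A sketch of the `train_and_save_model` function called below; the feature
# loading is hypothetical, and `pickle` / `local_pickle_file` are assumed to be
# defined at module scope as in the original job. It must leave a pickled
# classifier at `local_pickle_file` for the upload step that follows:
def train_and_save_model():
    from sklearn.ensemble import RandomForestClassifier
    x_train, y_train = load_training_data()  # hypothetical helper
    clf = RandomForestClassifier(random_state=0)
    clf.fit(x_train, y_train)
    with open(local_pickle_file, "wb") as f:
        pickle.dump(clf, f)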
# The starting point for the job
if __name__ == '__main__':
    # Get agent/skill activation request body
    request_body = json.loads(sys.argv[1])
    api_endpoint = request_body["apiEndpoint"]
    project = request_body["projectId"]
    token = request_body["token"]
    experiment_name = request_body["payload"]["experiment_name"]

    train_and_save_model()

    # Create Cortex client and create experiment
    client = Cortex.client(api_endpoint=api_endpoint,
                           project=project,
                           token=token)
    experiment = client.experiment(experiment_name)

    # Upload model to experiment run in Cortex
    run = experiment.start_run()
    with open(local_pickle_file, "rb") as model:
        run.log_artifact_stream("model", model)
    run.set_meta("algo", "RandomForestClassifier Model")

    print(
        f'Created experiment "{experiment_name}". Started Run {run.id}. Logged RandomForestClassifier model.'
    )
def train(params):
    project = params['projectId']
    # create a Cortex client instance from the job's parameters
    client = Cortex.client(api_endpoint=params['apiEndpoint'], project=project, token=params['token'])

    payload = params['payload']
    # Read connection
    connection_name = payload['connection_name']
    print(f'Reading connection {connection_name}')
    connection = client.get_connection(connection_name)

    # Download training data using connection
    download_training_data(connection)
    print(f'Downloaded training data for {connection_name}')

    random.seed(0)
    np.random.seed(0)

    # Load dataset
    data = pd.read_csv('german_credit_eval.csv')

    # Separate outcome
    y = data['outcome']
    x = data.drop('outcome', axis=1)

    # Split the data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

    # Create an encoder
    cat_columns = [
        'checkingstatus',
        'history',
        'purpose',
        'savings',
        'employ',
        'status',
        'others',
        'property',
        'age',
        'otherplans',
        'housing',
        'job',
        'telephone',
        'foreign'
    ]
    encoder = CatEncoder(cat_columns, x, normalize=True)
    encoded_x_train = encoder(x_train.values)
    encoded_x_test = encoder(x_test.values)

    # Train a decision tree model
    dtree = DecisionTreeClassifier(criterion='entropy', random_state=0)
    dtree.fit(encoded_x_train, y_train.values)
    dtree_acc = dtree.score(encoded_x_test, y_test.values)

    # Train a multi-layer perceptron model
    mlp = MLPClassifier(hidden_layer_sizes=(20, 20), max_iter=2000)
    mlp.fit(encoded_x_train, y_train.values)
    mlp_acc = mlp.score(encoded_x_test, y_test.values)

    # Train a support vector machine model
    SVM = svm.SVC(gamma='scale', probability=True)
    SVM.fit(encoded_x_train, y_train.values)
    svm_acc = SVM.score(encoded_x_test, y_test.values)

    # Train a logistic regression model
    logit = LogisticRegression(random_state=0, solver='lbfgs')
    logit.fit(encoded_x_train, y_train.values)
    logit_acc = logit.score(encoded_x_test, y_test.values)

    # Save model meta-data

    model_name = payload["model_name"]

    save_model(client, project, model_name, payload.get("model_title", ""), payload.get("model_description", ""),
               payload.get("model_source", ""), payload.get("model_type", ""), payload.get("model_status", ""), payload.get("model_tags", []))

    # Save models as pickle files and Save experiments
    pickle_model(dtree, encoder, 'Decision Tree', dtree_acc, 'Basic Decision Tree model', 'german_credit_dtree.pkl')
    pickle_model(logit, encoder, 'LOGIT', logit_acc, 'Basic LOGIT model', 'german_credit_logit.pkl')
    pickle_model(mlp, encoder, 'MLP', mlp_acc, 'Basic MLP model', 'german_credit_mlp.pkl')
    pickle_model(SVM, encoder, 'SVM', svm_acc, 'Basic SVM model', 'german_credit_svm.pkl')

    save_experiment(client, 'gc_dtree_exp', 'german_credit_dtree.pkl', 'DecisionTreeClassifier', model_name, project)
    save_experiment(client, 'gc_logit_exp', 'german_credit_logit.pkl', 'LogisticRegression', model_name, project)
    save_experiment(client, 'gc_mlp_exp', 'german_credit_mlp.pkl', 'MLPClassifier', model_name, project)
    save_experiment(client, 'gc_svm_exp', 'german_credit_svm.pkl', 'SVM', model_name, project)
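# A sketch of the `save_experiment` helper used above; it assumes each pickled
# file is logged as a "model" artifact on a new run of the named experiment
# (the artifact key and metadata fields are assumptions; the project is already
# bound into the client):
def save_experiment(client, experiment_name, pickle_file, algo, model_name, project):
    experiment = client.experiment(experiment_name)
    run = experiment.start_run()
    with open(pickle_file, "rb") as f:
        run.log_artifact_stream("model", f)
    run.set_meta("algo", algo)
    run.set_meta("model_name", model_name)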
def make_batch_predictions(input_params):
    conn_params = {}
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    skill_name = input_params["skillName"]
    outcome = input_params["properties"]["outcome"]

    # Initialize Cortex Client
    client = Cortex.client(api_endpoint=url, token=token, project=project)

    # Read cortex connection details
    connection = client.get_connection(input_params["properties"]["connection-name"])
    for p in connection['params']:
        conn_params.update({p['name']: p['value']})
    log_message(msg=f"Connection Params: {str(conn_params)}", log=get_logger(skill_name), level=logging.INFO)
    # Load Model from the experiment run
    model = load_model(client, input_params["properties"]["experiment-name"], input_params["properties"]["run-id"], project)

    if connection.get("connectionType") == "s3":
        output_path = input_params["properties"]["output-path"]
        secret_key = input_params["properties"]["aws-secret"]
        conf = SparkConf().set("fs.s3a.access.key", conn_params.get('publicKey')) \
            .set("fs.s3a.secret.key", secret_key) \
            .set("fs.s3a.endpoint", conn_params.get("s3Endpoint")) \
            .set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")

        # Initialize spark session
        spark = initialize_spark_session(conf=conf)
        sc = spark.sparkContext

        # Get S3 file path of the dataset
        file = conn_params["uri"]

        # Create spark data-frame for prediction
        df = spark.read.option("inferSchema", True).csv(file, header=True)
        df = df.drop(outcome)
        log_message(msg=f"DataFrame Schema: {str(df.printSchema())}", log=get_logger(skill_name), level=logging.INFO)

        # Make predictions
        df = score_predictions(df, model, outcome, sc, skill_name)
        # Converting struct to double
        for t in df.dtypes:
            if t[1] == "struct<_1:double>":
                df = df.withColumn(t[0], col(t[0]).getField("_1"))

        # Writing to output
        df.write.csv(output_path, mode='append', header=True)

    elif connection.get("connectionType") == "mongo":
        output_collection = input_params["properties"]["output-collection"]
        mongo_uri = input_params["properties"]["mongo-uri"]
        database = conn_params.get("database")
        collection = conn_params.get("collection")

        spark = initialize_spark_session(conf=None)
        sc = spark.sparkContext
        df = spark.read.format("com.mongodb.spark.sql.DefaultSource").option("uri", mongo_uri) \
            .option("database", database) \
            .option("collection", collection).load()
        df = df.drop(outcome, "_id")
        log_message(msg=f"DataFrame Schema after Prediction: {str(df.printSchema())}", log=get_logger(skill_name), level=logging.INFO)

        # Score predictions
        df = score_predictions(df, model, outcome, sc, skill_name)
        for t in df.dtypes:
            if t[1] == "struct<_1:double>":
                df = df.withColumn(t[0], col(t[0]).getField("_1"))

        # Writing to output
        df.write.format("com.mongodb.spark.sql.DefaultSource") \
            .mode("append").option("uri", mongo_uri) \
            .option("database", database) \
            .option("collection", output_collection).save()
    else:
        # Implement based on requirement
        spark = initialize_spark_session(conf=None)
    spark.stop()
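# A sketch of the `initialize_spark_session` helper used above, assuming a
# plain SparkSession builder with an optional SparkConf (the app name is a
# placeholder):
from pyspark.sql import SparkSession

def initialize_spark_session(conf=None):
    builder = SparkSession.builder.appName("batch-predictions")
    if conf is not None:
        builder = builder.config(conf=conf)
    return builder.getOrCreate()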
import json

from cortex import Cortex
from cortex.model import Model, ModelClient
from cortex.experiment import Experiment, ExperimentClient
from cortex.connection import ConnectionClient, Connection
from cortex.skill import SkillClient

params = {
    "projectId": PROJECT_ID,
    "apiEndpoint": API_ENDPOINT,
    "token": CORTEX_TOKEN
}

if __name__ == "__main__":
    client = Cortex.client(api_endpoint=params['apiEndpoint'],
                           project=params['projectId'],
                           token=params['token'])
    cc = ConnectionClient(client)
    conn_params = {}
    with open("conn.json") as f:
        conn_params = json.load(f)

    conn_params["params"] = []
    for name, value in CONN_PARAMS.items():
        conn_params["params"].append({"name": name, "value": value})

    # create a secret called awssecretadmin in your project which contains the aws secret key

    # create a connection
    cc.save_connection(project=PROJECT_ID, connection=conn_params)
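# For reference, a sketch of what `conn.json` and CONN_PARAMS might hold for an
# S3 connection; names and values are illustrative placeholders, and the secret
# reference assumes the `awssecretadmin` secret mentioned above:
EXAMPLE_CONN_JSON = {
    "name": "my-s3-connection",
    "title": "My S3 Connection",
    "connectionType": "s3",
}
EXAMPLE_CONN_PARAMS = {
    "uri": "s3://<bucket>/<path>/data.csv",
    "publicKey": "<aws access key id>",
    "secretKey": "<reference to the awssecretadmin secret>",
    "s3Endpoint": "https://s3.amazonaws.com",
}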
Example #16
def get_runtime_args(spark_config):
    # spark_config is assumed to map spark-submit flags to {property: value} dicts
    args = []
    for key, val in spark_config.items():
        if isinstance(val, dict):
            for y in val.keys():
                s_val = val[y]
                args.append(key)
                args.append("{}={}".format(y, s_val))
    return args


if __name__ == '__main__':
    input_params = json.loads(sys.argv[1])
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    skill_name = input_params["skillName"]
    experiment_name = input_params["properties"]["experiment-name"]
    run_id = input_params["properties"]["run-id"]
    client = Cortex.client(api_endpoint=url, token=token, project=project)
    experiment_client = ExperimentClient(client)
    result = experiment_client.get_experiment(experiment_name, project)
    experiment = Experiment(result, project, experiment_client)
    run = experiment.get_run(run_id)
    conn_params = {}
    connection = client.get_connection(
        input_params["properties"]["connection-name"])
    for p in connection['params']:
        conn_params.update({p['name']: p['value']})
    spark_config = run.get_artifact('spark-config')
    log_message(msg=f"Spark Config: {str(spark_config)}",
                log=get_logger(skill_name),
                level=logging.INFO)
    run_args = get_runtime_args(spark_config)
    run_args.append("local:///opt/spark/work-dir/src/main/python/main.py")
Example #17
def make_batch_predictions(input_params):
    logging.info("Batch Prediction: Invoke Request:{}".format(input_params))
    conn_params = {}
    url = input_params["apiEndpoint"]
    token = input_params["token"]
    project = input_params["projectId"]
    outcome = input_params["properties"]["outcome"]
    batch_size = int(input_params["properties"]["batch-size"])

    try:
        # Initialize Cortex Client
        client = Cortex.client(api_endpoint=url, token=token, project=project)

        # Read cortex connection details
        connection = client.get_connection(
            input_params["properties"]["connection-name"])
        for p in connection['params']:
            conn_params.update({p['name']: p['value']})
        print(conn_params)
        logging.info("Connection params: %s", conn_params)

        # Load Model from the experiment run
        model = load_model(client,
                           input_params["properties"]["experiment-name"],
                           input_params["properties"]["run-id"],
                           input_params["properties"]["model-artifact"])
        logging.info("Model Loaded!")

        if connection.get("connectionType") == "s3":
            s3_output_path = input_params["properties"]["output-path"]
            # Get S3 file path of the dataset
            uri = conn_params["uri"]
            s3_client = init_s3_client(conn_params.get('publicKey'),
                                       conn_params.get('secretKey'))
            local_path = download_file(s3_client, uri)
            output_path = 'temp.csv'
            for chunked_df in pd.read_csv(local_path,
                                          header=0,
                                          sep=",",
                                          chunksize=batch_size):
                if outcome in chunked_df.columns.tolist():
                    chunked_df = chunked_df.drop(outcome, axis=1)
                logging.info("Processing records of size: {}".format(
                    chunked_df.shape[0]))

                # Score Predictions for a Batch
                predicted_df = score_predictions(chunked_df, model=model)
                if not os.path.isfile(output_path):
                    predicted_df.to_csv(output_path, index=False)
                else:
                    predicted_df.to_csv(output_path,
                                        mode='a',
                                        header=False,
                                        index=False)

            # Uploading file to S3
            upload_file(s3_client, output_path, s3_output_path)

        elif connection.get("connectionType") == "mongo":
            output_collection = input_params["properties"]["output-collection"]
            mongo_uri = conn_params.get("uri")
            database = conn_params.get("database")
            collection = conn_params.get("collection")
            client = pymongo.MongoClient(mongo_uri)
            total_records = client[database][collection].count_documents({})
            skip_records = 0
            while skip_records < total_records:
                cursor = client[database][collection].find(
                    {}).limit(batch_size).skip(skip_records)
                # Expand the cursor and construct the DataFrame
                chunked_df = pd.DataFrame(list(cursor))
                if not chunked_df.empty:
                    if outcome in chunked_df.columns.tolist():
                        chunked_df = chunked_df.drop([outcome, "_id"], axis=1)
                    logging.info("Processing records of size: {}".format(
                        chunked_df.shape[0]))

                    # Score Predictions for a Batch
                    predicted_df = score_predictions(chunked_df, model=model)
                    predicted_df.reset_index(inplace=True, drop=True)
                    data_dict = predicted_df.to_dict("records")
                    # Insert collection
                    client[database][output_collection].insert_many(data_dict)
                    skip_records += batch_size
                else:
                    break
            client.close()
        logging.info("Prediction Job Completed!")
    except Exception as e:
        logging.error("Error while processing batch predictions. Message: ", e)
Example #18
def get_interfaces(schema_version):
    cortex = Cortex.client()
    builder = ProfileBuilder(cortex)
    profile_schema = builder.profiles(schema_version)
    return profile_schema