Example #1
def create_aml_workspace(cfg):
    """ Creates the AML workspace if it doesn't exist. If it does
    exist, return the existing one.
    input : cfg : AMLConfiguration object containing all creation parameters
    output : ws : type workspace
    """
    try:
        log.info('Trying to retrieve config file from local filesystem.')
        ws = Workspace.from_config()
        if ws.name == cfg.AMLConfig.workspace:
            log.info('Workspace found with name: ' + ws.name)
            log.info('  Azure region: ' + ws.location)
            log.info('  Subscription id: ' + ws.subscription_id)
            log.info('  Resource group: ' + ws.resource_group)
        else:
            log.error('Workspace found ({}), but not the same as in the JSON config file ({}). Please delete config folder (aml_config) and restart.'.format(ws.name, cfg.AMLConfig.workspace))
            exit(-2)
    except Exception:
        log.info('Unable to find AML config files in (aml_config) - attempting to create them.')
        try:
            log.info('Creating the workspace on Azure.')
            ws = Workspace.create(name = cfg.AMLConfig.workspace, 
                auth = cfg.Credentials,
                subscription_id = cfg.subscription_id,
                resource_group = cfg.AMLConfig.resource_group, 
                location = cfg.AMLConfig.location,
                create_resource_group = True,
                exist_ok = False)
            log.info('Workspace created. Saving details to file in (aml_config) to accelerate further launches.')
            ws.get_details()
            ws.write_config()
        except Exception as exc:
            log.error('Unable to create the workspace on Azure. Error Message : ' + str(exc))
            exit(-2)
    return ws
Example #2
def get_workspace(config_file):
    ws = Workspace.from_config(config_file)
    print('Workspace name: ' + ws.name,
          'Azure region: ' + ws.location,
          'Subscription id: ' + ws.subscription_id,
          'Resource group: ' + ws.resource_group,
          sep='\n')
def main(train_path, pred_path, n_pred, dt, target, time_limit_min):
    df_train = pd.read_csv(train_path)
    df_train[dt] = pd.to_datetime(df_train[dt])

    time_series_settings = {
        "time_column_name": dt,
        "max_horizon": n_pred,
        "target_lags": "auto",
        "target_rolling_window_size": "auto"
    }
    automl_config = AutoMLConfig(task="forecasting",
                                 training_data=df_train,
                                 label_column_name=target,
                                 n_cross_validations=5,
                                 max_cores_per_iteration=-1,
                                 path=os.environ["SCRATCH"],
                                 experiment_timeout_minutes=time_limit_min,
                                 ensemble_download_models_timeout_sec=3600,
                                 **time_series_settings)
    ws = Workspace.from_config()
    experiment = Experiment(ws, "experiment")
    best_run, fitted_model = experiment.submit(automl_config,
                                               show_output=True).get_output()

    print("Best pipeline:")
    try:
        ensemble = vars(fitted_model.steps[1][1])["_wrappedEnsemble"]
        print(ensemble.__class__)
        steps = ensemble.estimators_
    except Exception:
        steps = fitted_model.steps
    best_pipeline = ""
    for i, step in enumerate(steps):
        best_pipeline += f"{i}. {str(step)}\n"
    print(best_pipeline)

    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.max_colwidth', None)
    print(fitted_model.named_steps["timeseriestransformer"].
          get_engineered_feature_names())
    featurization_summary = fitted_model.named_steps[
        "timeseriestransformer"].get_featurization_summary()
    print(pd.DataFrame.from_records(featurization_summary))

    x_pred = pd.date_range(df_train[dt].iloc[-1],
                           periods=n_pred + 1,
                           freq=pd.infer_freq(df_train[dt]))[1:]
    y_pred = fitted_model.forecast(forecast_destination=x_pred[-1])[0]
    #     y_pred = fitted_model.forecast(pd.DataFrame({dt: x_pred}))[0]

    df_pred = pd.DataFrame({dt: x_pred, target: y_pred})
    df_pred.to_csv(pred_path, index=False)
# auth = InteractiveLoginAuthentication(tenant_id = 'mytenantid')
# ws = Workspace.from_config(auth = auth)
# ```
#
# If you need to run in an environment where interactive login is not possible, you can use Service Principal authentication by replacing the `ws = Workspace.from_config()` line in the cell below with the following:
#
# ```
# from azureml.core.authentication import ServicePrincipalAuthentication
# auth = ServicePrincipalAuthentication('mytenantid', 'myappid', 'mypassword')
# ws = Workspace.from_config(auth = auth)
# ```
# For more details, see [aka.ms/aml-notebook-auth](http://aka.ms/aml-notebook-auth)

# In[85]:

ws = Workspace.from_config()

# Choose a name for the experiment and specify the project folder.
experiment_name = 'automl-classification'
project_folder = './sample_projects/automl-classification'

experiment = Experiment(ws, experiment_name)

output = {}
output['SDK version'] = azureml.core.VERSION
output['Subscription ID'] = ws.subscription_id
output['Workspace Name'] = ws.name
output['Resource Group'] = ws.resource_group
output['Location'] = ws.location
output['Project Directory'] = project_folder
output['Experiment Name'] = experiment.name
import json
import pickle
import numpy as np
import pandas as pd
from azureml.core.workspace import Workspace
import azureml.train.automl
from sklearn.externals import joblib
from azureml.core.model import Model

ws = Workspace.from_config('./config.json')

from azureml.core.webservice import Webservice, AciWebservice, AksWebservice
service = AciWebservice(ws, "sentiment-scorer-korean")
# service = AksWebservice(ws, "sentiment-scorer-korean-aks")

# input_sample = pd.DataFrame({'id': pd.Series(['6471903'], dtype='int64'), 'document': pd.Series(['진짜 별로다 헐 ㅡ'], dtype='object')})
from load_dataset import testdata as input_sample

import json
test = json.dumps({"data": input_sample.values.tolist()})
result = service.run(input_data=bytes(test, encoding="utf8"))

input_sample['predicted'] = list(json.loads(result).values())[0]
print(input_sample)
Example #6
def load_workspace_from_config():
    return Workspace.from_config()
Example #7
    exp = get_experiment(ws, experiment_name)
    print(
        'Cancelling existing experiment with name: {}'.format(experiment_name))
    for run in tqdm(list(exp.get_runs())):
        run.cancel()


if __name__ == "__main__":
    print("SDK Version:", azureml.core.VERSION)
    set_diagnostics_collection(send_diagnostics=True)

    # Read in config
    conf = Config(config_filepath='~/aml_secrets/aml_secrets_rr2msrlabs.yaml')

    # Config region
    conf_aml = conf['aml_config']
    conf_cluster = conf['cluster_config']
    conf_docker = conf['azure_docker']
    conf_experiment = conf['experiment']
    # endregion

    # Initialize workspace
    # Make sure you have downloaded your workspace config
    ws = Workspace.from_config(path=conf_aml['aml_config_file'])
    print('Workspace name: ' + ws.name,
          'Azure region: ' + ws.location,
          'Subscription id: ' + ws.subscription_id,
          'Resource group: ' + ws.resource_group,
          sep='\n')

    launch_experiment(ws, conf_aml, conf_cluster, conf_docker, conf_experiment)
Example #8
from azureml.core.workspace import Workspace
from azureml.core.compute import ComputeTarget, AksCompute
from azureml.exceptions import ComputeTargetException
from azureml.core.webservice import AksWebservice
from azureml.core.model import InferenceConfig, Model
from azureml.core.environment import Environment, DEFAULT_GPU_IMAGE

# Initialize a workspace
ws = Workspace.from_config(
    "C:/Users/Danilo.Bento/Icon Dropbox/DEVDATA/RO/DEVELOPMENT/SIB2/dev/.azureml/config.json"
)
print('Workspace name: ' + ws.name,
      'Azure region: ' + ws.location,
      'Subscription id: ' + ws.subscription_id,
      'Resource group: ' + ws.resource_group,
      'Workspace connected',
      sep='\n')

# Choose a name for your cluster
aks_name = "SIB2-AKS-GPU"

# Check to see if the cluster already exists and create it if it doesn't
try:
    aks_target = ComputeTarget(workspace=ws, name=aks_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    # Provision AKS cluster with GPU machine
    prov_config = AksCompute.provisioning_configuration(vm_size="Standard_NC6")

    # Create the cluster
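    # The snippet is truncated here; a minimal sketch of the remaining steps,
    # assuming the standard ComputeTarget.create()/wait_for_completion() flow
    # from azureml-core (not taken from the original source):
    aks_target = ComputeTarget.create(workspace=ws,
                                      name=aks_name,
                                      provisioning_configuration=prov_config)
    aks_target.wait_for_completion(show_output=True)
    print(aks_target.provisioning_state)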
Example #9
import numpy as np
from sklearn.metrics import mean_absolute_error
from azureml.train.automl.automlexplainer import retrieve_model_explanation
from azureml.core.model import Model
from azureml.core.image import ContainerImage
from azureml.core.image.image import Image
from azureml.core import Webservice
from azureml.core.webservice import AciWebservice

# try:
# setting the local env to handle missing packages
run_user_managed = RunConfiguration()
run_user_managed.environment.python.user_managed_dependencies = False

# Create workspace object for existing one and create an experiment
ws = Workspace.from_config('subscription.json')
print(ws.name, ws.location, ws.resource_group, sep='\t')
experiment = Experiment(workspace=ws, name='experiment1')

# full path to training data,testing data
file_path1 = os.path.join(os.getcwd(), "cumodelwo2014.csv")
dflowtr = dprep.auto_read_file(path=file_path1)
file_path2 = os.path.join(os.getcwd(), "test2014.csv")
dflowte = dprep.auto_read_file(path=file_path2)

# Specifying x(causal) and y(response) attributes in training data
dflowtr_x = dflowtr.keep_columns([
    'cell-ID', 'Soil_Name', 'MEAN_Yld_V', 'COUNT_Yld', 'MEAN_Eleva',
    'RANGE_Elev', 'Crop-Type', 'V.A.T(F)', 'R.A.T(F)', 'M.A.T(F)',
    'V.PET(inch)', 'R.PET(inch)', 'M.PET(inch)', 'V.T.R(inch)', 'R.T.R(inch)',
    'M.T.R(inch)'
Example #10
parser = argparse.ArgumentParser()
parser.add_argument('experiment', help='Azure ML experiment name')
parser.add_argument('--workspace-config',
                    default="azureml_config.json",
                    help='Download from the Azure ML portal')
parser.add_argument('--compute',
                    default="nc6v3",
                    help='Azure ML training cluster')
parser.add_argument('--max_epochs', type=int, default=300)
args = parser.parse_args()

print(args)

# load workspace configuration from the config.json file
ws = Workspace.from_config(path=args.workspace_config)
print('=' * 40)
print(ws)

# create an experiment
exp = Experiment(workspace=ws, name=args.experiment)
print('=' * 40)
print(exp)

# specify a cluster
compute_target = ws.compute_targets[args.compute]
print('=' * 40)
print(compute_target)

# Mount the blob to the training container
# NOTE: (prerequisite) unzip and upload the ePillID data to the blob
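# The snippet ends here; a minimal, illustrative sketch of one way to mount the
# uploaded data, assuming it sits under an 'epillid' folder in the workspace's
# default blob datastore (the folder and input names are assumptions, not from the source):
from azureml.core import Dataset

datastore = ws.get_default_datastore()
epillid_dataset = Dataset.File.from_files(path=(datastore, 'epillid/**'))
epillid_mount = epillid_dataset.as_named_input('epillid').as_mount()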
Example #11
# Check core SDK version number for debugging purposes
import azureml.core
print("SDK Version:", azureml.core.VERSION)

subscription_id = "fac34303-435d-4486-8c3f-7094d82a0b60"
resource_group = "aml-notebooks"
workspace_name = "haieastus2ws3"
workspace_region = 'eastus2'  # or eastus2euap

# import the Workspace class and check the azureml SDK version
from azureml.core.workspace import Workspace, WorkspaceException

ws = Workspace.create(name=workspace_name,
                      subscription_id=subscription_id,
                      resource_group=resource_group,
                      location=workspace_region)
ws.get_details()

ws.write_config()

# load workspace configuration from the ./aml_config/config.json file.
my_workspace = Workspace.from_config()

print(my_workspace.get_details())
Example #12
from azureml.core.authentication import AzureCliAuthentication
from azureml.core.workspace import Workspace
from azureml.core.datastore import Datastore
from azureml.core.dataset import Dataset

dstore_name = 'mldemodatastore'
ds_file = "movielens100k.movies"

# Configure workspace
cli_auth = AzureCliAuthentication()
ws = Workspace.from_config(auth=cli_auth)

# Access your dataset
dataset = Dataset.get(ws, ds_file)

# Load in-memory Dataset to your local machine as pandas dataframe
pdDf = dataset.to_pandas_dataframe()
print(pdDf.head())
Example #13
def peptide_identification(args):
    print(datetime.now(), ': Peptide identification starts...')
    print('Settings: ')
    print(args)

    # PLATO setting
    subclusterCount = args.subclusterCount
    spy = args.spy
    spy_portion = args.spy_portion
    RN = args.RN
    rnd_all = args.rnd_all  # If random method, include all decoys
    rnd_portion = args.rnd_portion  # If random method, include rnd.portion of positive set, default 1: pos set = neg set
    replicates_cnt = args.replicates_cnt
    include_label = args.include_label
    AML_preprocess = args.AML_preprocess
    output_folder = args.output_folder

    # AutoML parameter setting
    autoML_best_model_selection = args.autoML_best_model_selection
    autoML_iterations = args.autoML_iterations

    metric = args.metric  # Other metrics: azureml.train.automl.utilities.get_primary_metrics('classification')
    cv_fold = args.cv_fold

    # Input, output
    file_name = args.sample_name
    input_path = args.input_folder
    output_path = output_folder + '/' + file_name
    log_file = output_path + '_autoML_errors_log.html'

    # Instantiate AutoML config and create an experiment in autoML workspace
    ws = Workspace.from_config()
    experiment_name = file_name
    experiment = Experiment(ws, experiment_name)
    print(datetime.now(),
          ': Assigned experiment ' + experiment_name + ' on Azure portal ')

    output = {}
    output['SDK version'] = azureml.core.VERSION
    output['Workspace Name'] = ws.name
    output['Resource Group'] = ws.resource_group
    output['Location'] = ws.location
    outputDf = pd.DataFrame(data=output, index=[''])
    print(outputDf)

    print(datetime.now(), ': Reading inputs')
    # Read POSITIVES and ALL inputs
    positives_path = glob.glob(input_path + file_name + '*POSITIVES*')
    raw_positives = pd.read_csv(positives_path[0], sep='\t')

    if AML_preprocess == True:
        all_path = glob.glob(input_path + file_name + '-ALL.txt')
        raw_all = pd.read_csv(all_path[0], sep='\t')
        # Extract new features
        # First and last three amino acids of peptide sequences as features - If NA then B category
        raw_all['Peptide'] = raw_all.Peptide.str.replace(
            r'([\(\[]).*?([\)\]])', r'B', regex=True)
        raw_all['P1'] = raw_all['Peptide'].str[0]
        raw_all['P2'] = raw_all['Peptide'].str[2]
        raw_all['P3'] = raw_all['Peptide'].str[3]
        raw_all['P4'] = raw_all['Peptide'].str[-4]
        raw_all['P5'] = raw_all['Peptide'].str[-3]
        raw_all['P6'] = raw_all['Peptide'].str[-1]

    else:
        all_path = glob.glob(input_path + file_name +
                             '_percolator_feature.txt')
        raw_all = pd.read_csv(all_path[0], sep='\t')

    raw_all['Class'] = 0

    # Make positive and test set
    test_data = raw_all.drop(['ScanNr', 'Proteins'], axis=1)
    positive_set = pd.merge(left=pd.DataFrame(raw_positives['SpecId']),
                            right=pd.DataFrame(test_data),
                            how='left',
                            left_on='SpecId',
                            right_on='SpecId')
    positive_set['Class'] = 1

    # Remove decoys in positive set, if there is any
    decoys_in_positive_idx = positive_set.index[positive_set['Label'] ==
                                                -1].tolist()
    positive_set = positive_set[positive_set['Label'] != -1]

    # Dataframe to store predictions
    all_predictions = pd.DataFrame({
        'SpecId': list(test_data['SpecId']),
        'Peptide': list(test_data['Peptide']),
        'Label': list(test_data['Label'])
    })
    prediction_summary = all_predictions

    # Prepare test set for modeling
    y_test = test_data['Class']
    if include_label == True:
        X_test = test_data.drop(['SpecId', 'Peptide', 'Class'], axis=1)
    else:
        X_test = test_data.drop(['SpecId', 'Peptide', 'Label', 'Class'],
                                axis=1)

    # Prepare positive set for modeling
    positive_set_idx = [
        test_data['SpecId'].tolist().index(x)
        for x in positive_set['SpecId'].tolist()
        if x in test_data['SpecId'].tolist()
    ]

    # Used to create the negative set
    decoys_idx = np.setdiff1d(
        test_data.index[test_data['Label'] == -1].tolist(),
        decoys_in_positive_idx).tolist()

    global gower_dist_avg
    if RN == True:
        if os.path.exists(input_path + file_name +
                          'gower_dist_avg.npy') == False:
            print(datetime.now(), ': Calculating Gower distance')
            gower_dist = gower.gower_matrix(test_data)
            selected_rows = gower_dist[positive_set_idx]
            gower_dist_avg = np.mean(selected_rows, axis=0)
            print(datetime.now(), ': Saving Gower distance matrix')
            np.save(input_path + '/' + file_name + 'gower_dist_avg.npy',
                    gower_dist_avg)  # save
        else:
            print(datetime.now(), ': Loading Gower distance matrix from ',
                  input_path + file_name + 'gower_dist_avg.npy')
            gower_dist_avg = np.load(input_path + file_name +
                                     'gower_dist_avg.npy')  # load

    if spy == True:
        all_spies = pd.DataFrame()
    '''
    Create train set by concatenating positive and negative set, build model(s) using autoML
    and store predictions based on the best model
    '''
    for rep in range(0, replicates_cnt):
        print(datetime.now(), ': Replicate #', rep + 1)
        if spy == True:
            # Exclude spy_portion of training data to be the spies
            positive_set = positive_set.sample(n=len(positive_set),
                                               random_state=rep *
                                               100).reset_index(drop=True)
            spySet_size = round(len(positive_set) * spy_portion)
            spies_ID = positive_set.loc[1:spySet_size, ['SpecId']]
            positive_set_wSpy = positive_set.iloc[spySet_size +
                                                  1:len(positive_set)]

        if RN == False:
            if rnd_all == True:
                # Negative set includes all decoys
                negative_set_idx = decoys_idx
            else:
                # Negative set idx includes rnd_portion times of |positive_set| indices
                random.seed(rep)
                random.shuffle(decoys_idx)
                negative_set_idx = decoys_idx[0:rnd_portion *
                                              len(positive_set)]
        else:
            print(datetime.now(), ': Starts estimating RNs')
            negative_set_idx = reliable_negative(test_data, positive_set,
                                                 subclusterCount, rep)
            print(datetime.now(), ': Ends estimating RNs')

        negative_set = test_data.iloc[negative_set_idx]

        if spy == True:
            train_data = pd.concat([positive_set_wSpy, negative_set], axis=0)
        else:
            train_data = pd.concat([positive_set, negative_set], axis=0)

        y_train = train_data['Class']
        if include_label == True:
            X_train = train_data.drop(['SpecId', 'Peptide', 'Class'], axis=1)
        else:
            X_train = train_data.drop(['SpecId', 'Peptide', 'Class', 'Label'],
                                      axis=1)

        print('Training set size:', len(y_train), '\nTest set size:',
              len(y_test))

        automl_config = AutoMLConfig(task='classification',
                                     debug_log=log_file,
                                     primary_metric=metric,
                                     iteration_timeout_minutes=200,
                                     iterations=autoML_iterations,
                                     verbosity=logging.INFO,
                                     preprocess=AML_preprocess,
                                     X=X_train,
                                     y=y_train,
                                     n_cross_validations=cv_fold,
                                     model_explainability=True)

        print(datetime.now(), ': modeling replicate #' + str(rep + 1) + '...')
        local_run = experiment.submit(automl_config, show_output=True)

        if autoML_best_model_selection == False:
            # Retrieve the Best Model based on bunch of metrics
            children = list(local_run.get_children())
            metricslist = {}
            for run in children:
                properties = run.get_properties()
                metrics = {
                    k: v
                    for k, v in run.get_metrics().items()
                    if isinstance(v, float)
                }
                metricslist[int(properties['iteration'])] = metrics

            rundata = pd.DataFrame(metricslist).sort_index(axis=1)
            tmp = rundata.T.sort_values([
                'AUC_weighted', 'f1_score_weighted',
                'precision_score_weighted', 'recall_score_weighted',
                'weighted_accuracy'
            ], ascending=False)
            rundata = tmp.sort_values('log_loss', ascending=True).T
            best_run_iteration = rundata.columns.values[0]
            rundata.to_csv(output_path + '_metrics_list_' + str(rep) + '.txt')
            best_run, fitted_model = local_run.get_output(
                iteration=best_run_iteration)
        else:
            best_run, fitted_model = local_run.get_output()

        print('Best run: ', best_run)
        print(datetime.now(), ': Saving best model and predictions')
        # Save the best model, prediction value and probability
        modelname = output_path + '_model_' + str(rep) + '.sav'
        joblib.dump(fitted_model, modelname)
        y_pred_val = fitted_model.predict(X_test)
        y_pred_prob = fitted_model.predict_proba(X_test)

        # Add the results of the replicate to all predictions table
        all_predictions['pred_rep' + str(rep)] = list(y_pred_val)
        all_predictions['prob_rep' + str(rep)] = list(
            [item[1] for item in y_pred_prob])

        # Overwrite prediction values based on the spies cutoff
        if spy == True:
            threshold = min(
                pd.merge(spies_ID, all_predictions,
                         on='SpecId')['prob_rep' + str(rep)])
            all_predictions['pred_rep' + str(rep)] = np.where(
                all_predictions['prob_rep' + str(rep)] >= threshold, 1, 0)
            all_spies['SpecId' + str(rep)] = spies_ID['SpecId']
            all_spies['Prob_rep' + str(rep)] = list(
                pd.merge(spies_ID, all_predictions,
                         on=['SpecId'])['prob_rep' + str(rep)])

        print(datetime.now(), ': Replicate #' + str(rep + 1) + ' processed!')
        all_predictions.to_csv(output_path + '_all_predictions.csv',
                               index=False)

    if spy == True:
        all_spies.to_csv(output_path + '_all_spies.csv', index=False)

    print(datetime.now(), ': Generate prediction summary of all replicates')
    pred_col_indecies = [
        col for col in all_predictions.columns if 'pred' in col
    ]
    prob_col_indecies = [
        col for col in all_predictions.columns if 'prob' in col
    ]

    prediction_summary['Std'] = all_predictions[prob_col_indecies].std(
        skipna=True, axis=1)
    prediction_summary['Min'] = all_predictions[prob_col_indecies].min(
        skipna=True, axis=1)
    prediction_summary['Max'] = all_predictions[prob_col_indecies].max(
        skipna=True, axis=1)
    prediction_summary['Avg'] = all_predictions[prob_col_indecies].mean(
        skipna=True, axis=1)
    prediction_summary['Median'] = all_predictions[prob_col_indecies].median(
        skipna=True, axis=1)
    prediction_summary['Vote'] = all_predictions[pred_col_indecies].sum(
        skipna=True, axis=1)
    prediction_summary.to_csv(output_path + '_prediction_summary.txt',
                              sep='\t',
                              index=False)

    # Feature importance
    print(datetime.now(), ': Output feature importance of the best run')
    client = ExplanationClient.from_run(best_run)
    # raw=True vs. raw=False below distinguishes raw from engineered feature
    # explanations (assumes the 'raw' parameter is available in the installed azureml-interpret)
    raw_explanations = client.download_model_explanation(
        raw=True, top_k=len(X_test.columns))
    print('Raw feature importance')
    print(raw_explanations.get_feature_importance_dict())
    d = raw_explanations.get_feature_importance_dict()
    raw_feature_importance = pd.DataFrame(list(d.items()))
    raw_feature_importance.to_csv(output_path + '_raw_feature_importance.csv',
                                  index=False)
    # Engineered
    engineered_explanations = client.download_model_explanation(
        raw=False, top_k=len(X_test.columns))
    print('Engineered feature importance')
    print(engineered_explanations.get_feature_importance_dict())
    d = engineered_explanations.get_feature_importance_dict()
    engineered_feature_importance = pd.DataFrame(list(d.items()))
    engineered_feature_importance.to_csv(output_path +
                                         '_engineered_feature_importance.csv',
                                         index=False)

    now = datetime.now()
    print(datetime.now(), ': Program end')
Example #14
def main():
    
    # local compute
    run_user_managed = RunConfiguration()
    run_user_managed.environment.python.user_managed_dependencies = False

    # print to check azure sdk installation
    print(azureml.core.VERSION)

    # create workspace object to connect to omtest workspace in MLSERVICE
    ws = Workspace.from_config('./config.json')
    # default data store
    # ds = ws.get_default_datastore()
    # print(ds)

    # choose a name for the run history container in the workspace
    experiment_name = 'automated-ml-regression'
    # project folder
    project_folder = './automated-ml-regression'

    output = {}
    output['SDK version'] = azureml.core.VERSION
    output['Subscription ID'] = ws.subscription_id
    output['Workspace'] = ws.name
    output['Resource Group'] = ws.resource_group
    output['Location'] = ws.location
    output['Project Directory'] = project_folder
    pd.set_option('display.max_colwidth', None)
    print(pd.DataFrame(data=output, index=['']).T)

    # stats for all the columns
    dflow = dprep.auto_read_file(path='/Users/omprakashnekkanti/Desktop/Spring 2019/CS445-Capstone/automatedML/cuformodel.csv')
    print(type(dflow))
    dflow.get_profile()

    # filepath as a string
    file_path = os.path.join(os.getcwd(), 'cuformodel.csv')
    print(file_path)
    print(type(file_path))

    # dflow_prepared = dprep.Dataflow.open(file_path)
    # dflow_prepared.get_profile()

    dflow_X = dflow.keep_columns([
        'cell-ID', 'Soil_Name', 'MEAN_Yld_V', 'COUNT_Yld', 'MEAN_Eleva',
        'RANGE_Elev', 'Crop-Type', 'V.A.T(F)', 'R.A.T(F)', 'M.A.T(F)',
        'V.PET(inch)', 'R.PET(inch)', 'M.PET(inch)', 'V.T.R(inch)', 'R.T.R(inch)',
        'M.T.R(inch)'
    ])
    dflow_y = dflow.keep_columns('NormalizedYield')

    x_df = dflow_X.to_pandas_dataframe()
    y_df = dflow_y.to_pandas_dataframe()

    x_train, x_test, y_train, y_test = train_test_split(
        x_df, y_df, test_size=0.2, random_state=223)
    # flatten y_train to a 1-D array
    y_train = y_train.values.flatten()

    automl_settings = {
        "iteration_timeout_minutes": 20,
        "iterations": 40,
        "primary_metric": 'mean_absolute_error',
        "preprocess": False,
        "verbosity": logging.INFO,
        "n_cross_validations": 10
    }

    # local compute
    automated_ml_config = AutoMLConfig(
        task='regression',
        debug_log='automated_ml_errors.log',
        path=project_folder,
        X=x_train.values,
        y=y_train,
        **automl_settings)
    experiment = Experiment(ws, experiment_name)
    local_run = experiment.submit(automated_ml_config, show_output=True)
Example #15
def main():
    ws = Workspace.from_config()
    
    df = get_DDoS_dataset(ws)
Example #16
def get_workspace_or_default(subscription_id=None,
                             resource_group=None,
                             workspace_name=None,
                             auth=None,
                             project_path=None,
                             logger=None):
    """
    Order is
    1) Get workspace from the specified parameters,
    2) From project context,
    3) Using az configure defaults.
    :param workspace_name:
    :param resource_group:
    :param auth:
    :param project_path:
    :return:
    """

    if not logger:
        logger = module_logger

    if not auth:
        auth = get_cli_specific_auth()
        logger.debug("No auth specified, using authentication {}".format(
            type(auth).__name__))

    if resource_group and workspace_name:
        # Simple case where both are specified. The only way to get workspace with no
        # az configure support for 'mlworkspace' is user explicitly specified parameters
        # Technically resource group can be az configured in
        if not subscription_id:
            subscription_id = get_default_subscription_id(auth)
        return Workspace(subscription_id,
                         resource_group,
                         workspace_name,
                         auth=auth)

    if project_path:
        logger.debug("Project path %s set", project_path)
        try:
            return Workspace.from_config(path=project_path,
                                         auth=auth,
                                         _logger=logger)
        except UserErrorException as ex:
            if project_path != ".":
                logger.warning(
                    "The provided path %s did not contain a config.json, "
                    "falling back to CLI configuration.", project_path)

    if not subscription_id:
        subscription_id = get_default_subscription_id(auth)

    if not workspace_name:
        workspace_name = get_workspace_or_default_name(
            workspace_name,
            throw_error=True,
            subscription_id=subscription_id,
            auth=auth,
            project_path=project_path)
    if not resource_group:
        resource_group = get_resource_group_or_default_name(
            resource_group,
            throw_error=True,
            subscription_id=subscription_id,
            auth=auth,
            project_path=project_path)

    return Workspace(subscription_id,
                     resource_group,
                     workspace_name,
                     auth=auth)
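
# A hypothetical usage sketch of the resolution order described in the docstring
# above (workspace/resource group names are illustrative, not from the source):
#
#   ws = get_workspace_or_default(resource_group='my-rg', workspace_name='my-ws')  # 1) explicit parameters
#   ws = get_workspace_or_default(project_path='./my_project')                     # 2) project config.json
#   ws = get_workspace_or_default()                                                # 3) az configure defaults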
Example #17
    def __init__(self, config_filepath: str) -> None:

        # read in config
        self.conf = Config(config_filepath)

        # config region
        self.conf_aml = self.conf['aml_config']
        self.conf_storage = self.conf['storage']
        self.conf_cluster = self.conf['cluster_config']
        self.conf_docker = self.conf['azure_docker']
        self.conf_experiment = self.conf['experiment']
        # end region

        # initialize workspace
        self.ws = Workspace.from_config(path=self.conf_aml['aml_config_file'])
        print('Workspace name: ' + self.ws.name,
              'Azure region: ' + self.ws.location,
              'Subscription id: ' + self.ws.subscription_id,
              'Resource group: ' + self.ws.resource_group,
              sep='\n')

        # register blobs
        # TODO: make blob registration more flexible
        self.input_ds = Datastore.register_azure_blob_container(
            workspace=self.ws,
            datastore_name=self.conf_storage['input_datastore_name'],
            container_name=self.conf_storage['input_container_name'],
            account_name=self.conf_storage['input_azure_storage_account_name'],
            account_key=self.conf_storage['input_azure_storage_account_key'],
            create_if_not_exists=False)

        self.output_ds = Datastore.register_azure_blob_container(
            workspace=self.ws,
            datastore_name=self.conf_storage['output_datastore_name'],
            container_name=self.conf_storage['output_container_name'],
            account_name=self.conf_storage['output_azure_storage_account_name'],
            account_key=self.conf_storage['output_azure_storage_account_key'],
            create_if_not_exists=False)

        # create compute cluster
        try:
            self.compute_target = ComputeTarget(
                workspace=self.ws, name=self.conf_cluster['cluster_name'])
            print(self.compute_target.get_status().serialize())
        except Exception as e:
            print('Encountered error trying to get the compute target')
            print(f'Exception was {e}')
            sys.exit(1)

        self.project_folder = self.conf_experiment['project_folder']

        # setup custom docker usage
        self.image_registry_details = ContainerRegistry()
        self.image_registry_details.address = self.conf_docker[
            'image_registry_address']
        self.image_registry_details.username = self.conf_docker[
            'image_registry_username']
        self.image_registry_details.password = self.conf_docker[
            'image_registry_password']

        self.user_managed_dependencies = True