Example No. 1
    parser.add_argument("--output", help="Output from First & Hidden Layers", action='store',  nargs='?', default=64, type=int)
    parser.add_argument("--train_batch_size", help="Training Batch Size", nargs='?', action='store', default=128, type=int)
    parser.add_argument("--epochs", help="Number of epochs for training", nargs='?', action='store', default=20, type=int)

    args = parser.parse_args()
    print("drop_rate", args.drop_rate)
    print("input_dim", args.input_dim)
    print("size", args.bs)
    print("output", args.output)
    print("train_batch_size", args.train_batch_size)
    print("epochs", args.epochs)

    data = gen_data(input_dim=args.input_dim, bsize=args.bs)
    model = build_model(in_dim=args.input_dim, drate=args.drop_rate, out=args.output)

    start_time: float = time()
    with mlflow.start_run():
        results = compile_and_run_model(model, data, epochs=args.epochs, batch_size=args.bs)
        mlflow.log_param("drop_rate", args.drop_rate)
        mlflow.log_param("input_dim", args.input_dim)
        mlflow.log_param("size", args.bs)
        mlflow.log_param("output", args.output)
        mlflow.log_param("train_batch_size", args.train_batch_size)
        mlflow.log_param("epochs", args.epochs)
        mlflow.log_param("loss", results[0])
        mlflow.log_param("acc", results[1])

    end_time: float = time()
    
    print("Run time = %d" % (end_time-start_time))
Example No. 2
fig.savefig('/dbfs/mlflow/iris/iris1.png')
plt.close(fig)
display()

!rm -r /dbfs/mlflow/iris

# 1st iteration

mlflow.start_run()
dtc = DecisionTreeClassifier(random_state=10)
dtc.fit(X_train, Y_train)
y_pred_class = dtc.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, y_pred_class)
print(accuracy)
mlflow.log_param("random_state", 10)
mlflow.log_metric("accuracy", accuracy)
mlflow.sklearn.log_model(dtc, "model")
modelpath = "/dbfs/mlflow/iris/model-%s-%f" % ("decision_tree", 1)

mlflow.sklearn.save_model(dtc, modelpath)  # save the pickled model alongside the experiment (scikit-learn models are serialized as pickle files)

mlflow.log_artifact("iris1.png")  # log artifacts; others could be feature columns or different versions of the data

mlflow.end_run()  # the first run must be closed before starting the second one


# 2nd iteration

mlflow.start_run()
dtc = DecisionTreeClassifier(max_depth=1,random_state=10)  #change
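# The second iteration is cut off here; a continuation mirroring the first one
# (an assumption, not shown in the original) would complete the run like this:
dtc.fit(X_train, Y_train)
y_pred_class = dtc.predict(X_test)
accuracy = metrics.accuracy_score(Y_test, y_pred_class)
print(accuracy)
mlflow.log_param("max_depth", 1)
mlflow.log_param("random_state", 10)
mlflow.log_metric("accuracy", accuracy)
mlflow.sklearn.log_model(dtc, "model")
mlflow.end_run()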
Example No. 3
def test_search_runs():
    mlflow.set_experiment("exp-for-search")
    # Create a run and verify that the current active experiment is the one we just set
    logged_runs = {}
    with mlflow.start_run() as active_run:
        logged_runs["first"] = active_run.info.run_id
        mlflow.log_metric("m1", 0.001)
        mlflow.log_metric("m2", 0.002)
        mlflow.log_metric("m1", 0.002)
        mlflow.log_param("p1", "a")
        mlflow.set_tag("t1", "first-tag-val")
    with mlflow.start_run() as active_run:
        logged_runs["second"] = active_run.info.run_id
        mlflow.log_metric("m1", 0.008)
        mlflow.log_param("p2", "aa")
        mlflow.set_tag("t2", "second-tag-val")

    def verify_runs(runs, expected_set):
        assert set([r.info.run_id for r in runs
                    ]) == set([logged_runs[r] for r in expected_set])

    experiment_id = MlflowClient().get_experiment_by_name(
        "exp-for-search").experiment_id

    # 2 runs in this experiment
    assert len(MlflowClient().list_run_infos(experiment_id,
                                             ViewType.ACTIVE_ONLY)) == 2

    # 2 runs have metric "m1" > 0.0001
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.0001")
    verify_runs(runs, ["first", "second"])

    # 1 run has metric "m1" > 0.002
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.002")
    verify_runs(runs, ["second"])

    # no runs with metric "m1" > 0.1
    runs = MlflowClient().search_runs([experiment_id], "metrics.m1 > 0.1")
    verify_runs(runs, [])

    # 1 run with metric "m2" > 0
    runs = MlflowClient().search_runs([experiment_id], "metrics.m2 > 0")
    verify_runs(runs, ["first"])

    # 1 run each with param "p1" and "p2"
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 != 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id], "params.p2 = 'aa'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # 1 run each with tag "t1" and "t2"
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t1 = 'first-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])
    runs = MlflowClient().search_runs([experiment_id], "tags.t2 != 'qwerty'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])
    runs = MlflowClient().search_runs([experiment_id],
                                      "tags.t2 = 'second-tag-val'",
                                      ViewType.ALL)
    verify_runs(runs, ["second"])

    # delete "first" run
    MlflowClient().delete_run(logged_runs["first"])
    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ALL)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.DELETED_ONLY)
    verify_runs(runs, ["first"])

    runs = MlflowClient().search_runs([experiment_id], "params.p1 = 'a'",
                                      ViewType.ACTIVE_ONLY)
    verify_runs(runs, [])
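# For reference, the same filters can be issued through the fluent API, which
# returns a pandas DataFrame rather than Run objects (a sketch, not part of the test):
import mlflow
runs_df = mlflow.search_runs(experiment_ids=[experiment_id],
                             filter_string="metrics.m1 > 0.002")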
Example No. 4
def cMAPPS(experiment_name,
           prepros_params,
           train_params,
           dataset_no,
           tracking=True,
           path=None):
    from Input import cMAPSS as ci
    from Preprocess import cMAPSS as CP
    from Testing import cMAPSS as ct
    import Training as tr

    if tracking:

        mlflow.set_tracking_uri('sqlite:///mlflow.db')

        mlflow.set_experiment(experiment_name)

        with mlflow.start_run():

            if path is not None:
                ci.set_datapath(path)

            if dataset_no in range(1, 5):
                ci.get_data(dataset_no)
                mlflow.log_param("DataSet Number", dataset_no)
            else:
                raise Exception('Please choose a number between 1 and 4')

            cp = CP(**prepros_params)
            cp.preprocess(ci.Train_input)

            run_id = mlflow.active_run().info.run_id

            tmpdir = tempfile.TemporaryDirectory()

            rnn_ff = tr.RNN_to_FF(cp.features,
                                  **train_params,
                                  model_dir=tmpdir.name,
                                  run_id=run_id)
            rnn_ff.create_model(cp.no_splits)

            rnn_ff.train_model(cp.splits_in, cp.splits_out, cp.no_splits)

            mlflow.log_param('Features', cp.features)
            mlflow.log_params(prepros_params)

            mlflow.log_params(train_params)
            mlflow.log_params({
                'MSE_Train': rnn_ff.loss.tolist(),
                'MSE_Validation': rnn_ff.val_loss.tolist(),
                'Delta_MSE': rnn_ff.del_loss.tolist()
            })

            mlflow.log_artifacts(tmpdir.name)

            tmpdir.cleanup()
            # Tags

            mlflow.set_tags({
                'RMSE_Train': (rnn_ff.loss**0.5).tolist(),
                'RMSE_Validation': (rnn_ff.val_loss**0.5).tolist()
            })

            cp.preprocess(ci.Test_input, isTrain=False)
            ct.get_score(rnn_ff.model, cp.test_in, ci.RUL_input)

            mlflow.log_params({
                'Score': ct.score.tolist(),
                'Test_MSE': ct.mse.tolist(),
                'Combined Score': ct.cm_score,
                'Combined MSE': ct.cm_mse
            })

            mlflow.set_tags({
                'Test RMSE': (ct.mse**0.5).tolist(),
                'Combined RMSE': ct.cm_mse**0.5
            })

    else:
        if path is not None:
            ci.set_datapath(path)
        if dataset_no in range(1, 5):
            ci.get_data(dataset_no)
        else:
            raise Exception('Please choose a number between 1 and 4')
        cp = CP(**prepros_params)
        cp.preprocess(ci.Train_input)
        rnn_ff = tr.RNN_to_FF(cp.features, **train_params)
        rnn_ff.create_model(cp.no_splits)
        rnn_ff.train_model(cp.splits_in, cp.splits_out, cp.no_splits)
        cp.preprocess(ci.Test_input, isTrain=False)
        ct.get_score(rnn_ff.model, cp.test_in, ci.RUL_input)

        return ci, cp, rnn_ff, ct

    clear_session()
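# A hypothetical invocation; the keys of both parameter dicts depend on the CP
# and tr.RNN_to_FF signatures, so placeholders are used here:
if __name__ == '__main__':
    cMAPPS(experiment_name='cmapss-rnn',
           prepros_params={},   # forwarded to CP(**prepros_params)
           train_params={},     # forwarded to tr.RNN_to_FF(cp.features, **train_params)
           dataset_no=1,
           tracking=True)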
Example No. 5
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")

        print(mlflow.get_artifact_uri())
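# eval_metrics is not shown in this snippet; in the standard MLflow ElasticNet
# wine-quality example it is defined as below (a sketch under that assumption):
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

def eval_metrics(actual, pred):
    rmse = np.sqrt(mean_squared_error(actual, pred))
    mae = mean_absolute_error(actual, pred)
    r2 = r2_score(actual, pred)
    return rmse, mae, r2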
Example No. 6
def train_and_evaluate(config_path: str):

    logger = logging.getLogger()
    logger.info('Parsing the config file supplied')
    config = read_params(config_path)

    target_col = [config.base.target_col]
    alpha = config.estimators.ElasticNet.params.alpha
    l1_ratio = config.estimators.ElasticNet.params.l1_ratio
    model_dir = Path.cwd() / config.model_dir

    logger.info('Reading the Training and Test Data')
    train_df = pd.read_csv(config.split_data['train_path'], sep=',')
    test_df = pd.read_csv(config.split_data['test_path'], sep=',')

    train_y = train_df[target_col]
    test_y = test_df[target_col]

    train_x = train_df.drop(target_col, axis=1)
    test_x = test_df.drop(target_col, axis=1)

    mlflow_config = config.mlflow_config
    remote_tracking_uri = mlflow_config.remote_server_uri
    mlflow.set_tracking_uri(remote_tracking_uri)

    mlflow.set_experiment(mlflow_config.experiment_name)

    with mlflow.start_run(run_name=mlflow_config.run_name) as mlops_run:
        logger.info('Training started')
        lr = ElasticNet(alpha=alpha,
                        l1_ratio=l1_ratio,
                        random_state=config.base.random_state)

        lr.fit(train_x, train_y)
        logger.info('Training finished')

        logger.info('Predicting on the Test Data')
        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)

        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        logger.info('Saving model')

        tracking_url_type_store = urlparse(mlflow.get_artifact_uri()).scheme
        if tracking_url_type_store != "file":
            mlflow.sklearn.log_model(
                lr,
                "model",
                registered_model_name=mlflow_config.registered_model_name)
        else:
            mlflow.sklearn.log_model(lr, "model")

        logger.info('Training and evaluation finished; model saved')
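# read_params(config_path) is assumed to load a YAML file shaped roughly like the
# sketch below; only the attribute paths are taken from the code above, all
# values are illustrative:
#
#   base:
#     target_col: quality
#     random_state: 42
#   model_dir: models
#   split_data:
#     train_path: data/processed/train.csv
#     test_path: data/processed/test.csv
#   estimators:
#     ElasticNet:
#       params:
#         alpha: 0.5
#         l1_ratio: 0.5
#   mlflow_config:
#     remote_server_uri: http://localhost:5000
#     experiment_name: elasticnet-wine
#     run_name: mlops
#     registered_model_name: ElasticNetWineModel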
Example No. 7
MAX_DEPTH = 2
model = RandomForestRegressor(n_estimators=N_ESTIMATORS, max_depth=MAX_DEPTH)
model = model.fit(x_train, y_train.values.ravel())

# save model 
joblib.dump(model, 'models/model.joblib') 
joblib.dump(column_order, 'models/column_order.joblib')

if settings.SHOULD_USE_MLFLOW:
    # log training run to mlflow
    mlflow.set_tracking_uri(uri=f'http://{settings.MLFLOW_IP}:5000')
    if os.environ.get('CI', '') == 'true':
        mlflow.set_experiment('CI')
    else:
        mlflow.set_experiment('dev')

    with mlflow.start_run() as run:
        # calculate evaluation metrics
        y_test_pred = model.predict(x_test)
        rmse = sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=y_test_pred))
        r2_score = metrics.r2_score(y_true=y_test, y_pred=y_test_pred)

        # log hyperparameters to mlflow
        mlflow.log_param('n_estimators', N_ESTIMATORS)
        mlflow.log_param('max_depth', MAX_DEPTH)
        
        # log metrics to mlflow
        mlflow.log_metric("rmse_validation_data", rmse)
        mlflow.log_metric("r2_score_validation_data", r2_score)
else:
    print('Not logging training run because the MLflow tracking server is not up, or its URL is not set in train.py')
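# The settings module is not shown above; a minimal stand-in exposing the
# attributes this script reads (values are illustrative assumptions):
SHOULD_USE_MLFLOW = True       # gate MLflow logging on or off
MLFLOW_IP = '127.0.0.1'        # host running the MLflow tracking server on port 5000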
Example No. 8
        exp_name = "ntnu_course_classifier"

    mlflow.set_experiment(exp_name)

    with mlflow.start_run() as run:
        # Get path to save model
        tracking_uri = mlflow.tracking.get_tracking_uri()
        print("Logging to " + tracking_uri)
        artifact_uri = mlflow.get_artifact_uri()
        if artifact_uri.startswith("file:///"):
            artifact_uri = artifact_uri.split("file:///")[1]
        print("Saving artifacts to " + artifact_uri)
        model_path = artifact_uri + model_prefix

        # Log params
        mlflow.log_param("seed", seed)
        mlflow.log_param("use_idf", use_idf)
        mlflow.log_param("use_stoplist", use_stoplist)
        mlflow.log_param("ngrams", args.ngrams)
        mlflow.log_param("data_file", data_file)
        mlflow.log_param("model_type", args.model)
        #if args.model == "nb":
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("fit_prior", fit_prior)
        #if args.model == "log_reg":
        mlflow.log_param("C", C)

        ## Data import and cleaning
        df = pd.read_csv(data_file, usecols=[1, 2, 3, 4, 5, 6])
        df = df.dropna()
Example No. 9
    def run_train_cv(self) -> None:
        """Train and evaluate with cross-validation.

        Along with training and evaluation, this saves each fold's model and
        logs the scores.
        """
        # mlflow
        mlflow.set_experiment(self.exp_name)
        mlflow.start_run(run_name=self.run_name)
        logger.info(f'{self.run_name} - start training cv')

        scores = []
        va_idxes = []
        preds = []

        # Adversarial validation
        if self.advanced and 'adversarial_validation' in self.advanced:
            X_train = self.X_train
            X_test = self.X_test
            X_train['target'] = 0
            X_test['target'] = 1
            X_train = pd.concat([X_train, X_test],
                                sort=False).reset_index(drop=True)
            y_train = X_train['target']
            X_train.drop('target', axis=1, inplace=True)
            X_test.drop('target', axis=1, inplace=True)
            self.X_train = X_train
            self.y_train = y_train

        # train each fold
        for i_fold in range(self.cv.n_splits):
            # train this fold
            logger.info(f'{self.run_name} fold {i_fold} - start training')
            model, va_idx, va_pred, score = self.train_fold(i_fold)
            logger.info(
                f'{self.run_name} fold {i_fold} - end training - score {score}'
            )

            # save the model
            model.save_model()

            # keep the results
            va_idxes.append(va_idx)
            scores.append(score)
            preds.append(va_pred)

        # combine the results from all folds
        va_idxes = np.concatenate(va_idxes)
        order = np.argsort(va_idxes)
        preds = np.concatenate(preds, axis=0)
        preds = preds[order]

        if self.evaluation_metric == 'log_loss':
            cv_score = log_loss(self.y_train, preds, eps=1e-15, normalize=True)
        elif self.evaluation_metric == 'mean_absolute_error':
            cv_score = mean_absolute_error(self.y_train, preds)
        elif self.evaluation_metric == 'rmse':
            cv_score = np.sqrt(mean_squared_error(self.y_train, preds))
        elif self.evaluation_metric == 'auc':
            cv_score = roc_auc_score(self.y_train, preds)
        elif self.evaluation_metric == 'prauc':
            cv_score = average_precision_score(self.y_train, preds)

        logger.info(f'{self.run_name} - end training cv - score {cv_score}')

        # save the predictions
        Data.dump(preds, f'../output/pred/{self.run_name}-train.pkl')

        # mlflow
        self.run_id = mlflow.active_run().info.run_id
        log_param('model_name', self.model_cls.__name__)  # model_cls is a class, so __name__ gives its name
        log_param('fe_name', self.fe_name)
        log_param('train_params', self.params)
        log_param('cv_strategy', str(self.cv))
        log_param('evaluation_metric', self.evaluation_metric)
        log_metric('cv_score', cv_score)
        log_param(
            'fold_scores',
            dict(
                zip([f'fold_{i}' for i in range(len(scores))],
                    [round(s, 4) for s in scores])))
        log_param('cols_definition', self.cols_definition)
        log_param('description', self.description)
        mlflow.end_run()
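# Note: the bare log_param/log_metric calls above assume the fluent imports,
# which attach to the run opened by mlflow.start_run(run_name=...):
from mlflow import log_metric, log_param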
Example No. 10
def log_anomaly(experimentID, run_name, params, traindata, testdata):

    warnings.filterwarnings("ignore")
    np.random.seed(40)

    with mlflow.start_run(experiment_id=experimentID,
                          run_name=run_name) as run:
        # Create model, train it, and create predictions
        iForest = IsolationForest(**params)

        iForest.fit(traindata)

        # Predict on train and test data
        train_predictions = iForest.predict(traindata)
        test_predictions = iForest.predict(testdata)

        # Log model
        mlflow.sklearn.log_model(iForest, "ib-isolationforest-model")

        # Log params
        mlflow.log_params(params)

        #Accuracy Metrics
        train_accuracy = round(
            list(train_predictions).count(1) / train_predictions.shape[0],
            2)  #create train accuracy metrics

        test_accuracy = round(
            list(test_predictions).count(1) / test_predictions.shape[0],
            2)  #create test accuracy metrics

        print('Accuracy Metrics:')
        print('--------------------------------------------')
        print(f'Accuracy for train data: {train_accuracy}')
        print(f'Accuracy for test data: {test_accuracy}')
        print(' ')

        # Log metrics
        mlflow.log_metric("train_accuracy", train_accuracy)
        mlflow.log_metric("test_accuracy", test_accuracy)

        #Log Artifact
        training_scores = iForest.decision_function(traindata)
        test_scores = iForest.decision_function(testdata)

        # Create and log plot
        fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

        #fig = plt.figure(figsize=(10,8))
        title = fig.suptitle("Decision Function Score", fontsize=14)
        fig.subplots_adjust(top=0.85, wspace=0.3)

        #ax = fig.add_subplot(1,1,1)
        ax1.set_title("Decision function score for trainingdata")
        ax1.hist(training_scores,
                 bins='auto',
                 alpha=0.7,
                 color='#0504aa',
                 rwidth=0.85)

        #ax1 = fig.add_subplot(2,1,1)
        ax2.set_title("Decision function score for testdata")
        ax2.hist(test_scores,
                 bins='auto',
                 alpha=0.7,
                 color='#0504aa',
                 rwidth=0.85)

        #create Confusion Matrix
        #cm = confusion_matrix(y_test, test_predictions)
        #sns.heatmap(cm, annot=True, cmap="coolwarm", fmt="d", linewidths=.5, ax=ax1)

        n_id = run.info.run_id

        table_name = 'activityhistory'

        fig_path = f'data/decisionfunction/{table_name}_{n_id}.png'

        fig.savefig(fig_path)

        mlflow.log_artifact(fig_path)

        print(fig)

        return f'RunID:{n_id}'
Example No. 11



# In[17]:

# train all layers
for layer in model.layers:
    layer.trainable = True

# the QWK callback must be defined before it is referenced in callbacks_list
qwk = QWKEvaluation(validation_data=(valid_generator, valid_y),
                    batch_size=batch_size, interval=1)
callbacks_list = [checkpoint, csv_logger, reduceLROnPlat, early, qwk]
model.compile(loss='categorical_crossentropy',
              # loss=kappa_loss,
              optimizer=Adam(lr=1e-4))
model.fit_generator(
    train_mixup,
    steps_per_epoch=np.ceil(float(len(train_x)) / float(batch_size)),
    validation_data=valid_generator,
    validation_steps=np.ceil(float(len(valid_x)) / float(batch_size)),
    epochs=epochs,
    verbose=1,
    workers=1, use_multiprocessing=False,
    callbacks=callbacks_list)

# In[18]:
mlflow.log_param("size", SIZE)
mlflow.log_metric("qwk", qwk)
	
mlflow.log_artifact("resnet.png")
Example No. 12

if __name__ == "__main__":

    exp_name = 'Iris'
    ver_name = '0.0'
    run_name = 'Iris Run'

    print("Experiment [{0}]".format(exp_name))
    mlflow.set_experiment(exp_name)

    with mlflow.start_run(source_version=ver_name,
                          run_name=run_name):

        print('Reading dataset')
        mlflow.log_param("Dataset", 'Iris')
        iris = sklearn.datasets.load_iris()
        X = iris.data
        y = iris.target

        print('Splitting')
        test_proportion = 0.4
        mlflow.log_param("Test proportion", test_proportion)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_proportion, shuffle=True)

        print('Creating estimator using Kmeans')
        kmeans_n_clusters = 3
        mlflow.log_param("Kmeans N Clusters", kmeans_n_clusters)
        est = KMeans(n_clusters=kmeans_n_clusters)

        print('Fitting estimator')
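        # The snippet ends before the fit; a plausible continuation under the
        # same pattern (the metric choice is an assumption) would be:
        est.fit(X_train)
        y_pred = est.predict(X_test)
        from sklearn.metrics import adjusted_rand_score  # assumed clustering metric
        score = adjusted_rand_score(y_test, y_pred)
        print('Adjusted Rand index: {0}'.format(score))
        mlflow.log_metric("Adjusted Rand index", score)
        mlflow.sklearn.log_model(est, "model")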
Example No. 13
        metavar="LR",
        help="learning rate (default: 0.1)",
    )

    args = parser.parse_args()
    dict_args = vars(args)

    with mlflow.start_run(run_name="Titanic_Captum_mlflow"):
        (
            net,
            train_features,
            train_labels,
            test_features,
            test_labels,
            feature_names,
        ) = train()

        compute_accuracy(net,
                         train_features,
                         train_labels,
                         title="Train Accuracy")
        test_input_tensor = compute_accuracy(net,
                                             test_features,
                                             test_labels,
                                             title="Test Accuracy")
        feature_conductance(net, test_input_tensor)
        layer_conductance(net, test_input_tensor)
        neuron_conductance(net, test_input_tensor)
        mlflow.log_param("Train Size", len(train_labels))
        mlflow.log_param("Test Size", len(test_labels))
Example No. 14
    mlflow.set_experiment(train_data[0].metric_name)
    mlflow.start_run()
    mlflow_run_id = mlflow.active_run().info.run_id

    # keep track of the model name as a mlflow run tag
    mlflow.set_tag("model", model_mp.model_name)

    # keep track of labels as tags in the mlflow experiment
    for label in train_data[0].label_config:
        mlflow.set_tag(label, train_data[0].label_config[label])

    # store the metric with labels as a tag so it can be copied into grafana to view the real metric
    mlflow.set_tag("metric", metric)

    # log parameters before run
    mlflow.log_param("retraining_interval_minutes",
                     str(Configuration.retraining_interval_minutes))
    mlflow.log_param("rolling_training_window_size",
                     str(Configuration.rolling_training_window_size))
    mlflow.log_param("true_anomaly_threshold",
                     str(Configuration.true_anomaly_threshold))

    # initial run with just the train data
    model_mp.train(
        prediction_duration=Configuration.retraining_interval_minutes)

    # store the predicted dataframe and the true dataframe
    predicted_df = model_mp.predicted_df
    true_df = Metric(test_data_list[0]).metric_values.set_index("ds")

    # Label True Anomalies
    true_df["anomaly"] = label_true_anomalies(
Example No. 15
def train(hyp, opt, device, tb_writer=None, wandb=None):
    logger.info(
        colorstr("hyperparameters: ") + ", ".join(f"{k}={v}"
                                                  for k, v in hyp.items()))
    save_dir, epochs, batch_size, total_batch_size, weights, rank = (
        Path(opt.save_dir),
        opt.epochs,
        opt.batch_size,
        opt.total_batch_size,
        opt.weights,
        opt.global_rank,
    )

    # Directories
    wdir = save_dir / "weights"
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / "last.pt"
    best = wdir / "best.pt"
    results_file = save_dir / "results.txt"

    # Save run settings
    with open(save_dir / "hyp.yaml", "w") as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / "opt.yaml", "w") as f:
        # yaml.dump(vars(opt), f, sort_keys=False)  # opt run parameters
        yaml.dump(str(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != "cpu"
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict["train"]
    test_path = data_dict["val"]
    nc = 1 if opt.single_cls else int(data_dict["nc"])  # number of classes
    names = (["item"] if opt.single_cls and len(data_dict["names"]) != 1 else
             data_dict["names"])  # class names
    assert len(names) == nc, "%g names found for nc=%g dataset in %s" % (
        len(names),
        nc,
        opt.data,
    )  # check

    # Model
    pretrained = weights.endswith(".pt")
    if pretrained:
        with torch_distributed_zero_first(rank):
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        if hyp.get("anchors"):
            ckpt["model"].yaml["anchors"] = round(
                hyp["anchors"])  # force autoanchor
        model = Model(opt.cfg or ckpt["model"].yaml, ch=3,
                      nc=nc).to(device)  # create
        exclude = ["anchor"] if opt.cfg or hyp.get("anchors") else [
        ]  # exclude keys
        state_dict = ckpt["model"].float().state_dict()  # to FP32
        state_dict = intersect_dicts(state_dict,
                                     model.state_dict(),
                                     exclude=exclude)  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info(
            "Transferred %g/%g items from %s" %
            (len(state_dict), len(model.state_dict()), weights))  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create

    # Freeze
    freeze = []  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print("freezing %s" % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / total_batch_size),
                     1)  # accumulate loss before optimizing
    hyp["weight_decay"] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0,
                               lr=hyp["lr0"],
                               betas=(hyp["momentum"],
                                      0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp["lr0"],
                              momentum=hyp["momentum"],
                              nesterov=True)

    optimizer.add_param_group({
        "params": pg1,
        "weight_decay": hyp["weight_decay"]
    })  # add pg1 with weight_decay
    optimizer.add_param_group({"params": pg2})  # add pg2 (biases)
    logger.info("Optimizer groups: %g .bias, %g conv.weight, %g other" %
                (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    if opt.linear_lr:
        lf = (lambda x: (1 - x / (epochs - 1)) *
              (1.0 - hyp["lrf"]) + hyp["lrf"])  # linear
    else:
        lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # Logging
    if rank in [-1, 0] and wandb and wandb.run is None:
        opt.hyp = hyp  # add hyperparameters
        wandb_run = wandb.init(
            config=opt,
            resume="allow",
            project="YOLOv5"
            if opt.project == "runs/train" else Path(opt.project).stem,
            name=save_dir.stem,
            id=ckpt.get("wandb_id") if "ckpt" in locals() else None,
        )
    loggers = {"wandb": wandb}  # loggers dict

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt["optimizer"] is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            best_fitness = ckpt["best_fitness"]

        # Results
        if ckpt.get("training_results") is not None:
            with open(results_file, "w") as file:
                file.write(ckpt["training_results"])  # write results.txt

        # Epochs
        start_epoch = ckpt["epoch"] + 1
        if opt.resume:
            assert (
                start_epoch > 0
            ), "%s training to %g epochs is finished, nothing to resume." % (
                weights,
                epochs,
            )
        if epochs < start_epoch:
            logger.info(
                "%s has been trained for %g epochs. Fine-tuning for %g additional epochs."
                % (weights, ckpt["epoch"], epochs))
            epochs += ckpt["epoch"]  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[
        -1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size
                         ]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info("Using SyncBatchNorm()")

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model,
                    device_ids=[opt.local_rank],
                    output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(
        train_path,
        imgsz,
        batch_size,
        gs,
        opt,
        hyp=hyp,
        augment=True,
        cache=opt.cache_images,
        rect=opt.rect,
        rank=rank,
        world_size=opt.world_size,
        workers=opt.workers,
        image_weights=opt.image_weights,
        quad=opt.quad,
        prefix=colorstr("train: "),
    )
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert (
        mlc < nc
    ), "Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g" % (
        mlc,
        nc,
        opt.data,
        nc - 1,
    )

    # Process 0
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        testloader = create_dataloader(
            test_path,
            imgsz_test,
            batch_size * 2,
            gs,
            opt,  # testloader
            hyp=hyp,
            cache=opt.cache_images and not opt.notest,
            rect=True,
            rank=-1,
            world_size=opt.world_size,
            workers=opt.workers,
            pad=0.5,
            prefix=colorstr("val: "),
        )[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                plot_labels(labels, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram("classes", c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset,
                              model=model,
                              thr=hyp["anchor_t"],
                              imgsz=imgsz)

    # Model parameters
    hyp["box"] *= 3.0 / nl  # scale to layers
    hyp["cls"] *= nc / 80.0 * 3.0 / nl  # scale to classes and layers
    hyp["obj"] *= (imgsz / 640)**2 * 3.0 / nl  # scale to image size and layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = (
        labels_to_class_weights(dataset.labels, nc).to(device) * nc
    )  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp["warmup_epochs"] * nb),
             1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0
               )  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    compute_loss = ComputeLoss(model)  # init loss class
    logger.info(f"Image sizes {imgsz} train, {imgsz_test} test\n"
                f"Using {dataloader.num_workers} dataloader workers\n"
                f"Logging results to {save_dir}\n"
                f"Starting training for {epochs} epochs...")
    for epoch in range(
            start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = (model.class_weights.cpu().numpy() * (1 - maps)**2 / nc
                      )  # class weights
                iw = labels_to_image_weights(dataset.labels,
                                             nc=nc,
                                             class_weights=cw)  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw,
                    k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (torch.tensor(dataset.indices)
                           if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(
            ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "obj", "cls",
                                   "total", "targets", "img_size"))
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (
                imgs,
                targets,
                paths,
                _,
        ) in (
                pbar
        ):  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = (imgs.to(device, non_blocking=True).float() / 255.0
                    )  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1,
                    np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(
                        ni,
                        xi,
                        [
                            hyp["warmup_bias_lr"] if j == 2 else 0.0,
                            x["initial_lr"] * lf(epoch),
                        ],
                    )
                    if "momentum" in x:
                        x["momentum"] = np.interp(
                            ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5,
                                      imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                          ]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode="bilinear",
                                         align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                pred = model(imgs)  # forward
                loss, loss_items = compute_loss(
                    pred, targets.to(device))  # loss scaled by batch_size
                if rank != -1:
                    loss *= (opt.world_size
                             )  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.0

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                mloss = (mloss * i + loss_items) / (i + 1
                                                    )  # update mean losses
                mem = "%.3gG" % (torch.cuda.memory_reserved() / 1e9
                                 if torch.cuda.is_available() else 0)  # (GB)
                s = ("%10s" * 2 + "%10.4g" * 6) % (
                    "%g/%g" % (epoch, epochs - 1),
                    mem,
                    *mloss,
                    targets.shape[0],
                    imgs.shape[-1],
                )
                pbar.set_description(s)

                # Plot
                if plots and ni < 3:
                    f = save_dir / f"train_batch{ni}.jpg"  # filename
                    Thread(target=plot_images,
                           args=(imgs, targets, paths, f),
                           daemon=True).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
                elif plots and ni == 10 and wandb:
                    wandb.log(
                        {
                            "Mosaics": [
                                wandb.Image(str(x), caption=x.name)
                                for x in save_dir.glob("train*.jpg")
                                if x.exists()
                            ]
                        },
                        commit=False,
                    )

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x["lr"] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                ema.update_attr(
                    model,
                    include=[
                        "yaml",
                        "nc",
                        "hyp",
                        "gr",
                        "names",
                        "stride",
                        "class_weights",
                    ],
                )
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(
                    opt.data,
                    batch_size=batch_size * 2,
                    imgsz=imgsz_test,
                    model=ema.ema,
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    verbose=nc < 50 and final_epoch,
                    plots=plots and final_epoch,
                    log_imgs=opt.log_imgs if wandb else 0,
                    compute_loss=compute_loss,
                )

            # Write
            with open(results_file, "a") as f:
                f.write(
                    s + "%10.4g" * 7 % results +
                    "\n")  # P, R, [email protected], [email protected], val_loss(box, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system("gsutil cp %s gs://%s/results/results%s.txt" %
                          (results_file, opt.bucket, opt.name))

            # Log
            tags = [
                "train/box_loss",
                "train/obj_loss",
                "train/cls_loss",  # train loss
                "metrics/precision",
                "metrics/recall",
                "metrics/mAP_0.5",
                "metrics/mAP_0.5:0.95",
                "val/box_loss",
                "val/obj_loss",
                "val/cls_loss",  # val loss
                "x/lr0",
                "x/lr1",
                "x/lr2",
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb:
                    wandb.log({tag: x}, step=epoch,
                              commit=tag == tags[-1])  # W&B

            # Update best mAP
            fi = fitness(np.array(results).reshape(
                1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, "r") as f:  # create checkpoint
                    ckpt = {
                        "epoch":
                        epoch,
                        "best_fitness":
                        best_fitness,
                        "training_results":
                        f.read(),
                        "model":
                        ema.ema,
                        "optimizer":
                        None if final_epoch else optimizer.state_dict(),
                        "wandb_id":
                        wandb_run.id if wandb else None,
                    }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in [last, best]:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f"gsutil cp {final} gs://{opt.bucket}/weights")  # upload

        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb:
                files = [
                    "results.png",
                    "confusion_matrix.png",
                    *[f"{x}_curve.png" for x in ("F1", "PR", "P", "R")],
                ]
                wandb.log({
                    "Results": [
                        wandb.Image(str(save_dir / f), caption=f)
                        for f in files if (save_dir / f).exists()
                    ]
                })
                if opt.log_artifacts:
                    wandb.log_artifact(artifact_or_path=str(final),
                                       type="model",
                                       name=save_dir.stem)

        # Test best.pt
        logger.info("%g epochs completed in %.3f hours.\n" %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))
        if opt.data.endswith("coco.yaml") and nc == 80:  # if COCO
            for conf, iou, save_json in (
                [0.25, 0.45, False],
                [0.001, 0.65, True],
            ):  # speed, mAP tests
                results, _, _ = test.test(
                    opt.data,
                    batch_size=batch_size * 2,
                    imgsz=imgsz_test,
                    conf_thres=conf,
                    iou_thres=iou,
                    model=attempt_load(final, device).half(),
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    save_json=save_json,
                    plots=False,
                )

    else:
        dist.destroy_process_group()

    wandb.run.finish() if wandb and wandb.run else None
    torch.cuda.empty_cache()

    # mlflow
    with mlflow.start_run() as run:
        # Log args into mlflow
        for key, value in hyp.items():
            mlflow.log_param(key, value)

        for key, value in vars(opt).items():
            mlflow.log_param(key, value)

        # Log results into mlflow
        for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
            # convert x to a Python float if it is a torch.Tensor
            if torch.is_tensor(x):
                x = x.item()

            # strip characters that MLflow does not allow in metric names
            if ":" in tag:
                tag = re.sub(r":", " ", tag)

            mlflow.log_metric(tag, x)

        # Log model
        mlflow.pytorch.log_model(model, "model")

    return results
Example No. 16
        logger.exception(
            "Unable to download training & test CSV, check your internet connection. Error: %s",
            e)

    # Useful for multiple runs (only doing one run in this sample notebook)
    with mlflow.start_run():
        m = Prophet()
        m.fit(df)

        # Evaluate Metrics
        df_cv = cross_validation(m,
                                 initial="730 days",
                                 period="180 days",
                                 horizon="365 days")
        df_p = performance_metrics(df_cv, rolling_window=rolling_window)

        # Print out metrics
        print("Prophet model (rolling_window=%f):" % (rolling_window))
        print("  CV: \n%s" % df_cv.head())
        print("  Perf: \n%s" % df_p.head())

        # Log parameter, metrics, and model to MLflow
        mlflow.log_param("rolling_window", rolling_window)
        mlflow.log_metric("rmse", df_p.loc[0, "rmse"])

        mlflow.pyfunc.log_model("model",
                                conda_env=conda_env,
                                python_model=FbProphetWrapper(m))
        print("Logged model with URI: runs:/{run_id}/model".format(
            run_id=mlflow.active_run().info.run_id))
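# FbProphetWrapper is referenced but not defined in this snippet; the usual
# pyfunc wrapper from the MLflow Prophet example looks like this (a sketch):
import mlflow.pyfunc

class FbProphetWrapper(mlflow.pyfunc.PythonModel):
    def __init__(self, model):
        self.model = model
        super().__init__()

    def predict(self, context, model_input):
        # forecast as many periods ahead as the input frame requests
        future = self.model.make_future_dataframe(periods=model_input["periods"][0])
        return self.model.predict(future)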
Example No. 17
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)

    # learning rate scheduler
    warmup_steps = args.warmup_epochs * len(train_iterator)
    print(f"warmup steps: {warmup_steps}")
    scheduler = get_constant_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps)

    with mlflow.start_run():
        # Log our parameters into mlflow
        for key, value in vars(args).items():
            mlflow.log_param(key, value)

        comment = os.path.basename(args.output_dir)
        comment = "" if comment == "" else "_" + comment

        tb_writer = SummaryWriter(comment=comment)

        for epoch in trange(N_EPOCHS, desc="Epoch"):

            start_time = time.time()

            train_loss, train_mpp_acc = train(
                model,
                train_iterator,
                optimizer,
                sc_criterion,
Example No. 18
    y_train,
    scoring='accuracy',
    return_estimator=True
)

score_mean = val_info['test_score'].mean()
score_std = val_info['test_score'].std()
print(f'{score_mean} accuracy with a standard deviation of {score_std}')

# Cell
clf = val_info['estimator'][0]

# Cell
importance_df = pd.DataFrame({'feature':X_train.columns, 'importance': clf.feature_importances_}).sort_values('importance', ascending=False)

# Cell
importance_df.to_html('../output/data/feature_importance.html', index=False)

# Cell
if LOG_MLFLOW:
    with mlflow.start_run(experiment_id=EX_ID):
        mlflow.log_param('num_features', X_train.shape[1])
        mlflow.log_param('n_estimators', clf.get_params()['n_estimators'])
        mlflow.log_param('max_depth', clf.get_params()['max_depth'])
        mlflow.log_param('learning_rate', clf.get_params()['learning_rate'])
        mlflow.log_param('booster', clf.get_params()['booster'])

        mlflow.log_metric('mean_accuracy', score_mean)
        mlflow.log_metric('std_accuracy', score_std)

        mlflow.log_artifact('../output/data/feature_importance.html')
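        # The fitted estimator itself could be logged in the same run as well;
        # given the booster/learning_rate params above it is presumably a
        # sklearn-API gradient-boosting model, which mlflow.sklearn can persist:
        mlflow.sklearn.log_model(clf, "model")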
Example No. 19
    lr = 1e-3
    """Generate models from parameters"""
    for deep in depths:
        for hidden in hidden_channels:
            model = GUNET(in_ch=1,
                          hid_ch=hidden,
                          depth=deep,
                          out_ch=2,
                          pool_ratios=pooling_ratios).to(device)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=lr,
                                         weight_decay=0.001)
            """Start the training session"""
            with start_run():
                log_param('hidden_channel', hidden)
                log_param('pooling_ratios', pooling_ratios)
                log_param('learning_rate', lr)
                log_param('depth', deep)
                for epoch in range(epochs):
                    train_loss = []
                    val_loss = []
                    running_train_loss = []
                    start_time = time.time()

                    for data in sift_train_loader:
                        data = data.to(device)
                        model.train()
                        optimizer.zero_grad()
                        out = loss(model(data), data.y)
                        out.backward()
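                        # The loop is cut off above; a plausible continuation of
                        # the optimizer step and per-epoch logging (names assumed
                        # from the surrounding code):
                        optimizer.step()
                        running_train_loss.append(out.item())
                    log_metric('train_loss',
                               sum(running_train_loss) / len(running_train_loss),
                               step=epoch)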
Example No. 20
def main(params: dict,
         output_dir: str):
    import mlflow
    print("start params={}".format(params))
    model_id = "train_0"
    logger = get_logger()
    # df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
    df = pd.read_pickle("../input/riiid-test-answer-prediction/split10/train_0.pickle").sort_values(["user_id", "timestamp"]).reset_index(drop=True)
    if is_debug:
        df = df.head(30000)
    df["prior_question_had_explanation"] = df["prior_question_had_explanation"].fillna(-1)
    column_config = {
        ("content_id", "content_type_id"): {"type": "category"},
        "user_answer": {"type": "category"},
        "part": {"type": "category"},
        "prior_question_elapsed_time_bin300": {"type": "category"},
        "duration_previous_content_bin300": {"type": "category"},
        "prior_question_had_explanation": {"type": "category"},
        "rating_diff_content_user_id": {"type": "numeric"},
        "qq_table2_mean": {"type": "numeric"},
        "qq_table2_min": {"type": "numeric"}
    }

    if not load_pickle or is_debug:
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent()
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_dict["user_id"]["PreviousAnswer2"] = PreviousAnswer2(groupby="user_id",
                                                                             column="content_id",
                                                                             is_debug=is_debug,
                                                                             model_id=model_id,
                                                                             n=300)
        feature_factory_dict["user_id"]["UserContentRateEncoder"] = UserContentRateEncoder(rate_func="elo",
                                                                                           column="user_id")
        feature_factory_dict["user_id"]["QuestionQuestionTableEncoder2"] = \
            QuestionQuestionTableEncoder2(
                model_id=model_id,
                is_debug=is_debug,
                past_n=100,
                min_size=300
            )
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id="train_0",
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        print("all_predict")
        df = feature_factory_manager.all_predict(df)
        df = df[["user_id", "content_id", "content_type_id", "part", "user_answer", "answered_correctly",
                 "prior_question_elapsed_time_bin300", "duration_previous_content_bin300",
                 "prior_question_had_explanation", "rating_diff_content_user_id",
                 "qq_table2_mean", "qq_table2_min"]].replace(-99, -1)
        df["qq_table2_mean"] = df["qq_table2_mean"].fillna(0.65)
        df["qq_table2_min"] = df["qq_table2_min"].fillna(0.6)
        print(df.head(10))

        print("data preprocess")

        train_idx = []
        val_idx = []
        np.random.seed(0)
        for _, w_df in df[df["content_type_id"] == 0].groupby("user_id"):
            if np.random.random() < 0.01:
                # all val
                val_idx.extend(w_df.index.tolist())
            else:
                train_num = int(len(w_df) * 0.95)
                train_idx.extend(w_df[:train_num].index.tolist())
                val_idx.extend(w_df[train_num:].index.tolist())
    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    ff_for_transformer.make_dict(df=df)
    n_skill = len(ff_for_transformer.embbed_dict[("content_id", "content_type_id")])
    if not load_pickle or is_debug:
        df["is_val"] = 0
        df["is_val"].loc[val_idx] = 1
        w_df = df[df["is_val"] == 0]
        w_df["group"] = (w_df.groupby("user_id")["user_id"].transform("count") - w_df.groupby("user_id").cumcount()) // params["max_seq"]
        w_df["user_id"] = w_df["user_id"].astype(str) + "_" + w_df["group"].astype(str)

        group = ff_for_transformer.all_predict(w_df)

        dataset_train = SAKTDataset(group,
                                    n_skill=n_skill,
                                    max_seq=params["max_seq"])

        del w_df
        gc.collect()

    ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                      dict_path="../feature_engineering/",
                                                      sequence_length=params["max_seq"],
                                                      logger=logger)
    if not load_pickle or is_debug:
        group = ff_for_transformer.all_predict(df[df["content_type_id"] == 0])
        dataset_val = SAKTDataset(group,
                                  is_test=True,
                                  n_skill=n_skill,
                                  max_seq=params["max_seq"])

    os.makedirs("../input/feature_engineering/model080", exist_ok=True)
    if not is_debug and not load_pickle:
        with open("../input/feature_engineering/model080/train.pickle", "wb") as f:
            pickle.dump(dataset_train, f)
        with open("../input/feature_engineering/model080/val.pickle", "wb") as f:
            pickle.dump(dataset_val, f)

    if not is_debug and load_pickle:
        with open("../input/feature_engineering/model080/train.pickle", "rb") as f:
            dataset_train = pickle.load(f)
        with open("../input/feature_engineering/model080/val.pickle", "rb") as f:
            dataset_val = pickle.load(f)
        print("loaded!")
    dataloader_train = DataLoader(dataset_train, batch_size=params["batch_size"], shuffle=True, num_workers=1)
    dataloader_val = DataLoader(dataset_val, batch_size=params["batch_size"], shuffle=False, num_workers=1)

    model = SAKTModel(n_skill, embed_dim=params["embed_dim"], max_seq=params["max_seq"], dropout=dropout)

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
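    # Standard transformer practice: apply weight decay only to weight matrices,
    # leaving biases and LayerNorm parameters unregularized.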
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]

    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=params["lr"],
                      weight_decay=0.01,
                      )
    num_train_optimization_steps = int(len(dataloader_train) * epochs)
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=params["num_warmup_steps"],
                                                num_training_steps=num_train_optimization_steps)
    criterion = nn.BCEWithLogitsLoss()
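    # BCEWithLogitsLoss fuses the sigmoid with binary cross-entropy, so the model
    # outputs raw logits; the evaluation loop below applies the sigmoid explicitly.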

    model.to(device)
    criterion.to(device)

    for epoch in range(epochs):
        loss, acc, auc, auc_val = train_epoch(model, dataloader_train, dataloader_val, optimizer, criterion, scheduler, device)
        print("epoch - {} train_loss - {:.3f} auc - {:.4f} auc-val: {:.4f}".format(epoch, loss, auc, auc_val))

    preds = []
    labels = []
    model.eval()  # disable dropout before scoring the validation set
    with torch.no_grad():
        for item in tqdm(dataloader_val):
            x = item["x"].to(device).long()
            target_id = item["target_id"].to(device).long()
            part = item["part"].to(device).long()
            label = item["label"].to(device).float()
            elapsed_time = item["elapsed_time"].to(device).long()
            duration_previous_content = item["duration_previous_content"].to(device).long()
            prior_question_had_explanation = item["prior_q"].to(device).long()
            user_answer = item["user_answer"].to(device).long()
            rate_diff = item["rate_diff"].to(device).float()
            qq_table_mean = item["qq_table_mean"].to(device).float()
            qq_table_min = item["qq_table_min"].to(device).float()

            output = model(x, target_id, part, elapsed_time,
                           duration_previous_content, prior_question_had_explanation, user_answer,
                           rate_diff, qq_table_mean, qq_table_min)

            preds.extend(torch.sigmoid(output[:, -1]).view(-1).data.cpu().numpy().tolist())
            labels.extend(label[:, -1].view(-1).data.cpu().numpy().tolist())

    auc_transformer = roc_auc_score(labels, preds)
    print("single transformer: {:.4f}".format(auc_transformer))
    df_oof = pd.DataFrame()
    # df_oof["row_id"] = df.loc[val_idx].index
    print(len(dataloader_val))
    print(len(preds))
    df_oof["predict"] = preds
    df_oof["target"] = labels

    df_oof.to_csv(f"{output_dir}/transformers1.csv", index=False)
    """
    df_oof2 = pd.read_csv("../output/ex_237/20201213110353/oof_train_0_lgbm.csv")
    df_oof2.columns = ["row_id", "predict_lgbm", "target"]
    df_oof2 = pd.merge(df_oof, df_oof2, how="inner")

    auc_lgbm = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values)
    print("lgbm: {:.4f}".format(auc_lgbm))

    print("ensemble")
    max_auc = 0
    max_nn_ratio = 0
    for r in np.arange(0, 1.05, 0.05):
        auc = roc_auc_score(df_oof2["target"].values, df_oof2["predict_lgbm"].values*(1-r) + df_oof2["predict"].values*r)
        print("[nn_ratio: {:.2f}] AUC: {:.4f}".format(r, auc))

        if max_auc < auc:
            max_auc = auc
            max_nn_ratio = r
    print(len(df_oof2))
    """
    if not is_debug:
        mlflow.start_run(experiment_id=10,
                         run_name=os.path.basename(__file__))

        for key, value in params.items():
            mlflow.log_param(key, value)
        mlflow.log_metric("auc_val", auc_transformer)
        mlflow.end_run()
    torch.save(model.state_dict(), f"{output_dir}/transformers.pth")
    del model
    torch.cuda.empty_cache()
    with open(f"{output_dir}/transformer_param.json", "w") as f:
        json.dump(params, f)
    if is_make_feature_factory:
        # feature factory
        feature_factory_dict = {"user_id": {}}
        feature_factory_dict["user_id"]["DurationPreviousContent"] = DurationPreviousContent(is_partial_fit=True)
        feature_factory_dict["user_id"]["ElapsedTimeBinningEncoder"] = ElapsedTimeBinningEncoder()
        feature_factory_manager = FeatureFactoryManager(feature_factory_dict=feature_factory_dict,
                                                        logger=logger,
                                                        split_num=1,
                                                        model_id="all",
                                                        load_feature=not is_debug,
                                                        save_feature=not is_debug)

        ff_for_transformer = FeatureFactoryForTransformer(column_config=column_config,
                                                          dict_path="../feature_engineering/",
                                                          sequence_length=params["max_seq"],
                                                          logger=logger)
        df = pd.read_pickle("../input/riiid-test-answer-prediction/train_merged.pickle")
        if is_debug:
            df = df.head(10000)
        df = df.sort_values(["user_id", "timestamp"]).reset_index(drop=True)
        feature_factory_manager.fit(df)
        df = feature_factory_manager.all_predict(df)
        for dicts in feature_factory_manager.feature_factory_dict.values():
            for factory in dicts.values():
                factory.logger = None
        feature_factory_manager.logger = None
        with open(f"{output_dir}/feature_factory_manager.pickle", "wb") as f:
            pickle.dump(feature_factory_manager, f)

        ff_for_transformer.fit(df)
        ff_for_transformer.logger = None
        with open(f"{output_dir}/feature_factory_manager_for_transformer.pickle", "wb") as f:
            pickle.dump(ff_for_transformer, f)
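A minimal illustration of the sequence-chunking trick used above (toy data, not project code): each user's rows are counted back from the most recent interaction, so integer division by max_seq splits the history into chunks keyed by a synthetic "user_id_group" id.

import pandas as pd

max_seq = 3
toy = pd.DataFrame({"user_id": [1] * 7, "content_id": range(7)})
remaining = toy.groupby("user_id")["user_id"].transform("count") - toy.groupby("user_id").cumcount()
toy["group"] = remaining // max_seq
toy["user_id"] = toy["user_id"].astype(str) + "_" + toy["group"].astype(str)
print(toy["group"].tolist())  # [2, 2, 1, 1, 1, 0, 0] -> chunk keys "1_2", "1_1", "1_0"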
Exemplo n.º 21
0
from __future__ import print_function
import mlflow

experiment_id = 0
name = 'HelloWorld Nested Runs'

with mlflow.start_run(experiment_id=experiment_id, run_name=name) as run:
    print("runId:",run.info.run_uuid," - name:",name)
    mlflow.set_tag("algo", name)
    with open("info.txt", "w") as f:
        f.write(name)
    mlflow.log_artifact("info.txt")
    for j in range(0, 2):
        name2 = name + " " + str(j)
        with mlflow.start_run(run_name=name2, nested=True) as run2:
            print("runId.2:",run2.info.run_uuid," - name2:",name2)
            mlflow.log_param("alpha", str(j+0.1))
            mlflow.log_metric("auroch", j+0.123)
            mlflow.set_tag("algo", name2)
            with open("info.txt", "w") as f:
                f.write(name2)
            mlflow.log_artifact("info.txt")
Exemplo n.º 22
0
def run(runParams: dict) -> float:
    if not runParams["paramsFile"] is None:
        with open(runParams["paramsFile"], "r") as f:
            params = yaml.safe_load(f)
            params["device"] = runParams["device"]
            params["subDeviceIdx"] = runParams["subDeviceIdx"]
    else:
        params = runParams

    if params["device"] != "cuda":
        use_cuda = False
    else:
        use_cuda = torch.cuda.is_available()

    if params["subDeviceIdx"] is None:
        subDeviceIdx = 0
    else:
        subDeviceIdx = params["subDeviceIdx"]

    device = torch.device(
        "cuda:{}".format(subDeviceIdx) if use_cuda else "cpu")
    seed = params["training"]["seed"]
    if seed is None:
        seed = np.random.randint(10000)
        logger.debug("Using random seed")
    np.random.seed(seed)
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)

    dataList = _asList(params["training"]["datasets"])
    logger.info(f"Datasets: {dataList}")
    nDatasets = len(dataList)

    batchSize = params["training"]["batchSize"]
    epochs = _asList(params["training"]["epochs"], nDatasets)

    dataShape = params["training"]["inputShape"]
    dataSize = dataShape[-2:]
    dataChannels = dataShape[1]

    fullLossFactor = _asList(params["training"]["fullLossCoeff"], nDatasets)
    learningRate = _asList(params["training"]["learningRate"], nDatasets)
    ewcCoeff = params["training"]["importance"]
    ewcSampleSize = params["training"]["sampleSize"]

    expName = _genExpName(dataList)
    experiment = mlflow.get_experiment_by_name(expName)
    if experiment is None:
        logger.info("Creating new experiment")
        expID = mlflow.create_experiment(expName)
    else:
        logger.info(f"Using existing experiment")
        expID = experiment.experiment_id

    with mlflow.start_run(experiment_id=expID):
        modelName = params["name"]
        mlflow.log_param("params", params)
        mlflow.log_param("name", modelName)

        coder = Codec(0, Direction.Forward, params["training"]["inputShape"],
                      params["coder"]).to(device)
        logDir = "../data/logs/" + modelName
        tbWriter = SummaryWriter(logDir)

        previousTestData = []
        previousClassifiers = []
        for datasetIdx, dataset in enumerate(dataList):
            embeddingSize = params["training"]["embeddingSize"]

            # one decoder per dataset
            classifier = Decoder(
                _asList(params["decoder"], nDatasets)[datasetIdx],
                embeddingSize,
                0,
                params["training"]["inputShape"],
                device,
                False,
            ).to(device)

            logger.info(f"\n\t==== {dataset}: TRAINING ====\n")
            train_loader, test_loader = getDatasets(dataset, batchSize,
                                                    dataSize, dataChannels)

            optimParams = chain(coder.parameters(), classifier.parameters())
            optimizer = optim.Adam(optimParams, lr=learningRate[datasetIdx])
            if datasetIdx > 0:
                ewc = EWC(
                    coder,
                    getTrainingSamples(dataList[:datasetIdx], ewcSampleSize,
                                       dataSize, dataChannels),
                    device,
                )
            else:
                ewc = None
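            # EWC only applies from the second dataset onward: it needs samples from
            # previous tasks to estimate how strongly each shared parameter is anchored.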

            currentAcc = 0
            currentCorrect = 0
            currentTotalSize = len(test_loader.dataset)
            for epoch in range(1, epochs[datasetIdx] + 1):
                train(
                    coder,
                    classifier,
                    ewc,
                    ewcCoeff,
                    device,
                    train_loader,
                    optimizer,
                    epoch,
                    fullLossFactor[datasetIdx],
                    tbWriter,
                )
                logger.info(f"\n\t==== {dataset}: TEST ({dataset}) ====\n")
                currentAcc, currentCorrect = test(
                    coder,
                    classifier,
                    device,
                    test_loader,
                    len(train_loader),
                    epoch,
                    fullLossFactor[datasetIdx],
                    tbWriter,
                )

            # saveModel(coder, f'../data/{modelName}_{dataset}.pt')
            # saveModel(classifier, f'../data/{modelName}_{dataset}_classifier.pt')
            #
            # trainBuffer = encodeDataset(coder, device, train_loader)
            # testBuffer = encodeDataset(coder, device, test_loader)
            #
            # saveData(trainBuffer,f'../data/{modelName}_{dataset}_trainEmbeddings.npz')
            # saveData(testBuffer,f'../data/{modelName}_{dataset}_testEmbeddings.npz')

            totalCorrect = currentCorrect
            totalDatasetSize = currentTotalSize
            for pIdx, pTestData in enumerate(previousTestData):
                previousDataName = dataList[pIdx]
                logger.info(
                    f"\n\t==== {dataset}: Lifelong TEST  ({previousDataName}) "
                    f"====\n")
                testDatasetSize = len(pTestData.dataset)
                totalDatasetSize += testDatasetSize
                logger.info("Re-encoding test data")
                newEncodedTestData = encodeDataset(coder, device, pTestData)
                # saveData(newEncodedTestData,
                #          f'../data/{modelName}_lifelong_{previousDataName}'
                #          f'_testEmbeddings.npz')
                pClassifier = previousClassifiers[pIdx].to(device)

                previousAcc, previousCorrect = testClassifier(
                    pClassifier,
                    newEncodedTestData,
                    testDatasetSize,
                    epochs[datasetIdx],
                    f"lifelong/{previousDataName}",
                )

                totalCorrect += previousCorrect

                globalAcc = 100.0 * totalCorrect / totalDatasetSize
                mlflow.log_metric("lifelong/globalAccuracy", globalAcc,
                                  epochs[datasetIdx])
                logger.info(
                    f"Global accuracy at task {pIdx}: {globalAcc:.0f}% "
                    f"({totalCorrect}/{totalDatasetSize})")

            previousTestData.append(test_loader)
            previousClassifiers.append(classifier.cpu())

        mlflow.log_artifacts(logDir, artifact_path="events")

    return currentAcc
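The EWC class used above isn't shown in this excerpt; a minimal sketch of the penalty it typically implements (illustrative names, not the project's actual API): the diagonal Fisher information F_i anchors each parameter theta_i to its value theta*_i recorded after previous tasks, so the training loss becomes task_loss + coeff * sum_i F_i * (theta_i - theta*_i)^2.

import torch

def ewc_penalty(model, fisher, old_params, coeff):
    # fisher and old_params map parameter names to tensors captured after
    # training on the previous task(s)
    penalty = torch.zeros(())
    for name, param in model.named_parameters():
        if name in fisher:
            penalty = penalty + (fisher[name] * (param - old_params[name]) ** 2).sum()
    return coeff * penalty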
Exemplo n.º 23
0
import os
from random import random

from mlflow import log_metric, log_param, log_artifacts

if __name__ == "__main__":
    print("Running test.py")

    log_param("param1", 5)

    log_metric("foo", random())
    log_metric("foo", random() + 1)
    log_metric("foo", random() + 2)

    if not os.path.exists("outputs"):
        os.makedirs("outputs")
    with open("outputs/test.txt", "w") as f:
        f.write("hello world!")

    log_artifacts("outputs")
Exemplo n.º 24
0
def main():
    args = read_args()
    print_args(args)

    experiment_name = args.experiment_name
    batch_size = args.batch_size
    learning_rate = args.learning_rate
    hidden_layer_sizes = args.hidden_layer_sizes
    dropout = args.dropout
    epochs = args.epochs

    ### Output directory
    dir_name = log_dir_name(args)
    print()
    print(dir_name)
    print()
    output_dir = os.path.join('experiments', experiment_name, dir_name)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    dataset, dev_dataset, test_dataset = load_dataset(args.dataset_dir)
    nlabels = dataset[TARGET_COL].unique().shape[0]

    columns = ['Gender', 'Color1', 'Breed1']
    one_hot_columns, embedded_columns, numeric_columns = build_columns(dataset, columns)

    # TODO (optional) put these three types of columns in the same dictionary with "column types"
    X_train, y_train = process_features(dataset, one_hot_columns, numeric_columns, embedded_columns)
    direct_features_input_shape = (X_train['direct_features'].shape[1],)
    X_dev, y_dev = process_features(dev_dataset, one_hot_columns, numeric_columns, embedded_columns)
      

    ###########################################################################################################
    ### TODO: Shuffle train dataset - Done
    ###########################################################################################################
    shuffle_len = X_train['direct_features'].shape[0]
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(shuffle_len).batch(batch_size)
    ###########################################################################################################

    dev_ds = tf.data.Dataset.from_tensor_slices((X_dev, y_dev)).batch(batch_size)
    test_ds = tf.data.Dataset.from_tensor_slices(process_features(
        test_dataset, one_hot_columns, numeric_columns, embedded_columns, test=True)[0]).batch(batch_size)


    ###########################################################################################################
    ### TODO: Build the Keras model - Done
    ###########################################################################################################
    tf.keras.backend.clear_session()

    # Add one input and one embedding for each embedded column
    embedding_layers = []
    inputs = []
    for embedded_col, max_value in embedded_columns.items():
        input_layer = layers.Input(shape=(1,), name=embedded_col)
        inputs.append(input_layer)
        # Define the embedding layer
        embedding_size = int(max_value / 4)
        embedding_layers.append(
            tf.squeeze(layers.Embedding(input_dim=max_value, output_dim=embedding_size)(input_layer), axis=-2))
        print('Adding embedding of size {} for layer {}'.format(embedding_size, embedded_col))

    # Add the direct features already calculated
    direct_features_input = layers.Input(shape=direct_features_input_shape, name='direct_features')
    inputs.append(direct_features_input)
        
    # Concatenate everything together
    features = layers.concatenate(embedding_layers + [direct_features_input])

    denses = []
    dense1 = layers.Dense(hidden_layer_sizes[0], activation='relu')(features)
    denses.append(dense1)
    if len(hidden_layer_sizes) > 1:
        for hidden_layer_size in hidden_layer_sizes[1:]:
            dense = layers.Dense(hidden_layer_size, activation='relu')(denses[-1])
            denses.append(dense)
    output_layer = layers.Dense(nlabels, activation='softmax')(denses[-1])  # connect to the last hidden layer, not dense1

    model = models.Model(inputs=inputs, outputs=output_layer)
    ###########################################################################################################
    

    ###########################################################################################################
    ### TODO: Fit the model - Done
    ###########################################################################################################
    mlflow.set_experiment(experiment_name)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                  metrics=['accuracy'])

    logdir = "logs/scalars/" + dir_name
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

    with mlflow.start_run(nested=True):
        # Log model hyperparameters first
        mlflow.log_param('hidden_layer_size', hidden_layer_sizes)
        mlflow.log_param('dropout', dropout)  # logged for bookkeeping; note the model above does not add a Dropout layer
        mlflow.log_param('embedded_columns', embedded_columns)
        mlflow.log_param('one_hot_columns', one_hot_columns)
        mlflow.log_param('numeric_columns', numeric_columns)  # Not using these yet
        mlflow.log_param('epochs', epochs)
        mlflow.log_param('batch_size', batch_size)
        mlflow.log_param('learning_rate', learning_rate)
        

        # Train
        history = model.fit(train_ds, epochs=epochs,
                            validation_data=dev_ds,
                            callbacks=[tensorboard_callback])

        #######################################################################################################
        ### TODO: analyze history to see if model converges/overfits
        #######################################################################################################
        history_path = os.path.join(output_dir, 'history.pickle')
        with open(history_path, 'wb') as f:
            pickle.dump(history.history, f)
        #######################################################################################################


        #######################################################################################################
        ### TODO: Evaluate the model, calculating the metrics. - Done
        #######################################################################################################
        loss, accuracy = model.evaluate(dev_ds)
        print("*** Dev loss: {} - accuracy: {}".format(loss, accuracy))
        mlflow.log_metric('loss', loss)
        mlflow.log_metric('accuracy', accuracy)       
        predictions = model.predict(test_ds)
        #######################################################################################################


        #######################################################################################################
        ### TODO: Convert predictions to classes - Done
        #######################################################################################################
        prediction_classes = np.argmax(predictions, axis=1)
        #######################################################################################################


        #######################################################################################################
        ### TODO: Save the results for submission - Done
        #######################################################################################################
        output_csv = os.path.join(output_dir, 'submit.csv')
        submissions = pd.DataFrame(prediction_classes, columns=[TARGET_COL], index=test_dataset.PID)
        submissions.to_csv(output_csv)
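One way to tackle the "analyze history" TODO above (a sketch assuming the output_dir and history pickle written by the code): plot training loss against validation loss and look for the curves diverging.

import os
import pickle
import matplotlib.pyplot as plt

with open(os.path.join(output_dir, 'history.pickle'), 'rb') as f:
    history = pickle.load(f)

plt.plot(history['loss'], label='train loss')
plt.plot(history['val_loss'], label='dev loss')
plt.legend()
plt.savefig(os.path.join(output_dir, 'history_loss.png'))  # a widening gap suggests overfitting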
Exemplo n.º 25
0
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
    random_state = random.randint(1, 100)

    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha,
                        l1_ratio=l1_ratio,
                        random_state=random_state)
        lr.fit(train_x, train_y)

        predicted_qualities = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_param("random_state", random_state)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")
Exemplo n.º 26
0
def main(args):

    data_dir = args.data_dir
    figure_path = args.figure_dir
    model_path = args.model_dir

    # Generate the data input path list. Each subject has 3 runs stored in 3 different files.
    subj_id = "/sub" + str(args.sub) + "/ball0"
    raw_fnames = [
        "".join([data_dir, subj_id, str(i), "_sss_trans.fif"])
        for i in range(1 if args.sub != 3 else 2, 4)
    ]

    # local
    # subj_id = "/sub"+str(args.sub)+"/ball"
    # raw_fnames = ["".join([data_dir, subj_id, str(i), "_sss.fif"]) for i in range(1, 2)]

    # Set skip_training to False if the model has to be trained, to True if the model has to be loaded.
    skip_training = False

    # Set the torch device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device = {}".format(device))

    # Initialize parameters
    parameters = Params_tunable(
        subject_n=args.sub,
        hand=args.hand,
        batch_size=args.batch_size,
        valid_batch_size=args.batch_size_valid,
        test_batch_size=args.batch_size_test,
        epochs=args.epochs,
        lr=args.learning_rate,
        duration=args.duration,
        overlap=args.overlap,
        patience=args.patience,
        device=device,
        y_measure=args.y_measure,
        s_n_layer=args.s_n_layer,
        # s_kernel_size=args.s_kernel_size,  # Local
        s_kernel_size=json.loads(" ".join(args.s_kernel_size)),
        t_n_layer=args.t_n_layer,
        # t_kernel_size=args.t_kernel_size,  # Local
        t_kernel_size=json.loads(" ".join(args.t_kernel_size)),
        max_pooling=args.max_pooling,
        ff_n_layer=args.ff_n_layer,
        ff_hidden_channels=args.ff_hidden_channels,
        dropout=args.dropout,
        activation=args.activation,
    )

    # Set if generate with RPS values or not (check network architecture used later)
    rps = True

    # Generate the custom dataset
    if rps:
        dataset = MEG_Dataset(
            raw_fnames,
            parameters.duration,
            parameters.overlap,
            parameters.y_measure,
            normalize_input=True,
        )
    else:
        dataset = MEG_Dataset_no_bp(
            raw_fnames,
            parameters.duration,
            parameters.overlap,
            parameters.y_measure,
            normalize_input=True,
        )

    # split the dataset in train, test and valid sets.
    train_len, valid_len, test_len = len_split(len(dataset))
    print(
        "{} + {} + {} = {}?".format(
            train_len, valid_len, test_len, len(dataset)
        )
    )

    # train_dataset, valid_test, test_dataset = random_split(dataset, [train_len, valid_len, test_len],
    #                                                        generator=torch.Generator().manual_seed(42))
    train_dataset, valid_dataset, test_dataset = random_split(
        dataset, [train_len, valid_len, test_len]
    )

    # Better visualization
    # train_valid_dataset = Subset(dataset, list(range(train_len+valid_len)))
    # test_dataset = Subset(dataset, list(range(train_len+valid_len, len(dataset))))
    #
    # train_dataset, valid_dataset = random_split(train_valid_dataset, [train_len, valid_len])

    # Initialize the dataloaders
    trainloader = DataLoader(
        train_dataset,
        batch_size=parameters.batch_size,
        shuffle=True,
        num_workers=1,
    )
    validloader = DataLoader(
        valid_dataset,
        batch_size=parameters.valid_batch_size,
        shuffle=True,
        num_workers=1,
    )
    testloader = DataLoader(
        test_dataset,
        batch_size=parameters.test_batch_size,
        shuffle=False,
        num_workers=1,
    )

    # Get the n_times dimension
    with torch.no_grad():
        # Changes if RPS integration or not
        if rps:
            x, _, _ = next(iter(trainloader))
        else:
            x, _ = next(iter(trainloader))

    n_times = x.shape[-1]

    # Initialize network
    # net = LeNet5(n_times)
    # net = ResNet([2, 2, 2], 64, n_times)
    # net = SCNN(parameters.s_n_layer,
    #                    parameters.s_kernel_size,
    #                    parameters.t_n_layer,
    #                    parameters.t_kernel_size,
    #                    n_times,
    #                    parameters.ff_n_layer,
    #                    parameters.ff_hidden_channels,
    #                    parameters.dropout,
    #                    parameters.max_pooling,
    #                    parameters.activation)
    # net = MNet(n_times)
    # net = RPS_SCNN(parameters.s_n_layer,
    #                    parameters.s_kernel_size,
    #                    parameters.t_n_layer,
    #                    parameters.t_kernel_size,
    #                    n_times,
    #                    parameters.ff_n_layer,
    #                    parameters.ff_hidden_channels,
    #                    parameters.dropout,
    #                    parameters.max_pooling,
    #                    parameters.activation)

    net = RPS_MNet(n_times)
    # net = RPS_MLP()
    mlp = False

    print(net)
    # Training loop or model loading
    if not skip_training:
        print("Begin training....")

        # Check the optimizer before running (different from model to model)
        optimizer = Adam(net.parameters(), lr=parameters.lr, weight_decay=5e-4)
        # optimizer = SGD(net.parameters(), lr=parameters.lr, weight_decay=5e-4)

        scheduler = ReduceLROnPlateau(optimizer, mode="min", factor=0.5,
                                      patience=15)

        print("scheduler : ", scheduler)

        loss_function = torch.nn.MSELoss()
        start_time = timer.time()
        if rps:
            if mlp:
                net, train_loss, valid_loss = train_bp_MLP(
                    net,
                    trainloader,
                    validloader,
                    optimizer,
                    scheduler,
                    loss_function,
                    parameters.device,
                    parameters.epochs,
                    parameters.patience,
                    parameters.hand,
                    model_path,
                )
            else:
                net, train_loss, valid_loss = train_bp(
                    net,
                    trainloader,
                    validloader,
                    optimizer,
                    scheduler,
                    loss_function,
                    parameters.device,
                    parameters.epochs,
                    parameters.patience,
                    parameters.hand,
                    model_path,
                )
        else:
            net, train_loss, valid_loss = train(
                net,
                trainloader,
                validloader,
                optimizer,
                scheduler,
                loss_function,
                parameters.device,
                parameters.epochs,
                parameters.patience,
                parameters.hand,
                model_path,
            )

        train_time = timer.time() - start_time
        print("Training done in {:.4f}".format(train_time))

        # visualize the loss as the network trained
        fig = plt.figure(figsize=(10, 4))
        plt.plot(
            range(1, len(train_loss) + 1), train_loss, label="Training Loss"
        )
        plt.plot(
            range(1, len(valid_loss) + 1), valid_loss, label="Validation Loss"
        )

        # find position of lowest validation loss
        minposs = valid_loss.index(min(valid_loss)) + 1
        plt.axvline(
            minposs,
            linestyle="--",
            color="r",
            label="Early Stopping Checkpoint",
        )

        plt.xlabel("epochs")
        plt.ylabel("loss")
        # plt.ylim(0, 0.5) # consistent scale
        # plt.xlim(0, len(train_loss)+1) # consistent scale
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        image1 = fig
        plt.savefig(os.path.join(figure_path, "loss_plot.pdf"))  # save before show(), which may clear the figure
        plt.show()

    if not skip_training:
        # Save the trained model
        save_pytorch_model(net, model_path, "Baselinemodel_SCNN_swap.pth")
    else:
        # Load the model (properly select the model architecture)
        net = RPS_MNet(n_times)  # match the constructor arguments used for training above
        net = load_pytorch_model(
            net, os.path.join(model_path, "model.pth"), parameters.device
        )

    # Evaluation
    print("Evaluation...")
    net.eval()
    y_pred = []
    y = []

    # if RPS integration
    with torch.no_grad():
        if rps:
            if mlp:
                for _, labels, bp in testloader:
                    labels, bp = labels.to(parameters.device), bp.to(parameters.device)
                    y.extend(list(labels[:, parameters.hand]))
                    y_pred.extend((list(net(bp))))
            else:
                for data, labels, bp in testloader:
                    data, labels, bp = (
                        data.to(parameters.device),
                        labels.to(parameters.device),
                        bp.to(parameters.device),
                    )
                    y.extend(list(labels[:, parameters.hand]))
                    y_pred.extend((list(net(data, bp))))
        else:
            for data, labels in testloader:
                data, labels = (
                    data.to(parameters.device),
                    labels.to(parameters.device),
                )
                y.extend(list(labels[:, parameters.hand]))
                y_pred.extend((list(net(data))))

    print("SCNN_swap...")
    # Calculate Evaluation measures
    mse = mean_squared_error(y, y_pred)
    rmse = mean_squared_error(y, y_pred, squared=False)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)
    print("mean squared error {}".format(mse))
    print("root mean squared error {}".format(rmse))
    print("mean absolute error {}".format(mae))
    print("r2 score {}".format(r2))

    # plot y_new against the true value focus on 100 timepoints
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(100)
    ax.plot(times, y_pred[0:100], color="b", label="Predicted")
    ax.plot(times, y[0:100], color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("{}".format(parameters.y_measure))
    ax.set_title(
        "Sub {}, hand {}, {} prediction".format(
            str(parameters.subject_n),
            "sx" if parameters.hand == 0 else "dx",
            parameters.y_measure,
        )
    )
    plt.legend()
    plt.savefig(os.path.join(figure_path, "Times_prediction_focus.pdf"))
    plt.show()

    # plot y_new against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    times = np.arange(len(y_pred))
    ax.plot(times, y_pred, color="b", label="Predicted")
    ax.plot(times, y, color="r", label="True")
    ax.set_xlabel("Times")
    ax.set_ylabel("{}".format(parameters.y_measure))
    ax.set_title(
        "Sub {}, hand {}, {} prediction".format(
            str(parameters.subject_n),
            "sx" if parameters.hand == 0 else "dx",
            parameters.y_measure,
        )
    )
    plt.legend()
    plt.savefig(os.path.join(figure_path, "Times_prediction.pdf"))
    plt.show()

    # scatterplot y predicted against the true value
    fig, ax = plt.subplots(1, 1, figsize=[10, 4])
    ax.scatter(np.array(y), np.array(y_pred), color="b", label="Predicted")
    ax.set_xlabel("True")
    ax.set_ylabel("Predicted")
    # plt.legend()
    plt.savefig(os.path.join(figure_path, "Scatter.pdf"))
    plt.show()

    # log the model and parameters using mlflow tracker
    with mlflow.start_run(experiment_id=args.experiment) as run:
        for key, value in vars(parameters).items():
            mlflow.log_param(key, value)

        mlflow.log_param("Time", train_time)

        mlflow.log_metric("MSE", mse)
        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)

        mlflow.log_artifact(os.path.join(figure_path, "Times_prediction.pdf"))
        mlflow.log_artifact(
            os.path.join(figure_path, "Times_prediction_focus.pdf")
        )
        mlflow.log_artifact(os.path.join(figure_path, "loss_plot.pdf"))
        mlflow.log_artifact(os.path.join(figure_path, "Scatter.pdf"))
        mlflow.pytorch.log_model(net, "models")
Exemplo n.º 27
0
def nni_single_shot_neural_architecture_search(hp: HYPERPARAMS_T, model: torch.nn.Module, losses: LOSS_FN_TERMS_T, datasets: Tuple[Dataset], opt: Type[torch.optim.Optimizer], backend_conf: 'ignite_training.BackendConfig' = None,
                                               loss_weights: LOSS_TERMS_WEIGHTS_T = None, metrics: Dict[str, METRIC_FN_T] = {}, callbacks_handler: deepcv.utils.EventsHandler = None, final_architecture_path: Union[str, Path] = None) -> Tuple[METRICS_DICT_T, Optional[Path], str]:
    """ Train model with provided NAS trainer in order to find out the best NN architecture by training a superset NN instead of performing multiple trainings/trials for each/many possible architectures.  
    Args:
        - hp: Hyperparameter dict, see ``deepcv.meta.ignite_training._check_params`` for the required and default training (hyper)parameters  
        - model: Pytorch ``torch.nn.Module`` to train  
        - losses: Loss(es) module(s) and/or callables to be used as training criterion(s) (may be a single loss function/module, a sequence of loss functions, or a mapping of loss function(s) associated with their respective loss name(s)).  
            .. See `loss_weights` argument for more details on multiple loss terms usage and weighting.
        - datasets: Tuple of PyTorch Datasets giving access to the trainset, the validset and an optional testset  
        - opt: Optimizer type to be used for gradient descent  
        - backend_conf: Backend information defining distributed configuration (available GPUs, whether if CPU or GPU are used, distributed node count, ...), see ``deepcv.meta.ignite_training.BackendConfig`` class for more details.  
        - loss_weights: Optional weight/scaling/importance vector or sequence applied to each loss term (defaults to 1. when `None`). This argument should contain as many scalars as there are loss terms/functions in the `losses` argument. All weight values are cast to `torch.float32` and their L1 norm (sum) must be non-zero, since the weighted sum of loss terms is divided by the L1 norm of the weights.  
            NOTE: Each scalar in `loss_weights` weights its respective loss term, so the total loss on which the model is trained is the weighted mean of the loss terms (e.g. when `loss_weights` contains a `1.` for each loss term, the final loss is the plain mean of those terms; due to the L1 normalization, uniformly scaling all weights leaves the loss unchanged)  
            NOTE: You may provide a mapping of weights instead of a Sequence in case you need to apply weights/factors to their respective terms identified by name (`losses` should also be a mapping in this case)
        - metrics: Additional metrics dictionary (the loss is already included in the metrics evaluated by default)  
        - callbacks_handler: Callbacks events handler. If not `None`, events listed in `deepcv.meta.ignite_training.TRAINING_EVENTS` will be fired at various steps of the training process, allowing `deepcv.meta.ignite_training.train` functionalities to be extended ("two-way" callbacks).  
        - final_architecture_path: File path to which the final/optimal fixed architecture found by the Single-Shot NAS algorithm is exported (a JSON file containing the NNI NAS Mutable choices needed to obtain the fixed architecture from the model search space).  

    Returns a pathlib.Path to a JSON file storing the best NN model architecture found by NNI Single-Shot NAS (the JSON file stores the mutable layer/input choices made in the model search space to define the best fixed architecture found; this file is also logged to mlflow if there is an active run).  
    NOTE: Support for SPOS SingleShot NAS is untested and may be partial for now, see [NNI NAS SPOS documentation](https://nni.readthedocs.io/en/latest/NAS/SPOS.html) for more details on Evolution Tuner usage to find best model architecture.  

    *To-Do List*
        - # TODO: convert ignite metrics for NNI NAS trainer usage if needed (to Callable[['output', 'target'], Dict[str, float]])
        - # TODO: reuse code from ignite training for output path and log final architecture as mlflow artifact
        - # TODO: Allow resuming an NNI single-shot NAS experiment through the 'hp['resume_from']' parameter (if possible easily using the NNI API?)
        - # TODO: Add support for two-way callbacks using deepcv.utils.EventsHandler in a similar way than ignite_training.train (once ignite_training fully support it)
    """
    from . import ignite_training
    from .ignite_training import BackendConfig
    if backend_conf is None:
        backend_conf = BackendConfig()
    experiment_name, run_id = get_nni_or_mlflow_experiment_and_trial()
    run_info_msg = f'(Experiment: "{experiment_name}", run_id: "{run_id}")'
    logging.info(f'Starting Single-Shot Neural Architecture Search (NNI NAS API) training over NN architecture search space {run_info_msg}.')

    TRAINING_HP_DEFAULTS = {'optimizer_opts': ..., 'epochs': ..., 'batch_size': None, 'nni_single_shot_nas_algorithm': ..., 'output_path': Path.cwd() / 'data' / '04_training', 'log_output_dir_to_mlflow': True,
                            'log_progress_every_iters': 100, 'seed': None, 'resume_from': '', 'deterministic_cudnn': False, 'nas_mutator': None, 'nas_mutator_kwarg': dict(), 'nas_trainer_kwargs': dict()}
    hp, _ = hyperparams.to_hyperparameters(hp, TRAINING_HP_DEFAULTS, raise_if_missing=True)
    deepcv.utils.setup_cudnn(deterministic=hp['deterministic_cudnn'], seed=backend_conf.rank + (hp['seed'] if hp['seed'] is not None else 0))  # In a distributed setup, each worker needs a different seed (guard against the `None` default)
    model = model.to(backend_conf.device, non_blocking=True)
    loss = losses.to(backend_conf.device) if isinstance(losses, torch.nn.Module) else losses  # the function receives `losses`; define the local name `loss` used below
    num_workers = max(1, (backend_conf.ncpu - 1) // (backend_conf.ngpus_current_node if backend_conf.ngpus_current_node > 0 and backend_conf.distributed else 1))
    optimizer = opt(model.parameters(), **hp['optimizer_opts'])
    trainset, *validset_testset = datasets
    output_path = ignite_training.add_training_output_dir(hp['output_path'], backend_conf, prefix='single_shot_nas_')
    # TODO: use this output_path in trainer and export final architecture to this directory too
    # TODO: use ingite_training function to setup distributed training?

    # Creates HP scheduler from hp if respective hp arguments have been provided by user
    scheduler = None
    if hp.get('scheduler') is not None:  # 'scheduler' is not among TRAINING_HP_DEFAULTS, so use .get() to avoid a KeyError
        args_to_eval = hp['scheduler']['eval_args'] if 'eval_args' in hp['scheduler'] else {}
        scheduler_kwargs = {n: eval(v, {'hp': hp, 'iterations': len(trainset)}) if n in args_to_eval else v
                            for n, v in hp['scheduler']['kwargs'].items()}
        scheduler = nni.nas.pytorch.callbacks.LRSchedulerCallback(scheduler=hp['scheduler']['type'](optimizer=optimizer, **scheduler_kwargs))
    nas_trainer_callbacks = [cb for cb in (scheduler,) if cb is not None]  # TODO: ... add user provided callbacks

    if not is_nni_run_standalone() and not is_nni_gen_search_space_mode():
        class _ReportToNNICallback(nni.nas.pytorch.callbacks.Callback):
            def __init__(self, epochs=hp['epochs']):
                self.epochs = epochs

            def on_epoch_end(self, epoch):
                # TODO: find a way to retreive metrics or evaluate model on my own (meters = AverageMeterGroup() ...), see https://nni.readthedocs.io/en/latest/_modules/nni/nas/pytorch/enas/trainer.html
                meters = ...
                if epoch >= self.epochs:
                    nni.report_final_result(meters)
                else:
                    nni.report_intermediate_result(meters)
        nas_trainer_callbacks.append(_ReportToNNICallback(hp['epochs']))  # pass the epoch count, not the whole hp dict

    nas_trainer_kwargs = hp['nas_trainer_kwargs']
    nas_mutator = hp['nas_mutator']
    if nas_mutator is not None:
        if isinstance(nas_mutator, str):
            nas_mutator = deepcv.utils.get_by_identifier(nas_mutator)
        if not issubclass(nas_mutator, nni.nas.pytorch.base_mutator.BaseMutator):
            raise TypeError('Error: NNI SingleShot NAS Mutator argument "nas_mutator" must either be a "nni.nas.pytorch.mutables.Mutator" Type or a string identifier which resolves to a "nni.nas.pytorch.mutables.Mutator" Type.')
        nas_trainer_kwargs['mutator'] = nas_mutator(**hp['nas_mutator_kwarg'])  # Instanciate user-provided mutator
    if hp['batch_size'] is not None:
        nas_trainer_kwargs['batch_size'] = hp['batch_size']

    train_type = NNI_SINGLE_SHOT_NAS_ALGORITHMS[hp['nni_single_shot_nas_algorithm']]
    trainer = train_type(model=model, loss=loss, metrics=metrics, optimizer=optimizer, num_epochs=hp['epochs'],
                         trainset=trainset, validset=validset_testset[0], num_workers=num_workers, device=backend_conf.device,
                         log_frequency=hp['log_progress_every_iters'], callbacks=nas_trainer_callbacks, **nas_trainer_kwargs)

    # Train model with provided NAS trainer in order to find out the best NN architecture by training a superset NN instead of performing multiple trainings/trials for each/many possible architectures
    trainer.train()
    logging.info(f'Single-shot NAS training done. Validating model architecture... {run_info_msg}')
    trainer.validate()
    logging.info(f'Saving obtained NN architecture from NNI Single-Shot NAS algorithm as a JSON file and logging it to mlflow if possible... {run_info_msg}')

    # Print resulting architecture as a JSON string and save it to a JSON file if `final_architecture_path` isn't `None` (and is valid)
    architecture_choices = trainer.mutator.export()
    json_choices = deepcv.utils.replace_newlines(json.dumps(architecture_choices, indent=2, sort_keys=True, cls=nni.nas.pytorch.trainer.TorchTensorEncoder))
    logging.info(f'Final/best NN architecture obtained from NNI Single-Shot NAS. {run_info_msg}{NL}'
                 f'NAS Mutable choices:{NL}'
                 f'``` json{NL}{json_choices}{NL}```')

    logging.info(f'Saving final/best NN architecture obtained from NNI Single-Shot NAS (and logging it to MLFlow artifacts when a run is active). {run_info_msg}')

    if final_architecture_path is not None:
        final_architecture_path = Path(final_architecture_path)
        with final_architecture_path.open(mode='w', newline=NL) as json_file:  # 'w' mode will replace any existing file
            json_file.write(json_choices)  # export the final architecture to a JSON file
    if mlflow.active_run() is not None:
        if final_architecture_path is not None:
            mlflow.log_artifact(str(final_architecture_path))
            mlflow.set_tag('final_single_shot_nas_architecture_path', str(final_architecture_path))
        mlflow.log_param('final_single_shot_nas_architecture', json_choices)

    logging.info(f'Single-Shot NAS training procedure completed. {run_info_msg}')
    return (..., final_architecture_path, architecture_choices)  # TODO: return 'meters' metrics resulting from best evaluation on validset
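The loss weighting described in the docstring (weighted sum of loss terms divided by the L1 norm of the weights) can be sketched as follows; this is illustrative code, not the deepcv implementation.

import torch

def combine_loss_terms(loss_terms, weights=None):
    # loss_terms: sequence of scalar loss tensors; weights default to 1. for each term
    if weights is None:
        weights = [1.0] * len(loss_terms)
    w = torch.tensor(weights, dtype=torch.float32)
    terms = torch.stack(list(loss_terms))
    return (w * terms).sum() / w.abs().sum()  # weighted mean, normalized by the weights' L1 norm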
Exemplo n.º 28
0
    # Instantiating model with model parameters
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)

    # Fitting training data to the model
    model.fit(X_train, y_train)

    # Running prediction on validation dataset
    preds = model.predict(X_val)

    # Getting metrics on the validation dataset
    rmse = mean_squared_error(y_val, preds, squared=False)  # RMSE: pass squared=False (plain MSE otherwise) and y_true first
    abs_error = mean_absolute_error(y_val, preds)
    r2 = r2_score(y_val, preds)  # r2_score is not symmetric: y_true must come first

    # Logging params and metrics to MLFlow
    mlflow.log_param('alpha', alpha)
    mlflow.log_param('l1_ratio', l1_ratio)
    mlflow.log_metric('rmse', rmse)
    mlflow.log_metric('abs_error', abs_error)
    mlflow.log_metric('r2', r2)

    # Logging training data
    mlflow.log_artifact(local_path='../data/wine/train.csv')

    # Logging training code
    mlflow.log_artifact(local_path='./mlflow-wine.py')

    # Logging model to MLFlow
    mlflow.sklearn.log_model(sk_model=model,
                             artifact_path='wine-pyfile-model',
                             registered_model_name='wine-pyfile-model')
Exemplo n.º 29
0
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model

import os
import mlflow
from random import random, randint
from mlflow import log_metric, log_param, log_artifacts  #pyfunc
import mlflow.tensorflow

print(tf.__version__)

if __name__ == "__main__":
    # Log a parameter (key-value pair)
    mlflow.set_tracking_uri("http://0.0.0.0:7777")  #mlflow-server:7777
    # mlflow.set_experiment("/my-experiment")

    log_param("param1", randint(0, 100))

    # Log a metric; metrics can be updated throughout the run
    log_metric("foo", random())
    log_metric("foo", random() + 1)
    log_metric("foo", random() + 2)

# Load in the data
fashion_mnist = tf.keras.datasets.fashion_mnist

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Increase one dimension so it can be used by the 2D convolutional keras layer
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
Exemplo n.º 30
0
def log_param(key, val):
    mlflow.log_param(key, val)
Exemplo n.º 31
0
File: main.py  Project: Daiver/jff
import mlflow
import os
import time
from mlflow import log_metric, log_param, log_artifact


if __name__ == "__main__":
    mlflow.set_experiment("First")
    with mlflow.start_run():
        # Log a parameter (key-value pair)
        log_param("param1", 5)

        # Log a metric; metrics can be updated throughout the run
        for i in range(200):
            time.sleep(0.1)
            log_metric("foo1", 1 * i)
            log_metric("foo2", 2 * i)
            log_metric("foo3", 3 * i)
            log_metric("foo4", 3 * i)
            log_metric("foo5", 3 * i)
            log_metric("foo6", 3 * i)
            log_metric("foo7", 3 * i)
            log_metric("foo8", 3 * i)
            log_metric("foo9", 3 * i)
            log_metric("foo10", 3 * i)
            log_metric("foo11", 3 * i)
            log_metric("foo12", 3 * i)
            log_metric("foo13", 3 * i)
            log_metric("foo14", 3 * i)
            log_metric("foo15", 3 * i)
            log_metric("foo16", 3 * i)
Exemplo n.º 32
0
get_ipython().system(' pip install --quiet mlflow==1.12.1 neptune-mlflow==0.2.5 neptune-client==0.4.132')

get_ipython().system(' pip install --quiet --upgrade mlflow neptune-mlflow neptune-client')

## Create some MLflow runs

import os
from random import random, randint
import mlflow 

# start a run
mlflow.start_run()

# Log a parameter (key-value pair)
mlflow.log_param("param1", randint(0, 100))

# Log a metric; metrics can be updated throughout the run
mlflow.log_metric("foo", random())
mlflow.log_metric("foo", random()+1)
mlflow.log_metric("foo", random()+2)
mlflow.log_metric("foo", random()+3)

mlflow.log_metric("bar", random())
mlflow.log_metric("bar", random()+1)
mlflow.log_metric("bar", random()+2)
mlflow.log_metric("bar", random()+3)

# Log an artifact (output file)
os.makedirs("outputs", exist_ok=True)
with open("outputs/test.txt", "w") as f: