Пример #1
0
def train(network, min_loss=np.inf):
    """Train ``network`` with MSE loss, checkpointing on validation improvement.

    Args:
        network: Model exposing ``.to``, ``.train``, ``.eval`` and ``.save``.
        min_loss: Best validation loss seen so far; the model is saved only
            when an epoch's validation loss beats it.
    """
    network.to(config.device)
    train_dataloader, valid_dataloader = data.load_training_data()

    train_len = len(train_dataloader.dataset)
    valid_len = len(valid_dataloader.dataset)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(params=network.parameters(),
                                 lr=config.learning_rate,
                                 betas=(0.9, 0.995))
    for epoch in range(config.epochs):
        print(f'#### Epoch {epoch} #####')
        train_loss = 0
        valid_loss = 0

        network.train()
        for images, targets in train_dataloader:
            # Bug fix: batches must live on the same device as the network,
            # otherwise this crashes when config.device is a GPU.
            images = images.to(config.device)
            targets = targets.to(config.device)

            optimizer.zero_grad()

            outputs = network(images)
            loss = criterion(outputs, targets)
            # Weight by batch size so the epoch average is per-sample.
            train_loss += loss.item() * len(images)

            loss.backward()
            optimizer.step()

        train_loss /= train_len
        print(f'Training loss : {train_loss}')

        with torch.no_grad():
            network.eval()
            for images, targets in valid_dataloader:
                images = images.to(config.device)
                targets = targets.to(config.device)

                outputs = network(images)
                loss = criterion(outputs, targets)
                valid_loss += loss.item() * len(images)

            valid_loss /= valid_len
            print(f'Valid loss : {valid_loss}')

            if valid_loss < min_loss:
                min_loss = valid_loss
                print('*** save ***')
                network.save()
Пример #2
0
def train():
    """Build, compile, and fit a small dense classifier on the training data.

    Returns:
        The fitted ``tf.keras`` model.
    """
    x_train, y_train = data.load_training_data()

    # Flatten the input image, funnel it through two ReLU hidden layers,
    # and finish with a 6-way softmax output.
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=utilities.IMG_SIZE),
        tf.keras.layers.Dense(128, activation=tf.nn.relu),
        tf.keras.layers.Dense(64, activation=tf.nn.relu),
        tf.keras.layers.Dense(6, activation=tf.nn.softmax),
    ])

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=["accuracy"])

    print(model.summary())

    model.fit(x_train, y_train, epochs=30)

    return model
Пример #3
0
def main():
    """
    The main function.

    Loads the train/test data, extracts TF-IDF features, fits every
    classifier on the same features, and evaluates them on the test set.

    Arguments:
        1. Takes no arguments.
    """

    train_data = data.load_training_data()
    test_data = data.load_test_data()

    # Vectorizer and transformer are shared between feature extraction
    # and the final prediction step.
    count = CountVectorizer()
    tfidf_transformer = TfidfTransformer()

    train_tfidf = features.feature_extraction(train_data, count,
                                              tfidf_transformer)

    # Fit each classifier on the shared TF-IDF features; the list order
    # fixes the dict insertion order.
    builders = [
        ('Naive Bayes', naive_bayes),
        ('SVM', svm_classifier),
        ('Random Forest', random_forest_classifier),
        ('Logistic Regression', logistic_regression_classifier),
        ('Decision Tree', decision_tree_classifier),
    ]
    models_dict = {}
    for label, build in builders:
        models_dict[label] = build(train_data, train_tfidf)

    predict_test_data(train_data, test_data, models_dict, count,
                      tfidf_transformer, train_tfidf)
Пример #4
0
def main(unused_argv):
    """Train the Estimator model, evaluate it, and print per-item predictions."""
    # Load training and eval data
    train_data, train_labels = data.load_training_data('train.csv')

    classifier = tf.estimator.Estimator(
        model_fn=nn_model_fn, model_dir='./model'
    )
    # Hook that would log softmax probabilities every 100 iterations.
    # NOTE(review): it is constructed but never attached to train() — kept
    # as-is to preserve behavior.
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"probabilities": "softmax_tensor"}, every_n_iter=100
    )

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True
    )
    classifier.train(input_fn=train_input_fn, steps=steps)

    # Evaluation reuses the training set; no shuffling, single pass.
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        y=train_labels,
        num_epochs=1,
        shuffle=False)
    predict_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data},
        shuffle=False
    )
    eval_results = classifier.evaluate(input_fn=eval_input_fn)
    for prediction in classifier.predict(input_fn=predict_input_fn):
        print(prediction)
    print(eval_results)
Пример #5
0
def main():
    """Train, optionally tune via Hyperopt, and log a model run to MLflow.

    Loads env vars and YAML config, configures logging, splits the data,
    fits a preprocessor, then either runs a single training run or a
    Hyperopt search over ``search_space``. The resulting model and its
    params/tags/metrics/artifacts are logged and returned.

    Returns:
        ``((x_train, y_train, x_test, y_test), model, params, tags,
        metrics, artifacts)`` for the final (best) run.
    """
    load_dotenv('.env.general')
    config = load_config('config.yml')
    # Create the log-file directories before dictConfig tries to open them.
    Path(config.logging.handlers.debug_file_handler.filename).parent.mkdir(
        parents=True, exist_ok=True)
    Path(config.logging.handlers.info_file_handler.filename).parent.mkdir(
        parents=True, exist_ok=True)
    logging.config.dictConfig(config.logging)

    _logger.info("Loading the data")
    x, y = load_training_data()
    x_train, x_test, y_train, y_test = split_data(x, y)

    # Run artifacts are written under a temp dir removed on exit; MLflow
    # logging copies them out before then.
    with tempfile.TemporaryDirectory() as td:
        temp_dir = Path(td)
        mlflow.set_experiment(config.experiment.name)

        # Accumulators for whatever gets logged with the final model.
        params = {}
        tags = {}
        metrics = {}
        artifacts = {}

        with mlflow.start_run():
            _logger.info("Fitting the preprocessor")
            preprocessor = get_preprocessor()
            preprocessor.fit(x_train, y_train)

            _logger.info("Preprocessing the training data")
            x_train_prep = preprocessor.transform(x_train)
            x_test_prep = preprocessor.transform(x_test)

            estimator_params, search_space = get_params()

            if search_space is None:
                # No search space configured: a single training run.
                estimator, estimator_tags, estimator_metrics, estimator_artifacts = train_run(
                    estimator_params=estimator_params,
                    x_train_prep=x_train_prep,
                    y_train=y_train,
                    x_test_prep=x_test_prep,
                    y_test=y_test,
                    temp_dir=temp_dir)

                model = make_pipeline(preprocessor, estimator)
                # Prefix estimator keys so they cannot collide with
                # pipeline-level params/tags.
                params.update(
                    {f"estimator_{k}": v
                     for k, v in estimator_params.items()})
                tags.update(
                    {f"estimator_{k}": v
                     for k, v in estimator_tags.items()})
                metrics.update(estimator_metrics)
                artifacts.update(estimator_artifacts)

            else:

                def hyperopt_objective(search_params):
                    # This function is called for each set of hyper-parameters being tested by HyperOpt.
                    # NOTE: ``trials`` is resolved at call time from the
                    # enclosing scope — it is defined below, before fmin runs.
                    run_name = str(len(trials) - 1)
                    ho_params = {}
                    ho_tags = {}
                    ho_metrics = {}
                    ho_artifacts = {}

                    # Overlay the sampled search params onto the base params.
                    search_params = flatten_params(search_params)
                    search_params = prep_params(search_params)
                    ho_estimator_params = estimator_params.copy()
                    ho_estimator_params.update(search_params)

                    # Each candidate gets its own nested MLflow run and its
                    # own artifact subdirectory.
                    with mlflow.start_run(nested=True, run_name=run_name):
                        ho_estimator, ho_estimator_tags, ho_estimator_metrics, ho_estimator_artifacts = train_run(
                            estimator_params=ho_estimator_params,
                            x_train_prep=x_train_prep,
                            y_train=y_train,
                            x_test_prep=x_test_prep,
                            y_test=y_test,
                            temp_dir=temp_dir / run_name)

                        ho_model = make_pipeline(preprocessor, ho_estimator)
                        ho_params.update({
                            f"estimator_{k}": v
                            for k, v in ho_estimator_params.items()
                        })
                        ho_tags.update({
                            f"estimator_{k}": v
                            for k, v in ho_estimator_tags.items()
                        })
                        ho_metrics.update(ho_estimator_metrics)
                        ho_artifacts.update(ho_estimator_artifacts)

                        ho_tags['hyperopt'] = True

                        log_sk_model(ho_model,
                                     registered_model_name=None,
                                     params=ho_params,
                                     tags=ho_tags,
                                     metrics=ho_metrics,
                                     artifacts=ho_artifacts)

                    # Hyperopt minimizes; assumes the primary metric is
                    # higher-is-better in [0, 1] — TODO confirm.
                    loss = 1 - ho_metrics[config.evaluation.primary_metric]

                    return {
                        'loss': loss,
                        'status': STATUS_OK,
                        'model': ho_model,
                        'params': ho_params,
                        'tags': ho_tags,
                        'metrics': ho_metrics,
                        'artifacts': ho_artifacts
                    }

                trials = Trials()
                # Fixed rstate keeps the search reproducible across runs.
                fmin(fn=hyperopt_objective,
                     space=search_space,
                     algo=tpe.suggest,
                     trials=trials,
                     max_evals=config.training.max_evals,
                     rstate=np.random.RandomState(1),
                     show_progressbar=False)

                # Promote the best trial's outputs to the outer run.
                model = trials.best_trial['result']['model']
                params = trials.best_trial['result']['params']
                tags = trials.best_trial['result']['tags']
                metrics = trials.best_trial['result']['metrics']
                artifacts = trials.best_trial['result']['artifacts']

            if config.evaluation.shap_analysis:
                _logger.info("Starting shap analysis")
                shap_tags, shap_artifacts = shap_analyse(
                    model=model, x=x_train, temp_dir=Path(temp_dir) / 'shap')
                tags.update(shap_tags)
                artifacts.update(shap_artifacts)
            else:
                _logger.info("Shap analysis skipped")

            log_sk_model(model,
                         registered_model_name=None,
                         params=params,
                         tags=tags,
                         metrics=metrics,
                         artifacts=artifacts)

    return (x_train, y_train, x_test,
            y_test), model, params, tags, metrics, artifacts
Пример #6
0
    def build_training_data_loader(self) -> InputData:
        """Return the training images (pixel values scaled to [0, 1]) and labels."""
        images, labels = data.load_training_data()
        return images / 255.0, labels
Пример #7
0
        help="If true, uses model.fit() instead of model.fit_generator()",
    )
    args = parser.parse_args()

    config = {
        "hyperparameters": {
            "global_batch_size": det.Constant(value=32),
            "dense1": det.Constant(value=128),
        },
        "searcher": {"name": "single", "metric": "val_accuracy", "max_steps": 40},
    }
    config.update(json.loads(args.config))

    context = init(config, local=args.local, test=args.test, context_dir=str(pathlib.Path.cwd()))

    train_images, train_labels = data.load_training_data()
    train_images = train_images / 255.0
    train_data = _ArrayLikeAdapter(
        x=train_images, y=train_labels, batch_size=context.get_per_slot_batch_size()
    )

    test_images, test_labels = data.load_validation_data()
    test_images = test_images / 255.0
    test_data = _ArrayLikeAdapter(
        x=test_images, y=test_labels, batch_size=context.get_per_slot_batch_size()
    )

    model = build_model(context)

    if args.use_fit:
        model.fit(
  The word lengths can significantly affect the quality of encoding.
  It is recommended to run this on a new dataset if you're not using UCI Medical Data that has been cleaned up.
"""
import pandas as pd
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.pyplot as plt
from tokenize_data import encode_the_words, encode_the_labels
from data import load_testing_data, load_training_data


def analyze_review_length(reviews_int):
    """Plot the per-review token counts and print their mean length."""
    lengths = [len(review) for review in reviews_int]
    plt.plot(lengths)
    mean_length = sum(lengths) / len(lengths)
    plt.show()
    print('Average is : ', mean_length)


def perform_data_analysis(raw_data):
    """Encode the 'review' column and run the review-length analysis on it."""
    encoded_reviews = encode_the_words(raw_data.get('review'))
    analyze_review_length(encoded_reviews)


if __name__ == '__main__':
    # Smoke-test entry point: load the raw training reviews and run the
    # length analysis over them.
    raw_data = load_training_data()
    # print(raw_data.get('review')[:10])
    perform_data_analysis(raw_data)
    # plt.figtext(10, 10, figtxt)
    # plt.savefig("results/mc_" + data_name_pred + "_pred_highres_wo2.png", dpi=300)
    plt.savefig("results/mc_" + data_pred + "_pred_n.png", dpi=200)
    plt.close()
    table = pd.concat(row, keys=keys).T
    # pdb.set_trace()
    return table


if __name__ == "__main__":
    # equations = ["w0+w1K+w2(KNlogN)", "w0+w1K+w2(KN²logN)", "w0+w1K+w2(KN^w3logN)"]
    equations = ["w0+w[1](KNlog²N)", "w0+w1K+w2(KN²logN)", "w0+w1K+w2(KN^w3logN)"]
    used_eq = [1, 2, 3]  # likelihood function has multiple versions of eqs. here define which ones will be used.

    # find and load training data files
    x_train, y_train, data_name_train = load_training_data()

    total_data_size = len(y_train)
    print("total_data_size:", total_data_size)

    y_predicted = list()

    test_folder_path = "/home/alis/Desktop/project/runtime_prediction/runtimes/test"
    files = next(os.walk(test_folder_path))[2]
    test_data_sets = sorted(list(set([x[16:-3] for x in files if re.search('np', x)])))

    # x_train, y_train = process_train_data(x_data, y_data, 100.0, 1)
    # x_train = x_data
    # y_train = y_data
    # fit the model
    trace_pymc = mcmc_fit(x_train, y_train, used_eq)