# Shared stdlib / scikit-learn imports for the snippets below; project-level
# names (messenger, model_info, diagnostics, x, y, dataset_id, ...) are
# assumed to be defined elsewhere in the surrounding script.
import json
import sys
import traceback

import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV, cross_val_score


def try_model(model, current_best_model, i):
    # Progress for this pass is reported in the 0.5-1.0 range of the run.
    percent = 0.5 + (i / model_run_count) * 0.5
    messenger.send_update(dataset_id, {"dataset_filename": dataset_filename, "storage_location": storage_location, "manifest_filename": manifest_filename, "dataset_id": dataset_id, "label_type": label_type, "status": "running_models", "percent": percent, "model_running": str(model), "best_model": [str(current_best_model[0]), current_best_model[1]]})
    clf = GridSearchCV(model_info.models()[model](), model_info.hyperparameters()[model], cv=5)
    scores = []
    try:
        results = clf.fit(x, y)
    except (ValueError, TypeError):
        # Grid search failed for this model; report it and keep the current best.
        messenger.send_update(dataset_id, {"dataset_filename": dataset_filename, "storage_location": storage_location, "manifest_filename": manifest_filename, "dataset_id": dataset_id, "status": "model_error", "model_error": "grid search error in "+str(model), "percent": percent})
        return current_best_model
    try:
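        # Re-score the grid search's best estimator with 10-fold cross-validation.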
        best_model = results.best_estimator_
        scores = cross_val_score(best_model, x, y, cv=10, scoring=score_type)
    except ValueError:
        messenger.send_update(dataset_id, {"dataset_filename": dataset_filename, "storage_location": storage_location, "manifest_filename": manifest_filename, "dataset_id": dataset_id, "status": "model_error", "model_error": str(model), "percent": percent})
    if len(scores) != 0:
        # Keep any model scoring within 0.05 of the current best as an ensemble candidate.
        if np.abs(current_best_model[-1] - np.mean(scores)) < 0.05 or current_best_model[0] is None:
            best_performing_models.append(best_model)
        if current_best_model[-1] < np.mean(scores):
            current_best_model = [best_model, np.mean(scores)]
            diagnostics.store_model(current_best_model, x, y, dataset_id, label_type, dataset_filename, storage_location, manifest_filename, conversion_pipeline, diagnostic_image_path, percent, score_type, False)
    i += 1
    return current_best_model
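
# Hedged sketch (not part of the original script): the GridSearchCV call above
# assumes model_info.models() and model_info.hyperparameters() return dicts
# keyed by the same model name, mapping to an estimator class and a parameter
# grid respectively. The names and grid below are hypothetical placeholders:
#
#     from sklearn.ensemble import RandomForestClassifier
#
#     def models():
#         return {"random forest": RandomForestClassifier}
#
#     def hyperparameters():
#         return {"random forest": {"n_estimators": [10, 100], "max_depth": [None, 10]}}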
# Example 2
def try_model(model, current_best_model, i):
    percent = (i / model_run_count) * run_multiplier
    messenger.send_update(
        dataset_id, {
            "dataset_filename": dataset_filename,
            "storage_location": storage_location,
            "manifest_filename": manifest_filename,
            "dataset_id": dataset_id,
            "label_type": label_type,
            "status": "running_models",
            "percent": percent,
            "model_running": str(model),
            "best_model": [str(current_best_model[0]), current_best_model[1]]
        })
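    # Unlike Example 1, the model arrives pre-configured, so it is scored
    # directly with 10-fold cross-validation (no grid search here).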
    scores = []
    try:
        scores = cross_val_score(model, x, y, cv=10, scoring=score_type)
    except (ValueError, TypeError):
        messenger.send_update(
            dataset_id, {
                "dataset_filename": dataset_filename,
                "storage_location": storage_location,
                "manifest_filename": manifest_filename,
                "dataset_id": dataset_id,
                "status": "model_error",
                "model_error": str(model),
                "percent": percent
            })
    # scores stays empty if cross-validation failed above; skip the update then.
    if len(scores) != 0:
        # Keep any model scoring within 0.05 of the current best as an ensemble candidate.
        if np.abs(current_best_model[-1] -
                  np.mean(scores)) < 0.05 or current_best_model[0] is None:
            best_performing_models.append(model)
        if current_best_model[-1] < np.mean(scores):
            current_best_model = [model, np.mean(scores)]
            diagnostics.store_model(current_best_model, x, y, dataset_id,
                                    label_type, dataset_filename, storage_location,
                                    manifest_filename, conversion_pipeline,
                                    diagnostic_image_path, percent, score_type,
                                    review)
    i += 1
    return current_best_model
def try_ensemble_model(models, current_best_model, i):
    percent = 0.5 + (i/model_run_count)*0.5
    try:
        model = VotingClassifier([(str(el), el) for el in models], voting="soft")
        scores = cross_val_score(model, x, y, cv=10, scoring=score_type)
    except ValueError:
        return current_best_model
    except AttributeError:
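        # Soft voting needs predict_proba on every member estimator; if one
        # lacks it, fall back to the default hard (majority-vote) classifier.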
        try:
            model = VotingClassifier([(str(el), el) for el in models])
            scores = cross_val_score(model, x, y, cv=10, scoring=score_type)
        except ValueError:
            return current_best_model
    if current_best_model[-1] < np.mean(scores):
        current_best_model = [model, np.mean(scores)]
        diagnostics.store_model(current_best_model, x, y, dataset_id, label_type, dataset_filename, storage_location, manifest_filename, conversion_pipeline, diagnostic_image_path, percent, score_type, False)
    i += 1
    return current_best_model
    
error = None
try:
    for model in models:
        current_best_model = try_model(model, current_best_model, i)
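    # Fallback: if no model improved on prev_acc, rerun the full model list
    # treating the label as categorical and scoring on accuracy.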
    if current_best_model == [None, prev_acc]:
        best_performing_models = []
        label_type = "Categorical"
        score_type = "accuracy"
        models = model_info.model_list()
        i = 1
        current_best_model = [None, prev_acc]
        for model in models:
            current_best_model = try_model(model, current_best_model, i)
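    # Ensemble stage: draw random combinations (of increasing size) from the
    # near-best models collected above and evaluate them as voting classifiers.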
    if len(best_performing_models) > 1:
        for model_count, run_count in enumerate(diagnostics.get_run_counts_by_size(best_performing_models, 50)[0]):
            model_count += 2
            for ik in range(int(run_count)):
                models = list(diagnostics.random_combination(best_performing_models, int(model_count)))
                current_best_model = try_ensemble_model(models, current_best_model, i)
    diagnostics.store_model(current_best_model, x, y, dataset_id, label_type, dataset_filename, storage_location, manifest_filename, conversion_pipeline, diagnostic_image_path, 1.0, score_type, True)
except:
    error = sys.exc_info()
    message = error[1].message if hasattr(error[1], "message") else str(error[1])
    print(json.dumps({"error": True, "error_type": str(error[0]), "message": message, "traceback": traceback.format_tb(error[2])}))
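
# Hedged sketch: diagnostics.random_combination(pool, r) used above is assumed
# to return a random size-r selection from the pool, as in the classic
# itertools recipe. A stand-in under that assumption could look like:
#
#     import random
#
#     def random_combination(iterable, r):
#         pool = tuple(iterable)
#         indices = sorted(random.sample(range(len(pool)), r))
#         return tuple(pool[i] for i in indices)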
scores = []
try:
    scores = cross_val_score(model, x, y, cv=10, scoring=score_type)
except (ValueError, TypeError):
    messenger.send_update(
        dataset_id, {
            "dataset_filename": dataset_filename,
            "storage_location": storage_location,
            "manifest_filename": manifest_filename,
            "dataset_id": dataset_id,
            "status": "model_error",
            "model_error": str(model),
            "percent": 0.6
        })

diagnostics.store_model([model, np.mean(scores)], x, y, dataset_id, label_type,
                        dataset_filename, storage_location, manifest_filename,
                        conversion_pipeline, diagnostic_image_path, 0.85,
                        score_type, True)
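
# Hedged sketch, not part of the original script: to run these snippets outside
# the full pipeline, "messenger" could be swapped for a stand-in that matches
# the call shape used above, send_update(dataset_id, payload_dict). The class
# below is a hypothetical stub, not the project's real messenger:
#
#     class StubMessenger:
#         def send_update(self, dataset_id, payload):
#             print(json.dumps(dict(payload, dataset_id=dataset_id), default=str))
#
#     messenger = StubMessenger()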