示例#1
0
def train():
    """Render the training page, processing a CSV upload when one is posted.

    When the request is a POST carrying an ``inputFiles`` upload, the file
    name is sanitised and checked for a ``.csv`` extension, the file is saved
    into ``folder`` and parsed with pandas, a ``FileContents`` row is stored
    for it, and a preview of the data is handed to ``train.html``.  For any
    other request the template is rendered with empty placeholders.

    Returns:
        The rendered ``train.html`` template.

    Raises:
        An HTTP 403 error (via ``abort``) when the uploaded file name does
        not end in ``.csv``.
    """
    # Placeholders keep the template renderable when nothing was uploaded;
    # without them the render call below fails on unbound locals.
    data_reload = None
    new_dataplot = None
    new_data_info = None
    new_data_size = None
    new_data_shape = None
    dropdown_list = []

    if request.method == 'POST' and 'inputFiles' in request.files:
        file = request.files['inputFiles']
        filename = secure_filename(file.filename)

        # Reject non-CSV uploads before anything is written to disk or to
        # the database, so a refused file leaves no trace behind.
        if os.path.splitext(filename)[1].lower() != '.csv':
            # Forbidden, No Access
            abort(403)

        # os.path.join is used so that paths work in every operating system
        saved_path = os.path.join(folder, filename)
        file.save(saved_path)

        # Data review: read back exactly the file that was just saved rather
        # than looking the name up again through an unordered query.
        new_data = pd.read_csv(saved_path)
        new_dataplot = new_data.head(10)
        # The bound ``info`` method itself (not its output) is what the
        # template has always received.
        new_data_info = new_data.info
        new_data_size = new_data.size
        new_data_shape = new_data.shape
        dropdown_list = list(new_data.columns)

        # Record the upload only once the file is known to be parseable.
        data_reload = FileContents(name=filename)
        db.create_all()
        db.session.add(data_reload)
        db.session.commit()

        flash('Just a moment, app is thinking!')

    return render_template('train.html',
                           new_dataplot=new_dataplot,
                           data_reload=data_reload,
                           new_data_info=new_data_info,
                           new_data_size=new_data_size,
                           new_data_shape=new_data_shape,
                           dropdown_list=dropdown_list)
示例#2
0
def plot_2():
    """Render the plotting page, listing the columns of an uploaded CSV.

    When the request is a POST carrying an ``inputFiles`` upload, a
    ``FileContents`` row is stored for the sanitised file name, the file is
    saved into ``folder`` and parsed with pandas, and its column names are
    passed to ``plot_2.html`` as ``dropdown_list``.  For any other request
    the template is rendered with an empty ``dropdown_list`` instead of the
    view falling through without a response.

    Returns:
        The rendered ``plot_2.html`` template.
    """
    dropdown_list = []

    if request.method == 'POST' and 'inputFiles' in request.files:
        file = request.files['inputFiles']
        filename = secure_filename(file.filename)

        # Record the upload in the database.
        db.create_all()
        db.session.add(FileContents(name=filename))
        db.session.commit()

        # Save the upload and read back exactly that file, rather than
        # looking the name up again through an unordered query.
        saved_path = os.path.join(folder, filename)
        file.save(saved_path)
        new_data = pd.read_csv(saved_path)
        dropdown_list = list(new_data.columns)

    return render_template('plot_2.html', dropdown_list=dropdown_list)
示例#3
0
def fit():
    """Fit candidate models on the latest uploaded data and render scores.

    Reads the target column (``y_var``), the feature columns (``x_vars``)
    and the prediction type (``rd_pred_type``) from the submitted form,
    stores the selection in the database, loads the CSV named by the last
    ``FileContents`` row, drops rows with missing values and evaluates a
    fixed set of classifiers or regressors depending on the prediction type.

    If no file has been recorded, no columns were selected, or the
    prediction type is neither "Classification" nor "Regression", the
    template is rendered with empty result lists.

    Returns:
        The rendered ``fit.html`` template.
    """
    # X and Y vars
    y_var_select = request.form.get('y_var')
    multiselect = request.form.getlist('x_vars')

    # Type of forecast
    pred_type_select = request.form.get('rd_pred_type')

    # Commit the prediction type and the X/Y selection together.
    db.create_all()
    db.session.add(ModelType(pred_type_select))
    db.session.add(ListXY(y_var_select, multiselect))
    db.session.commit()

    # Defaults so the template still renders when nothing can be fitted.
    names = []
    allmodels = []

    data_reloaded = FileContents.query.all()
    if data_reloaded and y_var_select and multiselect:
        new_data = pd.read_csv(os.path.join(folder, str(data_reloaded[-1])))
        new_data = new_data.dropna()  # deletes Na and NaN
        X = new_data[multiselect]
        Y = new_data[y_var_select]

        if pred_type_select == "Classification":
            # Refactor columns with text to integer before fitting
            names, allmodels = _evaluate_classifiers(factorise_data(X), Y)
        elif pred_type_select == "Regression":
            # Refactor columns with text to integer before fitting
            names, allmodels = _evaluate_regressors(factorise_data(X), Y)

    return render_template('fit.html',
                           y_var_select=y_var_select,
                           pred_type_select=pred_type_select,
                           multiselect=multiselect,
                           model_results=allmodels,
                           model_names=names)


def _evaluate_classifiers(X, Y):
    """Cross-validate each candidate classifier on accuracy.

    Returns:
        A ``(names, summaries)`` tuple: the model names and, per model, a
        "name - mean | std" string of its 10-fold accuracy scores.
    """
    models = [
        ('RandomForestClassifier', RandomForestClassifier()),
        ('GradientBoostingClassifier', GradientBoostingClassifier()),
        ('LogisticRegression', LogisticRegression()),
        ('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()),
        ('KNeighborsClassifier', KNeighborsClassifier()),
        ('GaussianNB', GaussianNB()),
        ('SVC', SVC()),
    ]
    # No random_state: the folds are not shuffled, so a seed has no effect,
    # and recent scikit-learn releases raise ValueError for that combination.
    kfold = model_selection.KFold(n_splits=10)

    names = []
    summaries = []
    for name, model in models:
        cv_results = model_selection.cross_val_score(model,
                                                     X,
                                                     Y,
                                                     cv=kfold,
                                                     scoring='accuracy')
        names.append(name)
        summaries.append(
            "%s - %f | %f" % (name, cv_results.mean(), cv_results.std()))
    return names, summaries


def _evaluate_regressors(X, Y):
    """Fit each candidate regressor on a 70/30 split and report its RMSE.

    Returns:
        A ``(names, summaries)`` tuple: the model names and, per model, a
        "name - rmse | -" string computed on the held-out 30%.
    """
    models = [
        ('RandomForestRegressor', RandomForestRegressor(n_estimators=200)),
        ('GradientBoostingRegressor',
         GradientBoostingRegressor(n_estimators=200)),
        ('Ridge', Ridge()),
        ('ElasticNet', ElasticNet()),
        ('Lasso', Lasso()),
        ('SVR', SVR()),
    ]
    # The split is seeded and the scaler depends only on the split, so both
    # are computed once and shared by every model.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.3,
                                                        random_state=7)
    # standard scaler: standardises the feature variables
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    names = []
    summaries = []
    for name, model in models:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        mse = mean_squared_error(y_test, predictions)
        names.append(name)
        summaries.append("%s - %.2f | %s" % (name, (np.sqrt(mse)), "-"))
    return names, summaries