Пример #1
0
def analysis(fileindex, xselected, yselected, analytype, criterion,
             direction):
    """Return the cached regression model together with its F statistics.

    The model is read from the Redis hash ``fileindex``. When the hash has
    no ``est`` field yet, ``setmodel`` is called once to fit and cache the
    model and the field is read again.

    Args:
        fileindex: Key of the Redis hash holding the uploaded data set.
        xselected: Selected independent-variable columns (forwarded to
            ``setmodel``).
        yselected: Selected dependent-variable column (forwarded to
            ``setmodel``).
        analytype: Analysis type, forwarded to ``setmodel``.
        criterion: Selection criterion, forwarded to ``setmodel``.
        direction: Selection direction, forwarded to ``setmodel``.

    Returns:
        A dict with the keys ``'model'`` (the fitted result), ``'f1'``
        (``est.fvalue`` rounded to 3 decimals), ``'f2'`` (the 0.95 quantile
        of the F distribution, rounded to 3 decimals) and
        ``'xselected_change'`` (the cached value, or the string ``'None'``
        when the field does not exist).

    Raises:
        ValueError: If no model is cached even after ``setmodel`` ran.
    """
    from scipy.stats import f

    conn = getconn()

    def _load(field):
        # hget returns None for a missing field and pickle.loads(None)
        # raises TypeError, so absence is mapped to None explicitly.
        # NOTE(review): pickle.loads trusts whatever is stored in Redis.
        raw = conn.hget(fileindex, field)
        return None if raw is None else pickle.loads(raw)

    est = _load('est')
    if est is None:
        # Fit and cache the model once, then re-read it (no recursion, so a
        # setmodel call that stores nothing cannot loop forever).
        setmodel(fileindex, xselected, yselected, analytype, criterion,
                 direction)
        est = _load('est')
        if est is None:
            raise ValueError(
                "no model could be built for fileindex %r" % (fileindex, ))

    train = _load('train')

    raw_change = conn.hget(fileindex, 'xselected_change')
    if raw_change is None:
        xselected_change = 'None'
    else:
        xselected_change = pickle.loads(raw_change)

    # Theoretical (critical) F value.
    p = est.df_model  # number of independent variables
    n = train.shape[0]  # number of rows, i.e. observations
    F_Theroy = f.ppf(q=0.95, dfn=p, dfd=n - p - 1)

    return {
        'model': est,
        'f1': round(est.fvalue, 3),
        'f2': round(F_Theroy, 3),
        'xselected_change': xselected_change
    }
Пример #2
0
def setmodel(fileindex, xselected, yselected, analytype, criterion, direction):
    """Fit a regression model on the cached data set and cache the result.

    The data set is read from the ``'Profit'`` field of the Redis hash
    ``fileindex`` and split 80/20 with ``random_state=22``. For
    ``analytype == "linear"`` an OLS model with a constant term is fitted;
    for ``analytype == "gradually"`` ``FeatureSelection().stepwise`` is run.
    The train split, test split and model (plus ``xselected_change`` for the
    stepwise case) are pickled into the same hash, whose expiry is reset to
    two hours. Any other ``analytype`` stores nothing.
    """
    redis_conn = getconn()
    profit = pickle.loads(redis_conn.hget(fileindex, 'Profit'))

    if analytype not in ("linear", "gradually"):
        # Unknown analysis types are ignored.
        return

    train_part, test_part = model_selection.train_test_split(profit,
                                                             test_size=0.2,
                                                             random_state=22)

    if analytype == "linear":
        design = sm.add_constant(train_part[xselected])
        fitted = sm.OLS(train_part[yselected], design).fit()
        payload = {
            "train": pickle.dumps(train_part),
            "test": pickle.dumps(test_part),
            "est": pickle.dumps(fitted),
        }
    else:
        # Stepwise selection works on the predictors plus the response.
        columns = list(xselected)
        columns.append(yselected)
        selector = FeatureSelection().stepwise(df=train_part[columns],
                                               response=yselected,
                                               max_iter=200,
                                               criterion=criterion,
                                               direction=direction)
        payload = {
            "train": pickle.dumps(train_part),
            "test": pickle.dumps(test_part),
            "est": pickle.dumps(selector.stepwise_model),
            "xselected_change":
            pickle.dumps(selector.stepwise_feat_selected_),
        }

    # Store in Redis and refresh the two-hour expiry.
    redis_conn.hset(fileindex, mapping=payload)
    redis_conn.expire(fileindex, 60 * 60 * 2)
Пример #3
0
def savefile(file, sheet, fid):
    """Cache the parsed content of an uploaded sheet in Redis.

    The value returned by ``returncloumns(file, sheet)`` is pickled into the
    ``'Profit'`` field of the Redis hash ``fid``, and the hash is set to
    expire after two hours.

    Args:
        file: Uploaded file, passed through to ``returncloumns``.
        sheet: Sheet selector, passed through to ``returncloumns``.
        fid: Key of the Redis hash to write.
    """
    from analysis.linear.regression import returncloumns
    parsed = returncloumns(file, sheet)

    conn = getconn()
    # hset(mapping=...) replaces the deprecated hmset and matches how the
    # other helpers of this module write to the hash.
    conn.hset(fid, mapping={"Profit": pickle.dumps(parsed)})
    conn.expire(fid, 60 * 60 * 2)
Пример #4
0
def uploadpre_file(request):
    """Handle the upload of a file for multi-row prediction.

    Reads the uploaded Excel file, drops rows containing NaN, applies the
    model cached under the ``'est'`` field of the Redis hash ``fileindex``
    and returns the predictions with a scatter plot of them.

    Returns:
        A ``JsonResponse``. On success ``{"result": 1, "mul_pre_result":
        {"mul_pre_values": [...records...], "src": <PNG data URI>}}``. When
        the upload lacks a model column ``{"result": 500, "except":
        "keyerror"}``; on any other ``Exception`` ``{"result": 500,
        "except": "error"}``.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt

    json_params = {'ensure_ascii': False}
    file = request.FILES.get("file")
    fileindex = request.POST.get("fileindex")
    yselected = request.POST.get("yselected")
    conn = getconn()
    Profit = pd.read_excel(file)
    Profit.dropna(inplace=True)
    # NOTE(review): pickle.loads trusts whatever is stored in Redis.
    est = pickle.loads(conn.hget(fileindex, 'est'))
    params = est.params.index.tolist()
    has_const = 'const' in params
    if has_const:
        params.remove('const')
    try:
        Profit = Profit[params]
        if has_const:
            # Force the intercept column: the default has_constant='skip'
            # adds nothing when an uploaded column happens to be constant
            # (always the case for a single-row upload).
            Profit = sm.add_constant(Profit, has_constant='add')
        else:
            Profit = sm.add_constant(Profit)
        y_pred = est.predict(Profit)
        Profit[yselected + "(预测值)"] = y_pred
        if 'const' in Profit.columns.values.tolist():
            Profit = Profit.drop('const', axis=1)
        Profit = round(Profit, 3)
        Profit = Profit.to_dict('records')

        # Scatter plot of the predictions.
        sio = BytesIO()
        try:
            plt.scatter(range(1,
                              len(y_pred) + 1),
                        y_pred,
                        alpha=0.4,
                        edgecolor='none')
            plt.savefig(sio,
                        format='png',
                        bbox_inches='tight',
                        pad_inches=0.0)
        finally:
            # Always close, otherwise later plots are drawn onto this figure.
            plt.close()
        data = base64.encodebytes(sio.getvalue()).decode()
        src = 'data:image/png;base64,' + data

        mul_pre_result = {"mul_pre_values": Profit, "src": src}
        return JsonResponse({
            "result": 1,
            "mul_pre_result": mul_pre_result
        },
                            json_dumps_params=json_params)
    except KeyError:
        data = {"result": 500, "except": "keyerror"}
        return JsonResponse(data, json_dumps_params=json_params)
    except Exception:
        # KeyboardInterrupt/SystemExit are deliberately not caught: they
        # must propagate, and swallowing them left the view returning None.
        data = {"result": 500, "except": "error"}
        return JsonResponse(data, json_dumps_params=json_params)
Пример #5
0
def prediction(fileindex, xselected, yselected):
    """Plot predicted against actual values for the cached test split.

    The predictors are ``xselected_change`` when that field exists in the
    Redis hash ``fileindex`` (stepwise regression) and ``xselected``
    otherwise (linear regression).

    Args:
        fileindex: Key of the Redis hash holding ``'test'`` and ``'est'``.
        xselected: Predictor columns used when no ``xselected_change`` is
            cached.
        yselected: Response column.

    Returns:
        The scatter plot as a ``data:image/png;base64,...`` URI string.
    """
    conn = getconn()
    test = pickle.loads(conn.hget(fileindex, 'test'))
    est = pickle.loads(conn.hget(fileindex, 'est'))

    raw_change = conn.hget(fileindex, 'xselected_change')
    xselected_change = None if raw_change is None else pickle.loads(
        raw_change)
    # No xselected_change means plain linear regression; otherwise the
    # stepwise-selected features are used. ``is None`` also stays valid if
    # the cached selection is array-like.
    features = xselected if xselected_change is None else xselected_change

    y_test = test[yselected]
    y_pred = est.predict(sm.add_constant(test[features]))

    sio = BytesIO()
    try:
        plt.scatter(y_test, y_pred)
        # Reference diagonal: perfect predictions lie on this line.
        plt.plot([y_test.min(), y_test.max()],
                 [y_test.min(), y_test.max()],
                 color='red',
                 linestyle='--')
        plt.xlabel('实际值')
        plt.ylabel('预测值')
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()
    return 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()
Пример #6
0
def variance(fileindex, xselected, yselected, oselected_1, oselected_2):
    """Plot standardized residuals against two chosen variables.

    Reads ``est2`` and ``none_outliers`` from the Redis hash ``fileindex``.
    When either is missing, ``setcurmodel`` is called once to compute them,
    as the sibling diagnostics do.

    Args:
        fileindex: Key of the Redis hash.
        xselected: Predictor columns, forwarded to ``setcurmodel``.
        yselected: Response column, forwarded to ``setcurmodel``.
        oselected_1: Column of ``none_outliers`` for the upper subplot.
        oselected_2: Column of ``none_outliers`` for the lower subplot.

    Returns:
        The two-panel figure as a ``data:image/png;base64,...`` URI string.

    Raises:
        ValueError: If the values are still missing after ``setcurmodel``.
    """
    conn = getconn()

    def _load(field):
        raw = conn.hget(fileindex, field)
        return None if raw is None else pickle.loads(raw)

    est2 = _load('est2')
    none_outliers = _load('none_outliers')
    if est2 is None or none_outliers is None:
        setcurmodel(fileindex, xselected, yselected)
        est2 = _load('est2')
        none_outliers = _load('none_outliers')
        if est2 is None or none_outliers is None:
            raise ValueError(
                "no outlier-free model is cached for fileindex %r" %
                (fileindex, ))

    # Standardized residuals, shared by both subplots.
    std_resid = (est2.resid - est2.resid.mean()) / est2.resid.std()

    sio = BytesIO()
    try:
        # Homogeneity-of-variance check: one subplot per chosen variable.
        panels = (
            ((0, 0), oselected_1, 'red'),
            ((1, 0), oselected_2, 'magenta'),
        )
        for loc, column, line_color in panels:
            ax = plt.subplot2grid(shape=(2, 1), loc=loc)
            ax.scatter(none_outliers[column], std_resid)
            # Horizontal reference line at zero.
            ax.hlines(y=0,
                      xmin=none_outliers[column].min(),
                      xmax=none_outliers[column].max(),
                      color=line_color,
                      linestyle='--')
            ax.set_xlabel(column)
            ax.set_ylabel('Std_Residual')

        # Spacing between the two subplots.
        plt.subplots_adjust(hspace=0.6, wspace=0.3)
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()
    return 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()
Пример #7
0
def residual(fileindex, xselected, yselected):
    """Return the Durbin-Watson statistic of the cached ``est2`` model.

    When the Redis hash ``fileindex`` has no ``est2`` field,
    ``setcurmodel`` is called once to compute it and the field is read
    again.

    Returns:
        A one-element list holding the statistic formatted with
        ``"%#8.3f"``.

    Raises:
        ValueError: If ``est2`` is still missing after ``setcurmodel``.
    """
    from statsmodels.stats.stattools import durbin_watson

    conn = getconn()
    raw = conn.hget(fileindex, 'est2')
    if raw is None:
        # Single retry instead of recursion, so a setcurmodel call that
        # stores nothing cannot loop forever.
        setcurmodel(fileindex, xselected, yselected)
        raw = conn.hget(fileindex, 'est2')
        if raw is None:
            raise ValueError(
                "no outlier-free model is cached for fileindex %r" %
                (fileindex, ))

    est2 = pickle.loads(raw)
    return ["%#8.3f" % durbin_watson(est2.wresid)]
Пример #8
0
def varbp(fileindex, xselected, yselected):
    """Run the Breusch-Pagan test on the cached ``est2`` model.

    When the Redis hash ``fileindex`` has no ``est2`` field,
    ``setcurmodel`` is called once to compute it and the field is read
    again.

    Returns:
        The result of ``sm.stats.diagnostic.het_breuschpagan`` called with
        the model residuals and ``est2.model.exog``.

    Raises:
        ValueError: If ``est2`` is still missing after ``setcurmodel``.
    """
    conn = getconn()
    raw = conn.hget(fileindex, 'est2')
    if raw is None:
        # Single retry instead of recursion, so a setcurmodel call that
        # stores nothing cannot loop forever.
        setcurmodel(fileindex, xselected, yselected)
        raw = conn.hget(fileindex, 'est2')
        if raw is None:
            raise ValueError(
                "no outlier-free model is cached for fileindex %r" %
                (fileindex, ))

    est2 = pickle.loads(raw)
    return sm.stats.diagnostic.het_breuschpagan(est2.resid,
                                                exog_het=est2.model.exog)
Пример #9
0
def norks(fileindex, yselected):
    """Run a normality test on the response column of the training split.

    With 5000 or more training rows a Kolmogorov-Smirnov test against a
    normal distribution (using the sample mean and standard deviation) is
    used; otherwise the Shapiro-Wilk test is used.

    Args:
        fileindex: Key of the Redis hash holding the ``'train'`` field.
        yselected: Response column to test.

    Returns:
        ``{'type': 'kstest' | 'shapiro', 'data': <scipy test result>}``.
    """
    conn = getconn()
    train = pickle.loads(conn.hget(fileindex, 'train'))
    sample = train[yselected]

    if len(train) >= 5000:
        result = stats.kstest(rvs=sample,
                              args=(sample.mean(), sample.std()),
                              cdf="norm")
        test_name = 'kstest'
    else:
        result = stats.shapiro(sample)
        test_name = 'shapiro'
    return {'type': test_name, 'data': result}
Пример #10
0
def getsin_pre_value(request):
    """Return the model prediction for one set of input values.

    The JSON request body must contain ``fileindex`` and ``params`` (one
    value per model predictor, in model order). When the cached model has a
    ``const`` term, a leading 1 is inserted for it.

    Returns:
        A ``JsonResponse``: ``{"result": 1, "sin_pre_value": <rounded to 3
        decimals>}`` on success; ``{"result": 500, "except": "error"}`` when
        the number of values does not match the model; a 405 response for
        non-POST requests.
    """
    json_params = {'ensure_ascii': False}
    if request.method != "POST":
        # A Django view must always return a response; the original fell
        # through and returned None here.
        return JsonResponse({
            "result": 405,
            "except": "method not allowed"
        },
                            status=405,
                            json_dumps_params=json_params)

    data = json.loads(request.body)
    fileindex = data["fileindex"]
    params = list(map(float, data["params"]))
    conn = getconn()
    # NOTE(review): pickle.loads trusts whatever is stored in Redis.
    est = pickle.loads(conn.hget(fileindex, 'est'))
    if 'const' in est.params.index.tolist():
        params.insert(0, 1)
    params = np.array(params)

    coefficients = est.params.values
    if params.shape != coefficients.shape:
        # Without this check a single input value would be broadcast
        # silently across all coefficients and give a wrong prediction.
        return JsonResponse({
            "result": 500,
            "except": "error"
        },
                            json_dumps_params=json_params)

    sin_pre_value = (coefficients * params).sum()
    sin_pre_value = {"result": 1, "sin_pre_value": round(sin_pre_value, 3)}
    return JsonResponse(sin_pre_value, json_dumps_params=json_params)
Пример #11
0
def linear_correlation(fileindex, lineselected):
    """Return a pair plot and the correlation matrix of selected columns.

    The data set is read from the ``'Profit'`` field of the Redis hash
    ``fileindex``.

    Returns:
        ``{'src': <PNG data URI of the seaborn pair plot>, 'lindata':
        <correlation matrix rounded to 3 decimals, as nested lists>}``.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt

    frame = pickle.loads(getconn().hget(fileindex, 'Profit'))
    corr_matrix = frame[lineselected].corr()

    sns.pairplot(frame.loc[:, lineselected])
    buffer = BytesIO()
    plt.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0.0)
    encoded = base64.encodebytes(buffer.getvalue()).decode()
    image_uri = 'data:image/png;base64,' + encoded

    # Remember to close, otherwise later plots repeat this figure.
    plt.axis('off')
    plt.close()

    return {
        'src': image_uri,
        'lindata': round(corr_matrix, 3).values.tolist()
    }
Пример #12
0
def multicol(fileindex, xselected):
    """Compute variance inflation factors for the model predictors.

    The predictors are ``xselected_change`` when that field exists in the
    Redis hash ``fileindex`` (stepwise regression) and ``xselected``
    otherwise (linear regression). A constant column is added before the
    factors are computed.

    Returns:
        A two-dimensional list: one ``[feature, VIF]`` row per column of the
        design matrix, with the factors rounded to 3 decimals.
    """
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    conn = getconn()
    newProfit = pickle.loads(conn.hget(fileindex, 'Profit'))

    raw_change = conn.hget(fileindex, 'xselected_change')
    xselected_change = None if raw_change is None else pickle.loads(
        raw_change)
    # ``is None`` rather than ``== None`` so an array-like selection does
    # not produce an ambiguous element-wise comparison.
    features = xselected if xselected_change is None else xselected_change

    X = sm.add_constant(newProfit[features])
    vif = pd.DataFrame()
    vif['features'] = X.columns
    vif["VIF Factor"] = [
        variance_inflation_factor(X.values, i) for i in range(X.shape[1])
    ]
    return round(vif, 3).values.tolist()  # DataFrame values as nested lists
Пример #13
0
def sendselect(request):
    """Run the regression analysis for the selection stored in cookies.

    The cookies ``fileindex``, ``xselected``, ``yselected``, ``analytype``,
    ``criterion``, ``direction`` and ``xlist`` are JSON-decoded. If the
    Redis hash ``fileindex`` still exists, its expiry is reset to two hours
    and the work is delegated to ``sendselecthelp``.

    Returns:
        The response from ``sendselecthelp``; a JSON ``{"result": 404,
        "msg": ...}`` when the uploaded data has expired; a 405 response
        for non-POST requests.
    """
    json_params = {'ensure_ascii': False}
    if request.method != "POST":
        # A Django view must always return a response; the original fell
        # through and returned None here.
        return JsonResponse({
            "result": 405,
            "except": "method not allowed"
        },
                            status=405,
                            json_dumps_params=json_params)

    cookie_names = ("fileindex", "xselected", "yselected", "analytype",
                    "criterion", "direction", "xlist")
    selection = {
        name: json.loads(request.COOKIES.get(name))
        for name in cookie_names
    }
    fileindex = selection["fileindex"]
    analytype = selection["analytype"]

    conn = getconn()
    if not conn.exists(fileindex):
        responsedata = {"result": 404, "msg": '上传的文件已过期,请重新上传'}
        return JsonResponse(responsedata, json_dumps_params=json_params)

    conn.expire(fileindex, 60 * 60 * 2)
    # A plain linear run must not reuse features left by a stepwise run.
    if (analytype == "linear"
            and conn.hexists(fileindex, 'xselected_change')):
        conn.hdel(fileindex, 'xselected_change')
    return sendselecthelp(fileindex, selection["xselected"],
                          selection["yselected"], analytype,
                          selection["criterion"], selection["direction"],
                          selection["xlist"])
Пример #14
0
def normality(fileindex, yselected):
    """Plot the distribution of the response column for a normality check.

    The data set is read from the ``'Profit'`` field of the Redis hash
    ``fileindex``. The plot shows a histogram, a kernel density curve and a
    fitted normal density curve.

    Returns:
        The plot as a ``data:image/png;base64,...`` URI string.
    """
    import matplotlib
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    import seaborn as sns

    frame = pickle.loads(getconn().hget(fileindex, 'Profit'))
    response = frame[yselected]

    # Font able to render the Chinese legend labels, and a proper minus sign.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    histogram_style = {
        'color': 'green',
        'edgecolor': 'black',
    }
    kde_style = {'color': 'black', 'linestyle': '--', 'label': '核密度曲线'}
    fit_style = {'color': 'red', 'linestyle': ':', 'label': '正态密度曲线'}
    sns.distplot(a=response,
                 bins=10,
                 fit=stats.norm,
                 norm_hist=True,
                 hist_kws=histogram_style,
                 kde_kws=kde_style,
                 fit_kws=fit_style)
    plt.legend()

    buffer = BytesIO()
    plt.savefig(buffer, format='png', bbox_inches='tight', pad_inches=0.0)
    encoded = base64.encodebytes(buffer.getvalue()).decode()
    image_uri = 'data:image/png;base64,' + encoded

    # Remember to close, otherwise later plots repeat this figure.
    plt.axis('off')
    plt.close()
    return image_uri
Пример #15
0
def setcurmodel(fileindex, xselected, yselected):
    """Detect outliers of the cached model and refit without them.

    Observations of the training split whose externally studentized
    residual has an absolute value above 2 are treated as outliers. A new
    OLS model with a constant term is fitted on the remaining rows. The
    results are pickled into the Redis hash ``fileindex`` as ``est2`` (the
    refitted model), ``outlist`` (the outlier rows as dicts, rounded to 3
    decimals) and ``none_outliers`` (the remaining rows), and the hash
    expiry is reset to two hours.

    Args:
        fileindex: Key of the Redis hash holding ``'train'`` and ``'est'``.
        xselected: Predictor columns used when no ``xselected_change`` is
            cached.
        yselected: Response column.
    """
    conn = getconn()
    train = pickle.loads(conn.hget(fileindex, 'train'))
    est = pickle.loads(conn.hget(fileindex, 'est'))

    raw_change = conn.hget(fileindex, 'xselected_change')
    xselected_change = None if raw_change is None else pickle.loads(
        raw_change)
    # No xselected_change means plain linear regression.
    usedx = xselected if xselected_change is None else xselected_change

    if est is None:
        return

    # Reset the indexes so the columns line up with the positional
    # residual array when concatenated.
    x = train[usedx].reset_index(drop=True)
    y = train[yselected].reset_index(drop=True)
    resid_stu = pd.Series(est.get_influence().resid_studentized_external,
                          name='resid_stu')
    profit_outliers = pd.concat([x, y, resid_stu], axis=1)

    # Outliers: |studentized residual| > 2. The rounded frame is now
    # actually used; the original discarded the result of round().
    outdata = round(
        profit_outliers.loc[np.abs(profit_outliers.resid_stu) > 2], 3)
    columns = list(usedx) + [yselected, 'resid_stu']
    outlist = [dict(zip(columns, row)) for row in outdata.values.tolist()]

    none_outliers = profit_outliers.loc[
        np.abs(profit_outliers.resid_stu) <= 2]
    X2 = sm.add_constant(none_outliers[usedx])
    est2 = sm.OLS(none_outliers[yselected], X2).fit()

    # Store in Redis and refresh the two-hour expiry.
    filedata = {
        "est2": pickle.dumps(est2),
        "outlist": pickle.dumps(outlist),
        "none_outliers": pickle.dumps(none_outliers),
    }
    conn.hset(fileindex, mapping=filedata)
    conn.expire(fileindex, 60 * 60 * 2)
Пример #16
0
def outliertest(fileindex, xselected, yselected):
    """Return the outlier-free model summary, the outliers and a plot.

    When the Redis hash ``fileindex`` has no ``est2`` field,
    ``setcurmodel`` is called once to compute it. The plot compares the
    predictions of the original model ``est`` on the test split with the
    actual values; the predictors are ``xselected_change`` when cached
    (stepwise regression) and ``xselected`` otherwise.

    Returns:
        ``{'model': <HTML summary of est2>, 'outdata': <cached outlist>,
        'src': <PNG data URI>}``.

    Raises:
        ValueError: If ``est2`` is still missing after ``setcurmodel``.
    """
    conn = getconn()

    def _load(field):
        raw = conn.hget(fileindex, field)
        return None if raw is None else pickle.loads(raw)

    est2 = _load('est2')
    if est2 is None:
        # Single retry instead of recursion, so a setcurmodel call that
        # stores nothing cannot loop forever.
        setcurmodel(fileindex, xselected, yselected)
        est2 = _load('est2')
        if est2 is None:
            raise ValueError(
                "no outlier-free model is cached for fileindex %r" %
                (fileindex, ))

    test = _load('test')
    est = _load('est')
    outlist = _load('outlist')
    xselected_change = _load('xselected_change')
    # No xselected_change means plain linear regression.
    features = xselected if xselected_change is None else xselected_change

    y_test = test[yselected]
    y_pred = est.predict(sm.add_constant(test[features]))

    sio = BytesIO()
    try:
        plt.scatter(y_test, y_pred)
        # Reference diagonal: perfect predictions lie on this line.
        plt.plot([y_test.min(), y_test.max()],
                 [y_test.min(), y_test.max()],
                 color='red',
                 linestyle='--')
        plt.xlabel('实际值')
        plt.ylabel('预测值')
        plt.savefig(sio, format='png', bbox_inches='tight', pad_inches=0.0)
    finally:
        # Always close, otherwise later plots are drawn onto this figure.
        plt.close()
    src = 'data:image/png;base64,' + base64.encodebytes(
        sio.getvalue()).decode()

    return {
        'model': est2.summary().as_html(),
        'outdata': outlist,
        'src': src
    }