Пример #1
0
def tree_model(Xt, yt, Xv, yv, runRF, runGBRT, runGBRT2):
    """Grid-search a tree ensemble and return the best candidate on validation data.

    Exactly one of ``runRF``, ``runGBRT`` and ``runGBRT2`` must be set. Every
    parameter combination is fitted on ``(Xt, yt)``, scored on ``(Xv, yv)``
    with ``cal_r2``, and the fitted model with the highest score is returned.

    Args:
        Xt: Training features; ``Xt.shape[1]`` is read as the feature count.
        yt: Training targets; flattened with ``reshape(-1)`` before fitting.
        Xv: Validation features, used for scoring and, for the boosting
            variants, as the early-stopping evaluation set.
        yv: Validation targets; passed unchanged to ``cal_r2`` and flattened
            for the early-stopping evaluation set.
        runRF: Fit scikit-learn random forests.
        runGBRT: Fit XGBoost regressors with the pseudo-Huber objective.
        runGBRT2: Fit XGBoost regressors with the squared-error objective.

    Returns:
        The fitted estimator with the highest validation score.

    Raises:
        AssertionError: If the three flags do not sum to exactly one.
        NotImplementedError: If no flag is set (only reachable when
            assertions are disabled).
    """
    assert runRF + runGBRT + runGBRT2 == 1
    if runRF:
        model_name = "Random Forest"
        param_grid = {
            'max_dep': np.arange(2, 7, 2),
            # A third of the features per split, but never zero: an integer
            # max_features of 0 is rejected by scikit-learn.
            'max_fea': [max(1, Xt.shape[1] // 3)],
        }
        objective = None
        eval_set = None
    elif runGBRT or runGBRT2:
        model_name = "Boosting Trees"
        param_grid = {
            'num_trees': [1000],
            'max_dep': [2, 4, 6],
            'lr': [0.01, 0.1],
        }
        # The two boosting variants differ only in their loss function.
        objective = 'reg:pseudohubererror' if runGBRT else 'reg:squarederror'
        eval_set = [(Xv, yv.reshape(-1, ))]
    else:
        raise NotImplementedError()
    params = list(ParameterGrid(param_grid))

    y_train = yt.reshape(-1, )
    # Leave three cores free, but keep at least one worker: n_jobs == 0 is
    # invalid for joblib-backed estimators.
    n_jobs = max(1, cpu_count() - 3)

    tis = time.time()
    scores = []
    model_list = []
    for p in tqdm(params):
        print(p)
        if runRF:
            tree_m = RandomForestRegressor(n_estimators=300,
                                           max_depth=p['max_dep'],
                                           max_features=p['max_fea'],
                                           min_samples_split=10,
                                           random_state=0,
                                           n_jobs=n_jobs)
            tree_m.fit(Xt, y_train)
        else:
            tree_m = xgb.XGBRegressor(n_estimators=p['num_trees'],
                                      max_depth=p['max_dep'],
                                      learning_rate=p['lr'],
                                      objective=objective,
                                      random_state=0,
                                      tree_method='gpu_hist')
            # Stop once the validation metric has not improved for 10% of the
            # boosting budget, and keep the best iteration.
            callbacks = [
                xgb.callback.EarlyStopping(rounds=0.1 * p['num_trees'],
                                           save_best=True)
            ]
            tree_m = tree_m.fit(Xt,
                                y_train,
                                eval_set=eval_set,
                                verbose=False,
                                callbacks=callbacks)
            booster = tree_m.get_booster()
            print(f"gbrt best iter {booster.best_iteration}",
                  "lowest error",
                  booster.best_score)

        yv_hat = tree_m.predict(Xv).reshape(-1, 1)
        score = cal_r2(yv, yv_hat)
        print(f"params: {p}. CV r2-validation:{score:.3%}")
        scores.append(score)
        model_list.append(tree_m)
    tic = time.time()
    print(f"{model_name} train time: ", tic - tis)

    best_idx = int(np.argmax(scores))
    # Report the family that was actually trained rather than always "rf".
    logger.info('best params for ' + model_name + ': ' + str(params[best_idx]))
    return model_list[best_idx]
Пример #2
0
def _absence_count_div(frame, metric):
    """Render grouped bars of student counts per ``metric`` value, split by absence level.

    Rows are selected by label and the x axis uses the cross-tab's own column
    values, so each bar is labelled with the data it actually shows.
    """
    levels = ['Under-7', 'Above-7']
    counts = pd.crosstab(frame['결석일수'], frame[metric]).reindex(levels,
                                                                  fill_value=0)
    metric_values = list(counts.columns)
    fig = go.Figure(data=[
        go.Bar(name=level, x=metric_values, y=counts.loc[level].values)
        for level in levels
    ])
    fig.update_layout(xaxis_title=metric,
                      yaxis_title='학생수',
                      width=700,
                      height=250)
    return plot(fig, output_type='div')


def mainFunc(request):
    """Build every chart of the student dashboard and render ``full.html``.

    The student CSV is downloaded once and reused (copied where a section
    re-encodes columns); the education CSV is likewise downloaded once.
    Each chart is converted to an HTML ``div`` with ``plot`` and passed to the
    template under the same context keys as before.

    Args:
        request: The request object forwarded to ``render``.

    Returns:
        The result of ``render(request, 'full.html', context)``.
    """
    import plotly.figure_factory as ff

    student_url = 'https://raw.githubusercontent.com/pyh3887/Django_final_project/master/student.csv'
    education_url = 'https://raw.githubusercontent.com/pyh3887/Django_final_project/master/education.csv'

    # cp949 is a superset of euc-kr, so one read serves every section that
    # previously re-downloaded this file with either encoding.
    students = pd.read_csv(student_url, encoding='cp949')

    # Transparent background / white monospace font shared by most figures.
    dark_theme = dict(paper_bgcolor='rgba(255,255,255,0)',
                      plot_bgcolor='rgba(0,0,0,0)',
                      font=dict(family='Courier New, monospace',
                                color='#fff',
                                size=18))

    # ------------------------------------------------------------------
    # Yunjin's charts
    # ------------------------------------------------------------------
    # Grade counts grouped by the parents' satisfaction with the school.
    satisfaction = pd.crosstab(students['성적'],
                               students['부모의학교만족도']).reindex(
                                   columns=['Good', 'Bad'], fill_value=0)
    grade_labels = list(satisfaction.index)
    fig1 = go.Figure(data=[
        go.Bar(name='Good',
               x=grade_labels,
               y=satisfaction['Good'].values,
               marker_color='#9bb1d6'),
        go.Bar(name='Bad',
               x=grade_labels,
               y=satisfaction['Bad'].values,
               marker_color='#a39bd6'),
    ])
    fig1.update_layout(barmode='group',
                       width=600,
                       height=600,
                       xaxis_title='성적',
                       yaxis_title='합계',
                       **dark_theme)
    plot_div = plot(fig1, output_type='div')

    # Share of students per major; labels come from the grouped index so
    # they always line up with the values.
    major_counts = students.groupby('전공').size()
    fig2 = go.Figure(data=[
        go.Pie(labels=list(major_counts.index),
               values=major_counts.values,
               textinfo='label+percent',
               insidetextorientation='radial')
    ])
    fig2.update_layout(width=550, height=500, showlegend=False, **dark_theme)
    pie_div = plot(fig2, output_type='div')

    # Grade comparison per major. A cross-tab keeps majors with zero students
    # in a grade, so markers cannot shift onto the wrong major.
    major_by_grade = pd.crosstab(students['전공'], students['성적']).reindex(
        columns=['H', 'M', 'L'], fill_value=0)
    majors = list(major_by_grade.index)
    fig3 = go.Figure()
    for grade, trace_name, color in (('H', 'High', 'crimson'),
                                     ('M', 'Middle', 'gold'),
                                     ('L', 'Low', 'black')):
        fig3.add_trace(
            go.Scatter(
                x=major_by_grade[grade].values,
                y=majors,
                marker=dict(color=color, size=12),
                mode="markers",
                name=trace_name,
            ))
    fig3.update_layout(xaxis_title="학생수",
                       yaxis_title="전공",
                       width=700,
                       height=500,
                       **dark_theme)
    last_div = plot(fig3, output_type='div')

    # Grade counts split by days absent.
    grade_by_absence = pd.crosstab(students['성적'],
                                   students['결석일수']).reindex(
                                       columns=['Under-7', 'Above-7'],
                                       fill_value=0)
    absence_grades = list(grade_by_absence.index)
    fig4 = go.Figure(data=[
        go.Bar(name=trace_name,
               x=absence_grades,
               y=grade_by_absence[level].values,
               text=grade_by_absence[level].values,
               textposition='auto',
               marker_color=color)
        for level, trace_name, color in (
            ('Under-7', '7일이하', 'rgb(204,153,153)'),
            ('Above-7', '7일이상', 'rgb(255,204,204)'),
        )
    ])
    fig4.update_layout(xaxis_title="성적",
                       yaxis_title="학생수",
                       width=600,
                       height=600,
                       **dark_theme)
    ab_plot = plot(fig4, output_type='div')

    fig5 = px.scatter_matrix(students,
                             dimensions=["발표수", "과정반복수", "새공지사항확인수", "토론참여수"],
                             color="성적",
                             width=1200,
                             height=1000)
    fig5.update_layout(width=800, height=800, **dark_theme)
    plot5_div = plot(fig5, output_type='div')

    # ------------------------------------------------------------------
    # Chankyu's charts
    # ------------------------------------------------------------------
    plt.clf()

    # Copy with grades encoded numerically (H=2, M=1, L=0); reused below by
    # the violin plot and the final 3D scatter.
    graded = students.copy()
    graded['성적'] = graded['성적'].map({'H': 2, 'M': 1, 'L': 0})

    # Percentage of high / middle / low grades within each nationality.
    # Rows are picked by grade label: positional access returned the L row
    # for the bar named 'H' because the encoded grades sort as 0, 1, 2.
    grade_share = (pd.crosstab(
        students['성적'], students['국적'], normalize='columns') * 100).reindex(
            ['H', 'M', 'L'], fill_value=0)
    countries = list(grade_share.columns)
    fig8 = go.Figure(data=[
        go.Bar(name=grade,
               x=countries,
               y=grade_share.loc[grade].values,
               marker_color=color)
        for grade, color in (('H', 'rgb(152,105,247)'),
                             ('M', 'rgb(247,152,105)'),
                             ('L', 'rgb(105,247,152)'))
    ])
    fig8.update_layout(barmode='stack', width=900, **dark_theme)
    plot10_div = plot(fig8, output_type='div')

    # Gender counts per nationality, again selected by label ('F' sorts
    # before 'M', so positional access swapped the two bars).
    gender_by_country = pd.crosstab(students['성별'], students['국적']).reindex(
        ['M', 'F'], fill_value=0)
    gender_countries = list(gender_by_country.columns)
    fig_gender = go.Figure(data=[
        go.Bar(name=gender,
               x=gender_countries,
               y=gender_by_country.loc[gender].values,
               marker_color=color)
        for gender, color in (('M', 'rgb(247,105,200)'),
                              ('F', 'rgb(105,200,247)'))
    ])
    fig_gender.update_layout(barmode='stack', width=900, **dark_theme)
    cg_graph = plot(fig_gender, output_type='div')

    fig9 = px.violin(graded,
                     y="과정반복수",
                     x="결석일수",
                     color="성별",
                     box=True,
                     points="all",
                     hover_data=['국적', '성적'])
    fig9.update_layout(width=1300, height=300)
    plot11_div = plot(fig9, output_type='div')

    # Student counts per activity value, split by absence level.
    cg_graph7 = _absence_count_div(students, '새공지사항확인수')
    cg_graph71 = _absence_count_div(students, '토론참여수')
    cg_graph72 = _absence_count_div(students, '발표수')

    # 3D scatter on numerically encoded absence / gender / grade columns.
    encoded = students.copy()
    encoded['결석일수'] = encoded['결석일수'].map({'Under-7': 0, 'Above-7': 1})
    encoded['성별'] = encoded['성별'].map({'M': 0, 'F': 1})
    encoded['성적'] = encoded['성적'].map({'L': 0, 'M': 1, 'H': 2})
    fig54 = px.scatter_3d(encoded[:480],
                          x='발표수',
                          y='토론참여수',
                          z='새공지사항확인수',
                          color='결석일수',
                          size_max=5,
                          symbol='성별')
    fig54.update_layout(margin=dict(l=0, r=0, b=0, t=0),
                        width=800,
                        height=700,
                        plot_bgcolor='rgba(255,255,255,0.5)')
    fig54.update_traces(opacity=1,
                        marker=dict(showscale=True, reversescale=True, cmid=6))
    cg_graph74 = plot(fig54, output_type='div')

    # Model comparison: predict the absence level (0 = Under-7, 1 = Above-7)
    # from three activity counts, thresholding each regressor at 0.5.
    x = students[['발표수', '토론참여수', '새공지사항확인수']].values
    y = students['결석일수'].map({'Under-7': 0, 'Above-7': 1}).values

    regressors = (
        # The default split criterion is squared error; spelling it 'mse' is
        # rejected by current scikit-learn releases.
        ('RandomForestRegressor', RandomForestRegressor(n_estimators=1000)),
        ('LinearRegression', LinearRegression()),
        ('KNeighborsRegressor', KNeighborsRegressor(n_neighbors=3)),
        ('XGboost', XGBRegressor(n_estimators=100)),
    )
    text = [['모델명', '예측값', '실제값', '정확도']]
    for model_label, regressor in regressors:
        raw_prediction = np.asarray(regressor.fit(x, y).predict(x)).flatten()
        predicted = np.where(raw_prediction > 0.5, 1, 0)
        # Accuracy is computed from this model's own predictions; the random
        # forest row previously reused the XGBoost predictions.
        text.append([
            model_label, predicted[:10], y[:10],
            accuracy_score(y, predicted)
        ])

    colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
    font = ['#FCFCFC', 'red', 'black', 'black', 'red']
    fig_table = ff.create_table(text, colorscale=colorscale, font_colors=font)
    fig_table.layout.width = 1300
    cg_chart = plot(fig_table, output_type='div')

    # ------------------------------------------------------------------
    # Kyungseok's chart: correlation heatmap over label-encoded columns
    # ------------------------------------------------------------------
    label_encoded = students.apply(LabelEncoder().fit_transform)
    # LabelEncoder sorts the grades as H=0, L=1, M=2; moving H to 3 yields the
    # ordinal scale L=1 < M=2 < H=3.
    label_encoded.loc[label_encoded['성적'] == 0, '성적'] = 3

    # Pearson correlation, rule of thumb: |r| < 0.1 none, 0.1-0.3 weak,
    # 0.3-0.7 clear, 0.7-1.0 strong (sign gives the direction).
    corr = label_encoded.corr()
    fig20 = px.imshow(corr,
                      x=list(corr.columns),
                      y=list(corr.index),
                      width=1000,
                      height=900,
                      color_continuous_scale='RdBu_r')
    fig20.update_layout(width=800, height=800, **dark_theme)
    plot20_div = plot(fig20, output_type='div')

    # ------------------------------------------------------------------
    # Yunho Park's charts
    # ------------------------------------------------------------------
    education = pd.read_csv(education_url, encoding='euc-kr')

    # Label-encode every object-typed column before fitting the classifier.
    education_encoded = education.copy()
    label = LabelEncoder()
    column_types = education_encoded.dtypes
    for col in column_types[column_types == 'object'].index:
        education_encoded[col] = label.fit_transform(education_encoded[col])

    # Only the training split is used; the split is kept so the classifier
    # sees the same rows as before.
    X_train, _, y_train, _ = train_test_split(
        education_encoded.drop('성적', axis=1),
        education_encoded['성적'],
        test_size=0.2,
        random_state=52)

    model = XGBClassifier(max_depth=10,
                          learning_rate=0.1,
                          n_estimators=100,
                          seed=10)
    model.fit(X_train, y_train)

    feature_important = model.get_booster().get_score(importance_type='weight')
    yh_fig1 = go.Figure(
        go.Bar(
            x=list(feature_important.values()),
            y=list(feature_important.keys()),
            marker=dict(color='#F88137'),
            name='성적과 관련된 중요도 그래프',
            orientation='h',
        ))
    yh_fig1.update_layout(legend=dict(x=0.029, y=1.038, font_size=10),
                          margin=dict(l=100, r=20, t=70, b=70),
                          paper_bgcolor='rgba(255,255,255,0.2)',
                          plot_bgcolor='rgba(255,255,255,0.2)',
                          font=dict(color='#000', size=30),
                          height=600,
                          yaxis={'categoryorder': 'total ascending'})
    yh_grap1 = plot(yh_fig1, output_type='div')

    yh_fig2 = px.scatter(education,
                         x="발표수",
                         y="토론참여수",
                         color="성적",
                         size='과정반복수',
                         hover_data=['토론참여수'])
    yh_fig2.update_layout(
        paper_bgcolor='rgba(255,255,255,0.2)',
        plot_bgcolor='rgba(255,255,255,0.2)',
        font=dict(color='#000', size=30),
        height=550,
        width=1400,
    )
    yh_grap2 = plot(yh_fig2, output_type='div')

    yh_fig3 = px.scatter_3d(graded,
                            x='발표수',
                            y='토론참여수',
                            z='과정반복수',
                            color='성적',
                            opacity=0.7)
    yh_fig3.update_layout(margin=dict(l=0, r=0, b=0, t=0),
                          width=600,
                          height=700)
    yh_3D = plot(yh_fig3, output_type='div')

    return render(
        request, 'full.html', {
            'yh_grap1': yh_grap1,
            'yh_grap2': yh_grap2,
            'yh_3D': yh_3D,
            'cg_graph74': cg_graph74,
            'cg_graph72': cg_graph72,
            'cg_graph71': cg_graph71,
            'cg_graph7': cg_graph7,
            'yj_grap1': plot_div,
            'yj_pie': pie_div,
            'yj_grap2': last_div,
            'yj_grap3': ab_plot,
            'yj_scatter': plot5_div,
            'cg_graph1': plot10_div,
            'cg_graph2': plot11_div,
            'cg_graph3': cg_graph,
            'cg_chart': cg_chart,
            'heatmap': plot20_div
        })