Example #1
File: train.py  Project: teslacool/sen_emb
def score(model, val_data, train_data, params):
    model.eval()
    src_sen_emb = []
    tgt_sen_emb = []
    for data in val_data:
        src, tgt, _ = model(data)
        src_sen_emb.append(src.cpu().detach().numpy())
        tgt_sen_emb.append(tgt.cpu().detach().numpy())
    src = np.concatenate(src_sen_emb, 0)
    # src = remove_pc(src)
    tgt = np.concatenate(tgt_sen_emb, 0)
    # tgt = remove_pc(tgt)
    # src_sen_emb_ = []
    # tgt_sen_emb_ = []
    # for i,data in enumerate(train_data):
    #     src_,tgt_,_ = model(data)
    #     src_sen_emb_.append(src_.cpu().detach().numpy())
    #     tgt_sen_emb_.append(tgt_.cpu().detach().numpy())
    #
    # src_ = np.concatenate(src_sen_emb_, 0)
    # # src_ = remove_pc(src_)
    # tgt_ = np.concatenate(tgt_sen_emb_, 0)
    # # tgt_ = remove_pc(tgt_)
    # mapping = torch.nn.Linear(300,300,bias=False)
    # M = np.dot(tgt_.transpose(),src_)
    # u,s,v_t = scipy.linalg.svd(M, full_matrices=True)
    # mapping.weight.data.copy_(torch.from_numpy(u.dot(v_t)).float())
    # # cal_topk_csls(torch.from_numpy(src),torch.from_numpy(tgt))
    #
    # src = mapping(torch.from_numpy(src)).detach().numpy()
    get_score(torch.from_numpy(src), torch.from_numpy(tgt), params.score_file)
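The commented-out block in the middle of this function fits an orthogonal mapping (Procrustes alignment) from the source to the target sentence embeddings before scoring. As a standalone reference, here is a minimal sketch of that step, assuming src_ and tgt_ are (n, d) NumPy arrays of train-set embeddings; the function name procrustes_mapping is introduced here only for illustration, and remove_pc and get_score are project helpers not shown on this page.

import numpy as np
import torch

def procrustes_mapping(src_, tgt_):
    # Orthogonal Procrustes: M = tgt_^T @ src_, SVD M = U S V^T, W = U V^T.
    # Loading W into a bias-free Linear layer maps source embeddings into the
    # target space with an orthogonal (distance-preserving) transform.
    M = tgt_.T.dot(src_)                               # (d, d)
    u, s, v_t = np.linalg.svd(M, full_matrices=True)
    mapping = torch.nn.Linear(src_.shape[1], tgt_.shape[1], bias=False)
    mapping.weight.data.copy_(torch.from_numpy(u.dot(v_t)).float())
    return mapping

The commented-out code then applies the mapping as src = mapping(torch.from_numpy(src)).detach().numpy() before calling get_score.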
Example #2
def get_sgd_model_score(training_features, training_response, test_features,
                        test_response):
    # Fit an L1-regularized logistic model via SGD with a fixed regularization strength
    penalty_func = 'l1'
    C = 1
    model = SGDClassifier(loss='log',
                          penalty=penalty_func,
                          fit_intercept=True,
                          n_iter=500,
                          alpha=C)
    #model = LogisticRegression(penalty=penalty_func, dual=False,  tol=0.01, C=C, fit_intercept=True)
    model.fit(training_features, training_response)
    test_pred = model.predict_proba(test_features)
    return get_score(test_response, test_pred[:, 1])
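Note that this snippet targets an older scikit-learn API: n_iter has since been replaced by max_iter, and loss='log' by loss='log_loss'. Also, alpha in SGDClassifier is the regularization strength itself (larger means a stronger penalty), unlike C in LogisticRegression. A rough equivalent against the current API, assuming the same get_score helper from this project, might look like the sketch below; the name get_sgd_model_score_modern is made up for illustration.

from sklearn.linear_model import SGDClassifier

def get_sgd_model_score_modern(training_features, training_response,
                               test_features, test_response):
    # Same model as above, written against the current scikit-learn API.
    model = SGDClassifier(loss='log_loss',   # 'log' was renamed to 'log_loss'
                          penalty='l1',
                          fit_intercept=True,
                          max_iter=500,      # n_iter was removed in favor of max_iter
                          alpha=1.0)         # regularization strength (not 1/C)
    model.fit(training_features, training_response)
    test_pred = model.predict_proba(test_features)
    return get_score(test_response, test_pred[:, 1])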
Example #3
def get_lg_model_score(training_features, training_response, test_features,
                       test_response):
    # Fit an L2-regularized logistic-regression model with a fixed regularization strength
    penalty_func = 'l2'
    C = 1
    model = LogisticRegression(penalty=penalty_func,
                               dual=False,
                               tol=0.00001,
                               C=C,
                               fit_intercept=True)
    model.fit(training_features, training_response)
    test_pred = model.predict_proba(test_features)

    return get_score(test_response, test_pred[:, 1])
Example #4
    def fit_and_validate(self):
        '''
        Fit the model and validate it on the held-out tail of the series.
        :return:
        '''
        from m_VAR import m_VAR_model
        from util import re_log1p
        self.data_prepro()
        l2 = int(self.re_cl.get())
        steps_valid = 10
        Y_valid, Y_future, Y_exog, Y_exog_future = self.prepare_Y(
            0, -steps_valid)
        model = m_VAR_model(Y_valid, exog=Y_exog, maxlags=self.maxlags, l2=l2)
        model.fit()
        self.model = model
        y_pred = predict_fix(
            model.forecast(
                Y_valid,
                steps_valid,
                exog_future=Y_exog_future,
            ))

        score_mae, score_mr = get_score(y_pred=y_pred, y_true=Y_future)
        if self.v2.get():
            self.verbose_list.insert(
                0, 'lag order:%d, regularization coefficient:%d, with injection wells, mae:%.6f, mer:%.6f' %
                (int(self.Lag_cl.get()), l2, score_mae, score_mr))
        else:
            self.verbose_list.insert(
                0, 'lag order:%d, regularization coefficient:%d, mae:%.6f, mer:%.6f' %
                (int(self.Lag_cl.get()), l2, score_mae, score_mr))
        if self.v3.get():
            point_forecast, forc_lower, forc_upper = model.forecast_interval(
                np.array(Y_valid),
                steps_valid,
                exog_future=Y_exog_future,
            )
            forc_lower = re_log1p(forc_lower)
            forc_upper = re_log1p(forc_upper)
            self.validate_window = m_forecast_plot_window(
                Y_valid, Y_future, y_pred, fore_cov=(forc_lower, forc_upper))
        else:
            self.validate_window = m_forecast_plot_window(
                Y_valid,
                Y_future,
                y_pred,
            )
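In this example get_score returns a pair (score_mae, score_mr), i.e. a mean absolute error and a mean relative error of the forecast. The project's actual implementation is not shown on this page; a hypothetical sketch consistent with how its return values are used above (the averaging and the epsilon guard are assumptions):

import numpy as np

def get_score(y_pred, y_true):
    # Hypothetical sketch: MAE and mean relative error of a forecast.
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    abs_err = np.abs(y_pred - y_true)
    mae = abs_err.mean()
    mer = (abs_err / (np.abs(y_true) + 1e-8)).mean()
    return mae, mer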
Example #5
def _plot_pr(config, cases_df, controls_df, case_total, control_total, alpha,
             fig_path):
    fig, ax = plt.subplots()
    #ax.set_ylim(0, 0.5)
    ax.set_yticks(np.arange(0, 0.51, 0.1))
    for name, info in config.items():
        case_scores = util.get_score(cases_df, name)
        control_scores = util.get_score(controls_df, name)
        print(name, case_scores.shape, control_scores.shape)

        estimated_tpr, estimated_num, pvalues = _calc_precision_vec(
            case_scores, control_scores, case_total, control_total,
            info['thres_vec'], alpha)
        print(estimated_tpr)
        print(estimated_num)

        ax.scatter(estimated_num, estimated_tpr, color=info['color'])
        if name == 'cnn_prob':
            name = 'MVP'
        if 'DNN' in name:
            name = 'FCNN'
        c = info['color']
        marker = '.'
        for num_, tpr_, pvalue_ in zip(estimated_num, estimated_tpr, pvalues):
            markersize = marker_size_pvalue(pvalue_)
            ax.plot(num_, tpr_, markersize=markersize, marker=marker, color=c)
        ax.plot(estimated_num,
                estimated_tpr,
                linestyle='-',
                color=info['color'],
                label=name.split('_')[0])

        for thres_, num_, tpr_ in zip(info['thres_vec'], estimated_num,
                                      estimated_tpr):
            ax.annotate("{}".format(thres_),
                        xy=(num_, tpr_),
                        ha='left',
                        va='bottom')

        if name == 'MVP_rank':
            estimated_tpr, estimated_num, pvalues = _calc_precision_vec(
                case_scores, control_scores, case_total, control_total, [-1.0],
                alpha)
            c = 'purple'
            ax.plot(estimated_num[0],
                    estimated_tpr[0],
                    color=c,
                    marker='.',
                    markersize=marker_size_pvalue(pvalues[0]))
            #ax.plot(estimated_num,
            #        estimated_tpr,
            #        linestyle='-',
            #        color=c,
            #        label=name.split('_')[0])
            ax.annotate("All Mis",
                        xy=(estimated_num[0], estimated_tpr[0]),
                        ha='left',
                        va='bottom')

    ax.set_ylabel('Estimated Positive Predictive Value', weight='normal')
    ax.set_xlabel('Estimated number of risk variants', weight='normal')
    lgnd = ax.legend(loc='upper right')
    # plot pvalue legend
    ls = []
    for p in [10**-8, 10**-6, 10**-4, 10**-2]:
        l, = ax.plot([], [],
                     'o',
                     marker='.',
                     markersize=-np.log(p),
                     color='black')
        ls.append(l)
    labels = ["10E-8", "10E-6", "10E-4", "10E-2"]
    leg = ax.legend(ls,
                    labels,
                    numpoints=1,
                    ncol=4,
                    frameon=False,
                    loc='lower center',
                    handlelength=2,
                    borderpad=0,
                    handletextpad=1,
                    title='p value')
    leg.get_title().set_fontsize('8')  # legend 'Title' fontsize

    #plt.setp(plt.gca().get_legend().get_texts(),
    #         fontsize='20')  # legend 'list' fontsize
    plt.gca().add_artist(lgnd)
    #ax.set_xlim([50, 300])
    #plt.xticks(fontsize = 28)
    #plt.yticks(fontsize = 28)
    #plt.legend(loc="upper right")
    #ax.legend(loc="best")
    plt.savefig(fig_path, format='pdf', bbox_inches='tight')
    plt.close(fig)
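marker_size_pvalue is a helper from the same project and is not shown here. The p-value legend near the end of the function sizes its reference dots as -np.log(p), so the helper presumably does the same; a hypothetical sketch under that assumption:

import numpy as np

def marker_size_pvalue(pvalue):
    # Hypothetical helper matching the legend above, which uses -np.log(p)
    # as the marker size, so smaller p-values get larger dots.
    return -np.log(pvalue)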
Example #6
def _get_scores(asd, chd, control, n):
    return (util.get_score(asd, n),
            util.get_score(chd, n),
            util.get_score(control, n))
Example #7
cvec = [0.0001, 0.001, .01, 0.1, 1]
cvec = [0.01, .1, 1, 100, 1000]
score_best = -100
bestC = 10

penalty_func = 'l2'

for C in cvec:
    model = SGDClassifier(loss='log',
                          penalty=penalty_func,
                          fit_intercept=True,
                          n_iter=1000,
                          alpha=C)
    model.fit(training_features, training_response)
    validate_pred = model.predict(validate_features)
    score = get_score(validate_response, validate_pred)
    print C, score
    np.set_printoptions(precision=3)
    #print model.coef_, (model.coef_[0]**2).sum()
    if score_best < score:
        bestC = C
        score_best = score

print "==============================================", bestC
model = SGDClassifier(loss='log',
                      penalty=penalty_func,
                      fit_intercept=True,
                      n_iter=1000,
                      alpha=bestC)
model.fit(training_features, training_response)
validate_pred = model.predict(validate_features)
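This snippet is a Python 2 script written against an old scikit-learn release (print statements, n_iter, loss='log'). The same validation-set search over alpha, rewritten for Python 3 and the current API, might look like the sketch below; training_features, training_response, validate_features, validate_response, and get_score are assumed to come from the surrounding script.

from sklearn.linear_model import SGDClassifier

best_score, best_alpha = float('-inf'), None
for alpha in [0.01, 0.1, 1, 100, 1000]:
    # Logistic loss trained by SGD; alpha is the L2 regularization strength.
    model = SGDClassifier(loss='log_loss', penalty='l2',
                          fit_intercept=True, max_iter=1000, alpha=alpha)
    model.fit(training_features, training_response)
    score = get_score(validate_response, model.predict(validate_features))
    print(alpha, score)
    if score > best_score:
        best_score, best_alpha = score, alpha

print("best alpha:", best_alpha, "validation score:", best_score)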
Example #8
File: test_sgd.py  Project: vema555/logit
training_features, training_response, validate_features, validate_response, test_features, test_response = get_bank_data()



cvec = [ 0.0001, 0.001, .01, 0.1, 1]
cvec = [ 0.01, .1, 1, 100, 1000 ]
score_best = -100
bestC = 10

penalty_func = 'l2'

for C in cvec:
    model = SGDClassifier(loss='log', penalty=penalty_func, fit_intercept=True, n_iter=1000, alpha=C)
    model.fit(training_features, training_response)
    validate_pred = model.predict(validate_features)
    score = get_score(validate_response, validate_pred)
    print C, score
    np.set_printoptions(precision=3)
    #print model.coef_, (model.coef_[0]**2).sum()
    if score_best < score:
        bestC = C
        score_best = score


print "==============================================" , bestC
model = SGDClassifier(loss='log', penalty=penalty_func, fit_intercept=True, n_iter=1000, alpha=bestC )
model.fit(training_features, training_response)
validate_pred = model.predict(validate_features)
print bestC, get_score( validate_response, validate_pred)