def run(data):
    """Train a binary LightGBM CV ensemble and return 0/1 predictions.

    Rows of ``data`` whose ``ret`` equals -1 form the prediction set; all
    other rows form the training set.  Predictions above 0.01 are mapped to
    1 and predictions at or below 0.01 are mapped to 0.

    Args:
        data: DataFrame holding a ``ret`` column, a ``file_name`` column and
            the feature columns listed in ``feat_arr``.

    Returns:
        DataFrame with columns ``id`` (taken from ``file_name``) and ``ret``.
    """
    train = data.loc[data.ret != -1].reset_index(drop=True)
    test = data.loc[data.ret == -1].reset_index(drop=True)
    feat_arr = [
        '162', '110', '86', '168', '8', '84', '113', '96', '60', '108', '194',
        '170', '66', '89', '165', '192', '24', '18', '366', '258', '354',
        '360', '11', '276', '120', '158', '270', '246', '372', '6', '12',
        '164', '342', '81', '57', '254', '252', '63', '176', '374', '77'
    ]
    lgb_params = {
        'boosting_type': 'gbdt',
        'num_leaves': 150,
        'reg_alpha': 0.,
        'reg_lambda': 1,
        'n_estimators': 60,
        'objective': 'binary',
        'subsample': 0.9,
        'colsample_bytree': 0.9,
        'learning_rate': 0.1,
        'min_child_weight': 5
    }
    s = CV(_df=train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, round_cv=3, n_splits=10)
    pred = s.get_result(test[feat_arr])
    result = test[['file_name']].reset_index(drop=True).copy()
    result['ret'] = pred
    # Threshold with a single .loc on the frame.  The previous chained form
    # (result['ret'].loc[mask] = ...) writes to a temporary object and is
    # silently dropped when pandas Copy-on-Write is active.
    above = result['ret'] > 0.01
    at_or_below = result['ret'] <= 0.01
    result.loc[above, 'ret'] = 1
    result.loc[at_or_below, 'ret'] = 0
    return result.rename(columns={'file_name': 'id'})
def filterChoice(filterFile):
    """Interactively ask which term(s) ``filterFile`` should be filtered by.

    Prompts for option 1 (single term) or 2 (multiple terms), lists the
    categories found in the ``CV`` mapping and reads the chosen term(s).
    Any other answer, including non-numeric input, prints a reminder.

    Args:
        filterFile: Name shown in the first prompt.

    Returns:
        None; the function only prints and reads from stdin.
    """
    def _show_categories():
        # Print every category of the CV mapping with its comma-joined terms.
        print("What term to you want to filter by? Here are the categories:")
        for k, v in CV.items():
            print('{key}: {values}'.format(
                key=k,
                values=', '.join(', '.join(x.split()) for x in v)))

    prompt = ('Do you want to filter ' + filterFile +
              ' by 1 or multiple terms? Choose \n 1. 1 term'
              '\n 2. Multiple terms \n')
    try:
        noOfFilters = int(input(prompt))
    except ValueError:
        # Non-numeric answers used to crash here; route them to the
        # "choose 1 or 2" message instead.
        noOfFilters = None

    if noOfFilters == 1:
        _show_categories()
        filterTerm = input("What term do you want to filter by? \n")
        if any(filterTerm in value for value in CV.values()):
            print('Filter by ' + filterTerm)
        else:
            print("Please choose a category to filter by")
    elif noOfFilters == 2:
        _show_categories()
        filterTerms = input(
            "What terms do you want to filter by? (Split up the terms with a ', ' (comma)) \n"
        )
        # NOTE(review): the parsed terms are not used or returned yet.
        terms_list = re.split("[, ] ", filterTerms)
    else:
        print('Choose either options 1 or 2')
 def test_get_seed_exists(self):
     '''
     Tests that get_seed() returns a seed when the CV object is built with seed=None
     '''
     X = pd.DataFrame([1, 2, 3, 4, 5, 6])
     Y = pd.DataFrame([1, 2, 3, 4, 4, 5])
     kfold = CV(X, Y, seed=None)
     # Compare against None instead of relying on truthiness: a seed of 0
     # is falsy and would make assertTrue fail.
     self.assertIsNotNone(kfold.get_seed())
 def test_shuffle_method_x(self):
     '''
     Checks that the first frame returned by shuffle() is not equal to the input feature frame
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels)
     # NOTE(review): no seed is passed, so this presumably relies on the
     # shuffle never reproducing the original order -- confirm.
     shuffled_features, _ignored = splitter.shuffle()
     same_order = shuffled_features.equals(features)
     self.assertFalse(same_order)
 def test_shuffle_method_y(self):
     '''
     Checks that the second frame returned by shuffle() is not equal to the input label frame
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels)
     # NOTE(review): no seed is passed, so this presumably relies on the
     # shuffle never reproducing the original order -- confirm.
     _ignored, shuffled_labels = splitter.shuffle()
     same_order = shuffled_labels.equals(labels)
     self.assertFalse(same_order)
 def test_get_seed_returns_correct(self):
     '''
     Checks that get_seed() hands back the same seed that was passed to the constructor
     '''
     expected_seed = 42
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels, seed=expected_seed)
     actual_seed = splitter.get_seed()
     self.assertEqual(expected_seed, actual_seed)
示例#7
0
 def Loop(self):
     '''
     Runs CV.nFold() self.loop times on fresh data from self.getdata() and
     returns the accumulated output averaged per 'ID'.
     '''
     combined = pd.DataFrame()
     for _ in range(self.loop):
         data = self.getdata()
         fold_runner = CV(data, self.label_col, self.n_splits)
         fold_runner.nFold()
         stacked = pd.concat([combined, fold_runner.out], ignore_index=True)
         # NOTE(review): the per-ID mean is recomputed on every iteration,
         # so earlier runs are averaged repeatedly; confirm this weighting
         # is intended rather than one mean after the loop.
         combined = stacked.groupby('ID').apply(lambda grp: grp.mean())
     return combined
 def test_CV_split(self):
     '''
     Checks that the first two elements returned by split(n_splits=3) each have length 3
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     fold_count = 3
     splitter = CV(features, labels)
     # split() is called once per assertion, one call for each position.
     for position in (0, 1):
         part = splitter.split(n_splits=fold_count)[position]
         self.assertEqual(len(part), 3)
 def test_seed_shuffle(self):
     '''
     Tests to see whether inputting a seed creates reproducible X and Y dataframes.
     '''
     X = pd.DataFrame([1, 2, 3, 4, 5, 6])
     Y = pd.DataFrame([1, 2, 3, 4, 4, 5])
     seed = 42
     kfold = CV(X, Y, seed=seed)
     # Reference shuffles built directly with pandas using the same seed.
     x = X.sample(frac=1, random_state=seed).reset_index(drop=True)
     y = Y.sample(frac=1, random_state=seed).reset_index(drop=True)
     x_shuff, y_shuff = kfold.shuffle()
     self.assertTrue(x.equals(x_shuff))
     # The expected label frame used to be computed but never checked.
     self.assertTrue(y.equals(y_shuff))
 def test_init_shuffle_x(self):
     '''
     Checks that the x frame stored by the constructor is not equal to the input feature frame
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels)
     stored_unchanged = splitter.x.equals(features)
     self.assertFalse(stored_unchanged)
 def test_init_shuffle_y(self):
     '''
     Checks that the y frame stored by the constructor is not equal to the input label frame
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels)
     stored_unchanged = splitter.y.equals(labels)
     self.assertFalse(stored_unchanged)
 def test_CV_init(self):
     '''
     Checks that the constructor yields a CV instance whose y and x attributes are DataFrames
     '''
     features = pd.DataFrame([1, 2, 3, 4, 5, 6])
     labels = pd.DataFrame([1, 2, 3, 4, 4, 5])
     splitter = CV(features, labels)
     self.assertIsInstance(splitter, CV)
     for attr_name in ('y', 'x'):
         self.assertIsInstance(getattr(splitter, attr_name), pd.DataFrame)
示例#13
0
 def collectCV(self):
     '''
     Builds one CV object per (file name, file path, post) triple and appends it
     to self.CVList. A failing entry is reported on stdout and skipped.
     '''
     entries = zip(self.fileNamesWithPath, self.CVFileName, self.cvPostList)
     for path, name, post in entries:
         try:
             parsed = CV(name, path, post)
             self.CVList.append(parsed)
         except Exception as err:
             # Best effort: print which file failed and why, then continue.
             print(name)
             print("in collection of CV \t" + str(err))
示例#14
0
def cv_register():
    """Register a new CV session and start the camera connector.

    The new session id is ``len(cv_list) + 1``; a ``CV`` object carrying
    that id is appended to ``cv_list`` before ``cv_camera_connector()`` is
    called.

    Returns:
        The new session id on success.  On any exception the error is
        printed with its traceback and a ``jsonify`` payload with
        ``status`` False and the exception text is returned instead.
    """
    # NOTE(review): success returns a bare id while failure returns a
    # jsonify response; confirm callers expect both shapes.
    try:
        new_session_id = len(cv_list) + 1
        new_cv = CV(session_id=new_session_id, status=True)
        cv_list.append(new_cv)

        cv_camera_connector()

        return new_session_id
    except Exception as e:
        print("Exception: " + str(e))
        traceback.print_exc()
        return jsonify({'status': False, 'exception': str(e)})
           "\n"
           "\n\t You are expected to enter these options in the correct order followed"
           "\n\t by the URLs or Headlines that you wish to test !"
           "\n"
           "\t Input arguments needed:\n"
           "\n\t\t [--url/--headline] =  Input a URL to extract a headline from or input headline directly\n"
           "\t\t [URL or TITLE) = Input the URLs and/or headlines enclosed in single '' or double \"\" quotes. \n"
           "\t\t [--filter] = Specifies that you would like to filter your search.\n"
           "\t\t [Filter(s)] = Input the terms you want the headline or URL to be filtered by. \n"
           "\t\t [-output] = Specifies that you would like your output saved to a specific path.\n"
           "\t\t [Path] = Enter the file path where you would like your output to be stored.\n"
           "\n\t Example Input:\n"
           "\n\t --headline 'David Jimson is a good bloke, apparently !' --filter Europe Person Sport\n"
           "\t [cont.] --output C:\Program Files\example.json"
           "\n\t The following terms are valid filters:")
     for k, v in CV.items():
         print('\t\t {key}: {values}'.format(key=k, values=', '.join('{}'.format(', '.join(x.split())) for x in v)))
     break
 if was_option:
     continue
 elif last_option == "--url" or last_option == "-u":
     url = args[i]
     import urllib.request
     with urllib.request.urlopen('http://python.org/') as response:
         html = response.read()
 elif last_option == "--headline" or last_option == "-i":
     headlines.append(args[i])
 elif last_option == "--filter" or last_option == "-f":
     CV_vals = CV.values()
     CV_single = []
     for sublist in CV_vals:
示例#16
0
from CV import CV
import os
import sys
import datetime

if __name__ == '__main__':
    # Create a uniquely named scratch directory, build the CV helper from
    # inside it, run combineTs() from the parent directory, then remove
    # the scratch directory again.
    cwd = r'D:\temp\新建文件夹'
    # ':' is not allowed in Windows file names, so it is swapped for '-';
    # the earlier replace(':', ':') left the timestamp unchanged.
    time = str(datetime.datetime.now()).replace(':', '-')
    cwd2 = cwd + '\\temp_coco56_' + time
    os.mkdir(cwd2)
    os.chdir(cwd2)

    ins = CV()
    os.chdir('..')
    ins.combineTs()

    os.rmdir(cwd2)
示例#17
0
from preprocess import Prep
from CV import CV
from performance import Portfolio, MarketIntradayPortfolio
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from pylab import *
from datetime import datetime

# Load both indices and derive their feature columns.
HS300 = getStock_C('000300')
SP500 = getStock_A('^GSPC')
HS300 = addFeatures(HS300)
SP500 = addFeatures(SP500)
# DataFrame.drop returns a new frame; the result has to be kept, otherwise
# the 'ADOSC' column is never actually removed.
HS300 = HS300.drop('ADOSC', axis=1)
X_train, y_train, X_test, y_test = Prep(HS300)
Classify(X_train, y_train, X_test, y_test, 'RF')
CV(X_train, y_train, 9, 'RF')
clf = LDA()
y_pred = clf.fit(X_train, y_train).predict(X_test)
symbol = 'CSI300'
start_test = datetime(2014, 1, 1)
end_period = datetime(2015, 9, 29)
bars = HS300[['Open', 'AdjClose']]

bars = bars[start_test:end_period]

# One signal per bar in the test window, taken from the LDA predictions.
signals = pd.DataFrame(index=bars.index)
signals['signal'] = y_pred
# Short the stock: a predicted 0 becomes -1.  A single .loc on the frame
# replaces the chained signals.signal[...] assignment, which pandas may
# apply to a temporary copy.
signals.loc[signals['signal'] == 0, 'signal'] = -1
from CV import CV
import os
import sys
import datetime

if __name__ == '__main__':
    # Create a uniquely named scratch directory, split one video into two
    # from inside it, then step back out and remove the directory.
    # The raw string previously doubled the first backslash ('D:\\temp').
    cwd = r'D:\temp\新建文件夹'
    # ':' is not allowed in Windows file names, so it is swapped for '-';
    # the earlier replace(':', ':') left the timestamp unchanged.
    time = str(datetime.datetime.now()).replace(':', '-')
    cwd2 = cwd + '\\temp_coco56_' + time
    os.mkdir(cwd2)
    os.chdir(cwd2)

    ins = CV()
    ins.split_OneVedio_Into_MultipleVedios_WithNumber(2)

    os.chdir('..')
    os.rmdir(cwd2)
示例#19
0
from CV import CV
import os
import sys
import datetime

if __name__ == '__main__':
    # Create a uniquely named scratch directory, transcode to .mp4 from
    # inside it, then step back out and remove the directory.
    cwd = r'D:\temp\新建文件夹'
    # ':' is not allowed in Windows file names, so it is swapped for '-';
    # the earlier replace(':', ':') left the timestamp unchanged.
    time = str(datetime.datetime.now()).replace(':', '-')
    cwd2 = cwd + '\\temp_coco56_' + time
    os.mkdir(cwd2)
    os.chdir(cwd2)

    ins = CV()
    # Alternative target format: aimedFormat='.ts'
    ins.Transcode(aimedFormat='.mp4', dealOldFilesMode=1)

    os.chdir('..')
    os.rmdir(cwd2)
if __name__=='__main__':
    # Run the DensityAuth cross-validation on two selected users only.
    #
    # Shape of a hand-written test_data that was used here earlier:
    #   {'a': [defaultdict(list, {'aa': range(10, 100, 10), 'ab': range(1, 10)}),
    #          defaultdict(list, {'ac': range(1, 10, 2), 'aa': [2, 2, 2]}),
    #          defaultdict(list, {'ad': range(10, 20, 3)})],
    #    'b': [defaultdict(list, {'ba': range(1, 20, 3), 'bb': range(1, 15)}),
    #          defaultdict(list, {'bc': range(2, 20, 4)}),
    #          defaultdict(list, {'bd': range(50, 300, 150)})]}
    test_data = pp.split_samples(pp.load_data())
    # Iterate over a snapshot of the keys: deleting entries while iterating
    # the live key view raises RuntimeError on Python 3.
    for u in list(test_data.keys()):
        if u not in {'9999999','SERLHOU'}:
            del test_data[u]
    # Single-argument print() behaves the same on Python 2 and Python 3.
    print(test_data.keys())
    test_cv = CV(DensityAuth, test_data)

    # Disabled debugging aid: pretty-print each item yielded by
    # test_cv.partition_data(<label>, test_data['a'], 1).

    # Exhaust the validation generator; its results are not used here.
    for i in test_cv.validate():
        pass
    print("DONESKI")
示例#21
0
import cv2 as cv
import time
from EmotionRecognition import EmotionRecognition
from FaceDetection import FaceDetection
from CV import CV

compV = CV(True)  # True when running on a computer, False on a Raspberry Pi

# Look for a face and, when one is found, print its processed emotion.
image, found = compV.findFace()
if found:
    # Single-argument print() behaves the same on Python 2 and Python 3;
    # the bare print statement is a syntax error on Python 3.
    print(compV.processEmotion(image))
    import sys
    import csv
    import os

    # Run the DensityAuth cross-validation and dump each round to a CSV.
    start_time = time.time()
    # %-formatting keeps the output identical on Python 2 and Python 3.
    print('%s initializing algorithm' % start_time)

    test_data,pkd = filter_users_val(split_samples(load_data()))
    # Disabled debugging filter: drop every user except '1227981' and
    # 'ADabongofo' from both test_data and pkd.
    print(test_data.keys())
    test_cv = CV(DensityAuth, test_data,pkd)
    # 'rw+' is not a valid mode for open() on Python 3 (ValueError); the
    # file is only written to, so plain 'w' is used.
    with open('./kde_result.csv', 'w') as outfile:
        result_writer = csv.writer(outfile)

        for n,i in enumerate(test_cv.validate()):
            train_res, cv_res = i
            # Every validation round gets its own header row.
            result_writer.writerow(['user',
                                    'train_IPR', 'train_FRR', 'train_GT', 'train_IT',
                                    'CV_IPR', 'CV_FRR', 'CV_GT', 'CV_IT'])
            for u in train_res.keys():
                result_writer.writerow([u] +
                                       list(train_res[u]) +
                                       list(cv_res[u]))
            result_writer.writerow([])
            # Elapsed time is now - start; the operands used to be swapped,
            # which printed a negative duration.
            print('%s - finished validation %s' % (time.time() - start_time, n))
def get_plot(vid_name):
    """Fit and plot OLS, TLS and RANSAC curves for the tracked ball.

    Runs ``CV(vid_name).run_cv()``, converts the returned image from BGR to
    RGB, fits three ``PolyRegression`` models to the tracker's
    ``bottom_ball`` points and three to its ``top_ball`` points, and draws
    one figure for the top fits and one for the bottom fits over the image.

    Args:
        vid_name: Video identifier passed to ``CV``; ``vid_name[3:]`` is
            printed and used in the plot titles.
    """
    print("--------------------------------------")
    print(vid_name[3:])

    # Run CV for the given video and convert the image for matplotlib.
    tracker = CV(vid_name)
    frame = tracker.run_cv()
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

    def _fit_all(xs, ys):
        # Fit the three estimators in the order OLS, TLS, RANSAC.
        # The literal 2 is presumably the polynomial degree -- confirm
        # against PolyRegression.
        fitted = []
        for tag, solver in (('OLS', OLS), ('TLS', TLS), ('RANSAC', RANSAC)):
            model = PolyRegression(xs, ys, 2, solver)
            model.get_weight_vector()
            fitted.append((tag, model))
        return fitted

    def _draw(title, xs, fitted):
        # Overlay every fitted curve on the image in the current figure.
        plt.imshow(frame)
        for tag, model in fitted:
            plt.plot(xs, model.predict(xs), label=f'{tag}, {model.eqn_str}')
        plt.legend()
        plt.title(title)
        plt.grid(True)

    # Estimate the trajectory for the bottom of the ball.
    bottom_points = np.asarray(tracker.bottom_ball)
    bottom_x = bottom_points[:, 0]
    bottom_y = bottom_points[:, 1]
    bottom_models = _fit_all(bottom_x, bottom_y)

    # Estimate the trajectory for the top of the ball.
    top_points = np.asarray(tracker.top_ball)
    top_x = top_points[:, 0]
    top_y = top_points[:, 1]
    top_models = _fit_all(top_x, top_y)

    # Plot results: top fits first, then the bottom fits in a new figure.
    _draw(f"Top Trajectory\n{vid_name[3:]}", top_x, top_models)

    plt.figure()
    _draw(f"Bottom Trajectory\n{vid_name[3:]}", bottom_x, bottom_models)
    plt.show()

    print("")
示例#24
0
def run(data, result_temp):
    """Zero out earlier positive predictions using per-class binary models.

    Steps:
      1. Split ``data`` into a prediction set (``ret == -1``) and a
         training set (everything else).
      2. Label each training row with the integer name of the folder under
         ``config.TRAIN_PATH`` that contains its file; rows labelled 14
         are dropped.
      3. Fit a multiclass LightGBM model and assign every prediction row a
         class plus the highest class probability.
      4. For each class, train a binary ``CV`` ensemble on ``ret`` and
         score the prediction rows assigned to that class.
      5. Ids with class probability > 0.999 and binary score < 0.1 get
         their ``ret`` in ``result_temp`` forced to 0.

    Args:
        data: DataFrame with ``file_name``, ``ret`` and feature columns.
        result_temp: DataFrame with ``id`` and ``ret`` columns holding the
            earlier predictions.

    Returns:
        DataFrame with columns ``id`` and ``ret`` (``ret`` cast to int).
    """
    test = data.loc[data.ret == -1].reset_index(drop=True)
    data = data.loc[data.ret != -1].reset_index(drop=True)

    # Map each training file name to the integer name of its folder.
    file_name_dict = {}
    for f1 in os.listdir(config.TRAIN_PATH):
        for f2 in os.listdir(os.path.join(config.TRAIN_PATH, f1)):
            file_name_dict[f2] = int(f1)
    data['multi_label'] = data.file_name.apply(lambda x: file_name_dict[x])
    data = data.loc[data['multi_label'] != 14].reset_index(drop=True)

    clf = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=10,
                             learning_rate=0.1, n_estimators=100,
                             subsample_for_bin=200000, objective='multiclass',
                             min_child_weight=1, min_child_samples=20,
                             subsample=0.7, subsample_freq=0,
                             colsample_bytree=0.7,
                             reg_alpha=0.0, reg_lambda=0.0,
                             random_state=3)

    train_x, val_x, train_y, val_y = train_test_split(
        data.drop(['file_name', 'ret', 'multi_label'], axis=1),
        data['multi_label'],
        random_state=3,
        test_size=0.3)
    # NOTE(review): verbose / early_stopping_rounds as fit() keywords were
    # removed in LightGBM 4.0 -- confirm the installed version.
    clf.fit(train_x, train_y, verbose=False, early_stopping_rounds=100,
            eval_metric='logloss', eval_set=[(val_x, val_y)])

    # Predicted class and its probability for every prediction row.
    # (Validation predictions were computed before but never used.)
    test_features = test[train_x.columns.tolist()]
    pred_test = clf.predict(test_features)
    pred = clf.predict_proba(test_features)
    test['multi_label'] = pred_test
    test['prob'] = np.max(pred, axis=1)

    # Train separately: one binary model per class.
    print('training...')
    lgb_params = {'boosting_type': 'gbdt', 'num_leaves': 8,
                  'reg_alpha': 0., 'reg_lambda': 1,
                  'n_estimators': 30, 'objective': 'binary',
                  'subsample': 0.7, 'colsample_bytree': 0.6,
                  'learning_rate': 0.1, 'min_child_weight': 1}
    result_dict = {}
    result_prob_dict = {}
    c = Counter(data.multi_label)
    for class_ in tqdm(list(c.keys())):
        class_train = data.loc[data.multi_label == class_].drop(
            ['file_name', 'multi_label'], axis=1).reset_index(drop=True)
        s = CV(_df=class_train, label_name='ret')
        # Pass a fresh copy so every class starts from the same parameters.
        s.CV(is_print=False, lgb_params=dict(lgb_params), round_cv=3, n_splits=8)
        test_temp = test.loc[test.multi_label == class_].reset_index(drop=True)
        pred_temp = s.get_result(
            test_temp.drop(['file_name', 'multi_label', 'prob', 'ret'], axis=1))
        for i, (name, prob) in enumerate(zip(test_temp['file_name'],
                                             test_temp['prob'])):
            result_dict[name] = pred_temp[i]
            result_prob_dict[name] = prob

    df = pd.DataFrame({
        'id': list(result_dict.keys()),
        'ret': list(result_dict.values()),
        'prob': list(result_prob_dict.values()),
    })

    # Ids whose class is near-certain and whose binary score is low.
    confident_ids = set(df.loc[(df['prob'] > 0.999) & (df['ret'] < 0.1), 'id'])
    print(len(confident_ids))

    result = result_temp.copy()
    # 0 for confident ids, 1 otherwise; multiplying zeroes their prediction.
    keep = ~result['id'].isin(confident_ids)
    result['pred_2'] = keep.astype(int) * result['ret']
    r = result[['id', 'pred_2']].rename(columns={'pred_2': 'ret'})
    r['ret'] = r['ret'].astype(int)
    return r
def run(data, result_best):
    feat_arr = [
        '185_new', '237_new', '176_new', '243_new', '544_new', '85_new',
        '245_new', '103_new', '249_new', '83_new', '545_new', '555_new',
        '183_new', '187_new', '135_new', '161_new', '89_new', '171_new',
        '242_new', '529_new', '91_new', '146_new', '547_new', '123_new',
        '576_new', '97_new', '447_new', '475_new', '141_new', '143_new',
        '159_new', '452_new', '540_new', '543_new', '239_new', '573_new',
        '145_new', '163_new', '181_new', '355_new'
    ]
    # 名字转换
    temp_1 = os.listdir(config.TRAIN_PATH)[0]
    d = pd.read_csv(config.TRAIN_PATH + '/' + temp_1 + '/' +
                    os.listdir(config.TRAIN_PATH + '/' + temp_1)[0])
    name_lst = []
    for col in d.columns:
        name_lst.append(col + '_var')
    for col in d.columns.tolist() + ['_功角', '_视在功率', '_变频器出入口温差', '_变频器出入口压力']:
        name_lst.append(col + '_mean')
        name_lst.append(col + '_min')
        name_lst.append(col + '_max')
        name_lst.append(col + '_ptp')
        name_lst.append(col + '_median')
        name_lst.append(col + '_sum')
    for col in [['叶片1角度', '叶片2角度', '叶片3角度'], ['变桨电机1电流', '变桨电机2电流', '变桨电机3电流'],
                ['x方向振动值', 'y方向振动值'],
                [
                    '发电机定子温度1', '发电机定子温度2', '发电机定子温度3', '发电机定子温度4', '发电机定子温度5',
                    '发电机定子温度6'
                ], ['发电机空气温度1', '发电机空气温度2'], ['主轴承温度1', '主轴承温度2'],
                ['变桨电机1功率估算', '变桨电机2功率估算', '变桨电机3功率估算'],
                ['叶片1电池箱温度', '叶片2电池箱温度', '叶片3电池箱温度'],
                ['叶片1变桨电机温度', '叶片2变桨电机温度', '叶片3变桨电机温度'],
                ['叶片1变频器箱温度', '叶片2变频器箱温度', '叶片3变频器箱温度'],
                ['叶片1超级电容电压', '叶片2超级电容电压', '叶片3超级电容电压'],
                ['驱动1晶闸管温度', '驱动2晶闸管温度', '驱动3晶闸管温度'],
                ['驱动1输出扭矩', '驱动2输出扭矩', '驱动3输出扭矩']]:
        name_lst.append('_'.join(col) + '_mean')
        name_lst.append('_'.join(col) + '_sum')
        name_lst.append('_'.join(col) + '_var')
    dict_name = {}
    col_lst = data.columns.tolist()[:-1]
    for i in range(len(name_lst)):
        dict_name[col_lst[i]] = name_lst[i]

    data = data[
        feat_arr +
        [str(name_lst.index('液压制动压力_max')) + '_new', 'ret', 'file_name']]
    data.columns = [
        dict_name[i]
        for i in feat_arr + [str(name_lst.index('液压制动压力_max')) + '_new']
    ] + ['ret', 'file_name']

    test = data.loc[data.ret == -1].reset_index(drop=True)
    data = data.loc[data.ret != -1].reset_index(drop=True)

    file_name_dict = {}
    for f1 in os.listdir(config.TRAIN_PATH):
        for f2 in os.listdir(config.TRAIN_PATH + f1):
            file_name_dict[f2] = int(f1)

    data['multi_label'] = data.file_name.apply(lambda x: file_name_dict[x])
    data_14 = data.loc[data['multi_label'] == 14].reset_index(drop=True)
    data = data.loc[data['multi_label'] != 14].reset_index(drop=True)

    lgb_params = {
        'boosting_type': 'gbdt',
        'num_leaves': 8,
        'reg_alpha': 0.,
        'reg_lambda': 1,
        'n_estimators': 50,
        'objective': 'binary',
        'subsample': 0.7,
        'colsample_bytree': 0.6,
        'learning_rate': 0.1,
        'min_child_weight': 1
    }
    feat_arr = [dict_name[i] for i in feat_arr]
    # =============================================================================
    #     '''6751 - 6755  test_02.csv 0816'''
    # =============================================================================
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['液压制动压力_max'] < 1.32, test['液压制动压力_max'] > 1),
        test['x方向振动值_mean'] < -1.5)]
    temp_val = temp_test  # test
    temp_train = data
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.2:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.2:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    '''6755 - 6772  submission_3.csv 0816'''
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 3.4, test['x方向振动值_mean'] > 1.2),
        np.logical_and(test['y方向振动值_mean'] < 3, test['y方向振动值_mean'] > 2))]

    temp_val = temp_test  # test
    temp_train = data
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    '''6772 - 6773  test.csv 0816'''
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['液压制动压力_max'] > 1.32, test['液压制动压力_max'] > 1),
        np.logical_and(
            np.logical_and(test['x方向振动值_mean'] < -0.3,
                           test['x方向振动值_mean'] < 22.05),
            np.logical_and(test['y方向振动值_mean'] < .8,
                           test['y方向振动值_mean'] > 0)))]

    temp_val = temp_test  # test
    temp_train = data
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    '''6773 - 6816'''
    test['temp'] = test['x方向振动值_mean'] + 1.2 - test['y方向振动值_mean']
    temp_test = test.loc[np.logical_and(
        np.logical_and(
            np.logical_and(
                test['x方向振动值_mean'] < 0.34,  # 0.34
                test['x方向振动值_mean'] > -0.25),
            np.logical_and(test['y方向振动值_mean'] > 0,
                           test['y方向振动值_mean'] < 1.8)),
        test['temp'] < 0)]
    temp_val = temp_test  # test
    temp_train = data
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/678.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.18:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_val = temp_test  # test
    temp_train = data
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] > 0.18:
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] > 0.18:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])
    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    '''6822 - 6834'''
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 2, test['x方向振动值_mean'] > 1.25),
        np.logical_and(test['y方向振动值_mean'] < 3.2, test['y方向振动值_mean'] > 2))]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.34:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6822.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6816.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.34:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_best_2 = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i
    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35 and temp_result.pred[i] > 0.35:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])
    result_best_2.ret[ix_lst] = 1
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6816.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_best_2 = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i
    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35 and temp_result.pred[i] > 0.35:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])
    result_best_2.ret[ix_lst] = 1
    result_best_2 = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i
    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.35 and temp_result.pred[i] > 0.34:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])
    result_best_2.ret[ix_lst] = 1
    result_best = result_best_2.copy()
    test['temp'] = test['x方向振动值_mean'] + 0.75 - test['y方向振动值_mean']
    temp_test = test.loc[np.logical_and(
        np.logical_and(
            np.logical_and(
                test['y方向振动值_mean'] < 1.25,  # 1.25 , 1
                test['y方向振动值_mean'] > 0.75),  # 0.75, 1
            np.logical_and(test['x方向振动值_mean'] > 0.25,
                           test['x方向振动值_mean'] < 0.6)),
        test['temp'] > 0)]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6816_new.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.45:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6822.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.45:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6822.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6816_new.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:  # 0.18 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.4:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 1.7, test['x方向振动值_mean'] > 1.36),
        np.logical_and(test['y方向振动值_mean'] > 1.4, test['y方向振动值_mean'] < 1.8))]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/6822_new.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.27:  # 0.18 0.27   0.4
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_best.ret.sum()
    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.27:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    # =============================================================================
    #     result_sub.to_csv('../V9_final/result/0820_1.csv', index=False)
    #     result_best = pd.read_csv('../V9_final/result/0820_1.csv')
    # =============================================================================
    result_best = result_sub.copy()
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 1, test['x方向振动值_mean'] > 0.8),
        np.logical_and(test['y方向振动值_mean'] > 0.9, test['y方向振动值_mean'] < 1.1))]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/0820_1.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.39:  # 0.18 0.27   0.4 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.39:  # 0.18 0.27   0.4 0.27
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.39:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()

    test['temp'] = test['x方向振动值_mean'] + 0.2 - test['y方向振动值_mean']
    temp_test = test.loc[np.logical_and(
        np.logical_and(
            test['液压制动压力_max'] > 1,
            np.logical_and(
                np.logical_and(test['x方向振动值_mean'] < 0.4,
                               test['x方向振动值_mean'] > -0.3),
                np.logical_and(test['y方向振动值_mean'] > -0.3,
                               test['y方向振动值_mean'] < 0.14))),
        test['temp'] > 0)]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    # result_best = pd.read_csv('../V9_final/result/0820_2.csv')
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.3:  # 0.18 0.27   0.4  0.27  0.39
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_best.ret.sum()
    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.3:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 3, test['x方向振动值_mean'] > 1.8),
        np.logical_and(test['y方向振动值_mean'] > 1, test['y方向振动值_mean'] < 2.1))]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.38:  # 0.18 0.27   0.4  0.27  0.39  0.3
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.37:  # 0.18 0.27   0.4  0.27  0.39  0.3
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.37:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    temp_test = test.loc[np.logical_and(
        np.logical_and(
            test['x方向振动值_mean'] < -0.8,  # -0.55
            test['x方向振动值_mean'] > -2),
        np.logical_and(test['y方向振动值_mean'] > -0.9,
                       test['y方向振动值_mean'] < -0.2))]
    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.25:  # 0.18 0.27   0.4  0.27  0.39  0.3  0.37
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.25:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_best = result_sub.copy()
    temp_test = test.loc[np.logical_and(
        np.logical_and(test['x方向振动值_mean'] < 1, test['x方向振动值_mean'] > 0),
        np.logical_and(test['y方向振动值_mean'] > 1.9, test['y方向振动值_mean'] < 2.5))]

    temp_val = temp_test  # test
    temp_train = data  # .loc[:len(data)-1454-2496-1] # -2496, 1454
    s = CV(_df=temp_train[['ret'] + feat_arr],
           label_name='ret',
           random_state=3,
           is_val=False)
    s.CV(is_print=False, lgb_params=lgb_params, n_splits=5, round_cv=1)
    pred = s.get_result(temp_val[feat_arr])
    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.2:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_sub.ret.sum()
    temp_result = temp_val[['file_name']].reset_index(drop=True).copy()
    temp_result['pred'] = pred

    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = result_best.ret[i]

    temp_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[
                i] < 0.2:  # 0.18 0.27   0.4  0.27  0.39  0.3  0.37  0.25
            temp_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub = result_best.copy()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] < 0.2:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 0
    result_sub.ret.sum()
    dict_result_best = {}
    for i in range(len(result_best)):
        dict_result_best[result_best.id[i]] = i

    ix_lst = []
    for i in range(len(temp_result)):
        if temp_result.pred[i] > 0.8:
            ix_lst.append(dict_result_best[temp_result.file_name[i]])

    result_sub.ret[ix_lst] = 1
    result_best = result_sub.copy()
    return result_best
if __name__=='__main__':
    # Script entry point: load and filter the per-user samples, wrap
    # GammaBFAuth in the CV harness, and open the CSV that receives the
    # cross-validation results.
    from CV import CV
    from preprocessor import split_samples, load_data, filter_users_val

    P.np.seterr(all='ignore')

    all_data, pkd = filter_users_val(split_samples(load_data()))

    # Iterate over a snapshot of the keys: deleting entries while walking the
    # live key view raises "dictionary changed size during iteration" on
    # Python 3.
    for u in list(all_data.keys()):
        if all_data[u] == []:
            del all_data[u]
            del pkd[u]


    gbfa = CV(lambda: GammaBFAuth(all_data),
              all_data,
              pkd)

    # 'rw+' is not a valid mode for Python 3's open() (exactly one of
    # r/w/a/x is allowed). The header row is written first, so a freshly
    # created/truncated read-write file ('w+') is used.
    # NOTE(review): on Python 3 the csv module recommends newline='' when
    # opening the file - confirm the target interpreter before adding it.
    with open('./bf_result.csv', 'w+') as res_file:
        result_writer = csv.writer(res_file)
        result_writer.writerow(['user',
                                'CV_IPR', 'CV_FRR', 'CV_GT', 'CV_IT'])

        for n,i in enumerate(gbfa.validate_user('1227981')):
            cv_res = i
            '''
            result_writer.writerow(['user',
                                    'train_IPR', 'train_FRR', 'train_GT', 'train_IT',
                                    'CV_IPR', 'CV_FRR', 'CV_GT', 'CV_IT'])

            for u in train_res.keys():
from CV import CV

if __name__ == '__main__':
    # Run CV.dealV over a single working directory.
    # Positional dealV arguments, in order:
    #   speed (other values tried: 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.8, 2),
    #   dealOldFilesMode, gpu, threads.
    deal_args = (1.6, 0, False, 2)
    worker = CV(workDir=r'H:\度盘\siki学院公开课第009期-忍者跑酷 Ninja',
                sleepTime=1)
    worker.dealV(*deal_args)
示例#28
0
 def populate():
     """Copy the current form field values into a CV and write it out.

     A CV is created from the ``input`` field, every setter is fed the
     matching field's current value, and ``CV.populate`` is finally called
     with the ``output`` field's value.
     """
     document = CV(input.get())

     # Position being applied for.
     document.set_job(job_title.get())

     # Company details.
     document.set_company_name(company_name.get())
     document.set_company_name_short(company_name_short.get())
     document.set_company_addr(company_addr.get())
     document.set_company_province(company_province.get())

     # Addressee details.
     document.set_receiver(receiver.get())
     document.set_receiver_title(receiver_title.get())
     document.set_receiver_last_name(receiver_last_name.get())

     # The paragraph field is read from index "1.0" through "end".
     body_text = paragraph.get("1.0", "end")
     document.set_paragraph(body_text)

     destination = output.get()
     document.populate(destination)
示例#29
0
from CV import CV

if __name__ == '__main__':
    # Run CV.dealV over a single working directory.
    # Other speed values tried: 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.8, 2.
    chosen_speed = 1.6
    old_files_mode = 0
    use_gpu = False
    video_dir = r'D:\s\De\度盘\C#项目开发实战入门(光盘资源)\Video'
    processor = CV(workDir=video_dir, sleepTime=0)
    # The trailing 8 is passed positionally as dealV's fourth argument.
    processor.dealV(chosen_speed, old_files_mode, use_gpu, 8)
示例#30
0
 def feed(self, arr, d=0.001):
     """Build sparse matrices for the features in ``arr`` and run one CV round.

     Rows of ``self.data`` are split three ways using the label column
     (``self.label_name``) and the ``val_tags`` column:

     * train      - label != -1 and val_tags == 0
     * train_val  - label != -1 and val_tags == 1
     * test       - label == -1

     Each feature is encoded according to ``config.type_dict``: ``'onehot'``
     features are label-encoded then one-hot encoded, ``'cv'`` features go
     through a ``CountVectorizer``, and features missing from the dict are
     used as raw numeric columns. Predictions stored by earlier calls
     (``self.train_score_lst`` and friends) are prepended as extra columns.
     A ``CV`` object is then trained on the train matrix and its predictions
     are appended to the three score lists.

     Args:
         arr: iterable of column names; every name must be a column of
             ``self.data`` (checked with ``assert``).
         d: not used anywhere in the visible body.

     Returns:
         None. If a feature has an unrecognised ``config.type_dict`` value
         the method prints ``'name error'`` and returns early.

     Side effects:
         Overwrites one-hot and count-vectorised columns of ``self.data``
         in place; sets ``self.train_pred`` and ``self.c``; appends to
         ``self.train_score_lst``, ``self.test_score_lst``,
         ``self.train_val_score_lst`` and ``self.c_lst``.
     """
     # Membership table of the available column names.
     temp_dict = {}
     for item in self.data.columns.tolist(): temp_dict[item] = 1
     for item in arr:
         assert item in temp_dict
     # start
     # The bare string below is the original section marker ("concatenation").
     '''拼接'''
     # Start from empty (n_rows x 0) matrices so encoded columns can be
     # hstack-ed onto them one feature at a time.
     train_csr = sparse.csr_matrix((len(self.data[[self.label_name]].loc[np.logical_and(self.data[self.label_name]!=-1, self.data['val_tags']==0)]), 0))
     train_val_csr = sparse.csr_matrix((len(self.data[[self.label_name]].loc[np.logical_and(self.data[self.label_name]!=-1, self.data['val_tags']==1)]), 0))
     test_csr = sparse.csr_matrix((len(self.data[[self.label_name]].loc[self.data[self.label_name]==-1]), 0))
     # Bucket the requested features by their declared encoding.
     _onehot_feature = []
     _cv_feature = []
     _row_feature = []
     for item in arr:
         if item not in config.type_dict: 
             # No declared type: used as-is as a raw column.
             _row_feature.append(item)
         elif config.type_dict[item] == 'cv':
             _cv_feature.append(item)
         elif config.type_dict[item] == 'onehot':
             _onehot_feature.append(item)
         else:
             # Unknown type: abort without training anything.
             print('name error')
             return
     # Label-encode one-hot features in place (values are cast to str first);
     # this happens before the row split below so the splits see the codes.
     for features in _onehot_feature:
         self.data[features] = LabelEncoder().fit_transform(self.data[features].astype(str))
     _train = self.data.loc[np.logical_and(self.data[self.label_name]!=-1, self.data['val_tags']==0)]
     _train_val = self.data.loc[np.logical_and(self.data[self.label_name]!=-1, self.data['val_tags']==1)]
     _test = self.data.loc[self.data[self.label_name]==-1]
     # One encoder instance is re-fitted per feature on the full column
     # (train, validation and test rows together).
     enc = OneHotEncoder()
     for feature in _onehot_feature:
         enc.fit(self.data[feature].values.reshape(-1, 1))
         train_csr = sparse.hstack((train_csr, enc.transform(_train[feature].values.reshape(-1, 1))), 'csr', 'bool')
         train_val_csr = sparse.hstack((train_val_csr, enc.transform(_train_val[feature].values.reshape(-1, 1))), 'csr', 'bool')
         test_csr = sparse.hstack((test_csr, enc.transform(_test[feature].values.reshape(-1, 1))), 'csr', 'bool')
     # Count-vectorised features: vocabulary fitted on the full column, terms
     # appearing in fewer than 20 rows are dropped (min_df=20).
     cv = CountVectorizer(min_df=20)
     for feature in _cv_feature:
         self.data[feature] = self.data[feature].astype(str)
         cv.fit(self.data[feature])
         train_csr = sparse.hstack((train_csr, cv.transform(_train[feature].astype(str))), 'csr', 'bool')
         train_val_csr = sparse.hstack((train_val_csr, cv.transform(_train_val[feature].astype(str))), 'csr', 'bool')
         test_csr = sparse.hstack((test_csr, cv.transform(_test[feature].astype(str))), 'csr', 'bool')
     # Raw columns go on the left of the encoded block; everything becomes float32.
     train_csr = sparse.hstack((sparse.csr_matrix(_train[_row_feature]), train_csr), 'csr').astype('float32')
     train_val_csr = sparse.hstack((sparse.csr_matrix(_train_val[_row_feature]), train_val_csr), 'csr').astype('float32')
     test_csr = sparse.hstack((sparse.csr_matrix(_test[_row_feature]), test_csr), 'csr').astype('float32')
     
     # Prepend the predictions saved by earlier feed() calls, one column per
     # saved entry, to each of the three matrices.
     if len(self.train_score_lst) != 0:
         for ix in range(len(self.train_score_lst)):
             train_csr = sparse.hstack((sparse.csr_matrix(np.array(self.train_score_lst[ix]).reshape(-1, 1)), train_csr), 'csr').astype('float32')
             train_val_csr = sparse.hstack((sparse.csr_matrix(np.array(self.train_val_score_lst[ix]).reshape(-1, 1)), train_val_csr), 'csr').astype('float32')
             test_csr = sparse.hstack((sparse.csr_matrix(np.array(self.test_score_lst[ix]).reshape(-1, 1)), test_csr), 'csr').astype('float32')
     # The bare string below is the original section marker
     # ("CV, plugs straight into the existing helper").
     '''CV,与之前的轮子直接对接'''
     lgb_params = { 'boosting_type':'gbdt', 'num_leaves':200, 
                    'reg_alpha':1, 'reg_lambda':1, 
                    'n_estimators':100000, 'objective':'binary',
                    'subsample':0.7, 'colsample_bytree':0.6, 
                    'learning_rate':0.02, 'min_child_weight':1}
     c = CV(_df=train_csr, y=_train[self.label_name].values, 
            random_state=self.random_state, is_val=False)
     c.CV(is_print=True, lgb_params=lgb_params, n_splits=5, round_cv=1)
     # Average the 'pred_train' arrays over every entry of c.MS_arr.
     self.train_pred = 0
     for item in c.MS_arr:
         self.train_pred += np.array(item['pred_train'])
     self.train_pred /= len(c.MS_arr)
     # Save this round's predictions so the next feed() call can use them as
     # extra input columns.
     self.train_score_lst.append(self.train_pred)
     self.test_score_lst.append(c.get_result(test_csr))
     self.train_val_score_lst.append(c.get_result(train_val_csr))
     # Keep the latest CV object and the history of all of them.
     self.c = c
     self.c_lst.append(c)