from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn import model_selection
from skfeature.function.similarity_based import fisher_score  # skfeature package

# sel.append(('ANOVA', SelectKBest(f_classif, k=num_fea)))
# sel.append(('TSCR', SelectKBest(t_score.t_score, k=num_fea)))
# sel.append(('GINI', SelectKBest(gini_index.gini_index, k=5)))
sel.append(('FSCR', SelectKBest(fisher_score.fisher_score, k=num_fea)))
# sel.append(('RELF', SelectKBest(reliefF.reliefF, k=num_fea)))

output = open("namesFeat.txt", "w")
scoring = 'roc_auc'

# Univariate feature selection x classification (10-fold CV)
for name, model in models:
    for kind, selection in sel:
        print(kind)
        pipe = make_pipeline(MinMaxScaler(), selection, model)
        # shuffle=True is required when passing a random_state to KFold
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
        cv_results = model_selection.cross_val_score(pipe, X_train, Y_train, cv=kfold)
        # msg = "%s %s: %f (%f)\n" % (kind, name, cv_results.mean(), cv_results.std())
        # output.write(msg)
        pipe.fit(X_train, Y_train)
        # Boolean mask of the features kept by SelectKBest
        feat = pipe.named_steps['selectkbest'].get_support()
        featNum = 0
        for val in feat:
            featNum += 1
            if val:
                # Assumption: the excerpt is truncated here; recording the
                # 1-based index of each selected feature is a plausible body.
                output.write(str(featNum) + "\n")

import time

import cv2
import numpy as np
import scipy.signal
from keras.models import model_from_json
from sklearn.preprocessing import MinMaxScaler


def start():
    device = 0
    frame_rate_ratio = 7
    process_speed = 1
    mirror = True

    # Video reader
    cam = cv2.VideoCapture(device)
    # CV_CAP_PROP_FPS
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    print("Running at {} fps.".format(input_fps))

    ret_val, orig_image = cam.read()
    width = orig_image.shape[1]
    height = orig_image.shape[0]
    factor = 0.3
    i = 0  # default is 0
    resize_fac = 1

    while True:
        cv2.waitKey(10)
        if cam.isOpened() is False or ret_val is False:
            break
        if mirror:
            orig_image = cv2.flip(orig_image, 1)

        tic = time.time()
        cropped = crop(orig_image, width, factor)
        input_image = cv2.resize(cropped, (0, 0),
                                 fx=1 / resize_fac, fy=1 / resize_fac,
                                 interpolation=cv2.INTER_CUBIC)
        print('Processing frame: ', i)
        toc = time.time()
        print('processing time is %.5f' % (toc - tic))

        canvas = cv2.resize(input_image, (0, 0), fx=4, fy=2,
                            interpolation=cv2.INTER_CUBIC)
        imgForText = cv2.resize(canvas, (700, 700), interpolation=cv2.INTER_CUBIC)

        # Zero out the saturation channel so the trace stands out
        img = cv2.cvtColor(canvas, cv2.COLOR_BGR2HSV).astype("float32")
        (h, s, v) = cv2.split(img)
        s = s * 0
        s = np.clip(s, 0, 255)
        img = cv2.merge([h, s, v])
        img = cv2.cvtColor(img.astype("uint8"), cv2.COLOR_HSV2BGR)
        image = cv2.resize(img, (700, 700), interpolation=cv2.INTER_CUBIC)

        # Collect the coordinates of the white pixels (the trace)
        x_list, y_list = [], []
        for x in np.arange(0, 700, 1):
            for y in np.arange(0, 700, 1):
                # if np.all(image[y][x] == (0, 0, 0)):
                if np.all(image[y][x] == (255, 255, 255)):
                    x_list.append(x)
                    y_list.append(700 - y)

        peaks, _ = scipy.signal.find_peaks(y_list, height=400)
        if len(peaks) != 0:
            try:
                extrac = extract_feat(image, peaks[0] - 90, peaks[0] + 96)

                # Note: the model is reloaded on every detection; loading it
                # once before the loop would be cheaper.
                json_file = open('best_model.json', 'r')
                loaded_model_json = json_file.read()
                json_file.close()
                loaded_model = model_from_json(loaded_model_json)
                loaded_model.load_weights("best-model.h5")
                print("Loaded model from disk")
                loaded_model.compile(loss='categorical_crossentropy',
                                     optimizer='adam',
                                     metrics=['accuracy'])

                train_new = np.reshape(extrac, (186, 1))
                scaler = MinMaxScaler(feature_range=(0, 1))
                train_new1 = scaler.fit_transform(train_new)
                p = np.reshape(train_new1, (1, 186))
                predictions = loaded_model.predict(np.reshape(p, (1, 186, 1)))

                # Map the predicted class index to its beat label
                label_map = {0: 'N', 1: 'S', 2: 'V', 3: 'F', 4: 'U'}
                pred_class = np.argmax(predictions, axis=1)[0]
                output = label_map[pred_class]

                cv2.putText(imgForText, output, (x_list[peaks[0]], 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, 0)
                cv2.putText(imgForText,
                            'prob: ' + str(round(predictions[0][pred_class], 2)),
                            (x_list[peaks[0]], 120),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, 0)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            except IndexError:
                print('image not positioned correctly')
        cv2.imshow('frame', imgForText)
        ret_val, orig_image = cam.read()
        i += 1

@author: PRAJWAL
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import random
from sklearn.preprocessing import MinMaxScaler

dataset_training = pd.read_csv('../GOOG_train.csv')
dataset_training.head()

# Use the 'Open' column as the training series
training_data = dataset_training[['Open']].values

sc = MinMaxScaler(feature_range=(0, 1))
training_data_scaled = sc.fit_transform(training_data)

# Build sliding windows of 60 past values per sample
X_train = []
y_train = []
for i in range(60, 1258):
    X_train.append(training_data_scaled[i - 60:i, 0])
    y_train.append(training_data_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

# LSTM expects input shaped (samples, timesteps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

seed = 1

# -*- coding: utf-8 -*-
"""
Created on Wed May 30 11:03:07 2018

@author: user
"""
import pandas as pd

df = pd.read_csv(
    'https://raw.githubusercontent.com/rasbt/pattern_classification/master/data/wine_data.csv',
    header=None,
    usecols=[0, 1, 2])
df.columns = ['Class label', 'Alcohol', 'Malic acid']

from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Standardize the two feature columns in place
sc = StandardScaler()
df.iloc[:, [1, 2]] = sc.fit_transform(df.iloc[:, [1, 2]])

# Min-max scale the frame; note this also rescales the 'Class label' column
mms = MinMaxScaler()
df1 = mms.fit_transform(df)

plt.scatter(range(len(results)), results, c='r')
plt.title('Validate')
plt.show()
"""

"""
Create the sales forecast
"""
ultimosDias = df['2018-11-16':'2018-11-30']

values = ultimosDias.values
values = values.astype('float32')
# Normalize features
values = values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(values)
reframed = series_to_supervised(scaled, pasos, 1)
reframed.drop(reframed.columns[[7]], axis=1, inplace=True)

values = reframed.values
x_test = values[6:, :]
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))


def aggValue(x_test, nVal):
    # Shift the window one step left and append the new value at the end
    for i in range(x_test.shape[2] - 1):
        x_test[0][0][i] = x_test[0][0][i + 1]
    x_test[0][0][x_test.shape[2] - 1] = nVal
    return x_test
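
# series_to_supervised is called above but not defined in this excerpt. Below
# is a minimal sketch of the widely used framing helper (after Jason
# Brownlee's time-series tutorials) that the call appears to assume; treat it
# as an assumption, not the author's exact implementation.
import pandas as pd

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    df = pd.DataFrame(data)
    cols, names = [], []
    # Input sequence (t-n_in, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(df.shape[1])]
    # Forecast sequence (t, t+1, ..., t+n_out-1)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(df.shape[1])]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(df.shape[1])]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
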
import pickle
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

b = os.path.exists(r"D:\ZSNJAP01\flight\prediction\linedlyt")
if b:
    print("path exist")
else:
    os.makedirs(r'D:\ZSNJAP01\flight\prediction\linedlyt')

with open(r"D:\ZSNJAP01\flight\delaydata\30min\lineavgt.pkl", 'rb') as f:
    featime = pickle.load(f)  # ndarray, dim = [3623, 4, 5]

# Flatten to 2-D, scale to [0, 1], then restore the original shape
featime2d = np.reshape(featime, [-1, featime.shape[1] * featime.shape[2]])
scaler1 = MinMaxScaler(feature_range=(0, 1))
featime2d = scaler1.fit_transform(featime2d).squeeze()
featime = np.reshape(featime2d, [-1, featime.shape[1], featime.shape[2]])
featime = featime[:, :, :, np.newaxis]  # add a channel axis

# ConvLSTM hyperparameters
time_step = 2
batch_size = 30
filters = 6
kernel_size = (2, 2)
strides = (1, 1)
padding = 'same'
data_format = 'channels_last'
input_shape = (batch_size, time_step, featime[0].shape[0],
               featime[0].shape[1], featime[0].shape[2])
dilation_rate = (1, 1)
# Keras expects activation names; 'linear' is the identity a(x) = x
activation = 'linear'
recurrent_activation = 'linear'

# Any results you write to the current directory are saved as output.
train_data = pd.read_csv('../input/train.csv')
test_data = pd.read_csv('../input/test.csv')
train_data.head()
train_data.describe()

train_labels = train_data.Cover_Type.values
test_id = test_data.Id.values
train_data.drop(['Soil_Type7', 'Soil_Type15', 'Id', 'Cover_Type'],
                axis=1, inplace=True)
test_data.drop(['Soil_Type7', 'Soil_Type15', 'Id'], axis=1, inplace=True)
print(train_data.shape, test_data.shape)

# Note: accuracy is actually higher here without the scaler
min_max_scaler = MinMaxScaler()
train_data = min_max_scaler.fit_transform(train_data)
# Use transform (not fit_transform) so the test set shares the training scale
test_data = min_max_scaler.transform(test_data)

# Pairwise distances between training points, sorted per row
distance_matrix = pairwise_distances(train_data, metric='euclidean')
print(distance_matrix.shape)
sorted_distance_index = np.argsort(distance_matrix, axis=1).astype(np.uint16)
print(sorted_distance_index)
sorted_distance_labels = train_labels[sorted_distance_index].astype(np.uint8)
print(sorted_distance_labels)

max_k = 100
k_matrix = np.empty((len(sorted_distance_labels), 0), dtype=np.uint8)
for k in range(1, max_k + 1):
    # Majority vote over the k nearest labels. The call is truncated in the
    # original excerpt; the arr slice below is an assumed completion.
    k_along_rows = np.apply_along_axis(
        lambda x: np.bincount(x).argmax(), axis=1,
        arr=sorted_distance_labels[:, :k])

def index():
    import numpy as np

    stock = request.args.get('stockname')
    # These three tickers get a lighter model and a shorter forecast below
    fast_path = stock in ('IDEA.NS', 'AAPL', 'TSLA')

    df = pdr.get_data_yahoo(stock)
    df1 = df.reset_index()['Close']
    scaler = MinMaxScaler(feature_range=(0, 1))
    df1 = scaler.fit_transform(np.array(df1).reshape(-1, 1))

    training_size = int(len(df1) * 0.65)
    test_size = len(df1) - training_size
    train_data, test_data = df1[0:training_size, :], df1[training_size:len(df1), :1]

    # Convert an array of values into a dataset matrix
    def create_dataset(dataset, time_step=1):
        dataX, dataY = [], []
        for i in range(len(dataset) - time_step - 1):
            a = dataset[i:(i + time_step), 0]  # i=0 -> rows 0..99, label at 100
            dataX.append(a)
            dataY.append(dataset[i + time_step, 0])
        return np.array(dataX), np.array(dataY)

    time_step = 100
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, ytest = create_dataset(test_data, time_step)

    # Reshape input to [samples, time steps, features], as required by LSTM
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    # Create the stacked LSTM model
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.layers import LSTM

    model = Sequential()
    if fast_path:
        model.add(LSTM(50, return_sequences=True, input_shape=(100, 1)))
        model.add(LSTM(50, return_sequences=True))
    else:
        model.add(LSTM(50, return_sequences=True, input_shape=(100, 1)))
        model.add(LSTM(50, return_sequences=True))
        model.add(LSTM(50, return_sequences=True))
        model.add(LSTM(50, return_sequences=True))
    model.add(LSTM(50))
    model.add(Dense(1))

    if fast_path:
        model.compile(loss='mean_squared_error', optimizer='adam')
    else:
        # Note: 'accuracy' is not a meaningful metric for regression; kept
        # here as in the original
        model.compile(loss='mean_squared_error', optimizer='adam',
                      metrics=['accuracy'])

    if fast_path:
        model.fit(X_train, y_train, validation_data=(X_test, ytest),
                  epochs=10, batch_size=64, verbose=1)
    else:
        model.fit(X_train, y_train, validation_data=(X_test, ytest),
                  epochs=10, batch_size=10,
                  verbose=2)  # Actual epochs: 50; changed to 10 for the current demo
    train_predict = model.predict(X_train)
    test_predict = model.predict(X_test)

    # Transform back to the original scale
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)

    # Plotting: shift train predictions
    look_back = 100
    trainPredictPlot = np.empty_like(df1)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict
    # Shift test predictions
    testPredictPlot = np.empty_like(df1)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(df1) - 1, :] = test_predict

    if stock in ('AAPL', 'TSLA'):
        x_input = test_data[341:].reshape(1, -1)
    else:
        x_input = test_data[331:].reshape(1, -1)
    temp_input = list(x_input)
    temp_input = temp_input[0].tolist()

    # Demonstrate prediction for the next days: 3 on the fast path, else 10
    lst_output = []
    n_steps = 100
    n_days = 3 if fast_path else 10
    i = 0
    while i < n_days:
        if len(temp_input) > 100:
            x_input = np.array(temp_input[1:])
            x_input = x_input.reshape(1, -1)
            x_input = x_input.reshape((1, n_steps, 1))
            yhat = model.predict(x_input, verbose=0)
            temp_input.extend(yhat[0].tolist())
            temp_input = temp_input[1:]
            lst_output.extend(yhat.tolist())
        else:
            x_input = x_input.reshape((1, n_steps, 1))
            yhat = model.predict(x_input, verbose=0)
            temp_input.extend(yhat[0].tolist())
            lst_output.extend(yhat.tolist())
        i = i + 1

    day_new = np.arange(1, 101)  # 100 test-data indexes
    day_pred = np.arange(101, 101 + n_days)  # predicted days

    if stock in ('AAPL', 'TSLA'):
        numpyData3 = scaler.inverse_transform(df1[1158:])
    else:  # e.g. RELIANCE
        numpyData3 = scaler.inverse_transform(df1[1131:])
    numpyData4 = scaler.inverse_transform(lst_output)

    daynew = {"x": day_new.tolist(), "y": day_pred.tolist()}
    daypred = {"x": numpyData3.tolist(), "y": numpyData4.tolist()}
    data = []
    data.append(daynew)
    data.append(daypred)
    final_data = {"data": data}
    print(final_data)
    return final_data

img_test = pd.read_csv(
    '/Users/deangao/Desktop/CSM226/covidProject_data/radiomics/test_covid_normal_pn.csv'
)
all_data = pd.concat([img_train, img_test], axis=0)

img_train = img_train.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])
img_test = img_test.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])
all_data = all_data.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])

# In[5]:

train_X, train_y = img_train.iloc[:, :-1], img_train.iloc[:, -1]
test_X, test_y = img_test.iloc[:, :-1], img_test.iloc[:, -1]
labels = {0: 'normal lung', 1: 'pneumonia lung', 2: 'covid lung'}

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(train_X)

scaler2 = MinMaxScaler()
scaled_all = scaler2.fit_transform(all_data.iloc[:, :-1])
scaled_all_data = pd.concat([
    pd.DataFrame(all_data.iloc[:, -1].reset_index()),
    pd.DataFrame(scaled_all, columns=train_X.columns)
], axis=1)
scaled_all_data = scaled_all_data.drop(['index'], axis=1)

# In[6]:

all_data.shape
all_data.iloc[:, -1]

    return df_training, df_test  # end of create_training_test (defined above)


# Import data sets, split into training and test sets
dataset_all = pd.read_csv('./datasets/' + dataset_csv)
dataset_training, dataset_test = create_training_test(dataset_all,
                                                      testing_records)

training_set = dataset_training.iloc[:, 1:7].values
###training_set = dataset_training.iloc[:, 4:5].values
test_set = dataset_test.iloc[:, 1:7].values
###test_set = dataset_test.iloc[:, 4:5].values

# Apply scaling
from sklearn.preprocessing import MinMaxScaler
scale = MinMaxScaler(feature_range=(0, 1))
# Apply scale object on data
training_set_scaled = scale.fit_transform(training_set)

X_train = []
y_train = []
for i in range(timesteps, training_set_scaled.shape[0]):
    ###X_train.append(training_set_scaled[i-timesteps:i, 0])
    ###y_train.append(training_set_scaled[i, 0])
    X_train.append(training_set_scaled[i - timesteps:i, 0:6])
    # Open question from the original: should the target be column 3:4 or 0:4?
    y_train.append(training_set_scaled[i, 3:4])

# Book: Ciência de Dados e Aprendizado de Máquina - https://www.amazon.com.br/dp/B07X1TVLKW
# Book: Inteligência Artificial com Python - Redes Neurais Intuitivas - https://www.amazon.com.br/dp/B087YSVVXW
# Book: Redes Neurais Artificiais - https://www.amazon.com.br/dp/B0881ZYYCJ

import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM
import matplotlib.pyplot as plt

base = pd.read_csv('petr4-treinamento.csv')
base = base.dropna()
base_treino = base.iloc[:, 1:2].values

normalizador = MinMaxScaler(feature_range=(0, 1))
base_treino_normalizada = normalizador.fit_transform(base_treino)

# Build sliding windows of 90 past prices per sample
previsores = []
preco_real = []
for i in range(90, 1242):
    previsores.append(base_treino_normalizada[i - 90:i, 0])
    preco_real.append(base_treino_normalizada[i, 0])
previsores, preco_real = np.array(previsores), np.array(preco_real)
previsores = np.reshape(previsores,
                        (previsores.shape[0], previsores.shape[1], 1))

regressor = Sequential()
# The excerpt is truncated mid-call; input_shape below is an assumed
# completion for the first LSTM layer.
regressor.add(LSTM(units=100, return_sequences=True,
                   input_shape=(previsores.shape[1], 1)))

a4 = Rain_test1[3 * 31 * 24:4 * 31 * 24].resample('3H').sum()
a5 = Rain_test1[4 * 31 * 24:5 * 31 * 24].resample('3H').sum()
Qi_test1['Rain_sum'] = pd.concat([a1, a2, a3, a4, a5], axis=0,
                                 ignore_index=True)

#%%
# Environment-table data
# T = air temperature, w = wind direction, wd = wind speed; one row per day
Environmentdata = pd.read_excel('./data/环境表.xlsx')
# Fill missing values
Environmentdata['T'].fillna(method='ffill', inplace=True)
Environmentdata['w'].fillna(method='ffill', inplace=True)
# Min-max normalize wd
from sklearn.preprocessing import MinMaxScaler
ss = MinMaxScaler()
Environmentdata['wd'] = ss.fit_transform(
    Environmentdata['wd'].values.reshape(-1, 1))
Environmentdata['TimeStample'] = pd.to_datetime(Environmentdata['TimeStample'],
                                                format='%Y-%m-%d')
Environmentdata = dataframe_cut(Environmentdata,
                                begin_time='{}-01-01'.format(y_start),
                                end_time='{}-12-31'.format(y_end))

#%%
# Test data
Environmentdata_test = pd.read_excel('./final_data/环境表.xlsx')
# Fill missing values
Environmentdata_test['T'].fillna(method='ffill', inplace=True)
Environmentdata_test['w'].fillna(method='ffill', inplace=True)
# Min-max normalize wd
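
# dataframe_cut is used above but not defined in this excerpt. A plausible
# minimal sketch, assuming it clips rows to a [begin_time, end_time] window on
# the 'TimeStample' column; an assumption, not the author's exact helper.
def dataframe_cut(df, begin_time, end_time):
    mask = (df['TimeStample'] >= begin_time) & (df['TimeStample'] <= end_time)
    return df.loc[mask].reset_index(drop=True)
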
import librosa
import numpy as np
from sklearn.preprocessing import MinMaxScaler

y, sr = librosa.load(wav_path, sr=None)
# Magnitude spectrogram: 640-sample window, 160-sample hop
D = np.abs(librosa.core.stft(y, n_fft=640, hop_length=160,
                             win_length=640, window='hann'))

# Load the noise spectrogram and pad it to the same number of frames
noise_txt_path = noise_path + "/{}".format("noise.txt")
n_D = np.loadtxt(noise_txt_path)
b = int((D.shape[1] - n_D.shape[1]) / 2)
a = D.shape[1] - n_D.shape[1] - b
n_D = np.pad(n_D, ((0, 0), (a, b)), 'constant')

# Convert to dB, transpose to (frames, bins), and min-max scale
D_T = (librosa.amplitude_to_db(D, ref=np.max)).T
minMax = MinMaxScaler()
audio_feature_std = minMax.fit_transform(D_T)
print("original audio_feature shape", audio_feature_std.shape)

length = audio_feature_std.shape[0]
original_feature = audio_feature_std
# loop_num = ceil(length / 4)
if length == 4 * int(length / 4):
    loop_num = int(length / 4)
else:
    loop_num = 1 + int(length / 4)
# 4 frames x 321 bins = 1284 features per row
audio_feature = np.zeros((loop_num, 1284), dtype=np.float32)
PAD = np.zeros((1, 321), dtype=np.float32)

# lr.fit(x_dtrain, y_dtrain)
# predictions = lr.predict(x_test)
# print('AUC', roc_auc_score(y_test, predictions))
# print('accuracy', accuracy_score(y_test, predictions))

# Fit the model
# lr = LogisticRegression(C=1.0, penalty='l2', solver='newton-cg', class_weight='balanced')
# lr.fit(dataset12_x, dataset12_y)
# joblib.dump(lr, 'lr.model')  # save the trained LR model

# Predict the test set
lr = joblib.load('lr.model')
result = lr.predict_proba(dataset3_x)
result = pd.DataFrame(result)
result.index = dataset3.index
result.columns = ['0', 'probability']
result.drop('0', axis=1, inplace=True)

dataset3_preds['label'] = result.copy()
# .values.reshape is needed: a pandas Series no longer supports .reshape
dataset3_preds['label'] = MinMaxScaler().fit_transform(
    dataset3_preds['label'].values.reshape(-1, 1))
dataset3_preds.sort_values(by=['Coupon_id', 'label'], inplace=True)
dataset3_preds.to_csv("lr_preds.csv", index=None, header=None)
print(dataset3_preds.describe())

cost_time = time.time() - start_time
print('cost_time', cost_time)

# Assumed reconstruction: only the else-branch of this MAPE-style helper
# survives in the original excerpt; the signature and the empty-list guard
# are plausible guesses, not the author's exact code.
def MAPE(real_list, pred_list):
    if len(real_list) == 0:
        return 0
    else:
        Len = len(real_list)
        total = 0
        for i in range(Len):
            total = total + abs(pred_list[i] / real_list[i] - 1)
        return (total / Len)


## Build the model
look_back = 1
model = Sequential()
model.add(LSTM(32, input_shape=(24, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

scaler = MinMaxScaler(feature_range=(0, 1))  # normalization function

MAPE_list = [0 for i in range(120)]
for i in range(120):
    train_data_norm = scaler.fit_transform(pd.DataFrame(data_list[i][1:145]))
    train_data_norm = np.array(train_data_norm).tolist()
    train_data_norm = list(chain.from_iterable(train_data_norm))
    # del(train_data_norm[0])
    X, y = split_sequence(train_data_norm, 24)
    n_features = 1
    # Reshape input to [samples, time steps, features]
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    #trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    model.fit(X, y, epochs=50, batch_size=1, verbose=2)
    pred_val = []
    pred_data = train_data_norm[120:145]
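
# split_sequence is called above but not defined in this excerpt. A minimal
# sketch of the common univariate windowing helper (after Jason Brownlee's
# LSTM tutorials) that the call appears to assume; an assumption, not the
# author's exact code.
import numpy as np

def split_sequence(sequence, n_steps):
    X, y = [], []
    for i in range(len(sequence)):
        end_ix = i + n_steps
        # Stop once the window would run past the end of the sequence
        if end_ix > len(sequence) - 1:
            break
        seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)
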
print(df.shape)
print(df.columns)
print(df.head())

X = df.drop('benign_0__mal_1', axis=1).values
y = df['benign_0__mal_1'].values
print('X shape: ', X.shape, 'y shape: ', y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=101)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print('train shape (data points, features): ', X_train.shape)

early_stop = EarlyStopping(monitor='val_loss', mode='min',
                           verbose=1, patience=25)

model = Sequential()
# X_train has 30 features
model.add(Dense(30, activation='relu'))

def train_predict(pipe_data):
    # lgb
    param_grid = [{
        'num_leaves': [20],
        'min_data_in_leaf': [2, 3],
        'objective': ['regression'],
        'max_depth': [3, 4, 5],
        'learning_rate': [0.06, 0.12, 0.24],
        "min_child_samples": [3],
        "boosting": ["gbdt"],
        "feature_fraction": [0.7],
        "bagging_freq": [1],
        "bagging_fraction": [1],
        "bagging_seed": [11],
        "metric": ['mse'],
        "lambda_l2": [0.0003, 0.001, 0.003],
        "verbosity": [-1]
    }]
    lgb_best_params = find_best_params(pipe_data, lgb_predict, param_grid)
    X_train, y_train, X_test, test_idx = split_data(pipe_data,
                                                    target_name='target')
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    X_test = min_max_scaler.transform(X_test)
    oof_lgb, predictions_lgb = lgb_predict(X_train, y_train, X_test,
                                           params=lgb_best_params,
                                           verbose_eval=200)

    # xgb
    param_grid = [{
        'silent': [1],
        'nthread': [4],
        'eval_metric': ['rmse'],
        'eta': [0.03],
        'objective': ['reg:linear'],
        'max_depth': [4, 5, 6],
        'num_round': [1000],
        'subsample': [0.4, 0.6, 0.8, 1],
        'colsample_bytree': [0.7, 0.9, 1],
    }]
    xgb_best_params = find_best_params(pipe_data, xgb_predict, param_grid)
    X_train, y_train, X_test, test_idx = split_data(pipe_data,
                                                    target_name='target')
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    X_test = min_max_scaler.transform(X_test)
    oof_xgb, predictions_xgb = xgb_predict(X_train, y_train, X_test,
                                           params=xgb_best_params,
                                           verbose_eval=200)

    # Model fusion: stacking with a BayesianRidge meta-learner
    train_stack = np.vstack([oof_lgb, oof_xgb]).transpose()
    test_stack = np.vstack([predictions_lgb, predictions_xgb]).transpose()
    folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=4590)
    oof_stack = np.zeros(train_stack.shape[0])
    predictions = np.zeros(test_stack.shape[0])
    for fold_, (trn_idx, val_idx) in enumerate(
            folds_stack.split(train_stack, y_train)):
        print("fold {}".format(fold_))
        trn_data, trn_y = train_stack[trn_idx], y_train[trn_idx]
        val_data, val_y = train_stack[val_idx], y_train[val_idx]
        clf_3 = BayesianRidge()
        clf_3.fit(trn_data, trn_y)
        oof_stack[val_idx] = clf_3.predict(val_data)
        # Average over the 5 splits x 2 repeats = 10 folds
        predictions += clf_3.predict(test_stack) / 10

    final_score = mean_squared_error(y_train, oof_stack)
    print(final_score)
    return predictions