Example #1
# sel.append(('ANOVA', SelectKBest(f_classif, k=num_fea)))
# sel.append(('TSCR', SelectKBest(t_score.t_score, k=num_fea)))
# sel.append(('GINI', SelectKBest(gini_index.gini_index, k=5)))
sel.append(('FSCR', SelectKBest(fisher_score.fisher_score, k=num_fea)))
# sel.append(('RELF', SelectKBest(reliefF.reliefF, k=num_fea)))

output = open("namesFeat.txt", "w")

scoring = 'roc_auc'

# UNIVARIATE FEATURE SELECTION X CLASSIFICATION (10 fold CV)

for name, model in models:
    for kind, selection in sel:
        print(kind)
        pipe = make_pipeline(MinMaxScaler(), selection, model)
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

        cv_results = model_selection.cross_val_score(pipe,
                                                     X_train,
                                                     Y_train,
                                                     cv=kfold,
                                                     scoring=scoring)
        # msg = "%s %s: %f (%f)\n" % (kind, name, cv_results.mean(), cv_results.std())
        #output.write(msg)

        pipe.fit(X_train, Y_train)
        feat = pipe.named_steps['selectkbest'].get_support()
        featNum = 0
        for val in feat:
            featNum += 1
            if val:
                # Assumed intent: record the index of each feature the selector kept
                output.write("%s %s %d\n" % (kind, name, featNum))
Example #2
def start():
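    # Assumed context from the full module (not shown in this excerpt): cv2,
    # numpy as np, time, scipy.signal, MinMaxScaler, Keras' model_from_json,
    # and the custom crop() / extract_feat() helpers used below.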
    device = 0
    frame_rate_ratio = 7
    process_speed = 1
    mirror = True

    # Video reader
    cam = cv2.VideoCapture(device)
    # CV_CAP_PROP_FPS
    input_fps = cam.get(cv2.CAP_PROP_FPS)
    print("Running at {} fps.".format(input_fps))

    ret_val, orig_image = cam.read()

    width = orig_image.shape[1]
    height = orig_image.shape[0]
    factor = 0.3

    i = 0  # default is 0
    resize_fac = 1

    while True:

        cv2.waitKey(10)

        if not cam.isOpened() or not ret_val:
            break

        if mirror:
            orig_image = cv2.flip(orig_image, 1)

        tic = time.time()

        cropped = crop(orig_image, width, factor)

        input_image = cv2.resize(cropped, (0, 0),
                                 fx=1 / resize_fac,
                                 fy=1 / resize_fac,
                                 interpolation=cv2.INTER_CUBIC)

        print('Processing frame: ', i)
        toc = time.time()
        print('processing time is %.5f' % (toc - tic))


        canvas = cv2.resize(input_image, (0, 0),
                            fx=4,
                            fy=2,
                            interpolation=cv2.INTER_CUBIC)

        imgForText = cv2.resize(canvas, (700, 700),
                                interpolation=cv2.INTER_CUBIC)

        img = cv2.cvtColor(canvas, cv2.COLOR_BGR2HSV).astype("float32")

        (h, s, v) = cv2.split(img)
        s = s * 0
        s = np.clip(s, 0, 255)
        img = cv2.merge([h, s, v])

        img = cv2.cvtColor(img.astype("uint8"), cv2.COLOR_HSV2BGR)

        image = cv2.resize(img, (700, 700), interpolation=cv2.INTER_CUBIC)

        x_list, y_list = [], []
        for x in np.arange(0, 700, 1):
            for y in np.arange(0, 700, 1):
                # if np.all(image[y][x] == (0, 0, 0)):
                if np.all(image[y][x] == (255, 255, 255)):
                    x_list.append(x)
                    y_list.append(700 - y)

        peaks, _ = scipy.signal.find_peaks(y_list, height=400)

        if (len(peaks) != 0):

            try:
                extrac = extract_feat(image, peaks[0] - 90, peaks[0] + 96)

                json_file = open('best_model.json', 'r')
                loaded_model_json = json_file.read()
                json_file.close()
                loaded_model = model_from_json(loaded_model_json)

                loaded_model.load_weights("best-model.h5")
                print("Loaded model from disk")

                loaded_model.compile(loss='categorical_crossentropy',
                                     optimizer='adam',
                                     metrics=['accuracy'])

                train_new = np.reshape(extrac, (186, 1))

                scaler = MinMaxScaler(feature_range=(0, 1))

                train_new1 = scaler.fit_transform(train_new)

                p = np.reshape(train_new1, (1, 186))

                predictions = loaded_model.predict(np.reshape(p, (1, 186, 1)))

                class_idx = int(np.argmax(predictions, axis=1)[0])
                # Map the predicted class index to its beat label
                beat_labels = {0: 'N', 1: 'S', 2: 'V', 3: 'F', 4: 'U'}
                output = beat_labels[class_idx]

                cv2.putText(imgForText, output, (x_list[peaks[0]], 100),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.7, 0)
                cv2.putText(imgForText,
                            'prob: ' + str(round(predictions[0][class_idx], 2)),
                            (x_list[peaks[0]], 120),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.4, 0)

                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
            except IndexError:
                print('image not positioned correctly')

        cv2.imshow('frame', imgForText)
        ret_val, orig_image = cam.read()

        i += 1

    # Release the capture device and close the display window once the loop exits
    cam.release()
    cv2.destroyAllWindows()
Example #3
"""
@author: PRAJWAL
"""

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow import random

dataset_training = pd.read_csv('../GOOG_train.csv')
dataset_training.head()

training_data = dataset_training[['Open']].values

from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0, 1))
training_data_scaled = sc.fit_transform(training_data)

X_train = []
y_train = []
for i in range(60, 1258):
    X_train.append(training_data_scaled[i - 60:i, 0])
    y_train.append(training_data_scaled[i, 0])
X_train, y_train = np.array(X_train), np.array(y_train)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

seed = 1
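# A minimal sketch (not part of the original excerpt) of how the build could
# continue with the layers imported above; the layer sizes, dropout rate and
# epoch count are assumptions, not the author's values.
np.random.seed(seed)
random.set_seed(seed)

model = Sequential()
model.add(LSTM(units=50, return_sequences=True,
               input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=25, batch_size=32)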
Example #4
# -*- coding: utf-8 -*-
"""
Created on Wed May 30 11:03:07 2018

@author: user
"""

import pandas as pd
df = pd.read_csv(
    'https://raw.githubusercontent.com/rasbt/pattern_classification/master/data/wine_data.csv',
    header=None,
    usecols=[0, 1, 2])
df.columns = ['Class label', 'Alcohol', 'Malic acid']

from sklearn.preprocessing import StandardScaler, MinMaxScaler
sc = StandardScaler()
df.iloc[:, [1, 2]] = sc.fit_transform(df.iloc[:, [1, 2]])

mms = MinMaxScaler()
df1 = mms.fit_transform(df)
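# A short sketch (an addition, not from the original): if the class label should
# stay untouched, min-max scale only the two feature columns instead.
mms2 = MinMaxScaler()
df_scaled = df.copy()
df_scaled[['Alcohol', 'Malic acid']] = mms2.fit_transform(
    df[['Alcohol', 'Malic acid']])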
Example #5
plt.scatter(range(len(results)), results, c='r')
plt.title('Validate')
plt.show()
"""

"""
Create a sales forecast
"""

ultimosDias = df['2018-11-16':'2018-11-30']

values = ultimosDias.values
values = values.astype('float32')
# normalize features
values = values.reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(values)
reframed = series_to_supervised(scaled, pasos, 1)
reframed.drop(reframed.columns[[7]], axis=1, inplace=True)

values = reframed.values
x_test = values[6:, :]
x_test = x_test.reshape((x_test.shape[0], 1, x_test.shape[1]))


def aggValue(x_test, nVal):
    # Shift the input window one step to the left and append the new value
    for i in range(x_test.shape[2] - 1):
        x_test[0][0][i] = x_test[0][0][i + 1]
    x_test[0][0][x_test.shape[2] - 1] = nVal
    return x_test
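# A minimal usage sketch (assumes a Keras model named `model` trained on the
# reframed series, as elsewhere in the original script): roll the forecast
# forward one step at a time with aggValue().
forecast = []
for _ in range(7):
    pred = model.predict(x_test[0:1])      # predict the next step
    forecast.append(pred[0][0])
    x_test = aggValue(x_test, pred[0][0])  # slide the input window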
Example #6
import pickle
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

b = os.path.exists(r"D:\ZSNJAP01\flight\prediction\linedlyt")
if b:
    print("path exist")
else:
    os.makedirs(r'D:\ZSNJAP01\flight\prediction\linedlyt')

with open(r"D:\ZSNJAP01\flight\delaydata\30min\lineavgt.pkl", 'rb') as f:
    featime = pickle.load(f)  #ndarray dim = [3623,4,5]
featime2d = np.reshape(featime, [-1, featime.shape[1] * featime.shape[2]])
scaler1 = MinMaxScaler(feature_range=(0, 1))
featime2d = scaler1.fit_transform(featime2d).squeeze()
featime = np.reshape(featime2d, [-1, featime.shape[1], featime.shape[2]])
featime = featime[:, :, :, np.newaxis]
time_step = 2
batch_size = 30
filters = 6
kernel_size = (2, 2)
strides = (1, 1)
padding = 'same'
data_format = 'channels_last'
input_shape = (batch_size, time_step, featime[0].shape[0], featime[0].shape[1],
               featime[0].shape[2])
dilation_rate = (1, 1)
activation = 'linear'            # identity activation, i.e. a(x) = x
recurrent_activation = 'linear'  # identity activation, i.e. a(x) = x
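# A minimal sketch (not from the original) of feeding these hyperparameters to a
# ConvLSTM2D layer; the Flatten/Dense head and the mse loss are assumptions.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Flatten, Dense

model = Sequential()
model.add(ConvLSTM2D(filters=filters,
                     kernel_size=kernel_size,
                     strides=strides,
                     padding=padding,
                     data_format=data_format,
                     dilation_rate=dilation_rate,
                     activation=activation,
                     recurrent_activation=recurrent_activation,
                     input_shape=(time_step,) + featime[0].shape,
                     return_sequences=False))
model.add(Flatten())
model.add(Dense(featime.shape[1] * featime.shape[2]))  # one output per grid cell
model.compile(loss='mse', optimizer='adam')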
Example #7
# Any results you write to the current directory are saved as output.
train_data = pd.read_csv('../input/train.csv')
test_data = pd.read_csv('../input/test.csv')
train_data.head()
train_data.describe()
train_labels = train_data.Cover_Type.values
test_id = test_data.Id.values

train_data.drop(['Soil_Type7', 'Soil_Type15', 'Id', 'Cover_Type'],
                axis=1,
                inplace=True)
test_data.drop(['Soil_Type7', 'Soil_Type15', 'Id'], axis=1, inplace=True)

print(train_data.shape, test_data.shape)
min_max_scaler = MinMaxScaler()  # note: skipping the scaler actually gives higher accuracy here
train_data = min_max_scaler.fit_transform(train_data)
test_data = min_max_scaler.transform(test_data)  # use transform, not fit_transform, on the test set

distance_matrix = pairwise_distances(train_data, metric='euclidean')
print(distance_matrix.shape)
sorted_distance_index = np.argsort(distance_matrix, axis=1).astype(np.uint16)
print(sorted_distance_index)
sorted_distance_labels = train_labels[sorted_distance_index].astype(np.uint8)
print(sorted_distance_labels)
max_k = 100
k_matrix = np.empty((len(sorted_distance_labels), 0), dtype=np.uint8)
for k in range(1, max_k + 1):
    k_along_rows = np.apply_along_axis(
        lambda x: np.bincount(x).argmax(),  # majority vote among the k nearest labels
        axis=1,
        arr=sorted_distance_labels[:, :k])  # assumed completion of the truncated call
    k_matrix = np.hstack((k_matrix, k_along_rows.reshape(-1, 1)))
Example #8
def index():
    import numpy as np

    stockname = request.args.get('stockname')
    # These tickers get the shallower network and the shorter forecast horizon below
    short_history_tickers = ('IDEA.NS', 'AAPL', 'TSLA')

    df = pdr.get_data_yahoo(stockname)
    df1 = df.reset_index()['Close']
    scaler = MinMaxScaler(feature_range=(0, 1))
    df1 = scaler.fit_transform(np.array(df1).reshape(-1, 1))

    training_size = int(len(df1) * 0.65)
    test_size = len(df1) - training_size
    train_data, test_data = df1[0:training_size, :], df1[
        training_size:len(df1), :1]

    # import numpy
    # convert an array of values into a dataset matrix
    def create_dataset(dataset, time_step=1):
        dataX, dataY = [], []
        for i in range(len(dataset) - time_step - 1):
            a = dataset[i:(i + time_step), 0]  ###i=0, 0,1,2,3-----99   100
            dataX.append(a)
            dataY.append(dataset[i + time_step, 0])
        return np.array(dataX), np.array(dataY)

    time_step = 100
    X_train, y_train = create_dataset(train_data, time_step)
    X_test, ytest = create_dataset(test_data, time_step)

    # reshape input to be [samples, time steps, features] which is required for LSTM
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    ### Create the Stacked LSTM model
    import tensorflow as tf
    import numpy as np
    from tensorflow import keras
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.layers import LSTM

    # model = tf.keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])  # unused; overwritten below
    # Build the stacked LSTM regressor
    model = Sequential()
    if stockname in short_history_tickers:
        model.add(LSTM(50, return_sequences=True, input_shape=(100, 1)))
        model.add(LSTM(50, return_sequences=True))
    else:
        model.add(LSTM(50, return_sequences=True, input_shape=(100, 1)))
        model.add(LSTM(50, return_sequences=True))
        model.add(LSTM(50, return_sequences=True))
        model.add(LSTM(50, return_sequences=True))

    model.add(LSTM(50))

    model.add(Dense(1))
    if stockname in short_history_tickers:
        model.compile(loss='mean_squared_error', optimizer='adam')
    else:
        model.compile(loss='mean_squared_error',
                      optimizer='adam',
                      metrics=['accuracy'])

    if stockname in short_history_tickers:
        model.fit(X_train,
                  y_train,
                  validation_data=(X_test, ytest),
                  epochs=10,
                  batch_size=64,
                  verbose=1)
    else:
        model.fit(
            X_train,
            y_train,
            validation_data=(X_test, ytest),
            epochs=10,
            batch_size=10,
            verbose=2)  # Actual Epoch 50. Changing to 10 for current demo

    train_predict = model.predict(X_train)
    test_predict = model.predict(X_test)

    ## Transform back to the original scale
    train_predict = scaler.inverse_transform(train_predict)
    test_predict = scaler.inverse_transform(test_predict)

    ### Plotting
    # shift train predictions for plotting
    look_back = 100
    trainPredictPlot = np.empty_like(df1)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(train_predict) +
                     look_back, :] = train_predict
    # shift test predictions for plotting
    testPredictPlot = np.empty_like(df1)
    testPredictPlot[:, :] = np.nan
    testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(df1) -
                    1, :] = test_predict

    if stockname in ('AAPL', 'TSLA'):
        x_input = test_data[341:].reshape(1, -1)
    # elif request.args.get('stockname') == 'TSLA':
    #     x_input = test_data[341:].reshape(1,-1)
    else:
        x_input = test_data[331:].reshape(1, -1)

    temp_input = list(x_input)
    temp_input = temp_input[0].tolist()

    # demonstrate prediction for next 10 days
    from numpy import array

    lst_output = []
    n_steps = 100
    i = 0
    if stockname in short_history_tickers:
        while (i < 3):

            if (len(temp_input) > 100):

                x_input = np.array(temp_input[1:])

                x_input = x_input.reshape(1, -1)
                x_input = x_input.reshape((1, n_steps, 1))

                yhat = model.predict(x_input, verbose=0)
                temp_input.extend(yhat[0].tolist())
                temp_input = temp_input[1:]

                lst_output.extend(yhat.tolist())
                i = i + 1
            else:
                x_input = x_input.reshape((1, n_steps, 1))
                yhat = model.predict(x_input, verbose=0)

                temp_input.extend(yhat[0].tolist())

                lst_output.extend(yhat.tolist())
                i = i + 1
    else:
        while (i < 10):

            if (len(temp_input) > 100):
                # print(temp_input)
                x_input = np.array(temp_input[1:])
                x_input = x_input.reshape(1, -1)
                x_input = x_input.reshape((1, n_steps, 1))
                # print(x_input)
                yhat = model.predict(x_input, verbose=0)

                temp_input.extend(yhat[0].tolist())
                temp_input = temp_input[1:]
                # print(temp_input)
                lst_output.extend(yhat.tolist())
                i = i + 1
            else:
                x_input = x_input.reshape((1, n_steps, 1))
                yhat = model.predict(x_input, verbose=0)

                temp_input.extend(yhat[0].tolist())

                lst_output.extend(yhat.tolist())
                i = i + 1
    if stockname in short_history_tickers:
        day_new = np.arange(1, 101)     # indexes of the 100 test-window points
        day_pred = np.arange(101, 104)  # the next 3 predicted days

    # FOR RELIANCE and the remaining tickers
    else:
        day_new = np.arange(1, 101)     # indexes of the 100 test-window points
        day_pred = np.arange(101, 111)  # the next 10 predicted days

    if stockname in ('AAPL', 'TSLA'):
        numpyData3 = scaler.inverse_transform(df1[1158:])

    # FOR TSLA
    # elif (request.args.get('stockname') =='TSLA'):
    #     numpyData3 = scaler.inverse_transform(df1[1158:])

    # FOR RELIANCE
    else:
        numpyData3 = scaler.inverse_transform(df1[1131:])

    numpyData4 = scaler.inverse_transform(lst_output)
    daynew = {"x": day_new.tolist(), "y": day_pred.tolist()}
    daypred = {"x": numpyData3.tolist(), "y": numpyData4.tolist()}

    data = []
    data.append(daynew)
    data.append(daypred)

    final_data = {"data": data}

    print(final_data)
    return final_data
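# A usage sketch (an assumption: `index` is meant to run as a Flask view); the
# route path and port here are illustrative only.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/stock', view_func=index)  # e.g. GET /stock?stockname=AAPL

if __name__ == '__main__':
    app.run(port=5000)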
Example #9
img_test = pd.read_csv(
    '/Users/deangao/Desktop/CSM226/covidProject_data/radiomics/test_covid_normal_pn.csv'
)
all_data = pd.concat([img_train, img_test], axis=0)

img_train = img_train.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])
img_test = img_test.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])
all_data = all_data.drop(columns=['id', 'Entropy', 'Uniformity', 'Energy'])

# In[5]:

train_X, train_y = img_train.iloc[:, :-1], img_train.iloc[:, -1]
test_X, test_y = img_test.iloc[:, :-1], img_test.iloc[:, -1]
labels = {0: 'normal lung', 1: 'pneumonia lung', 2: 'covid lung'}

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(train_X)

scaler2 = MinMaxScaler()
scaled_all = scaler2.fit_transform(all_data.iloc[:, :-1])
scaled_all_data = pd.concat([
    pd.DataFrame(all_data.iloc[:, -1].reset_index()),
    pd.DataFrame(scaled_all, columns=train_X.columns)
],
                            axis=1)
scaled_all_data = scaled_all_data.drop(['index'], axis=1)

# In[6]:

all_data.shape
all_data.iloc[:, -1]
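# A minimal sketch (an addition; the classifier choice is an assumption, not the
# author's) of fitting a model on the scaled radiomics features.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

clf = LogisticRegression(max_iter=1000)
clf.fit(scaled_data, train_y)
pred_y = clf.predict(scaler.transform(test_X))
print(classification_report(test_y, pred_y,
                            target_names=[labels[i] for i in sorted(labels)]))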
Example #10
    return df_training, df_test


# Import data sets, split into training and test sets
dataset_all = pd.read_csv('./datasets/' + dataset_csv)
dataset_training, dataset_test = create_training_test(dataset_all,
                                                      testing_records)

training_set = dataset_training.iloc[:, 1:7].values
###training_set = dataset_training.iloc[:, 4:5].values
test_set = dataset_test.iloc[:, 1:7].values
###test_set = dataset_test.iloc[:, 4:5].values

# Apply scaling
from sklearn.preprocessing import MinMaxScaler
scale = MinMaxScaler(feature_range=(0, 1))

# Apply scale object on data
training_set_scaled = scale.fit_transform(training_set)

X_train = []
y_train = []

for i in range(timesteps, training_set_scaled.shape[0]):
    ###X_train.append(training_set_scaled[i-timesteps:i, 0])
    ###y_train.append(training_set_scaled[i, 0])
    X_train.append(training_set_scaled[i - timesteps:i, 0:6])
    y_train.append(training_set_scaled[i, 3:4])  # target is column 3; use 0:4 to predict all four series
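# The usual next steps (a sketch, not in the excerpt; assumes numpy is imported
# as np): convert to arrays and reshape to the (samples, timesteps, features)
# layout an LSTM expects.
X_train, y_train = np.array(X_train), np.array(y_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 6))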
Example #11
# Book: Ciência de Dados e Aprendizado de Máquina - https://www.amazon.com.br/dp/B07X1TVLKW
# Book: Inteligência Artificial com Python - Redes Neurais Intuitivas - https://www.amazon.com.br/dp/B087YSVVXW
# Book: Redes Neurais Artificiais - https://www.amazon.com.br/dp/B0881ZYYCJ

import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM
import matplotlib.pyplot as plt

base = pd.read_csv('petr4-treinamento.csv')
base = base.dropna()
base_treino = base.iloc[:, 1:2].values
normalizador = MinMaxScaler(feature_range=(0, 1))
base_treino_normalizada = normalizador.fit_transform(base_treino)

previsores = []
preco_real = []

for i in range(90, 1242):
    previsores.append(base_treino_normalizada[i - 90:i, 0])
    preco_real.append(base_treino_normalizada[i, 0])

previsores, preco_real = np.array(previsores), np.array(preco_real)
previsores = np.reshape(previsores, (previsores.shape[0], previsores.shape[1], 1))

regressor = Sequential()
regressor.add(LSTM(units=100,
                   return_sequences=True,
                   input_shape=(previsores.shape[1], 1)))  # assumed completion of the truncated call
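# A sketch of how the build could continue with the imported Dropout and Dense
# layers; unit counts, dropout rate and epochs are assumptions, not the book's
# values.
regressor.add(Dropout(0.3))
regressor.add(LSTM(units=50))
regressor.add(Dropout(0.3))
regressor.add(Dense(units=1, activation='linear'))
regressor.compile(optimizer='rmsprop', loss='mean_squared_error')
regressor.fit(previsores, preco_real, epochs=100, batch_size=32)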
Example #12
a4 = Rain_test1[3 * 31 * 24:4 * 31 * 24].resample('3H').sum()
a5 = Rain_test1[4 * 31 * 24:5 * 31 * 24].resample('3H').sum()
Qi_test1['Rain_sum'] = pd.concat([a1, a2, a3, a4, a5],
                                 axis=0,
                                 ignore_index=True)

#%%
# Environment table data
# T = air temperature, w = wind direction, wd = wind speed (daily)
Environmentdata = pd.read_excel('./data/环境表.xlsx')
# Fill missing values by forward fill
Environmentdata['T'].fillna(method='ffill', inplace=True)
Environmentdata['w'].fillna(method='ffill', inplace=True)
# Min-max normalize wd
from sklearn.preprocessing import MinMaxScaler
ss = MinMaxScaler()
Environmentdata['wd'] = ss.fit_transform(Environmentdata['wd'].values.reshape(
    -1, 1))
Environmentdata['TimeStample'] = pd.to_datetime(Environmentdata['TimeStample'],
                                                format='%Y-%m-%d')
Environmentdata = dataframe_cut(Environmentdata,
                                begin_time='{}-01-01'.format(y_start),
                                end_time='{}-12-31'.format(y_end))

#%%
# Test data
Environmentdata_test = pd.read_excel('./final_data/环境表.xlsx')
# Fill missing values by forward fill
Environmentdata_test['T'].fillna(method='ffill', inplace=True)
Environmentdata_test['w'].fillna(method='ffill', inplace=True)
# Min-max normalize wd
Environmentdata_test['wd'] = ss.transform(
    Environmentdata_test['wd'].values.reshape(-1, 1))  # assumed completion, reusing the fitted scaler
Example #13
            y, sr = librosa.load(wav_path, sr=None)
            D = np.abs(
                librosa.core.stft(y,
                                  n_fft=640,
                                  hop_length=160,
                                  win_length=640,
                                  window='hann'))
            noise_txt_path = noise_path + "/{}".format("noise.txt")
            n_D = np.loadtxt(noise_txt_path)
            b = int((D.shape[1] - n_D.shape[1]) / 2)
            a = D.shape[1] - n_D.shape[1] - b
            n_D = np.pad(n_D, ((0, 0), (a, b)), 'constant')

            D_T = (librosa.amplitude_to_db(D, ref=np.max)).T

            minMax = MinMaxScaler()
            audio_feature_std = minMax.fit_transform(D_T)
            print("original audio_feature shape", audio_feature_std.shape)

            length = audio_feature_std.shape[0]

            original_feature = audio_feature_std
            # Number of 4-frame chunks (round up when the length is not a multiple of 4)
            if length % 4 == 0:
                loop_num = int(length / 4)
            else:
                loop_num = 1 + int(length / 4)

            audio_feature = np.zeros((loop_num, 1284), dtype=np.float32)

            PAD = np.zeros((1, 321), dtype=np.float32)
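            # A sketch (an assumption, not the original loop) of how the 321-bin
            # frames could be grouped four at a time into the 1284-wide rows of
            # audio_feature, padding the last chunk with PAD.
            for n in range(loop_num):
                chunk = original_feature[4 * n:4 * (n + 1)]
                while chunk.shape[0] < 4:
                    chunk = np.vstack([chunk, PAD])
                audio_feature[n] = chunk.reshape(-1)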
Example #14
    # lr.fit(x_dtrain, y_dtrain)
    # predictions = lr.predict(x_test)
    # print('AUC',roc_auc_score(y_test, predictions))
    # print('accuracy', accuracy_score(y_test, predictions))

    # Fit the model
    # lr = LogisticRegression(C=1.0, penalty='l2', solver='newton-cg', class_weight='balanced')
    # lr.fit(dataset12_x, dataset12_y)
    # joblib.dump(lr, 'lr.model')  # persist the trained LR model
    # predict test set
    lr = joblib.load('lr.model')
    result = lr.predict_proba(dataset3_x)
    result = pd.DataFrame(result)
    result.index = dataset3.index
    result.columns = ['0', 'probability']
    result.drop('0', axis=1, inplace=True)
    dataset3_preds['label'] = result.copy()
    dataset3_preds.label = MinMaxScaler().fit_transform(
        dataset3_preds.label.values.reshape(-1, 1))  # a Series has no .reshape; go through .values
    dataset3_preds.sort_values(by=['Coupon_id', 'label'], inplace=True)
    dataset3_preds.to_csv("lr_preds.csv", index=None, header=None)
    print(dataset3_preds.describe())

    cost_time = time.time() - start_time
    print('cost_time', cost_time)




Example #15
    else:
        Len = len(real_list)
        total = 0
        for i in range(Len):
            total = total + abs(pred_list[i] / real_list[i] - 1)
        return (total / Len)


## Create the model
look_back = 1
model = Sequential()
model.add(LSTM(32, input_shape=(24, 1)))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')

scaler = MinMaxScaler(feature_range=(0, 1))  # scaling (normalization) helper

MAPE_list = [0 for i in range(120)]
for i in range(120):
    train_data_norm = scaler.fit_transform(pd.DataFrame(data_list[i][1:145]))
    train_data_norm = np.array(train_data_norm).tolist()
    train_data_norm = list(chain.from_iterable(train_data_norm))
    # del(train_data_norm[0])
    X, y = split_sequence(train_data_norm, 24)
    n_features = 1
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    # reshape input to be [samples, time steps, features]
    #trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    model.fit(X, y, epochs=50, batch_size=1, verbose=2)
    pred_val = []
    pred_data = train_data_norm[120:145]
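    # A sketch (an assumption about how the loop continues): roll the 24-step
    # window forward, undo the scaling, and score with the MAPE helper defined
    # above (its name and the hold-out slice are assumptions).
    window = list(pred_data[-24:])
    for _ in range(24):
        x_in = np.array(window[-24:]).reshape((1, 24, n_features))
        yhat = model.predict(x_in, verbose=0)[0][0]
        pred_val.append(yhat)
        window.append(yhat)
    pred_real = scaler.inverse_transform(
        np.array(pred_val).reshape(-1, 1)).flatten().tolist()
    MAPE_list[i] = MAPE(pred_real, data_list[i][145:169])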
Example #16
print(df.shape)
print(df.columns)
print(df.head())

X = df.drop('benign_0__mal_1', axis=1).values
y = df['benign_0__mal_1'].values

print('X shape: ', X.shape, 'y shape: ', y.shape)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=101)

scaler = MinMaxScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

print('train shape (data points, features): ', X_train.shape)

early_stop = EarlyStopping(monitor='val_loss',
                           mode='min',
                           verbose=1,
                           patience=25)

model = Sequential()

# X_train has 30 features
model.add(Dense(30, activation='relu'))
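# A sketch of a plausible continuation (layer sizes and epochs are assumptions):
# finish the binary classifier and train it with the early-stopping callback
# defined above.
model.add(Dense(15, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(x=X_train, y=y_train, epochs=600,
          validation_data=(X_test, y_test),
          callbacks=[early_stop])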
Example #17
def train_predict(pipe_data):

    # lgb
    param_grid = [{
        'num_leaves': [20],
        'min_data_in_leaf': [2, 3],
        'objective': ['regression'],
        'max_depth': [3, 4, 5],
        'learning_rate': [0.06, 0.12, 0.24],
        "min_child_samples": [3],
        "boosting": ["gbdt"],
        "feature_fraction": [0.7],
        "bagging_freq": [1],
        "bagging_fraction": [1],
        "bagging_seed": [11],
        "metric": ['mse'],
        "lambda_l2": [0.0003, 0.001, 0.003],
        "verbosity": [-1]
    }]

    lgb_best_params = find_best_params(pipe_data, lgb_predict, param_grid)

    X_train, y_train, X_test, test_idx = split_data(pipe_data,
                                                    target_name='target')
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    X_test = min_max_scaler.transform(X_test)
    oof_lgb, predictions_lgb = lgb_predict(X_train,
                                           y_train,
                                           X_test,
                                           params=lgb_best_params,
                                           verbose_eval=200)  #

    # xgb
    param_grid = [{
        'silent': [1],
        'nthread': [4],
        'eval_metric': ['rmse'],
        'eta': [0.03],
        'objective': ['reg:linear'],
        'max_depth': [4, 5, 6],
        'num_round': [1000],
        'subsample': [0.4, 0.6, 0.8, 1],
        'colsample_bytree': [0.7, 0.9, 1],
    }]

    xgb_best_params = find_best_params(pipe_data, xgb_predict, param_grid)

    X_train, y_train, X_test, test_idx = split_data(pipe_data,
                                                    target_name='target')
    min_max_scaler = MinMaxScaler()
    X_train = min_max_scaler.fit_transform(X_train)
    X_test = min_max_scaler.transform(X_test)
    oof_xgb, predictions_xgb = xgb_predict(X_train,
                                           y_train,
                                           X_test,
                                           params=xgb_best_params,
                                           verbose_eval=200)  #

    # Model ensembling via stacking
    train_stack = np.vstack([oof_lgb, oof_xgb]).transpose()
    test_stack = np.vstack([predictions_lgb, predictions_xgb]).transpose()

    folds_stack = RepeatedKFold(n_splits=5, n_repeats=2, random_state=4590)
    oof_stack = np.zeros(train_stack.shape[0])
    predictions = np.zeros(test_stack.shape[0])

    for fold_, (trn_idx,
                val_idx) in enumerate(folds_stack.split(train_stack, y_train)):
        print("fold {}".format(fold_))
        trn_data, trn_y = train_stack[trn_idx], y_train[trn_idx]
        val_data, val_y = train_stack[val_idx], y_train[val_idx]

        clf_3 = BayesianRidge()
        clf_3.fit(trn_data, trn_y)

        oof_stack[val_idx] = clf_3.predict(val_data)
        predictions += clf_3.predict(test_stack) / 10  # average over the 10 folds (5 splits x 2 repeats)

    final_score = mean_squared_error(y_train, oof_stack)
    print(final_score)
    return predictions
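# A usage sketch: pipe_data and the helpers find_best_params, lgb_predict,
# xgb_predict and split_data are assumed to be defined elsewhere in the module.
final_predictions = train_predict(pipe_data)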