Python preprocessing示例，util.preprocessing Python示例

示例#1

0

显示文件

    def flow(self, x, *args, **kwargs):
        """Generate batches of masked sound fields.

        Batches are pulled from the parent class's ``flow`` and paired with
        one freshly sampled mask per sample in the batch.

        Args:
            x: np.ndarray, forwarded unchanged to the parent ``flow``.
            *args: extra positional arguments for the parent ``flow``.
            **kwargs: extra keyword arguments for the parent ``flow``.

        Yields:
            [np.ndarray, np.ndarray], np.ndarray -- the two outputs of
            ``util.preprocessing`` (irregular sound field and mask) and the
            ground-truth batch after ``util.scale``.
        """
        while True:
            # Build the parent iterator once and drain it. Re-creating it for
            # every batch (as ``next(super().flow(...))`` did) only ever
            # consumes the first batch of a brand-new iterator.
            produced = False
            for sf_gt in super().flow(x, *args, **kwargs):
                produced = True

                # Work on a copy so the ground truth is untouched by whatever
                # preprocessing does to its input.
                initial_sf = copy.deepcopy(sf_gt)

                # One mask per sample in the batch.
                mask = np.stack(
                    [self.mask_generator.sample()
                     for _ in range(sf_gt.shape[0])],
                    axis=0)

                irregular_sf, mask = util.preprocessing(
                    self.factor, initial_sf, mask)

                # Scale ground truth sound field
                sf_gt = util.scale(sf_gt)

                gc.collect()
                yield [irregular_sf, mask], sf_gt

            if not produced:
                # The parent produced no batch at all: stop instead of
                # spinning forever on an empty source.
                return

示例#2

0

显示文件

def classsification(file_dir):
    """Count the predictions that fall into classes 0, 1 and 2.

    ``file_dir`` is passed to ``util.preprocessing`` and the result is fed to
    ``classify``. Predictions equal to none of 0, 1 or 2 are ignored.

    Returns:
        tuple: (class0, class1, class2) occurrence counts.
    """
    features = util.preprocessing(file_dir)
    predictions = classify(features)

    tallies = [0, 0, 0]
    for label in predictions:
        # Compare against 0, 1, 2 in that order and stop at the first match.
        for class_id in range(3):
            if label == class_id:
                tallies[class_id] += 1
                break

    return tallies[0], tallies[1], tallies[2]

示例#3

0

显示文件

# Describe your findings
'''

from sklearn import svm
import util
import numpy as np
import matplotlib.pylab as plt

# Fetch the train/test splits from the project helper.
train, test = util.load_data()

# Values of the SVM penalty parameter C to sweep over.
C = [1e-5, 1e-3, 1e-1, 1e1, 1e3]

# Turn the training split into a feature matrix and a label vector.
X, y = util.preprocessing(train, 2.0)

# Train one linear SVM per C and collect the norm of its weight vector.
w_norm = []

for c in C:
    print(">>>>> C = {} >>>".format(c))

    # ``fit`` returns the estimator itself, so construction and training chain.
    clf = svm.SVC(C=c, kernel='linear').fit(X, y)

    print('w = ', clf.coef_)
    print('b = ', clf.intercept_)

    # numpy's default norm of the coefficient array.
    norm = np.linalg.norm(clf.coef_)
    print("|w|", norm)
    w_norm.append(norm)

# plot result (|w| v.s. log(C))

示例#4

0

显示文件

import pandas as pd
import util
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20; use the
# ``model_selection`` location and fall back only on very old releases.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
# ``sklearn.externals.joblib`` was removed in scikit-learn 0.23; the
# standalone ``joblib`` package replaces it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
from sklearn.metrics import classification_report

#build 7's model after case study
data7=pd.read_excel('201707_label new.xlsx')
#data7=data7[data7['是否接通']==1]
#data7=data7[['存折计划' in c for c in data7['租机计划']]]
#data7['跪舔']=data7['跪舔']-data7['label']  # this feature raises the f1-score by 0.02
# Clip the column to at most 1 before preprocessing.
data7.loc[data7['跪舔']>=1,'跪舔']=1
data7=util.preprocessing(data7)
X=features=util.extractFeatures(data7)
y=data7['label']

# Hold out 30% of the rows for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model=GradientBoostingRegressor()
model.fit(X_train, y_train)
print(model.score(X_train,y_train))
print(model.score(X_test,y_test))
y_pre=model.predict(X_test)

# Re-attach the original rows by index so the clipped column sits next to the
# prediction and the true label.
a=pd.DataFrame({"pre":y_pre,"y_test":y_test}).merge(data7,how='left',left_index=True,right_index=True)[['pre','跪舔','y_test']]

# Blend the prediction with the clipped column for weights 0.50 .. 1.00 and
# report on each blend.
for percent in range(50,105,5):
    k=percent/100
    a['pre_weight_{}'.format(k)]=k*a.pre+(1-k)*a['跪舔']
    util.showReport(a['pre_weight_{}'.format(k)],a.y_test,30)
#util.showFigure1(y_pre,y_test)
#util.showPRfigure(y_pre,y_test)
#util.showReport(y_pre,y_test,20)
'''

示例#5

0

显示文件

print(model.score(X_test,y_test))
y_pre=model.predict(X_test)
util.showFigure1(y_pre,y_test)
util.showPRfigure(y_pre,y_test)
util.showReport(y_pre,y_test,50)
'''

#predict 9 with 8's model
data8 = pd.read_excel('8月租机到期数据-结果.xlsx')
# Take an explicit copy of the filtered rows: the column assignments below
# then act on an independent frame instead of a slice of the original, which
# is what triggers pandas' SettingWithCopyWarning.
data8 = data8[data8['是否接通'] == 1].copy()
data8['跪舔'] = data8['跪舔'] - data8['label']
data8.loc[data8['跪舔'] >= 1, '跪舔'] = 1

data9 = pd.read_csv('201709-租机到期数据.csv', encoding='gbk')
data9.loc[data9['跪舔'] >= 1, '跪舔'] = 1

# Stack both months (shared columns only) so they go through the same
# preprocessing and feature extraction.
data = pd.concat((data8, data9), axis=0, join='inner', ignore_index=True)
data = util.preprocessing(data)
X = features = util.extractFeatures(data, num_train=data8.shape[0])

# The first data8.shape[0] rows of the concatenation came from data8.
# NOTE(review): this split assumes util.preprocessing / util.extractFeatures
# keep the row order and count -- confirm.
X_train = X[:data8.shape[0]]
X_test = X[data8.shape[0]:]
y_train = data8['label']

model = GradientBoostingRegressor()
model.fit(X_train, y_train)
print(model.score(X_train, y_train))
y_pre = model.predict(X_test)

# Final score: 90% model prediction, 10% clipped column; highest score first.
data9['label'] = y_pre
data9['label'] = 0.9 * data9['label'] + 0.1 * data9['跪舔']
data9.sort_values(by='label', ascending=False, inplace=True)
data9.to_csv('201709_score.csv')

#slice
data9 = data9[[

示例#6

0

显示文件

文件： inference.py 项目： xzm2004260/sound-field-neural-network

def reconstruct_soundfield(model, sf_sample, mask, factor, frequencies,
                           filename, num_file, com_num, results_dict):
    """ Reconstruct and evaluate sound field

        One row of metrics is appended to every list in ``results_dict`` for
        each entry of ``frequencies``.

        Args:
        model: keras model
        sf_sample: np.ndarray
        mask: np.ndarray (indexed as 3-D once a batch axis is added)
        factor: int
        frequencies: list
        filename: string; the last four characters are dropped and the rest
            is split on '_' (fields 2, 3 and 4 are stored)
        num_file: int
        com_num: int
        results_dict: dict of lists, extended in place

        Returns: dict (the same ``results_dict`` object)

    """

    # Create one sample batch. Expand dims
    sf_sample = np.expand_dims(sf_sample, axis=0)
    sf_gt = copy.deepcopy(sf_sample)

    mask = np.expand_dims(mask, axis=0)
    mask_gt = copy.deepcopy(mask)

    # preprocessing
    irregular_sf, mask = util.preprocessing(factor, sf_sample, mask)

    # predict sound field
    pred_sf = model.predict([irregular_sf, mask])

    # measured observations. To use in postprocessing
    measured_sf = util.downsampling(factor, copy.deepcopy(sf_gt))
    measured_sf = util.apply_mask(measured_sf, mask_gt)

    # compute csv fields
    name = filename[:-4]
    split_filename = name.split('_')
    # Flat indices of the mask positions equal to 1 (first channel only).
    pattern = np.where(mask_gt[0, :, :, 0].flatten() == 1)[0]
    num_mic = len(pattern)

    for freq_num, freq in enumerate(frequencies):
        gt_slice = sf_gt[0, :, :, freq_num]

        # Postprocessing. The original called this twice in a row with the
        # same arguments and kept only the second result; one call is enough
        # provided util.postprocessing does not modify its inputs
        # (NOTE(review): confirm).
        reconstructed_sf_slice = util.postprocessing(pred_sf, measured_sf,
                                                     freq_num, pattern, factor)

        # Compute Metrics
        nmse = util.compute_NMSE(gt_slice, reconstructed_sf_slice)

        data_range = gt_slice.max() - gt_slice.min()
        ssim = util.compute_SSIM(gt_slice.astype('float32'),
                                 reconstructed_sf_slice, data_range)

        average_pressure_real = util.compute_average_pressure(gt_slice)
        average_pressure_predicted = util.compute_average_pressure(
            reconstructed_sf_slice)
        average_pressure_previous = util.compute_average_pressure(
            measured_sf[0, :, :, freq_num])

        # store results
        results_dict['freq'].append(freq)
        results_dict['name'].append(name)
        results_dict['xDim'].append(split_filename[2])
        results_dict['yDim'].append(split_filename[3])
        results_dict['m2'].append(split_filename[4])
        results_dict['num_mics'].append(num_mic)
        results_dict['num_comb'].append(com_num)
        results_dict['num_file'].append(num_file)
        results_dict['pattern'].append(pattern)
        results_dict['NMSE'].append(nmse)
        results_dict['SSIM'].append(ssim)
        results_dict['p_real'].append(average_pressure_real)
        results_dict['p_predicted'].append(average_pressure_predicted)
        results_dict['p_previous'].append(average_pressure_previous)

    return results_dict

示例#7

0

显示文件

文件： inference.py 项目： xzm2004260/sound-field-neural-network

def visualize(config_path):
    """ Plot predictions of trained model on real data.

        Saves one PNG per frequency for the scaled ground truth, the
        irregular sound field, the mask and the prediction into a
        'visualization' folder next to the configuration file.

        Args:
        config_path: string

    """

    config = util.load_config(config_path)
    print('Loaded configuration from: %s' % config_path)

    frequencies = util.get_frequencies()

    # Everything up to and including the last '/' of the config path.
    last_slash = config_path.rfind('/')
    session_dir = config_path[:last_slash + 1]

    checkpoint_path = get_latest_checkpoint_path(session_dir)
    if not checkpoint_path:
        print(
            'Error: No checkpoint found in same directory as configuration file.'
        )
        return

    # NOTE(review): checkpoint_path is not used again in this function, so no
    # weights are visibly loaded into the model here -- confirm this is
    # handled inside sfun.SFUN or elsewhere.
    model = sfun.SFUN(config, train_bn=False)

    visualization_path = os.path.join(session_dir, 'visualization')
    if not os.path.exists(visualization_path):
        os.makedirs(visualization_path)

    def _plot_all_frequencies(banner, field, suffix):
        # Print the banner, then save one 2-D plot per frequency channel.
        print(banner)
        for idx, hz in enumerate(frequencies):
            print('\tat frequency ' + str(hz))
            util.plot_2D(
                field[0, ..., idx],
                os.path.join(visualization_path, str(hz) + suffix))

    dataset_cfg = config['dataset']
    filepath = os.path.join(dataset_cfg['path'], 'real_soundfields',
                            'RoomB_soundfield.mat')

    mask_generator = data.MaskGenerator(
        dataset_cfg['xSamples'] // dataset_cfg['factor'],
        dataset_cfg['ySamples'] // dataset_cfg['factor'],
        len(frequencies),
        num_mics=config['visualization']['num_mics'])

    # Measured sound field: an independent copy serves as ground truth, the
    # loaded array itself goes through preprocessing.
    sf_sample = util.load_RoomB_soundfield(filepath,
                                           config['visualization']['source'])
    sf_gt = np.expand_dims(copy.deepcopy(sf_sample), axis=0)
    initial_sf = np.expand_dims(sf_sample, axis=0)

    # A single mask with a leading batch axis.
    mask = np.expand_dims(mask_generator.sample(), axis=0)

    irregular_sf, mask = util.preprocessing(config['dataset']['factor'],
                                            initial_sf, mask)

    # Scale ground truth sound field
    sf_gt = util.scale(sf_gt)

    _plot_all_frequencies('\nPlotting Ground Truth Sound Field Scaled...',
                          sf_gt, '_Hz_Ground_Truth.png')
    _plot_all_frequencies('\nPlotting Irregular Sound Field...',
                          irregular_sf, '_Hz_Irregular_SF.png')
    _plot_all_frequencies('\nPlotting Mask...', mask, '_Hz_Mask.png')

    pred_sf = model.predict([irregular_sf, mask])

    _plot_all_frequencies('\nPlotting Predicted Sound Field...', pred_sf,
                          '_Hz_Pred_SF.png')

示例#8

0

显示文件

import pandas as pd
import util
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20; use the
# ``model_selection`` location and fall back only on very old releases.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor
# ``sklearn.externals.joblib`` was removed in scikit-learn 0.23; the
# standalone ``joblib`` package replaces it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

#build 6's model after case study
data6=pd.read_excel('201706.xlsx')
#data6=data6[data6['是否接通']==1]
#data6=data6[['存折计划' in c for c in data6['租机计划']]]
data6=util.preprocessing(data6)
X=features=util.extractFeatures(data6)
y=data6['label']

# Hold out 30% of the rows for evaluation; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model=RandomForestRegressor(n_jobs=-1)
model.fit(X_train, y_train)
print(model.score(X_train,y_train))
print(model.score(X_test,y_test))

# Inspect the held-out predictions with the project's reporting helpers.
y_pre=model.predict(X_test)
util.showFigure1(y_pre,y_test)
util.showPRfigure(y_pre,y_test)
util.showReport(y_pre,y_test,20)
print(model.feature_importances_)
'''
#predict 7 with 6's model
data6=pd.read_excel('201706.xlsx')
data6=data6[data6['是否接通']==1]
data7=pd.read_excel('201707.xlsx')
data=pd.concat((data6,data7), axis=0, join='inner',ignore_index=True)
data=util.preprocessing(data)
X=features=util.extractFeatures(data)

示例#9

0

显示文件

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
# Preprocessing data:
# standardize testing data using training data's mean and standard deviation.
data_train = pd.read_csv('D_train.csv')
data_test = pd.read_csv('D_test.csv')

# Both calls return (frame, column labels); the labels from the test call are
# the ones used below, because they overwrite the ones from the train call.
data_train_new, new_label = preprocessing(data_train)
data_test_new, new_label = preprocessing(data_test)
# new_label = ['x_mean','y_mean','z_mean','x_st','y_st','z_st','x_max','y_max','z_max','x_min','y_min','z_min']

# Fit the scaler on the training columns only, then reuse it on the test set.
scaler = StandardScaler()
train_values = data_train_new[new_label].to_numpy()
data_train_new[new_label] = scaler.fit_transform(train_values)
test_values = data_test_new[new_label].to_numpy()
data_test_new[new_label] = scaler.transform(test_values)

# Split each standardized frame into features and labels.
train_X, train_label = get_X_and_label(data_train_new)
test_X, test_label = get_X_and_label(data_test_new)
'''Using Parameters to print confusion matrix and Classification Report for testing data
The following are:
1. Naive Bayes with Gaussian Density Estimation
2. Bayes with density Estimation, KNN
3. Support Vector Machine with radial basis funtion Kernel

示例#10

0

显示文件

文件： hw2_adaboost_stump.py 项目： skyshine102/NTUCS_ML2019_Tech

def main():
    """Train AdaBoost with decision stumps and plot its error curves.

    Loads the train and test files, runs ``adaboost`` for ``T`` rounds, then
    saves plots of E_in(g_t), E_in(G_t), U_t and E_out(G_t) to Q13.png ..
    Q16.png and prints the accompanying remarks.
    """
    # Load data and parsing
    train = util.load_data("hw2_adaboost_train.dat.txt")
    X_train, y_train = util.preprocessing(train)
    test = util.load_data("hw2_adaboost_test.dat.txt")
    X_test, y_test = util.preprocessing(test)
    print("The shape of X_train is ({},{})".format(X_train.shape[0],
                                                   X_train.shape[1]))

    # number of training examples
    N = len(y_train)
    print("N = ", N)

    # number of boosting iterations
    T = 300

    # Start training Adaboost-Stump with uniform weights 1/N
    weights = np.ones(N) * (1 / N)
    print("Initial weights =", weights[:5])
    alphas, g_funcs, E_in_gt, E_in_Gt, U_t = adaboost(X_train, y_train,
                                                      decision_stump,
                                                      decision_stump_predict,
                                                      weights, T)
    # plot results
    print(">>>> plot E_in_gt >>>>")
    plt.plot(E_in_gt)
    plt.savefig("Q13.png")
    plt.show()
    print(
        "From the plot, we can see that E_in(g_t) is neither increasing nor decreasing."
    )
    print("The plot is somewhat like periodic wave.")
    print(
        "It's because in each round of training, reweighting is made for more diverse hypothesis."
    )
    print("The diversity results in no guarantee for the performance of g_t.")
    print("E_in_gT = ", E_in_gt[-1])

    print(">>>> plot E_in_Gt >>>>")
    plt.plot(E_in_Gt)
    plt.savefig("Q14.png")
    plt.show()
    print("From the plot, we can see that E_in(G_t) is decreasing.")
    print(
        "It's because with more rounds of training, the ensembled model is using more diversed base models for prediction."
    )
    print("Therefore, the performace of G_t is getting better.")
    print(
        "From the proof of Q18, we can see that E_in(G_t) will be 0 within O(log(N)) steps, which can be observed in this plot."
    )
    print("E_in_GT = ", E_in_Gt[-1])

    print(">>>> plot U_t >>>>")
    plt.plot(U_t)
    plt.savefig("Q15.png")
    plt.show()
    print("From the plot, we can see that U_t is decreasing exponentially.")
    print("Since epsilon_t < 1/2, the result is expected.")
    print("The trend matches the result of Q17.")
    print("U_T = ", U_t[-1])

    # Out-of-sample error of the ensemble built from the first `step` rounds.
    # The upper bound is T + 1 so the last entry uses all T rounds;
    # range(1, T) stopped one short and reported G_{T-1} as the final value.
    E_out_Gt = []
    for step in range(1, T + 1):
        y_test_estimated_by_ensemble = calculate_ensembled_G(
            alphas[:step], g_funcs[:step], X_test)
        E_out_Gt.append(zero_one_loss(y_test, y_test_estimated_by_ensemble))

    print(">>>> plot E_out_Gt >>>>")
    plt.plot(E_out_Gt)
    plt.savefig("Q16.png")
    print(
        "From the plot, we can see E_out(G_t) is generally decreasing --> then increasing a bit --> then saturating."
    )
    print(
        "The result shows that we may consider an early stopping scheme by validation due to the saturation."
    )
    print("E_out_Gt = ", E_out_Gt[-1])