def flow(self, x, *args, **kwargs):
    """ Generates batches of masked sound fields and their scaled ground truth.

    Args:
        x: np.ndarray, forwarded unchanged (with *args / **kwargs) to the
            parent class' ``flow``.

    Yield:
        [np.ndarray, np.ndarray], np.ndarray
        i.e. ``[irregular_sf, mask], sf_gt`` where the first two come from
        ``util.preprocessing`` and the last from ``util.scale``.
    """
    # Build the parent iterator ONCE. Creating it inside the loop restarted
    # it on every iteration, so only the first batch of a brand-new iterator
    # was ever consumed.
    # NOTE(review): assumes the parent iterator yields indefinitely (as Keras
    # data iterators do) -- confirm for the actual base class.
    batches = super().flow(x, *args, **kwargs)

    while True:
        # Get soundfield samples
        sf_gt = next(batches)
        # Separate copy handed to preprocessing so sf_gt itself is not
        # touched before it is scaled below.
        initial_sf = copy.deepcopy(sf_gt)

        # Get mask samples: one independently sampled mask per batch item
        mask = np.stack(
            [self.mask_generator.sample() for _ in range(sf_gt.shape[0])],
            axis=0)

        # preprocessing
        irregular_sf, mask = util.preprocessing(self.factor, initial_sf, mask)

        # Scale ground truth sound field
        sf_gt = util.scale(sf_gt)

        # Explicit collection kept from the original implementation.
        gc.collect()

        yield [irregular_sf, mask], sf_gt
def classsification(file_dir):
    """Count how many predictions fall into class 0, class 1 and class 2.

    The input is run through ``util.preprocessing`` and then ``classify``;
    every predicted label equal to 0, 1 or 2 increments the matching counter.
    Labels equal to none of these are ignored.

    Args:
        file_dir: passed straight to ``util.preprocessing``.

    Returns:
        tuple of three ints: (count of 0s, count of 1s, count of 2s).
    """
    features = util.preprocessing(file_dir)
    predictions = classify(features)

    tallies = [0, 0, 0]
    for label in predictions:
        # Compare against 0, 1, 2 in that order and stop at the first match.
        for class_id in range(3):
            if label == class_id:
                tallies[class_id] += 1
                break

    return tallies[0], tallies[1], tallies[2]
# Describe your findings ''' from sklearn import svm import util import numpy as np import matplotlib.pylab as plt # load datasets train, test = util.load_data() # range of parameter C C = [1e-5, 1e-3, 1e-1, 1e1, 1e3] # preprocessing X, y = util.preprocessing(train, 2.0) # traing svm and store the L2-norm of w w_norm = [] for c in C: print(">>>>> C = {} >>>".format(c)) clf = svm.SVC(C=c, kernel='linear') clf.fit(X, y) print('w = ', clf.coef_) print('b = ', clf.intercept_) norm = np.linalg.norm(clf.coef_) print("|w|", norm) w_norm.append(norm) # default L-2 norm # plot result (|w| v.s. log(C))
import pandas as pd import util from sklearn.cross_validation import train_test_split from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor from sklearn.externals import joblib from sklearn.metrics import classification_report #build 7's model after case study data7=pd.read_excel('201707_label new.xlsx') #data7=data7[data7['是否接通']==1] #data7=data7[['存折计划' in c for c in data7['租机计划']]] #data7['跪舔']=data7['跪舔']-data7['label']#这个feature使f1-score增加0.02 data7.loc[data7['跪舔']>=1,'跪舔']=1 data7=util.preprocessing(data7) X=features=util.extractFeatures(data7) y=data7['label'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) model=GradientBoostingRegressor() model.fit(X_train, y_train) print(model.score(X_train,y_train)) print(model.score(X_test,y_test)) y_pre=model.predict(X_test) a=pd.DataFrame({"pre":y_pre,"y_test":y_test}).merge(data7,how='left',left_index=True,right_index=True)[['pre','跪舔','y_test']] for k in range(50,105,5): k=k/100 a['pre_weight_{}'.format(k)]=k*a.pre+(1-k)*a['跪舔'] util.showReport(a['pre_weight_{}'.format(k)],a.y_test,30) #util.showFigure1(y_pre,y_test) #util.showPRfigure(y_pre,y_test) #util.showReport(y_pre,y_test,20) '''
print(model.score(X_test,y_test)) y_pre=model.predict(X_test) util.showFigure1(y_pre,y_test) util.showPRfigure(y_pre,y_test) util.showReport(y_pre,y_test,50) ''' #predict 9 with 8's model data8 = pd.read_excel('8月租机到期数据-结果.xlsx') data8 = data8[data8['是否接通'] == 1] data8['跪舔'] = data8['跪舔'] - data8['label'] data8.loc[data8['跪舔'] >= 1, '跪舔'] = 1 data9 = pd.read_csv('201709-租机到期数据.csv', encoding='gbk') data9.loc[data9['跪舔'] >= 1, '跪舔'] = 1 data = pd.concat((data8, data9), axis=0, join='inner', ignore_index=True) data = util.preprocessing(data) X = features = util.extractFeatures(data, num_train=data8.shape[0]) X_train = X[:data8.shape[0]] X_test = X[data8.shape[0]:] y_train = data8['label'] model = GradientBoostingRegressor() model.fit(X_train, y_train) print(model.score(X_train, y_train)) y_pre = model.predict(X_test) data9['label'] = y_pre data9['label'] = 0.9 * data9['label'] + 0.1 * data9['跪舔'] data9.sort_values(by='label', ascending=False, inplace=True) data9.to_csv('201709_score.csv') #slice data9 = data9[[
def reconstruct_soundfield(model, sf_sample, mask, factor, frequencies,
                           filename, num_file, com_num, results_dict):
    """ Reconstruct and evaluate sound field

    Runs the model on a single sample, post-processes the prediction for
    every frequency and appends one row of metrics per frequency to
    ``results_dict``.

    Args:
        model: keras model (only ``predict`` is called on it)
        sf_sample: np.ndarray, one sound field; a leading batch axis is
            added here and the last axis is indexed by frequency
        mask: np.ndarray, mask for this sample; a leading batch axis is
            added here
        factor: int, forwarded to util.preprocessing / downsampling /
            postprocessing
        frequencies: list, one entry per index of the last axis
        filename: string; its last four characters are dropped and the
            remainder is split on '_' (fields 2, 3 and 4 are stored as
            xDim, yDim and m2)
        num_file: int
        com_num: int
        results_dict: dict of lists, appended to in place

    Returns: dict
        The same ``results_dict`` object that was passed in.
    """
    # Create one sample batch. Expand dims
    sf_sample = np.expand_dims(sf_sample, axis=0)
    sf_gt = copy.deepcopy(sf_sample)

    mask = np.expand_dims(mask, axis=0)
    mask_gt = copy.deepcopy(mask)

    # preprocessing (the untouched copies sf_gt / mask_gt are kept for the
    # metrics and the measured observations below)
    irregular_sf, mask = util.preprocessing(factor, sf_sample, mask)

    # predict sound field
    pred_sf = model.predict([irregular_sf, mask])

    # measured observations. To use in postprocessing
    measured_sf = util.downsampling(factor, copy.deepcopy(sf_gt))
    measured_sf = util.apply_mask(measured_sf, mask_gt)

    # compute csv fields (loop-invariant, so done once up front)
    name = filename[:-4]
    split_filename = name.split('_')
    # Flat indices where the first-frequency mask equals 1
    pattern = np.where(mask_gt[0, :, :, 0].flatten() == 1)[0]
    num_mic = len(pattern)

    for freq_num, freq in enumerate(frequencies):
        gt_slice = sf_gt[0, :, :, freq_num]

        # Postprocessing. The original called this twice with identical
        # arguments and discarded the first result; once is enough.
        # NOTE(review): assumes util.postprocessing has no side effects --
        # confirm.
        reconstructed_sf_slice = util.postprocessing(
            pred_sf, measured_sf, freq_num, pattern, factor)

        # Compute Metrics
        nmse = util.compute_NMSE(gt_slice, reconstructed_sf_slice)

        data_range = gt_slice.max() - gt_slice.min()
        ssim = util.compute_SSIM(gt_slice.astype('float32'),
                                 reconstructed_sf_slice, data_range)

        average_pressure_real = util.compute_average_pressure(gt_slice)
        average_pressure_predicted = util.compute_average_pressure(
            reconstructed_sf_slice)
        average_pressure_previous = util.compute_average_pressure(
            measured_sf[0, :, :, freq_num])

        # store results
        results_dict['freq'].append(freq)
        results_dict['name'].append(name)
        results_dict['xDim'].append(split_filename[2])
        results_dict['yDim'].append(split_filename[3])
        results_dict['m2'].append(split_filename[4])
        results_dict['num_mics'].append(num_mic)
        results_dict['num_comb'].append(com_num)
        results_dict['num_file'].append(num_file)
        results_dict['pattern'].append(pattern)
        results_dict['NMSE'].append(nmse)
        results_dict['SSIM'].append(ssim)
        results_dict['p_real'].append(average_pressure_real)
        results_dict['p_predicted'].append(average_pressure_predicted)
        results_dict['p_previous'].append(average_pressure_previous)

    return results_dict
def _plot_per_frequency(banner, volume, frequencies, out_dir, suffix):
    """Print ``banner`` and save one 2-D plot of ``volume[0, ..., i]`` per frequency."""
    print(banner)
    for num_freq, freq in enumerate(frequencies):
        print('\tat frequency ' + str(freq))
        util.plot_2D(volume[0, ..., num_freq],
                     os.path.join(out_dir, str(freq) + suffix))


def visualize(config_path):
    """ Plot predictions of trained model on real data.

    For every frequency, PNG plots of the scaled ground truth, the
    irregular (preprocessed) sound field, the mask and the model prediction
    are written to a ``visualization`` folder next to the configuration
    file.

    Args:
        config_path: string, path to the configuration file. Everything up
            to and including the last '/' is used as the session directory.
    """
    config = util.load_config(config_path)
    print('Loaded configuration from: %s' % config_path)

    frequencies = util.get_frequencies()

    session_dir = config_path[:config_path.rfind('/') + 1]

    # NOTE(review): checkpoint_path is only used as an existence check here;
    # presumably the model constructor restores the weights itself -- confirm.
    checkpoint_path = get_latest_checkpoint_path(session_dir)
    if not checkpoint_path:
        print(
            'Error: No checkpoint found in same directory as configuration file.'
        )
        return

    model = sfun.SFUN(config, train_bn=False)

    visualization_path = os.path.join(session_dir, 'visualization')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(visualization_path, exist_ok=True)

    filepath = os.path.join(config['dataset']['path'], 'real_soundfields',
                            'RoomB_soundfield.mat')

    mask_generator = data.MaskGenerator(
        config['dataset']['xSamples'] // config['dataset']['factor'],
        config['dataset']['ySamples'] // config['dataset']['factor'],
        len(frequencies),
        num_mics=config['visualization']['num_mics'])

    # Get measured sound field
    sf_sample = util.load_RoomB_soundfield(filepath,
                                           config['visualization']['source'])
    # Independent copy for the ground truth; initial_sf is what gets
    # preprocessed.
    sf_gt = np.expand_dims(copy.deepcopy(sf_sample), axis=0)
    initial_sf = np.expand_dims(sf_sample, axis=0)

    # Get mask samples
    mask = mask_generator.sample()
    mask = np.expand_dims(mask, axis=0)

    # preprocessing
    irregular_sf, mask = util.preprocessing(config['dataset']['factor'],
                                            initial_sf, mask)

    # Scale ground truth sound field
    sf_gt = util.scale(sf_gt)

    _plot_per_frequency('\nPlotting Ground Truth Sound Field Scaled...',
                        sf_gt, frequencies, visualization_path,
                        '_Hz_Ground_Truth.png')

    _plot_per_frequency('\nPlotting Irregular Sound Field...',
                        irregular_sf, frequencies, visualization_path,
                        '_Hz_Irregular_SF.png')

    _plot_per_frequency('\nPlotting Mask...',
                        mask, frequencies, visualization_path,
                        '_Hz_Mask.png')

    pred_sf = model.predict([irregular_sf, mask])

    _plot_per_frequency('\nPlotting Predicted Sound Field...',
                        pred_sf, frequencies, visualization_path,
                        '_Hz_Pred_SF.png')
import pandas as pd import util from sklearn.cross_validation import train_test_split from sklearn.ensemble import GradientBoostingRegressor,RandomForestRegressor from sklearn.externals import joblib #build 6's model after case study data6=pd.read_excel('201706.xlsx') #data6=data6[data6['是否接通']==1] #data6=data6[['存折计划' in c for c in data6['租机计划']]] data6=util.preprocessing(data6) X=features=util.extractFeatures(data6) y=data6['label'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) model=RandomForestRegressor(n_jobs=-1) model.fit(X_train, y_train) print(model.score(X_train,y_train)) print(model.score(X_test,y_test)) y_pre=model.predict(X_test) util.showFigure1(y_pre,y_test) util.showPRfigure(y_pre,y_test) util.showReport(y_pre,y_test,20) print(model.feature_importances_) ''' #predict 7 with 6's model data6=pd.read_excel('201706.xlsx') data6=data6[data6['是否接通']==1] data7=pd.read_excel('201707.xlsx') data=pd.concat((data6,data7), axis=0, join='inner',ignore_index=True) data=util.preprocessing(data) X=features=util.extractFeatures(data)
from sklearn.svm import SVC from sklearn.neighbors import KNeighborsClassifier from sklearn.ensemble import RandomForestClassifier from sklearn import tree from sklearn.metrics import classification_report from sklearn.naive_bayes import GaussianNB from sklearn.neural_network import MLPClassifier from sklearn.linear_model import Perceptron '''Preprocessing data standardize testing data using training data's mean and standard deviation ''' data_train = pd.read_csv('D_train.csv') data_test = pd.read_csv('D_test.csv') data_train_new, new_label = preprocessing(data_train) data_test_new, new_label = preprocessing(data_test) # new_label = ['x_mean','y_mean','z_mean','x_st','y_st','z_st','x_max','y_max','z_max','x_min','y_min','z_min'] scaler = StandardScaler() data_train_new[new_label] = scaler.fit_transform( data_train_new[new_label].to_numpy()) data_test_new[new_label] = scaler.transform( data_test_new[new_label].to_numpy()) train_X, train_label = get_X_and_label(data_train_new) test_X, test_label = get_X_and_label(data_test_new) '''Using Parameters to print confusion matrix and Classification Report for testing data The following are: 1. Naive Bayes with Gaussian Density Estimation 2. Bayes with density Estimation, KNN 3. Support Vector Machine with radial basis funtion Kernel
def main():
    """Train AdaBoost with decision stumps and plot/report its error curves.

    Loads the train and test files, runs ``adaboost`` for T rounds, saves
    the plots Q13-Q16 (E_in(g_t), E_in(G_t), U_t, E_out(G_t)) and prints the
    accompanying observations and final values.
    """
    # Load data and parsing
    train = util.load_data("hw2_adaboost_train.dat.txt")
    X_train, y_train = util.preprocessing(train)
    test = util.load_data("hw2_adaboost_test.dat.txt")
    X_test, y_test = util.preprocessing(test)
    print("The shape of X_train is ({},{})".format(X_train.shape[0],
                                                   X_train.shape[1]))

    # initialize weights = 1/N
    N = len(y_train)
    print("N = ", N)

    # initialize iterations = 300
    T = 300

    # Start training Adaboost-Stump
    weights = np.full(N, 1.0 / N)  # uniform initial weights
    print("Initial weights =", weights[:5])
    alphas, g_funcs, E_in_gt, E_in_Gt, U_t = adaboost(X_train, y_train,
                                                      decision_stump,
                                                      decision_stump_predict,
                                                      weights, T)

    # plot results
    print(">>>> plot E_in_gt >>>>")
    plt.plot(E_in_gt)
    plt.savefig("Q13.png")
    plt.show()
    print(
        "From the plot, we can see that E_in(g_t) is neither increasing nor decreasing."
    )
    print("The plot is somewhat like periodic wave.")
    print(
        "It's because in each round of training, reweighting is made for more diverse hypothesis."
    )
    print("The diversity results in no guarantee for the performance of g_t.")
    print("E_in_gT = ", E_in_gt[-1])

    print(">>>> plot E_in_Gt >>>>")
    plt.plot(E_in_Gt)
    plt.savefig("Q14.png")
    plt.show()
    print("From the plot, we can see that E_in(G_t) is decreasing.")
    print(
        "It's because with more rounds of training, the ensembled model is using more diversed base models for prediction."
    )
    print("Therefore, the performace of G_t is getting better.")
    print(
        "From the proof of Q18, we can see that E_in(G_t) will be 0 within O(log(N)) steps, which can be observed in this plot."
    )
    print("E_in_GT = ", E_in_Gt[-1])

    print(">>>> plot U_t >>>>")
    plt.plot(U_t)
    plt.savefig("Q15.png")
    plt.show()
    print("From the plot, we can see that U_t is decreasing exponentially.")
    print("Since epsilon_t < 1/2, the result is expected.")
    print("The trend matches the result of Q17.")
    print("U_T = ", U_t[-1])

    # Out-of-sample error of the ensemble built from the first `step` rounds.
    # The range runs through T inclusive: range(1, T) stopped at T - 1 and so
    # never evaluated the full ensemble, which made the "final" value printed
    # below belong to one round too few.
    # NOTE(review): assumes adaboost returns one alpha / g per round (T in
    # total) -- confirm. Slicing past the end is harmless either way.
    E_out_Gt = []
    for step in range(1, T + 1):
        y_test_estimated_by_ensemble = calculate_ensembled_G(
            alphas[:step], g_funcs[:step], X_test)
        E_out_Gt.append(zero_one_loss(y_test, y_test_estimated_by_ensemble))

    print(">>>> plot E_out_Gt >>>>")
    plt.plot(E_out_Gt)
    plt.savefig("Q16.png")
    print(
        "From the plot, we can see E_out(G_t) is generally decreasing --> then increasing a bit --> then saturating."
    )
    print(
        "The result shows that we may consider an early stopping scheme by validation due to the saturation."
    )
    print("E_out_Gt = ", E_out_Gt[-1])