def plot_best_worst():
    """
    Compare the pdfs of the best and worst training draws with those of
    the whole training set.

    For each entry of the hard-coded feature table one figure is drawn with:
      * the 'VT' (solid) and 'EB' (dashed) curves of the full data set,
        in black;
      * the same two curves for the training part (first 60 % of the
        entries) of the "best" draw in red and of the "worst" draw in green.
    The figure is saved as ``<fig_path>/best_worst_<feature>.png`` and then
    displayed.
    """
    from options import MultiOptions, read_binary_file

    opt = MultiOptions()

    # (feature name, index of the best draw file, index of the worst one);
    # the indices refer to the alphabetically sorted 'learning*' files.
    draws_per_feature = [
        ('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
        ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2), ('F_up', 0, 7),
        ('IFslope', 7, 8), ('Kurto', 2, 0), ('MeanPredF', 1, 4),
        ('PredF', 1, 4), ('RappMaxMean', 0, 1), ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5), ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    draw_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    # 60 / 20 / 20 split sizes; only the training size is used below.
    n_events = len(y_all)
    n_train = int(0.6 * n_events)
    n_cv = int(0.2 * n_events)
    n_test = int(0.2 * n_events)

    for feat, best, worst in draws_per_feature:
        print('%s %s %s' % (feat, best, worst))
        fig = plt.figure()
        fig.set_facecolor('white')

        # Reference curves computed on every event.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians[feat]
        plt.plot(pdf_all['vec'], pdf_all['VT'], 'k', lw=2., label='VT')
        plt.plot(pdf_all['vec'], pdf_all['EB'], 'k--', lw=2., label='EB')

        # Resolve both file names before reading either of them.
        selected = [draw_files[best], draw_files[worst]]
        for tag, color, path in zip(['best', 'worst'], ['r', 'g'], selected):
            # assumed to hold the ordered event indices of the draw -- TODO confirm
            draw = read_binary_file(path)

            # Training part of the draw.
            opt.x = x_all.reindex(columns=[feat], index=draw[:n_train])
            opt.y = y_all.reindex(index=draw[:n_train])
            opt.compute_pdfs()
            pdf_train = opt.gaussians[feat]
            plt.plot(pdf_train['vec'], pdf_train['VT'], '-', c=color, label=tag)
            plt.plot(pdf_train['vec'], pdf_train['EB'], '--', c=color)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def compare_pdfs_train():
    """
    Display and compare the pdfs of the different training sets.

    The training draws are read from ``<libdir>/train_10``. For every
    feature in ``opt.opdict['feat_list']`` one figure is shown in which each
    draw contributes one curve per entry of ``opt.types``: the colour
    identifies the type and the line style identifies the draw.
    """
    # read_binary_file is imported here, as the sibling plotting functions
    # do, so the call below does not rely on a module-level import.
    from options import MultiOptions, read_binary_file

    opt = MultiOptions()
    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']
    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    # With the single station/channel configured above this loads exactly
    # one feature table.
    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    X = opt.x
    Y = opt.y

    colors = ['r', 'b', 'g']
    # Only genuine matplotlib line styles: the previous table ended with
    # '*' and 'v', which are marker codes and are rejected (or silently
    # drawn as nothing, depending on the matplotlib version) when passed
    # as ``ls``. The styles are cycled so any number of draws is accepted.
    line_styles = ['-', '--', '-.', ':']

    features = opt.opdict['feat_list']
    for feat in features:
        print(feat)
        opt.opdict['feat_list'] = [feat]
        fig = plt.figure()
        fig.set_facecolor('white')
        for tir in range(nb_tir):
            # Explicit list so the index is a real sequence on any Python.
            tr = [int(i) for i in train[tir]]
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians
            draw_style = line_styles[tir % len(line_styles)]
            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'], g[feat][t], ls=draw_style,
                         color=colors[it])
        plt.title(feat)
        # The first len(opt.types) lines belong to the first draw, so the
        # legend entries map colours to types.
        plt.legend(opt.types)
        plt.show()
def compare_pdfs_train():
    """
    Display and compare the pdfs of the different training sets.

    The training draws are read from ``<libdir>/train_10``. For every
    feature in ``opt.opdict['feat_list']`` one figure is shown in which each
    draw contributes one curve per entry of ``opt.types``: the colour
    identifies the type and the line style identifies the draw.
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']
    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    # NOTE(review): other functions in this file import read_binary_file
    # from the options module as a plain function -- confirm that
    # MultiOptions also exposes it as a method.
    train = opt.read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    # With the single station/channel configured above this loads exactly
    # one feature table.
    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

    X = opt.x
    Y = opt.y

    c = ['r', 'b', 'g']
    # Only genuine matplotlib line styles: the previous table ended with
    # '*' and 'v', which are marker codes and are rejected (or silently
    # drawn as nothing, depending on the matplotlib version) when passed
    # as ``ls``. The styles are cycled so any number of draws is accepted.
    lines = ['-', '--', '-.', ':']

    features = opt.opdict['feat_list']
    for feat in features:
        print(feat)
        opt.opdict['feat_list'] = [feat]
        fig = plt.figure()
        fig.set_facecolor('white')
        for tir in range(nb_tir):
            # Explicit list so the index is a real sequence on any Python.
            tr = [int(event) for event in train[tir]]
            opt.x = X.reindex(index=tr, columns=[feat])
            opt.y = Y.reindex(index=tr)
            opt.classname2number()
            opt.compute_pdfs()
            g = opt.gaussians
            style = lines[tir % len(lines)]
            for it, t in enumerate(opt.types):
                plt.plot(g[feat]['vec'], g[feat][t], ls=style, color=c[it])
        plt.title(feat)
        # The first len(opt.types) lines belong to the first draw, so the
        # legend entries map colours to types.
        plt.legend(opt.types)
        plt.show()
def plot_pdf_subsets():
    """
    Plot the pdfs of the training, cross-validation and test subsets on
    the same figure, with one subplot per event type ('VT' left, 'EB'
    right).

    For each entry of the hard-coded feature table the thick black curve is
    computed on every event. The "best" draw is drawn in red and the
    "worst" draw in green, using a solid line for the training subset
    (first 60 %), a dashed line for the CV subset (next 20 %) and a dotted
    line for the test subset (remainder). The figure is saved as
    ``<fig_path>/subsets_<feature>.png`` and then displayed.
    """
    from options import MultiOptions, read_binary_file

    opt = MultiOptions()

    # (feature name, index of the best draw file, index of the worst one);
    # the indices refer to the alphabetically sorted 'learning*' files.
    draws_per_feature = [
        ('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
        ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2), ('F_up', 0, 7),
        ('IFslope', 7, 8), ('Kurto', 2, 0), ('MeanPredF', 1, 4),
        ('PredF', 1, 4), ('RappMaxMean', 0, 1), ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5), ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    draw_files = sorted(glob.glob(os.path.join('../lib/Piton', 'learning*')))

    # 60 / 20 / 20 split sizes; the test size itself is not used because
    # the test subset is simply everything after the CV subset.
    n_events = len(y_all)
    n_train = int(0.6 * n_events)
    n_cv = int(0.2 * n_events)
    n_test = int(0.2 * n_events)

    # (slice of the draw, line style, whether the curve gets a legend label)
    subsets = [
        (slice(None, n_train), '-', True),             # training set
        (slice(n_train, n_train + n_cv), '--', False),  # CV set
        (slice(n_train + n_cv, None), ':', False),      # test set
    ]

    for feat, best, worst in draws_per_feature:
        print('%s %s %s' % (feat, best, worst))
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')
        ax_vt = fig.add_subplot(121)
        ax_eb = fig.add_subplot(122)

        # Reference curves computed on every event.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        pdf_all = opt.gaussians[feat]
        ax_vt.plot(pdf_all['vec'], pdf_all['VT'], 'k', lw=2.)
        ax_eb.plot(pdf_all['vec'], pdf_all['EB'], 'k', lw=2.)

        # Resolve both file names before reading either of them.
        selected = [draw_files[best], draw_files[worst]]
        for tag, color, path in zip(['best', 'worst'], ['r', 'g'], selected):
            # assumed to hold the ordered event indices of the draw -- TODO confirm
            draw = read_binary_file(path)
            for part, style, labelled in subsets:
                rows = draw[part]
                opt.x = x_all.reindex(columns=[feat], index=rows)
                opt.y = y_all.reindex(index=rows)
                opt.compute_pdfs()
                pdf = opt.gaussians[feat]
                # Only the training curves appear in the legend.
                extra = {'label': tag} if labelled else {}
                ax_vt.plot(pdf['vec'], pdf['VT'], style, c=color, **extra)
                ax_eb.plot(pdf['vec'], pdf['EB'], style, c=color, **extra)

        ax_vt.set_title('VT')
        ax_eb.set_title('EB')
        ax_vt.legend()
        ax_eb.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def plot_best_worst():
    """
    Draw, feature by feature, the pdfs obtained from the best and the worst
    training draws on top of those obtained from the whole training set.

    Black curves come from every event ('VT' solid, 'EB' dashed). Red
    curves come from the training part (first 60 % of the entries) of the
    "best" draw and green curves from that of the "worst" draw. Each figure
    is written to ``<fig_path>/best_worst_<feature>.png`` and then shown.
    """
    from options import MultiOptions, read_binary_file

    opt = MultiOptions()

    # Each tuple is (feature, best draw index, worst draw index); indices
    # address the alphabetically sorted 'learning*' files.
    feature_table = [
        ('AsDec', 0, 1),
        ('Bandwidth', 5, 0),
        ('CentralF', 1, 0),
        ('Centroid_time', 4, 0),
        ('Dur', 4, 1),
        ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4),
        ('Ene', 0, 3),
        ('F_low', 4, 2),
        ('F_up', 0, 7),
        ('IFslope', 7, 8),
        ('Kurto', 2, 0),
        ('MeanPredF', 1, 4),
        ('PredF', 1, 4),
        ('RappMaxMean', 0, 1),
        ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5),
        ('TimeMaxSpec', 4, 0),
        ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    settings = opt.opdict
    settings['feat_list'] = settings['feat_all']
    settings['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    settings['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    settings['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    learning_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    learning_files.sort()

    # Sizes of the 60 / 20 / 20 split; only the training size is needed here.
    total = len(y_all)
    train_size = int(0.6 * total)
    cv_size = int(0.2 * total)
    test_size = int(0.2 * total)

    def training_pdfs(feature, draw_path):
        """Return the pdfs of `feature` for the training part of a draw."""
        # assumed to hold the ordered event indices of the draw -- TODO confirm
        rows = read_binary_file(draw_path)[:train_size]
        opt.x = x_all.reindex(columns=[feature], index=rows)
        opt.y = y_all.reindex(index=rows)
        opt.compute_pdfs()
        return opt.gaussians[feature]

    for feat, best, worst in feature_table:
        print('%s %s %s' % (feat, best, worst))
        fig = plt.figure()
        fig.set_facecolor('white')

        # Whole data set.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        reference = opt.gaussians[feat]
        plt.plot(reference['vec'], reference['VT'], 'k', lw=2., label='VT')
        plt.plot(reference['vec'], reference['EB'], 'k--', lw=2., label='EB')

        # Both paths are looked up before any file is read.
        candidates = (
            ('best', 'r', learning_files[best]),
            ('worst', 'g', learning_files[worst]),
        )
        for name, color, path in candidates:
            curves = training_pdfs(feat, path)
            plt.plot(curves['vec'], curves['VT'], '-', c=color, label=name)
            plt.plot(curves['vec'], curves['EB'], '--', c=color)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def plot_pdf_subsets():
    """
    Show the pdfs of the training set, CV set and test set together, using
    one subplot per event type ('VT' on the left, 'EB' on the right).

    The thick black curve is computed on every event. For the "best" draw
    (red) and the "worst" draw (green) the training subset (first 60 %) is
    solid, the CV subset (next 20 %) dashed and the test subset (the rest)
    dotted. Each figure is written to ``<fig_path>/subsets_<feature>.png``
    and then shown.
    """
    from options import MultiOptions, read_binary_file

    opt = MultiOptions()

    # Each tuple is (feature, best draw index, worst draw index); indices
    # address the alphabetically sorted 'learning*' files.
    feature_table = [
        ('AsDec', 0, 1),
        ('Bandwidth', 5, 0),
        ('CentralF', 1, 0),
        ('Centroid_time', 4, 0),
        ('Dur', 4, 1),
        ('Ene0-5', 1, 4),
        ('Ene5-10', 0, 4),
        ('Ene', 0, 3),
        ('F_low', 4, 2),
        ('F_up', 0, 7),
        ('IFslope', 7, 8),
        ('Kurto', 2, 0),
        ('MeanPredF', 1, 4),
        ('PredF', 1, 4),
        ('RappMaxMean', 0, 1),
        ('RappMaxMeanTF', 4, 0),
        ('Skewness', 2, 5),
        ('TimeMaxSpec', 4, 0),
        ('Rectilinearity', 8, 3),
        ('Planarity', 1, 2),
    ]

    settings = opt.opdict
    settings['feat_list'] = settings['feat_all']
    settings['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    settings['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    learning_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    learning_files.sort()

    # Sizes of the 60 / 20 / 20 split; the test size is not needed because
    # the test subset is whatever follows the CV subset.
    total = len(y_all)
    train_size = int(0.6 * total)
    cv_size = int(0.2 * total)
    test_size = int(0.2 * total)
    cv_end = train_size + cv_size

    def subset_pdfs(feature, rows):
        """Return the pdfs of `feature` restricted to the events in `rows`."""
        opt.x = x_all.reindex(columns=[feature], index=rows)
        opt.y = y_all.reindex(index=rows)
        opt.compute_pdfs()
        return opt.gaussians[feature]

    for feat, best, worst in feature_table:
        print('%s %s %s' % (feat, best, worst))
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')
        left = fig.add_subplot(121)
        right = fig.add_subplot(122)

        # Whole data set.
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        reference = opt.gaussians[feat]
        left.plot(reference['vec'], reference['VT'], 'k', lw=2.)
        right.plot(reference['vec'], reference['EB'], 'k', lw=2.)

        # Both paths are looked up before any file is read.
        candidates = (
            ('best', 'r', learning_files[best]),
            ('worst', 'g', learning_files[worst]),
        )
        for name, color, path in candidates:
            # assumed to hold the ordered event indices of the draw -- TODO confirm
            draw = read_binary_file(path)

            # Training set: solid, the only curves listed in the legend.
            curves = subset_pdfs(feat, draw[:train_size])
            left.plot(curves['vec'], curves['VT'], '-', c=color, label=name)
            right.plot(curves['vec'], curves['EB'], '-', c=color, label=name)

            # Cross-validation set: dashed.
            curves = subset_pdfs(feat, draw[train_size:cv_end])
            left.plot(curves['vec'], curves['VT'], '--', c=color)
            right.plot(curves['vec'], curves['EB'], '--', c=color)

            # Test set: dotted.
            curves = subset_pdfs(feat, draw[cv_end:])
            left.plot(curves['vec'], curves['VT'], ':', c=color)
            right.plot(curves['vec'], curves['EB'], ':', c=color)

        left.set_title('VT')
        right.set_title('EB')
        left.legend()
        right.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()