def plot_soutenance():
    """
    Plot the PDFs of the 4 features defined by Clément, for the slides of
    the thesis defence.

    The features 'AsDec', 'Dur', 'Ene' and 'KRapp' are read on the Z channel.
    Each feature gets its own subplot (VT curve in blue, EB curve in red)
    with the tick labels hidden. The figure is saved to a hard-coded PNG
    path and then displayed.
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']
    #opt.opdict['feat_train'] = 'clement_train.csv'
    #opt.opdict['feat_test'] = 'clement_test.csv'
    opt.opdict['feat_list'] = ['AsDec', 'Dur', 'Ene', 'KRapp']
    #opt.opdict['feat_log'] = ['AsDec','Dur','Ene','KRapp']

    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()
    pdfs = opt.gaussians

    figure = plt.figure(figsize=(12, 2.5))
    figure.set_facecolor('white')
    class_colors = (('VT', 'b'), ('EB', 'r'))
    for position, feat in enumerate(sorted(pdfs), 1):
        axis = figure.add_subplot(1, 4, position)
        curves = pdfs[feat]
        for class_name, color in class_colors:
            axis.plot(curves['vec'], curves[class_name], ls='-', c=color, lw=2.)
        axis.set_title(feat)
        axis.xaxis.set_ticks_position('bottom')
        axis.xaxis.set_ticklabels('')
        axis.yaxis.set_ticks_position('left')
        axis.yaxis.set_ticklabels('')
        # Only the first panel carries the legend.
        if position == 1:
            axis.legend(['VT', 'EB'], loc=1, prop={'size': 10})
    plt.savefig('/home/nadege/Dropbox/Soutenance/pdfs.png')
    plt.show()
def __init__(self):
    """
    Load a saved result file and run the analysis on it.

    Initialises the MultiOptions part of the object, reads the file at
    opdict['result_path'] with read_binary_file(), moves its 'features'
    entry into opdict['feat_list'] and finally calls do_analysis().
    """
    MultiOptions.__init__(self)
    result_path = self.opdict['result_path']
    print("ANALYSIS OF %s" % result_path)

    self.results = self.read_binary_file(result_path)
    # The feature list is stored alongside the results: hand it over to
    # the options and drop it from the results container.
    features = self.results['features']
    self.opdict['feat_list'] = features
    del self.results['features']

    self.do_analysis()
def run_unsupervised():
    """
    Run the unsupervised classifier with the 'method' option forced to
    'kmean'.
    """
    from options import MultiOptions

    settings = MultiOptions()
    # NOTE(review): the value is spelled 'kmean' here while run_all()
    # compares against 'kmeans' -- confirm which one the classifier expects.
    settings.opdict['method'] = 'kmean'

    import unsupervised
    unsupervised.classifier(settings)
def __init__(self):
    """
    Load a saved result file and run the analysis on it.

    Initialises the MultiOptions part of the object, reads the file at
    opdict['result_path'] with read_binary_file(), moves its 'features'
    entry into opdict['feat_list'] and finally calls do_analysis().
    """
    MultiOptions.__init__(self)
    result_path = self.opdict['result_path']
    print("ANALYSIS OF %s" % result_path)

    self.results = self.read_binary_file(result_path)
    # The feature list is stored alongside the results: hand it over to
    # the options and drop it from the results container.
    features = self.results['features']
    self.opdict['feat_list'] = features
    del self.results['features']

    self.do_analysis()
def __init__(self):
    """
    Locate the automatic classification file, then gather and display the
    results.

    After the MultiOptions initialisation, stores 'auto_class.csv' as
    opdict['class_auto_file'] and builds opdict['class_auto_path'] as
    outdir/METHOD/auto_class.csv (method name upper-cased), then calls
    concatenate_results() and display_results().
    """
    MultiOptions.__init__(self)
    print("ANALYSIS OF %s" % self.opdict['result_path'])

    self.opdict['class_auto_file'] = 'auto_class.csv'
    path_parts = (
        self.opdict['outdir'],
        self.opdict['method'].upper(),
        self.opdict['class_auto_file'],
    )
    self.opdict['class_auto_path'] = '%s/%s/%s' % path_parts

    self.concatenate_results()
    self.display_results()
def plot_soutenance():
    """
    Plot the PDFs of the 4 features defined by Clément, for the slides of
    the thesis defence.

    The features 'AsDec', 'Dur', 'Ene' and 'KRapp' are read on the Z channel.
    Each feature gets its own subplot (VT curve in blue, EB curve in red)
    with the tick labels hidden. The figure is saved to a hard-coded PNG
    path and then displayed.
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']
    #opt.opdict['feat_train'] = 'clement_train.csv'
    #opt.opdict['feat_test'] = 'clement_test.csv'
    opt.opdict['feat_list'] = ['AsDec', 'Dur', 'Ene', 'KRapp']
    #opt.opdict['feat_log'] = ['AsDec','Dur','Ene','KRapp']

    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.compute_pdfs()
    pdfs = opt.gaussians

    figure = plt.figure(figsize=(12, 2.5))
    figure.set_facecolor('white')
    class_colors = (('VT', 'b'), ('EB', 'r'))
    for position, feat in enumerate(sorted(pdfs), 1):
        axis = figure.add_subplot(1, 4, position)
        curves = pdfs[feat]
        for class_name, color in class_colors:
            axis.plot(curves['vec'], curves[class_name], ls='-', c=color, lw=2.)
        axis.set_title(feat)
        axis.xaxis.set_ticks_position('bottom')
        axis.xaxis.set_ticklabels('')
        axis.yaxis.set_ticks_position('left')
        axis.yaxis.set_ticklabels('')
        # Only the first panel carries the legend.
        if position == 1:
            axis.legend(['VT', 'EB'], loc=1, prop={'size': 10})
    plt.savefig('/home/nadege/Dropbox/Soutenance/pdfs.png')
    plt.show()
def __init__(self):
    """
    Locate the automatic classification file, then gather and display the
    results.

    After the MultiOptions initialisation, stores 'auto_class.csv' as
    opdict['class_auto_file'] and builds opdict['class_auto_path'] as
    outdir/METHOD/auto_class.csv (method name upper-cased), then calls
    concatenate_results() and display_results().
    """
    MultiOptions.__init__(self)
    print("ANALYSIS OF %s" % self.opdict['result_path'])

    self.opdict['class_auto_file'] = 'auto_class.csv'
    path_parts = (
        self.opdict['outdir'],
        self.opdict['method'].upper(),
        self.opdict['class_auto_file'],
    )
    self.opdict['class_auto_path'] = '%s/%s/%s' % path_parts

    self.concatenate_results()
    self.display_results()
def run_all():
    """
    Run the classification selected by opdict['method'].

    - 'kmeans': unsupervised classifier only.
    - 'lr', 'svm', 'svm_nl', 'lrsk': supervised classifier, then an
      AnalyseResults object is built and its confusion matrix is plotted
      when opdict['plot_confusion'] is set.
    - 'ova', '1b1': supervised classifier, then an
      AnalyseResultsExtraction object is built.

    Any other method value does nothing.
    """
    from options import MultiOptions
    opt = MultiOptions()
    #opt.count_number_of_events()

    method = opt.opdict['method']

    ### UNSUPERVISED METHOD ###
    if method == 'kmeans':
        from unsupervised import classifier
        classifier(opt)
        return

    ### SUPERVISED METHODS ###
    multiclass = method in ('lr', 'svm', 'svm_nl', 'lrsk')
    extraction = method in ('ova', '1b1')
    if not (multiclass or extraction):
        return

    from do_classification import classifier
    classifier(opt)

    if multiclass:
        from results import AnalyseResults
        res = AnalyseResults()
        if res.opdict['plot_confusion']:
            res.plot_confusion()
    else:
        from results import AnalyseResultsExtraction
        res = AnalyseResultsExtraction()
def run_all():
    """
    Run the supervised classifier, then build the matching results object.

    For the methods 'lr', 'svm' and 'lrsk' an AnalyseResults object is
    created and its confusion matrix is plotted when
    opdict['plot_confusion'] is set; for any other method an
    AnalyseResultsExtraction object is created.
    """
    from options import MultiOptions
    opt = MultiOptions()
    #opt.count_number_of_events()

    from do_classification import classifier
    classifier(opt)

    # The method is read after the classifier has run, as it receives opt.
    if opt.opdict['method'] not in ('lr', 'svm', 'lrsk'):
        from results import AnalyseResultsExtraction
        res = AnalyseResultsExtraction()
        return

    from results import AnalyseResults
    res = AnalyseResults()
    if res.opdict['plot_confusion']:
        res.plot_confusion()
def compare_pdfs_train():
    """
    Display and compare the PDFs of the different training sets.

    For station IJEN, channel Z, one figure is drawn per feature of
    opdict['feat_list']. Each training draw stored in the 'train_10' file
    is plotted with its own line style, and each event class with its own
    colour.

    The last two draw styles are markers ('*' and 'v'): they are passed to
    matplotlib as markers rather than as line styles, because they are not
    valid line-style values. Styles and colours are cycled, so more than 10
    draws or more than 3 classes no longer raise an IndexError.
    """
    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']
    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    colors = ['r', 'b', 'g']
    # One entry per training draw; the last two are marker-only styles.
    draw_styles = [
        {'ls': '-'}, {'ls': '--'}, {'ls': '-.'}, {'ls': ':'},
        {'ls': '-'}, {'ls': '--'}, {'ls': '-.'}, {'ls': ':'},
        {'ls': 'None', 'marker': '*'}, {'ls': 'None', 'marker': 'v'},
    ]

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

            X = opt.x
            Y = opt.y
            features = opt.opdict['feat_list']
            for feat in features:
                print(feat)
                # compute_pdfs() works on the current feature list:
                # restrict it to the feature being plotted.
                opt.opdict['feat_list'] = [feat]
                fig = plt.figure()
                fig.set_facecolor('white')
                for tir in range(nb_tir):
                    tr = [int(value) for value in train[tir]]
                    opt.x = X.reindex(index=tr, columns=[feat])
                    opt.y = Y.reindex(index=tr)
                    opt.classname2number()
                    opt.compute_pdfs()
                    g = opt.gaussians
                    style = draw_styles[tir % len(draw_styles)]
                    for it, t in enumerate(opt.types):
                        plt.plot(g[feat]['vec'], g[feat][t],
                                 color=colors[it % len(colors)], **style)
                plt.title(feat)
                plt.legend(opt.types)
                plt.show()
def compare_lissage():
    """
    Compare the kurtosis PDFs obtained with different smoothing windows.

    plot_envelopes() is called first. The PDFs of the 'Kurto' feature
    (Z channel) are then computed for four feature files, labelled
    0.5 s, 1 s, 5 s and 10 s, and drawn side by side on a shared x range
    (VT in blue, EB in red).
    """
    plot_envelopes()

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    def current_pdfs(rename_columns=False):
        # Sort the data, keep the first (x, y) set and return its PDFs.
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]
        if rename_columns:
            opt.x.columns = opt.opdict['feat_list']
        opt.compute_pdfs()
        return opt.gaussians

    # Smoothing over 0.5 s windows
    opt.opdict['feat_list'] = ['Kurto']
    opt.opdict['feat_log'] = ['Kurto']
    gauss_stand = current_pdfs(rename_columns=True)

    # Smoothing over 1 s, 5 s and 10 s windows (in that order)
    feature_files = (
        ('0610_Piton_trainset.csv', '0610_Piton_testset.csv'),
        ('1809_Piton_trainset.csv', '1809_Piton_testset.csv'),
        ('0510_Piton_trainset.csv', '0510_Piton_testset.csv'),
    )
    smoothed = []
    for train_csv, test_csv in feature_files:
        opt.opdict['feat_train'] = train_csv
        opt.opdict['feat_test'] = test_csv
        smoothed.append(current_pdfs())
    gauss_1s, gauss_5s, gauss_10s = smoothed

    ### PLOT OF SUPERPOSED PDFs ###
    fig = plt.figure(figsize=(12, 2.5))
    fig.set_facecolor('white')
    panels = (
        (141, gauss_stand, '0.5 s'),
        (142, gauss_1s, '1 s'),
        (143, gauss_5s, '5 s'),
        (144, gauss_10s, '10 s'),
    )
    for feat in sorted(opt.gaussians):
        # Common upper bound of the x axis for the four panels.
        maxi = int(np.max([panel[1][feat]['vec'] for panel in panels]))

        for ipanel, (position, gauss, title) in enumerate(panels):
            is_first = ipanel == 0
            ax = fig.add_subplot(position)
            for class_name, color in (('VT', 'b'), ('EB', 'r')):
                # Only the first panel labels its curves (for the legend).
                label_kw = {'label': class_name} if is_first else {}
                ax.plot(gauss[feat]['vec'], gauss[feat][class_name],
                        ls='-', c=color, lw=2., **label_kw)
            ax.set_xlim([0, maxi])
            ax.set_xlabel(feat)
            ax.set_title(title)
            if is_first:
                ax.legend(prop={'size': 10})
            else:
                ax.set_yticklabels('')
        #plt.savefig('%s/features/comp_%s.png'%(opt.opdict['outdir'],feat))
        plt.show()
def plot_best_worst():
    """
    Plot the PDFs of the training set for the best and worst draws and
    compare them with the PDFs of the whole training set.

    One figure per feature: the whole set in black (VT solid, EB dashed),
    the best draw in red and the worst draw in green. The training part
    of a draw is the first 60 % of its index list. Each figure is saved
    as best_worst_FEATURE.png in opdict['fig_path'] and then shown.
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, index of the best draw, index of the worst draw)
    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec', 'Ene', 'Kurto', 'RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    mtraining = int(0.6 * len(y_all))

    draw_styles = (('best', 'r'), ('worst', 'g'))
    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure()
        fig.set_facecolor('white')

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        plt.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2., label='VT')
        plt.plot(g[feat]['vec'], g[feat]['EB'], 'k--', lw=2., label='EB')

        draw_files = (list_files[best], list_files[worst])
        for (label, color), draw_file in zip(draw_styles, draw_files):
            dic = read_binary_file(draw_file)

            # TRAINING SET
            train_index = dic[:mtraining]
            opt.x = x_all.reindex(columns=[feat], index=train_index)
            opt.y = y_all.reindex(index=train_index)
            opt.compute_pdfs()
            g_train = opt.gaussians
            plt.plot(g_train[feat]['vec'], g_train[feat]['VT'], '-',
                     c=color, label=label)
            plt.plot(g_train[feat]['vec'], g_train[feat]['EB'], '--',
                     c=color)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def compare_pdfs_train():
    """
    Display and compare the PDFs of the different training sets.

    For station IJEN, channel Z, one figure is drawn per feature of
    opdict['feat_list']. Each training draw stored in the 'train_10' file
    is plotted with its own line style, and each event class with its own
    colour.

    The last two draw styles are markers ('*' and 'v'): they are passed to
    matplotlib as markers rather than as line styles, because they are not
    valid line-style values. Styles and colours are cycled, so more than 10
    draws or more than 3 classes no longer raise an IndexError.
    """
    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['stations'] = ['IJEN']
    opt.opdict['channels'] = ['Z']
    opt.opdict['Types'] = ['Tremor', 'VulkanikB', '?']
    opt.opdict['train_file'] = '%s/train_10' % (opt.opdict['libdir'])
    opt.opdict['label_filename'] = '%s/Ijen_reclass_all.csv' % opt.opdict['libdir']

    train = opt.read_binary_file(opt.opdict['train_file'])
    nb_tir = len(train)

    colors = ['r', 'b', 'g']
    # One entry per training draw; the last two are marker-only styles.
    draw_styles = [
        {'ls': '-'}, {'ls': '--'}, {'ls': '-.'}, {'ls': ':'},
        {'ls': '-'}, {'ls': '--'}, {'ls': '-.'}, {'ls': ':'},
        {'ls': 'None', 'marker': '*'}, {'ls': 'None', 'marker': 'v'},
    ]

    for sta in opt.opdict['stations']:
        for comp in opt.opdict['channels']:
            opt.x, opt.y = opt.features_onesta(sta, comp)

            X = opt.x
            Y = opt.y
            features = opt.opdict['feat_list']
            for feat in features:
                print(feat)
                # compute_pdfs() works on the current feature list:
                # restrict it to the feature being plotted.
                opt.opdict['feat_list'] = [feat]
                fig = plt.figure()
                fig.set_facecolor('white')
                for tir in range(nb_tir):
                    tr = [int(value) for value in train[tir]]
                    opt.x = X.reindex(index=tr, columns=[feat])
                    opt.y = Y.reindex(index=tr)
                    opt.classname2number()
                    opt.compute_pdfs()
                    g = opt.gaussians
                    style = draw_styles[tir % len(draw_styles)]
                    for it, t in enumerate(opt.types):
                        plt.plot(g[feat]['vec'], g[feat][t],
                                 color=colors[it % len(colors)], **style)
                plt.title(feat)
                plt.legend(opt.types)
                plt.show()
def plot_pdf_subsets():
    """
    Plot the PDFs of the training set, CV set and test set on the same
    figure, with one subplot per event type (VT on the left, EB on the
    right).

    For every feature, the whole data set is drawn in black, the best
    draw in red and the worst draw in green. Within a draw, the first
    60 % of the indexes form the training set (solid line), the next
    20 % the CV set (dashed) and the remainder the test set (dotted).
    Each figure is saved as subsets_FEATURE.png in opdict['fig_path'].
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, index of the best draw, index of the worst draw)
    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)

    draw_styles = (('best', 'r'), ('worst', 'g'))
    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        ax1.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2.)
        ax2.plot(g[feat]['vec'], g[feat]['EB'], 'k', lw=2.)

        draw_files = (list_files[best], list_files[worst])
        for (label, color), draw_file in zip(draw_styles, draw_files):
            dic = read_binary_file(draw_file)
            # (index subset, line format, extra plot keywords); only the
            # training curves are labelled for the legend.
            subsets = (
                (dic[:mtraining], '-', {'label': label}),      # TRAINING SET
                (dic[mtraining:mtraining + mcv], '--', {}),    # CV SET
                (dic[mtraining + mcv:], ':', {}),              # TEST SET
            )
            for subset_index, fmt, extra in subsets:
                opt.x = x_all.reindex(columns=[feat], index=subset_index)
                opt.y = y_all.reindex(index=subset_index)
                opt.compute_pdfs()
                g_sub = opt.gaussians
                ax1.plot(g_sub[feat]['vec'], g_sub[feat]['VT'], fmt,
                         c=color, **extra)
                ax2.plot(g_sub[feat]['vec'], g_sub[feat]['EB'], fmt,
                         c=color, **extra)

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def plot_pdf_subsets():
    """
    Plot the PDFs of the training set, CV set and test set on the same
    figure, with one subplot per event type (VT on the left, EB on the
    right).

    For every feature, the whole data set is drawn in black, the best
    draw in red and the worst draw in green. Within a draw, the first
    60 % of the indexes form the training set (solid line), the next
    20 % the CV set (dashed) and the remainder the test set (dotted).
    Each figure is saved as subsets_FEATURE.png in opdict['fig_path'].
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, index of the best draw, index of the worst draw)
    feat_list = [('AsDec', 0, 1), ('Bandwidth', 5, 0), ('CentralF', 1, 0),
                 ('Centroid_time', 4, 0), ('Dur', 4, 1), ('Ene0-5', 1, 4),
                 ('Ene5-10', 0, 4), ('Ene', 0, 3), ('F_low', 4, 2),
                 ('F_up', 0, 7), ('IFslope', 7, 8), ('Kurto', 2, 0),
                 ('MeanPredF', 1, 4), ('PredF', 1, 4), ('RappMaxMean', 0, 1),
                 ('RappMaxMeanTF', 4, 0), ('Skewness', 2, 5),
                 ('TimeMaxSpec', 4, 0), ('Rectilinearity', 8, 3),
                 ('Planarity', 1, 2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR', 'Z')
    print(len(y_all))

    list_files = glob.glob(os.path.join('../lib/Piton', 'learning*'))
    list_files.sort()

    m = len(y_all)
    mtraining = int(0.6 * m)
    mcv = int(0.2 * m)

    draw_styles = (('best', 'r'), ('worst', 'g'))
    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure(figsize=(10, 4))
        fig.set_facecolor('white')
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)

        # ALL
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g = opt.gaussians
        ax1.plot(g[feat]['vec'], g[feat]['VT'], 'k', lw=2.)
        ax2.plot(g[feat]['vec'], g[feat]['EB'], 'k', lw=2.)

        draw_files = (list_files[best], list_files[worst])
        for (label, color), draw_file in zip(draw_styles, draw_files):
            dic = read_binary_file(draw_file)
            # (index subset, line format, extra plot keywords); only the
            # training curves are labelled for the legend.
            subsets = (
                (dic[:mtraining], '-', {'label': label}),      # TRAINING SET
                (dic[mtraining:mtraining + mcv], '--', {}),    # CV SET
                (dic[mtraining + mcv:], ':', {}),              # TEST SET
            )
            for subset_index, fmt, extra in subsets:
                opt.x = x_all.reindex(columns=[feat], index=subset_index)
                opt.y = y_all.reindex(index=subset_index)
                opt.compute_pdfs()
                g_sub = opt.gaussians
                ax1.plot(g_sub[feat]['vec'], g_sub[feat]['VT'], fmt,
                         c=color, **extra)
                ax2.plot(g_sub[feat]['vec'], g_sub[feat]['EB'], fmt,
                         c=color, **extra)

        ax1.set_title('VT')
        ax2.set_title('EB')
        ax1.legend()
        ax2.legend()
        plt.suptitle(feat)
        plt.savefig('%s/subsets_%s.png' % (opt.opdict['fig_path'], feat))
        plt.show()
def read_data_for_features_extraction(save=False):
    """
    Extract the features from all seismic files.

    Every event of the classification table is searched for on each
    station and channel of the options. The features of the first
    matching file are computed with extract_norm_features()
    (option 'norm') or extract_hash_features() (option 'hash') and
    appended as one row, indexed by (date, station, component), to a
    pandas DataFrame. When three components were processed with the
    'norm' option and at least one polarisation feature is requested,
    polarization_analysis() is run and its outputs are copied to the
    Z, N and E rows.

    :param save: if True, the DataFrame is written as a .csv file to
        opdict['feat_filepath']. The program exits beforehand if that
        file already exists, so that it is never overwritten silently.
    """
    from options import MultiOptions
    opt = MultiOptions()

    if save:
        if os.path.exists(opt.opdict['feat_filepath']):
            print("WARNING !! File %s already exists" % opt.opdict['feat_filepath'])
            print("Check if you really want to replace it...")
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
    need_polarization = any(name in list_features for name in polar_features)

    # Classification
    tsort = opt.read_classification()
    tsort.index = tsort.Date
    tsort = tsort.reindex(columns=['Date', 'Type'])

    list_sta = opt.opdict['stations']
    for ifile in range(tsort.shape[0]):
        date = tsort.values[ifile, 0]
        event_type = tsort.values[ifile, 1]

        for sta in list_sta:
            print("##### %s" % sta)
            counter = 0
            for comp in opt.opdict['channels']:
                ind = (date, sta, comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = event_type
                dic['Ponset'] = 0

                # The date is split into its first 8 characters and the
                # rest to build the file name pattern.
                list_files = glob.glob(os.path.join(
                    opt.opdict['datadir'], sta, '*%s.D' % comp,
                    '*%s.D*%s_%s*' % (comp, str(date)[:8], str(date)[8:])))
                list_files.sort()
                if len(list_files) > 0:
                    path = list_files[0]
                    print("%s %s" % (ifile, path))
                    if opt.opdict['option'] == 'norm':
                        counter = counter + 1
                        dic = extract_norm_features(list_features, date, path, dic)
                    elif opt.opdict['option'] == 'hash':
                        permut_file = '%s/permut_%s' % (
                            opt.opdict['libdir'],
                            opt.opdict['feat_test'].split('.')[0])
                        dic = extract_hash_features(list_features, date, path,
                                                    dic, permut_file, plot=True)
                    df = df.append(dic)

                if counter == 3 and need_polarization:
                    from waveform_features import polarization_analysis
                    # Mean duration and mean P-onset over the 3 components.
                    d_mean = (df.Dur[(date, sta, comp)]
                              + df.Dur[(date, sta, 'E')]
                              + df.Dur[(date, sta, 'Z')]) / 3.
                    po_mean = int((df.Ponset[(date, sta, comp)]
                                   + df.Ponset[(date, sta, 'E')]
                                   + df.Ponset[(date, sta, 'Z')]) / 3)
                    # NOTE(review): the E and Z file names are derived from
                    # the current one by replacing "N.D" -- this presumably
                    # expects N to be the last channel; confirm.
                    list_files = [path,
                                  path.replace("N.D", "E.D"),
                                  path.replace("N.D", "Z.D")]
                    rect, plan, az, iang = polarization_analysis(
                        list_files, d_mean, po_mean, plot=False)
                    # Copy each requested attribute to the 3 component rows.
                    # Every attribute, 'Azimuth' included, is only written
                    # when it belongs to the requested feature list.
                    for name, value in zip(polar_features,
                                           (rect, plan, az, iang)):
                        if name in list_features:
                            for component in ('Z', 'N', 'E'):
                                df[name][(date, sta, component)] = value

    if save:
        print("Features written in %s" % opt.opdict['feat_filepath'])
        df.to_csv(opt.opdict['feat_filepath'])
def read_data_for_features_extraction(set='test', save=False):
    """
    Extract the features from all seismic files.

    The traces are read from MATLAB files: one 'TestSet/SigEve_N.mat'
    file per event for the test set, a single 'TrainingSetPlusSig_2.mat'
    file for the training set. For every event and channel the features
    are computed with extract_norm_features() (option 'norm') or
    extract_hash_features() (option 'hash') and appended as one row,
    indexed by (event number, 'BOR', component), to a pandas DataFrame.
    When three channels were processed and at least one polarisation
    feature is requested, polarization_analysis() is run and its outputs
    are copied to the Z, N and E rows.

    :param set: 'test' or 'train'; any other value extracts nothing.
        With 'train', opdict['feat_filename'] is redirected to
        outdir/features/feat_train.
    :param save: if True, the DataFrame is written as a .csv file to
        opdict['feat_filename']. The program exits beforehand if that
        file already exists, so that it is never overwritten silently.
    """
    from scipy.io import loadmat
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'train':
        opt.opdict['feat_filename'] = '%s/features/%s' % (
            opt.opdict['outdir'], opt.opdict['feat_train'])
    print(opt.opdict['feat_filename'])

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print("WARNING !! File %s already exists" % opt.opdict['feat_filename'])
            print("Check if you really want to replace it...")
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
    need_polarization = any(name in list_features for name in polar_features)

    def _store_polarization(frame, event_id, comp, traces):
        # Run the polarisation analysis with the duration and P-onset
        # averaged over the 3 components, then copy every requested
        # attribute to the Z, N and E rows of the event.
        d_mean = (frame.Dur[(event_id, 'BOR', comp)]
                  + frame.Dur[(event_id, 'BOR', 'E')]
                  + frame.Dur[(event_id, 'BOR', 'Z')]) / 3.
        po_mean = int((frame.Ponset[(event_id, 'BOR', comp)]
                       + frame.Ponset[(event_id, 'BOR', 'E')]
                       + frame.Ponset[(event_id, 'BOR', 'Z')]) / 3)
        rect, plan, az, iang = polarization_analysis(traces, d_mean, po_mean,
                                                     plot=False)
        for name, value in zip(polar_features, (rect, plan, az, iang)):
            if name in list_features:
                for component in ('Z', 'N', 'E'):
                    frame[name][(event_id, 'BOR', component)] = value

    if set == 'test':
        datafiles = glob.glob(os.path.join(opt.opdict['datadir'],
                                           'TestSet/SigEve_*'))
        datafiles.sort()
        # Event numbers taken from the file names, in increasing order.
        liste = sorted(
            int(os.path.basename(name).split('_')[1].split('.mat')[0])
            for name in datafiles)

        tsort = opt.read_csvfile(opt.opdict['label_filename'])
        tsort.index = tsort.Date

        # File positions that are skipped with the 'hash' option.
        hash_skipped = [409, 1026, 1027, 1028, 1993, 2121, 2122, 2123, 2424,
                        2441, 3029, 3058, 3735, 3785, 3852, 3930, 4200, 4463,
                        4464, 4746, 6150, 6382, 6672, 6733]

        for ifile, numfile in enumerate(liste):
            path = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print("%s %s" % (ifile, path))
            mat = loadmat(path)

            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                ind = (numfile, 'BOR', comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = tsort[tsort.Date == numfile].Type.values[0]
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp)
                list_attr = s.__dict__.keys()
                # NOTE(review): more than 2 attributes presumably means
                # that a trace was actually loaded -- confirm against
                # SeismicTraces.
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        if ifile in hash_skipped:
                            continue
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, False)
                    _store_polarization(df, numfile, comp, s)

    elif set == 'train':
        datafile = os.path.join(opt.opdict['datadir'],
                                'TrainingSetPlusSig_2.mat')
        mat = loadmat(datafile)

        for i in range(mat['KurtoEB'].shape[1]):
            print("EB %s" % i)
            # NOTE(review): only EB events 10 and 61 are processed; this
            # looks like a leftover debugging filter -- confirm.
            if i != 10 and i != 61:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i, 'BOR', comp)])
                dic['EventType'] = 'EB'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'EB'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)
                    # Offset added to the VT event numbers so that they
                    # follow the EB ones in the index.
                    neb = i + 1

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, train=[i, 'EB'])
                    _store_polarization(df, i, comp, s)

        for i in range(mat['KurtoVT'].shape[1]):
            # NOTE(review): neb is only defined once an EB event has been
            # appended above -- confirm that this always happens.
            print("VT %s" % (i + neb))
            # NOTE(review): only VT event 5 is processed; this looks like
            # a leftover debugging filter -- confirm.
            if i != 5:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i + neb, 'BOR', comp)])
                dic['EventType'] = 'VT'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'VT'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, train=[i, 'VT'])
                    _store_polarization(df, i + neb, comp, s)

    if save:
        print("Features written in %s" % opt.opdict['feat_filename'])
        df.to_csv(opt.opdict['feat_filename'])
def compare_ponsets(set='test'):
    """
    Compare the P-onsets determined either with the kurtosis gradient or
    with the frequency stack of the spectrogram.

    For every test event and channel, the trace is plotted in black with
    vertical lines at the two onsets (red, solid for 'freq' and dashed
    for 'grad') and at each onset plus the matching duration divided by
    the sampling step s.dt (yellow, same line styles).

    :param set: only 'test' is handled; any other value does nothing.
    """
    # loadmat is taken from the public scipy.io namespace rather than from
    # the private scipy.io.matlab.mio module.
    from scipy.io import loadmat
    from options import MultiOptions
    opt = MultiOptions()

    if set != 'test':
        return

    datafiles = glob.glob(os.path.join(opt.opdict['datadir'],
                                       'TestSet/SigEve_*'))
    datafiles.sort()
    # Event numbers taken from the file names, in increasing order.
    liste = sorted(
        int(os.path.basename(name).split('_')[1].split('.mat')[0])
        for name in datafiles)

    # NOTE(review): with index_col=False the first CSV column is not used
    # as the index, yet rows are looked up below by str(ind) -- confirm
    # that the lookup really finds the rows.
    df = pd.read_csv('%s/features/Piton_testset.csv' % opt.opdict['outdir'],
                     index_col=False)
    df = df.reindex(columns=['Dur_freq', 'Ponset_freq', 'Dur_grad',
                             'Ponset_grad'])

    for ifile, numfile in enumerate(liste):
        path = os.path.join(opt.opdict['datadir'],
                            'TestSet/SigEve_%d.mat' % numfile)
        print("%s %s" % (ifile, path))
        mat = loadmat(path)
        for comp in opt.opdict['channels']:
            ind = (numfile, 'BOR', comp)
            df_one = df.reindex(index=[str(ind)])
            pfr = df_one.Ponset_freq
            pgr = df_one.Ponset_grad
            dfr = df_one.Dur_freq
            dgr = df_one.Dur_grad

            s = SeismicTraces(mat, comp)
            fig = plt.figure(figsize=(9, 4))
            fig.set_facecolor('white')
            plt.plot(s.tr, 'k')

            # Vertical extent of the markers: full amplitude of the trace.
            amplitude = [np.min(s.tr), np.max(s.tr)]
            plt.plot([pfr, pfr], amplitude, 'r', lw=2., label='freq')
            plt.plot([pgr, pgr], amplitude, 'r--', lw=2., label='grad')
            end_freq = pfr + dfr * 1. / s.dt
            end_grad = pgr + dgr * 1. / s.dt
            plt.plot([end_freq, end_freq], amplitude, 'y', lw=2.)
            plt.plot([end_grad, end_grad], amplitude, 'y--', lw=2.)
            plt.legend()
            plt.show()
def read_data_for_features_extraction(set='test', save=False):
    """
    Extract the features from all seismic files.

    The traces are read from MATLAB files: one 'TestSet/SigEve_N.mat'
    file per event for the test set, a single 'TrainingSetPlusSig_2.mat'
    file for the training set. For every event and channel the features
    are computed with extract_norm_features() (option 'norm') or
    extract_hash_features() (option 'hash') and appended as one row,
    indexed by (event number, 'BOR', component), to a pandas DataFrame.
    When three channels were processed and at least one polarisation
    feature is requested, polarization_analysis() is run and its outputs
    are copied to the Z, N and E rows.

    :param set: 'test' or 'train'; any other value extracts nothing.
        With 'train', opdict['feat_filename'] is redirected to
        outdir/features/feat_train.
    :param save: if True, the DataFrame is written as a .csv file to
        opdict['feat_filename']. The program exits beforehand if that
        file already exists, so that it is never overwritten silently.
    """
    from scipy.io import loadmat
    from options import MultiOptions
    opt = MultiOptions()

    if set == 'train':
        opt.opdict['feat_filename'] = '%s/features/%s' % (
            opt.opdict['outdir'], opt.opdict['feat_train'])
    print(opt.opdict['feat_filename'])

    if save:
        if os.path.exists(opt.opdict['feat_filename']):
            print("WARNING !! File %s already exists" % opt.opdict['feat_filename'])
            print("Check if you really want to replace it...")
            sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
    need_polarization = any(name in list_features for name in polar_features)

    def _store_polarization(frame, event_id, comp, traces):
        # Run the polarisation analysis with the duration and P-onset
        # averaged over the 3 components, then copy every requested
        # attribute to the Z, N and E rows of the event.
        d_mean = (frame.Dur[(event_id, 'BOR', comp)]
                  + frame.Dur[(event_id, 'BOR', 'E')]
                  + frame.Dur[(event_id, 'BOR', 'Z')]) / 3.
        po_mean = int((frame.Ponset[(event_id, 'BOR', comp)]
                       + frame.Ponset[(event_id, 'BOR', 'E')]
                       + frame.Ponset[(event_id, 'BOR', 'Z')]) / 3)
        rect, plan, az, iang = polarization_analysis(traces, d_mean, po_mean,
                                                     plot=False)
        for name, value in zip(polar_features, (rect, plan, az, iang)):
            if name in list_features:
                for component in ('Z', 'N', 'E'):
                    frame[name][(event_id, 'BOR', component)] = value

    if set == 'test':
        datafiles = glob.glob(os.path.join(opt.opdict['datadir'],
                                           'TestSet/SigEve_*'))
        datafiles.sort()
        # Event numbers taken from the file names, in increasing order.
        liste = sorted(
            int(os.path.basename(name).split('_')[1].split('.mat')[0])
            for name in datafiles)

        tsort = opt.read_csvfile(opt.opdict['label_filename'])
        tsort.index = tsort.Date

        # File positions that are skipped with the 'hash' option.
        hash_skipped = [409, 1026, 1027, 1028, 1993, 2121, 2122, 2123, 2424,
                        2441, 3029, 3058, 3735, 3785, 3852, 3930, 4200, 4463,
                        4464, 4746, 6150, 6382, 6672, 6733]

        for ifile, numfile in enumerate(liste):
            path = os.path.join(opt.opdict['datadir'],
                                'TestSet/SigEve_%d.mat' % numfile)
            print("%s %s" % (ifile, path))
            mat = loadmat(path)

            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                ind = (numfile, 'BOR', comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = tsort[tsort.Date == numfile].Type.values[0]
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp)
                list_attr = s.__dict__.keys()
                # NOTE(review): more than 2 attributes presumably means
                # that a trace was actually loaded -- confirm against
                # SeismicTraces.
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        if ifile in hash_skipped:
                            continue
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, False)
                    _store_polarization(df, numfile, comp, s)

    elif set == 'train':
        datafile = os.path.join(opt.opdict['datadir'],
                                'TrainingSetPlusSig_2.mat')
        mat = loadmat(datafile)

        for i in range(mat['KurtoEB'].shape[1]):
            print("EB %s" % i)
            # NOTE(review): only EB events 10 and 61 are processed; this
            # looks like a leftover debugging filter -- confirm.
            if i != 10 and i != 61:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i, 'BOR', comp)])
                dic['EventType'] = 'EB'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'EB'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)
                    # Offset added to the VT event numbers so that they
                    # follow the EB ones in the index.
                    neb = i + 1

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, train=[i, 'EB'])
                    _store_polarization(df, i, comp, s)

        for i in range(mat['KurtoVT'].shape[1]):
            # NOTE(review): neb is only defined once an EB event has been
            # appended above -- confirm that this always happens.
            print("VT %s" % (i + neb))
            # NOTE(review): only VT event 5 is processed; this looks like
            # a leftover debugging filter -- confirm.
            if i != 5:
                continue
            counter = 0
            for comp in opt.opdict['channels']:
                counter = counter + 1
                dic = pd.DataFrame(columns=list_features,
                                   index=[(i + neb, 'BOR', comp)])
                dic['EventType'] = 'VT'
                dic['Ponset'] = 0

                s = SeismicTraces(mat, comp, train=[i, 'VT'])
                list_attr = s.__dict__.keys()
                if len(list_attr) > 2:
                    if opt.opdict['option'] == 'norm':
                        dic = extract_norm_features(s, list_features, dic)
                    elif opt.opdict['option'] == 'hash':
                        dic = extract_hash_features(
                            s, list_features, dic,
                            opt.opdict['permut_file'], plot=False)
                    df = df.append(dic)

                if counter == 3 and need_polarization:
                    s.read_all_files(mat, train=[i, 'VT'])
                    _store_polarization(df, i + neb, comp, s)

    if save:
        print("Features written in %s" % opt.opdict['feat_filename'])
        df.to_csv(opt.opdict['feat_filename'])
def compare_clement():
    """
    Compare Clément's features with the ones I recomputed.

    The PDFs are computed twice on channel 'Z', each time from the first
    element of ``opt.xs`` / ``opt.ys`` obtained after ``opt.do_tri()``:
    once with the feature list set below (my own computations), once after
    switching ``feat_train`` / ``feat_test`` to Clément's csv files.
    The first set of PDFs is then handed to ``opt.plot_superposed_pdfs``.

    NOTE(review): ``compare_clement`` appears twice in this source with the
    same statements.
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    def _pdfs_on_first_draw(relabel_columns):
        # Run do_tri(), keep its first (x, y) pair and compute the PDFs.
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]
        if relabel_columns:
            opt.x.columns = opt.opdict['feat_list']
        opt.compute_pdfs()

    # My own computations
    opt.opdict['feat_list'] = ['Dur', 'AsDec', 'RappMaxMean', 'Kurto', 'KRapp']
    opt.opdict['feat_log'] = ['AsDec', 'RappMaxMean', 'Kurto']
    # Alternative setting kept from the original author:
    #opt.opdict['feat_list'] = ['Ene']
    #opt.opdict['feat_log'] = ['Ene']
    _pdfs_on_first_draw(relabel_columns=True)
    my_gauss = opt.gaussians

    # Scatter plot of log(Kurto) against log(RappMaxMean), only when both
    # features are part of the current feature list.
    if all(name in opt.opdict['feat_list']
           for name in ('Kurto', 'RappMaxMean')):
        scatter_fig = plt.figure()
        scatter_fig.set_facecolor('white')
        plt.plot(np.log(opt.x.Kurto), np.log(opt.x.RappMaxMean), 'ko')
        plt.xlabel('Kurto')
        plt.ylabel('RappMaxMean')
        plt.show()

    # Clément's computations
    #opt.opdict['feat_list'] = ['Dur','AsDec','RappMaxMean','Kurto','Ene']
    opt.opdict['feat_log'] = []
    opt.opdict['feat_train'] = 'clement_train.csv'
    opt.opdict['feat_test'] = 'clement_test.csv'
    _pdfs_on_first_draw(relabel_columns=False)

    # Solid line  --> Clément
    # Dashed line --> me
    opt.plot_superposed_pdfs(my_gauss, save=False)
def read_data_for_features_extraction(save=False):
    """
    Extract the features from all seismic files.

    For every event returned by ``opt.read_classification()`` and every
    station of ``opt.opdict['stations']``, the first data file matching each
    channel is looked up under ``opt.opdict['datadir']``.  Its features are
    computed with ``extract_norm_features`` or ``extract_hash_features``
    (depending on ``opt.opdict['option']``) and appended to a pandas
    DataFrame indexed by ``(date, station, channel)`` tuples.

    When the 'norm' option produced features for three channels of a
    station and at least one polarization feature ('Rectilinearity',
    'Planarity', 'Azimuth', 'Incidence') is requested,
    ``polarization_analysis`` is run once and each requested value is copied
    to the 'Z', 'N' and 'E' rows of that event/station.

    :param save: if True, write the DataFrame to
        ``opt.opdict['feat_filename']`` as a .csv file.  The program exits
        (``sys.exit``) before doing any work if that file already exists.
    """
    from options import MultiOptions
    opt = MultiOptions()

    # Refuse to overwrite an existing feature file.
    if save and os.path.exists(opt.opdict['feat_filename']):
        print("WARNING !! File %s already exists"
              % opt.opdict['feat_filename'])
        print("Check if you really want to replace it...")
        sys.exit()

    list_features = opt.opdict['feat_list']
    df = pd.DataFrame(columns=list_features)

    # Polarization features, in the order polarization_analysis returns them.
    polar_features = ('Rectilinearity', 'Planarity', 'Azimuth', 'Incidence')
    need_polarization = any(name in list_features for name in polar_features)

    # Classification
    tsort = opt.read_classification()
    tsort.index = tsort.Date
    tsort = tsort.reindex(columns=['Date', 'Type'])

    list_sta = opt.opdict['stations']
    for ifile in range(tsort.shape[0]):
        date = tsort.values[ifile, 0]
        event_type = tsort.values[ifile, 1]

        for sta in list_sta:
            print("##### %s" % (sta,))
            # Number of channels processed with the 'norm' option.
            counter = 0
            for comp in opt.opdict['channels']:
                ind = (date, sta, comp)
                dic = pd.DataFrame(columns=list_features, index=[ind])
                dic['EventType'] = event_type
                dic['Ponset'] = 0

                list_files = sorted(glob.glob(
                    os.path.join(
                        opt.opdict['datadir'], sta, '*%s.D' % comp,
                        '*%s.D*%s_%s*' % (comp, str(date)[:8],
                                          str(date)[8:]))))
                if not list_files:
                    # No data for this channel: nothing to extract.
                    continue

                path = list_files[0]
                print("%s %s" % (ifile, path))
                if opt.opdict['option'] == 'norm':
                    counter = counter + 1
                    dic = extract_norm_features(list_features, date, path,
                                                dic)
                elif opt.opdict['option'] == 'hash':
                    permut_file = '%s/permut_%s' % (
                        opt.opdict['libdir'],
                        opt.opdict['feat_test'].split('.')[0])
                    dic = extract_hash_features(list_features, date, path,
                                                dic, permut_file, plot=True)
                df = df.append(dic)

            if counter == 3 and need_polarization:
                from waveform_features import polarization_analysis
                # Mean duration and mean P-onset over the three components.
                d_mean = (df.Dur[(date, sta, comp)] +
                          df.Dur[(date, sta, 'E')] +
                          df.Dur[(date, sta, 'Z')]) / 3.
                po_mean = int((df.Ponset[(date, sta, comp)] +
                               df.Ponset[(date, sta, 'E')] +
                               df.Ponset[(date, sta, 'Z')]) / 3)
                # NOTE(review): assumes the last processed channel is 'N',
                # so that `path` is the N-component file -- TODO confirm.
                list_files = [
                    path,
                    path.replace("N.D", "E.D"),
                    path.replace("N.D", "Z.D"),
                ]
                polar_values = polarization_analysis(list_files, d_mean,
                                                     po_mean, plot=False)
                rect, plan, az, iang = polar_values
                for name, value in zip(polar_features,
                                       (rect, plan, az, iang)):
                    # Only write the features that were actually requested.
                    # The previous test for 'Azimuth' was always true and
                    # wrote the column even when it was not requested.
                    if name in list_features:
                        for chan in ('Z', 'N', 'E'):
                            df[name][(date, sta, chan)] = value

    if save:
        print("Features written in %s" % opt.opdict['feat_filename'])
        df.to_csv(opt.opdict['feat_filename'])
def plot_best_worst():
    """
    Plot the PDFs of the training set for the best and the worst draws and
    compare them with the PDFs of the whole training set.

    One figure is produced per feature.  The whole set is drawn in black
    (solid: VT, dashed: EB); the best draw in red and the worst draw in
    green, with the same line-style convention.  Each figure is saved as
    ``best_worst_<feature>.png`` under ``opt.opdict['fig_path']`` and then
    shown.
    """
    from options import MultiOptions, read_binary_file
    opt = MultiOptions()

    # (feature name, position of the best draw, position of the worst draw)
    # -- the positions index the sorted list of 'learning*' files below.
    feat_list = [('AsDec',0,1),('Bandwidth',5,0),('CentralF',1,0),
                 ('Centroid_time',4,0),('Dur',4,1),('Ene0-5',1,4),
                 ('Ene5-10',0,4),('Ene',0,3),('F_low',4,2),('F_up',0,7),
                 ('IFslope',7,8),('Kurto',2,0),('MeanPredF',1,4),
                 ('PredF',1,4),('RappMaxMean',0,1),('RappMaxMeanTF',4,0),
                 ('Skewness',2,5),('TimeMaxSpec',4,0),('Rectilinearity',8,3),
                 ('Planarity',1,2)]

    opt.opdict['feat_list'] = opt.opdict['feat_all']
    opt.opdict['feat_log'] = ['AsDec','Ene','Kurto','RappMaxMean']
    opt.opdict['feat_filename'] = '../results/Piton/features/Piton_trainset.csv'
    opt.opdict['label_filename'] = '../lib/Piton/class_train_set.csv'
    x_all, y_all = opt.features_onesta('BOR','Z')

    list_files = sorted(glob.glob(os.path.join('../lib/Piton','learning*')))

    # The first 60% of the entries of a draw are used as its training set.
    mtraining = int(0.6*len(y_all))

    draw_styles = [('best', 'r'), ('worst', 'g')]

    for feat, best, worst in feat_list:
        print("%s %s %s" % (feat, best, worst))
        fig = plt.figure()
        fig.set_facecolor('white')

        # Whole data set
        opt.x = x_all.reindex(columns=[feat])
        opt.y = y_all.reindex(index=opt.x.index)
        opt.opdict['feat_list'] = [feat]
        opt.compute_pdfs()
        g_all = opt.gaussians
        plt.plot(g_all[feat]['vec'],g_all[feat]['VT'],'k',lw=2.,label='VT')
        plt.plot(g_all[feat]['vec'],g_all[feat]['EB'],'k--',lw=2.,label='EB')

        draw_files = [list_files[best], list_files[worst]]
        for (tag, color), draw_file in zip(draw_styles, draw_files):
            draw = read_binary_file(draw_file)
            train_index = draw[:mtraining]

            # Training set of this draw
            opt.x = x_all.reindex(columns=[feat],index=train_index)
            opt.y = y_all.reindex(index=train_index)
            opt.compute_pdfs()
            g_train = opt.gaussians
            plt.plot(g_train[feat]['vec'],g_train[feat]['VT'],'-',c=color,label=tag)
            plt.plot(g_train[feat]['vec'],g_train[feat]['EB'],'--',c=color)

        plt.legend()
        plt.title(feat)
        plt.savefig('%s/best_worst_%s.png'%(opt.opdict['fig_path'],feat))
        plt.show()
def compare_clement():
    """
    Compare Clément's features with the ones I recomputed.

    The PDFs are computed twice on channel 'Z', each time from the first
    element of ``opt.xs`` / ``opt.ys`` obtained after ``opt.do_tri()``:
    once with the feature list set below (my own computations), once after
    switching ``feat_train`` / ``feat_test`` to Clément's csv files.
    The first set of PDFs is then handed to ``opt.plot_superposed_pdfs``.

    NOTE(review): ``compare_clement`` appears twice in this source with the
    same statements.
    """
    from options import MultiOptions

    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    def _pdfs_on_first_draw(relabel_columns):
        # Run do_tri(), keep its first (x, y) pair and compute the PDFs.
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]
        if relabel_columns:
            opt.x.columns = opt.opdict['feat_list']
        opt.compute_pdfs()

    # My own computations
    opt.opdict['feat_list'] = ['Dur', 'AsDec', 'RappMaxMean', 'Kurto', 'KRapp']
    opt.opdict['feat_log'] = ['AsDec', 'RappMaxMean', 'Kurto']
    # Alternative setting kept from the original author:
    #opt.opdict['feat_list'] = ['Ene']
    #opt.opdict['feat_log'] = ['Ene']
    _pdfs_on_first_draw(relabel_columns=True)
    my_gauss = opt.gaussians

    # Scatter plot of log(Kurto) against log(RappMaxMean), only when both
    # features are part of the current feature list.
    if all(name in opt.opdict['feat_list']
           for name in ('Kurto', 'RappMaxMean')):
        scatter_fig = plt.figure()
        scatter_fig.set_facecolor('white')
        plt.plot(np.log(opt.x.Kurto), np.log(opt.x.RappMaxMean), 'ko')
        plt.xlabel('Kurto')
        plt.ylabel('RappMaxMean')
        plt.show()

    # Clément's computations
    #opt.opdict['feat_list'] = ['Dur','AsDec','RappMaxMean','Kurto','Ene']
    opt.opdict['feat_log'] = []
    opt.opdict['feat_train'] = 'clement_train.csv'
    opt.opdict['feat_test'] = 'clement_test.csv'
    _pdfs_on_first_draw(relabel_columns=False)

    # Solid line  --> Clément
    # Dashed line --> me
    opt.plot_superposed_pdfs(my_gauss, save=False)
def compare_lissage():
    """
    Compare the kurtosis PDFs obtained with several smoothing windows.

    ``plot_envelopes()`` is called first.  The PDFs of the 'Kurto' feature
    (channel 'Z') are then computed four times, from the default feature
    files and from three other pairs of train/test csv files, and drawn
    side by side in a 1x4 figure (VT in blue, EB in red) sharing the same
    x-range.
    """
    plot_envelopes()

    from options import MultiOptions
    opt = MultiOptions()
    opt.opdict['channels'] = ['Z']

    # Smoothing over 0.5 s windows (default feature files); this is the
    # only pass where the columns of opt.x are relabelled.
    opt.opdict['feat_list'] = ['Kurto']
    opt.opdict['feat_log'] = ['Kurto']
    opt.do_tri()
    opt.x = opt.xs[0]
    opt.y = opt.ys[0]
    opt.x.columns = opt.opdict['feat_list']
    opt.compute_pdfs()
    panels = [('0.5 s', opt.gaussians)]

    # Smoothing over 1 s, 5 s and 10 s windows: (title, train file, test file)
    other_smoothings = [
        ('1 s', '0610_Piton_trainset.csv', '0610_Piton_testset.csv'),
        ('5 s', '1809_Piton_trainset.csv', '1809_Piton_testset.csv'),
        ('10 s', '0510_Piton_trainset.csv', '0510_Piton_testset.csv'),
    ]
    for title, train_csv, test_csv in other_smoothings:
        opt.opdict['feat_train'] = train_csv
        opt.opdict['feat_test'] = test_csv
        opt.do_tri()
        opt.x = opt.xs[0]
        opt.y = opt.ys[0]
        opt.compute_pdfs()
        panels.append((title, opt.gaussians))

    ### PLOT OF SUPERPOSED PDFs ###
    fig = plt.figure(figsize=(12,2.5))
    fig.set_facecolor('white')
    for feat in sorted(opt.gaussians):
        # Common upper bound of the x-axis for the four panels.
        maxi = int(np.max([pdfs[feat]['vec'] for _title, pdfs in panels]))

        for position, (title, gauss) in enumerate(panels):
            is_first = position == 0
            # Only the first panel carries the legend entries.
            vt_label = {'label': 'VT'} if is_first else {}
            eb_label = {'label': 'EB'} if is_first else {}

            ax = fig.add_subplot(141 + position)
            ax.plot(gauss[feat]['vec'],gauss[feat]['VT'],ls='-',c='b',lw=2.,**vt_label)
            ax.plot(gauss[feat]['vec'],gauss[feat]['EB'],ls='-',c='r',lw=2.,**eb_label)
            ax.set_xlim([0,maxi])
            ax.set_xlabel(feat)
            ax.set_title(title)
            if is_first:
                ax.legend(prop={'size':10})
            else:
                ax.set_yticklabels('')

        #plt.savefig('%s/features/comp_%s.png'%(opt.opdict['outdir'],feat))
        plt.show()
def compare_ponsets(set='test'):
    """
    Compare the P-onsets determined with the frequency stack of the
    spectrogram, as a function of the spectrogram computation parameters.

    For every ``SigEve_<n>.mat`` file of the test set and every channel of
    ``opt.opdict['channels']``, the trace is plotted together with vertical
    lines marking:

    * red solid    -- 'norm' onset (``Ponset_freq`` of Piton_testset.csv)
    * red dashed   -- 'norm' onset + 100 * ``Dur`` of Piton_testset.csv
    * orange dashed-- 'norm' onset + 100 * ``Dur`` of clement_test.csv
    * green        -- ``Ponset`` of HT_Piton_testset.csv ('hash_64')
    * yellow       -- ``Ponset`` of HT32_Piton_testset.csv ('hash_32')

    :param set: name of the data set; only ``'test'`` is implemented.

    NOTE(review): the csv files are read with ``index_col=False`` and then
    selected with ``reindex(index=[str(ind)])``; confirm that the resulting
    index really holds these string labels, otherwise every selection is NaN.
    """
    from scipy.io.matlab import mio
    from features_extraction_piton import SeismicTraces
    from options import MultiOptions
    opt = MultiOptions()

    if set != 'test':
        return

    def _read_columns(template, columns):
        # Read a feature file located under 'outdir', keep only `columns`.
        frame = pd.read_csv(template % opt.opdict['outdir'], index_col=False)
        return frame.reindex(columns=columns)

    datafiles = sorted(glob.glob(
        os.path.join(opt.opdict['datadir'], 'TestSet/SigEve_*')))
    # Event numbers taken from the file names, sorted numerically so that
    # the files are processed in event order.
    liste = sorted(
        int(os.path.basename(name).split('_')[1].split('.mat')[0])
        for name in datafiles)

    df_norm = _read_columns('%s/features/Piton_testset.csv',
                            ['Ponset_freq', 'Dur'])
    df_clement = _read_columns('%s/features/clement_test.csv', ['Dur'])
    df_hash_64 = _read_columns('%s/features/HT_Piton_testset.csv',
                               ['Ponset'])
    df_hash_32 = _read_columns('%s/features/HT32_Piton_testset.csv',
                               ['Ponset'])

    for ifile, numfile in enumerate(liste):
        matfile = os.path.join(opt.opdict['datadir'],
                               'TestSet/SigEve_%d.mat' % numfile)
        print("%s %s" % (ifile, matfile))
        mat = mio.loadmat(matfile)

        for comp in opt.opdict['channels']:
            key = [str((numfile, 'BOR', comp))]

            norm_row = df_norm.reindex(index=key)
            p_norm = norm_row.Ponset_freq
            p_hash_64 = df_hash_64.reindex(index=key).Ponset
            p_hash_32 = df_hash_32.reindex(index=key).Ponset
            dur = norm_row.Dur * 100
            dur_cl = df_clement.reindex(index=key).Dur * 100

            s = SeismicTraces(mat, comp)
            # Vertical extent of the markers: full amplitude range of the trace.
            y_span = [np.min(s.tr), np.max(s.tr)]
            end_norm = p_norm + dur
            end_clement = p_norm + dur_cl

            fig = plt.figure(figsize=(9, 4))
            fig.set_facecolor('white')
            plt.plot(s.tr, 'k')
            plt.plot([p_norm, p_norm], y_span, 'r', lw=2., label='norm')
            plt.plot([end_norm, end_norm], y_span, 'r--', lw=2.)
            plt.plot([end_clement, end_clement], y_span, '--', c='orange',
                     lw=2.)
            plt.plot([p_hash_64, p_hash_64], y_span, 'g', lw=2.,
                     label='hash_64')
            plt.plot([p_hash_32, p_hash_32], y_span, 'y', lw=2.,
                     label='hash_32')
            plt.legend()
            plt.show()