# Run help(emd.sift.sift) for the help page of the sifting function.
imf = emd.sift.sift(data, sift_thresh = 1e-8)

# Remove the borders of every series: edge effects can be quite heavy at
# high frequencies. All four arrays are cut with the same sample window.
T = 30
offset = 500
window = slice(offset, int(T*srate)+offset)
data, times, WF = data[window], times[window], WF[window]
imf = imf[window,:]
print("Data are {}s long".format(len(data)/srate))

# PSD of column 0 of the EMD decomposition, of the trimmed data and of the
# waveform WF, all evaluated on one common frequency vector.
M = MESA()
freq = np.linspace(1./times[-1], 0.5*srate,1000)


def _psd_on_grid(series, **solve_options):
    """Fit the shared MESA instance on ``series`` and return its spectrum on ``freq``."""
    M.solve(series, **solve_options)
    return M.spectrum(1/srate,freq)


spec_imf = _psd_on_grid(imf[:,0], method = 'Standard', optimisation_method = 'CAT')
spec_data = _psd_on_grid(data, method = 'Standard')
spec_WF = _psd_on_grid(WF, method = "standard")

# Run the anomaly detection pipeline on the order-0 mode, with WF passed as
# the injection to be plotted next to the LL series.
data_pipeline = imf[:,0]
AnomalyDetection_pipeline(
    data_pipeline,
    srate,
    T_train = 10.,
    N_step = 20000,
    outfile = None,
    plot = True,
    injection = WF,
)

# Plot everything.
# In the order-0 mode you can see the chirp of the injected signal!
fig = plt.figure( figsize=(8,4) )
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
from style_sheet import init_plotting
from scipy.interpolate import interp1d
import matplotlib.ticker as ticker


def relative_error(real, estimate):
    """Return the relative error of ``estimate`` with respect to ``real``.

    Computed as ``|real - estimate| / |real|`` with numpy operations. The
    denominator uses the magnitude of ``real`` so the result is non-negative
    even when the reference value is negative.
    """
    return np.abs(real - estimate) / np.abs(real)


# Which reference spectrum to work with: 'normal' or 'ligo'.
PSD = 'ligo'
save = False
savedir = '../paper/Images/optimisers_comparison/' + PSD + '/'
simulate_data = False
# Optimisation methods to compare, and the plot colour used for each.
methods = ['FPE', 'CAT', 'OBD']
M = MESA()
init_plotting()
plt.close()
colors = {'FPE': 'r', 'CAT': 'k', 'OBD': 'green'}
print("Dealing with ",PSD)

# Initialize the single-spectrum dictionaries: one empty list per method.
spectra, optimisers, orders, errors = {}, {}, {}, {}
for method in methods:
    spectra[method], optimisers[method], orders[method], errors[method] = [], [], [], []

# Ensemble dictionaries.
median, p5, p95, ensemble_error = {}, {}, {}, {}

# Generating noise
def plot_PSD_imf(imf, data, srate, WF = None, title = None, folder = '.'):
    """Plot the MESA PSD of the data, of an optional waveform and of the EMD modes.

    One log-log curve is drawn for ``data``, one for ``WF`` (when given) and
    one for every column of ``imf`` (when given). Every curve is truncated
    to the first ``len(data)//2`` points of the spectrum returned by MESA.

    Parameters
    ----------
    imf : 2D array or None
        EMD modes; column ``i`` is plotted with label ``"EMD i+1"``.
    data : array
        Time series whose PSD is plotted with label ``"data"``.
    srate : float
        Sampling rate; ``1/srate`` is passed to ``MESA.spectrum``.
    WF : array or None
        Optional waveform, plotted with label ``"WF"``.
    title : str or None
        Appended to the figure title and to the output file name.
    folder : str or None
        Folder the PDF is written to; when ``None`` nothing is saved.
    """
    estimator = MESA()
    fig = plt.figure( figsize=(8,4) )
    ax = fig.gca()

    heading = "PSD of the EMD modes vs PSD of data"
    if title is not None:
        heading = heading+ ' - '+title
    plt.title(heading)

    # Every curve is cut at the same index, derived from the data length.
    half = len(data)//2

    def draw_psd(series, label, **solve_options):
        """Fit MESA on ``series`` and add its truncated PSD to the axes."""
        estimator.solve(series, **solve_options)
        f, spec = estimator.spectrum(1/srate)
        ax.loglog(f[:half], spec[:half], label= label)

    # Spectrum of the data.
    draw_psd(data, "data", method = 'Fast', optimisation_method = 'CAT')

    if WF is not None:
        draw_psd(WF, "WF", method = "standard")

    if imf is not None:
        n_modes = imf.shape[1]
        for k in range(n_modes):
            print("EMD component {}/{}".format(k+1 ,n_modes))
            draw_psd(imf[:,k], "EMD {}".format(k+1), method = 'Standard', optimisation_method = 'CAT')

    plt.legend()

    if folder is None:
        return

    filename = "/PSD_imf.pdf" if title is None else "/PSD_imf-{}.pdf".format(title)
    fig.savefig(folder+filename)
# Debug plots for the band-pass step, currently disabled by the constant
# `if False`. When enabled it draws the raw vs band-passed time series with a
# vertical line at t_merger, then compares the MESA PSD of the band-passed
# data (blue) with that of the raw data (red) on a common frequency vector.
# NOTE(review): the extent of this disabled branch was reconstructed from a
# whitespace-collapsed source; it is assumed to end at plt.show() - confirm.
if False: #plot bandpassing
    plt.figure()
    plt.title("Data vs filtered data")
    plt.plot(times,data)
    plt.plot(times,data_pass)
    #plt.axvline(1126259462.4-1126259447, c = 'r')
    plt.axvline(t_merger, c = 'r')
    fig_PSDseries = plt.figure(2)
    ax_PSDseries = fig_PSDseries.add_subplot(111)
    ax_PSDseries.set_ylabel("PSD")
    ax_PSDseries.set_yscale('log')
    M = MESA()
    # PSD of the band-passed series.
    M.solve(data_pass, early_stop = True, method = 'Standard')
    freq = np.linspace(1./times[-1], 0.5*srate,1000)
    spec = M.spectrum(1/srate,freq)
    ax_PSDseries.loglog(freq, np.abs(spec), c = 'b', label= "bandpass")
    # PSD of the unfiltered series, on the same frequency vector.
    M.solve(data, method = 'Standard')
    freq = np.linspace(1./times[-1], 0.5*srate,1000)
    spec = M.spectrum(1/srate,freq)
    ax_PSDseries.loglog(freq, spec, c = 'r', label= "standard data")
    plt.legend()
    plt.show()
# From here on the analysis uses the band-passed series.
data = data_pass
#data = data + WF
print("Using real data")
#data = np.loadtxt("../examples/data/V-V1_GWOSC_4KHZ_R1-1186741846-32.txt")
#data = pd.read_csv("../../GWAnomalyDetection/maxent/H-H1_GWOSC_16KHZ_R1-1126259447-32.txt.gz", skiprows = 3).to_numpy()
# Load the strain file (skipping its first 3 rows) and drop singleton axes.
data = np.squeeze(
    pd.read_csv("H-H1_GWOSC_4KHZ_R1-1246525177-4096.txt.gz", skiprows=3).to_numpy()
)
print("Loaded data: shape {}; srate {}; length {}s".format(
    data.shape, srate, len(data) / srate))

# Number of samples left out at the end of the file for every batch.
tail_skip = 4096 * 5

for i, T in enumerate(T_list):
    print("Batch length: {}s".format(T))

    # Take T seconds of samples ending tail_skip samples before the end.
    batch_len = int(srate * T)
    data_T = data[-batch_len - tail_skip:-tail_skip]

    # Maximum entropy estimate.
    M = MESA()
    M.solve(data_T, early_stop=True, method='Standard')
    print("\tDone MESA")

    # Welch estimate, with the i-th segment length divided by srate.
    welch_seglen = seglen[i] / float(srate)
    freqs, PSD_Welch = psd(
        data_T,
        srate,
        welch_seglen,
        window_function=None,
        overlap_fraction=0.5,
        nfft=None,
        return_onesided=False,
    )
    print("\tDone Welch")

    # Evaluate the MESA spectrum on the Welch frequencies and store both.
    PSD_MESA = M.spectrum(1. / srate, freqs)
    outname = "plot_data/plot_{}_{}.txt".format(T, use_fake_data)
    np.savetxt(outname, np.column_stack([freqs, PSD_MESA, PSD_Welch]))
# Load the first T * 4096 samples of the file.
T = 4
datafile = "data/V-V1_GWOSC_4KHZ_R1-1186741846-32.txt"
n_raw = int(T * 4096)
data = np.loadtxt(datafile)[:n_raw]

# Downsample when the requested rate differs from 4096.
if srate != 4096.:
    data = decimate(data, int(4096 / srate), zero_phase=True)

# Optional 4th-order Butterworth band-pass, applied forward and backward.
if bandpassing == 1:
    from scipy.signal import butter, filtfilt
    nyquist = 0.5 * srate
    bb, ab = butter(4, [f_min_bp / nyquist, f_max_bp / nyquist], btype='band')
    data = filtfilt(bb, ab, data)

N = data.shape[0]
f = np.fft.fftfreq(N, d=dt)
t = np.arange(0, T, step=dt)

M = MESA()

# Time the MESA fit (the order printout is inside the timed span).
start = time.perf_counter()
max_order = int(2 * N / (2 * np.log(N)))
P, ak, _ = M.solve(data,
                   method="Fast",
                   optimisation_method="FPE",
                   m=max_order)
print("p = {0}".format(len(ak)))
elapsed = time.perf_counter() - start
print("Time spent MESA: {0} s".format(elapsed))

# Time the evaluation of the spectrum on the FFT frequencies.
start = time.perf_counter()
PSD = M.spectrum(dt, f)
elapsed = time.perf_counter() - start
print("Time spent PSD: {0} s".format(elapsed))
Wtime, Wtime_series, Wfrequency, Wfrequency_series, Wpsd = generate_data( wnoise_frequency, wnoise_spectrum, times[-1], 1 / dt) w_interp = interp1d(wnoise_frequency, wnoise_spectrum) if Ligo_noise: print('Generating Ligo noise series') #ligo_frequency, ligo_spectrum = np.loadtxt('LIGO-P1200087-v18-AdV_DESIGN_psd.dat', unpack=True) ligo_frequency, ligo_spectrum, _ = np.loadtxt( 'GWTC1_GW150914_PSDs.dat', unpack=True) Ltime, Ltime_series, Lfrequency, Lfrequency_series, Lpsd = generate_data( ligo_frequency, ligo_spectrum, times[-1], 1 / dt) l_interp = interp1d(ligo_frequency, ligo_spectrum, fill_value='extrapolate') M = MESA() init_plotting() for i, n in enumerate(segment_length): if white_noise: w_welchFreq, w_welchSpectrum = welch.psd(Wtime_series[:int(N[i])], 1 / dt, n * dt) M.solve(Wtime_series[:int(N[i])]) w_mesaSpectrum, w_mesaFreq = M.spectrum(dt) fig, ax = plt.subplots() ax.loglog(w_welchFreq[:n // 2], w_welchSpectrum[:n // 2], color='blue') ax.loglog(w_mesaFreq[:int(N[i] // 2)], w_mesaSpectrum[:int(N[i] // 2)],
parser.add_option('-T', default=None, type='float', help='duration of the data')
opts, args = parser.parse_args()

import time
import matplotlib.pyplot as plt
from scipy.signal import decimate

srate = opts.srate
dt = 1./srate
datafile = opts.data

# Read the CSV with named columns and take the 'CloseLast' column in
# reversed row order.
d = np.genfromtxt(datafile, delimiter=',', names=True)
data = d['CloseLast'][::-1]

# NOTE(review): T is set to the number of samples, and the '-T' option parsed
# above is not read anywhere in this fragment - confirm this is intended.
T = len(data)
N = data.shape[0]
f = np.fft.fftfreq(N, d=dt)
t = np.arange(0,T,step=dt)

M = MESA(data)

# Time the MESA fit (the order printout is inside the timed span).
start = time.perf_counter()
max_order = int(2*N/(2*np.log(N)))
P, ak, _ = M.solve(method = "Fast", optimisation_method = "FPE", m = max_order)
print("p = {0}".format(len(ak)))
elapsed = time.perf_counter() - start
print ("Time spent MESA: {0} s".format(elapsed))

# Time the evaluation (and printout) of the spectrum on the FFT frequencies.
start = time.perf_counter()
PSD = M.spectrum(dt,f)
print(PSD)
elapsed = time.perf_counter() - start
print ("Time spent PSD: {0} s".format(elapsed))

fig = plt.figure(1)
# Load the first T * 4096 samples of the file.
T = 6
t_forecast = 4
datafile = "V-V1_GWOSC_4KHZ_R1-1186741846-32.txt"
n_raw = int(T * 4096)
data = np.loadtxt(datafile)[:n_raw]

# Downsample when the requested rate differs from 4096.
if srate != 4096.:
    data = decimate(data, int(4096 / srate), zero_phase=True)

# Optional 4th-order Butterworth band-pass, applied forward and backward.
if bandpassing == 1:
    from scipy.signal import butter, filtfilt
    nyquist = 0.5 * srate
    bb, ab = butter(4, [f_min_bp / nyquist, f_max_bp / nyquist], btype='band')
    data = filtfilt(bb, ab, data)

N = data.shape[0]
f = np.fft.fftfreq(N, d=dt)
t = np.arange(0, T, step=dt)

M = MESA(data)

# Time the MESA fit (the order printout is inside the timed span).
start = time.perf_counter()
max_order = int(2 * N / (2 * np.log(N)))
P, ak, _ = M.solve(method="Fast",
                   optimisation_method="FPE",
                   m=max_order)
print("p = {}".format(len(ak)))
elapsed = time.perf_counter() - start
print("Time spent MESA: {0} s".format(elapsed))

# Time the evaluation of the spectrum on the FFT frequencies.
start = time.perf_counter()
PSD = M.spectrum(dt, f)
elapsed = time.perf_counter() - start
print("Time spent PSD: {0} s".format(elapsed))

fig = plt.figure(1)
def AnomalyDetection_pipeline(data, srate, T_train, N_step, outfile = None, plot = True, GPS_time = 0.,injection = None, method = "FPE"):
    """Run the anomaly detection pipeline on the given data.

    A MESA model is trained on the samples from 0 to ``T_train`` seconds.
    The rest of the series is then visited in steps of ``N_step`` samples:
    for every step the model forecasts ``N_step + 1`` samples starting from
    the samples immediately preceding the batch, and ``data_LL_gauss`` is
    evaluated on the actual batch against those forecasts.

    Parameters
    ----------
    data : array
        Time series to analyse.
    srate : float
        Sampling rate, used to convert times to sample indices.
    T_train : float
        Length in seconds of the initial stretch used for training.
    N_step : int
        Step, in samples, between the starts of consecutive batches.
    outfile : str or None
        When a string, the LL of every batch is collected and the output
        dictionary is pickled to this path ('.pkl' is appended if missing).
    plot : bool
        Whether to create the time series, LL and PSD figures.
    GPS_time : float
        Stored under the 'GPS' key of the output and shown in the log line.
    injection : array or None
        If given (and ``plot`` is true), it is plotted below the LL series.
    method : str
        Passed to ``MESA.solve`` as ``optimisation_method``.

    Returns
    -------
    dict
        Keys 'GPS', 'srate', 'N_batches', 't_start' and 'LL'. The per-batch
        entries are only filled when ``outfile`` is a string; otherwise
        'N_batches' is 0 and the two arrays are empty.
    """
    print("Running pipeline @ GPS time {}.\n\tT_train, N_step = {},{}".format(GPS_time,T_train, N_step))
    # TODO: save LL in the pickle so that the GPS time can be kept as an int
    # and the LL times as floats.
    LL_list = []
    t_start_list = []

    id_start = int(T_train*srate)  # index of the first sample to forecast
    train_data = data[0:id_start]
    times = np.linspace(0, len(data)/srate, len(data))

    # Train the model; the values returned by solve() are not needed here.
    M = MESA()
    M.solve(train_data, method = "Standard", optimisation_method = method, early_stop = True)

    Np = 100  # number of predictions requested from M.forecast per batch

    if plot:
        # Time series plot, starting with the training data.
        fig_timeseries = plt.figure()
        ax_timeseries = fig_timeseries.add_subplot(111)
        ax_timeseries.set_ylabel("Strain")
        ax_timeseries.plot(times[:id_start], train_data, linewidth=1., color='r', zorder = 3)

        # LL series plot, with a second panel for the injection if given.
        fig_LLseries = plt.figure()
        if injection is not None:
            ax_LLseries = fig_LLseries.add_subplot(211)
            ax_WF = fig_LLseries.add_subplot(212)
            ax_WF.set_ylabel("Strain")
        else:
            ax_LLseries = fig_LLseries.add_subplot(111)
        ax_LLseries.set_ylabel("LL")
        ax_LLseries.set_yscale('log')

        # PSD of the training data.
        fig_PSDseries = plt.figure()
        plt.title("PSD of {}s of training data".format(T_train))
        ax_PSDseries = fig_PSDseries.add_subplot(111)
        ax_PSDseries.set_ylabel("PSD")
        ax_PSDseries.set_yscale('log')
        freq = np.linspace(1./T_train, 0.5*srate,1000)
        spec = M.spectrum(1/srate,freq)
        ax_PSDseries.loglog(freq, spec)

    ids_iterator = range(id_start,len(data)-N_step, N_step)
    # NOTE(review): the original author flagged a problem here with missing
    # gaps in the data (or something related) - not investigated.
    for i, id_ in enumerate(ids_iterator):
        sys.stderr.write("\rAnalysing batch {} of {}: t in [{},{}]".format(i+1, len(ids_iterator), times[id_],times[id_+N_step]))

        # Each batch holds N_step+1 samples, so consecutive batches share
        # their boundary sample.
        times_batch = times[id_:id_+N_step+1]
        data_batch = data[id_:id_+N_step+1]  # data to predict
        # The forecast starts from the last max(p, 1) samples before the batch.
        forecast_basis = data[id_-max(M.get_p(),1):id_]
        predictions = M.forecast(forecast_basis, N_step+1, Np)  # presumably (Np, N_step+1) - confirm

        LL = data_LL_gauss(data_batch, predictions)

        if plot:
            # The percentiles are only needed for the figures.
            lower, middle, upper = np.percentile(predictions, [5,50,95],axis=0)
            # NOTE(review): the 1e5 offset presumably keeps the curve
            # positive on the log axis - confirm.
            ax_LLseries.plot(times_batch, np.cumsum(LL)+1e5)
            ax_timeseries.fill_between(times_batch,lower,upper,facecolor='turquoise',alpha=0.8, zorder = 0)
            ax_timeseries.plot(times_batch, middle, ':', linewidth=.7, color='b', zorder = 2, label = "Median prediction")
            ax_timeseries.plot(times_batch, data_batch, linewidth=1., color='r', zorder = 3, label = "Data")
            if i == 0:
                # Draw the legend once so the labels are not repeated.
                ax_timeseries.legend()
            if injection is not None:
                WF_batch = injection[id_:id_+N_step+1]
                ax_WF.plot(times_batch, WF_batch, linewidth=1., color='k', zorder = 3)

        # Results are only accumulated when they are going to be saved.
        if isinstance(outfile, str):
            LL_list.append(LL)
            t_start_list.append(times_batch[0])

    sys.stderr.write('\n')

    # Build the output dictionary.
    LL_dict = {}
    LL_dict['GPS'] = GPS_time
    LL_dict['srate'] = srate
    LL_dict['N_batches'] = len(LL_list)
    LL_dict['t_start'] = np.array(t_start_list)  # one entry per stored batch
    LL_dict['LL'] = np.array(LL_list)  # one row per stored batch

    if isinstance(outfile, str):
        if not outfile.endswith('.pkl'):
            outfile +='.pkl'
        with open(outfile, 'wb') as f:
            pickle.dump(LL_dict, f, pickle.HIGHEST_PROTOCOL)

    return LL_dict