def peaks_align(global_peaks, local_peaks, fill_zero):
    """Overlay local peaks onto the best-matching window of the global peaks.

    ``local_peaks`` is slid along ``global_peaks``. For every offset the
    summed absolute distance between the paired entries is computed and the
    offset with the smallest sum is selected (the first one on ties).

    Parameters
    ----------
    global_peaks : sequence of float; reference peak positions. It is NOT
        modified by this function.
    local_peaks : sequence of float; peak positions to align. It must not be
        longer than ``global_peaks``; otherwise indexing raises IndexError.
    fill_zero : bool; if True, every position outside the matched window is
        0, otherwise those positions keep the values of ``global_peaks``.

    Returns
    -------
    list; same length as ``global_peaks`` with the matched window replaced
        by ``local_peaks``.
    """
    lg.function_log()
    n_local = len(local_peaks)
    max_offset = abs(len(global_peaks) - n_local)

    # One score per candidate offset: summed |global - local| over the window.
    score_list = [
        sum(abs(global_peaks[offset + j] - local_peaks[j])
            for j in range(n_local))
        for offset in range(max_offset + 1)
    ]
    best_offset = score_list.index(min(score_list))

    # Always write into a fresh list. Writing into ``global_peaks`` itself
    # would silently change the caller's reference peaks, so that every
    # later alignment runs against already-overwritten values.
    if fill_zero == True:
        result = [0] * len(global_peaks)
    else:
        result = list(global_peaks)

    for j in range(n_local):
        result[best_offset + j] = local_peaks[j]
    return result
def df_append(path, name, f_min, f_max, im_max, real_max, exp_params_list,
              global_peaks, error, loop_index, df):
    """Store one fit result as row ``loop_index`` of ``df`` (modified in place).

    The fitted parameters are first mapped onto the global peak slots with
    ``params_align``. On the first call (``loop_index == 0``) the columns are
    created: seven meta columns followed by ``im_Z_k``, ``freq_k`` and
    ``sigma_k`` for k = 1 .. len(global_peaks).

    Parameters
    ----------
    path, name : identifiers written to the 'path' and 'name' columns.
    f_min, f_max, im_max, real_max, error : values written to the columns
        of the same name.
    exp_params_list : fitted parameters, passed to ``params_align``.
    global_peaks : reference peak list; its length fixes the column count.
    loop_index : row label to write; 0 triggers column creation.
    df : pandas.DataFrame receiving the row.

    Returns
    -------
    None
    """
    lg.function_log()
    aligned_params = params_align(global_peaks, exp_params_list)
    row_values = [path, name, f_min, f_max, im_max, real_max, error
                  ] + aligned_params

    if loop_index == 0:
        peak_numbers = [str(k + 1) for k in range(len(global_peaks))]
        column_names = (
            ['path', 'name', 'f_min', 'f_max', 'im_max', 'real_max', 'error']
            + ['im_Z_' + number for number in peak_numbers]
            + ['freq_' + number for number in peak_numbers]
            + ['sigma_' + number for number in peak_numbers]
        )
        for position, column in enumerate(column_names):
            df.insert(position, column, row_values[position], True)

    # The row itself is written in both cases.
    df.loc[loop_index] = row_values
def total_fit_error(x, y_real, y_fit, ter):
    """Return the sum of squared residuals, each residual divided by ``ter``.

    Parameters
    ----------
    x : sized; only its length is used (number of points to sum over).
    y_real : indexable; measured values, read as ``y_real[i]``.
    y_fit : indexable; fitted values, read as ``y_fit[i]``.
    ter : number; divisor applied to every residual before squaring.

    Returns
    -------
    Sum over i in range(len(x)) of ((y_real[i] - y_fit[i]) / ter) ** 2;
    0 when ``x`` is empty.
    """
    lg.function_log()
    return sum(((y_real[k] - y_fit[k]) / ter)**2 for k in range(len(x)))
def low_pass_gauss_filter(n, sigma_filter):
    """Build an ``n``-point filter made of two Gaussian flanks.

    One Gaussian is centred on index 0 and a second one on index ``n - 1``;
    both have width ``sigma_filter`` and unit height, and they are added.

    Parameters
    ----------
    n : int; number of filter points.
    sigma_filter : float; Gaussian width in index units.

    Returns
    -------
    numpy.ndarray of length ``n``.
    """
    lg.function_log()
    x = np.arange(0, n, 1)
    # flank anchored at the first index
    leading_flank = np.exp(-(x**2) * 0.5 / sigma_filter**2)
    # mirrored flank anchored at the last index (n - 1)
    trailing_flank = np.exp(-(((x - n + 1)**2) * 0.5 / sigma_filter**2))
    return (leading_flank + trailing_flank)
def generate_bounds(params, error):
    """Return symmetric bounds ``value -/+ error`` for every parameter.

    Parameters
    ----------
    params : iterable of numbers; parameter values.
    error : number; half-width of the allowed interval.

    Returns
    -------
    tuple ``(lower, upper)`` of two tuples, each as long as ``params``.
    """
    lg.function_log()
    values = list(params)
    lower = tuple(value - error for value in values)
    upper = tuple(value + error for value in values)
    return (lower, upper)
def bounds_merge(merged_bounds):
    """Concatenate several ``(lower, upper)`` bound pairs into one pair.

    Parameters
    ----------
    merged_bounds : iterable of pairs; element [0] of each pair holds lower
        bounds, element [1] upper bounds.

    Returns
    -------
    tuple ``(lower, upper)`` of two flat tuples, in input order.
    """
    lg.function_log()
    lower = []
    upper = []
    for pair in merged_bounds:
        lower.extend(pair[0])
        upper.extend(pair[1])
    return (tuple(lower), tuple(upper))
def peak_filter(global_peaks, f_min, f_max):
    """Keep the peaks that lie inside ``[f_min, f_max]`` (both inclusive).

    The peaks are sorted ascending first; the returned indices refer to
    positions in that sorted order, not in the input.

    Parameters
    ----------
    global_peaks : iterable of numbers; peak positions.
    f_min, f_max : numbers; inclusive range limits.

    Returns
    -------
    peak_list : list; the peaks within range, ascending.
    peak_index : list of int; their positions in the sorted peak list.
    """
    lg.function_log()
    ordered = sorted(global_peaks)
    in_range = [(position, peak) for position, peak in enumerate(ordered)
                if f_min <= peak <= f_max]
    peak_list = [peak for _, peak in in_range]
    peak_index = [position for position, _ in in_range]
    return peak_list, peak_index
def get_sample_name(file_path):
    """Split a file path into its parent folder name and a sample name.

    Backslashes are treated like forward slashes. The sample name is the
    last path component with everything up to and including the first
    underscore removed and every occurrence of ".txt" deleted.

    Parameters
    ----------
    file_path : str; path of the data file.

    Returns
    -------
    parent_folder : str; the second-to-last path component. For a path
        without any separator this is the file name itself.
    sample_name : str; the cleaned sample name.
    """
    lg.function_log()
    parts = file_path.replace('\\', '/').split('/')
    # With a single component there is no parent; the component is reused.
    parent_folder = parts[-2] if len(parts) > 1 else parts[-1]

    sample_name = parts[-1]
    _, underscore, remainder = sample_name.partition("_")
    if underscore:
        sample_name = remainder
    return parent_folder, sample_name.replace(".txt", "")
def histogram_maxima(peak_hist, plot_bins, hist_sigma, plot):
    """Locate the maxima of an up-sampled, Gaussian-smoothed histogram.

    The histogram is linearly interpolated onto a grid ten times finer than
    the first bin spacing, smoothed with a Gaussian filter and represented
    by a quartic interpolating spline. Maxima are the roots of the first
    derivative where the second derivative is negative.

    Parameters
    ----------
    peak_hist : array-like; histogram counts.
    plot_bins : array-like; bin positions belonging to ``peak_hist``.
    hist_sigma : float; the Gaussian filter width is
        ``max(plot_bins) * hist_sigma``.
    plot : bool; if True, show histogram, smoothed curve and derivative.

    Returns
    -------
    list; positions of the maxima rounded to 0 decimals, restricted to
        roots strictly inside the fine grid where the spline is positive.
    """
    lg.function_log()
    gauss_sigma = max(plot_bins) * hist_sigma

    # interpolate the histogram for higher resolution
    interp_hist = interpolate.interp1d(plot_bins, peak_hist)
    fine_step = (plot_bins[1] - plot_bins[0]) / 10
    new_bins = np.arange(min(plot_bins), max(plot_bins), fine_step)
    hist_filter = gaussian_filter1d(interp_hist(new_bins), gauss_sigma)

    # k=4 so that the first derivative is a cubic spline and offers roots()
    hist_spline = UnivariateSpline(new_bins, hist_filter, k=4, s=0)
    d_hist_spline = hist_spline.derivative()
    d2_hist_spline = hist_spline.derivative(2)

    # keep only roots inside the grid with negative curvature (maxima)
    first_bin = min(new_bins)
    last_bin = max(new_bins)
    maxima = [
        root for root in d_hist_spline.roots()
        if first_bin < root < last_bin and d2_hist_spline(root) < 0
    ]

    if plot == True:
        plt.plot(plot_bins, peak_hist, label='center of mass histogram')
        plt.plot(new_bins, hist_filter, label='gauss_filter')
        plt.plot(new_bins, d_hist_spline(new_bins), label='derivative')
        plt.legend()
        plt.show()

    return [np.round(root, 0) for root in maxima if hist_spline(root) > 0]
def R_CPE_fit(x, amp1, cen1, sigma1):
    """Fit the ``Z_R_CPE`` model to a single Gaussian peak.

    The Gaussian described by ``amp1``, ``cen1`` and ``sigma1`` is evaluated
    on ``x`` with ``_1gaussian`` and then fitted with ``Z_R_CPE`` using
    bounded least squares.

    Parameters
    ----------
    x : array-like; abscissa passed to both models.
    amp1, cen1, sigma1 : numbers; parameters of the Gaussian to reproduce.

    Returns
    -------
    numpy.ndarray; the three fitted parameters in the order of the
        ``Z_R_CPE`` signature after its first argument (n, R, Q).
    """
    lg.function_log()
    target = _1gaussian(x, amp1, cen1, sigma1)
    initial_guess = [1, amp1 * 2, 0.000001]
    lower_bounds = (0.6, 1, 1E-14)
    upper_bounds = (1, 1E10, 1E-2)
    fitted, _covariance = scipy.optimize.curve_fit(
        Z_R_CPE,
        x,
        target,
        p0=initial_guess,
        bounds=(lower_bounds, upper_bounds))
    return fitted
def interp_derivative(x, y, start, end, increment, window, p_order, d_order,
                      output):
    """Differentiate a smoothed curve repeatedly and locate derivative peaks.

    In each of ``d_order`` passes the current curve is smoothed with a
    Savitzky-Golay filter, fitted with a quartic spline, and replaced by
    that spline's first derivative sampled on
    ``np.arange(start, end, increment)``.

    Parameters
    ----------
    x, y : array-like; the original curve.
    start, end, increment : numbers; define the grid used from the first
        pass onwards.
    window : int; Savitzky-Golay window. It is reset to 11 whenever it
        exceeds the current curve length.
    p_order : int; Savitzky-Golay polynomial order.
    d_order : int; number of differentiation passes (>= 1).
    output : str; ``"data"`` or ``"peaks"``.

    Returns
    -------
    pandas.DataFrame; for ``"data"`` the columns ``x`` and ``y`` (grid and
        last derivative), for ``"peaks"`` the columns ``peak_x`` (roots of
        the last derivative spline where the next derivative is negative)
        and ``peak_y`` (smoothed original curve evaluated at those roots).

    Raises
    ------
    ValueError; if ``output`` is not one of the two supported values or
        ``d_order`` is smaller than 1. Previously both cases only surfaced
        as an unbound-variable error after the computation.
    """
    lg.function_log()
    if output not in ("data", "peaks"):
        raise ValueError(
            "output must be 'data' or 'peaks', got " + repr(output))
    if d_order < 1:
        raise ValueError("d_order must be at least 1, got " + repr(d_order))

    x_list = x
    y_list = y
    for _ in range(0, d_order):
        if window > len(y_list):
            window = 11
        filter_curve = savgol_filter(y_list, window, p_order)
        # k=4 so that the derivative is cubic and supports roots()
        spline = UnivariateSpline(x_list, filter_curve, k=4)
        spline_der = spline.derivative()
        x_list = np.arange(start, end, increment)
        y_list = spline_der(x_list)

    # Stationary points of the last derivative ...
    roots_list = spline_der.roots()
    # ... classified by the slope of the re-smoothed derivative curve.
    filter_curve = savgol_filter(y_list, window, p_order)
    spline = UnivariateSpline(x_list, filter_curve, k=4)
    spline_der2 = spline.derivative()
    roots_vals = spline_der2(roots_list)
    peak_x = [
        roots_list[k] for k in range(0, len(roots_vals)) if roots_vals[k] < 0
    ]

    # Peak heights are read from the smoothed ORIGINAL curve.
    filter_curve = savgol_filter(y, window, p_order)
    spline = UnivariateSpline(x, filter_curve, k=4)
    peak_y = spline(peak_x).tolist()

    if output == "data":
        result_data = {'x': x_list, 'y': y_list}
    else:
        result_data = {'peak_x': peak_x, 'peak_y': peak_y}
    return pd.DataFrame(result_data)
def get_Im_Z(data):
    """Collect the ``imag_z`` column of every listed file in one DataFrame.

    Parameters
    ----------
    data : mapping/DataFrame with a ``'file_path'`` entry that iterates
        over file paths.

    Returns
    -------
    pandas.DataFrame; one column per file, named by the file path and
        holding the ``'imag_z'`` column returned by ``load_clean_data``.
    """
    lg.function_log()
    table = pd.DataFrame()
    for position, file_path in enumerate(data['file_path']):
        spectrum = load_clean_data(1, file_path)
        table.insert(position, file_path, spectrum['imag_z'], True)
    return table
def Z_R_CPE(log_f, n, R, Q):
    """Negative imaginary impedance of a resistor parallel to a CPE.

    The CPE impedance is ``1 / ((1j * omega)**n * Q)`` with
    ``omega = 2 * pi * 10**log_f``; it is combined in parallel with the
    purely real impedance ``R``.

    Parameters
    ----------
    log_f : number or array; base-10 logarithm of the frequency.
    n : float; CPE exponent.
    R : scalar; resistance (converted with ``complex(R, 0)``).
    Q : float; CPE coefficient.

    Returns
    -------
    The imaginary part of the parallel impedance with inverted sign.
    """
    lg.function_log()
    omega = 2 * np.pi * 10**log_f
    z_cpe = 1 / ((1j * omega)**n * Q)
    z_resistor = complex(R, 0)
    z_parallel = 1 / (1 / z_resistor + 1 / z_cpe)
    return -z_parallel.imag
def params_align(global_peaks, exp_fit_params):
    """Place fitted Gaussian parameters into the slots of the global peaks.

    ``exp_fit_params`` is read as three equally long consecutive blocks
    (amplitudes, centres, sigmas). The centres are slid along
    ``global_peaks`` and the offset with the smallest summed absolute
    distance wins (first one on ties). Unmatched slots stay 0.

    Parameters
    ----------
    global_peaks : sequence of numbers; reference peak positions.
    exp_fit_params : sequence; fitted parameters, amplitudes first, then
        centres, then sigmas.

    Returns
    -------
    list of length ``3 * len(global_peaks)``: aligned amplitudes, then
        centres, then sigmas.
    """
    lg.function_log()
    n_peaks = len(global_peaks)
    n_params = round(len(exp_fit_params) / 3)
    amp_list = exp_fit_params[0:n_params]
    cen_list = exp_fit_params[n_params:n_params * 2]
    sigma_list = exp_fit_params[n_params * 2:n_params * 3]

    n_local = len(cen_list)
    # one distance score per candidate offset of the fitted centres
    scores = [
        sum(abs(global_peaks[offset + j] - cen_list[j])
            for j in range(n_local))
        for offset in range(abs(n_peaks - n_local) + 1)
    ]
    best_offset = scores.index(min(scores))

    amp_new = [0] * n_peaks
    cen_new = [0] * n_peaks
    sigma_new = [0] * n_peaks
    for j in range(n_local):
        slot = best_offset + j
        amp_new[slot] = amp_list[j]
        cen_new[slot] = cen_list[j]
        sigma_new[slot] = sigma_list[j]
    return amp_new + cen_new + sigma_new
def peaks_assign(global_peaks, exp_fit_params, f_min, f_max):
    """Assign fitted Gaussian parameters to the nearest global peak slot.

    The range ``(f_min, f_max]`` is cut into one bin per global peak; the
    bin boundaries are the midpoints between neighbouring global peaks.
    Every fitted centre is written, together with its amplitude and sigma,
    into the slot of the bin ``(lower, upper]`` that contains it. If
    several centres fall into one bin, the last one wins; centres outside
    ``(f_min, f_max]`` are dropped and empty slots stay 0.

    Parameters
    ----------
    global_peaks : sequence of numbers; reference peak positions, expected
        in ascending order.
    exp_fit_params : sequence; fitted parameters as three equally long
        consecutive blocks: amplitudes, centres, sigmas.
    f_min, f_max : numbers; outer limits of the first and last bin.

    Returns
    -------
    list of length ``3 * len(global_peaks)``: amplitudes, then centres,
        then sigmas.
    """
    lg.function_log()
    n_peaks = len(global_peaks)
    n_params = round(len(exp_fit_params) / 3)
    amp_list = exp_fit_params[0:n_params]
    cen_list = exp_fit_params[n_params:n_params * 2]
    sigma_list = exp_fit_params[n_params * 2:n_params * 3]

    # Boundaries halfway between neighbouring peaks. The previous expression
    # global_peaks[d] + (global_peaks[d + 1] - global_peaks[d]) evaluated to
    # the next peak itself, so a centre just below its reference peak was
    # filed under the preceding slot.
    p_list = [f_min]
    for d in range(0, n_peaks - 1):
        d_f = global_peaks[d + 1] - global_peaks[d]
        p_list.append(global_peaks[d] + d_f / 2)
    p_list.append(f_max)

    amp_new = [0] * n_peaks
    cen_new = [0] * n_peaks
    sigma_new = [0] * n_peaks
    for j in range(0, len(cen_list)):
        for i in range(0, n_peaks):
            if p_list[i] < cen_list[j] <= p_list[i + 1]:
                amp_new[i] = amp_list[j]
                cen_new[i] = cen_list[j]
                sigma_new[i] = sigma_list[j]
    return amp_new + cen_new + sigma_new
def find_extrema(x, y):
    """Find maxima and minima of ``y(x)`` and log a normalised overview plot.

    NOTE(review): a second ``find_extrema`` is defined further down in this
    file; being defined later, it replaces this one at import time.

    A quartic interpolating spline is fitted; extrema are the roots of its
    first derivative, split by the sign of the second derivative.

    Parameters
    ----------
    x, y : array-like; the sampled curve.

    Returns
    -------
    max_x : list; positions of the maxima.
    max_y : spline values at ``max_x``.
    min_x : list; positions of the minima.
    min_y : spline values at ``min_x``.
    """
    lg.function_log()
    lg.text_log('fit spline for algebraic operations')
    y_spline = UnivariateSpline(x, y, k=4, s=0)

    lg.text_log('get extrema via roots of first derivative')
    d_roots = y_spline.derivative().roots()

    # log the data, the spline and its first derivative, each normalised
    normalised_curves = (
        curve_normalize(y),
        curve_normalize(y_spline(x)),
        curve_normalize(y_spline.derivative()(x)),
    )
    lg.img_log((x, normalised_curves),
               title='extrema calculation (normalized)',
               legend=['y', 'spline fit', '1st derivative (spline)'],
               x_axis='x',
               y_axis='y')

    lg.text_log('assign extrema via second derivative')
    curvature = y_spline.derivative(2)
    max_x = [root for root in d_roots if curvature(root) < 0]
    min_x = [root for root in d_roots if curvature(root) > 0]
    return max_x, y_spline(max_x), min_x, y_spline(min_x)
def find_extrema(x, y):
    """Find the maxima and minima of ``y(x)`` from a spline fit.

    A quartic interpolating spline is fitted; extrema are the roots of its
    first derivative, split by the sign of the second derivative.

    Parameters
    ----------
    x, y : array-like; the sampled curve.

    Returns
    -------
    max_x : list; positions of the maxima.
    max_y : spline values at ``max_x``.
    min_x : list; positions of the minima.
    min_y : spline values at ``min_x``.
    """
    lg.function_log()
    lg.text_log('fit spline for algebraic operations')
    # k=4 so that the first derivative is a cubic spline and offers roots()
    y_spline = UnivariateSpline(x, y, k=4, s=0)

    lg.text_log('get extrema via roots of first derivative')
    d_roots = y_spline.derivative().roots()

    lg.text_log('assign extrema via second derivative')
    curvature = y_spline.derivative(2)
    max_x = [root for root in d_roots if curvature(root) < 0]
    min_x = [root for root in d_roots if curvature(root) > 0]
    return max_x, y_spline(max_x), min_x, y_spline(min_x)
def generate_peaks(gauss_peaks, f, imag_z):
    """Build one Gaussian curve per peak position for display purposes.

    Each Gaussian is centred on a peak position, takes its amplitude from a
    quartic interpolating spline through ``(f, imag_z)`` evaluated at that
    position, and uses a fixed sigma of 0.5.

    Parameters
    ----------
    gauss_peaks : sequence of numbers; peak positions.
    f : array-like; abscissa of the measured curve.
    imag_z : array-like; ordinate of the measured curve.

    Returns
    -------
    pandas.DataFrame; column ``str(k)`` holds the curve of the k-th peak as
        returned by ``_1gaussian(f, amplitude, centre, 0.5)``.
    """
    lg.function_log()
    peak_df = pd.DataFrame()
    y_int = UnivariateSpline(f, imag_z, k=4, s=0)
    amplitudes = list(y_int(gauss_peaks))
    centres = list(gauss_peaks)
    sigma = 0.5
    for column, (amplitude, centre) in enumerate(zip(amplitudes, centres)):
        peak_df.insert(column, str(column),
                       _1gaussian(f, amplitude, centre, sigma))
    return peak_df
def gauss_optimum(I):
    """Gaussian-filter ``I`` with a width derived from its argsort changes.

    For a ladder of filter widths, ``I`` is filtered and compared with the
    previous step's result, both by value and by argsort order. A peak
    model is fitted to the argsort differences as a function of the width,
    and the width used for the final filtering is derived from the fitted
    parameters.

    Parameters
    ----------
    I : array-like; signal to smooth.

    Returns
    -------
    The result of ``gaussian_filter1d(I, sigma_optimum)``.
    """
    lg.function_log()
    # NOTE(review): cost_list and dI_list are filled below but never read.
    cost_list = []
    dI_list = []
    d_args_list = []
    sigma_factor = 0.1
    # candidate widths: 0.1, 0.2, ... below 0.1 * len(I)
    sigma_range = np.arange(1*sigma_factor,len(I)*sigma_factor,sigma_factor)
    I_0 = I
    lg.text_log('calculate squared argsort difference for I_n-1 and I_n ')
    for sigma in sigma_range:
        # always filter the ORIGINAL signal, compare with the previous result
        I_gauss = gaussian_filter1d(I, sigma)
        # squared difference of the values
        dI = sum([(a_i - b_i)**2 for a_i, b_i in zip(I_gauss, I_0)])
        # squared difference of the sort orders
        d_args = sum([(a_i - b_i)**2 for a_i, b_i in zip(np.argsort(I_gauss), np.argsort(I_0))])
        cost = d_args*dI
        I_0 = I_gauss
        cost_list.append(cost)
        dI_list.append(dI)
        d_args_list.append(d_args)
    #plt.plot(sigma_range, d_args_list, label = 'd_args')
    #lg.img_log('argsort difference raw')
    #lg.text_log('set initial d_args peak parameters')
    # start values: height and position of the largest argsort difference
    Amp_init = d_args_list[np.argsort(d_args_list)[-1]]
    cen_init = sigma_range[np.argsort(d_args_list)[-1]]
    sigma_init = abs(cen_init-sigma_range[0])
    # each triple looks like [start value, lower bound, upper bound] — TODO confirm
    p_Amp = [Amp_init, 0, Amp_init*1.2]
    p_cen = [cen_init, 0, cen_init*2]
    p_sigma = [sigma_init, sigma_init*0.01, sigma_init*5]
    # NOTE(review): fit_params is called here with ONE tuple argument, while
    # the fit_params defined in this file takes eight positional arguments;
    # confirm which fit_params is meant and that lg.var_log returns its input.
    params, bnds = lg.var_log(fit_params((p_Amp, p_sigma, p_cen)))
    #lg.text_log('fit gauss peak to extract optimum sigma')
    gauss_peak_params, gauss_peak_errs = curve_fit(gauss_peak, sigma_range, d_args_list, p0=params, bounds=bnds)
    #lg.var_log(gauss_peak_params)
    #plt.plot(sigma_range, d_args_list, label = 'd_args')
    # NOTE(review): the fitted curve overwrites d_args_list but is not used afterwards.
    d_args_list = gauss_peak(sigma_range, *gauss_peak_params)
    #plt.plot(sigma_range, d_args_list, label = 'gauss peak fit')
    #plt.xlabel('sigma')
    #plt.ylabel('d_args')
    #lg.img_log('argsort difference peak fit')
    #lg.text_log('get optimum sigma for gauss filtering from maximum argsort difference')
    # third fitted parameter plus |second fitted parameter|; their meaning
    # depends on gauss_peak's parameter order, which is not visible here
    sigma_optimum = lg.var_log(gauss_peak_params[2]+abs(gauss_peak_params[1]))
    #lg.text_log('calculated new filtered spectrum')
    I_optimum = gaussian_filter1d(I, sigma_optimum)
    #documentation plot
    #plt.plot(I, label='original')
    #plt.plot(I_optimum, label='sigma optimum')
    #plt.xlabel('n')
    #plt.ylabel('magnitude')
    #lg.img_log('gauss_filtering')
    return I_optimum
def baseline(dt, I, report = True, logfile = None):
    """
    Estimate a signal baseline by Gaussian low-pass filtering in Fourier space.

    Parameters
    ----------
    dt : float; distance between time points.
    I : array; Signal Intensity/Magnitude.
    report : unused; kept so existing callers keep working.
    logfile : unused; kept so existing callers keep working.

    Returns
    -------
    baseline : array; magnitude of the inverse transform of the low-pass
        filtered spectrum, for baseline subtraction.
    inclination : float; slope of the straight line fitted to the baseline
        over time (np.polyfit, degree 1).

    Raises
    ------
    ValueError; if no frequency lies below the cutoff.
    """
    lg.function_log()
    # spectrum of the signal; its smoothed magnitude is used to find the cutoff
    f_range, spectrum = fourier_transform(dt, I)
    magnitude = gauss_optimum(np.abs(spectrum))
    max_x, max_y, min_x, min_y = lg.var_log(find_extrema(f_range, magnitude))

    # cutoff frequency: the lower of the position of the highest minimum and
    # the position of the highest maximum of the magnitude
    bg_cutoff = lg.var_log(min((min_x[np.argsort(min_y)[-1]]),
                               (max_x[np.argsort(max_y)[-1]])))

    # indices of all frequencies below the cutoff (single pass; assumes the
    # values in f_range are distinct, which gives the same indices as a
    # value lookup)
    below_cutoff = [idx for idx, freq in enumerate(f_range)
                    if freq < bg_cutoff]
    if not below_cutoff:
        raise ValueError('no frequency below the background cutoff found')
    bg_indices = lg.var_log(below_cutoff)

    # half of the highest index below the cutoff is the filter width
    sigma_filter = lg.var_log(max(bg_indices)/2)

    # full spectrum for filtering and inverse transformation
    full_spectrum = DFT(I)
    gauss_filter = np.asarray(
        low_pass_gauss_filter(len(full_spectrum), sigma_filter))
    filtered_spectrum = np.multiply(full_spectrum, gauss_filter)

    # baseline via inverse transformation
    baseline_curve = np.abs(iDFT(np.asarray(filtered_spectrum)))

    # One time stamp per sample. np.arange(0, len(I)*dt, dt) can return one
    # element too many for a non-integer step, which makes polyfit fail.
    t = np.arange(len(I)) * dt
    coef = np.polyfit(t, baseline_curve, 1)
    inclination = coef[0]
    return baseline_curve, inclination
def main(corr_threshold, root, avoid, pattern, report=False):
    """Interactive driver: group the data files, check peaks, fit, report.

    Parameters
    ----------
    corr_threshold : passed to ``cluster_distance`` for grouping.
    root, avoid, pattern : passed to ``get_file_list``; ``root`` is also the
        prefix of every output file path and ``pattern`` is part of the
        result file names.
    report : bool; if True, write the result tables and per-sample fit
        data to csv and plot/save one figure per sample.

    Returns
    -------
    None. The function prints progress, waits for console input and shows
    matplotlib figures.
    """
    lg.function_log()
    ############################################################
    """0. get data"""
    ############################################################
    data = get_file_list(pattern, avoid, root)
    n_samples = len(data)
    print(str(n_samples) + ' datasets will be analysed')
    ############################################################
    """1. set parameters"""
    ############################################################
    """1.1 parameters for smoothing the impedance curve"""
    #
    #filter window for savitzky golay (sg) filter (greater then sg_poly, odd)
    sg_global = 9
    sg_local = 7
    #polynomial order for savitzky golay (sg) filter (>= 3, odd)
    sg_poly = 3
    """1.2 parameters to extract peaks from the impedance curve"""
    #derivative orders for peak extraction (dev_local = 1, dev_global => 3)
    dev_global = 5
    dev_local = 1
    """1.3 parameters for fitting the impedance curve - boundary settings"""
    #allowed relative change of peak height (0 < u_fit < 1)
    u_fit = 0.95
    #allowed displacement from peak centre -> high value -> low displacement (div_cen > 0)
    div_cen = 4
    #choose if global peaks should be adjusted to locally detected peaks yes -> single, no -> global
    peak_select = 'single'
    """1.4 set plot parameters"""
    plt.rcParams.update({'font.size': 14})
    width = 14
    height = 3
    scaling = 1
    ############################################################
    """2. fit all curves - use globally extracted peaks"""
    ############################################################
    input_control('start curve analysis?')
    error_list = []
    #try to group data, convert source dataframe to list if no groups found
    try:
        test_f = get_Im_Z(data)
        # pairwise correlation between the imag_z columns of all files
        corr_map = test_f.corr()
        labels = [get_sample_name(x)[1] for x in corr_map.columns.values]
        #optional plot
        #sns.heatmap(corr_map, xticklabels=labels, yticklabels=labels)
        #lg.img_log(title='nicht sortiert',x_axis='Datensatz Nr.', y_axis='Datensatz Nr.')
        corr_map, data_groups, label = cluster_distance(
            corr_map, corr_threshold)
        labels = [get_sample_name(x)[1] for x in corr_map.columns.values]
        #sns.heatmap(corr_map, xticklabels=labels, yticklabels=labels)
        #lg.img_log(title='nach Clustering',x_axis='Datensatz Nr.', y_axis='Datensatz Nr.')
        # group name -> number of files in the group (for the console only)
        data_dict = {}
        for idx, group in enumerate(data_groups):
            group_str = 'group_' + str(idx)
            data_dict[group_str] = str(len(group))
        print('number of data groups: ' + str(len(data_groups)))
        print('groups: ' + str(data_dict))
        data = data_groups
    except Exception as e:
        # fallback: any failure above puts all files into one single group
        print(e)
        data_groups = []
        data_groups.append(data['file_path'].tolist())
        print('no groups found')
        pass
    input_control('proceed? - y/n')
    result_list = []
    result_names = []
    #perform fitting for each data group
    for i in range(0, len(data_groups)):
        data = data_groups[i]
        test_name, f, imag_z = get_sample_dataset(data)
        parent_folder, sample_name = get_sample_name(test_name)
        sample_name = 'sample: ' + str(sample_name)
        print(sample_name)
        # NOTE(review): `proceed` is never set to True; the loop below is
        # only left through the `break` after the user answers 'y'.
        proceed = False
        while proceed == False:
            """1. Check peak extraction"""
            gauss_peaks, gauss_sigma = peak_extract(sg_global, sg_poly,
                                                    dev_global, data, True)
            print(
                str(len(gauss_peaks)) + ' peaks used for fitting: ' +
                str(gauss_peaks))
            local_peaks, local_sigma = peak_extract(sg_local, sg_poly,
                                                    dev_local, test_name, True)
            peak_df = generate_peaks(local_peaks, f, imag_z)
            """display peaks"""
            plt.plot(f, imag_z, label=sample_name)
            for j in range(0, len(local_peaks)):
                plt.plot(f, peak_df[str(j)], label='peak ' + str(j))
            plt.legend()
            plt.show()
            print('peak extraction ok? - y/n')
            if input() == 'y':
                break
            else:
                # new filter windows are kept for all following groups, too
                print('enter global (sg) filter window (odd, greater then 7)')
                sg_global = int(input())
                print('enter local (sg) filter window (odd, greater then 5)')
                sg_local = int(input())
        input_control('start fitting now? - y/n')
        data = data_groups[i]
        print('Fitting group number ' + str(i))
        result = fit_spectrum(sg_global, sg_local, sg_poly, dev_global,
                              dev_local, u_fit, div_cen, data, 3, peak_select)
        # NOTE(review): the bare except hides every error, including a
        # missing 'error' column when no curve of the group could be fitted;
        # error_list is not used after this loop.
        try:
            error_list = error_list + result['error'].to_list()
        except:
            pass
        # file name starts with a backslash and is appended to `root` below
        result_names.append(r'\result pattern_' + pattern + '_group' + str(i) +
                            '.csv')
        result_list.append(result)
    #plot data and save figure for each fitted dataset and save to csv
    if report == True:
        for i in range(0, len(result_list)):
            #write results to file
            file_name = result_names[i]
            result = result_list[i]
            result.to_csv(root + file_name)
            #plot result
            result_shape = result.shape
            # reset_index adds an 'index' column in front of the seven meta columns
            result = result.reset_index()
            n_samples = result_shape[0]
            # NOTE(review): df_append creates 7 meta columns plus 3 per peak,
            # so (columns - 6) / 3 is not an integer and the correct peak
            # count comes out only through round() — confirm intent.
            n_peaks = round((result_shape[1] - 6) / 3)
            plt.rcParams.update({'font.size': 14})
            width = 14
            height = 3
            scaling = 1
            for k in range(0, n_samples):
                fit_data = pd.DataFrame()
                fig = plt.figure(figsize=(width * scaling,
                                          height * scaling * 2))
                names = result['name']
                paths = result['path']
                row = result.loc[k]
                # skip 'index' + seven meta columns: the fit parameters remain
                gauss_params = row[8:len(row)]
                legend_temp = names[k]
                print(legend_temp)
                legend_text = legend_temp.replace('.', '_')
                spectrum = load_clean_data(1, paths[k])
                x = spectrum['frequency']
                y = spectrum['imag_z']
                fit_data.insert(fit_data.shape[1], 'frequency', x, True)
                fit_data.insert(fit_data.shape[1], 'imag_z', y, True)
                plt.plot(x, y, '-', label=legend_text)
                amp_list = []
                cen_list = []
                sigma_list = []
                R_CPE_list = []
                n_index = 3
                start = 0
                end = n_peaks
                # parameters are stored block-wise: amplitudes, centres, sigmas
                for n in range(start, end):
                    amp1 = gauss_params[n]
                    cen1 = gauss_params[n + n_peaks]
                    sigma1 = gauss_params[n + n_peaks * 2]
                    amp_list.append(amp1)
                    cen_list.append(cen1)
                    sigma_list.append(sigma1)
                    plt.plot(x,
                             _1gaussian(x, amp1, cen1, sigma1),
                             label='f_' + str(n + 1) + ', sigma' + str(sigma1))
                    fit_data.insert(fit_data.shape[1], 'f_' + str(n + 1),
                                    _1gaussian(x, amp1, cen1, sigma1), True)
                sum_gauss = amp_list + cen_list + sigma_list
                plt.plot(x, multiple_gauss(x, *sum_gauss), label='fit')
                fit_data.insert(fit_data.shape[1], 'fit',
                                multiple_gauss(x, *sum_gauss), True)
                plt.title(legend_text)
                plt.legend()
                plt.grid()
                plt.show()
                #save figures
                # NOTE(review): saved under the bare sample name, i.e. relative
                # to the current working directory rather than `root`.
                fig.savefig(legend_text, dpi=600)
                #save fitted spectra to csv
                fit_data.to_csv(root + '\\' + legend_text + '.csv')
def fit_spectrum(sg_global, sg_local, sg_poly, dev_global, dev_local, u_fit,
                 div_cen, data, n_peaks, peaks):
    """Fit a sum of Gaussians to the imag_z curve of every file in ``data``.

    Peaks are first extracted from the whole data set ("global"). For
    ``peaks == 'single'`` they are additionally extracted per file
    ("local") and aligned to the global ones before fitting.

    Parameters
    ----------
    sg_global, sg_local : Savitzky-Golay windows handed to ``peak_extract``
        for the global and the per-file peak search.
    sg_poly : Savitzky-Golay polynomial order handed to ``peak_extract``.
    dev_global, dev_local : derivative orders handed to ``peak_extract``.
    u_fit, div_cen : handed to ``fit_params`` to build the fit bounds.
    data : a file path (str), a list of file paths, or an object whose
        ``['file_path']`` entry offers ``tolist()``.
    n_peaks : not used inside this function.
    peaks : str; ``'single'`` switches on the per-file peak search.

    Returns
    -------
    pandas.DataFrame; one row per successfully fitted file, written by
        ``df_append``.
    """
    lg.function_log()
    # NOTE(review): fail_df collects failed paths but is never returned.
    fail_df = pd.DataFrame()
    fail_df.insert(0, 'fail_path', 'dummy')
    file_list = []
    n_success = 0
    # normalise `data` to a list of file paths
    if isinstance(data, str):
        file_list.append(data)
    else:
        if isinstance(data, list):
            file_list = data
        else:
            file_list = data['file_path'].tolist()
    output_df = pd.DataFrame()
    gauss_peaks, gauss_sigma = peak_extract(sg_global, sg_poly, dev_global,
                                            data, True,
                                            'global peak detection')
    global_peaks = gauss_peaks
    n_data = len(file_list)
    for i in range(0, n_data):
        peak_inc = 0
        print('progress: ' + str(np.round(i / n_data * 100, 2)) + ' %')
        if peaks == 'single':
            gauss_peaks, gauss_sigma = peak_extract(sg_local, sg_poly,
                                                    dev_local, file_list[i],
                                                    True,
                                                    'local peak detection')
            # more local than global peaks: widen the filter window in steps
            # of 2 until the local search no longer finds too many peaks
            if len(gauss_peaks) > len(global_peaks):
                while len(gauss_peaks) > len(global_peaks):
                    gauss_peaks, gauss_sigma = peak_extract(
                        sg_local + peak_inc, sg_poly, dev_local, file_list[i],
                        True, 'local peak detection')
                    peak_inc = peak_inc + 2
            if 1 <= len(gauss_peaks) <= len(global_peaks):
                print('local and global peaks used')
            else:
                # no local peaks found: fall back to the global ones
                print(' only global peaks used')
                gauss_peaks = global_peaks
        folder_name, sample_name = get_sample_name(file_list[i])
        test_file = load_clean_data(1, file_list[i])
        f = test_file['frequency']
        imag_z = test_file['imag_z']
        real_z = test_file['real_z']
        # (German, kept verbatim below) "filter the peaks by frequency range"
        """Filtern der Peaks über den Frequenzbereich"""
        gauss_peaks = peaks_align(global_peaks, gauss_peaks, False)
        # only peaks inside the measured frequency range are fitted
        gauss_peaks, peak_index = peak_filter(gauss_peaks, min(f), max(f))
        #interpolate data
        imag_int = UnivariateSpline(f, imag_z, k=4, s=0)
        # start amplitudes: the measured curve evaluated at the peak positions
        amp_list = list(imag_int(gauss_peaks))
        cen_list = gauss_peaks
        sigma_min = 0.5
        sigma_max = 1.0
        p_init, bnds = fit_params(amp_list, cen_list, min(f), max(f),
                                  sigma_min, sigma_max, u_fit, div_cen)
        # NOTE(review): ff is reset to 0 for every file, so each failure
        # overwrites row 0 of fail_df.
        ff = 0
        try:
            exp_fit_params, exp_fit_errs = scipy.optimize.curve_fit(
                multiple_gauss, f, imag_z, p0=p_init, bounds=bnds)
            success = True
            #df_append(file_list[i], sample_name, min(f), max(f), exp_fit_params, global_peaks, i, output_df)
        except Exception as e:
            print(e)
            success = False
            print(sample_name + ' failed')
            fail_df.loc[ff] = file_list[i]
            #exp_fit_param = p_init
            #df_append(sample_list[i], min(f), max(f), exp_fit_params, global_peaks, i, output_df)
            pass
        if success == True:
            # squared residuals, each scaled by the largest real_z value
            error = total_fit_error(f, imag_z,
                                    multiple_gauss(f, *exp_fit_params),
                                    max(real_z))
            # rows are numbered by successful fits, so the table has no gaps
            df_append(file_list[i], sample_name, min(f), max(f), max(imag_z),
                      max(real_z), exp_fit_params, global_peaks, error,
                      n_success, output_df)
            n_success = n_success + 1
    print(str(len(file_list) - n_success) + ' curves could not be fitted')
    return output_df
def peak_extract(sg_window, sg_poly, dev_order, data, plot=False,
                 plot_title=''):
    """Extract common peak positions from one or several impedance files.

    For every file, peaks of the first and of the ``dev_order``-th
    derivative are collected with ``interp_derivative``. All positions are
    put into one histogram, which is smoothed and searched for maxima; the
    maxima are then refined by fitting ``multiple_gauss`` to the smoothed
    histogram.

    Parameters
    ----------
    sg_window, sg_poly : Savitzky-Golay window and polynomial order handed
        to ``interp_derivative``.
    dev_order : derivative order of the second peak search.
    data : a file path (str), a list of file paths, or an object whose
        ``['file_path']`` entry offers ``tolist()``.
    plot : bool; if True, show the histogram, its smoothed version and the
        derivative (only when at least one maximum was found).
    plot_title : str; title of that plot.

    Returns
    -------
    gauss_peaks : list; fitted centres of the accepted histogram peaks.
    gauss_sigma : list; absolute fitted sigmas of those peaks.
    Both lists are empty when the histogram has no usable maximum.
    """
    lg.function_log()
    f_min_list = []
    f_max_list = []
    f_inc_list = []
    peaks_array = []
    file_list = []
    # normalise `data` to a list of file paths
    if isinstance(data, str):
        file_list.append(data)
    else:
        if isinstance(data, list):
            file_list = data
        else:
            file_list = data['file_path'].tolist()
    """create list of all peaks found in the datasets"""
    for i in range(0, len(file_list)):
        file = load_clean_data(1, file_list[i])
        f = file['frequency']
        imag_z = file['imag_z']
        min_f = round(min(f), 1)
        max_f = round(max(f), 1)
        f_min_list.append(min_f)
        f_max_list.append(max_f)
        # spacing of the first two frequency points, rounded to 2 decimals
        f_inc = round(abs((f[1]) - (f[0])), 2)
        f_inc_list.append(f_inc)
        """extraction of peaks from interpolation curve"""
        #d_Z = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window, sg_poly, dev_order, "data")
        peaks_d1 = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window,
                                     sg_poly, 1, "peaks")
        peaks_dn = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window,
                                     sg_poly, dev_order, "peaks")
        peaks_d1_list = (peaks_d1["peak_x"].values.tolist())
        peaks_dn_list = (peaks_dn["peak_x"].values.tolist())
        peaks_array = peaks_array + peaks_d1_list + peaks_dn_list
    # histogram range slightly wider than the data; bin width is the
    # largest frequency increment of all files
    hist_start = min(f_min_list) - 0.5
    hist_end = max(f_max_list) + 1
    hist_bins = max(f_inc_list)
    """create histogram"""
    peak_hist, peak_bins = np.histogram(peaks_array,
                                        bins=np.arange(hist_start, hist_end,
                                                       hist_bins))
    # left bin edges (np.histogram returns one edge more than counts)
    plot_bins = (peak_bins[:len(peak_bins) - 1])
    """smoothing of histogram via gaussian filter"""
    hist_filter_1 = savgol_filter(peak_hist, 7, 5)
    hist_filter = gaussian_filter1d(hist_filter_1, 2.5)
    """interpolation of histogram"""
    # k=4 so that the first derivative is a cubic spline and offers roots()
    hist_spline = UnivariateSpline(plot_bins, hist_filter, k=4, s=0)
    """higher number of bins for analysis of interpolation curve"""
    # NOTE(review): uses f_inc of the LAST file read in the loop above.
    new_bins = np.arange(0, hist_end, f_inc * 0.1)
    """calculation of derivatives to find local maxima"""
    d_hist_spline = hist_spline.derivative()
    d2_hist_spline = hist_spline.derivative(2)
    d_hist_roots = d_hist_spline.roots()
    # NOTE: despite the wording of the string below, the code keeps roots
    # with a NEGATIVE second derivative, i.e. local maxima.
    """only extract roots with positive values for d2/d2x(root)"""
    find_peaks_result = []
    for n in range(0, len(d_hist_roots)):
        if d_hist_roots[n] > min(new_bins):
            if d_hist_roots[n] < max(new_bins):
                if d2_hist_spline(d_hist_roots[n]) < 0:
                    find_peaks_result.append(d_hist_roots[n])
    # keep only maxima where the smoothed histogram is positive
    cen_list = []
    for n in range(0, len(find_peaks_result)):
        if hist_spline(find_peaks_result[n]) > 0:
            cen_list.append(find_peaks_result[n])
    """refinement with gaussian peak fitting"""
    sigma_peak_find = 0.2
    params = []
    amp_list = list(hist_spline(cen_list))
    if len(cen_list) > 0:
        params, bnds = fit_params(amp_list, cen_list, hist_start, hist_end,
                                  0.1, 1.25, 0.5, 2)
        hist_gauss, errs_gauss = scipy.optimize.curve_fit(multiple_gauss,
                                                          plot_bins,
                                                          hist_filter,
                                                          p0=params,
                                                          bounds=bnds)
        # NOTE(review): hist_gauss is passed as ONE argument here, whereas
        # other calls in this file unpack the parameters with `*`; g_hist is
        # only referenced in commented-out plotting code.
        g_hist = multiple_gauss(new_bins, hist_gauss)
        """plot peak search result"""
        if plot == True:
            plt.rc('legend', fontsize=10)
            plt.plot(plot_bins, peak_hist, label='histogram')
            plt.plot(plot_bins, hist_filter, label='histogram smoothed')
            plt.plot(new_bins, d_hist_spline(new_bins), label='1. derivative')
            plt.title(plot_title)
            plt.legend()
            plt.show()
            #plt.plot(new_bins, multiple_gauss(new_bins, *params), label = 'gauss')
            #plt.plot(new_bins,g_hist)
        # NOTE(review): `i` is the last index of the file loop; the two
        # names obtained here are not used afterwards.
        folder_text, sample_text = get_sample_name(file_list[i])
        n_peaks = len(cen_list)
        gauss_peaks = []
        gauss_sigma = []
        errs_peaks = []
        errs_sigma = []
        # fitted parameters are stored block-wise: amplitudes, centres, sigmas
        for a in range(0, n_peaks):
            # accept peaks whose fitted amplitude exceeds 20 % of the mean amplitude
            if hist_gauss[a] > (0.2 * np.mean(hist_gauss[0:n_peaks])):
                gauss_peaks.append(hist_gauss[a + n_peaks])
                gauss_sigma.append(abs(hist_gauss[a + n_peaks * 2]))
                # NOTE(review): errs_peaks / errs_sigma are collected but not returned.
                errs_peaks.append(errs_gauss[a + n_peaks])
                errs_sigma.append(abs(errs_gauss[a + n_peaks * 2]))
    else:
        print('no local peaks found')
        gauss_peaks = []
        gauss_sigma = []
    return gauss_peaks, gauss_sigma
def fit_params(amp_list, cen_list, min_x, max_x, sigma_min, sigma_max, u_fit,
               div_cen):
    """Build start values and bounds for a multi-Gaussian least-squares fit.

    Parameters
    ----------
    amp_list : sequence of numbers; start amplitude of every peak.
    cen_list : sequence of numbers; start centre of every peak, at least as
        long as ``amp_list``.
    min_x, max_x : numbers; limits of the fitted range, used for the outer
        margin of the first and of the last peak.
    sigma_min, sigma_max : numbers; bounds for every sigma. If both are
        equal, sigma is left out of the parameters and bounds.
    u_fit : number; allowed relative change of each amplitude.
    div_cen : number; the margin between two neighbouring centres is their
        distance divided by ``div_cen``.

    Returns
    -------
    params : list; start values, block-wise: amplitudes, centres and (unless
        ``sigma_min == sigma_max``) sigmas.
    bnds : tuple ``(lower, upper)`` of two tuples in the same order.
    """
    lg.function_log()
    n_peaks = len(amp_list)
    fit_amp = []
    fit_cen = []
    bnds_amp_min = []
    bnds_amp_max = []
    # n_peaks + 1 margins: distance to min_x, one per neighbouring pair
    # (distance / div_cen), and half the distance to max_x.
    d_cen_list = []
    for j in range(n_peaks):
        f_amp = amp_list[j]
        f_cen = cen_list[j]
        fit_amp.append(f_amp)
        fit_cen.append(f_cen)
        if j == 0:
            d_cen_list.append(abs(min_x - f_cen))
        if j == n_peaks - 1:
            d_cen_list.append(abs(max_x - f_cen) / 2)
        else:
            d_cen_list.append(abs(f_cen - cen_list[j + 1]) / div_cen)
        # abs() keeps lower <= upper for negative amplitudes as well; without
        # it the two bounds swap and the optimiser rejects them.
        d_amp = abs(f_amp) * u_fit
        bnds_amp_min.append(f_amp - d_amp)
        bnds_amp_max.append(f_amp + d_amp)

    # each centre may move by the margin to its left / right neighbour
    bnds_cen_min = [fit_cen[i] - d_cen_list[i] for i in range(n_peaks)]
    bnds_cen_max = [fit_cen[i] + d_cen_list[i + 1] for i in range(n_peaks)]
    # start sigma: mean of both margins, clamped into [sigma_min, sigma_max]
    fit_sigma = [
        min(max((d_cen_list[i] + d_cen_list[i + 1]) / 2, sigma_min),
            sigma_max) for i in range(n_peaks)
    ]

    if sigma_max == sigma_min:
        bounds_min = bnds_amp_min + bnds_cen_min
        bounds_max = bnds_amp_max + bnds_cen_max
        params = fit_amp + fit_cen
    else:
        bounds_min = bnds_amp_min + bnds_cen_min + [sigma_min] * n_peaks
        bounds_max = bnds_amp_max + bnds_cen_max + [sigma_max] * n_peaks
        params = fit_amp + fit_cen + fit_sigma
    bnds = (tuple(bounds_min), tuple(bounds_max))
    return params, bnds