def peaks_align(global_peaks, local_peaks, fill_zero):
    """Slide ``local_peaks`` along ``global_peaks`` and paste it at the best fit.

    Every admissible offset (``0 .. abs(len(global) - len(local))``) is scored
    by the summed absolute difference between the local peaks and the global
    peaks they would overlap; the first offset with the smallest score wins.

    Parameters
    ----------
    global_peaks : indexable sequence of numbers
        Reference peak positions.  NOTE: when ``fill_zero`` is not ``True``
        this very object is written to in place and returned.
    local_peaks : indexable sequence of numbers
        Peaks to be aligned.  Must not be longer than ``global_peaks``,
        otherwise the scoring step indexes past the end of ``global_peaks``.
    fill_zero : bool
        ``True``  -> return a new zero-filled list holding only the local
        peaks at their aligned slots.
        otherwise -> overwrite the aligned slots of ``global_peaks``.

    Returns
    -------
    list
        Sequence with ``len(global_peaks)`` entries as described above.
    """
    lg.function_log()

    n_local = len(local_peaks)
    n_offsets = abs(len(global_peaks) - n_local) + 1

    # one mismatch score per candidate offset
    mismatch = [
        sum(
            abs(global_peaks[shift + k] - local_peaks[k])
            for k in range(n_local))
        for shift in range(n_offsets)
    ]
    best_shift = mismatch.index(min(mismatch))

    if fill_zero == True:
        target = [0] * len(global_peaks)
    else:
        # deliberate aliasing: the caller's sequence is modified
        target = global_peaks

    if best_shift < len(global_peaks):
        for k in range(n_local):
            target[best_shift + k] = local_peaks[k]

    return target
def df_append(path, name, f_min, f_max, im_max, real_max, exp_params_list,
              global_peaks, error, loop_index, df):
    """Write one fit result as row ``loop_index`` of ``df`` (in place).

    The fitted parameters are first mapped onto the global peak slots with
    ``params_align`` so that every row has the same layout:
    seven metadata values followed by all amplitudes, all centres and all
    sigmas (one of each per global peak).

    On the first call (``loop_index == 0``) the columns are created:
    ``path, name, f_min, f_max, im_max, real_max, error`` followed by
    ``im_Z_1..n``, ``freq_1..n`` and ``sigma_1..n``.

    Parameters
    ----------
    path, name : str
        File path and sample name stored in the row.
    f_min, f_max, im_max, real_max, error : number
        Metadata values stored in the row.
    exp_params_list : sequence
        Fitted parameters in the order amplitudes, centres, sigmas.
    global_peaks : sequence
        Global peak positions; its length defines the number of peak columns.
    loop_index : int
        Row label to write; ``0`` additionally triggers column creation.
    df : pandas.DataFrame
        Target frame, modified in place.  Nothing is returned.
    """
    lg.function_log()

    fit_values = params_align(global_peaks, exp_params_list)
    df_values = [path, name, f_min, f_max, im_max, real_max, error
                 ] + fit_values

    if loop_index == 0:
        # Column names are built up front so they also exist when there are
        # no global peaks (previously the name list was only bound inside the
        # per-peak loop).
        peak_numbers = range(1, len(global_peaks) + 1)
        column_names = (
            ['path', 'name', 'f_min', 'f_max', 'im_max', 'real_max', 'error']
            + ['im_Z_' + str(n) for n in peak_numbers]
            + ['freq_' + str(n) for n in peak_numbers]
            + ['sigma_' + str(n) for n in peak_numbers])

        for position, (column, value) in enumerate(
                zip(column_names, df_values)):
            df.insert(position, column, value, True)

    df.loc[loop_index] = df_values
def total_fit_error(x, y_real, y_fit, ter):
    """Return the sum over all points of ``((y_real - y_fit) / ter) ** 2``.

    ``x`` is only used for its length; ``y_real`` and ``y_fit`` are accessed
    with ``[0] .. [len(x) - 1]``.
    """
    lg.function_log()

    return sum(
        ((y_real[k] - y_fit[k]) / ter)**2 for k in range(len(x)))
# Example #4
# 0
def low_pass_gauss_filter(n, sigma_filter):
    """Build an ``n``-point filter made of two Gaussian lobes.

    One lobe is centred on index ``0`` and the other on index ``n - 1``;
    both have standard deviation ``sigma_filter`` (in samples) and unit
    height.  The returned array is their sum.
    """
    lg.function_log()

    idx = np.arange(0, n, 1)

    lobe_at_start = np.exp(-(idx**2) * 0.5 / sigma_filter**2)
    lobe_at_end = np.exp(-(((idx - n + 1)**2) * 0.5 / sigma_filter**2))

    return lobe_at_start + lobe_at_end
def generate_bounds(params, error):
    """Return ``(lower, upper)`` tuples with ``value -/+ error`` per parameter."""
    lg.function_log()

    # single pass over params so one-shot iterables keep working
    limits = [(value - error, value + error) for value in params]

    lower = tuple(low for low, _ in limits)
    upper = tuple(high for _, high in limits)

    return (lower, upper)
def bounds_merge(merged_bounds):
    """Concatenate several ``(lower, upper)`` pairs into one pair of tuples.

    Each item of ``merged_bounds`` is indexed with ``[0]`` (lower limits) and
    ``[1]`` (upper limits); the limits are chained in input order.
    """
    lg.function_log()

    lower = []
    upper = []

    for bound_pair in merged_bounds:
        lower.extend(bound_pair[0])
        upper.extend(bound_pair[1])

    return (tuple(lower), tuple(upper))
def peak_filter(global_peaks, f_min, f_max):
    """Keep the peaks lying inside ``[f_min, f_max]``.

    The peaks are sorted ascending first.  Returns two lists: the retained
    peak values and their positions within the *sorted* sequence.
    """
    lg.function_log()

    ordered = sorted(global_peaks)

    in_band = [(position, peak) for position, peak in enumerate(ordered)
               if f_min <= peak <= f_max]

    peak_list = [peak for _, peak in in_band]
    peak_index = [position for position, _ in in_band]

    return peak_list, peak_index
def get_sample_name(file_path):
    """Split a file path into ``(parent_folder, sample_name)``.

    Backslashes are treated like forward slashes.  The sample name is the
    last path component with everything up to and including the first
    underscore removed and every ``".txt"`` occurrence stripped.
    """
    lg.function_log()

    parts = file_path.replace('\\', '/').split('/')

    # index computed as len - 2 on purpose: for a bare file name this wraps
    # around to the file name itself instead of raising
    parent_folder = parts[len(parts) - 2]
    file_name = parts[-1]

    _, separator, remainder = file_name.partition("_")
    sample_name = remainder if separator else file_name
    sample_name = sample_name.replace(".txt", "")

    return parent_folder, sample_name
def histogram_maxima(peak_hist, plot_bins, hist_sigma, plot):
    """Locate the maxima of a smoothed histogram.

    The histogram is linearly interpolated onto a grid ten times finer than
    the original bin spacing, Gaussian-smoothed, and fitted with an
    interpolating quartic spline.  Maxima are the roots of the spline's first
    derivative that lie strictly inside the fine grid, have negative second
    derivative and a positive spline value.

    Parameters
    ----------
    peak_hist : sequence
        Histogram counts.
    plot_bins : sequence
        Bin positions belonging to ``peak_hist`` (at least two entries).
    hist_sigma : float
        Smoothing factor; the Gaussian sigma is ``max(plot_bins) * hist_sigma``.
    plot : bool
        ``True`` shows a diagnostic matplotlib figure.

    Returns
    -------
    list
        Maxima positions, each rounded to zero decimals.
    """
    lg.function_log()

    smoothing_sigma = max(plot_bins) * hist_sigma

    # resample the histogram on a ten-fold finer grid
    fine_step = (plot_bins[1] - plot_bins[0]) / 10
    fine_bins = np.arange(min(plot_bins), max(plot_bins), fine_step)
    resampled = interpolate.interp1d(plot_bins, peak_hist)(fine_bins)

    smoothed = gaussian_filter1d(resampled, smoothing_sigma)

    # k=4 so that the first derivative is cubic and exposes roots()
    spline = UnivariateSpline(fine_bins, smoothed, k=4, s=0)
    slope = spline.derivative()
    curvature = spline.derivative(2)

    lower_edge = min(fine_bins)
    upper_edge = max(fine_bins)

    maxima = [
        root for root in slope.roots()
        if root > lower_edge and root < upper_edge and curvature(root) < 0
    ]

    if plot == True:
        plt.plot(plot_bins, peak_hist, label='center of mass histogram')
        plt.plot(fine_bins, smoothed, label='gauss_filter')
        plt.plot(fine_bins, slope(fine_bins), label='derivative')
        plt.legend()
        plt.show()

    return [np.round(root, 0) for root in maxima if spline(root) > 0]
def R_CPE_fit(x, amp1, cen1, sigma1):
    """Fit ``Z_R_CPE`` to a single Gaussian peak and return the fit parameters.

    The Gaussian ``_1gaussian(x, amp1, cen1, sigma1)`` serves as target
    curve.  The three fitted values are returned in the parameter order of
    ``Z_R_CPE`` after its first argument; the covariance is discarded.
    """
    lg.function_log()

    target_curve = _1gaussian(x, amp1, cen1, sigma1)

    initial_guess = [1, amp1 * 2, 0.000001]
    lower_limits = (0.6, 1, 1E-14)
    upper_limits = (1, 1E10, 1E-2)

    fitted, _covariance = scipy.optimize.curve_fit(
        Z_R_CPE,
        x,
        target_curve,
        p0=initial_guess,
        bounds=(lower_limits, upper_limits))

    return fitted
def interp_derivative(x, y, start, end, increment, window, p_order, d_order,
                      output):
    """Differentiate a curve ``d_order`` times via smoothing splines.

    Each pass smooths the current curve with a Savitzky-Golay filter, fits a
    quartic smoothing spline, differentiates it and resamples the derivative
    on ``np.arange(start, end, increment)``.

    Parameters
    ----------
    x, y : sequence
        Input curve.
    start, end, increment : float
        Grid on which the derivatives are resampled.
    window : int
        Savitzky-Golay window; replaced by ``11`` (for all following steps)
        as soon as it exceeds the length of the curve being filtered.
    p_order : int
        Savitzky-Golay polynomial order.
    d_order : int
        Number of differentiation passes, at least ``1``.
    output : {"data", "peaks"}
        ``"data"``  -> columns ``x``/``y`` with the resampled derivative.
        ``"peaks"`` -> columns ``peak_x``/``peak_y``: roots of the last
        derivative spline at which the next-order derivative is negative,
        and the smoothed input curve evaluated at those positions.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If ``output`` is not one of the two supported strings or
        ``d_order`` is smaller than ``1``.
    """
    lg.function_log()

    # Fail early with a clear message; these cases used to surface only at
    # the very end as an unbound-variable error.
    if output not in ("data", "peaks"):
        raise ValueError('output must be "data" or "peaks", got ' +
                         repr(output))
    if d_order < 1:
        raise ValueError('d_order must be at least 1, got ' + repr(d_order))

    x_list = x
    y_list = y

    for _ in range(0, d_order):

        if window > len(y_list):
            window = 11

        smoothed = savgol_filter(y_list, window, p_order)
        # k=4 so that the derivative is a cubic spline and supports roots()
        spline_der = UnivariateSpline(x_list, smoothed, k=4).derivative()

        x_list = np.arange(start, end, increment)
        y_list = spline_der(x_list)

    roots_list = spline_der.roots()

    # sign of the next-order derivative tells maxima from minima
    smoothed = savgol_filter(y_list, window, p_order)
    next_derivative = UnivariateSpline(x_list, smoothed, k=4).derivative()
    roots_vals = next_derivative(roots_list)

    peak_x = [
        root for root, slope in zip(roots_list, roots_vals) if slope < 0
    ]

    # peak heights are read from the smoothed original curve
    smoothed = savgol_filter(y, window, p_order)
    peak_y = UnivariateSpline(x, smoothed, k=4)(peak_x).tolist()

    if output == "data":
        result_data = {'x': x_list, 'y': y_list}
    else:
        result_data = {'peak_x': peak_x, 'peak_y': peak_y}

    return pd.DataFrame(result_data)
def get_Im_Z(data):
    """Collect the ``'imag_z'`` column of every listed file in one DataFrame.

    ``data['file_path']`` is iterated; each file is read with
    ``load_clean_data(1, path)`` and its ``'imag_z'`` column is inserted under
    the file path as column name, in input order.
    """
    lg.function_log()

    spectra = pd.DataFrame()

    for position, file_path in enumerate(data['file_path']):
        imag_column = load_clean_data(1, file_path)['imag_z']
        spectra.insert(position, file_path, imag_column, True)

    return spectra
def Z_R_CPE(log_f, n, R, Q):
    """Negative imaginary part of a resistor in parallel with a CPE.

    Computed from ``omega = 2*pi*10**log_f``,
    ``Z_CPE = 1 / ((1j*omega)**n * Q)`` and
    ``Z = 1 / (1/R + 1/Z_CPE)``; the function returns ``-Im(Z)``.
    """
    lg.function_log()

    omega = 2 * np.pi * 10**log_f

    cpe_impedance = 1 / ((1j * omega)**n * Q)
    resistor_impedance = complex(R, 0)

    parallel = 1 / (1 / resistor_impedance + 1 / cpe_impedance)

    return -parallel.imag
def params_align(global_peaks, exp_fit_params):
    """Spread fitted Gaussian parameters over the global peak slots.

    ``exp_fit_params`` is read as three consecutive thirds (amplitudes,
    centres, sigmas; third length ``round(len / 3)``).  The centres are slid
    along ``global_peaks`` and placed at the first offset with the smallest
    summed absolute distance; slots without a fitted peak stay ``0``.

    Returns
    -------
    list
        ``amplitudes + centres + sigmas``, each part ``len(global_peaks)``
        entries long.
    """
    lg.function_log()

    n_slots = len(global_peaks)
    n_fitted = round(len(exp_fit_params) / 3)

    amplitudes = exp_fit_params[0:n_fitted]
    centres = exp_fit_params[n_fitted:n_fitted * 2]
    widths = exp_fit_params[n_fitted * 2:n_fitted * 3]

    n_centres = len(centres)

    # one mismatch score per candidate offset of the fitted centres
    mismatch = [
        sum(
            abs(global_peaks[shift + k] - centres[k])
            for k in range(n_centres))
        for shift in range(abs(n_slots - n_centres) + 1)
    ]
    best_shift = mismatch.index(min(mismatch))

    amp_new = [0] * n_slots
    cen_new = [0] * n_slots
    sigma_new = [0] * n_slots

    if best_shift < n_slots:
        for k in range(n_centres):
            slot = best_shift + k
            amp_new[slot] = amplitudes[k]
            cen_new[slot] = centres[k]
            sigma_new[slot] = widths[k]

    return amp_new + cen_new + sigma_new
def peaks_assign(global_peaks, exp_fit_params, f_min, f_max):
    """Assign fitted peaks to global peak slots by frequency interval.

    ``exp_fit_params`` is read as three consecutive thirds (amplitudes,
    centres, sigmas).  Slot ``i`` owns the half-open interval
    ``(edges[i], edges[i + 1]]`` where ``edges`` is ``f_min``, one inner edge
    per neighbouring pair of global peaks, and ``f_max``.  A fitted peak whose
    centre falls into an interval is stored in that slot; later matches
    overwrite earlier ones and unmatched slots stay ``0``.

    Returns
    -------
    list
        ``amplitudes + centres + sigmas``, each part ``len(global_peaks)``
        entries long.
    """
    lg.function_log()

    n_slots = len(global_peaks)
    n_fitted = round(len(exp_fit_params) / 3)

    amplitudes = exp_fit_params[0:n_fitted]
    centres = exp_fit_params[n_fitted:n_fitted * 2]
    widths = exp_fit_params[n_fitted * 2:n_fitted * 3]

    # NOTE(review): peak + (next - peak) is the next peak itself, not the
    # midpoint between the two - confirm this is the intended edge.
    inner_edges = [
        global_peaks[k] + (global_peaks[k + 1] - global_peaks[k])
        for k in range(0, n_slots - 1)
    ]
    edges = [f_min] + inner_edges + [f_max]

    amp_new = [0] * n_slots
    cen_new = [0] * n_slots
    sigma_new = [0] * n_slots

    for j in range(0, len(centres)):
        centre = centres[j]

        for slot in range(0, n_slots):
            if edges[slot] < centre <= edges[slot + 1]:
                amp_new[slot] = amplitudes[j]
                cen_new[slot] = centre
                sigma_new[slot] = widths[j]

    return amp_new + cen_new + sigma_new
# Example #16
# 0
def find_extrema(x, y):
    """Find local maxima and minima of ``y(x)`` with an interpolating spline.

    A quartic interpolating spline is fitted; the roots of its first
    derivative are classified by the sign of the second derivative.  A
    normalised overview of curve, spline and first derivative is sent to the
    image log.

    Returns
    -------
    tuple
        ``(max_x, max_y, min_x, min_y)``; the x entries are lists of root
        positions, the y entries are the spline evaluated at them.
    """
    lg.function_log()

    lg.text_log('fit spline for algebraic operations')
    y_spline = UnivariateSpline(x, y, k=4, s=0)

    lg.text_log('get extrema via roots of first derivative')
    d_roots = y_spline.derivative().roots()

    overview_curves = (
        curve_normalize(y),
        curve_normalize(y_spline(x)),
        curve_normalize(y_spline.derivative()(x)),
    )
    lg.img_log((x, overview_curves),
               title='extrema calculation (normalized)',
               legend=['y', 'spline fit', '1st derivative (spline)'],
               x_axis='x',
               y_axis='y')

    lg.text_log('assign extrema via second derivative')
    second_derivative = y_spline.derivative(2)

    max_x = []
    min_x = []
    for root in d_roots:
        curvature = second_derivative(root)
        if curvature < 0:
            max_x.append(root)
        if curvature > 0:
            min_x.append(root)

    return max_x, y_spline(max_x), min_x, y_spline(min_x)
# Example #17
# 0
def find_extrema(x, y):
    """Find local maxima and minima of ``y(x)`` with an interpolating spline.

    A quartic interpolating spline is fitted; the roots of its first
    derivative are classified by the sign of the second derivative.

    Returns
    -------
    tuple
        ``(max_x, max_y, min_x, min_y)``; the x entries are lists of root
        positions, the y entries are the spline evaluated at them.
    """
    lg.function_log()

    lg.text_log('fit spline for algebraic operations')
    y_spline = UnivariateSpline(x, y, k=4, s=0)

    # disabled diagnostic plot, kept for reference:
    # plt.plot(x, y, label = 'original')
    # plt.plot(x, y_spline(x), label = 'spline')
    # plt.xlabel('frequency [Hz]')
    # plt.ylabel('magnitude')
    # lg.img_log('find extrema')

    lg.text_log('get extrema via roots of first derivative')
    d_roots = y_spline.derivative().roots()

    lg.text_log('assign extrema via second derivative')
    second_derivative = y_spline.derivative(2)

    max_x = []
    min_x = []
    for root in d_roots:
        curvature = second_derivative(root)
        if curvature < 0:
            max_x.append(root)
        if curvature > 0:
            min_x.append(root)

    return max_x, y_spline(max_x), min_x, y_spline(min_x)
def generate_peaks(gauss_peaks, f, imag_z):
    """Create one Gaussian curve per peak position for display purposes.

    The amplitude of each Gaussian is the interpolated value of ``imag_z`` at
    the peak position and its sigma is fixed to ``0.5``.

    Returns
    -------
    pandas.DataFrame
        Column ``str(n)`` holds ``_1gaussian(f, ...)`` of the n-th peak.
    """
    lg.function_log()

    y_int = UnivariateSpline(f, imag_z, k=4, s=0)

    amplitudes = list(y_int(gauss_peaks))
    centres = list(gauss_peaks)
    fixed_sigma = 0.5

    peak_df = pd.DataFrame()

    for n in range(0, len(gauss_peaks)):
        curve = _1gaussian(f, amplitudes[n], centres[n], fixed_sigma)
        peak_df.insert(n, str(n), curve)

    return peak_df
# Example #19
# 0
def gauss_optimum(I):
    """Gaussian-smooth ``I`` with a sigma derived from rank-order changes.

    ``I`` is filtered with sigmas ``0.1, 0.2, ...`` (fewer than ``len(I)``
    steps).  For every step the squared difference between the ``argsort``
    of the current and of the previous filtered curve is summed.  A Gaussian
    peak is fitted to that sequence and the filter sigma is taken as
    ``fit[2] + abs(fit[1])``.

    Returns
    -------
    numpy.ndarray
        ``I`` filtered with the derived sigma.
    """
    lg.function_log()

    sigma_step = 0.1
    sigma_range = np.arange(1 * sigma_step, len(I) * sigma_step, sigma_step)

    cost_list = []
    dI_list = []
    d_args_list = []

    previous = I

    lg.text_log('calculate squared argsort difference for I_n-1 and I_n ')

    for sigma in sigma_range:
        smoothed = gaussian_filter1d(I, sigma)

        dI = sum((a_i - b_i)**2 for a_i, b_i in zip(smoothed, previous))
        d_args = sum(
            (a_i - b_i)**2
            for a_i, b_i in zip(np.argsort(smoothed), np.argsort(previous)))

        cost_list.append(d_args * dI)
        dI_list.append(dI)
        d_args_list.append(d_args)

        previous = smoothed

    # start values taken from the largest argsort difference
    strongest = np.argsort(d_args_list)[-1]
    Amp_init = d_args_list[strongest]
    cen_init = sigma_range[strongest]
    sigma_init = abs(cen_init - sigma_range[0])

    # each entry: [start value, lower limit, upper limit]
    p_Amp = [Amp_init, 0, Amp_init * 1.2]
    p_cen = [cen_init, 0, cen_init * 2]
    p_sigma = [sigma_init, sigma_init * 0.01, sigma_init * 5]

    # NOTE(review): fit_params is called here with a single tuple argument -
    # confirm which helper this resolves to.
    params, bnds = lg.var_log(fit_params((p_Amp, p_sigma, p_cen)))

    gauss_peak_params, gauss_peak_errs = curve_fit(gauss_peak,
                                                   sigma_range,
                                                   d_args_list,
                                                   p0=params,
                                                   bounds=bnds)

    # fitted curve (only needed by the disabled documentation plots)
    d_args_list = gauss_peak(sigma_range, *gauss_peak_params)

    sigma_optimum = lg.var_log(gauss_peak_params[2] +
                               abs(gauss_peak_params[1]))

    return gaussian_filter1d(I, sigma_optimum)
# Example #20
# 0
def baseline(dt, I, report = True, logfile = None):
    """
    Estimate a signal baseline by Gaussian low-pass filtering in Fourier space.

    The smoothed Fourier magnitude of ``I`` is searched for extrema; the
    cutoff frequency is the smaller of the position of the highest minimum
    and the position of the highest maximum.  Half of the largest frequency
    index below that cutoff is used as sigma of a Gaussian filter that is
    multiplied onto the full spectrum before transforming back.

    Parameters
    ----------
    dt : float;
    distance between time points.
    I : array;
    Signal Intensity/Magnitude.
    report : bool;
    currently unused.
    logfile : object;
    currently unused.

    Returns
    -------
    baseline : array;
    Intensity array for baseline subtraction.
    inclination : float;
    inclination of baseline calculated via linear regression.

    Raises
    ------
    ValueError
    if no frequency lies below the cutoff.

    """

    lg.function_log()

    # spectrum to analyse and its smoothed magnitude
    f_range, spectrum = fourier_transform(dt, I)
    magnitude = gauss_optimum(np.abs(spectrum))

    max_x, max_y, min_x, min_y = lg.var_log(find_extrema(f_range, magnitude))

    # cutoff: smaller of (x of highest minimum, x of highest maximum)
    bg_cutoff = lg.var_log(min((min_x[np.argsort(min_y)[-1]]), (max_x[np.argsort(max_y)[-1]])))

    # Index of the first occurrence of every frequency, built in one pass.
    # This yields the same indices as list(f_range).index(x) without
    # rescanning the list for each element.
    first_position = {}
    for position, freq in enumerate(f_range):
        first_position.setdefault(freq, position)

    bg_indices = lg.var_log([first_position[freq] for freq in f_range if freq < bg_cutoff])

    if not bg_indices:
        raise ValueError('no frequency below the background cutoff ' +
                         str(bg_cutoff))

    # half of the largest index below the cutoff is the filter sigma
    sigma_filter = lg.var_log(max(bg_indices)/2)

    # filter the full spectrum and transform back
    full_spectrum = DFT(I)
    gauss_filter = np.asarray(low_pass_gauss_filter(len(full_spectrum), sigma_filter))
    filtered_spectrum = np.multiply(full_spectrum, gauss_filter)
    baseline_curve = np.abs(iDFT(np.asarray(filtered_spectrum)))

    # Exactly len(I) time points.  np.arange(0, len(I)*dt, dt) can return one
    # sample too many for a non-integer dt because of floating-point rounding,
    # which would make polyfit fail on mismatching lengths.
    t = np.arange(len(I)) * dt
    coef = np.polyfit(t, baseline_curve, 1)
    inclination = coef[0]

    return baseline_curve, inclination
def main(corr_threshold, root, avoid, pattern, report=False):
    """Interactive driver: group the datasets, check peak extraction, fit.

    Steps as implemented below:

    0. collect the datasets with ``get_file_list(pattern, avoid, root)``;
    1. set the (hard-coded) smoothing, peak-extraction and fit parameters;
    2. try to group the datasets through the correlation matrix of their
       Im(Z) columns (``cluster_distance`` with ``corr_threshold``); on any
       exception all files form a single group;
    3. per group: show the extracted peaks and ask on stdin whether they are
       acceptable (otherwise new filter windows are read from stdin), then
       run ``fit_spectrum``;
    4. if ``report`` is ``True``: write one CSV per group, and per fitted
       dataset show/save a figure and write a CSV of the fitted curves.

    Parameters
    ----------
    corr_threshold :
        Passed on to ``cluster_distance``.
    root : str
        Search root; also the prefix of all written CSV paths.
    avoid, pattern :
        Passed on to ``get_file_list``; ``pattern`` is also part of the
        result file names.
    report : bool
        Enables step 4.

    Nothing is returned.
    """
    lg.function_log()

    ############################################################
    """0. get data"""
    ############################################################

    data = get_file_list(pattern, avoid, root)

    n_samples = len(data)
    print(str(n_samples) + ' datasets will be analysed')

    ############################################################
    """1. set parameters"""
    ############################################################
    """1.1 parameters for smoothing the impedance curve"""
    #
    #filter window for savitzky golay (sg) filter (greater then sg_poly, odd)
    sg_global = 9
    sg_local = 7

    #polynomial order for savitzky golay (sg) filter (>= 3, odd)
    sg_poly = 3
    """1.2 parameters to extract peaks from the impedance curve"""
    #derivative orders for peak extraction (dev_local = 1, dev_global => 3)
    dev_global = 5
    dev_local = 1
    """1.3 parameters for fitting the impedance curve - boundary settings"""

    #allowed relative change of peak height (0 < u_fit < 1)
    u_fit = 0.95

    #allowed displacement from peak centre -> high value -> low displacement (div_cen > 0)
    div_cen = 4

    #choose if global peaks should be adjusted to locally detected peaks yes -> single, no -> global
    peak_select = 'single'
    """1.4 set plot parameters"""
    plt.rcParams.update({'font.size': 14})

    width = 14
    height = 3
    scaling = 1

    ############################################################
    """2. fit all curves - use globally extracted peaks"""
    ############################################################

    input_control('start curve analysis?')

    error_list = []

    #try to group data, convert source dataframe to list if no groups found
    try:
        # one column of Im(Z) per file; the column correlation matrix is the
        # input of the clustering
        test_f = get_Im_Z(data)
        corr_map = test_f.corr()

        labels = [get_sample_name(x)[1] for x in corr_map.columns.values]
        #optional plot
        #sns.heatmap(corr_map, xticklabels=labels, yticklabels=labels)
        #lg.img_log(title='nicht sortiert',x_axis='Datensatz Nr.', y_axis='Datensatz Nr.')

        corr_map, data_groups, label = cluster_distance(
            corr_map, corr_threshold)

        labels = [get_sample_name(x)[1] for x in corr_map.columns.values]
        #sns.heatmap(corr_map, xticklabels=labels, yticklabels=labels)
        #lg.img_log(title='nach Clustering',x_axis='Datensatz Nr.', y_axis='Datensatz Nr.')

        # group name -> group size (as string), only used for the printout
        data_dict = {}

        for idx, group in enumerate(data_groups):
            group_str = 'group_' + str(idx)
            data_dict[group_str] = str(len(group))

        print('number of data groups: ' + str(len(data_groups)))
        print('groups: ' + str(data_dict))

        data = data_groups
    except Exception as e:

        # fallback: every file goes into one single group
        print(e)
        data_groups = []
        data_groups.append(data['file_path'].tolist())
        print('no groups found')
        pass

    input_control('proceed? - y/n')

    result_list = []
    result_names = []

    #perform fitting for each data group
    for i in range(0, len(data_groups)):

        data = data_groups[i]
        test_name, f, imag_z = get_sample_dataset(data)

        parent_folder, sample_name = get_sample_name(test_name)
        sample_name = 'sample: ' + str(sample_name)
        print(sample_name)

        proceed = False

        # 'proceed' is never set to True; the loop is left through 'break'
        while proceed == False:
            """1. Check peak extraction"""
            gauss_peaks, gauss_sigma = peak_extract(sg_global, sg_poly,
                                                    dev_global, data, True)
            print(
                str(len(gauss_peaks)) + ' peaks used for fitting: ' +
                str(gauss_peaks))

            local_peaks, local_sigma = peak_extract(sg_local, sg_poly,
                                                    dev_local, test_name, True)
            peak_df = generate_peaks(local_peaks, f, imag_z)
            """display peaks"""
            plt.plot(f, imag_z, label=sample_name)
            for j in range(0, len(local_peaks)):
                plt.plot(f, peak_df[str(j)], label='peak ' + str(j))
            plt.legend()
            plt.show()

            print('peak extraction ok? - y/n')

            # anything but 'y' asks for new filter windows and repeats
            if input() == 'y':
                break
            else:
                print('enter global (sg) filter window (odd, greater then 7)')
                sg_global = int(input())
                print('enter local (sg) filter window (odd, greater then 5)')
                sg_local = int(input())

        input_control('start fitting now? - y/n')

        data = data_groups[i]
        print('Fitting group number ' + str(i))
        result = fit_spectrum(sg_global, sg_local, sg_poly, dev_global,
                              dev_local, u_fit, div_cen, data, 3, peak_select)

        # NOTE(review): bare except silently skips results without an
        # 'error' column (and any other failure) - confirm this is intended.
        try:
            error_list = error_list + result['error'].to_list()
        except:
            pass

        # the name starts with a backslash and is later appended to 'root'
        result_names.append(r'\result pattern_' + pattern + '_group' + str(i) +
                            '.csv')
        result_list.append(result)

    #plot data and save figure for each fitted dataset and save to csv

    if report == True:
        for i in range(0, len(result_list)):
            #write results to file
            file_name = result_names[i]
            result = result_list[i]
            result.to_csv(root + file_name)

            #plot result
            result_shape = result.shape

            result = result.reset_index()

            n_samples = result_shape[0]
            # presumably 7 metadata columns + 3 columns per peak (layout
            # written by df_append), so (columns - 6) / 3 rounds to the
            # number of peaks - verify if the column layout changes
            n_peaks = round((result_shape[1] - 6) / 3)

            plt.rcParams.update({'font.size': 14})

            width = 14
            height = 3
            scaling = 1

            for k in range(0, n_samples):

                fit_data = pd.DataFrame()

                fig = plt.figure(figsize=(width * scaling,
                                          height * scaling * 2))

                names = result['name']
                paths = result['path']

                row = result.loc[k]
                # skip the leading non-peak columns; reset_index() above
                # added one more column in front
                gauss_params = row[8:len(row)]

                legend_temp = names[k]
                print(legend_temp)
                legend_text = legend_temp.replace('.', '_')
                spectrum = load_clean_data(1, paths[k])

                x = spectrum['frequency']
                y = spectrum['imag_z']

                fit_data.insert(fit_data.shape[1], 'frequency', x, True)
                fit_data.insert(fit_data.shape[1], 'imag_z', y, True)

                plt.plot(x, y, '-', label=legend_text)

                amp_list = []
                cen_list = []
                sigma_list = []

                R_CPE_list = []

                n_index = 3

                start = 0
                end = n_peaks

                # parameters are stored as all amplitudes, then all centres,
                # then all sigmas
                for n in range(start, end):

                    amp1 = gauss_params[n]
                    cen1 = gauss_params[n + n_peaks]
                    sigma1 = gauss_params[n + n_peaks * 2]

                    amp_list.append(amp1)
                    cen_list.append(cen1)
                    sigma_list.append(sigma1)

                    plt.plot(x,
                             _1gaussian(x, amp1, cen1, sigma1),
                             label='f_' + str(n + 1) + ', sigma' + str(sigma1))
                    fit_data.insert(fit_data.shape[1], 'f_' + str(n + 1),
                                    _1gaussian(x, amp1, cen1, sigma1), True)

                sum_gauss = amp_list + cen_list + sigma_list

                plt.plot(x, multiple_gauss(x, *sum_gauss), label='fit')
                fit_data.insert(fit_data.shape[1], 'fit',
                                multiple_gauss(x, *sum_gauss), True)

                plt.title(legend_text)
                plt.legend()
                plt.grid()
                plt.show()

                #save figures
                # the figure file name has no directory part, unlike the CSV
                # below which is written under 'root'
                fig.savefig(legend_text, dpi=600)

                #save fitted spectra to csv
                fit_data.to_csv(root + '\\' + legend_text + '.csv')
def fit_spectrum(sg_global, sg_local, sg_poly, dev_global, dev_local, u_fit,
                 div_cen, data, n_peaks, peaks):
    """Fit a sum of Gaussians to the Im(Z) curve of every file in ``data``.

    Global peak positions are extracted once from all files.  For every file
    the start centres are the global peaks, optionally (``peaks == 'single'``)
    replaced at the best-matching slots by peaks detected in that file alone.
    Files whose fit raises are reported on stdout and skipped.

    Parameters
    ----------
    sg_global, sg_local : int
        Savitzky-Golay windows for global / per-file peak detection.
    sg_poly : int
        Savitzky-Golay polynomial order.
    dev_global, dev_local : int
        Derivative orders for global / per-file peak detection.
    u_fit, div_cen : float
        Bound settings forwarded to ``fit_params``.
    data : str, list or DataFrame-like
        One file path, a list of paths, or an object whose ``'file_path'``
        column lists the paths.
    n_peaks :
        Currently unused.
    peaks : str
        ``'single'`` enables the per-file peak detection.

    Returns
    -------
    pandas.DataFrame
        One row per successfully fitted file (layout: see ``df_append``).
    """
    lg.function_log()

    # bookkeeping of failed files (kept local, not returned)
    fail_df = pd.DataFrame()
    fail_df.insert(0, 'fail_path', 'dummy')

    if isinstance(data, str):
        file_list = [data]
    elif isinstance(data, list):
        file_list = data
    else:
        file_list = data['file_path'].tolist()

    output_df = pd.DataFrame()

    gauss_peaks, gauss_sigma = peak_extract(sg_global, sg_poly, dev_global,
                                            data, True,
                                            'global peak detection')
    global_peaks = gauss_peaks

    n_data = len(file_list)
    n_success = 0

    for i in range(0, n_data):

        peak_inc = 0

        print('progress: ' + str(np.round(i / n_data * 100, 2)) + ' %')

        if peaks == 'single':
            gauss_peaks, gauss_sigma = peak_extract(sg_local, sg_poly,
                                                    dev_local, file_list[i],
                                                    True,
                                                    'local peak detection')
            # widen the filter window until no more local than global peaks
            # are found
            while len(gauss_peaks) > len(global_peaks):
                gauss_peaks, gauss_sigma = peak_extract(
                    sg_local + peak_inc, sg_poly, dev_local, file_list[i],
                    True, 'local peak detection')
                peak_inc = peak_inc + 2

        if 1 <= len(gauss_peaks) <= len(global_peaks):
            print('local and global peaks used')

        else:
            print(' only global peaks used')
            gauss_peaks = global_peaks

        folder_name, sample_name = get_sample_name(file_list[i])

        test_file = load_clean_data(1, file_list[i])

        f = test_file['frequency']
        imag_z = test_file['imag_z']
        real_z = test_file['real_z']

        # Align the local peaks on a *copy* of the global peaks so that the
        # group-wide peak list stays the same for every file and for the
        # result table; handing over the list itself let one file's peaks
        # leak into all following files.
        gauss_peaks = peaks_align(list(global_peaks), gauss_peaks, False)

        # restrict the peaks to the frequency range of this file
        gauss_peaks, peak_index = peak_filter(gauss_peaks, min(f), max(f))

        #interpolate data
        imag_int = UnivariateSpline(f, imag_z, k=4, s=0)

        amp_list = list(imag_int(gauss_peaks))
        cen_list = gauss_peaks

        sigma_min = 0.5
        sigma_max = 1.0

        p_init, bnds = fit_params(amp_list, cen_list, min(f), max(f),
                                  sigma_min, sigma_max, u_fit, div_cen)

        try:
            exp_fit_params, exp_fit_errs = scipy.optimize.curve_fit(
                multiple_gauss, f, imag_z, p0=p_init, bounds=bnds)
        except Exception as e:
            # best effort: report the file and carry on with the next one
            print(e)
            print(sample_name + ' failed')
            # append instead of always overwriting row 0
            fail_df.loc[len(fail_df)] = file_list[i]
            continue

        error = total_fit_error(f, imag_z,
                                multiple_gauss(f, *exp_fit_params),
                                max(real_z))
        df_append(file_list[i], sample_name, min(f), max(f), max(imag_z),
                  max(real_z), exp_fit_params, global_peaks, error,
                  n_success, output_df)
        n_success = n_success + 1

    print(str(len(file_list) - n_success) + ' curves could not be fitted')
    return output_df
def peak_extract(sg_window,
                 sg_poly,
                 dev_order,
                 data,
                 plot=False,
                 plot_title=''):
    """Extract common peak positions from one or several spectra.

    For every file the peak positions found by ``interp_derivative`` with
    derivative order 1 and with ``dev_order`` are collected.  All positions
    are put into one histogram, which is smoothed, spline-interpolated and
    searched for maxima.  The maxima are refined by fitting a sum of
    Gaussians (``multiple_gauss``) to the smoothed histogram.

    Parameters
    ----------
    sg_window, sg_poly : int
        Savitzky-Golay window and polynomial order handed to
        ``interp_derivative``.
    dev_order : int
        Derivative order of the second peak search.
    data : str, list or DataFrame-like
        One file path, a list of paths, or an object whose ``'file_path'``
        column lists the paths.
    plot : bool
        ``True`` shows the histogram, its smoothed version and the first
        derivative of the spline.
    plot_title : str
        Title of that plot.

    Returns
    -------
    gauss_peaks : list
        Fitted centres of the peaks whose fitted amplitude exceeds 20 % of
        the mean fitted amplitude; empty if no histogram maximum was found.
    gauss_sigma : list
        Absolute fitted sigmas of the same peaks.
    """
    lg.function_log()

    f_min_list = []
    f_max_list = []

    f_inc_list = []

    peaks_array = []

    file_list = []

    if isinstance(data, str):
        file_list.append(data)
    else:
        if isinstance(data, list):
            file_list = data
        else:
            file_list = data['file_path'].tolist()
    """create list of all peaks found in the datasets"""
    for i in range(0, len(file_list)):

        file = load_clean_data(1, file_list[i])

        f = file['frequency']
        imag_z = file['imag_z']

        min_f = round(min(f), 1)
        max_f = round(max(f), 1)

        f_min_list.append(min_f)
        f_max_list.append(max_f)

        # spacing of the first two frequency samples
        f_inc = round(abs((f[1]) - (f[0])), 2)

        f_inc_list.append(f_inc)
        """extraction of peaks from interpolation curve"""
        #d_Z = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window, sg_poly, dev_order, "data")
        peaks_d1 = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window,
                                     sg_poly, 1, "peaks")
        peaks_dn = interp_derivative(f, imag_z, min_f, max_f, f_inc, sg_window,
                                     sg_poly, dev_order, "peaks")

        peaks_d1_list = (peaks_d1["peak_x"].values.tolist())
        peaks_dn_list = (peaks_dn["peak_x"].values.tolist())
        peaks_array = peaks_array + peaks_d1_list + peaks_dn_list

    # histogram range covers all files; the bin width is the coarsest
    # frequency spacing encountered
    hist_start = min(f_min_list) - 0.5
    hist_end = max(f_max_list) + 1
    hist_bins = max(f_inc_list)
    """create histogram"""
    peak_hist, peak_bins = np.histogram(peaks_array,
                                        bins=np.arange(hist_start, hist_end,
                                                       hist_bins))
    # left bin edges, one per histogram count
    plot_bins = (peak_bins[:len(peak_bins) - 1])
    """smoothing of histogram via gaussian filter"""

    hist_filter_1 = savgol_filter(peak_hist, 7, 5)
    hist_filter = gaussian_filter1d(hist_filter_1, 2.5)
    """interpolation of histogram"""
    hist_spline = UnivariateSpline(plot_bins, hist_filter, k=4, s=0)
    """higher number of bins for analysis of interpolation curve"""
    # NOTE(review): f_inc is the value left over from the last file of the
    # loop above - confirm that is intended.
    new_bins = np.arange(0, hist_end, f_inc * 0.1)
    """calculation of derivatives to find local maxima"""
    d_hist_spline = hist_spline.derivative()
    d2_hist_spline = hist_spline.derivative(2)
    d_hist_roots = d_hist_spline.roots()
    """only extract roots with positive values for d2/d2x(root)"""
    # the code below keeps roots with a NEGATIVE second derivative (maxima)
    find_peaks_result = []

    for n in range(0, len(d_hist_roots)):
        if d_hist_roots[n] > min(new_bins):
            if d_hist_roots[n] < max(new_bins):
                if d2_hist_spline(d_hist_roots[n]) < 0:
                    find_peaks_result.append(d_hist_roots[n])

    cen_list = []

    # keep only maxima at which the spline value is positive
    for n in range(0, len(find_peaks_result)):
        if hist_spline(find_peaks_result[n]) > 0:
            cen_list.append(find_peaks_result[n])
    """refinement with gaussian peak fitting"""
    sigma_peak_find = 0.2
    params = []

    amp_list = list(hist_spline(cen_list))

    if len(cen_list) > 0:
        params, bnds = fit_params(amp_list, cen_list, hist_start, hist_end,
                                  0.1, 1.25, 0.5, 2)

        hist_gauss, errs_gauss = scipy.optimize.curve_fit(multiple_gauss,
                                                          plot_bins,
                                                          hist_filter,
                                                          p0=params,
                                                          bounds=bnds)
        # NOTE(review): hist_gauss is passed as ONE argument here, whereas
        # other call sites unpack the parameters (multiple_gauss(x, *p));
        # g_hist is not used afterwards.
        g_hist = multiple_gauss(new_bins, hist_gauss)
        """plot peak search result"""

        if plot == True:
            plt.rc('legend', fontsize=10)
            plt.plot(plot_bins, peak_hist, label='histogram')
            plt.plot(plot_bins, hist_filter, label='histogram smoothed')
            plt.plot(new_bins, d_hist_spline(new_bins), label='1. derivative')
            plt.title(plot_title)
            plt.legend()
            plt.show()
            #plt.plot(new_bins, multiple_gauss(new_bins, *params), label = 'gauss')
            #plt.plot(new_bins,g_hist)

            # 'i' still holds the index of the last file of the loop above
            folder_text, sample_text = get_sample_name(file_list[i])

        n_peaks = len(cen_list)
        gauss_peaks = []
        gauss_sigma = []

        errs_peaks = []
        errs_sigma = []

        # fitted parameters are ordered amplitudes, centres, sigmas
        for a in range(0, n_peaks):
            if hist_gauss[a] > (0.2 * np.mean(hist_gauss[0:n_peaks])):
                gauss_peaks.append(hist_gauss[a + n_peaks])
                gauss_sigma.append(abs(hist_gauss[a + n_peaks * 2]))

                # errs_gauss is the second return value of curve_fit; the
                # two error lists are collected but not returned
                errs_peaks.append(errs_gauss[a + n_peaks])
                errs_sigma.append(abs(errs_gauss[a + n_peaks * 2]))

    else:
        print('no local peaks found')
        gauss_peaks = []
        gauss_sigma = []

    return gauss_peaks, gauss_sigma
def fit_params(amp_list, cen_list, min_x, max_x, sigma_min, sigma_max, u_fit,
               div_cen):
    """Build start values and bounds for a multi-Gaussian fit.

    Per peak:

    * amplitude: start ``amp``, bounds ``amp -/+ amp * u_fit``;
    * centre: start ``cen``; the lower bound is ``cen`` minus the distance to
      the left (``abs(min_x - cen)`` for the first peak, otherwise the gap to
      the previous peak divided by ``div_cen``); the upper bound is ``cen``
      plus the distance to the right (gap to the next peak divided by
      ``div_cen``, or ``abs(max_x - cen) / 2`` for the last peak);
    * sigma: start is the mean of both distances clipped to
      ``[sigma_min, sigma_max]``, bounds ``sigma_min`` / ``sigma_max``.

    If ``sigma_max == sigma_min`` the sigma entries are left out entirely.

    Returns
    -------
    params : list
        Start values ordered amplitudes, centres(, sigmas).
    bnds : tuple
        ``(lower, upper)`` tuples in the same order.
    """
    lg.function_log()

    n_peaks = len(amp_list)
    last = n_peaks - 1

    fit_amp = []
    fit_cen = []
    amp_low = []
    amp_high = []

    # n_peaks + 1 distances: left of the first peak, between neighbours,
    # right of the last peak
    distances = []

    for k in range(0, n_peaks):
        amplitude = amp_list[k]
        centre = cen_list[k]

        fit_amp.append(amplitude)
        fit_cen.append(centre)

        if k == 0:
            distances.append(abs(min_x - cen_list[k]))

        if k == last:
            distances.append(abs(max_x - cen_list[k]) / 2)
        else:
            distances.append(abs(cen_list[k] - cen_list[k + 1]) / div_cen)

        tolerance = amplitude * u_fit
        amp_low.append(amplitude - tolerance)
        amp_high.append(amplitude + tolerance)

    cen_low = []
    cen_high = []
    fit_sigma = []

    for centre, left, right in zip(fit_cen, distances, distances[1:]):
        cen_low.append(centre - left)
        cen_high.append(centre + right)
        fit_sigma.append(min(max((left + right) / 2, sigma_min), sigma_max))

    sigma_low = [sigma_min] * len(fit_cen)
    sigma_high = [sigma_max] * len(fit_cen)

    if sigma_max == sigma_min:
        lower = amp_low + cen_low
        upper = amp_high + cen_high
        params = fit_amp + fit_cen
    else:
        lower = amp_low + cen_low + sigma_low
        upper = amp_high + cen_high + sigma_high
        params = fit_amp + fit_cen + fit_sigma

    bnds = (tuple(lower), tuple(upper))

    return params, bnds