def xs_interp(inp_ene, inp_xs, inp_ene_interp, plot_cs):
    # inp_ene: energies from TALYS; inp_xs: cross sections from TALYS;
    # inp_ene_interp: energies at which to interpolate.
    # Requires numpy as np, matplotlib.pyplot as plt and
    # scipy.interpolate.UnivariateSpline.
    x_ene = np.linspace(0, 660, 3301)
    spl = UnivariateSpline(inp_ene, inp_xs, s=0.25)
    y_xs = spl(x_ene)
    out_xs = spl(np.asarray(inp_ene_interp))  # interpolated cross sections
    # optional plot
    if plot_cs:
        plt.plot(inp_ene, inp_xs, 'ro', ms=5)
        plt.plot(x_ene, y_xs, lw=3, c='g', alpha=0.6)
        plt.plot(inp_ene_interp, out_xs, 'o', ms=3)
        plt.show()
    return out_xs
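# Minimal usage sketch for xs_interp (an illustration, not part of the original
# source): synthetic data stands in for TALYS output.
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import UnivariateSpline

ene = np.linspace(0, 660, 60)                            # energy grid
xs = (50 * np.exp(-((ene - 120) / 80) ** 2)
      + np.random.normal(0, 0.5, ene.size))              # noisy excitation function
print(xs_interp(ene, xs, [100.0, 150.0, 200.0], plot_cs=False))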
def thickness_finder(ezz, zz, nbins, sym=True, nbspl=30):
    # Bmatspl and findclosest are project helpers defined elsewhere.
    if sym:  # if membrane is symmetric, average lower and upper halves
        zzlow = zz[:len(zz) // 2]
        # Reverse arrays to pair each lower-half point with its upper-half mirror
        zzupp = zz[::-1]
        zzupp = zzupp[:len(zz) // 2]
        ezzlow = ezz[:len(ezz) // 2]
        ezzupp = ezz[::-1]
        ezzupp = ezzupp[:len(ezz) // 2]
        ezzmeanlow = np.mean((ezzlow, ezzupp), axis=0)
        ezzmeanupp = ezzmeanlow[::-1]
        # Concatenate the symmetrised halves back into ezz
        if (nbins % 2) == 0:
            ezz = np.hstack((ezzmeanlow, ezzmeanupp))
        else:
            zzcentre = zz[len(zz) // 2]
            ezzcentre = ezz[len(zz) // 2]
            ezz = np.hstack((ezzmeanlow, ezzcentre, ezzmeanupp))
    zz = zz[2:-2]
    ezz = ezz[2:-2]
    knots = np.linspace(zz[0], zz[-1], nbspl - 2, endpoint=True)[1:-1]
    # Build design matrix and smooth ezz by means of ridge regression
    # (normalize= and store_cv_values= require an older scikit-learn release)
    Bmat = Bmatspl(zz, knots=knots, q=3)
    reg = RidgeCV(alphas=np.logspace(-15, 15, 300001),
                  store_cv_values=True, normalize=True)
    reg.fit(Bmat, ezz)
    ezzsmooth = np.dot(Bmat, reg.coef_)
    # Interpolate the smoothed function on a finer grid
    dz = 1e-3
    zgrid = np.arange(zz[0], zz[-1], dz)
    ezzsmoothspl = UnivariateSpline(zz, ezzsmooth, k=3, s=0)
    smooth = ezzsmoothspl(zgrid)
    # Calculate derivatives
    dsmooth = np.gradient(smooth)
    d2smooth = np.gradient(dsmooth)
    # Find critical points
    # First derivative: extrema closest to the membrane centre
    idx_min = argrelextrema(dsmooth, np.less_equal, order=len(zgrid) // 10)
    idx_max = argrelextrema(dsmooth, np.greater_equal, order=len(zgrid) // 10)
    idx11, val11 = findclosest(len(zgrid) // 2, idx_min[0])
    idx12, val12 = findclosest(len(zgrid) // 2, idx_max[0])
    # Second derivative: maxima bracketing the first-derivative extrema
    idx_min = argrelextrema(d2smooth, np.less_equal, order=len(zgrid) // 10)
    idx_max = argrelextrema(d2smooth, np.greater_equal, order=len(zgrid) // 10)
    idx21, val21 = findclosest(idx11, idx_max[0][idx_max[0] > idx11])
    idx22, val22 = findclosest(idx12, idx_max[0][idx_max[0] < idx12])
    # db = abs(zgrid[idx12] - zgrid[idx11])
    dh = abs(zgrid[idx22] - zgrid[idx21])
    return dh
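# Self-contained sketch of the argrelextrema step used above (an illustration,
# not from the original source). Strict comparators np.greater/np.less are used
# here so flat plateaus are not flagged; the profile is synthetic.
import numpy as np
from scipy.signal import argrelextrema

z = np.linspace(-5, 5, 1001)
profile = np.exp(-z ** 2)               # smooth symmetric profile
d1 = np.gradient(profile)               # first derivative
order = len(z) // 10                    # compare against +/-100 neighbours
i_max = argrelextrema(d1, np.greater, order=order)[0]
i_min = argrelextrema(d1, np.less, order=order)[0]
print(z[i_max], z[i_min])               # near -1/sqrt(2) and +1/sqrt(2)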
def Accept_data():
    # Read in data from entry for info tables
    n = info['current_exp']
    for row, var in enumerate(info_vars):
        val = float(t0['etr'][var].get())
        info[var][n] = val  # accept all values as given in entry
    s, e = cut(n)
    for var in info_vars:
        if '_start' in var:
            val = max(info[var][n], data[var[0]][n][0], data[var[0]][n][s])
            info[var][n] = val  # update with correct minimum limit
            #val = data[var[0]][n][s]
        elif '_end' in var:
            val = min(info[var][n], data[var[0]][n][-1], data[var[0]][n][e])
            info[var][n] = val  # update with correct maximum limit
        elif 'n_points' in var:
            val = int(len(data['t'][n]) / (max(int(len(data['t'][n]) / info[var][n]), 1))) - 1
            info[var][n] = val
    normalize_mass(n)
    spl = UnivariateSpline(data['t'][n], data['x_exp'][n], s=info['s.factor'][n])
    data['dx_exp'][n] = -spl(data['t'][n], nu=1)  # first derivative
    Plot_raw_data()
    create_equally_spaced_data()
    write_info_text()
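# The smoothing-spline derivative used here (and again in
# extract_info_from_data below), shown in isolation: a sketch with synthetic
# mass-loss data, all values invented for illustration.
import numpy as np
from scipy.interpolate import UnivariateSpline

t = np.linspace(0, 100, 500)                  # time
x = 1 / (1 + np.exp((t - 50) / 5))            # synthetic mass-loss curve
spl = UnivariateSpline(t, x, s=1e-4)          # s: smoothing factor
dx = -spl(t, nu=1)                            # nu=1 -> first derivative
print(dx.max())                               # peak mass-loss rate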
def thermCond_fit_UnivariateSpline_model(T, data=[]):
    # Fitting model for creating an equation when given a list of x, y values
    x = data[0]
    y = data[1]
    if len(x) <= 5:
        order = len(x) - 1
    else:
        order = 5
    fit = UnivariateSpline(x, y, k=order)
    thermCond = fit(T)
    return thermCond
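# Usage sketch for thermCond_fit_UnivariateSpline_model; the conductivity
# values below are made up for illustration.
from scipy.interpolate import UnivariateSpline

T_data = [100, 200, 300, 400, 500]            # temperature, K
k_data = [0.5, 0.9, 1.2, 1.4, 1.5]            # conductivity, W/(m*K)
print(thermCond_fit_UnivariateSpline_model(350, data=[T_data, k_data]))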
def extract_info_from_data(filenames):
    info['n_data'] = len(data['T'])
    info['filenames'] = filenames
    # add derivative
    data['dx_exp'] = {}
    for n in range(info['n_data']):
        data['T'][n] += 273.15  # degrees C -> K
        data['x_exp'].append(1)
        info['s.factor'].append(1e-4)
        info['T_start'].append(data['T'][n][0])
        info['t_start'].append(data['t'][n][0])
        info['T_end'].append(data['T'][n][-1])
        info['t_end'].append(data['t'][n][-1])
        info['mass norm. temp.'].append(100)
        normalize_mass(n)
        spl = UnivariateSpline(data['t'][n], data['x_exp'][n], s=info['s.factor'][n])
        data['dx_exp'][n] = -spl(data['t'][n], nu=1)  # first derivative
        info['max'].append(max(data['dx_exp'][n]))
        #info['n_points'].append(len(data['t'][n]))
        info['n_points'].append(100)
def __call__(self, x):
    """Overloaded call method: evaluate the underlying spline, which was
    fitted in log-log space, and map the result back to linear space."""
    return numpy.power(10., UnivariateSpline.__call__(self, numpy.log10(x)))
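# A possible class context for the overloaded method above (a reconstruction,
# not the original class): fit in log-log space, evaluate in linear space.
import numpy as np
from scipy.interpolate import UnivariateSpline

class LogLogSpline(UnivariateSpline):          # hypothetical name
    def __init__(self, x, y, **kwargs):
        UnivariateSpline.__init__(self, np.log10(x), np.log10(y), **kwargs)

    def __call__(self, x):
        return np.power(10., UnivariateSpline.__call__(self, np.log10(x)))

x = np.logspace(0, 3, 20)
spl = LogLogSpline(x, x ** -2.5, s=0)
print(spl(50.))                                # ~ 50**-2.5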
def __call__(self, x, nu=0):
    # Evaluate the spline with the input wrapped onto one period.
    return UnivariateSpline.__call__(self, x % self.period, nu)
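# A possible class context for the periodic variant above (a reconstruction):
# inputs are wrapped modulo self.period before evaluation.
import numpy as np
from scipy.interpolate import UnivariateSpline

class PeriodicSpline(UnivariateSpline):        # hypothetical name
    def __init__(self, x, y, period, **kwargs):
        self.period = period
        UnivariateSpline.__init__(self, x, y, **kwargs)

    def __call__(self, x, nu=0):
        return UnivariateSpline.__call__(self, x % self.period, nu)

x = np.linspace(0, 2 * np.pi, 50)
spl = PeriodicSpline(x, np.sin(x), period=2 * np.pi, s=0)
print(spl(2 * np.pi + 0.5))                    # ~ sin(0.5)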
def __call__(self, x):
    # Return fill_value outside the fitted domain, spline values inside.
    outside = self.is_outside_domain(x)
    return np.where(outside, self.fill_value, UnivariateSpline.__call__(self, x))
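# A possible class context for the fill-value variant above; is_outside_domain
# and fill_value are reconstructed here as plausible assumptions.
import numpy as np
from scipy.interpolate import UnivariateSpline

class FillValueSpline(UnivariateSpline):       # hypothetical name
    def __init__(self, x, y, fill_value=np.nan, **kwargs):
        self.x_min, self.x_max = np.min(x), np.max(x)
        self.fill_value = fill_value
        UnivariateSpline.__init__(self, x, y, **kwargs)

    def is_outside_domain(self, x):
        x = np.asarray(x)
        return (x < self.x_min) | (x > self.x_max)

    def __call__(self, x):
        outside = self.is_outside_domain(x)
        return np.where(outside, self.fill_value,
                        UnivariateSpline.__call__(self, x))

xg = np.linspace(0, 10, 20)
spl = FillValueSpline(xg, xg ** 2, s=0)
print(spl([5.0, 42.0]))                        # [25.0, nan]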
from scipy.interpolate import UnivariateSpline

# Coefficients are stored in the coeffs list, with coeffs[i] the coefficient of x^i.
log_ic = [-2, -1, 0, 1, 2]
curr_gain = [100, 140, 175, 165, 100]
z = UnivariateSpline(log_ic, curr_gain, k=3)
#print(z.__call__([0,3,4,6,7,9],nu=0,ext=None))
a = z.derivative()
b = a.derivative()
c = b.derivative()
coeffs = [0, 0, 0, 0]
coeffs[0] = z(0, nu=0, ext=None)
coeffs[1] = a(0, nu=0, ext=None)
coeffs[2] = b(0, nu=0, ext=None) / 2   # f''(0) / 2!
coeffs[3] = c(0, nu=0, ext=None) / 6   # f'''(0) / 3!
print(str(coeffs[3]) + "*x^3 + " + str(coeffs[2]) + "*x^2 + "
      + str(coeffs[1]) + "*x^1 + " + str(coeffs[0]))
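# Equivalent one-call alternative: UnivariateSpline.derivatives(x) returns all
# k+1 derivatives at a point, so the Taylor coefficients follow by dividing by
# factorials (same data as above, repeated to keep the sketch self-contained).
from math import factorial
from scipy.interpolate import UnivariateSpline

z = UnivariateSpline([-2, -1, 0, 1, 2], [100, 140, 175, 165, 100], k=3)
coeffs = [d / factorial(i) for i, d in enumerate(z.derivatives(0))]
print(coeffs)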
def extract_raw_features(df_subset_dict,
                         index_col,
                         analysis_column,
                         date_col,
                         le_time_periods=10,
                         trans_parameter=0.5,
                         max_time_lag=20,
                         smoothing_factor=1222,
                         min_adjustment_val=0.001):
    # Takes in a df_subset_dict created by the "create_df_subset_dict" method above
    # and extracts raw features.
    # Assumes exactly ONE feature is being inputted for analysis.
    # Could probably refactor to handle multiple features, but for now it is one
    # variable at a time; this loop already takes some time to complete, so multiple
    # variables may be unwieldy.

    # Begin analysis
    analysis_time_series_features_raw_dct = {}
    for index, df_subset in df_subset_dict.items():
        # Extract unique category name for each set of data
        index = df_subset[index_col].unique().item()

        # Set the activity date as the index and convert it into a datetime index
        # so that the seasonal decomposition used to transform the data later works
        df_subset[date_col] = pd.to_datetime(df_subset[date_col])
        analysis_df = df_subset[[date_col, analysis_column]].set_index(date_col)

        # Convert analysis data to numeric in case the data is "object" datatype
        analysis_df[analysis_column] = pd.to_numeric(
            analysis_df[analysis_column], errors='coerce')

        ##### RAW FEATURE PRE-PROCESSING #####
        # Process raw data for feature calculation later (the transformation needed
        # for the TSA features is performed below).

        # Skew & kurtosis calculations (direct calculations, so these are simpler)
        analysis_skew_raw = skew(analysis_df[analysis_column])
        analysis_kurtosis_raw = kurtosis(analysis_df[analysis_column])

        ### Autocorrelation pre-processing ###
        # Because this compares the original data with various time-lagged versions
        # of itself, we iteratively calculate many autocorrelation values
        # (Q_h = n * sum_{k=1..h} r_k^2), where n is the length of the time series,
        # k is the current lag and h is the maximum lag considered (~20).
        # The results are stored in dictionaries to be saved into the final dictionary.
        h = max_time_lag
        analysis_autocorrelation_raw_dct = {
            'LENGTH': len(analysis_df),
            'VALUES': {}
        }
        for k in range(1, h + 1):
            analysis_autocorrelation_raw = pd.concat(
                [analysis_df, analysis_df.shift(k)],
                axis=1).corr().iloc[0, 1]  # Could be [1, 0] too
            analysis_autocorrelation_raw_dct['VALUES'].update(
                {k: analysis_autocorrelation_raw})

        ### The "chaos" metric (Lyapunov exponent) pre-processing ###
        # The paper says that, for all i = 1, 2, ..., n, X_j should be the "nearest"
        # point to X_i. That is taken here to mean X_j is the next consecutive point
        # (i.e. j = i + 1). It also says |X_j - X_i| should be small, which mostly
        # holds with this definition. So take the difference of each consecutive
        # point (|X_2 - X_1|, |X_3 - X_2|, ...) as a new dataframe, then the quotient
        # of each point at varying steps in the future to the current point, i.e.
        # |X_{j+n} - X_{i+n}| / |X_j - X_i|, with n the number of time points looked
        # ahead (the le_time_periods argument above). The final formula per point is
        # (1/n) * log(|Y_{j+n} - Y_{i+n}| / |Y_j - Y_i|).
        analysis_diff = analysis_df.diff(periods=1)
        analysis_lyapunov_exponents_dct = {}
        for n in range(1, le_time_periods + 1):
            analysis_lyapunov_exponents = abs(analysis_diff) / abs(
                analysis_diff.shift(n))
            analysis_lyapunov_exponent = analysis_lyapunov_exponents[
                analysis_column].apply(lambda x: (1 / n) * log(x)
                                       if 0 < x < float('inf') else np.nan)
            analysis_lyapunov_exponents_dct[n] = analysis_lyapunov_exponent

        ### Periodicity (frequency) pre-processing ###
        # First detrend the data (original data minus a fitted regression spline
        # with 3 knots). Then compute all autocorrelation values on the detrended
        # data for all lags up to 1/3 of the series length. The autocorrelation
        # function is then examined for peaks and troughs (in the next code block).
        # Frequency is the first peak provided that:
        #   (a) there is also a trough before it
        #   (b) the difference between peak and trough is at least 0.1
        #   (c) the peak corresponds to positive correlation
        # If no such peak is found, frequency is set to 1 (equivalent to
        # non-seasonal).
        analysis_x = analysis_df.reset_index().index.values
        analysis_y = analysis_df.values
        analysis_reg_spline = UnivariateSpline(x=analysis_x,
                                               y=analysis_y,
                                               s=smoothing_factor)
        analysis_reg_spline_values = analysis_reg_spline(analysis_x)
        analysis_detrended = analysis_y.transpose() - analysis_reg_spline_values

        # Autocorrelation values on detrended data
        max_lag = int(ceil((1 / 3) * analysis_detrended.shape[1]))
        analysis_detrended_autocorrelation_values = []
        analysis_detrended = pd.DataFrame(analysis_detrended.transpose())
        for k in range(0, max_lag):
            analysis_detrended_autocorrelation = pd.concat(
                [analysis_detrended, analysis_detrended.shift(k)],
                axis=1).corr().iloc[0, 1]  # Could be [1, 0] too
            analysis_detrended_autocorrelation_values.append(
                analysis_detrended_autocorrelation)

        ##### TRANSFORMED FEATURE PRE-PROCESSING #####
        # To get the TSA (trend & seasonality adjusted) features, first check whether
        # the minimum value is non-negative: a transformation is only considered if
        # the minimum of {Y_t} is non-negative. If the minimum of Y_t is zero, a
        # small positive constant (equal to 0.001 of the maximum of Y_t) is added to
        # all values to avoid undefined results.
        min_analysis = min(analysis_df[analysis_column])
        if min_analysis == 0:
            max_analysis = max(analysis_df[analysis_column])
            analysis_df[analysis_column] = analysis_df[analysis_column] + (
                min_adjustment_val * max_analysis)

        # Perform the transformation Y_t* = (Y_t^lambda - 1) / lambda,
        # where lambda != 0 and lambda is in (-1, 1)
        analysis_transformed = (pow(analysis_df[analysis_column],
                                    trans_parameter) - 1) / trans_parameter

        # Decompose the transformed data to get TSA data for processing
        analysis_decomposition = sm.tsa.seasonal_decompose(
            analysis_transformed, model='additive', extrapolate_trend='freq')

        # Calculate features based on TSA data

        # Trend & seasonality calculations

        ### Skew & kurtosis pre-processing ###
        analysis_skew_tsa = skew(analysis_decomposition.resid)
        analysis_kurtosis_tsa = kurtosis(analysis_decomposition.resid)

        ### Autocorrelation pre-processing ###
        # As above, compute the autocorrelation series r_k = Corr(Y_t, Y_{t-k}) for
        # many values of k. The actual feature calculation later uses the Box-Pierce
        # statistic (Q_h = n * sum_{k=1..h} r_k^2), where n is the time series
        # length, k is the current lag and h is the maximum lag considered (~20).
        # The results are stored in dictionaries to be processed later and stored in
        # the final dictionary.
        h = max_time_lag
        analysis_autocorrelation_tsa_dct = {
            'LENGTH': len(analysis_df),
            'VALUES': {}
        }
        for k in range(1, h + 1):
            analysis_autocorrelation_tsa = pd.concat([
                analysis_decomposition.resid,
                analysis_decomposition.resid.shift(k)
            ], axis=1).corr().iloc[0, 1]
            analysis_autocorrelation_tsa_dct['VALUES'].update(
                {k: analysis_autocorrelation_tsa})

        analysis_time_series_features_raw_dct[index] = {
            'ANALYSIS_DECOMPOSITION': analysis_decomposition,
            'ANALYSIS_SKEW_RAW': analysis_skew_raw,
            'ANALYSIS_KURTOSIS_RAW': analysis_kurtosis_raw,
            'ANALYSIS_SKEW_TSA': analysis_skew_tsa,
            'ANALYSIS_KURTOSIS_TSA': analysis_kurtosis_tsa,
            'ANALYSIS_AUTOCORRELATION_TSA': analysis_autocorrelation_tsa_dct,
            'ANALYSIS_AUTOCORRELATION_RAW': analysis_autocorrelation_raw_dct,
            'ANALYSIS_DIFF': analysis_diff,
            'ANALYSIS_LE_VALUES_DCT': analysis_lyapunov_exponents_dct,
            'ANALYSIS_DETRENDED_AUTOCORRELATION': analysis_detrended_autocorrelation_values,
        }

    return analysis_time_series_features_raw_dct
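# Self-contained sketch of the lag-k autocorrelation / Box-Pierce computation
# used in the function above (synthetic monthly-seasonal series, invented for
# illustration).
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
s = pd.DataFrame({'y': np.sin(np.arange(120) * 2 * np.pi / 12)
                       + rng.normal(0, 0.1, 120)})
n, h = len(s), 20
r = [pd.concat([s, s.shift(k)], axis=1).corr().iloc[0, 1]
     for k in range(1, h + 1)]
Q_h = n * sum(rk ** 2 for rk in r)             # Q_h = n * sum_{k=1..h} r_k^2
print(round(Q_h, 2))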