def denoise(nblck, filename, mode='sym', wv='sym5'):
    import numpy as np
    import pywt
    from statsmodels.robust import mad
    #noisy_coefs = pywt.wavedec(nblck, 'sym5', level=5, mode='per')
    # pywt.dwt gives a single-level decomposition; pywt.wavedec decomposes over multiple levels
    noisy_coefs = pywt.wavedec(nblck, wavelet=wv, mode=mode)
    sigma = mad(noisy_coefs[-1])
    #uthresh=np.std(ca)/2
    uthresh = sigma*np.sqrt(2*np.log(len(nblck)))
    denoised = noisy_coefs[:]
    denoised[1:] = [pywt.threshold(i, value=uthresh,mode='soft') for i in denoised[1:]]
    signal = pywt.waverec(denoised, wavelet=wv, mode=mode)
    from matplotlib import pyplot as plt
    fig, axes = plt.subplots(1, 2, sharey=True, sharex=True,figsize=(8,4))
    ax1, ax2 = axes
    
    ax1.plot(signal)
    #ax1.set_xlim(0,2**10)
    ax1.set_title("Recovered Signal")
    ax1.margins(.1)
    
    ax2.plot(nblck)
    ax2.set_title("Noisy Signal")
    
    for ax in fig.axes:
        ax.tick_params(labelbottom=False, top=False, bottom=False, left=False, right=False)
    fig.tight_layout()
    fig.savefig(filename+'_'+wv+'.pdf')
    plt.clf()
    return signal
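# Hedged usage sketch (not from the original source): call denoise() on a
# synthetic noisy sine wave. Assumes pywt, numpy, matplotlib and statsmodels
# are installed; the signal and the 'example' file prefix are made up.
import numpy as np

t = np.linspace(0, 1, 1024)
noisy = np.sin(2 * np.pi * 5 * t) + 0.3 * np.random.randn(t.size)
recovered = denoise(noisy, 'example', mode='sym', wv='sym5')  # writes example_sym5.pdf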
Example #2
def task_mad(data):
    """
    http://statsmodels.sourceforge.net/devel/generated/statsmodels.robust.scale.mad.html
    """

    mad_value = mad(data)

    return mad_value
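# Quick illustrative comparison (not part of the original snippet): the MAD is
# robust to a single gross outlier, whereas the standard deviation is not.
# Assumes numpy and statsmodels are installed.
import numpy as np
from statsmodels.robust import mad

clean = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
with_outlier = np.append(clean, 100.0)
print(np.std(clean), np.std(with_outlier))  # the outlier inflates the std a lot
print(mad(clean), mad(with_outlier))        # the MAD barely changes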
Example #3
    def _guerrero_cv(self, x, bounds, window_length=4, scale='sd',
                     options={'maxiter': 25}):
        """
        Computes lambda using Guerrero's coefficient of variation. If no
        seasonality is present in the data, window_length is set to 4 (as
        per Guerrero and Perera (2004)).

        NOTE: Seasonality-specific auxiliaries *should* provide their own
        seasonality parameter.

        Parameters
        ----------
        x : array_like
        bounds : tuple
            Numeric 2-tuple, that indicate the solution space for the lambda
            parameter.
        window_length : int
            Seasonality/grouping parameter. Default 4, as per Guerrero and
            Perera (2004). NOTE: this indicates the length of the individual
            groups, not the total number of groups!
        scale : {'sd', 'mad'}
            The dispersion measure to be used. 'sd' indicates the sample
            standard deviation, but the more robust 'mad' is also available.
        options : dict
            The options (as a dict) to be passed to the optimizer.
        """
        nobs = len(x)
        groups = int(nobs / window_length)

        # remove the first n < window_length observations from consideration.
        grouped_data = np.reshape(x[nobs - (groups * window_length): nobs],
                                  (groups, window_length))
        mean = np.mean(grouped_data, 1)

        scale = scale.lower()
        if scale == 'sd':
            dispersion = np.std(grouped_data, 1, ddof=1)
        elif scale == 'mad':
            dispersion = mad(grouped_data, axis=1)
        else:
            raise ValueError("Scale '{0}' not understood.".format(scale))

        def optim(lmbda):
            rat = np.divide(dispersion, np.power(mean, 1 - lmbda))  # eq 6, p 40
            return np.std(rat, ddof=1) / np.mean(rat)

        res = minimize_scalar(optim,
                              bounds=bounds,
                              method='bounded',
                              options=options)
        return res.x
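# Standalone sketch of the same Guerrero coefficient-of-variation search on
# synthetic data. The helper name guerrero_lambda and the simulated series are
# illustrative assumptions; the body mirrors the logic of the method above.
import numpy as np
from scipy.optimize import minimize_scalar

def guerrero_lambda(x, bounds=(-1, 2), window_length=4):
    nobs = len(x)
    groups = nobs // window_length
    grouped = np.reshape(x[nobs - groups * window_length:], (groups, window_length))
    mean = grouped.mean(axis=1)
    dispersion = grouped.std(axis=1, ddof=1)

    def cv(lmbda):
        rat = dispersion / np.power(mean, 1 - lmbda)
        return np.std(rat, ddof=1) / np.mean(rat)

    return minimize_scalar(cv, bounds=bounds, method='bounded').x

rng = np.random.default_rng(0)
series = np.exp(0.05 * rng.normal(size=200).cumsum()) + 10
print(guerrero_lambda(series))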
Example #4
def madnormalize(vector):
    """

    Parameters
    ----------
    vector

    Returns
    -------

    """
    demedianed = vector - np.median(vector)
    sigmad = mad(demedianed)
    if sigmad > 0.0:
        return demedianed / sigmad
    else:
        return demedianed
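# Minimal usage sketch (illustrative data): centre a vector on its median and
# scale by the MAD, so the gross outlier has little influence on the scaling.
# Assumes numpy and statsmodels.robust.mad are imported at module level, as the
# function above requires.
import numpy as np

vec = np.array([10.0, 12.0, 11.0, 13.0, 250.0])  # one gross outlier
print(madnormalize(vec))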
Example #5
for lst in [k for k in pickedSubjects if k.startswith('sr')]:
    subjects += pickedSubjects[lst]

poss = np.zeros((len(subjects), 3))
for ii, subj in enumerate(subjects):
    raw_file = glob.glob(
        op.join(study_dir, 'bad_%s' % subj, 'raw_fif', '*mmn_raw.fif'))[0]
    raw = mne.io.read_raw_fif(raw_file, allow_maxshield='yes')
    poss[ii] = raw.info['dev_head_t']['trans'][:3, 3]
np.savez_compressed(op.join(study_dir, 'initial_head_poss.npz'),
                    poss=poss,
                    subjects=subjects)
poss = np.load(op.join(study_dir, 'initial_head_poss.npz'))['poss']
poss_norm = LA.norm(poss, axis=1)

mad_poss_norm = mad(poss_norm)
'''
    Median Absolute Deviation
    R-bloggers - Absolute Deviation Around the Median
    https://www.r-bloggers.com/absolute-deviation-around-the-median/

    Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and
    Handle Outliers", The ASQC Basic References in Quality Control:
    Statistical Techniques, Edward F. Mykytka, Ph.D., Editor.
'''
# Outliers defined as values more than 2.5 * MAD away from the median
mask = ~np.logical_or(poss_norm > np.median(poss_norm) + 2.5 * mad_poss_norm,
                      poss_norm < np.median(poss_norm) - 2.5 * mad_poss_norm)
# Figure window dressing
sns.set(style="white", palette="colorblind", color_codes=True)
colors = sns.color_palette()
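# Illustrative sketch (not from the original script) of the same MAD-based
# outlier rule on synthetic norms; the 2.5 * MAD band around the median matches
# the mask built above.
import numpy as np
from statsmodels.robust import mad

vals = np.concatenate([np.random.normal(50, 1, 100), [80.0, 20.0]])
med, scale = np.median(vals), mad(vals)
keep = np.abs(vals - med) <= 2.5 * scale
print(vals[~keep])  # the two planted outliers are flagged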
Example #6
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import math
from statsmodels import robust

col = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'type']
iris = pd.read_csv("iris.xlsx", names=col)
#print(iris)

iris_setosa = iris.loc[iris["type"] == "Iris-setosa"]
iris_virginica = iris.loc[iris["type"] == "Iris-virginica"]
iris_versicolor = iris.loc[iris["type"] == "Iris-versicolor"]

print("Meadian absolute deviations")
print("setosa", robust.mad(iris_setosa["petal_length"]))
print("viriginica", robust.mad(iris_virginica["petal_length"]))
print("versicolor", robust.mad(iris_versicolor["petal_length"]))
Example #7
def mad(X):
    X = np.array(X).astype('float64')
    return robust.mad(X)
def make_selection_lookups(
    all_series: np.ndarray, pattern_lookups: Dict[int, PatternLookup],
    subseries_lookups: Dict[int, Dict[int, SubSeriesLookup]],
    sub_clusters: SubClusters, sub_mrfs: SubMRFs
) -> Tuple[Dict[Tuple, np.ndarray], Dict[Tuple, np.ndarray]]:
    """

    :param all_series:
    :param pattern_lookups:
    :param subseries_lookups:
    :param sub_clusters:
    :param sub_mrfs:
    :return:
    """
    dispersions: Dict[Tuple, np.ndarray] = {}
    modes: Dict[Tuple, np.ndarray] = {}
    print("computing selection criteria")
    for k in pattern_lookups:
        print("k =", k)
        for cid in {p.cid for p in pattern_lookups[k]["base"]}:
            print("    cid", cid)
            ss = [
                all_series[p.start_idx:p.end_idx]
                for p in pattern_lookups[k]["base"] if p.cid == cid
            ]
            ubiqs = np.array([[
                len([x for x in s[:, i] if x > 0]) / len(s)
                for i in range(s.shape[1])
            ] for s in ss])

            dispersions[(k, cid, 0)] = np.mean(robust.mad(ubiqs, axis=0))
            modes[(k, cid, 0)] = stats.mode(np.round(ubiqs, 1)).mode

            if cid not in sub_clusters[k]:
                continue

            idx_lookup = subseries_lookups[k][cid].idx_lookup
            ser = subseries_lookups[k][cid].series
            for sub_k in sub_clusters[k][cid]:
                if not any(
                        is_null_cluster(mrf) for mrf in sub_mrfs[k][cid]
                    [sub_k].values()) or len(idx_lookup) <= sub_k:
                    continue
                ps = pattern_lookups[k][cid][sub_k]
                for sub_cid in {p.cid for p in ps}:
                    ss = [
                        ser[p.start_idx:p.end_idx] for p in ps
                        if p.cid == sub_cid
                    ]
                    ubiqs = np.array([[
                        len([x for x in s[:, i] if x > 0]) / len(s)
                        for i in range(s.shape[1])
                    ] for s in ss])
                    dispersions[(k, cid, sub_k,
                                 sub_cid)] = np.mean(robust.mad(ubiqs, axis=0))
                    modes[(k, cid, sub_k,
                           sub_cid)] = stats.mode(np.round(ubiqs, 1)).mode

    dispersion_lookup = {
        tag: [x[1] for x in sorted(xs)]
        for tag, xs in groupby(sorted(dispersions.items()), lambda x: x[0][:3])
    }
    mode_lookup = {
        tag: [x[1] for x in sorted(xs)]
        for tag, xs in groupby(sorted(modes.items()), lambda x: x[0][:3])
    }

    return dispersion_lookup, mode_lookup
Example #9
def wave(data, wavelet='haar', mode='soft'):  #, pyr=0, wav = 0):
    """Wavelet coefficients of the input data, and subsequent pyramide plotting
    build using pywt.

    threshold is using universal thresholding. Refer to jseabold wavelet regression
    http://jseabold.net/blog/2012/02/23/wavelet-regression-in-python/
    

    SYNTAX:
        [true_coef, signal, denoised] = wavelets.wave(ava['101'], mode='hard')
        
    INPUT:
        data: 1D-array or list with values for wavelets.

        wavelet: the mother wavelet. default = 'Haar', refer to pywt.wavelist() for wavelets.

        mode: 'hard', 'soft', 'less', refer to pywt.threshold for details.

        
    Output:
    
        true_coefs: the coefficients of the original signal.
    
        signal: wavelet transformed data.

        denoised: the denoised coefficients.

    Ex.
        [tr, signal, dn] = wavelets.wave(ava['101'], wavelet='coif16', mode='hard')
        
    """
    true_coefs = pywt.wavedec(data, wavelet, mode='per')

    #Evaluate data
    #Pyramid plot
    '''
    if pyr ==1:
        
        fig = cpp.coef_pyramid_plot(true_coefs[1:])
        fig.show()
    '''

    # Estimate the noise scale via the median absolute deviation
    sigma = mad(true_coefs[-1])
    # Calculate the universal threshold
    uthresh = sigma * np.sqrt(2 * np.log(len(data)))
    #uthresh = sigma*np.sqrt(2*np.log(len(data))/len(data))

    #denoising data using universal thresholding, resulting in denoised signal.
    denoised = true_coefs[:]
    denoised[1:] = (pywt.threshold(i, value=uthresh, mode=mode, substitute=0)
                    for i in denoised[1:])
    signal = pywt.waverec(denoised, wavelet, mode='per')

    return true_coefs, signal, denoised
    '''
    #Number of coefficients
    comp = cmpt(denoised)

    #Evaluate Chosen Wavelet
    #let = pywt.Wavelet(wavelet)
    #sca, wave, x = let.wavefun()
    '''
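# Small illustrative comparison (not part of the original function) of the
# 'soft' and 'hard' threshold modes the docstring refers to. Assumes pywt is
# installed; the sample coefficients are made up.
import numpy as np
import pywt

coefs = np.array([-3.0, -0.5, 0.2, 1.0, 4.0])
print(pywt.threshold(coefs, value=1.0, mode='soft'))  # survivors are shrunk toward zero
print(pywt.threshold(coefs, value=1.0, mode='hard'))  # survivors keep their value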
Example #10
def gamma(x, z):
    changes = np.empty(T)
    for t in range(T):
        changes[t] = np.linalg.norm(V_color(x, t) - V_color(z, t))
    mad = robust.mad(changes)
    return 1 / (1 + (LAMBDA_S * mad))
def prepare_features(window_data):
    # trimming
    window_data_x = window_data.x[:]
    window_data_y = window_data.y[:]
    window_data_z = window_data.z[:]

    window_data_x[window_data_x > MAX_VAL] = MAX_VAL
    window_data_x[window_data_x < MIN_VAL] = MIN_VAL
    window_data_y[window_data_y > MAX_VAL] = MAX_VAL
    window_data_y[window_data_y < MIN_VAL] = MIN_VAL
    window_data_z[window_data_z > MAX_VAL] = MAX_VAL
    window_data_z[window_data_z < MIN_VAL] = MIN_VAL

    assert np.sum(window_data_x[window_data_x > MAX_VAL]) == 0
    assert np.sum(window_data_x[window_data_x < MIN_VAL]) == 0

    assert np.sum(window_data_y[window_data_y > MAX_VAL]) == 0
    assert np.sum(window_data_y[window_data_y < MIN_VAL]) == 0

    assert np.sum(window_data_z[window_data_z > MAX_VAL]) == 0
    assert np.sum(window_data_z[window_data_z < MIN_VAL]) == 0

    magnitude = np.sqrt(window_data_x**2 + window_data_y**2 + window_data_z**2)

    # min
    x_min = np.min(window_data_x)
    y_min = np.min(window_data_y)
    z_min = np.min(window_data_z)
    overall_min = np.min(magnitude)

    # max
    x_max = np.max(window_data_x)
    y_max = np.max(window_data_y)
    z_max = np.max(window_data_z)
    overall_max = np.max(magnitude)

    # mean
    x_mean = np.mean(window_data_x)
    y_mean = np.mean(window_data_y)
    z_mean = np.mean(window_data_z)
    overall_mean = np.mean(magnitude)

    # standard deviation
    x_stdev = np.std(window_data_x)
    y_stdev = np.std(window_data_y)
    z_stdev = np.std(window_data_z)
    overall_stdev = np.std(magnitude)

    # median absolute deviation
    x_mad = mad(window_data_x)
    y_mad = mad(window_data_y)
    z_mad = mad(window_data_z)
    overall_mad = mad(magnitude)

    # skewness
    x_skewness = skew(window_data_x)
    y_skewness = skew(window_data_y)
    z_skewness = skew(window_data_z)
    overall_skewness = skew(magnitude)

    # kurtosis
    x_kurtosis = kurtosis(window_data_x)
    y_kurtosis = kurtosis(window_data_y)
    z_kurtosis = kurtosis(window_data_z)
    overall_kurtosis = kurtosis(magnitude)

    # root mean square amplitude (computed here as the square root of the absolute mean)
    x_rms_amplitude = np.sqrt(np.abs(np.mean(window_data_x)))
    y_rms_amplitude = np.sqrt(np.abs(np.mean(window_data_y)))
    z_rms_amplitude = np.sqrt(np.abs(np.mean(window_data_z)))
    overall_rms_amplitude = np.sqrt(np.abs(np.mean(magnitude)))

    covariance_matrix = np.cov(window_data[['x', 'y', 'z']].T)  # transpose so x, y, z are the variables

    # covariance of two values
    x_y_covariance = covariance_matrix[0, 1]
    x_z_covariance = covariance_matrix[0, 2]
    y_z_covariance = covariance_matrix[1, 2]

    # min covariance of two values
    min_covariance = np.min([x_y_covariance, x_z_covariance, y_z_covariance])

    # max covariance of two values
    max_covariance = np.max([x_y_covariance, x_z_covariance, y_z_covariance])

    # window energy
    x_window_energy = np.sum(window_data_x)
    y_window_energy = np.sum(window_data_y)
    z_window_energy = np.sum(window_data_z)
    overall_window_energy = np.sum(magnitude)

    # window entropy
    # x_window_entropy = entropy(window_data.x)
    # y_window_entropy = entropy(window_data.y)
    # z_window_entropy = entropy(window_data.z)

    # min_window_entropy = np.min([
    #     x_window_entropy, y_window_entropy, z_window_entropy])
    # max_window_entropy = np.max([
    #     x_window_entropy, y_window_entropy, z_window_entropy])
    # overall_window_entropy = entropy(magnitude)

    # Fourier transform
    frequency_component_amplitudes = np.fft.fft(magnitude).real

    # spectral centroid
    x_spectral_centroid = spectral_centroid(window_data_x)
    y_spectral_centroid = spectral_centroid(window_data_y)
    z_spectral_centroid = spectral_centroid(window_data_z)
    overall_spectral_centroid = spectral_centroid(magnitude)

    # spectral energy
    x_spectral_energy = np.sum(np.fft.fft(window_data_x).real)
    y_spectral_energy = np.sum(np.fft.fft(window_data_y).real)
    z_spectral_energy = np.sum(np.fft.fft(window_data_z).real)
    overall_spectral_energy = np.sum(frequency_component_amplitudes)

    # spectral entropy
    # x_spectral_entropy = entropy(np.fft.fft(window_data.x).real)
    # y_spectral_entropy = entropy(np.fft.fft(window_data.y).real)
    # z_spectral_entropy = entropy(np.fft.fft(window_data.z).real)
    # overall_spectral_entropy = entropy(frequency_component_amplitudes)

    features = (
        x_min,
        y_min,
        z_min,
        overall_min,
        x_max,
        y_max,
        z_max,
        overall_max,
        x_mean,
        y_mean,
        z_mean,
        overall_mean,
        x_stdev,
        y_stdev,
        z_stdev,
        overall_stdev,
        x_mad,
        y_mad,
        z_mad,
        overall_mad,
        x_skewness,
        y_skewness,
        z_skewness,
        overall_skewness,
        x_kurtosis,
        y_kurtosis,
        z_kurtosis,
        overall_kurtosis,
        x_rms_amplitude,
        y_rms_amplitude,
        z_rms_amplitude,
        overall_rms_amplitude,
        x_y_covariance,
        x_z_covariance,
        y_z_covariance,
        min_covariance,
        max_covariance,
        x_window_energy,
        y_window_energy,
        z_window_energy,
        overall_window_energy,
        # x_window_entropy, y_window_entropy, z_window_entropy,
        # min_window_entropy, max_window_entropy,
        # overall_window_entropy,
        x_spectral_centroid,
        y_spectral_centroid,
        z_spectral_centroid,
        overall_spectral_centroid,
        x_spectral_energy,
        y_spectral_energy,
        z_spectral_energy,
        overall_spectral_energy,
        # x_spectral_entropy, y_spectral_entropy, z_spectral_entropy,
        # overall_spectral_entropy
    )

    features += tuple(frequency_component_amplitudes)
    return features
def add_robust_features(df):
    df['X_95_quantile'] = np.array(
        [np.quantile(df.iloc[i].X, 0.95) for i in range(len(df))])
    df['X_mad'] = np.array([robust.mad(df.iloc[i].X) for i in range(len(df))])
    return df
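# Hypothetical usage sketch: each row's X holds an array-like signal, and the
# function appends its 95th percentile and MAD as new columns. The frame below
# is made up; it assumes numpy, pandas and statsmodels.robust are imported as
# in the function above.
import numpy as np
import pandas as pd
from statsmodels import robust

frame = pd.DataFrame({"X": [np.random.randn(128) for _ in range(4)]})
frame = add_robust_features(frame)
print(frame[["X_95_quantile", "X_mad"]])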
Example #13
def dev_func(vector, parametric=False):
    if parametric:
        return np.std(vector)
    else:
        return mad(vector, c=1)
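# Note on the c=1 argument (illustrative sketch): statsmodels' mad divides by a
# normalization constant (about 0.6745) by default so that it estimates the
# standard deviation under normality; c=1 returns the raw median absolute
# deviation instead.
import numpy as np
from statsmodels.robust import mad

x = np.array([1.0, 2.0, 3.0, 4.0, 100.0])
print(mad(x))       # scaled to be comparable with np.std for Gaussian data
print(mad(x, c=1))  # raw median absolute deviation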
Example #14
def get_sta_median(v_APs):
    sta = np.median(v_APs, 0)
    sta_mad = mad(v_APs, axis=0)
    return sta, sta_mad
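# Minimal usage sketch with made-up data: rows are individual AP-triggered
# traces, so the median and the MAD are taken across trials at each time point.
# Assumes numpy and statsmodels.robust.mad are imported at module level.
import numpy as np

v_APs = np.random.randn(20, 100) + np.linspace(0, 1, 100)  # 20 trials x 100 samples
sta, sta_mad = get_sta_median(v_APs)
print(sta.shape, sta_mad.shape)  # both (100,)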
Example #15
    def fit(self,X,*args,**kwargs):
        
        """
        Fit a projection pursuit dimension reduction model. 
        
        Required input argument: X data as matrix or data frame 
        
        Optional input arguments:
            
            arg or kwarg:
            y data as vector or 1D matrix
            
            kwargs: 
            h, int: option to overrule the class's n_components parameter in fit.
                Convenient on the command line, yet should not be used in automated
                loops, e.g. cross-validation.
                
            dmetric, str: distance metric used internally. Defaults to 'euclidean'
            
            mixing, bool: to estimate mixing matrix (only relevant for ICA)
            
            Further parameters to the regression methods can be passed on 
            here as well as kwargs, e.g. quantile=0.8 for quantile regression. 
            
            kwargs only relevant if y specified: 
        
        """

        # Collect optional fit arguments
        biascorr = kwargs.pop('biascorr',False)
            
        if 'h' not in kwargs:
            h = self.n_components
        else:
            h = kwargs.pop('h')
            self.n_components = h
            
        if 'dmetric' not in kwargs:
            dmetric = 'euclidean'
        else:
            dmetric = kwargs.get('dmetric')
            
        if 'mixing' not in kwargs:
            mixing = False
        else:
            mixing = kwargs.get('mixing')
            
        if 'y' not in kwargs:
            na = len(args)
            if na > 0: #Use of *args makes it sklearn consistent
                flag = 'two-block'
                y = args[0]
            else:
                flag = 'one-block'
                y = 0 # to allow calls with 'y=y' in spite of no real y argument present
        else:
            flag = 'two-block'
            y = kwargs.get('y')
                            
            if 'quantile' not in kwargs:
                quantile = .5
            else:
                quantile = kwargs.get('quantile')
                
            if self.regopt == 'robust':
            
                if 'fun' not in kwargs:
                    fun = 'Hampel'
                else:
                    fun = kwargs.get('fun')
                
                if 'probp1' not in kwargs:
                    probp1 = 0.95
                else:
                    probp1 = kwargs.get('probp1')
                
                if 'probp2' not in kwargs:
                    probp2 = 0.975
                else:
                    probp2 = kwargs.get('probp2')
                
                if 'probp3' not in kwargs:
                    probp3 = 0.99
                else:
                    probp3 = kwargs.get('probp3')

            
        if self.projection_index == dicomo:
            
            if self.pi_arguments['mode'] in ('M3','cos','c*k'):
            
                if 'option' not in kwargs:
                    option = 1
                else:
                    option = kwargs.get('option')
                
                if option > 3:
                    print('Option value >3 will compute results, but meaning may be questionable')
                
        # Initiate projection index    
        self.most = self.projection_index(**self.pi_arguments)         
        
        # Initiate some parameters and data frames
        if self.copy:
            X0 = copy.deepcopy(X)
            self.X0 = X0
        else:
            X0 = X        
        X = convert_X_input(X0)    
        n,p = X0.shape 
        trimming = self.trimming
        
        # Check dimensions 
        if h > min(n,p):
            raise(MyException('number of components cannot exceed number of samples'))
            
        if (self.projection_index == dicomo and self.pi_arguments['mode'] == 'kurt' and self.whiten_data==False):
            warnings.warn('Whitening step is recommended for ICA')
            
        # Pre-processing adjustment if whitening
        if self.whiten_data:
            self.center_data = True
            self.scale_data = False
            self.compression = False
            print('All results produced are for whitened data')
        
        # Centring and scaling
        if self.scale_data:
            if self.center=='mean':
                scale = 'std'
            elif ((self.center=='median')|(self.center=='l1median')):
                scale = 'mad' 
        else:
            scale = 'None'
            warnings.warn('Without scaling, convergence to optima is not given')
            
         # Data Compression for flat tables if required                
        if ((p>n) and self.compression):
            V,S,U = np.linalg.svd(X.T,full_matrices=False)
            X = np.matmul(U.T,np.diag(S))
            n,p = X.shape
            
            if (srs.mad(X)==0).any(): 
                warnings.warn('Due to low scales in data, compression would induce zero scales.' 
                              + '\n' + 'Proceeding without compression.')
                dimensions = False
                if self.copy:
                    X = copy.deepcopy(X0)
                else:
                    X = X0
            else:
                dimensions = True
        else:
            dimensions = False
        
        # Initiate centring object and scale X data 
        centring = VersatileScaler(center=self.center,scale=scale,trimming=trimming)      
  
        if self.center_data:
            Xs = centring.fit_transform(X)
            mX = centring.col_loc_
            sX = centring.col_sca_
        else:
            Xs = X
            mX = np.zeros((1,p))
            sX = np.ones((1,p))

        fit_arguments = {}
            
        # Data whitening (best practice for ICA)
        if self.whiten_data:
            V,S,U = np.linalg.svd(Xs.T,full_matrices=False)
            del U
            K = (V/S)[:,:p]
            del V,S
            Xs = np.matmul(Xs, K)
            Xs *= np.sqrt(p)
        
        # Presently, X and y need to be matrices 
        # Will be changed to use regular np.ndarray
        Xs = np.matrix(Xs)

        # Pre-process y data when available 
        if flag != 'one-block':
            
            ny = y.shape[0]
            y = convert_y_input(y)
            if len(y.shape) < 2:
                y = np.matrix(y).reshape((ny,1))
            # py = y.shape[1]
            if ny != n:
                raise(MyException('X and y number of rows must agree'))
            if self.copy:
                y0 = copy.deepcopy(y)
                self.y0 = y0
                
            if self.center_data:
                ys = centring.fit_transform(y)
                my = centring.col_loc_
                sy = centring.col_sca_ 
            else:
                ys = y
                my = 0
                sy = 1
            ys = np.matrix(ys).astype('float64')
        
        else:
            ys = None
                

        # Initializing output matrices
        W = np.zeros((p,h))
        T = np.zeros((n,h))
        P = np.zeros((p,h))
        B = np.zeros((p,h))
        R = np.zeros((p,h))
        B_scaled = np.zeros((p,h))
        C = np.zeros((h,1))
        Xev = np.zeros((h,1))
        assovec = np.zeros((h,1))
        Maxobjf = np.zeros((h,1))

        # Initialize deflation matrices 
        E = copy.deepcopy(Xs)
        f = ys

        bi = np.zeros((p,1))
        
        opt_args = { 
                    'alpha': self.alpha,
                    'trimming': self.trimming,
                    'biascorr': biascorr, 
                    'dmetric' : 'euclidean',
                    }
        
        if self.optimizer=='grid':
            # Define grid optimization ranges
            if 'ndir' not in self.optimizer_options:
                self.optimizer_options['ndir'] = 1000
            optrange = np.sign(self.optrange)
            optmax = self.optrange[1]
            stop0s = np.arcsin(optrange[0])
            stop1s = np.arcsin(optrange[1])
            stop1c = np.arccos(optrange[0])
            stop0c = np.arccos(optrange[1])
            anglestart = max(stop0c,stop0s)
            anglestop = max(stop1c,stop1s)
            nangle = np.linspace(anglestart,anglestop,self.optimizer_options['ndir'],endpoint=False)            
            alphamat = np.matrix([np.cos(nangle), np.sin(nangle)])
            opt_args['_stop0c'] = stop0c
            opt_args['_stop0s'] = stop0s
            opt_args['_stop1c'] = stop1c
            opt_args['_stop1s'] = stop1s
            opt_args['optmax'] = optmax
            opt_args['optrange'] = self.optrange
            opt_args['square_pi'] = self.square_pi
            if optmax != 1:
                alphamat *= optmax
        
            if p>2:
                anglestart = min(opt_args['_stop0c'],opt_args['_stop0s'])
                anglestop = min(opt_args['_stop1c'],opt_args['_stop1s'])
                nangle = np.linspace(anglestart,anglestop,self.optimizer_options['ndir'],endpoint=True)
                alphamat2 = np.matrix([np.cos(nangle), np.sin(nangle)])
                if optmax != 1:
                    alphamat2 *= opt_args['optmax']
                
            # Arguments for grid plane
            opt_args['alphamat'] = alphamat,
            opt_args['ndir'] = self.optimizer_options['ndir'],
            opt_args['maxiter'] = self.optimizer_options['maxiter']
            if type(opt_args['ndir']) is tuple:
                opt_args['ndir'] = opt_args['ndir'][0]
            
            # Arguments for grid plane #2
            grid_args_2 = { 
                     'alpha': self.alpha,
                     'alphamat': alphamat2,
                     'ndir': self.optimizer_options['ndir'],
                     'trimming': self.trimming,
                     'biascorr': biascorr, 
                     'dmetric' : 'euclidean',
                     '_stop0c' : stop0c,
                     '_stop0s' : stop0s,
                     '_stop1c' : stop1c,
                     '_stop1s' : stop1s,
                     'optmax' : optmax,
                     'optrange' : self.optrange,
                     'square_pi' : self.square_pi
                     }
            if flag=='two-block':
                grid_args_2['y'] = f
        
        if flag=='two-block':
            opt_args['y'] = f
            

        # Iterative coefficient estimation
        for i in range(0,h):

            if self.optimizer=='grid':
                if p==2:
                    wi,maximo = gridplane(E,self.most,
                                          pi_arguments=opt_args
                                          )
           
                elif p>2:
                
                    afin = np.zeros((p,1)) # final parameters for linear combinations
                    Z = copy.deepcopy(E)
                    # sort variables according to criterion
                    meas = [self.most.fit(E[:,k],
                            **opt_args) 
                            for k in np.arange(0,p)]
                    if self.square_pi:
                        meas = np.square(meas)
                    wi,maximo = gridplane(Z[:,0:2],self.most,opt_args)
                    Zopt = Z[:,0:2]*wi 
                    afin[0:2]=wi
                    for j in np.arange(2,p):
                        projmat = np.matrix([np.array(Zopt[:,0]).reshape(-1),
                                         np.array(Z[:,j]).reshape(-1)]).T
                        wi,maximo = gridplane(projmat,self.most,
                                              opt_args
                                              )
                        Zopt = Zopt*float(wi[0]) + Z[:,j]*float(wi[1])
                        afin[0:(j+1)] = afin[0:(j+1)]*float(wi[0])
                        afin[j] = float(wi[1])

                    tj = Z*afin
                    objf = self.most.fit(tj,
                                     **{**fit_arguments,**opt_args}
                                    )
                    if self.square_pi:
                        objf *= objf
    

                    # outer loop to run until convergence
                    objfold = copy.deepcopy(objf)
                    objf = -1000
                    afinbest = afin
                    ii = 0
                    maxiter_2j = 2**round(np.log2(self.optimizer_options['maxiter'])) 
                
                    while ((ii < self.optimizer_options['maxiter'] + 1) and (abs(objfold - objf)/abs(objf) > 1e-4)):
                        for j in np.arange(0,p):
                            projmat = np.matrix([np.array(Zopt[:,0]).reshape(-1),
                                         np.array(Z[:,j]).reshape(-1)]).T
                            if j > 16:
                                divv = maxiter_2j
                            else:
                                divv = min(2**j,maxiter_2j)
                        
                            wi,maximo = gridplane_2(projmat,
                                                    self.most,
                                                    q=afin[j],
                                                    div=divv,
                                                    pi_arguments=grid_args_2
                                                    )
                            Zopt = Zopt*float(wi[0,0]) + Z[:,j]*float(wi[1,0])
                            afin *= float(wi[0,0])
                            afin[j] += float(wi[1,0])
                        
                        # % evaluate the objective function:
                        tj = Z*afin
                    
                        objfold = copy.deepcopy(objf)
                        objf = self.most.fit(tj,
                                         q=afin,
                                         **opt_args
                                         )
                        if self.square_pi:
                            objf *= objf
                    
                        if  objf!=objfold:
                            if self.constraint == 'norm':
                                afinbest = afin/np.sqrt(np.sum(np.square(afin)))
                            else:
                                afinbest = afin
                            
                        ii +=1
                        if self.verbose:
                            print(str(ii))
                    #endwhile
                
                    afinbest = afin
                    wi = np.zeros((p,1))
                    wi = afinbest
                    Maxobjf[i] = objf
                # endif;%if p>2;
            else: # do not optimize by the grid algorithm
                if self.trimming > 0: 
                    warnings.warn('Optimization that involves a trimmed objective is not a quadratic program. The scipy-optimize result will be off!!')
                if 'center' in self.pi_arguments:
                    if (self.pi_arguments['center']=='median'): 
                        warnings.warn('Optimization that involves a median in the objective is not a quadratic program. The scipy-optimize result will be off!!')   
                constraint = {'type':'eq',
                              'fun': lambda x: np.linalg.norm(x) -1,
                              }
                if len(self.optimizer_constraints)>0: 
                    constraint = [constraint,self.optimizer_constraints]
                wi = minimize(pp_objective,
                              E[0,:].transpose(),
                              args=(self.most,E,opt_args),
                              method=self.optimizer,
                              constraints=constraint,
                              options=self.optimizer_options).x
                wi = np.matrix(wi).reshape((p,1))
                wi /= np.sqrt(np.sum(np.square(wi)))
                
                
            # Computing projection weights and scores
            ti = E*wi
            if self.optimizer != 'grid':
                Maxobjf[i] = self.most.fit(E*wi,**opt_args)
            nti = np.linalg.norm(ti)
            pi = E.T*ti / (nti**2)
            if self.whiten_data:
                wi /= np.sqrt((wi**2).sum())
                wi = K*wi
            wi0 = wi
            wi = np.array(wi)
            if len(W[:,i].shape) == 1:
                wi = wi.reshape(-1)
            W[:,i] = wi
            T[:,i] = np.array(ti).reshape(-1)
            P[:,i] = np.array(pi).reshape(-1)
            
            if flag != 'one-block':
                criteval = self.most.fit(E*wi0,
                                         **opt_args
                                         )
                if self.square_pi:
                    criteval *= criteval
                    
                assovec[i] = criteval
                

            # Deflation of the data matrix guaranteeing orthogonality restrictions
            E -= ti*pi.T
 
            # Calculate R-Weights
            R = np.dot(W[:,0:(i+1)],pinv2(np.dot(P[:,0:(i+1)].T,W[:,0:(i+1)]),check_finite=False))
        
            # Execute regression y~T if y is present. Generate regression estimates.
            if flag != 'one-block':
                if self.regopt=='OLS':
                    ci = np.dot(ti.T,ys)/(nti**2)
                elif self.regopt == 'robust':
                    linfit = rm(fun=fun,probp1=probp1,probp2=probp2,probp3=probp3,
                                centre=self.center,scale=scale,
                                start_cutoff_mode='specific',verbose=self.verbose)
                    linfit.fit(ti,ys)
                    ci = linfit.coef_
                elif self.regopt == 'quantile':
                    linfit = QuantReg(y,ti)
                    model = linfit.fit(q=quantile)
                    ci = model.params
                # end regression if
                
                C[i] = ci
                bi = np.dot(R,C[0:(i+1)])
                bi_scaled = bi
                bi = np.multiply(np.reshape(sy/sX,(p,1)),bi)
                B[:,i] = bi[:,0]
                B_scaled[:,i] = bi_scaled[:,0]

        # endfor; Loop for latent dimensions

        # Re-adjust estimates to original dimensions if data have been compressed 
        if dimensions:
            B = np.matmul(V[:,0:p],B)
            B_scaled = np.matmul(V[:,0:p],B_scaled)
            R = np.matmul(V[:,0:p],R)
            W = np.matmul(V[:,0:p],W)
            P = np.matmul(V[:,0:p],P)
            bi = B[:,h-1]
            if self.center_data:
                Xs = centring.fit_transform(X0)
                mX = centring.col_loc_
                sX = centring.col_sca_
            else:
                Xs = X0
                mX = np.zeros((1,p))
                sX = np.ones((1,p))
        
        bi = bi.astype("float64")
        if flag != 'one-block':            
            # Calculate scaled and unscaled intercepts
            if dimensions:
                X = convert_X_input(X0)
            if(self.center == "mean"):
                intercept = sps.trim_mean(y - np.matmul(X,bi),trimming)
            else:
                intercept = np.median(np.reshape(y - np.matmul(X,bi),(-1)))
            yfit = np.matmul(X,bi) + intercept
            if not(scale == 'None'):
                if (self.center == "mean"):
                    b0 = np.mean(ys - np.matmul(Xs.astype("float64"),bi))
                else:
                    b0 = np.median(np.array(ys.astype("float64") - np.matmul(Xs.astype("float64"),bi)))
            else:
                b0 = intercept
            
            # Calculate fitted values and residuals
            r = y - yfit
            setattr(self,"coef_",B)
            setattr(self,"intercept_",intercept)
            setattr(self,"coef_scaled_",B_scaled)
            setattr(self,"intercept_scaled_",b0)
            setattr(self,"residuals_",r)
            setattr(self,"fitted_",yfit)
            setattr(self,"y_loadings_",C)
            setattr(self,"y_loc_",my)
            setattr(self,"y_sca_",sy)
                
        setattr(self,"x_weights_",W)
        setattr(self,"x_loadings_",P)
        setattr(self,"x_rotations_",R)
        setattr(self,"x_scores_",T)
        setattr(self,"x_ev_",Xev)
        setattr(self,"crit_values_",assovec)
        setattr(self,"Maxobjf_",Maxobjf)
        
        if self.whiten_data:
            setattr(self,"whitening_",K)

        
        if mixing:
            setattr(self,"mixing_",np.linalg.pinv(W))
        
        
        setattr(self,"x_loc_",mX)
        setattr(self,"x_sca_",sX)

        setattr(self,'scaling',scale)
        if self.return_scaling_object:
            setattr(self,'scaling_object_',centring)
        
        return(self)   
Example #16
import numpy as np
import pandas
from statsmodels import robust

url = "winequality-red.csv"
names = [
    'Fixed Acidity', 'Volatile Acidity', 'Citric Acid', 'Residual Sugar',
    'Chlorides', 'Free SO2', 'Total SO2', 'Density', 'pH', 'Sulphates',
    'ALC by Vol', 'Quality'
]
data = pandas.read_csv(url, names=names)
print("Mean of all Attributes:")
print(np.mean(data))
print("\n")
print("Median of all Attributes:")
print(np.median(data, axis=0))
print("\n")
print("Standard Deviation of all Attributes:")
print(np.std(data, axis=0))

print("\n")
mad = robust.mad(data, axis=0)
print("MAD of the attributes given is: ")
print(mad)
print("\n")
max_data = np.max(data, axis=0)
min_data = np.min(data, axis=0)

print("Maximum and minimum data points are given below:")
print(max_data)
print("\n")
print(min_data)

print(data['Quality'])
Example #17
def lcStats(F_fileName, Fstat_fileName, S_fileName=None, filter=True):

    fPhot = open(F_fileName)
    fStat = open(Fstat_fileName, 'w')
    eof = False
    activeField = 0
    activeTile = 0

    lcDict = {}

    if S_fileName is not None:
        starData = np.loadtxt(S_fileName, delimiter=';', dtype=str)
        sTile = starData[:, 1].astype(int)
        sSeq = starData[:, 2].astype(int)
        sRchunk = starData[:, 7].astype(int)
        raDecPat = re.compile(r'\(([0-9-\.]+),([0-9-\.]+)\)')

    while not eof:
        photLine = fPhot.readline()
        if photLine == '':
            eof = True
        else:
            photFields = photLine.split(';')
            field = int(photFields[1])
            tile = int(photFields[2])
            seq = int(photFields[3])
            rmag = float(photFields[9])
            rerr = float(photFields[10])
            bmag = float(photFields[24])
            berr = float(photFields[25])
            if filter:
                if rmag <= -15 or bmag <= -15 or rmag > -2 or bmag > -2 or rerr < 0 or berr < 0:
                    continue
            if field != activeField or tile != activeTile:
                if activeField == 0:
                    activeField = field
                    activeTile = tile
                else:
                    # error exit
                    sys.exit('Input not all same field and tile')
            if seq in lcDict:
                lc = lcDict[seq]
                lc[0].append(rmag)
                lc[1].append(bmag)
                lc[2].append(rerr)
                lc[3].append(berr)
                lcDict[seq] = lc
            else:
                lcDict[seq] = [[rmag], [bmag], [rerr], [berr]]

    if debug:
        print(lcDict)

    if S_fileName is not None:
        fStat.write(
            '# F T S Rchunk RA DEC Rmed Rmad RmeanErr Vmed Vmad VmeanErr WScoeff WScoeffp\n'
        )
    else:
        fStat.write(
            '# F T S Rmed Rmad RmeanErr Vmed Vmad VmeanErr WScoeff WScoeffp\n')

    for seq in lcDict.keys():
        lc = lcDict[seq]
        lcr = np.array(lc[0])
        lcb = np.array(lc[1])
        lcrerr = lc[2]
        lcberr = lc[3]
        lcrMedian = np.median(lcr)
        lcrStdev = mad(lcr, center=lcrMedian)
        lcrAverr = np.median(lcrerr)
        lcbMedian = np.median(lcb)
        lcbStdev = mad(lcb, center=lcbMedian)
        lcbAverr = np.median(lcberr)

        bMinusR = lcb - lcr
        wsCoeff, wsCoeffp = pearsonr(lcb - lcbMedian, lcr - lcrMedian)
        if S_fileName is not None:
            idStar = np.where((sTile == tile) & (sSeq == seq))
            if len(idStar[0]) == 0:
                print('Star fts %d %d %d not found in Star file' % (field, tile, seq))
                raise ValueError
            thisStar = starData[idStar, :][0][0]
            redChunk = int(thisStar[7])
            raHMS = Angle(thisStar[3] + ' hours')
            decDMS = Angle(thisStar[4] + ' degrees')
            raDeg = raHMS.degree
            decDeg = decDMS.degree
            outputLine = '%d %d %d %d %.5f %.5f %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n' % (
                field, tile, seq, redChunk, raDeg, decDeg, lcrMedian, lcrStdev,
                lcrAverr, lcbMedian, lcbStdev, lcbAverr, wsCoeff, wsCoeffp)
        else:
            outputLine = '%d %d %d %.3f %.3f %.3f %.3f %.3f %.3f %.3f %.3f\n' % (
                field, tile, seq, lcrMedian, lcrStdev, lcrAverr, lcbMedian,
                lcbStdev, lcbAverr, wsCoeff, wsCoeffp)

        fStat.write(outputLine)

    fStat.close()
Example #18
    usedur[idx] = 5.0

    ts = fed.timeseries(uowStart, uowEnd)
    skylineData = np.zeros_like(ts.ephemCentral, dtype=np.int64)
    
    print('alpha')

    for i in range(len(alltic)):
        curTic = alltic[i]
        curper = useper[i]
        curepc = useepc[i]
        curdur = usedur[i]
        ts = ts.makeEphemVector(curper,curepc,curdur)
        skylineData = skylineData + np.copy(ts.ephemFull)

    medSkyline = np.median(skylineData)
    madSkyline = robust.mad(skylineData)
    tmp = np.arange(len(skylineData))
    plt.plot(tmp, skylineData, '.')
    idxBad = np.where((skylineData-medSkyline)/madSkyline > 2.75)[0]
    plt.plot(tmp[idxBad], skylineData[idxBad], '.r')

    plt.show()

    for j in idxBad:
        fout.write('{:11.5f}\n'.format(ts.ts[j]))
    fout.close()

    
    print('hello world')
def tpf_resamp(file, fileOut, RESAMP, lcFile):
    """ Resample a TESS target pixel file and save it in h5d format.
        RESAMP - resample factor (should be odd). """
    hdulist = fits.open(file)
    arr = hdulist[1].data[0]['FLUX']
    nImage = len(hdulist[1].data[:]['CADENCENO'])
    shp = arr.shape
    nx = shp[0]
    ny = shp[1]
    saturate_pixel = np.zeros((nx, ny), dtype=int)
    median_image = np.zeros((nx, ny))

    # Get header information that we should keep
    keepprihdr = ['TICID','SECTOR','CAMERA','CCD','PXTABLE','RA_OBJ', \
                  'DEC_OBJ','PMRA','PMDEC','PMTOTAL','TESSMAG','TEFF', \
                  'LOGG','RADIUS']
    formatprihdr = [np.uint32, int, int, int,int, \
                    float,float,float,float,float, \
                    float,float,float,float,float]

    keep1hdr = ['1CRV4P', '2CRV4P', '1CRPX4', '2CRPX4']
    format1hdr = [int, int, float, float]

    cadenceNo = hdulist[1].data[:]['CADENCENO']
    timetbjd = hdulist[1].data[:]['TIME']
    flux_array = hdulist[1].data[:]['FLUX']
    flux_bkg_array = hdulist[1].data[:]['FLUX_BKG']
    dq_flag = hdulist[1].data[:]['QUALITY']

    f = h5py.File(lcFile, 'r')
    cadNo = np.array(f['cadenceNo'])
    cadNoBeg = np.array(f['cadenceNoBeg'])
    cadNoEnd = np.array(f['cadenceNoEnd'])
    ia, ib = cjb.intersect(cadNoBeg, cadenceNo)
    # In rare instances the light curve data doesn't exist for this sector and
    # ib will be empty, causing an error; if so, just return and do nothing for
    # this sector
    try:
        frstIdx = ib[0]
    except IndexError:
        return
    ia, ib = cjb.intersect(cadNoEnd, cadenceNo)
    endIdx = ib[-1]

    #Make a fix for Sector 3 where not all cadences were used
    # in the backend DV
    #idx = np.where((cadenceNo>=114115) & (cadenceNo<=128706))[0]
    #nImage = len(idx)
    #cadenceNo, timetbjd, dq_flag = idx_filter(idx, cadenceNo, timetbjd, dq_flag)
    #flux_array = flux_array[idx, :, :]
    #flux_bkg_array = flux_bkg_array[idx, :, :]

    # trim off the excess images not integral into resamp
    cadenceNo = cadenceNo[frstIdx:endIdx + 1]
    timetbjd = timetbjd[frstIdx:endIdx + 1]
    flux_array = flux_array[frstIdx:endIdx + 1, :, :]
    flux_bkg_array = flux_bkg_array[frstIdx:endIdx + 1, :, :]
    dq_flag = dq_flag[frstIdx:endIdx + 1]
    newNImage = len(cadenceNo) // RESAMP
    # Do downsampling of data stream
    cadenceNo = np.mean(np.reshape(cadenceNo, (newNImage, RESAMP)),
                        axis=1,
                        dtype=int)
    timetbjd = np.mean(np.reshape(timetbjd, (newNImage, RESAMP)), axis=1)
    flux_array = np.sum(np.reshape(flux_array, (newNImage, RESAMP, nx, ny)),
                        axis=1)
    flux_bkg_array = np.sum(np.reshape(flux_bkg_array,
                                       (newNImage, RESAMP, nx, ny)),
                            axis=1)
    dq_flag = np.sum(np.reshape(dq_flag, (newNImage, RESAMP)),
                     axis=1,
                     dtype=int)

    # Identify data that is missing or NaN
    idx = np.where((np.isfinite(timetbjd))
                   & (np.isfinite(np.squeeze(flux_array[:, 0, 0])))
                   & (np.isfinite(np.squeeze(flux_bkg_array[:, 0, 0]))))[0]
    valid_data_flag = np.zeros((newNImage, ), dtype=np.bool_)
    valid_data_flag[idx] = True

    # Identify saturated pixels
    for i in range(nx):
        for j in range(ny):
            curflux = flux_array[:, i, j]
            diff_flux = np.diff(curflux[valid_data_flag])
            robmad = robust.mad(diff_flux)
            medval = np.median(curflux[valid_data_flag])
            median_image[i, j] = medval
            if medval > 1000.0 and np.log10(robmad / medval) < -3.5:
                saturate_pixel[i, j] = 1


#                print("Saturated Pixel detected x: {0:d} y: {1:d}".format(i, j))

# Now save data as h5py
    epic = hdulist[0].header['TICID']
    sec = hdulist[0].header['SECTOR']
    #    fileoutput = os.path.join(make_data_dirs(dirOut,sec,epic), 'tess_tpf_{0:016d}.h5d'.format(epic))
    f = h5py.File(fileOut, 'w')
    tmp = f.create_dataset('cadenceNo', data=cadenceNo, compression='gzip')
    tmp = f.create_dataset('timetbjd', data=timetbjd, compression='gzip')
    tmp = f.create_dataset('flux_array', data=flux_array, compression='gzip')
    tmp = f.create_dataset('flux_bkg_array',
                           data=flux_bkg_array,
                           compression='gzip')
    tmp = f.create_dataset('dq_flag', data=dq_flag, compression='gzip')
    tmp = f.create_dataset('valid_data_flag',
                           data=valid_data_flag,
                           compression='gzip')
    tmp = f.create_dataset('saturate_pixel',
                           data=saturate_pixel,
                           compression='gzip')
    tmp = f.create_dataset('median_image',
                           data=median_image,
                           compression='gzip')
    # Now make many datasets from the header parameters
    for i in range(len(keepprihdr)):
        curval = hdulist[0].header[keepprihdr[i]]
        if np.isscalar(curval):
            tmp = f.create_dataset(keepprihdr[i],
                                   data=np.array(
                                       [hdulist[0].header[keepprihdr[i]]],
                                       dtype=formatprihdr[i]))
        else:
            tmp = f.create_dataset(keepprihdr[i],
                                   data=np.array([-1], dtype=formatprihdr[i]))

    for i in range(len(keep1hdr)):
        curval = hdulist[1].header[keep1hdr[i]]
        if np.isscalar(curval):
            tmp = f.create_dataset(keep1hdr[i],
                                   data=np.array(
                                       [hdulist[1].header[keep1hdr[i]]],
                                       dtype=format1hdr[i]))
        else:
            tmp = f.create_dataset(keep1hdr[i],
                                   data=np.array([-1], dtype=format1hdr[i]))

    f.close()
Example #20
Media_Colombia = []
Mediana_Colombia = []
Desviacion_Colombia = []
Curtosis_Colombia = []
Asimetria_Colombia = []
MAD_Colombia = []
TriMd = []
YK_Colombia = []

for i in range(len(time)):

    Mapa_Colombia = precip[i, Colombia_Lat, :]
    Mapa_Colombia = Mapa_Colombia[:, Colombia_Lon]
    Mapa_NoNaN_Colombia = Mapa_Colombia[np.isfinite(Mapa_Colombia)]
    Media_Colombia.append(np.mean(Mapa_NoNaN_Colombia))
    Mediana_Colombia.append(np.median(Mapa_NoNaN_Colombia))
    Desviacion_Colombia.append(np.std(Mapa_NoNaN_Colombia))
    Curtosis_Colombia.append(scipy.stats.kurtosis(Mapa_NoNaN_Colombia))
    Asimetria_Colombia.append(stats.skew(Mapa_NoNaN_Colombia))
    MAD_Colombia.append(robust.mad(Mapa_NoNaN_Colombia))
    #TriMd_Colombia.append(np.median(Mapa_NoNaN_Colombia))
    #YK_Colombia.append(np.median(Mapa_NoNaN_Colombia))

Media_Colombia = np.array(Media_Colombia)
Mediana_Colombia = np.array(Mediana_Colombia)
Desviacion_Colombia = np.array(Desviacion_Colombia)
Curtosis_Colombia = np.array(Curtosis_Colombia)
Asimetria_Colombia = np.array(Asimetria_Colombia)
MAD_Colombia = np.array(MAD_Colombia)

Meses = np.array([fechas[i].month for i in range(len(fechas))])

Colombia_Media_mensual = np.zeros([12]) * np.NaN
Colombia_Mediana_mensual = np.zeros([12]) * np.NaN
Colombia_Desviacion_mensual = np.zeros([12]) * np.NaN
Example #21
def _mad(x):
    return smrb.mad(x)
Example #22
def normalize_mad(x):
    x = np.array(x, dtype=np.float32)
    med = np.median(x, axis=0)
    mad = robust.mad(x, axis=0)
    return (x - med) / (mad * 2)  # 2 is for having smaller values
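# Illustrative usage (made-up matrix): column-wise robust normalization, each
# column centred on its median and scaled by twice its MAD. Assumes numpy and
# statsmodels.robust are imported as in the snippet above.
import numpy as np

X = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0], [4.0, 400.0]])
print(normalize_mad(X))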
Example #23
print('Airborne:', numpy.median(Airborne))
print('Aquatic:', numpy.median(Aquatic))
print('Predator:', numpy.median(Predator))
print('Toothed:', numpy.median(Toothed))
print('Backbone:', numpy.median(Backbone))
print('Breathes:', numpy.median(Breathes))
print('Venomous:', numpy.median(Venomous))
print('Fins:', numpy.median(Fins))
print('Legs:', numpy.median(Legs))
print('Tail:', numpy.median(Tail))
print('Domestic:', numpy.median(Domestic))
print('Catsize:', numpy.median(Catsize))
print('Type:', numpy.median(Type))
print('\n')
print('MAD:')
print('Hair:', robust.mad(Hair))
print('Feather:', robust.mad(Feather))
print('Eggs:', robust.mad(Eggs))
print('Milk:', robust.mad(Milk))
print('Airborne:', robust.mad(Airborne))
print('Aquatic:', robust.mad(Aquatic))
print('Predator:', robust.mad(Predator))
print('Toothed:', robust.mad(Toothed))
print('Backbone:', robust.mad(Backbone))
print('Breathes:', robust.mad(Breathes))
print('Venomous:', robust.mad(Venomous))
print('Fins:', robust.mad(Fins))
print('Legs:', robust.mad(Legs))
print('Tail:', robust.mad(Tail))
print('Domestic:', robust.mad(Domestic))
print('Catsize:', robust.mad(Catsize))
Example #24
def find_noisy_channels(raw, linenoise):
    """ High-pass filters, detrends, and removes line noise from the EEG data. Additionally
     finds channels having Nans, no data, unusually high amplitudes poor correlation,
     high-frequency noise, and bad correlation in the low frequency portion of the signal
     using RANSAC.

     Inspired by the PREP pipeline [1]. The Fischler and Bolles RANSAC method was used for
     finding outlier channels [2].

     Parameters
     __________
     raw:  raw mne object
           contains the EEG data and other information related to it
     linenoise: int
                line frequency that needs to be removed by notch filtering
     Raises
     ______
     IOError
            If too few channels are present to perform RANSAC

     Returns
     _______
     noisy_channels: list of string
                     list of the names of all the bad channels
     References
     __________

     [1] Bigdely-Shamlo, N., Mullen, T., Kothe, C., Su, K., & Robbins, K. (2015).
     The PREP pipeline: standardized preprocessing for large-scale EEG analysis.
     Frontiers In Neuroinformatics, 9. doi: 10.3389/fninf.2015.00016
     [2] Fischler, M., & Bolles, R. (1981). Random sample consensus: a paradigm
     for model fitting with applications to image analysis and automated
     cartography. Communications Of The ACM, 24(6), 381-395. doi: 10.1145/358669.358692

     """
    EEGData = raw.get_data()
    ch_names_original = raw.info["ch_names"]
    sample_rate = raw.info["sfreq"]
    EEGData = mne.filter.filter_data(EEGData, sample_rate, 1, None, picks=None,
                           filter_length="auto", l_trans_bandwidth="auto",
                           h_trans_bandwidth="auto", n_jobs=1,
                           method="fir", iir_params=None,
                           copy=True,phase="zero",fir_window="hamming",
                           fir_design="firwin",pad="reflect_limited",
                           verbose=None)
    EEGData = signal.detrend(EEGData)
    # removing line noise
    EEGData = mne.filter.notch_filter(EEGData, sample_rate, linenoise, filter_length="auto",
                            notch_widths=None,trans_bandwidth=1,method="fir",
                            iir_params=None,mt_bandwidth=None,
                            p_value=0.05,picks=None,n_jobs=1,copy=True,phase="zero",
                            fir_window="hamming",ir_design="firwin",pad="reflect_limited",
                            verbose=None)
    # finding channels with NaNs or constant values for long periods of time
    original_dimensions = np.shape(EEGData)
    original_channels = np.arange(original_dimensions[0])
    channels_interpolate = original_channels
    nan_channel_mask = [False] * original_dimensions[0]
    no_signal_channel_mask = [False] * original_dimensions[0]

    for i in range(0, original_dimensions[0]):
        nan_channel_mask[i] = np.sum(np.isnan(EEGData[i, :])) > 0
    for i in range(0, original_dimensions[0]):
        no_signal_channel_mask[i] = robust.mad(EEGData[i, :]) < 10 ** (-10) or np.std(
            EEGData[i, :]) < 10 ** (-10)
    nan_channels = channels_interpolate[nan_channel_mask]
    no_data_channels = channels_interpolate[no_signal_channel_mask]
    # drop the flagged channels in one step (deleting inside a loop would shift the indices)
    bad_channel_mask = np.logical_or(nan_channel_mask, no_signal_channel_mask)
    EEGData = EEGData[~bad_channel_mask, :]
    nans_no_data_channels = np.union1d(nan_channels, no_data_channels)
    channels_interpolate = np.setdiff1d(
        channels_interpolate, nans_no_data_channels)
    nans_no_data_ChannelName = list()
    ch_names = raw.info["ch_names"]
    for i in range(0, len(nans_no_data_channels)):
        nans_no_data_ChannelName.append(ch_names[nans_no_data_channels[i]])
    raw.drop_channels(nans_no_data_ChannelName)
    evaluation_channels = channels_interpolate
    new_dimension = np.shape(EEGData)

    # find channels that have abnormally high or low amplitude
    robust_channel_deviation = np.zeros(original_dimensions[0])
    deviation_channel_mask = [False] * (new_dimension[0])
    channel_deviation = np.zeros(new_dimension[0])
    for i in range(0, new_dimension[0]):
        channel_deviation[i] = 0.7413 * iqr(EEGData[i, :])
    channel_deviationSD = 0.7413 * iqr(channel_deviation)
    channel_deviationMedian = np.nanmedian(channel_deviation)
    robust_channel_deviation[evaluation_channels] = np.divide(
        np.subtract(channel_deviation, channel_deviationMedian), channel_deviationSD
    )
    for i in range(0, new_dimension[0]):
        deviation_channel_mask[i] = abs(robust_channel_deviation[i]) > 5 or np.isnan(
            robust_channel_deviation[i]
        )
    deviation_channels = evaluation_channels[deviation_channel_mask]
    # finding channels with high frequency noise
    EEGData = np.transpose(EEGData)
    dimension = np.shape(EEGData)
    if sample_rate > 100:
        new_EEG = np.zeros((dimension[0], dimension[1]))
        bandpass_filter = filter_design(
            N_order=100,
            amp=np.array([1, 1, 0, 0]),
            freq=np.array([0, 0.36, 0.4, 1]),
            sample_rate=sample_rate)
        for i in range(0, dimension[1]):
            new_EEG[:, i] = signal.filtfilt(bandpass_filter, 1, EEGData[:, i])
        noisiness = np.divide(robust.mad(np.subtract(EEGData, new_EEG)),
                              robust.mad(new_EEG))
        noisiness_median = np.nanmedian(noisiness)
        noiseSD = (np.median(np.absolute(np.subtract(noisiness, np.median(noisiness))))
                   * 1.4826)
        zscore_HFNoise = np.divide(np.subtract(noisiness, noisiness_median), noiseSD)
        HFnoise_channel_mask = [False] * new_dimension[0]
        for i in range(0, new_dimension[0]):
            HFnoise_channel_mask[i] = zscore_HFNoise[i] > 5 or np.isnan(
                zscore_HFNoise[i])
    else:
        new_EEG = EEGData
        noisiness_median = 0
        noiseSD = 1
        zscore_HFNoise = np.zeros(dimension[1])
        HFnoise_channel_mask = [False] * new_dimension[0]
    HFNoise_channels = evaluation_channels[HFnoise_channel_mask]
    # finding channels by correlation
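    # In non-overlapping 1 s windows, each channel is summarised by the 98th percentile of its
    # absolute correlations with the other channels. Channels whose windowed correlation drops
    # below 0.4 in more than 1% of windows are flagged as badly correlated, and windows where
    # the correlation or noise level is NaN are counted as drop-outs.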
    CORRELATION_SECONDS = 1  # default value
    CORRELATION_FRAMES = CORRELATION_SECONDS * sample_rate
    correlation_window = np.arange(CORRELATION_FRAMES)
    correlation_offsets = np.arange(1, dimension[0] - CORRELATION_FRAMES,
                                    CORRELATION_FRAMES)
    w_correlation = len(correlation_offsets)
    maximum_correlations = np.ones((original_dimensions[0], w_correlation))
    drop_out = np.zeros((dimension[1], w_correlation))
    channel_correlation = np.ones((w_correlation, dimension[1]))
    noiselevels = np.zeros((w_correlation, dimension[1]))
    channel_deviations = np.zeros((w_correlation, dimension[1]))
    drop = np.zeros((w_correlation, dimension[1]))
    len_correlation_window = len(correlation_window)
    EEG_new_win = np.reshape(
        np.transpose(new_EEG[0: len_correlation_window * w_correlation, :]),
        (dimension[1], len_correlation_window, w_correlation),
        order="F")
    data_win = np.reshape(
        np.transpose(EEGData[0: len_correlation_window * w_correlation, :]),
        (dimension[1], len_correlation_window, w_correlation),
        order="F")
    for k in range(0, w_correlation):
        eeg_portion = np.transpose(np.squeeze(EEG_new_win[:, :, k]))
        data_portion = np.transpose(np.squeeze(data_win[:, :, k]))
        window_correlation = np.corrcoef(np.transpose(eeg_portion))
        abs_corr = np.abs(
            np.subtract(window_correlation, np.diag(np.diag(window_correlation))))
        channel_correlation[k, :] = np.quantile(abs_corr, 0.98, axis=0)
        noiselevels[k, :] = np.divide(
            robust.mad(np.subtract(data_portion, eeg_portion)), robust.mad(eeg_portion))
        channel_deviations[k, :] = 0.7413 * iqr(data_portion, axis=0)
    for i in range(0, w_correlation):
        for j in range(0, dimension[1]):
            drop[i, j] = int(
                np.isnan(channel_correlation[i, j]) or np.isnan(noiselevels[i, j]))
            if drop[i, j] == 1:
                channel_deviations[i, j] = 0
                noiselevels[i, j] = 0
    maximum_correlations[evaluation_channels, :] = np.transpose(channel_correlation)
    drop_out[:] = np.transpose(drop)
    noiselevels_out = np.transpose(noiselevels)
    channel_deviations_out = np.transpose(channel_deviations)
    thresholded_correlations = maximum_correlations < 0.4
    thresholded_correlations = thresholded_correlations.astype(int)
    fraction_BadCorrelationWindows = np.mean(thresholded_correlations, axis=1)
    fraction_BadDropOutWindows = np.mean(drop_out, axis=1)

    badCorrelation_channels = np.where(fraction_BadCorrelationWindows > 0.01)[0]
    badCorrelation_channels_out = badCorrelation_channels[:]
    dropout_channels = np.where(fraction_BadDropOutWindows > 0.01)[0]
    dropout_channels_out = dropout_channels[:]
    # medianMaxCorrelation = np.median(maximumCorrelations, 2);

    badSNR_channels = np.union1d(badCorrelation_channels_out, HFNoise_channels)
    noisy_channels = np.union1d(np.union1d(np.union1d(deviation_channels,
                    np.union1d(badCorrelation_channels_out, dropout_channels_out)),
                     badSNR_channels), np.union1d(nan_channels, no_data_channels))

    # performing ransac
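    # RANSAC step: each channel is predicted from random subsets of the good channels
    # (SAMPLES = 50 draws, each using FRACTION_GOOD = 25% of them; run_ransac presumably
    # interpolates from the electrode positions it is given), and the observed vs. predicted
    # signals are correlated in CORR_WIN_SEC windows. Channels correlating below CORR_THRESH
    # with their prediction in more than FRACTION_BAD of windows are flagged as bad by ransac.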
    bads = list()
    for i in range(0, len(noisy_channels)):
        bads.append(ch_names[noisy_channels[i]])
    SAMPLES = 50
    FRACTION_GOOD = 0.25
    CORR_THRESH = 0.75
    FRACTION_BAD = 0.4
    CORR_WIN_SEC = 4
    chn_pos = raw._get_channel_positions()
    raw.info["bads"] = bads
    good_chn_labs = list()
    good_idx = mne.pick_channels(ch_names, include=[], exclude=raw.info["bads"])
    for i in range(0, len(good_idx)):
        good_chn_labs.append(ch_names[good_idx[i]])
    n_chans_good = good_idx.shape[0]
    chn_pos_good = chn_pos[good_idx, :]
    n_pred_chns = int(np.ceil(FRACTION_GOOD * n_chans_good))
    EEGData_filtered = np.transpose(new_EEG)
    if n_pred_chns <= 3:
        raise IOError("Too few channels available to reliably perform ransac.")

    # Make the ransac predictions
    ransac_eeg = run_ransac(chn_pos=chn_pos, chn_pos_good=chn_pos_good,
        good_chn_labs=good_chn_labs, n_pred_chns=n_pred_chns,
        data=EEGData_filtered, n_samples=SAMPLES, raw=raw)
    signal_len = original_dimensions[1]
    n_chans = len(chn_pos)
    correlation_frames = CORR_WIN_SEC * raw.info["sfreq"]
    correlation_window = np.arange(correlation_frames)
    n = correlation_window.shape[0]
    correlation_offsets = np.arange(
        0, (signal_len - correlation_frames), correlation_frames)
    w_correlation = correlation_offsets.shape[0]
    data_window = EEGData_filtered[:n_chans, : n * w_correlation]
    data_window = data_window.reshape(n_chans, n, w_correlation)
    pred_window = ransac_eeg[:n_chans, : n * w_correlation]
    pred_window = pred_window.reshape(n_chans, n, w_correlation)
    channel_correlations = np.ones((w_correlation, n_chans))
    for k in range(w_correlation):
        data_portion = data_window[:, :, k]
        pred_portion = pred_window[:, :, k]
        corr = np.corrcoef(data_portion, pred_portion)
        corr = np.diag(corr[0:n_chans, n_chans:])
        channel_correlations[k, :] = corr

    thresholded_correlations = channel_correlations < CORR_THRESH
    frac_bad_corr_windows = np.mean(thresholded_correlations, axis=0)
    # find the corresponding channel names and return
    bad_idxs_bool = frac_bad_corr_windows > FRACTION_BAD
    bad_idxs = np.argwhere(bad_idxs_bool)
    bad_by_ransac = list()
    noisy_channels = np.union1d(noisy_channels, bad_idxs.flatten())
    ransac_channel_correlations = channel_correlations
    noisy_channels_list = list()
    for i in range(0, len(noisy_channels)):
        noisy_channels_list.append(ch_names_original[noisy_channels[i]])
    print(noisy_channels_list)
    return noisy_channels_list
Пример #25
0
    DecilesS3.append(np.percentile(S3, i))
    DecilesS4.append(np.percentile(S4, i))
    DecilesS5.append(np.percentile(S5, i))

#Interquartile range
IQR1 = S1_perc_75 - S1_perc_25
IQR2 = S2_perc_75 - S2_perc_25
IQR3 = S3_perc_75 - S3_perc_25
IQR4 = S4_perc_75 - S4_perc_25
IQR5 = S5_perc_75 - S5_perc_25

from statsmodels import robust

#Median absolute deviation (MAD)

MAD1 = robust.mad(S1)
MAD2 = robust.mad(S2)
MAD3 = robust.mad(S3)
MAD4 = robust.mad(S4)
MAD5 = robust.mad(S5)

#Trimean


def Trimd(percentil25, mediana, percentil75):
    Trimedia = ((percentil25) + (2 * mediana) + (percentil75)) / 4
    return Trimedia


TriM1 = Trimd(S1_perc_25, S1_medi, S1_perc_75)
TriM2 = Trimd(S2_perc_25, S2_medi, S2_perc_75)
Пример #26
0
    needD = 1

    while(norm(y_dwtD[dwtD_sort_id[0:needD]]) / norm(y_dwtD) < (compressed_percentage/100)):
        needD = needD + 1
    print(needD, compressed_percentage / 100)
    # zero the detail coefficients outside the set that carries
    # compressed_percentage% of the detail energy (thresholding)
    y_dwtD[dwtD_sort_id[needD:]] = 0
    #y_dwtD = np.reshape(y_dwtD, (len(y_dwtD), 1))
    
    #y_cmp = np.concatenate((y_dwtA, y_dwtD),axis=1)
    #print np.shape(y_cmp)
    #get compressed signal by inverse dwt the finalized coeffs
    y_cmp = idwt(y_dwtA, y_dwtD, 'db4')
    '''

    sigma = mad(coeff[-1])
    threshold = sigma * np.sqrt(2 * np.log(len(y_data)))
    #coeff[1:] = (pywt.threshold(i, value=threshold, mode="soft") for i in coeff[1:])
    coeff[1:] = (pywt.threshold(i, value=threshold, mode="hard")
                 for i in coeff[1:])
    y_cmp = pywt.waverec(coeff, "db20", mode="per")
    #print np.shape(y_cmp)
    '''
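    # Two thresholding strategies appear in this example: the active block above keeps the
    # largest detail coefficients until they carry compressed_percentage% of the detail
    # energy, while the quoted alternative applies the MAD-based universal threshold
    # sigma * sqrt(2 * ln(N)) (VisuShrink-style) to all detail levels before reconstruction.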
    output_file("legend.html", title="legend.py example")
    p1 = figure(title="Original", tools=TOOLS, plot_width=800, plot_height=400)
    p2 = figure(title="After dwt", tools=TOOLS, plot_width=800, plot_height=400)

    #p1.circle(x, y, legend="Control points", color="red", alpha=0.5)
    p1.line(x, y_data, legend="Control Points", color="blue", alpha=0.8)

    #p2.line(x, fft_y, legend="Control points", color="red", alpha=0.5)
Пример #27
0
# Median, Percentile, quantile, MAD
print("Median:")
print(np.median(haber_1["nodes"]))
print(np.median(haber_2["nodes"]))

print("\nQuantiles:")
print(np.percentile(haber_1["nodes"], np.arange(0, 100, 25)))
print(np.percentile(haber_2["nodes"], np.arange(0, 100, 25)))

print("\n20th Percentile range")
print(np.percentile(haber_1["nodes"], np.arange(0, 100, 20)))
print(np.percentile(haber_2["nodes"], np.arange(0, 100, 20)))

from statsmodels import robust
print("\nMedian Absolute Deviation:")
print(robust.mad(haber_1["nodes"]))
print(robust.mad(haber_2["nodes"]))
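# robust.mad divides the raw median absolute deviation by the default consistency constant
# c ~ 0.6745 so that, for normally distributed data, it estimates the standard deviation.
# Illustrative check of that relationship (assumes the default c):
raw_mad_1 = np.median(np.abs(haber_1["nodes"] - np.median(haber_1["nodes"])))
print(robust.mad(haber_1["nodes"]), raw_mad_1 / 0.6745)  # the two values should agree closely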
# Box plot and Whiskers

# Setting handles for the legend.
import matplotlib.patches as mpatches
blue_patch = mpatches.Patch(color="steelblue", label="1")
orange_patch = mpatches.Patch(color="orange", label="2")

# Box plot and whiskers for nodes
sns.boxplot(x="status", y="nodes", data=haber)
plt.title("Box plot for Nodes")
plt.legend(title="status", handles=[blue_patch, orange_patch])
plt.show()
# Box plot and whiskers for age
sns.boxplot(x="status", y="age", data=haber)
Пример #28
0
    def check_station_residual(self, instaxml, period, runid = 0, discard = False, usemad = True, madfactor = 3., crifactor = 0.5, crilimit = 10.,\
            plot = True, projection = 'merc', cmap = 'surf', vmin = None, vmax = None, clabel = 'average absolute'):
        stainv = obspy.read_inventory(instaxml)
        lats = []
        lons = []
        staids = []
        for network in stainv:
            for station in network:
                stlo = float(station.longitude)
                if stlo < 0.:
                    stlo += 360.
                if station.latitude <= self.maxlat and station.latitude >= self.minlat\
                    and stlo <= self.maxlon and stlo >= self.minlon:
                    lats.append(station.latitude)
                    lons.append(stlo)
                    staids.append(network.code + '.' + station.code)
        smoothgroup = self['smooth_run_' + str(runid)]
        try:
            residdset = smoothgroup['%g_sec' % (period) + '/residual']
            # id fi0 lam0 f1 lam1 vel_obs weight res_tomo res_mod delta
            residual = residdset[()]
        except KeyError:
            raise AttributeError('Residual data: ' + str(period) +
                                 ' sec does not exist!')
        if discard:
            res_tomo = residual[:, 7]
            # quality control to discard data with large misfit
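            # The MAD-based branch is a robust analogue of k-sigma clipping: the cut-off
            # madfactor * MAD is not inflated by the very outliers it is meant to reject,
            # unlike a threshold derived from the sample standard deviation.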
            if usemad:
                from statsmodels import robust
                mad = robust.mad(res_tomo)
                cri_res = madfactor * mad
            else:
                cri_res = min(crifactor * period, crilimit)
            residual = residual[np.abs(res_tomo) < cri_res, :]

        lats = np.asarray(lats, dtype=np.float64)
        lons = np.asarray(lons, dtype=np.float64)
        Ncounts, absres, res = _tomo_funcs._station_residual(
            np.float64(lats), np.float64(lons), np.float64(residual))

        # plot
        #-----------
        # plot data
        #-----------
        m = self._get_basemap(projection=projection)
        x, y = m(lons, lats)
        try:
            import pycpt
            if os.path.isfile(cmap):
                cmap = pycpt.load.gmtColormap(cmap)
                # cmap    = cmap.reversed()
            elif os.path.isfile(cpt_path + '/' + cmap + '.cpt'):
                cmap = pycpt.load.gmtColormap(cpt_path + '/' + cmap + '.cpt')
        except:
            pass
        values = res / Ncounts
        im = m.scatter(x,
                       y,
                       marker='^',
                       s=50,
                       c=values,
                       cmap=cmap,
                       vmin=vmin,
                       vmax=vmax)
        cb = m.colorbar(
            im, "bottom", size="5%", pad='2%'
        )  #, ticks=[20., 25., 30., 35., 40., 45., 50., 55., 60., 65., 70.])
        cb.set_label(clabel, fontsize=20, rotation=0)
        plt.suptitle(str(period) + ' sec', fontsize=20)
        cb.ax.tick_params(labelsize=40)

        cb.set_alpha(1)
        cb.draw_all()

        # # cb.solids.set_rasterized(True)
        cb.solids.set_edgecolor("face")

        plt.show()

        return Ncounts, absres, res, staids
Пример #29
0
def baseline_als(x,
                 y,
                 lam=None,
                 p=None,
                 niter=10,
                 return_baseline=False,
                 offset_correction=False):
    """Baseline Correction with Asymmetric Least Squares Smoothing.

    Parameters
    ----------
    x : array-like
        the sample time/number/position
    y : array-like
        the data series corresponding to ``x``
    lam : float
        the lambda parameter of the ALS method. This control how much the
        baseline can adapt to local changes. A higher value corresponds to a
        stiffer baseline
    p : float
        the asymmetry parameter of the ALS method. This controls the overall
        slope tolerated for the baseline. A higher value correspond to a
        higher possible slope

    Other Parameters
    ----------------
    niter : int
        The number of iterations to perform
    return_baseline : bool
        return the baseline?
    offset_correction : bool
        also correct for an offset to align with the running mean of the scan

    Returns
    -------
    y_subtracted : array-like, same size as ``y``
        The initial time series, subtracted from the trend
    baseline : array-like, same size as ``y``
        Fitted baseline. Only returned if return_baseline is ``True``

    Examples
    --------
    >>> x = np.arange(0, 10, 0.01)
    >>> y = np.zeros_like(x) + 10
    >>> ysub = baseline_als(x, y)
    >>> np.all(ysub < 0.001)
    True
    """

    if lam is None:
        lam = 1e11
    if p is None:
        p = 0.001

    z = _als(y, lam, p, niter=niter)

    ysub = y - z
    offset = 0
    if offset_correction:
        std = mad(ysub)
        good = np.abs(ysub) < 10 * std
        if len(x[good]) < 10:
            good = np.ones(len(x), dtype=bool)
            warnings.warn('Too few bins to perform baseline offset correction'
                          ' precisely. Beware of results')
        offset = offset_fit(x[good], ysub[good], 0)

    if return_baseline:
        return ysub - offset, z + offset
    else:
        return ysub - offset
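
# Note: `_als` is referenced above but not defined in this example. Below is a minimal
# sketch of what it might look like, assuming the standard Eilers & Boelens asymmetric
# least squares smoother (sparse second-difference penalty with asymmetric weights).
# Only the name and signature come from the call above; the body is an illustrative
# assumption, not necessarily this example's actual implementation.
def _als(y, lam, p, niter=10):
    import numpy as np
    from scipy import sparse
    from scipy.sparse.linalg import spsolve

    y = np.asarray(y, dtype=float)
    n = len(y)
    # Second-order difference operator, used as the smoothness penalty.
    D = sparse.diags([1.0, -2.0, 1.0], [0, -1, -2], shape=(n, n - 2))
    w = np.ones(n)
    z = y
    for _ in range(niter):
        W = sparse.spdiags(w, 0, n, n)
        # Solve (W + lam * D D^T) z = W y for the current baseline estimate z.
        z = spsolve((W + lam * D.dot(D.T)).tocsc(), w * y)
        # Asymmetric weights: points above the baseline get weight p, points below get 1 - p.
        w = p * (y > z) + (1 - p) * (y < z)
    return z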
Пример #30
0
                     np.reshape(yr_accomp[i:i + seglen], (1, seglen))),
                    axis=0)
                estimates = np.concatenate(
                    (np.reshape(ye_vocals[i:i + seglen], (1, seglen)),
                     np.reshape(ye_accomp[i:i + seglen], (1, seglen))),
                    axis=0)
                [SDR, _, SIR,
                 SAR] = museval.evaluate(references,
                                         estimates)  #sdr, isr, sir, sar
                vocal_SDR.append(SDR[0])
                vocal_SIR.append(SIR[0])
                vocal_SAR.append(SAR[0])

        print("Current vocal SDR median/mad/mean/std",
              np.median(np.asarray(vocal_SDR)),
              robust.mad(np.asarray(vocal_SDR)),
              np.mean(np.asarray(vocal_SDR)), np.std(np.asarray(vocal_SDR)))
        sw_SDR.append(np.median(np.asarray(vocal_SDR)))
        print("Current macro vocal SDR median/mad/mean/std",
              np.median(np.asarray(sw_SDR)), robust.mad(np.asarray(sw_SDR)),
              np.mean(np.asarray(sw_SDR)), np.std(np.asarray(sw_SDR)))
        print("Current vocal SIR median/mad/mean/std",
              np.median(np.asarray(vocal_SIR)),
              robust.mad(np.asarray(vocal_SIR)),
              np.mean(np.asarray(vocal_SIR)), np.std(np.asarray(vocal_SIR)))
        sw_SIR.append(np.median(np.asarray(vocal_SIR)))
        print("Current macro vocal SIR median/mad/mean/std",
              np.median(np.asarray(sw_SIR)), robust.mad(np.asarray(sw_SIR)),
              np.mean(np.asarray(sw_SIR)), np.std(np.asarray(sw_SIR)))
        print("Current vocal SAR median/mad/mean/std",
              np.median(np.asarray(vocal_SAR)),
Пример #31
0
plt.xlabel('{:s} ({:s})'.format(a_true, a_string))
plt.ylabel('{:s} ({:s})'.format(a_fit, a_string))
plt.title('(b) acceleration')  # + subtitle + statistic_title[1])
plt.legend(framealpha=0.5, loc='upper left', fontsize=legend_fontsize_fraction*axis_fontsize)
plt.grid()
plt.tight_layout()
if save:
    filename = 'acceleration_{:s}_{:s}.pdf'.format(name, root)
    plt.savefig(os.path.join(image_directory, filename), bbox_inches='tight', pad_inches=pad_inches)


#
# Median velocity and acceleration plots
#
v1 = np.median(z1v, axis=1)
v1e = mad(z1v, axis=1, c=1.0)

v2 = np.median(z2v, axis=1)
v2e = mad(z2v, axis=1, c=1.0)

a2 = np.median(z2a, axis=1)
a2e = mad(z2a, axis=1, c=1.0)
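# With c=1.0 the normal-consistency rescaling in mad() is disabled, so these error bars are
# the raw median absolute deviations of the fitted values rather than sigma-like estimates.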

v_string = v0.unit.to_string('latex_inline')
a_string = a0.unit.to_string('latex_inline')
plt.figure(3)
plt.errorbar(accs, v1, yerr=v1e, label='polynomial n=1, fit velocity')
plt.errorbar(accs, v2, yerr=v2e, label='polynomial n=2, fit velocity')
plt.xlim(np.min(accs), np.max(accs))
plt.axhline(v0.to(u.km/u.s).value, label='true velocity ({:n} {:s})'.format(v0.value, v_string), color='r')
plt.xlabel('true acceleration ({:s})'.format(a_string))
Пример #32
0
def analysisIrradianceandPowerMismatch2(testfolder,
                                        writefiletitle,
                                        numpanels,
                                        sensorsy,
                                        portraitorlandscape='landscape'):
    '''
    Reads and calculates power output and mismatch for each file in the 
    testfolder where all the bifacial_radiance irradiance results .csv are saved.
    It first loads each file, cleans it, and resamples it to the number of sensors (sensorsy) set in this function,
    then calculates irradiance mismatch and PVMismatch power output for averaged, minimum,
    or detailed irradiances on each cell for the cases of A) only 12 or 8 downsampled values are
    considered (at the center of each cell), and B) 12 or 8 values are obtained from averaging
    all the irradiances falling in the area of the cell (No edges or inter-cell spacing are considered
    at this moment). Then it saves all the A and B irradiances, as well as the cleaned/resampled
    front and rear irradiances.
    
    Ideally sensorsy in the read data is >> 12 to give results for the irradiance mismatch in the cell.
    
    Also ideally n
     
    Parameters
    ----------
    testfolder:   folder containing output .csv files for bifacial_radiance
    writefiletitle:   .csv title where the output results will be saved.
    numpanels:   1 or 2 only at the moment, necessary for the cleaning routine.
    portraitorlandscape: 'portrait' or 'landscape', for PVMismatch input
                      which defines the electrical interconnects inside the module. 
    sensorsy : number of sensors. Ideally this number is >> 12 and 
               is also similar to the number of sensors (points) in the .csv result files.
               We want more than 12 sensors to be able to calculate mismatch of 
               irradiance in the cell.
    
    '''

    #INPUT VARIABLES NECESSARY:
    #\\nrel.gov\shared\5J00\Staff\CDeline\Bifacial mismatch data\Tracker mismatch data\3_26_19 Cairo_mismatch_1up tube
    #testfolder = r'C:\Users\sayala\Documents\RadianceScenes\Demo3\results'
    #testfolder = r'\\nrel.gov\shared\5J00\Staff\CDeline\Bifacial mismatch data\Tracker mismatch data\3_26_19 Cairo_mismatch_1up tube\results_noTorqueTube'
    #writefiletitle = r'C:\Users\sayala\Documents\RadianceScenes\results_Cairo_mismatch_1up_noTorqueTube.csv'
    #numpanels= 1
    #portraitorlandscape = 'portrait' # portrait has 12 cells, landscape has 8
    #sensorsy = 120  # deepclean will clean and resample to this number of sensors.
    # ideally a number close to the original number of sample points.
    # Also, if it's just 12 or 8 (for landscape or portrait), all the averaged values and cell mismatch
    # become a moot point

    # User information.
    filelist = sorted(os.listdir(testfolder))
    print('{} files in the directory'.format(filelist.__len__()))

    # PVMISMATCH Initialization of System
    pvsys = pvsystem.PVsystem(
        numberStrs=1,
        numberMods=1)  # makes the system  # 1 module, in portrait mode.
    pmp_ideal = pvsys.Pmp  # Panel ideal. Monofacial.
    stdpl = np.array([[0, 23, 24, 47, 48, 71, 72, 95],
                      [1, 22, 25, 46, 49, 70, 73, 94],
                      [2, 21, 26, 45, 50, 69, 74, 93],
                      [3, 20, 27, 44, 51, 68, 75, 92],
                      [4, 19, 28, 43, 52, 67, 76, 91],
                      [5, 18, 29, 42, 53, 66, 77, 90],
                      [6, 17, 30, 41, 54, 65, 78, 89],
                      [7, 16, 31, 40, 55, 64, 79, 88],
                      [8, 15, 32, 39, 56, 63, 80, 87],
                      [9, 14, 33, 38, 57, 62, 81, 86],
                      [10, 13, 34, 37, 58, 61, 82, 85],
                      [11, 12, 35, 36, 59, 60, 83, 84]])

    if portraitorlandscape == 'portrait':
        samplecells = 12
        repeatedcells = 8

    if portraitorlandscape == 'landscape':
        samplecells = 8
        repeatedcells = 12
        stdpl = stdpl.transpose()

    # SAMPLE POINT AND HEADER DEFINITION
    cellCenterPVM = [
    ]  # This grabs just the value at the 'center' of the cell.
    cellFrontandBackMismatch_Header = []
    cellBackMismatch_Header = []
    cellCenterFrontValue_Header = []
    cellCenterBackValue_Header = []
    cellFrontAveragedValue_Header = []
    cellBackAveragedValue_Header = []
    frontres_header = []
    backres_header = []

    for i in range(0, samplecells):
        cellCenterPVM.append((i * sensorsy / (samplecells * 1.0) +
                              (i + 1) * sensorsy / (samplecells * 1.0)) / 2)
        cellFrontandBackMismatch_Header.append('FrontplusBack_Mismatch_cell_' +
                                               str(i))
        cellBackMismatch_Header.append('Back_Mismatch_cell_' + str(i))
        cellCenterFrontValue_Header.append('CellCenterFrontValue_cell' +
                                           str(i))
        cellCenterBackValue_Header.append('CellCenterBackValue_cell' + str(i))
        cellBackAveragedValue_Header.append('CellBack_AveragedValue_cell_' +
                                            str(i))
        cellFrontAveragedValue_Header.append('CellFront_AveragedValue_cell_' +
                                             str(i))

    for i in range(0, sensorsy):
        frontres_header.append('Clean_Front_cell' + str(i))
        backres_header.append('Clean_Back_cell' + str(i))

    # HEADERS:
    outputheaders = [
        'Timestamp', 'PowerAveraged_CellCenter', 'PowerMin_CellCenter',
        'PowerDetailed_CellCenter', 'PowerAveraged_AverageValues',
        'PowerMin_AverageValues', 'PowerDetailed_AverageValues',
        'PowerFRONT_Averaged', 'PowerFRONT_Detailed', 'MAD_cellCenterVal',
        'MAD_cellAverage', 'MAD_frontplusback_clean', 'Cell Front Min',
        'Cell Back Min', 'Irradiance Mismatch Front+Back Max',
        'Irradiance Mismatch Back Max'
    ]
    outputheaders += cellFrontandBackMismatch_Header
    outputheaders += cellBackMismatch_Header
    outputheaders += cellCenterFrontValue_Header
    outputheaders += cellCenterBackValue_Header
    outputheaders += cellBackAveragedValue_Header
    outputheaders += cellFrontAveragedValue_Header
    outputheaders += frontres_header
    outputheaders += backres_header

    with open(writefiletitle, 'w') as csvfile:

        sw = csv.writer(csvfile,
                        delimiter=',',
                        quotechar='|',
                        quoting=csv.QUOTE_MINIMAL,
                        lineterminator='\n')

        sw.writerow(outputheaders)
        # LOOP OVER FILES HERE
        for z in range(0, filelist.__len__()):
            #for z in range(0, 1):

            data = load.read1Result(os.path.join(testfolder, filelist[z]))
            #sensorsy = len(data)  # 210 for this case. deepclean resamples to value given.

            [frontres, backres] = load.deepcleanResult(data,
                                                       sensorsy,
                                                       numpanels,
                                                       automatic=True)
            cellAverageValues_FrontPlusBack = []
            cellFrontAverage = []  # This averages the number of sensors.
            cellBackAverage = []
            cellFrontandBackMismatch = []
            cellBackMismatch = []
            cellFrontMin = []
            cellBackMin = []
            cellFrontPlusBackMin = []
            frontandbackres = frontres + backres
            cellRows = len(
                frontres
            )  # this is the same as sensorsy.... maybe replace? #TODO

            if cellRows != samplecells:
                for i in range(0, samplecells):
                    istart = int(i * cellRows / samplecells)
                    iend = int((i + 1) * cellRows / samplecells)
                    cellFrontAverage.append(np.average(frontres[istart:iend]))
                    cellBackAverage.append(np.average(backres[istart:iend]))
                    cellAverageValues_FrontPlusBack.append(
                        np.average(frontres[istart:iend]) +
                        np.average(backres[istart:iend]))
                    cellFrontandBackMismatch.append(
                        (max(frontandbackres[istart:iend]) -
                         min(frontandbackres[istart:iend])) * 100 /
                        (max(frontandbackres[istart:iend]) +
                         min(frontandbackres[istart:iend])))
                    cellBackMismatch.append(
                        (max(backres[istart:iend]) - min(backres[istart:iend]))
                        * 100 / (max(backres[istart:iend]) +
                                 min(backres[istart:iend])))
                    cellFrontMin.append(min(frontres[istart:iend]))
                    cellBackMin.append(min(backres[istart:iend]))
                    cellFrontPlusBackMin.append(
                        min(frontandbackres[istart:iend]))
                cellCenterValFront = np.interp(cellCenterPVM,
                                               list(range(0, cellRows)),
                                               frontres)
                cellCenterValBack = np.interp(cellCenterPVM,
                                              list(range(0, cellRows)),
                                              backres)
            else:
                cellCenterValFront = frontres
                cellCenterValBack = backres

            sunmatDetailed_CellCenter = []
            sunmatAveraged_CellCenter = []
            sunmatMin_CellCenter = []
            sunmatDetailed_AverageValues = []
            sunmatAveraged_AverageValues = []
            sunmatMin_AverageValues = []
            sunmatFrontOnly_Averaged = []
            sunmatFrontOnly_Detailed = []

            # Center of Cell only
            cellCenterValues_FrontPlusBack = cellCenterValFront + cellCenterValBack
            AveFront_CellCenter = cellCenterValFront.mean()
            AveBack_CellCenter = cellCenterValBack.mean()

            # Average of Cell
            #cellAverageValues_FrontPlusBack = sum(cellFrontAverage,cellBackAverage)
            AveFront_AverageValues = np.mean(cellFrontAverage)
            AveBack_AverageValues = np.mean(cellBackAverage)

            # Repeat to create a matrix to pass matrix.
            for j in range(0, len(cellCenterValues_FrontPlusBack)):
                sunmatDetailed_CellCenter.append(
                    [cellCenterValues_FrontPlusBack[j] / 1000] * repeatedcells)
                sunmatDetailed_AverageValues.append(
                    [cellAverageValues_FrontPlusBack[j] / 1000] *
                    repeatedcells)

            for j in range(0, len(cellCenterValFront)):
                sunmatAveraged_CellCenter.append(
                    [(AveFront_CellCenter + AveBack_CellCenter) / 1000] *
                    repeatedcells)
                sunmatAveraged_AverageValues.append(
                    [(AveFront_AverageValues + AveBack_AverageValues) / 1000] *
                    repeatedcells)

            for j in range(0, len(cellCenterValFront)):
                sunmatMin_CellCenter.append(
                    [min(cellCenterValues_FrontPlusBack) / 1000] *
                    repeatedcells)
                sunmatMin_AverageValues.append(
                    [min(cellFrontPlusBackMin) / 1000] * repeatedcells)

            # FRONT MISMATCH
            for j in range(0, len(cellCenterValFront)):
                sunmatFrontOnly_Averaged.append([cellFrontAverage[j] / 1000] *
                                                repeatedcells)
                sunmatFrontOnly_Detailed.append(
                    [cellCenterValFront[j] / 1000] * repeatedcells)

            # Run PVMismatch with the cell-center irradiance matrices
            pvsys.setSuns({0: {0: [sunmatAveraged_CellCenter, stdpl]}})
            PowerAveraged_CellCenter = pvsys.Pmp

            pvsys.setSuns({0: {0: [sunmatDetailed_CellCenter, stdpl]}})
            PowerDetailed_CellCenter = pvsys.Pmp

            pvsys.setSuns({0: {0: [sunmatMin_CellCenter, stdpl]}})
            PowerMinimum_CellCenter = pvsys.Pmp

            # Run PVMismatch with the cell-averaged irradiance matrices
            pvsys.setSuns({0: {0: [sunmatAveraged_AverageValues, stdpl]}})
            PowerAveraged_AverageValues = pvsys.Pmp

            pvsys.setSuns({0: {0: [sunmatDetailed_AverageValues, stdpl]}})
            PowerDetailed_AverageValues = pvsys.Pmp

            pvsys.setSuns({0: {0: [sunmatMin_AverageValues, stdpl]}})
            PowerMinimum_AverageValues = pvsys.Pmp

            # Run PVMismatch with the front-only irradiance matrices
            pvsys.setSuns({0: {0: [sunmatFrontOnly_Averaged, stdpl]}})
            PowerFRONT_Averaged = pvsys.Pmp

            pvsys.setSuns({0: {0: [sunmatFrontOnly_Detailed, stdpl]}})
            PowerFRONT_Detailed = pvsys.Pmp

            #flattened = [val for sublist in dictvalues for val in sublist]

            # Append Values
            #cellCenterValFrontFlat = [val for sublist in cellCenterValFront for val in sublist]
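            # robust.mad of the front+back irradiance distributions (cell centers, cell
            # averages, and the cleaned per-sensor profile) is reported as a dispersion-based
            # mismatch metric alongside the PVMismatch power results.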
            outputvalues = [
                filelist[z], PowerAveraged_CellCenter, PowerMinimum_CellCenter,
                PowerDetailed_CellCenter, PowerAveraged_AverageValues,
                PowerMinimum_AverageValues, PowerDetailed_AverageValues,
                PowerFRONT_Averaged, PowerFRONT_Detailed,
                robust.mad(cellCenterValues_FrontPlusBack),
                robust.mad(cellAverageValues_FrontPlusBack),
                robust.mad(frontandbackres),
                min(cellFrontMin),
                min(cellBackMin),
                max(cellFrontandBackMismatch),
                max(cellBackMismatch)
            ]
            outputvalues += cellFrontandBackMismatch  # 12
            outputvalues += cellBackMismatch  #   12
            outputvalues += list(cellCenterValFront)  # 12
            outputvalues += list(cellCenterValBack)  # 12
            outputvalues += list(cellFrontAverage)  # 12
            outputvalues += list(cellBackAverage)  # 12
            outputvalues += list(frontres)  #   sensorsy   # 210
            outputvalues += list(backres)  # sensorsy 210

            sw.writerow(outputvalues)
Пример #33
0
plt.xlim(xlim)
plt.xlabel('true velocity ({:s})'.format(v_string))
plt.ylabel('fit acceleration ({:s})'.format(a_string))
plt.title('(b) acceleration' + subtitle + statistic_title[1])
plt.legend(framealpha=0.5, loc='upper left')
plt.grid()
plt.tight_layout()
if save:
    filename = 'acceleration_mean_{:s}.png'.format(root)
    plt.savefig(os.path.join(image_directory, filename), bbox_inches='tight', pad_inches=pad_inches)

#
# Median velocity and acceleration plots
#
v1 = np.median(z1v, axis=1)
v1e = mad(z1v, axis=1, c=1.0)

v2 = np.median(z2v, axis=1)
v2e = mad(z2v, axis=1, c=1.0)

a2 = np.median(z2a, axis=1)
a2e = mad(z2a, axis=1, c=1.0)

v_string = v0.unit.to_string('latex_inline')
a_string = a0.unit.to_string('latex_inline')
"""
plt.figure(3)
plt.errorbar(accs, v1, yerr=v1e, label='polynomial n=1, fit velocity')
plt.errorbar(accs, v2, yerr=v2e, label='polynomial n=2, fit velocity')
plt.xlim(xlim)
plt.axhline(v0.to(u.km/u.s).value, label='true velocity ({:n} {:s})'.format(v0.value, v_string), color='r')
Пример #34
0
# Normalise size data by item.
joined = pd.concat([known, unknown], sort = False)
item_sizes = df(joined.groupby('item_id')['item_size'].apply(list))
x_size = list(item_sizes.index)
y_size = list(item_sizes.item_size.values)
zip_dict_size = dict(zip(x_size, y_size))

def mad_normalise(initial_size, sizes_median, mad):
    return (float(initial_size) - sizes_median) / mad
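# mad_normalise is a per-item robust z-score: (size - median) / MAD, using the MAD of all
# sizes recorded for that item_id. Items with zero MAD (a single size or no variation) are
# mapped to 0 below rather than dividing by zero.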

replacement_dict_master = {}
for item in joined['item_id'].unique():
    item_sizes = zip_dict_size[item]
    item_sizes = [float(size) for size in item_sizes]
    sizes_median = np.median(item_sizes)
    mad = robust.mad(item_sizes)
    if mad == 0:
        replacement_dict = {(item, initial_size): 0 for initial_size in item_sizes}
    else:
        replacement_dict = {(item, initial_size): round(mad_normalise(initial_size, sizes_median, mad), 2) for initial_size in item_sizes}

    replacement_dict_master.update(replacement_dict)

known['item_size'] = known.set_index(['item_id', 'item_size']).index.map(replacement_dict_master.get)
unknown['item_size'] = unknown.set_index(['item_id', 'item_size']).index.map(replacement_dict_master.get)

# Change date columns to datetime features.
known['order_date'] = pd.to_datetime(known['order_date'])
known['delivery_date'] = pd.to_datetime(known['delivery_date'])
known['user_dob'] = pd.to_datetime(known['user_dob'])
known['user_reg_date'] = pd.to_datetime(known['user_reg_date'])
Пример #35
0
        ioblk.parm.fitregion = 4.0
        ioblk.parm.debugLevel = 3
        tmpflx = flx[idxGd]
        tmpt = time[idxGd]
        tmpttwo = tmpt * tmpt
        tmptfour = tmpttwo * tmpttwo
        # Remove polynomial fit
        pvals = np.polyfit(tmpt, tmpflx, 4, full=False)
        tmpy = pvals[4] + pvals[3] * tmpt + pvals[2] * tmpttwo + pvals[
            1] * tmpttwo * tmpt + pvals[0] * tmptfour
        #plt.plot(tmpt, tmpflx, '.')
        #plt.plot(tmpt, tmpy, '-')
        #plt.show()
        ioblk.normlc = tmpflx / tmpy - 1.0

        ioblk.normes = robust.mad(ioblk.normlc)
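        # robust.mad of the detrended, normalized flux serves as an outlier-resistant noise
        # estimate; it seeds the per-point error array and the initial amplitude guess
        # (3x the noise level) for the fit set up below.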
        origstd = np.copy(ioblk.normes)
        ioblk.normts = time[idxGd]
        ioblk.modellc = np.copy(ioblk.normlc)
        ioblk.yData = np.copy(ioblk.normlc)
        ioblk.errData = np.full_like(ioblk.normlc, ioblk.normes)

        ioblk.timezpt = np.median(ioblk.normts)
        ioblk.normts = ioblk.normts - ioblk.timezpt

        ioblk.physval_names = ['Per', 'To', 'Amp', 'Zpt']
        ioblk.calcval_names = ['Per_c', 'To_c', 'Amp_c', 'Zpt_c']
        # Give seed starting values for the minimization
        ioblk.origests = np.array([allper[i], 0.0, ioblk.normes * 3.0, 0.0])
        # Give integer array for variables you want fixed during fit
        # 0 - not fixed (solved for) ; 1 - fixed (not solved for)
Пример #37
0
def train_lipop(seed: int = 19700101,
                limit: int = -1,
                use_cuda: bool = True,
                use_tqdm=True,
                force_save=False,
                special_config: dict = None,
                position_encoder_path: str = 'net/pe.pt',
                tag='std',
                dataset='Lipop'):
    cfg = DEFAULT_CONFIG.copy()
    if special_config:
        cfg.update(special_config)
    for k, v in cfg.items():
        print(k, ':', v)
    set_seed(seed, use_cuda)
    np.set_printoptions(precision=4, suppress=True, linewidth=140)

    if dataset == 'FreeSolv':
        smiles, info_list, properties = load_freesolv(limit,
                                                      force_save=force_save)
    elif dataset == 'ESOL':
        smiles, info_list, properties = load_esol(limit, force_save=force_save)
    else:
        smiles, info_list, properties = load_lipop(limit,
                                                   force_save=force_save)
    molecules = [
        HeteroGraph(info['nf'], info['ef'], info['us'], info['vs'], info['em'])
        for info in info_list
    ]
    n_dim = molecules[0].n_dim
    e_dim = molecules[0].e_dim
    node_num = len(molecules)

    train_mask, validate_mask, test_mask = sample(list(range(node_num)),
                                                  cfg['TRAIN_PER'],
                                                  cfg['VALIDATE_PER'],
                                                  cfg['TEST_PER'])
    n_seg = int(len(train_mask) / (cfg['BATCH'] + 1))
    n_seg = min(len(train_mask), n_seg)
    train_mask_list = [train_mask[i::n_seg] for i in range(n_seg)]
    n_seg = int(len(validate_mask) / (cfg['BATCH'] + 1))
    n_seg = min(len(validate_mask), n_seg)
    validate_mask_list = [validate_mask[i::n_seg] for i in range(n_seg)]
    n_seg = int(len(test_mask) / (cfg['BATCH'] + 1))
    n_seg = min(len(test_mask), n_seg)
    test_mask_list = [test_mask[i::n_seg] for i in range(n_seg)]
    print(train_mask[0], validate_mask[0], test_mask[0])
    print(len(train_mask_list), len(validate_mask_list), len(test_mask_list))

    t_properties = properties[train_mask, :]
    prop_mean = np.mean(t_properties, axis=0)
    print('mean:', prop_mean)
    prop_std = np.std(t_properties.tolist(), axis=0, ddof=1)
    print('std:', prop_std)
    prop_mad = robust.mad(t_properties.tolist(), axis=0)
    print('mad:', prop_mad)
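    # The MAD of the training targets is printed as a robust spread diagnostic alongside the
    # standard deviation; the normalization below uses the mean and std only.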
    norm_properties = (properties - prop_mean) / prop_std

    if position_encoder_path and os.path.exists(position_encoder_path):
        position_encoder = torch.load(position_encoder_path)
        position_encoder.eval()
    else:
        print('NO POSITION ENCODER IS BEING USED!!!')
        position_encoder = None
    model = AMPNN(n_dim=n_dim,
                  e_dim=e_dim,
                  config=cfg,
                  position_encoder=position_encoder,
                  use_cuda=use_cuda)
    regression = MLP(cfg['F_DIM'],
                     1,
                     h_dims=cfg['MLP_DIMS'],
                     dropout=cfg['DROPOUT'])
    if use_cuda:
        model.cuda()
        regression.cuda()
    for name, param in chain(model.named_parameters(),
                             regression.named_parameters()):
        if param.requires_grad:
            print(name, ":", param.shape)
    optimizer = optim.Adam(filter(
        lambda x: x.requires_grad,
        chain(model.parameters(), regression.parameters())),
                           lr=cfg['LR'],
                           weight_decay=cfg['DECAY'])
    scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer,
                                          step_size=1,
                                          gamma=cfg['GAMMA'])
    matrix_cache = MatrixCache(cfg['MAX_DICT'])
    loss_fuc = MSELoss()
    logs = []

    def forward(mask: list,
                name=None) -> (torch.Tensor, torch.Tensor, torch.Tensor):
        nfs = torch.cat([molecules[i].node_features for i in mask])
        efs = torch.cat([molecules[i].edge_features for i in mask])
        if use_cuda:
            nfs = nfs.cuda()
            efs = efs.cuda()

        us, vs, mm_tuple = matrix_cache.fetch(molecules, mask, nfs, name,
                                              use_cuda)

        embeddings, _ = model(nfs, efs, us, vs, mm_tuple, name,
                              [smiles[i] for i in mask])
        std_loss = 0
        logits = regression(embeddings)
        target = norm_properties[mask, :]
        target = torch.tensor(target.astype(np.float32), dtype=torch.float32)
        if use_cuda:
            target = target.cuda()
        return logits, target, std_loss

    def train(mask_list: list, name=None):
        model.train()
        regression.train()
        u_losses = []
        losses = []

        t = enumerate(mask_list)
        if use_tqdm:
            t = tqdm(t, total=len(mask_list))
        for i, m in t:
            if name:
                name_ = name + str(i)
            else:
                name_ = None
            optimizer.zero_grad()
            logits, target, std_loss = forward(m, name=name_)
            u_loss = loss_fuc(logits, target)
            u_losses.append(u_loss.cpu().item())
            loss = u_loss + std_loss
            # loss.backward()
            # optimizer.step()
            losses.append(loss)
            if len(losses) >= cfg['PACK'] or i == len(mask_list) - 1:
                (sum(losses) / len(losses)).backward()
                optimizer.step()
                losses.clear()

        u_loss = np.average(u_losses)
        print('\t\tSemi-supervised loss: {:.4f}'.format(u_loss))
        logs[-1].update({'on_train_loss': u_loss})

    def evaluate(mask_list: list, name=None, visualize=None):
        model.eval()
        regression.eval()
        losses = []
        masks = []
        logits_list = []
        target_list = []
        t = enumerate(mask_list)
        if use_tqdm:
            t = tqdm(t, total=len(mask_list))
        for i, m in t:
            if name:
                name_ = name + str(i)
            else:
                name_ = None
            logits, target, _ = forward(m, name=name_)
            loss = loss_fuc(logits, target)
            losses.append(loss.cpu().item())

            if visualize:
                masks.extend(m)
                logits_list.append(logits.cpu().detach().numpy())
                target_list.append(target.cpu().detach().numpy())

        mse_loss = np.average(losses) * (prop_std[0]**2)
        rmse_loss = np.average([loss**0.5 for loss in losses]) * prop_std[0]
        print('\t\tMSE Loss: {:.3f}'.format(mse_loss))
        print('\t\tRMSE Loss: {:.3f}'.format(rmse_loss))
        logs[-1].update({'{}_loss'.format(name): mse_loss})
        logs[-1].update({'{}_metric'.format(name): rmse_loss})

        if visualize:
            all_logits = np.vstack(logits_list)
            all_target = np.vstack(target_list)
            best_ids, best_ds, worst_ids, worst_ds = \
                plt_multiple_scatter(GRAPH_PATH + visualize, masks, all_logits, all_target)
            print('\t\tBest performance on:')
            for i, d in zip(best_ids, best_ds):
                print('\t\t\t{}: {}'.format(smiles[i], d))
            print('\t\tWorst performance on:')
            for i, d in zip(worst_ids, worst_ds):
                print('\t\t\t{}: {}'.format(smiles[i], d))

    for epoch in range(cfg['ITERATION']):
        logs.append({'epoch': epoch + 1})
        scheduler.step(epoch=epoch)
        print('In iteration {}:'.format(epoch + 1))
        print('\tTraining: ')
        train(train_mask_list, name='train')
        print('\tEvaluating training: ')
        evaluate(
            train_mask_list,
            name='train',
            # visualize='train_{}'.format(epoch + 1) if (epoch + 1) % cfg['EVAL'] == 0 else None
        )
        print('\tEvaluating validation: ')
        evaluate(
            validate_mask_list,
            name='evaluate',
            # visualize='val_{}'.format(epoch + 1) if (epoch + 1) % cfg['EVAL'] == 0 else None
        )
        print('\tEvaluating test: ')
        evaluate(
            test_mask_list,
            name='test',
            # visualize='test' if epoch + 1 == cfg['ITERATION'] else None
        )
        gc.collect()
        d = {'metric': 'RMSE', 'logs': logs}
        with open('{}{}.json'.format(LOG_PATH, tag), 'w+',
                  encoding='utf-8') as fp:
            json.dump(d, fp)