Example #1
def balance_dataset(ds, label, sort=True, **kwargs):
    """Balance event durations: remap the fixation events' numbering, then
    truncate every event to the shortest event duration and rebuild
    ds.a.events."""
    
    ################ To be changed ######################
    m_fixation = ds.targets == 'fixation'
    ev_fix = zip(ds.chunks[m_fixation], 
                 4*((ds.sa.events_number[m_fixation]+2)/4 - 1 )+2)
    ####################################################
    
    ev_fix=np.array(ev_fix)
    ds.sa.events_number[m_fixation] = np.int_(ev_fix.T[1])
    arg_sort = np.argsort(ds.sa.events_number)
    events = find_events(chunks = ds[arg_sort].sa.chunks, 
                         targets = ds[arg_sort].sa.targets)
    # min duration
    min_duration = np.min( [e['duration'] for e in events])

    # boolean mask that keeps only the first min_duration samples of every event
    mask = False

    for ev in np.unique(ds.sa.events_number):
        mask_event = ds.sa.events_number == ev
        mask_event[np.nonzero(mask_event)[0][min_duration-1]+1:] = False
    
        mask = mask + mask_event
    
    if sort:
        arg_sort = np.argsort(ds[mask].sa.events_number)
        ds = ds[mask][arg_sort]
    else:
        ds = ds[mask]
    
    ds.a.events = find_events(targets = ds.targets, chunks = ds.chunks)
    
    return ds
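
A minimal call sketch for the function above. Here ds stands for a PyMVPA dataset that already carries sa.targets, sa.chunks and sa.events_number (the attribute names come from the function body); the label value is only illustrative:

# keep only the first min_duration volumes of every event and re-sort
balanced = balance_dataset(ds, 'fixation', sort=True)
print balanced.a.events[:3]   # the rebuilt event list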
Example #2
def load_spatiotemporal_dataset(ds, **kwargs):
    """Build an event-related dataset from ds, optionally shifting event
    onsets and clipping every event to a common duration."""
    
    onset = 0
    
    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        
        
        
    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)   
    
    #task_events = [e for e in events if e['targets'] in ['Vipassana','Samatha']]
    
    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset           
        e['duration'] = duration
        
    evds = eventrelated_dataset(ds, events = events)
    
    return evds
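
A hedged usage sketch, assuming ds is a PyMVPA dataset with sa.targets and sa.chunks; the onset and duration values are arbitrary:

# shift every event onset by 1 volume and clip all events to 4 volumes
evds = load_spatiotemporal_dataset(ds, onset=1, duration=4)
print evds.shape   # one sample per event, features unrolled over the 4 volumes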
Example #3
def build_events_ds(ds, new_duration, **kwargs):
    """
    This function is used to convert a dataset in a event_related dataset. Used for
    transfer learning and clustering, thus a classifier has been trained on a 
    event related dataset and the prediction should be done on the same kind of the 
    dataset.
    
    Parameters    
    ----------
    
    ds : Dataset
        The dataset to be converted
    new_duration : integer
        Is the duration of the single event, if experiment events are of different
        length, it takes the events greater or equal to new_duration.
    kwarsg : dict
        win_number: is the number of window of one single event to be extracted,
        if it is not setted, it assumes the ratio between event duration and new_duration
        overlap:
        
    Returns
    -------
    
    Dataset:
        the event_related dataset
    """
    
    for arg in kwargs:
        if arg == 'win_number':
            win_number = kwargs[arg]
        if arg == 'overlap':
            overlap = kwargs[arg]

    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)
    labels = np.unique(ds.targets)
    current_duration = dict()
    for l in labels:
        d = [e['duration'] for e in events if e['targets'] == l]
        current_duration[l] = np.unique(d)[0]

    def calc_overlap(w, l, n):
        return w - np.floor((l - w)/(n - 1))
    
    def calc_win_number (w, l, o):
        return (l - w)/(w - o) + 1
    
    # If neither win_number nor overlap is given, fall back to the ratio
    # between the event duration and the window duration.
    if 'overlap' not in locals() and 'win_number' not in locals():
        win_number = np.ceil(current_duration[l]/np.float(new_duration))

    if 'overlap' not in locals():
        overlap = calc_overlap(new_duration, current_duration[l], win_number)
    else:
        if overlap >= new_duration:
            overlap = new_duration - 1
            
    if 'win_number' not in locals():
        win_number = calc_win_number(new_duration, current_duration[l], overlap)
        
    new_event_list = []
    
    for e in events:
        onset = e['onset']
        chunks = e['chunks']
        targets = e['targets']
        duration = e['duration']

        for i in np.arange(win_number):
            new_onset = onset + i * (new_duration - overlap)
            
            new_event = dict()
            new_event['onset'] = new_onset
            new_event['duration'] = new_duration
            new_event['targets'] = targets
            new_event['chunks'] = chunks
            
            new_event_list.append(new_event)
    
    
    logger.info('Building new event related dataset...')
    evds = eventrelated_dataset(ds, events = new_event_list)
    
    return evds
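
A hedged usage sketch following the docstring above; ds and the numeric values are assumptions:

# windows of 4 volumes with 2 volumes of overlap, window count derived per event
evds = build_events_ds(ds, 4, overlap=2)

# or fix the number of windows per event and let the overlap be computed
evds = build_events_ds(ds, 4, win_number=3)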
Example #4
def spatiotemporal(ds, **kwargs):
    """Event-related (spatiotemporal) cross-validation: build an event-related
    dataset, run the classifier returned by setup_classifier and, optionally,
    a permutation test."""
      
    onset = 0
    permutations = 0
    
    for arg in kwargs:
        if (arg == 'onset'):
            onset = kwargs[arg]
        if (arg == 'duration'):
            duration = kwargs[arg]
        if (arg == 'enable_results'):
            enable_results = kwargs[arg]
        if (arg == 'permutations'):
            permutations = int(kwargs[arg])
       
    events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)   
    
    if 'duration' in locals():
        events = [e for e in events if e['duration'] >= duration]
    else:
        duration = np.min([ev['duration'] for ev in events])

    for e in events:
        e['onset'] += onset           
        e['duration'] = duration
        
    evds = eventrelated_dataset(ds, events = events) 
    
    [fclf, cvte] = setup_classifier(**kwargs)
    
    logger.info('Cross validation is performing ...')
    res = cvte(evds)
    
    print cvte.ca.stats 
    
    
    if permutations != 0:
        print cvte.ca.null_prob.samples
        dist_len = len(cvte.null_dist.dists())
        err_arr = np.zeros(dist_len)
        for i in range(dist_len):
            err_arr[i] = 1 - cvte.ca.stats.stats['ACC']
    
        total_p_value = np.mean(cvte.null_dist.p(err_arr))
        p_value = cvte.ca.null_prob.samples
    else:
        total_p_value = 0.
        p_value = np.array([0,0])
    
    
    try:
        sensana = fclf.get_sensitivity_analyzer()
        res_sens = sensana(evds)
    except Exception, err:
        allowed_keys = ['map', 'sensitivities', 'stats', 
                        'mapper', 'classifier', 'ds', 
                        'perm_pvalue', 'p']
        
        allowed_results = [None, None, cvte.ca.stats, 
                           evds.a.mapper, fclf, evds, 
                           p_value, total_p_value]
        
        results_dict = dict(zip(allowed_keys, allowed_results))
        results = dict()
        if not 'enable_results' in locals():
            enable_results = allowed_keys[:]
        for elem in enable_results:
            if elem in allowed_keys:
                results[elem] = results_dict[elem]
                
        return results
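
A hedged call sketch; setup_classifier and its own kwargs live elsewhere in this project, so only keywords read by the snippet above are shown, and their values are placeholders. The enable_results entries follow the allowed_keys list:

results = spatiotemporal(ds,
                         duration=4,
                         permutations=0,
                         enable_results=['stats', 'classifier', 'p'])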
Example #5
def preprocess_dataset(ds, type_, **kwargs):
    """
    Preprocess the dataset: detrending of single run and for chunks, the zscoring is also
    done by chunks and by run.
    
    Parameters
    ----------
    ds : Dataset
        The dataset to be preprocessed
    type : string
        The experiment to be processed
    kwargs : dict
        mean_samples - boolean : if samples should be averaged
        label_included - list : list of labels to be included in the dataset
        label_dropped - string : label to be dropped (rest, fixation)
        
    Returns
    -------
    Dataset
        the processed dataset
    
    
    """
    mean = False
    normalization = 'feature'
    # defaults matching the sentinel checks further down
    label_included = ['all']
    label_dropped = 'None'
    for arg in kwargs:
        if (arg == 'mean_samples'):
            mean = kwargs[arg]
        if (arg == 'label_included'):
            label_included = kwargs[arg].split(',')
        if (arg == 'label_dropped'):
            label_dropped = kwargs[arg] 
        if (arg == 'img_dim'):
            img_dim = int(kwargs[arg])
        if (arg == 'normalization'):
            normalization = str(kwargs[arg])
                
    
    logger.info('Dataset preprocessing: Detrending...')
    if len(np.unique(ds.sa['file'])) != 1:
        poly_detrend(ds, polyord = 1, chunks_attr = 'file')
    poly_detrend(ds, polyord = 1, chunks_attr = 'chunks')
    
    
    if  label_dropped != 'None':
        logger.info('Removing labels...')
        ds = ds[ds.sa.targets != label_dropped]
    if  label_included != ['all']:
        ds = ds[np.array([l in label_included for l in ds.sa.targets],
                          dtype='bool')]
        
               
    if str(mean) == 'True':
        logger.info('Dataset preprocessing: Averaging samples...')
        avg_mapper = mean_group_sample(['event_num']) 
        ds = ds.get_mapped(avg_mapper)     
    
    
    if normalization == 'feature' or normalization == 'both':
        logger.info('Dataset preprocessing: Normalization feature-wise...')
        if 'img_dim' in locals() and img_dim == 4:
            zscore(ds, chunks_attr='file')
        zscore(ds)#, param_est=('targets', ['fixation']))
    
    if normalization == 'sample' or normalization == 'both':
        #Normalizing image-wise
        logger.info('Dataset preprocessing: Normalization sample-wise...')
        ds.samples -= np.mean(ds, axis=1)[:, None]
        ds.samples /= np.std(ds, axis=1)[:, None]
        
        ds.samples[np.isnan(ds.samples)] = 0
    
    
    ds.a.events = find_events(#event= ds.sa.event_num, 
                              chunks = ds.sa.chunks, 
                              targets = ds.sa.targets)
    
    return ds
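
A hedged call sketch based on the docstring; the label names and the img_dim/normalization values are assumptions about this project's data:

ds = preprocess_dataset(ds, 'task',
                        mean_samples=False,
                        label_dropped='fixation',
                        label_included='all',
                        img_dim=3,
                        normalization='both')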
Example #6
import os

import numpy as np
import mvpa2.suite as mvpa  # assumed import behind the mvpa.* calls below

from sklearn.linear_model import RidgeCV,BayesianRidge
from sklearn.preprocessing import StandardScaler
from sklearn.cross_decomposition import PLSRegression
from pandas import read_csv
from sklearn.externals import joblib
from encoding_helpers import *

from itertools import combinations

T3 = False


for subj in xrange(12,13):
    subj_preprocessed_path = os.path.join('/home','mboos','SpeechEncoding','PreProcessed','subj%02dnpp.gzipped.hdf5' % subj)
    s1ds = mvpa.h5load(subj_preprocessed_path)
    events = mvpa.find_events(targets=s1ds.sa.targets,chunks=s1ds.sa.chunks)
    rvstr_TS = rolling_window(s1ds.sa['targets'][::-1].copy(),4)

    s1ds.sa['targets'].value[(np.where(np.apply_along_axis(
        lambda x: len(np.unique(x)) == 1 and x[0] != 'rest',
        1, rvstr_TS)[::-1])[0] + 3)] = 'rest'
    labelsTS = s1ds.sa['targets'].value.copy()

    # unroll audio features
    # cut last 500ms
    featureTS = np.zeros((labelsTS.shape[0],20*ft_freq))
    featureTS[labelsTS!='rest',:] = np.reshape(
        np.vstack([feature_dict[ev['targets']][:60,:]
                   for ev in events if ev['targets']!='rest']),
        (-1,ft_freq*20))

    # now lag the audiofeatures
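
rolling_window above is imported from encoding_helpers via the star import; a common NumPy stride-trick implementation that matches how it is used on the 1-D targets array would look like the sketch below (an assumption, not the project's actual helper):

import numpy as np

def rolling_window(a, window):
    # overlapping windows of 'a' as an (len(a)-window+1, window) view
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)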
Example #7
 
# Load the pymvpa dataset.
try:
    logger.info('Loading dataset...')
    ds = fmri_dataset(fmri_list, targets=attr.targets, chunks=attr.chunks, mask=mask)
    logger.info('Dataset loaded...')
except ValueError, e:
    logger.error(subj + ' *** ERROR: ' + str(e))
    del fmri_list
    return 0

# Update Dataset attributes
#
# TODO: Evaluate if it is useful to build a dedicated function
ev_list = []
events = find_events(targets = ds.sa.targets, chunks = ds.sa.chunks)
for i in range(len(events)):
    duration = events[i]['duration']
    for j in range(duration):
        ev_list.append(i+1)

ds.a['events'] = events  # Update event field
ds.sa['events_number'] = ev_list # Update event number

# Name added to do leave one subject out analysis
ds.sa['name'] = [subj for i in range(len(ds.sa.chunks))]

try:
    for k in attr.keys():
        ds.sa[k] = attr[k]
except BaseException, e: