Пример #1
0
def run(args):
    '''
    gather ROI time series of every participant that has all 4 runs
    and pickle them as one dictionary

    args.input_data: root directory, searched recursively for
        files matching `*MSMAll_hp2000_clean.dtseries.nii`
    args.output_data: directory the pickle is written to
        (created if it does not exist)
    args.roi: number of ROIs
    args.net: number of subnetworks
    (args is also handed to `_get_roi_ts` unchanged)

    saves:
    {ID: array (time x roi x number of runs)} to
    <args.output_data>/data_MOVIE_runs_roi_<roi>_net_<net>_ts.pkl
    runs shorter than the longest run of a participant are
    zero-padded at the end
    '''
    k_runs = 4

    # load parcellation file
    parcel, nw_info = _get_parcel(args.roi, args.net)

    # use glob to get all files with `ext`
    ext = '*MSMAll_hp2000_clean.dtseries.nii'
    files = [
        path for root, _dirs, _names in os.walk(args.input_data)
        for path in glob(os.path.join(root, ext))
    ]

    # group files by participant in a single pass
    # ID <=> individual (6 characters preceding '/MNINonLinear')
    # grouping on the extracted ID avoids re-scanning every path per
    # participant and cannot match an ID that merely appears
    # somewhere else in a path
    files_by_id = {}
    for path in files:
        ID = path.split('/MNINonLinear')[0][-6:]
        files_by_id.setdefault(ID, []).append(path)
    participants = np.sort(list(files_by_id))
    _info('Number of participants = %d' % len(participants))

    data = {}
    for ii, ID in enumerate(participants):
        ID_files = np.sort(files_by_id[str(ID)])

        # skip individuals that do not have all 4 runs
        if len(ID_files) != k_runs:
            _info('%s not processed' % ID)
            continue

        _info('%s: %d/%d' % (ID, (ii + 1), len(participants)))
        ID_ts = [
            _get_roi_ts(path, parcel, nw_info, args)
            for path in ID_files
        ]

        # ID_ts have different temporal length
        # pad zeros
        # (time x roi x number of runs)
        k_time = max(run_ts.shape[0] for run_ts in ID_ts)
        save_ts = np.zeros((k_time, args.roi, k_runs))
        for run_idx, run_ts in enumerate(ID_ts):
            save_ts[:run_ts.shape[0], :, run_idx] = run_ts

        data[ID] = save_ts

    SAVE_DIR = args.output_data
    # exist_ok avoids the check-then-create race
    os.makedirs(SAVE_DIR, exist_ok=True)

    save_path = (SAVE_DIR + '/data_MOVIE_runs_roi_%d_net_%d_ts.pkl' %
                 (args.roi, args.net))
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
def _bhv_class_df(args):
    '''
    per-timepoint table for behavioral prediction
    *** shared by classification and regression
    args.mode: 'class' or 'bhv'

    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.bhv: behavioral measure
    args.k_class: number of behavioral groups
    args.cutoff: percentile for participant cutoff
    args.invert_flag: all-but-one subnetwork

    one row per (run, subject, clip, timepoint):
    'Subject', 'timepoint' (index within the clip),
    'feat_<k>' per selected ROI, 'c' (clip label),
    plus the columns of bhv_df merged on 'Subject'

    return:
    (df, bhv_df)
    '''
    # defaults for optional arguments
    given = vars(args)
    if 'invert_flag' not in given:
        args.invert_flag = False
    if 'mode' not in given:
        args.mode = 'class'

    pkl_name = 'roi_%d_net_%d_ts.pkl' %(args.roi, args.net)
    load_path = args.input_data + '/data_MOVIE_runs_' + pkl_name
    with open(load_path, 'rb') as f:
        data = pickle.load(f)

    # behavioral table; only subjects listed in it are kept
    subject_list = np.sort(list(data.keys()))
    bhv_df = _group_bhv_df(args, subject_list)
    cutoff_list = bhv_df['Subject'].values.astype(str)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # boolean ROI mask: everything, or a single subnetwork
    if args.subnet == 'wb':
        idx = np.ones(args.roi, dtype=bool)
    else:
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]
        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        idx = (np.sort(nw_info) == args.subnet)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    clip_y = _get_clip_labels()

    # the cutoff filter is the same for every run: resolve it once
    kept_subjects = [subj for subj in data if subj in cutoff_list]

    rows = []
    for run in range(K_RUNS):

        print('loading run %d/%d' %(run+1, K_RUNS))
        run_name = 'MOVIE%d' %(run+1) #MOVIEx_7T_yz

        # timing rows of this run
        in_run = timing_file['run'].str.contains(run_name)
        timing_df = timing_file[in_run].reset_index(drop=True)

        for subject in kept_subjects:
            # subject data is (time x roi x run)
            roi_ts = data[subject][:, idx, run]
            feat_names = ['feat_%d' %(k) for k in range(roi_ts.shape[1])]

            for _, clip in timing_df.iterrows():
                start = int(np.floor(clip['start_tr']))
                stop = int(np.ceil(clip['stop_tr']))

                # label of the clip
                c = clip_y[clip['clip_name']]

                for t in range(stop - start):
                    # key order fixes the column order of the frame
                    row = {'Subject': subject, 'timepoint': t}
                    row.update(zip(feat_names, roi_ts[t + start, :]))
                    row['c'] = c
                    rows.append(row)

    df = pd.DataFrame(rows)
    df['Subject'] = df['Subject'].astype(int)
    # merges on all subject rows!
    df = df.merge(bhv_df, on='Subject', how='inner')

    return df, bhv_df
def _clip_class_rest_df(args, run):
    '''
    data for 15 clip + rest visualization
    each run is saved individually

    run: 0, 1, 2, 3 (one of the 4 runs)

    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.invert_flag: all-but-one subnetwork
    args.r_roi: number of random ROIs to pick
    args.r_seed: random seed for picking ROIs

    save each timepoint as feature vector
    append class label 'y' based on clip / rest segment:
    clip jj of the run -> jj (0 .. k-1, k = number of clips)
    rest before clip jj -> k + jj
    rest after the last clip -> 2k

    NOTE: the label vector is sized from the first subject in the
    pickle; every subject is assumed to have at most that many
    timepoints

    return:
    pandas df ('Subject', 'timepoint', 'y', 'feat_<k>' per ROI)

    raises:
    ValueError if the timing file has no clip for the run
    '''
    # optional arguments
    d = vars(args)
    if 'invert_flag' not in d:
        args.invert_flag = False
    if 'r_roi' not in d:
        args.r_roi = 0
        args.r_seed = 0

    load_path = (args.input_data + '/data_MOVIE_runs_' +
        'roi_%d_net_%d_ts.pkl' %(args.roi, args.net))

    with open(load_path, 'rb') as f:
        data = pickle.load(f)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # pick either all ROIs or subnetworks
    if args.subnet != 'wb':
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]

        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        nw_info = np.sort(nw_info)
        idx = (nw_info == args.subnet)
    else:
        idx = np.ones(args.roi).astype(bool)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    # if random selection,
    # overwrite everything above
    if args.r_roi > 0:
        random.seed(args.r_seed)
        idx = np.zeros(args.roi).astype(bool)
        # random sample without replacement
        samp = random.sample(range(args.roi), k=args.r_roi)
        idx[samp] = True

    print('loading run %d' %(run+1))
    run_name = 'MOVIE%d' %(run+1) #MOVIEx_7T_yz
    timing_df = timing_file[timing_file['run'].str.contains(run_name)]
    timing_df = timing_df.reset_index(drop=True)

    k_class = len(timing_df)
    if k_class == 0:
        # without clips there is nothing to label; fail loudly rather
        # than with an unbound-variable error further down
        raise ValueError(
            'no clips found for run %s in data/videoclip_tr_lookup.csv'
            % run_name)

    # get unique id for each segment including rest segments
    first_ts = data[list(data.keys())[0]]
    length = first_ts[:, :, run].shape[0]
    y_vec = np.ones(length)*k_class

    # `stop` is the end of the previous clip (run start for the first
    # clip), so [stop:start] is always the rest preceding the clip
    stop = 0
    tag = k_class
    for jj, clip in timing_df.iterrows():
        start = int(np.floor(clip['start_tr']))
        y_vec[stop:start] = tag
        tag += 1
        stop = int(np.ceil(clip['stop_tr']))
        y_vec[start:stop] = jj
    # rest after the last clip
    y_vec[stop:] = tag

    table = []
    for subject in data:
        roi_ts = data[subject][:, idx, run]
        feat_names = ['feat_%d' %(feat) for feat in range(roi_ts.shape[1])]

        for t, act in enumerate(roi_ts):
            # key order fixes the column order of the frame
            t_data = {'Subject': subject, 'timepoint': t, 'y': y_vec[t]}
            t_data.update(zip(feat_names, act))
            table.append(t_data)

    df = pd.DataFrame(table)
    df['Subject'] = df['Subject'].astype(int)

    return df
Пример #4
0
def _clip_class_df(args):
    '''
    per-timepoint table for 15-way clip classification

    args.input_data: directory holding the pickled time series
    args.roi: number of ROIs
    args.net: number of subnetworks (7 or 17)
    args.subnet: subnetwork; 'wb' if all subnetworks
        (a 'minus_' prefix is stripped from args.subnet in place)
    args.invert_flag: all-but-one subnetwork
    args.r_roi: number of random ROIs to pick
    args.r_seed: random seed for picking ROIs

    one row per (run, subject, clip, timepoint):
    'Subject', 'timepoint' (index within the clip),
    'feat_<k>' per selected ROI, 'y' (clip label)

    return:
    pandas df
    '''
    # defaults for optional arguments
    given = vars(args)
    if 'invert_flag' not in given:
        args.invert_flag = False
    if 'r_roi' not in given:
        args.r_roi = 0
        args.r_seed = 0

    pkl_name = 'roi_%d_net_%d_ts.pkl' % (args.roi, args.net)
    load_path = args.input_data + '/data_MOVIE_runs_' + pkl_name
    with open(load_path, 'rb') as f:
        data = pickle.load(f)

    # where are the clips within the run?
    timing_file = pd.read_csv('data/videoclip_tr_lookup.csv')

    # boolean ROI mask: everything, or a single subnetwork
    if args.subnet == 'wb':
        idx = np.ones(args.roi, dtype=bool)
    else:
        if 'minus' in args.subnet:
            # remove 'minus_' prefix
            args.subnet = args.subnet.split('minus_')[1]
        _, nw_info = _get_parcel(args.roi, args.net)
        # ***roi ts sorted in preprocessing
        idx = (np.sort(nw_info) == args.subnet)

    # all-but-one subnetwork
    if args.subnet and args.invert_flag:
        idx = ~idx

    # a random ROI subset, when requested, replaces the mask above
    if args.r_roi > 0:
        random.seed(args.r_seed)
        # random sample without replacement
        samp = random.sample(range(args.roi), k=args.r_roi)
        idx = np.zeros(args.roi, dtype=bool)
        idx[samp] = True

    clip_y = _get_clip_labels()

    rows = []
    for run in range(K_RUNS):

        print('loading run %d/%d' % (run + 1, K_RUNS))
        run_name = 'MOVIE%d' % (run + 1)  #MOVIEx_7T_yz

        # timing rows of this run
        in_run = timing_file['run'].str.contains(run_name)
        timing_df = timing_file[in_run].reset_index(drop=True)

        for subject in data:
            # subject data is (time x roi x run)
            roi_ts = data[subject][:, idx, run]
            feat_names = ['feat_%d' % (k) for k in range(roi_ts.shape[1])]

            for _, clip in timing_df.iterrows():
                start = int(np.floor(clip['start_tr']))
                stop = int(np.ceil(clip['stop_tr']))

                # label of the clip
                y = clip_y[clip['clip_name']]

                for t in range(stop - start):
                    # key order fixes the column order of the frame
                    row = {'Subject': subject, 'timepoint': t}
                    row.update(zip(feat_names, roi_ts[t + start, :]))
                    row['y'] = y
                    rows.append(row)

    df = pd.DataFrame(rows)
    df['Subject'] = df['Subject'].astype(int)

    return df