Example #1
def run(args):
    _info(args.bhv)
    _info(args.subnet)
    # set to regression mode
    args.mode = 'reg'
    # use data from all participants
    args.cutoff = 0.5
    '''
    get dataframe
    '''
    # all-but-subnetwork (invert_flag)
    if 'minus' in args.subnet:
        args.invert_flag = True

    res_path = (RES_DIR + '/roi_%d_net_%d' % (args.roi, args.net) + '_nw_%s' %
                (args.subnet) + '_bhv_%s' % (args.bhv) + '_trainsize_%d' %
                (args.train_size) + '_kfold_%d_k_hidden_%d' %
                (args.k_fold, args.k_hidden) + '_k_layers_%d_batch_size_%d' %
                (args.k_layers, args.batch_size) + '_num_epochs_%d_z_%d.pkl' %
                (args.num_epochs, args.zscore))

    if not os.path.isfile(res_path):
        df, bhv_df = _bhv_reg_df(args)
        results = {}
        results['train_mode'] = _train(df, bhv_df, args)
        results['test_mode'] = _test(df, bhv_df, args)
        with open(res_path, 'wb') as f:
            pickle.dump(results, f)
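
Example #1 expects an argparse-style namespace; a minimal sketch (not part of the original code) of how such an `args` object could be assembled for a dry run, with placeholder values for every attribute:

from argparse import Namespace

# All values below are placeholders, not the project's real defaults.
args = Namespace(
    bhv='PMAT24_A_CR',        # hypothetical behaviour measure
    subnet='wb',              # a 'minus_<name>' value would set invert_flag
    roi=300, net=7,
    train_size=100, k_fold=5,
    k_hidden=32, k_layers=1,
    batch_size=16, num_epochs=50,
    zscore=1, invert_flag=False,
)
run(args)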
Example #2
def run(args):
    _info(args.roi_name)

    param_grid = {'k_hidden': args.k_hidden, 'k_layers': args.k_layers}
    param_grid = [comb for comb in ParameterGrid(param_grid)]
    
    print(len(param_grid))
    print(len(args.k_layers))


    if len(param_grid) == 1:
        res_path = (RES_DIR + 
                    '/%s_%d_net_%d' %(args.roi_name, args.roi, args.net) +
                    '_trainsize_%d' %(args.train_size) +
                    '_k_hidden_%d' %(args.k_hidden[0]) +
                    '_k_layers_%d_batch_size_%d' %(args.k_layers[0], args.batch_size) +
                    '_num_epochs_%d_z_%d.pkl' %(args.num_epochs, args.zscore))

        mod_path = res_path.replace('results','models')
        mod_path = mod_path.replace('pkl','h5')

    elif len(param_grid) > 1:
        res_path = (RES_DIR + 
                    '/%s_%d_net_%d' %(args.roi_name, args.roi, args.net) +
                    '_trainsize_%d' %(args.train_size) +
                    '_kfold_%d' %(args.k_fold) +
                    '_batch_size_%d' %(args.batch_size) +
                    '_num_epochs_%d_z_%d_GSCV.pkl' %(args.num_epochs, args.zscore))
        
    '''
    get dataframe
    '''
    if not os.path.isfile(res_path):
        start = time.time()
        df = _clip_class_df(args)
        print('data loading time: %.2f seconds' %(time.time()-start))
        if len(param_grid) == 1:
            results, results_prob, model = _test(df,args,param_grid[0])
            # save results
            with open(res_path, 'wb') as f:
                pickle.dump([results, results_prob], f)
            # save model
            if not os.path.exists(os.path.dirname(mod_path)):
                os.makedirs(os.path.dirname(mod_path), exist_ok=True)

            model.save(mod_path)

        elif len(param_grid) > 1:
            results = {}
            for mm, params in enumerate(param_grid):
                print('---')
                print('model{:02d}'.format(mm) + ': ')
                print(params)
                print('---')
                results['model{:02d}'.format(mm)] = _train(df, args, params)
            # save grid-search CV results
            with open(res_path, 'wb') as f:
                pickle.dump([results, param_grid], f)
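
The single-model versus grid-search branches above hinge on how many combinations ParameterGrid expands from the k_hidden and k_layers lists; a small illustration with arbitrary values:

from sklearn.model_selection import ParameterGrid

# Two values per hyperparameter -> 4 combinations, so the GSCV branch runs;
# a single value in each list would yield exactly one combination instead.
grid = list(ParameterGrid({'k_hidden': [32, 64], 'k_layers': [1, 2]}))
print(len(grid))   # 4
print(grid[0])     # {'k_hidden': 32, 'k_layers': 1}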
Example #3
def run(args):
    
    _info(args.roi_name)
    # Get all combinations of the parameter grid
    param_grid = {'k_hidden':args.k_hidden,'k_layers':args.k_layers}
    param_grid = [comb for comb in ParameterGrid(param_grid)]

    _info('Number of hyperparameter combinations: '+str(len(param_grid)))
    _info(args.roi_name)
    _info(args.input_data)

    '''
    get dataframe
    '''
    
    if len(param_grid) > 1:
        res_path = (RES_DIR + 
            '/%s_near_miss_%d_trainsize_%d' %(args.roi_name,args.near_miss,args.train_size) +
            '_kfold_%d' %(args.k_fold) +
            '_batch_size_%d' %(args.batch_size) +
            '_num_epochs_%d_z_%d_GSCV.pkl' %(args.num_epochs, args.zscore))

        if not os.path.isfile(res_path):
            df = _emo_class_df(args)
            results = {}
            for mm, params in enumerate(param_grid):
                print('---')
                print('model{:02d}'.format(mm) + ': ')
                print(params)
                print('---')
                results['model{:02d}'.format(mm)] = _train(df, args, params)

            with open(res_path, 'wb') as f:
                pickle.dump([results, param_grid], f)
                
    elif len(param_grid) == 1:
        res_path = (RES_DIR + 
            '/%s_near_miss_%d_trainsize_%d' %(args.roi_name,args.near_miss,args.train_size) +
            '_kfold_%d_k_hidden_%d' %(args.k_fold, args.k_hidden[0]) +
            '_k_layers_%d_batch_size_%d' %(args.k_layers[0], args.batch_size) +
            '_num_epochs_%d_z_%d.pkl' %(args.num_epochs, args.zscore))
        
        mod_path = res_path.replace('results','models')
        mod_path = mod_path.replace('pkl','h5')
        
        if not os.path.isfile(res_path):
            df = _emo_class_df(args)
            results = {}
            results['train_mode'] = _train(df, args, param_grid[0])
            results['test_mode'], _, model = _test(df, args, param_grid[0])

            # save results
            with open(res_path, 'wb') as f:
                pickle.dump(results, f)

            # save model
            if not os.path.exists(os.path.dirname(mod_path)):
                os.makedirs(os.path.dirname(mod_path),exist_ok=True)

            model.save(mod_path)
Example #4
def _saliency(df, args):
    '''
    save gradient with respect to the input
    to use as proxy for importance
    '''
    then = time.time()
    _info('save gradients')

    subject_list = np.unique(df['Subject'])
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]
    k_class = len(np.unique(df['y']))
    print('number of unique sequences = %d' % k_class)

    features = [ii for ii in df.columns if 'feat' in ii]

    gru_model = keras.models.load_model(args.gru_model_path)
    gru_model.trainable = False

    mat = {}
    for i_class in range(k_class):
        mat[i_class] = []

    for subject in tqdm(test_list):
        subj_df = df[df['Subject'] == subject]
        subj_df.reset_index(inplace=True)
        trials = np.split(subj_df,
                          subj_df[subj_df['timepoint'] == 0].index)[1:]
        for ii, trial in enumerate(trials):
            seq = trial[trial['Subject'] == subject][features].values
            label_seq = trial[trial['Subject'] == subject]['y'].values
            class_label = label_seq[0]
            X = [seq]
            X_padded = tf.keras.preprocessing.sequence.pad_sequences(
                X, padding="post", dtype='float')

            # gradient w.r.t. the trial's own class label
            gradX = _compute_saliency_maps(gru_model, X_padded, class_label)

            mat[class_label].append(gradX)

    for i_class in range(k_class):
        mat[i_class] = np.stack(mat[i_class],
                                axis=0)  #participant x time x vox

    return mat
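
_compute_saliency_maps is not included in this listing; a minimal sketch of what such a helper could look like, assuming the loaded Keras model returns class scores along its last axis:

import tensorflow as tf

def _compute_saliency_maps(model, X_padded, i_class):
    # Hypothetical implementation: gradient of the target-class score
    # with respect to the (padded) input sequence.
    x = tf.convert_to_tensor(X_padded, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(x)
        scores = model(x)                      # (..., k_class)
        target = tf.reduce_sum(scores[..., i_class])
    return tape.gradient(target, x).numpy()    # same shape as X_padded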
Example #5
def run(args):
    _info(args.subnet)
    '''
    get dataframe
    '''
    # all-but-subnetwork (invert_flag)
    if 'minus' in args.subnet:
        args.invert_flag = True

    res_path = (RES_DIR + '/%s_%d_net_%d' %
                (args.roi_name, args.roi, args.net) + '_nw_%s' %
                (args.subnet) + '_trainsize_%d' % (args.train_size) +
                '_kdim_%d_batch_size_%d' % (args.k_dim, args.batch_size) +
                '_num_epochs_%d_z_%d.pkl' % (args.num_epochs, args.zscore))
    if not os.path.isfile(res_path):
        df = _clip_class_df(args)
        results = _test(df, args)
        with open(res_path, 'wb') as f:
            pickle.dump(results, f)
Example #6
def run(args):

    # load parcellation file
    parcel, nw_info = _get_parcel(args.roi, args.net)

    # use glob to get all files with `ext`
    ext = '*MSMAll_hp2000_clean.dtseries.nii'
    files = [
        y for x in os.walk(args.input_data)
        for y in glob(os.path.join(x[0], ext))
    ]

    # get list of participants
    # ID <=> individual
    participants = set()
    for file in files:
        ID = file.split('/MNINonLinear')[0][-6:]
        participants.add(ID)
    participants = np.sort(list(participants))
    _info('Number of participants = %d' % len(participants))

    data = {}
    for ii, ID in enumerate(participants):
        ID_files = [file for file in files if ID in file]
        ID_files = np.sort(ID_files)

        # if individual has all 4 runs
        if len(ID_files) == 4:
            _info('%s: %d/%d' % (ID, (ii + 1), len(participants)))
            ID_ts, t = [], []
            for path in ID_files:
                roi_ts = _get_roi_ts(path, parcel, nw_info, args)
                ID_ts.append(roi_ts)
                t.append(roi_ts.shape[0])

            k_time = np.max(t)
            '''
            ID_ts have different temporal length
            pad zeros
            (time x roi x number of runs)
            '''
            save_ts = np.zeros((k_time, args.roi, 4))
            for run in range(4):
                run_ts = ID_ts[run]
                t = run_ts.shape[0]
                save_ts[:t, :, run] = run_ts

            data[ID] = save_ts

        else:
            _info('%s not processed' % ID)

    SAVE_DIR = args.output_data
    if not os.path.exists(SAVE_DIR):
        os.makedirs(SAVE_DIR)

    save_path = (SAVE_DIR + '/data_MOVIE_runs_roi_%d_net_%d_ts.pkl' %
                 (args.roi, args.net))
    with open(save_path, 'wb') as f:
        pickle.dump(data, f)
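
The pickle written above maps participant IDs to zero-padded arrays of shape (time x roi x 4 runs); a short sketch of reading it back (the path is a placeholder):

import pickle

with open('data_MOVIE_runs_roi_300_net_7_ts.pkl', 'rb') as f:  # placeholder path
    data = pickle.load(f)

ID = next(iter(data))         # any participant ID stored in the file
print(ID, data[ID].shape)     # (k_time, args.roi, 4)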
Example #7
def run(args):
    _info(args.roi_name)

    # Get all combinations of the parameter grid
    param_grid = {'k_hidden': args.k_hidden, 'k_layers': args.k_layers}
    param_grid = [comb for comb in ParameterGrid(param_grid)]

    print(len(param_grid))
    print(len(args.k_layers))

    _info('Number of hyperparameter combinations: ' + str(len(param_grid)))
    _info(args.roi_name)
    '''
    get dataframe
    '''
    if len(param_grid) == 1:
        res_path = (RES_DIR + '/%s_near_miss_%d_trainsize_%d' %
                    (args.roi_name, args.near_miss, args.train_size) +
                    '_kfold_%d_k_hidden_%d' % (args.k_fold, args.k_hidden[0]) +
                    '_k_layers_%d_batch_size_%d' %
                    (args.k_layers[0], args.batch_size) +
                    '_num_epochs_%d_z_%d.pkl' % (args.num_epochs, args.zscore))

        gru_mod_path = res_path.replace('results', 'models')
        gru_mod_path = gru_mod_path.replace('pkl', 'h5')
        gru_model_path = gru_mod_path.replace('saliency', 'gru')
        args.gru_model_path = gru_model_path

    elif len(param_grid) > 1:
        res_path = (RES_DIR + '/%s_near_miss_%d_trainsize_%d' %
                    (args.roi_name, args.near_miss, args.train_size) +
                    '_kfold_%d_k_hidden_%d' % (args.k_fold, args.k_hidden[0]) +
                    '_k_layers_%d_batch_size_%d' %
                    (args.k_layers[0], args.batch_size) +
                    '_num_epochs_%d_z_%d.pkl' % (args.num_epochs, args.zscore))

    if not os.path.isfile(res_path):
        df = _emo_class_df(args)

        if len(param_grid) == 1:
            mat = _saliency(df, args)

            # save results
            with open(res_path, 'wb') as f:
                pickle.dump(mat, f)

        elif len(param_grid) > 1:
            results = {}
            for mm, params in enumerate(param_grid):
                print('---')
                print('model{:02d}'.format(mm) + ': ')
                print(params)
                print('---')
                results['model{:02d}'.format(mm)] = _train(df, args, params)
            # save grid-search CV results
            with open(res_path, 'wb') as f:
                pickle.dump([results, param_grid], f)
Example #8
def run(args):
    _info(args.roi_name)

    # Get all combinations of the parameter grid
    param_grid = {'k_hidden': args.k_hidden, 'k_layers': args.k_layers}
    param_grid = [comb for comb in ParameterGrid(param_grid)]

    print(len(param_grid))
    print(len(args.k_layers))

    _info('Number of hyperparameter combinations: ' + str(len(param_grid)))
    _info(args.roi_name)
    '''
    get dataframe
    '''
    if len(param_grid) == 1:
        res_path = (RES_DIR + '/%s_%d_net_%d' %
                    (args.roi_name, args.roi, args.net) + '_trainsize_%d' %
                    (args.train_size) + '_k_hidden_%d' % (args.k_hidden[0]) +
                    '_k_layers_%d_batch_size_%d' %
                    (args.k_layers[0], args.batch_size) +
                    '_num_epochs_%d_z_%d.pkl' % (args.num_epochs, args.zscore))

        gru_mod_path = res_path.replace('results', 'models')
        gru_mod_path = gru_mod_path.replace('pkl', 'h5')
        gru_model_path = gru_mod_path.replace('null_saliency', 'gru')
        args.gru_model_path = gru_model_path

    elif len(param_grid) > 1:
        res_path = (RES_DIR + '/%s_%d_net_%d' %
                    (args.roi_name, args.roi, args.net) + '_trainsize_%d' %
                    (args.train_size) + '_kfold_%d' % (args.k_fold) +
                    '_batch_size_%d' % (args.batch_size) +
                    '_num_epochs_%d_z_%d_GSCV.pkl' %
                    (args.num_epochs, args.zscore))

    if not os.path.isfile(res_path):
        start = time.time()
        #df = _clip_class_df(args)
        with open('data/df.pkl', 'rb') as f:
            df = pickle.load(f)
        print('data loading time: %.2f seconds' % (time.time() - start))

        if len(param_grid) == 1:
            _saliency(df, args)
        elif len(param_grid) > 1:
            results = {}
            for mm, params in enumerate(param_grid):
                print('---')
                print('model{:02d}'.format(mm) + ': ')
                print(params)
                print('---')
                results['model{:02d}'.format(mm)] = _train(df, args, params)
            # save grid-search CV results
            with open(res_path, 'wb') as f:
                pickle.dump([results, param_grid], f)
Example #9
def run(args):

    _info(args.bhv)
    _info(args.subnet)
    # set to regression mode
    args.mode = 'reg'
    '''
    get dataframe
    '''
    # all-but-subnetwork (invert_flag)
    if 'minus' in args.subnet:
        args.invert_flag = True

    res_path = (RES_DIR + '/roi_%d_net_%d' % (args.roi, args.net) + '_nw_%s' %
                (args.subnet) + '_bhv_%s_cutoff_%0.1f' %
                (args.bhv, args.cutoff) + '_corrthresh_%0.1f' %
                (args.corr_thresh) + '_kfold_%d_z_%d.pkl' %
                (args.k_fold, args.zscore))
    if not os.path.isfile(res_path):
        df, bhv_df = _bhv_reg_df(args)
        results = _train(df, bhv_df, args)
        with open(res_path, 'wb') as f:
            pickle.dump(results, f)
Example #10
def _saliency(df, args):
    '''
    save gradient with respect to the input
    to use as proxy for importance
    '''
    then = time.time()
    _info('save gradients')

    subject_list = np.unique(df['Subject'])
    k_class = len(np.unique(df['y']))
    print('number of unique sequences = %d' % k_class)

    # create columns for gradients
    # don't use number of ROI in case of subnetwork
    features = [ii for ii in df.columns if 'feat' in ii]
    grads = ['grad_%d' % ii for ii in range(len(features))]
    for grad in grads:
        df.loc[:, grad] = np.nan

    gru_model = keras.models.load_model(args.gru_model_path)
    gru_model.trainable = False

    for i_class in tqdm(range(k_class)):
        for subject in subject_list:

            if i_class == 0:  # must handle test retest differently
                seqs = df[(df['Subject'] == subject)
                          & (df['y'] == 0)][features].values
                gradX = np.zeros(seqs.shape)

                k_time = int(seqs.shape[0] / K_RUNS)
                for i_run in range(K_RUNS):
                    seq = seqs[i_run * k_time:(i_run + 1) * k_time, :]
                    if args.zscore:
                        # zscore each seq that goes into model
                        seq = (1 / np.std(seq)) * (seq - np.mean(seq))

                    X = [seq]
                    X_padded = tf.keras.preprocessing.sequence.pad_sequences(
                        X, padding="post", dtype='float')

                    gX = _compute_saliency_maps(gru_model, X_padded, i_class)
                    gradX[i_run * k_time:(i_run + 1) * k_time, :] = gX

            else:
                seq = df[(df['Subject'] == subject)
                         & (df['y'] == i_class)][features].values
                if args.zscore:
                    # zscore each seq that goes into model
                    seq = (1 / np.std(seq)) * (seq - np.mean(seq))

                X = [seq]
                X_padded = tf.keras.preprocessing.sequence.pad_sequences(
                    X, padding="post", dtype='float')

                gradX = _compute_saliency_maps(gru_model, X_padded, i_class)

            df.loc[(df['Subject'] == subject) & (df['y'] == i_class),
                   grads] = gradX.squeeze()

    sal_df = df[['Subject', 'timepoint', 'y'] + grads]

    return sal_df
Example #11
def _test(df, args):
    '''
    test subject results
    view only for best cross-val parameters
    '''
    _info('test mode')

    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(len(test_list))
    results['val'] = np.zeros(len(test_list))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_test'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((len(test_list), clip_time[ii]))
        results['t_test'][ii] = np.zeros((len(test_list), clip_time[ii]))
    '''
    init model
    '''
    model = TCNClassifier(k_feat, args.k_hidden, args.k_wind, args.k_class)
    model.to(args.device)
    print(model)

    lossfn = nn.CrossEntropyLoss(ignore_index=-100)
    # if input is cuda, loss function is auto cuda
    opt = torch.optim.Adam(model.parameters())

    # get train, val sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)

    max_length = torch.max(train_len)
    '''
    train classifier
    '''
    permutation = torch.randperm(X_train.size()[0])
    losses = np.zeros(args.num_epochs)
    #
    then = time.time()

    for epoch in range(args.num_epochs):
        for i in range(0, X_train.size()[0], args.batch_size):

            indices = permutation[i:i + args.batch_size]
            batch_x, batch_y = X_train[indices], y_train[indices]

            y_pred = model(batch_x)
            loss = lossfn(y_pred.view(-1, args.k_class), batch_y.view(-1))

            opt.zero_grad()
            loss.backward()
            opt.step()

        losses[epoch] = loss.item()  # record the last mini-batch loss of this epoch

    _info(losses)
    #
    print('--- train time =  %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_train, y_train, train_len,
                                 max_length, clip_time, len(train_list))
    results['train'] = a
    print('tacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_train'][ii] = a_t[ii]
    results['train_conf_mtx'] = c_mtx
    '''
    results on test data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_test, y_test, test_len, max_length,
                                 clip_time, len(test_list))
    results['test'] = a
    print('sacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_test'][ii] = a_t[ii]
    results['test_conf_mtx'] = c_mtx

    return results
Example #12
def _train(df, args):
    '''
    cross-validation results
    '''
    # set pytorch device
    torch.manual_seed(K_SEED)
    use_cuda = torch.cuda.is_available()
    args.device = torch.device('cuda:0' if use_cuda else 'cpu')
    if use_cuda:
        _info('cuda')
    else:
        _info('cpu')

    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)

    # confusion matrices
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))

    i_fold = 0
    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=K_SEED)

    for train, val in kf.split(train_list):

        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train]
        val_subs = [train_list[ii] for ii in val]
        '''
        init model
        '''
        model = TCNClassifier(k_feat, args.k_hidden, args.k_wind, args.k_class)
        model.to(args.device)
        print(model)

        lossfn = nn.CrossEntropyLoss(ignore_index=-100)
        # if input is cuda, loss function is auto cuda
        opt = torch.optim.Adam(model.parameters())

        # get train, val sequences
        X_train, train_len, y_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val = _get_seq(df, val_subs, args)

        max_length = torch.max(train_len)
        '''
        train classifier
        '''
        permutation = torch.randperm(X_train.size()[0])
        losses = np.zeros(args.num_epochs)
        #
        then = time.time()

        for epoch in range(args.num_epochs):
            for i in range(0, X_train.size()[0], args.batch_size):

                indices = permutation[i:i + args.batch_size]
                batch_x, batch_y = X_train[indices], y_train[indices]

                y_pred = model(batch_x)
                loss = lossfn(y_pred.view(-1, args.k_class), batch_y.view(-1))

                opt.zero_grad()
                loss.backward()
                opt.step()

            losses[epoch] = loss.item()  # record the last mini-batch loss of this epoch

        _info(losses)
        #
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))
        '''
        results on train data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_train, y_train, train_len, max_length,
                                clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx
        '''
        results on val data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_val, y_val, val_len, max_length,
                                clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx

        i_fold += 1

    return results
Example #13
def _train(df, args, params):
    '''
    cross-validation results
    '''
    _info('train mode: Running grid search')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(args.k_fold)
    results['val'] = np.zeros(args.k_fold)

    # confusion matrices
    results['train_conf_mtx'] = np.zeros((args.k_class, args.k_class))
    results['val_conf_mtx'] = np.zeros((args.k_class, args.k_class))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_val'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((args.k_fold, clip_time[ii]))
        results['t_val'][ii] = np.zeros((args.k_fold, clip_time[ii]))

    i_fold = 0
    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=K_SEED)

    for train, val in kf.split(train_list):
        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs = [train_list[ii] for ii in train]
        val_subs = [train_list[ii] for ii in val]

        # get train, val sequences
        X_train, train_len, y_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val = _get_seq(df, val_subs, args)
        '''
        train classifier
        '''
        then = time.time()
        model = TCNClassifier(X_train,
                              k_hidden=params['k_hidden'],
                              k_wind=params['k_wind'],
                              k_class=args.k_class)

        model.fit(X_train,
                  y_train,
                  epochs=args.num_epochs,
                  validation_split=0.2,
                  batch_size=args.batch_size,
                  verbose=1)
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))

        trainable = np.sum([
            tf.reshape(v, -1).shape[0]
            for v in model.trainable_variables
        ])
        print('Total trainable parameters: %i' % trainable)
        '''
        results on train data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_train, y_train, clip_time)
        results['train'][i_fold] = a
        print('tacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_train'][ii][i_fold] = a_t[ii]
        results['train_conf_mtx'] += c_mtx
        '''
        results on val data
        '''
        a, a_t, c_mtx = _ff_acc(model, X_val, y_val, clip_time)
        results['val'][i_fold] = a
        print('vacc = %0.3f' % a)
        for ii in range(args.k_class):
            results['t_val'][ii][i_fold] = a_t[ii]
        results['val_conf_mtx'] += c_mtx

        i_fold += 1

    return results
Example #14
def _test(df, args, params):
    '''
    test subject results
    view only for best cross-val parameters
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)
    '''
    init model
    '''
    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)
    max_length = tf.math.reduce_max(train_len).numpy()
    '''
    train encoder
    '''
    then = time.time()
    model_encoder = GRUEncoder(X_train,
                               args.gru_model_path,
                               k_layers=params['k_layers'],
                               k_hidden=params['k_hidden'],
                               k_dim=args.k_dim,
                               k_class=args.k_class)

    model_encoder.fit(X_train,
                      y_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    encoder results
    '''
    results = {}

    a, a_t, c_mtx = _gru_test_acc(model_encoder, X_train, y_train, clip_time,
                                  len(train_list))
    results['train'] = a
    a, a_t, c_mtx = _gru_test_acc(model_encoder, X_test, y_test, clip_time,
                                  len(test_list))
    results['test'] = a
    '''
    get encoder trajectories
    '''
    traj_train = _gruenc_test_traj(model_encoder, X_train)
    traj_test = _gruenc_test_traj(model_encoder, X_test)
    '''
    apply mask on trajectories
    '''
    mask = X_train[:, :, 0] == 0.0
    traj_train[mask, :] = 0.0
    mask = X_test[:, :, 0] == 0.0
    traj_test[mask, :] = 0.0
    '''
    train decoder
    '''
    model_decoder = GRUDecoder(traj_train,
                               X_train,
                               k_layers=args.k_layers[0],
                               lr=0.001)

    model_decoder.fit(traj_train,
                      X_train,
                      epochs=args.num_epochs,
                      validation_split=0.2,
                      batch_size=args.batch_size,
                      verbose=1)
    '''
    evaluate decoder
    '''
    train_mask = X_train != 0
    test_mask = X_test != 0
    '''
    results on train data
    '''
    outputs = model_decoder.predict(traj_train)
    o = outputs[train_mask == True]
    y = X_train[train_mask == True]
    a = mean_squared_error(o, y)
    print('train_recon mse = %0.3f' % a)
    results['train_mse'] = a
    a = r2_score(o, y)
    results['train_r2'] = a
    print('train_recon r2 = %0.3f' % a)
    '''
    results on test data
    '''
    outputs = model_decoder.predict(traj_test)
    o = outputs[test_mask == True]
    y = X_test[test_mask == True]
    a = mean_squared_error(o, y)
    print('test_recon mse = %0.3f' % a)
    results['test_mse'] = a
    a = r2_score(o, y)
    results['test_r2'] = a
    print('test_recon r2 = %0.3f' % a)
    '''
    compare to pca reconstruction
    '''
    train_mse, test_mse, train_r2, test_r2, pca_var = _get_pca_recon(
        df, train_list, test_list, args)
    results['pca_var'] = pca_var
    '''
    results on train data
    '''
    results['train_pca_mse'] = train_mse
    results['train_pca_r2'] = train_r2
    print('t_pca_recon r2 = %0.3f' % train_r2)
    '''
    results on test data
    '''
    results['test_pca_mse'] = test_mse
    results['test_pca_r2'] = test_r2
    print('s_pca_recon r2 = %0.3f' % test_r2)

    return results
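
The trajectory masking above zeroes every timepoint whose first input feature is exactly 0.0, i.e. the zero-padded tail of each sequence; a toy illustration with arbitrary shapes:

import numpy as np

X = np.zeros((2, 4, 3))          # (sequences, time, features)
X[0, :3] = 1.0                   # sequence 0: 3 valid timepoints
X[1, :2] = 1.0                   # sequence 1: 2 valid timepoints
traj = np.random.rand(2, 4, 5)   # hypothetical encoder output, k_dim = 5
mask = X[:, :, 0] == 0.0         # (sequences, time) padding mask
traj[mask, :] = 0.0              # zero trajectories at padded timepoints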
Example #15
def _train(df, bhv_df, args):
    # get X-y from df
    feature = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(feature)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    # length of each clip
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # init dict for all results
    results = {}

    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}

    for score in SCORES:
        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)

        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_val_%s' % score] = {}

        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
            results['t_val_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))

    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=K_SEED)

    # get participant lists for each assigned class
    # ensure they're only in train_list
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[(bhv_df['Subject'].isin(train_list))
                                & (bhv_df['y'] == ii)]['Subject'].values
        print('No. of participants in class {} = {}'.format(
            ii, len(class_list[ii])))
    '''    
    split participants in each class with kf
    nearly identical ratio of train and val,
    in all classes
    '''
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])

    for i_fold in range(args.k_fold):

        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            for jj in train:
                train_subs.append(class_list[ii][jj])
            for jj in val:
                val_subs.append(class_list[ii][jj])
        '''
        model main
        '''

        X_train, train_len, y_train, c_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val, c_val = _get_seq(df, val_subs, args)

        max_length = tf.reduce_max(train_len)
        '''
        train regression model
        '''
        then = time.time()
        model = GRURegressor(X_train,
                             k_hidden=args.k_hidden,
                             k_layers=args.k_layers,
                             l2=args.l2,
                             dropout=args.dropout,
                             lr=args.lr)
        model.fit(X_train,
                  y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
                  batch_size=args.batch_size,
                  epochs=args.num_epochs,
                  verbose=1,
                  validation_split=0.2)

        print('--- train time =  %0.4f seconds ---' % (time.time() - then))
        '''
        results on train data
        '''
        s, s_t, _, _, _ = dnn_score(model,
                                    X_train,
                                    y_train,
                                    c_train,
                                    train_len,
                                    max_length,
                                    clip_time,
                                    model_type=model_type)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_train_%s' % score][ii][i_fold] = s_t[ii][score]
        print('train p = %0.3f' % s['p'])
        '''
        results on val data
        '''
        s, s_t, y, y_hat, c = dnn_score(model,
                                        X_val,
                                        y_val,
                                        c_val,
                                        val_len,
                                        max_length,
                                        clip_time,
                                        model_type=model_type)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_val_%s' % score][ii][i_fold] = s_t[ii][score]
        print('val p = %0.3f' % s['p'])

        results['y'][i_fold] = y
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c

    return results
Example #16
def _test(df, args, params):
    '''
    test subject results
    view only for best cross-val parameters
    '''
    _info('test mode')
    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    print('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(len(test_list))
    results['val'] = np.zeros(len(test_list))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_test'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((len(test_list), clip_time[ii]))
        results['t_test'][ii] = np.zeros((len(test_list), clip_time[ii]))

    results_prob = {}
    for method in 'train test'.split():
        results_prob[method] = {}
        for measure in 'acc t_prob'.split():
            results_prob[method][measure] = {}
    '''
    init model
    '''
    # get train, test sequences
    X_train, train_len, y_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test = _get_seq(df, test_list, args)
    '''
    train classifier
    '''
    then = time.time()

    model = TCNClassifier(X_train,
                          k_hidden=params['k_hidden'],
                          k_wind=params['k_wind'],
                          k_class=args.k_class)

    model.fit(X_train,
              y_train,
              epochs=args.num_epochs,
              validation_split=0.2,
              batch_size=args.batch_size,
              verbose=1)

    print('--- train time =  %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_train, y_train, clip_time,
                                 len(train_list))
    results['train'] = a
    print('tacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_train'][ii] = a_t[ii]
    results['train_conf_mtx'] = c_mtx

    # train temporal probs
    results_prob['train']['acc'] = model.evaluate(X_train, y_train)[1]
    X_train_probs = model.predict(X_train)
    results_prob['train']['t_prob'] = _get_true_class_prob(
        y_train, X_train_probs, train_len)
    '''
    results on test data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_test, y_test, clip_time,
                                 len(test_list))
    results['test'] = a
    print('sacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_test'][ii] = a_t[ii]
    results['test_conf_mtx'] = c_mtx

    # test temporal probs
    results_prob['test']['acc'] = model.evaluate(X_test, y_test)[1]
    X_test_probs = model.predict(X_test)
    results_prob['test']['t_prob'] = _get_true_class_prob(
        y_test, X_test_probs, test_len)

    return results, results_prob, model
Example #17
def _test(df, bhv_df, args):

    _info('test mode')

    # get X-y from df
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    # length of each clip
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # init dict for all results
    results = {}
    for score in SCORES:

        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_test_%s' % score] = {}

        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(clip_time[ii])
            results['t_test_%s' % score][ii] = np.zeros(clip_time[ii])
    '''
    model main
    '''

    # get train, test sequences
    X_train, train_len, y_train, c_train = _get_seq(df, train_list, args)
    X_test, test_len, y_test, c_test = _get_seq(df, test_list, args)

    max_length = tf.reduce_max(train_len)
    '''
    test regression model
    '''
    then = time.time()
    model = GRURegressor(X_train,
                         k_hidden=args.k_hidden,
                         k_layers=args.k_layers,
                         l2=args.l2,
                         dropout=args.dropout,
                         lr=args.lr)
    model.fit(X_train,
              y_train.reshape(y_train.shape[0], y_train.shape[1], 1),
              batch_size=args.batch_size,
              epochs=args.num_epochs,
              verbose=1,
              validation_split=0.2)
    print('--- train time =  %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    '''
    s, s_t, _, _, _ = dnn_score(model,
                                X_train,
                                y_train,
                                c_train,
                                train_len,
                                max_length,
                                clip_time,
                                model_type=model_type)
    for score in SCORES:
        results['train_%s' % score] = s[score]
        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = s_t[ii][score]
    print('train p = %0.3f' % s['p'])
    '''
    results on test data
    '''
    s, s_t, y, y_hat, c = dnn_score(model,
                                    X_test,
                                    y_test,
                                    c_test,
                                    test_len,
                                    max_length,
                                    clip_time,
                                    model_type=model_type)
    for score in SCORES:
        results['test_%s' % score] = s[score]
        for ii in range(k_clip):
            results['t_test_%s' % score][ii] = s_t[ii][score]
    print('test p = %0.3f' % s['p'])

    results['y'] = y
    results['y_hat'] = y_hat
    results['c'] = c

    return results
Example #18
def run(args):
    _info(args.roi_name)

    # Get all combinations of the parameter grid
    param_grid = {'k_hidden': args.k_hidden, 'k_layers': args.k_layers}
    param_grid = [comb for comb in ParameterGrid(param_grid)]

    print(len(param_grid))
    print(len(args.k_layers))

    _info('Number of hyperparameter combinations: ' + str(len(param_grid)))
    _info(args.roi_name)
    '''
    get dataframe
    '''
    if len(param_grid) == 1:
        res_path = (RES_DIR + '/%s_%d_net_%d' %
                    (args.roi_name, args.roi, args.net) + '_trainsize_%d' %
                    (args.train_size) + '_k_hidden_%d' % (args.k_hidden[0]) +
                    '_kdim_%d' % (args.k_dim) + '_k_layers_%d_batch_size_%d' %
                    (args.k_layers[0], args.batch_size) +
                    '_num_epochs_%d_z_%d.pkl' % (args.num_epochs, args.zscore))

        mod_path = res_path.replace('results', 'models')
        mod_path = mod_path.replace('pkl', 'h5')

        gru_model_path = mod_path.replace('gruencoder', 'gru')
        gru_model_path = gru_model_path.replace('_kdim_%d' % (args.k_dim), '')
        args.gru_model_path = gru_model_path

    elif len(param_grid) > 1:
        res_path = (RES_DIR + '/%s_%d_net_%d' %
                    (args.roi_name, args.roi, args.net) + '_trainsize_%d' %
                    (args.train_size) + '_kfold_%d' % (args.k_fold) +
                    '_batch_size_%d' % (args.batch_size) +
                    '_num_epochs_%d_z_%d_GSCV.pkl' %
                    (args.num_epochs, args.zscore))

    if not os.path.isfile(res_path):
        start = time.time()
        df = _clip_class_df(args)
        print('data loading time: %.2f seconds' % (time.time() - start))

        if len(param_grid) == 1:
            results, results_prob, model = _test(df, args, param_grid[0])
            print('TESTING DONE :)')
            # forward pass data and save encodings
            #traj_df = {}
            #for run in range(K_RUNS):
            #    df = _clip_class_rest_df(args, run)
            #    traj_df[run] = _get_trajectories(df, model, args)

            # save results
            with open(res_path, 'wb') as f:
                pickle.dump([results, results_prob], f)
            # save model
            #model.save(mod_path)

        elif len(param_grid) > 1:
            results = {}
            for mm, params in enumerate(param_grid):
                print('---')
                print('model{:02d}'.format(mm) + ': ')
                print(params)
                print('---')
                results['model{:02d}'.format(mm)] = _train(df, args, params)
            # save grid-search CV results
            with open(res_path, 'wb') as f:
                pickle.dump([results, param_grid], f)
Example #19
def _saliency(df, args):
    '''
    save gradient with respect to the input
    to use as proxy for importance
    '''
    then = time.time()
    _info('save gradients')

    subject_list = np.unique(df['Subject'])
    k_class = len(np.unique(df['y']))
    print('number of unique sequences = %d' % k_class)

    # create columns for gradients
    # don't use number of ROI in case of subnetwork
    features = [ii for ii in df.columns if 'feat' in ii]
    grads = ['grad_%d' % ii for ii in range(len(features))]
    for grad in grads:
        df.loc[:, grad] = np.nan

    gru_model = keras.models.load_model(args.gru_model_path)
    gru_model.trainable = False

    # length of each clip
    clip_time = np.zeros(k_class, dtype=int)
    for i_clip in range(k_class):
        class_df = df[df['y'] == i_clip]
        clip_time[i_clip] = int(np.max(np.unique(class_df['timepoint'])) + 1)

    num_perms = 100

    for idx_perm in tqdm(range(1, num_perms)):
        """
        shuffle labels
        """
        class_labels = []
        for i_class in range(k_class):
            if i_class == 0:
                class_labels += [0] * len(subject_list)
            else:
                class_labels += [i_class] * len(subject_list)
        random.shuffle(class_labels)
        """"""
        counter = 0

        for i_class in range(k_class):

            if i_class != 14:
                continue
            for subject in subject_list:
                class_label = class_labels[counter]
                counter += 1

                if i_class == 0:  # must handle test retest differently
                    seqs = df[(df['Subject'] == subject)
                              & (df['y'] == 0)][features].values
                    gradX = np.zeros(seqs.shape)

                    k_time = int(seqs.shape[0] / K_RUNS)
                    for i_run in range(K_RUNS):
                        seq = seqs[i_run * k_time:(i_run + 1) * k_time, :]
                        if args.zscore:
                            # zscore each seq that goes into model
                            seq = (1 / np.std(seq)) * (seq - np.mean(seq))

                        X = [seq]
                        X_padded = tf.keras.preprocessing.sequence.pad_sequences(
                            X, padding="post", dtype='float')

                        gX = _compute_saliency_maps(gru_model, X_padded,
                                                    i_class)
                        gradX[i_run * k_time:(i_run + 1) * k_time, :] = gX

                else:
                    seq = df[(df['Subject'] == subject)
                             & (df['y'] == class_label)][features].values
                    if args.zscore:
                        # zscore each seq that goes into model
                        seq = (1 / np.std(seq)) * (seq - np.mean(seq))

                    X = [seq]
                    X_padded = tf.keras.preprocessing.sequence.pad_sequences(
                        X, padding="post", dtype='float')

                    gradX = _compute_saliency_maps(gru_model, X_padded,
                                                   i_class)
                    gradX = gradX.squeeze()
                    if clip_time[i_class] > clip_time[class_label]:
                        gradX = np.vstack([
                            gradX,
                            np.zeros(
                                (clip_time[i_class] - clip_time[class_label],
                                 gradX.shape[1]))
                        ])

                df.loc[(df['Subject'] == subject) & (df['y'] == i_class),
                       grads] = gradX[0:clip_time[i_class], :]

        sal_df = df[['Subject', 'timepoint', 'y'] + grads]

        with open(
                "/home/joyneelm/movie-clip-predictions/results/null_saliency/label_shuffle_starwars/perm_{:03}.pkl"
                .format(idx_perm), 'wb') as f:
            pickle.dump(sal_df, f)

    return
Example #20
def _test(df, args):
    '''
    test subject results
    view only for best cross-val parameters
    '''
    _info('test mode')

    # get X-y from df
    subject_list = df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    pc_df = _get_pc(df, train_list, test_list, args)

    print('number of subjects = %d' % (len(subject_list)))
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    args.k_class = len(np.unique(df['y']))
    print('number of classes = %d' % (args.k_class))

    # length of each clip
    clip_time = np.zeros(args.k_class)
    for ii in range(args.k_class):
        class_df = df[df['y'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # results dict init
    results = {}

    # mean accuracy across time
    results['train'] = np.zeros(len(test_list))
    results['val'] = np.zeros(len(test_list))

    # per class temporal accuracy
    results['t_train'] = {}
    results['t_test'] = {}
    for ii in range(args.k_class):
        results['t_train'][ii] = np.zeros((len(test_list), clip_time[ii]))
        results['t_test'][ii] = np.zeros((len(test_list), clip_time[ii]))
    '''
    init model
    '''

    # get train, test sequences
    X_train, train_len, y_train = _get_seq(pc_df, train_list, args)
    X_test, test_len, y_test = _get_seq(pc_df, test_list, args)
    '''
    train classifier
    '''
    then = time.time()
    model = LogReg(k_dim=args.k_dim, k_class=args.k_class)

    model.fit(X_train,
              y_train,
              epochs=args.num_epochs,
              validation_split=0.2,
              batch_size=args.batch_size,
              verbose=1)

    print('--- train time =  %0.4f seconds ---' % (time.time() - then))
    '''
    results on train data
    ff_test_acc works for logreg
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_train, y_train, clip_time,
                                 len(train_list))
    results['train'] = a
    print('tacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_train'][ii] = a_t[ii]
    '''
    results on test data
    '''
    a, a_t, c_mtx = _ff_test_acc(model, X_test, y_test, clip_time,
                                 len(test_list))
    results['test'] = a
    print('sacc = %0.3f' % np.mean(a))
    for ii in range(args.k_class):
        results['t_test'][ii] = a_t[ii]

    return results
Example #21
def _train(df, bhv_df, args):

    # get X-y from df
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    # init dict for all results
    results = {}

    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}

    for score in SCORES:

        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)

        # per clip score
        results['c_train_%s' % score] = {}
        results['c_val_%s' % score] = {}

        for ii in range(k_clip):
            results['c_train_%s' % score][ii] = np.zeros(args.k_fold)
            results['c_val_%s' % score][ii] = np.zeros(args.k_fold)

    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=K_SEED)

    # get participant lists for each assigned class
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[(bhv_df['Subject'].isin(train_list))
                                & (bhv_df['y'] == ii)]['Subject'].values
        print('No. of participants in class {} = {}'.format(
            ii, len(class_list[ii])))
    '''    
    split participants in each class with kf
    nearly identical ratio of train and val,
    in all classes
    '''
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])

    for i_fold in range(args.k_fold):

        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            for jj in train:
                train_subs.append(class_list[ii][jj])
            for jj in val:
                val_subs.append(class_list[ii][jj])
        '''
        model main
        '''
        model = cpm(corr_thresh=args.corr_thresh)

        X_train, y_train, c_train = _get_seq(df, train_subs, args)
        X_val, y_val, c_val = _get_seq(df, val_subs, args)

        # train model
        _, _ = model.fit(X_train, y_train)
        '''
        results on train data
        '''
        s, s_c, _ = static_score(model,
                                 X_train,
                                 y_train,
                                 c_train,
                                 model_type=model_type)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['c_train_%s' % score][ii][i_fold] = s_c[ii][score]
        print('train p = %0.3f' % s['p'])
        '''
        results on val data
        '''
        s, s_c, y_hat = static_score(model,
                                     X_val,
                                     y_val,
                                     c_val,
                                     model_type=model_type)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['c_val_%s' % score][ii][i_fold] = s_c[ii][score]
        print('val p = %0.3f' % s['p'])

        results['y'][i_fold] = y_val
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c_val

    return results
Example #22
def _test(df, bhv_df, args):

    _info('test mode')

    # get X-y from df
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))
    subject_list = bhv_df['Subject'].unique()
    train_list = subject_list[:args.train_size]
    test_list = subject_list[args.train_size:]

    # init dict for all results
    results = {}
    for score in SCORES:
        # per clip score
        results['c_train_%s' % score] = {}
        results['c_test_%s' % score] = {}
    '''
    model main
    '''
    model = cpm(corr_thresh=args.corr_thresh)

    X_train, y_train, c_train = _get_seq(df, train_list, args)
    X_test, y_test, c_test = _get_seq(df, test_list, args)

    # train model
    _, _ = model.fit(X_train, y_train)
    '''
    results on train data
    '''
    s, s_c, _ = static_score(model,
                             X_train,
                             y_train,
                             c_train,
                             model_type=model_type)
    for score in SCORES:
        results['train_%s' % score] = s[score]
        for ii in range(k_clip):
            results['c_train_%s' % score][ii] = s_c[ii][score]
    print('train p = %0.3f' % s['p'])
    '''
    results on test data
    '''
    s, s_c, y_hat = static_score(model,
                                 X_test,
                                 y_test,
                                 c_test,
                                 model_type=model_type)
    for score in SCORES:
        results['test_%s' % score] = s[score]
        for ii in range(k_clip):
            results['c_test_%s' % score][ii] = s_c[ii][score]
    print('test p = %0.3f' % s['p'])

    results['y'] = y_test
    results['y_hat'] = y_hat
    results['c'] = c_test

    return results
Example #23
def _train(df, bhv_df, args):

    # set pytorch device
    torch.manual_seed(K_SEED)
    use_cuda = torch.cuda.is_available()
    args.device = torch.device('cuda:0' if use_cuda else 'cpu')
    if use_cuda:
        _info('cuda')
    else:
        _info('cpu')

    # get X-y from df
    features = [ii for ii in df.columns if 'feat' in ii]
    k_feat = len(features)
    print('number of features = %d' % (k_feat))
    k_clip = len(np.unique(df['c']))
    print('number of clips = %d' % (k_clip))

    # length of each clip
    clip_time = np.zeros(k_clip)
    for ii in range(k_clip):
        class_df = df[df['c'] == ii]
        clip_time[ii] = np.max(np.unique(class_df['timepoint'])) + 1
    clip_time = clip_time.astype(int)  # df saves float
    _info('seq lengths = %s' % clip_time)

    # init dict for all results
    results = {}

    # true and predicted scores and clip label
    results['y'] = {}
    results['y_hat'] = {}
    results['c'] = {}

    for score in SCORES:

        # mean scores across time
        results['train_%s' % score] = np.zeros(args.k_fold)
        results['val_%s' % score] = np.zeros(args.k_fold)

        # per clip temporal score
        results['t_train_%s' % score] = {}
        results['t_val_%s' % score] = {}

        for ii in range(k_clip):
            results['t_train_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))
            results['t_val_%s' % score][ii] = np.zeros(
                (args.k_fold, clip_time[ii]))

    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=K_SEED)

    # get participant lists for each assigned class
    class_list = {}
    for ii in range(args.k_class):
        class_list[ii] = bhv_df[bhv_df['y'] == ii]['Subject'].values
    '''    
    split participants in each class with kf
    nearly identical ratio of train and val,
    in all classes
    '''
    split = {}
    for ii in range(args.k_class):
        split[ii] = kf.split(class_list[ii])

    for i_fold in range(args.k_fold):

        _info('fold: %d/%d' % (i_fold + 1, args.k_fold))

        # ***between-subject train-val split
        train_subs, val_subs = [], []
        for ii in range(args.k_class):
            train, val = next(split[ii])
            for jj in train:
                train_subs.append(class_list[ii][jj])
            for jj in val:
                val_subs.append(class_list[ii][jj])
        '''
        model main
        '''
        model = LSTMRegression(k_feat, args.k_hidden, args.k_layers)
        model.to(args.device)
        print(model)

        lossfn = nn.MSELoss()
        # if input is cuda, loss function is auto cuda
        opt = torch.optim.Adam(model.parameters())

        # get train, val sequences
        X_train, train_len, y_train, c_train = _get_seq(df, train_subs, args)
        X_val, val_len, y_val, c_val = _get_seq(df, val_subs, args)

        max_length = torch.max(train_len)
        '''
        train regression model
        '''
        permutation = torch.randperm(X_train.size()[0])
        losses = np.zeros(args.num_epochs)
        #
        then = time.time()

        for epoch in range(args.num_epochs):
            for i in range(0, X_train.size()[0], args.batch_size):

                indices = permutation[i:i + args.batch_size]
                batch_x, batch_y = X_train[indices], y_train[indices]
                batch_x_len = train_len[indices]
                batch_mask = torch.BoolTensor(
                    _get_mask(batch_x_len, max_length)).to(args.device)

                y_pred = model(batch_x, batch_x_len, max_length).squeeze(2)
                loss = lossfn(y_pred[batch_mask == True],
                              batch_y[batch_mask == True])

                opt.zero_grad()
                loss.backward()
                opt.step()

            losses[epoch] = loss.item()  # record the last mini-batch loss of this epoch

        _info(losses)
        #
        print('--- train time =  %0.4f seconds ---' % (time.time() - then))
        model.eval()
        '''
        results on train data
        '''
        s, s_t, _, _, _ = _lstm_score(model, X_train, y_train, c_train,
                                      train_len, max_length, clip_time)
        for score in SCORES:
            results['train_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_train_%s' % score][ii][i_fold] = s_t[ii][score]
        print('train p = %0.3f' % s['p'])
        '''
        results on val data
        '''
        s, s_t, y, y_hat, c = _lstm_score(model, X_val, y_val, c_val, val_len,
                                          max_length, clip_time)
        for score in SCORES:
            results['val_%s' % score][i_fold] = s[score]
            for ii in range(k_clip):
                results['t_val_%s' % score][ii][i_fold] = s_t[ii][score]
        print('val p = %0.3f' % s['p'])

        results['y'][i_fold] = y
        results['y_hat'][i_fold] = y_hat
        results['c'][i_fold] = c

    return results
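
_get_mask is referenced in this example but not listed; a plausible sketch, assuming it returns a (batch x max_length) boolean array that is True for valid timepoints and False for zero-padding:

import numpy as np

def _get_mask(seq_len, max_length):
    # Hypothetical helper: mark the valid (unpadded) timepoints per sequence.
    mask = np.zeros((len(seq_len), int(max_length)), dtype=bool)
    for ii, t in enumerate(seq_len):
        mask[ii, :int(t)] = True
    return mask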