Example No. 1
def read_data_e2(data_dir):

    main_dir = glob.glob(data_dir+'/*/*')

    print(main_dir)
    for fl in main_dir:
        # print("Participant id is: ",fl.strip().split('/')[-2])
        participant = fl.strip().split("/")[-2]
        exp = fl.strip().split("/")[-3]
        print(fl.split('/')[-1])

        if 'example' in fl.split('/')[-1]:
            # squeeze_me=True flattens MATLAB cell arrays into plain values;
            # the meta struct is loaded a second time unsqueezed so its layout is preserved
            ff = spio.loadmat(fl, squeeze_me=True)
            ff_2 = spio.loadmat(fl, squeeze_me=False)
            disc_pr()
            sents = ff['keySentences']

            part_topic_id = ff['labelsPassageForEachSentence']
            topic_id = ff['labelsPassageCategory']
            topics = ff['keyPassageCategory']
            part_of_topics = ff['keyPassages']
            vxl = ff['examples']
            mtd = ff_2['meta']
            # repeat each passage-category label four times so it lines up with
            # the per-sentence indexing below
            topic_id = [x for x, number in zip(topic_id, len(topic_id) * [4])
                        for _ in range(number)]
            data_dict = {}
            for idx, el in enumerate(part_topic_id):
                data_dict[(sents[idx], part_of_topics[el - 1],
                           topics[topic_id[idx] - 1])] = vxl[idx]

        
            # (Sentence,subtopic(Apple),topic(Fruit)): voxels
            save_pickle(data_dict, '../data_processed/' + exp + '_proc/' + participant + '/' + fl.strip().split("/")[-1])
            save_pickle(mtd, '../data_processed/' + exp + '_proc/' + participant + '/' + fl.strip().split("/")[-1] + '_meta')
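A minimal sketch (not in the original source) of reading one of the pickles written above back in; the participant folder and file name are illustrative, and load_pickle is assumed to be the counterpart of the save_pickle helper used throughout these examples.

# illustrative path, mirroring the save_pickle calls in read_data_e2
data_dict = load_pickle('../data_processed/exp2_proc/P01/examples.mat')
for (sentence, passage, topic), voxels in data_dict.items():
    # each key is (sentence, passage/subtopic, topic) and maps to that sentence's voxel vector
    print(sentence, passage, topic, voxels.shape)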
Example No. 2
def load_exp1(data_dir):
    w2vec_dict = load_pickle('./stimuli/word2vec.pkl')
    exp_id = int((data_dir.split('/')[-2]).split('_')[0][-1])
    assert exp_id == 1
    fld = data_dir
    # Run one participant
    data_files = sorted(glob.glob(fld + '/*'))
    # the processed files come in (data, meta) pairs; sorting keeps each file
    # next to its '_meta' companion, so consecutive pairs can be grouped
    dt_fls_grouped = [tuple(data_files[i:i + 2]) for i in
                      range(0, len(data_files), 2)]
    print(fld)
    disc_pr()

    # for every file pair (word-cloud, picture and sentence presentation cases)
    for data_group in dt_fls_grouped:
        data_dict, metadata = load_data_meta(data_group)
        word_dict = dict()
        # keep only the word2vec vectors for words that appear in this file
        for word in data_dict:
            word_dict[word] = w2vec_dict[word]
        yield data_group[0], data_dict, word_dict, metadata
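load_data_meta is used above but not shown in this listing; a plausible sketch, assuming each group is a (data_file, meta_file) pair written by save_pickle with the meta file carrying the '_meta' suffix:

def load_data_meta(data_group):
    # hypothetical implementation: unpack the pair produced by the grouping above
    data_file, meta_file = data_group
    data_dict = load_pickle(data_file)   # e.g. {word: voxel vector} for experiment 1
    metadata = load_pickle(meta_file)    # the 'meta' struct saved alongside it
    return data_dict, metadata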
Example No. 3
def read_data_e3(data_dir):

    main_dir = glob.glob(data_dir+'/*/*')
    
    for fl in main_dir:
        print("Participant id is: ",fl.strip().split('/')[-2])
        participant = fl.strip().split("/")[-2]
        exp = fl.strip().split("/")[-3]

        if 'example' in fl.split('/')[-1]:
            ff = spio.loadmat(fl, squeeze_me=True)
            # the file is loaded a second time here; unlike read_data_e2 the
            # meta struct is taken from a squeezed load as well
            ff_v2 = spio.loadmat(fl, squeeze_me=True)

            disc_pr()
            sents = ff['keySentences']

            vxl = ff['examples']
            mtd = ff_v2['meta']

            sen_lbl = ff['labelsPassageForEachSentence'].tolist()
            # pair each passage id with its category id
            zipped = list(zip(list(set(sen_lbl)), ff['labelsPassageCategory'].tolist()))
            # number of sentences belonging to each passage
            freq = [sen_lbl.count(key) for key in list(set(sen_lbl))]
            # repeat each (passage, category) pair once per sentence of that passage
            final_list_lbls = []
            for idx, el in enumerate(zipped):
                for x in range(freq[idx]):
                    final_list_lbls.append(el)
            print(len(final_list_lbls))

            # turn each entry into a (sentence, passage id, category name) key
            for i, j in enumerate(final_list_lbls):
                final_list_lbls[i] = (sents[i], final_list_lbls[i][0],
                                      ff['keyPassageCategory'][final_list_lbls[i][1] - 1])
            print(final_list_lbls)
            data_dict={}
            for i,j in enumerate(final_list_lbls):
                data_dict[j] = vxl[i]
            save_pickle(data_dict, '../data_processed/' + exp + '_proc/' + participant + '/' + fl.strip().split("/")[-1])
            save_pickle(mtd, '../data_processed/' + exp + '_proc/' + participant + '/' + fl.strip().split("/")[-1] + '_meta')

        disc_pr()
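A toy walk-through (not from the source) of the label expansion above, using made-up passage and category ids; sorted() is used here so the ordering is deterministic, whereas the original relies on set iteration order.

sen_lbl = [1, 1, 2, 2, 2]       # passage id of each sentence
passage_cat = [3, 5]            # category id of each passage
zipped = list(zip(sorted(set(sen_lbl)), passage_cat))     # [(1, 3), (2, 5)]
freq = [sen_lbl.count(k) for k in sorted(set(sen_lbl))]   # [2, 3]
expanded = [pair for idx, pair in enumerate(zipped) for _ in range(freq[idx])]
# -> [(1, 3), (1, 3), (2, 5), (2, 5), (2, 5)]: one (passage, category) label per sentence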
Example No. 4
        yield data_group[0], data_dict, word_dict, metadata


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--data_dir', dest='data_dir', required=True)
    args = parser.parse_args()
    print(args.data_dir)
    # assert 'data_processed' not in args.data_dir, 'You should rename your {} to data_processed'.format(args.data_dir)

    exp = int((args.data_dir.split('/')[-2]).split('_')[0][-1])
    assert exp == 1 or exp == 2 or exp == 3
    assert 'exp' in args.data_dir.split('/')[-2]
    if exp == 1:
        data_gen = load_exp1(args.data_dir)
        disc_pr()
        # iterate over the generator: one (file, data_dict, word_dict, meta) tuple per file pair
        for x in data_gen:
            print(x[0])
            out_file = os.path.join('./', 'voxels_scores',
                                    '{}.npy'.format(x[0].split('.')[0]))
            out_dir = '/'.join(out_file.split('/')[:-1])
            mkdir_p(out_dir)
            vscores = voxel_scores(x[1], x[2], x[3])
            np.save(out_file, vscores)
    elif exp == 2 or exp == 3:
        load_exp23(args.data_dir)
    else:
        raise ValueError("Illegal value for data folder. Select from {1, 2, 3}.")
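The experiment id is taken from the second-to-last component of -i, so the argument must point one level below a directory named like exp1_..., exp2_... or exp3_... and must not end with a slash. An illustrative check of that parsing (paths and script name are hypothetical):

# python process_data.py -i ./exp1_proc/P01
data_dir = './exp1_proc/P01'
exp = int(data_dir.split('/')[-2].split('_')[0][-1])   # -> 1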
Example No. 5
import numpy as np
from numpy.linalg import svd


def regression_decoder(train_data, train_targets):
    '''
    :param train_data: #examples x #voxels matrix
    :param train_targets: #examples x #dimensions matrix
    :return:
        weightMatrix - a (#voxels+1) x #dimensions weight matrix
        r            - #dimensions vector with the regularization parameter
                       value chosen for each dimension

    Column i of weightMatrix holds #voxels weights plus an intercept (last row)
    for predicting target dimension i.
    The function uses an efficient implementation of cross-validation within
    the training set to pick a different optimal regularization parameter for
    each semantic dimension of the target vector. It uses kernel ridge
    regression with a linear kernel, which allows the full brain to be used as
    input features because the large #voxels x #voxels matrix inversion is
    avoided.
    '''
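    # In matrix form (this just restates the approach described in the docstring):
    # with X the bias-augmented data and Y the targets, kernel ridge computes
    #     W = X.T @ inv(X @ X.T + reg_param * I) @ Y
    # and the leave-one-out residual of example i is available without refitting as
    #     (Y - X @ W)[i] / (1 - S[i, i]),   S = X @ X.T @ inv(X @ X.T + reg_param * I),
    # which is exactly what the division by Snorm in the loop below computes.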
    # append a column of ones to the training data for the bias (intercept) term
    h_x = np.ones((train_data.shape[0], train_data.shape[1] + 1))
    h_x[:, :-1] = train_data
    train_data = h_x

    dims_vxl = train_data.shape[1]
    emb_dim = train_targets.shape[1]
    examples = train_data.shape[0]
    assert train_data.shape[0] == train_targets.shape[0], \
        'data and targets must have the same number of examples'

    params = [
        1, .5, 5, 0.1, 10, 0.01, 100, 0.001, 1000, 0.0001, 10000, 0.00001,
        100000, 0.000001, 1000000
    ]

    n_words = train_data.shape[0]

    cv_err = np.zeros((len(params), emb_dim))

    # linear kernel (Gram matrix) of the bias-augmented training data
    K = np.matmul(train_data, train_data.T)
    # numpy/scipy svd return V transposed, so undo that before using the
    # MATLAB-style formula V @ inv(D + reg*I) @ U.T below
    U, D, Vt = svd(K)
    V = Vt.T

    D = np.eye(U.shape[1], V.shape[0]) * D

    for idx, reg_param in enumerate(params):

        dlambda = D + reg_param * np.eye(D.shape[0], D.shape[1])
        dlambdaInv = np.diag(1 / np.diag(dlambda))
        klambdainv = np.matmul(np.matmul(V, dlambdaInv), U.T)

        K_p = np.matmul(train_data.T, klambdainv)
        S = np.matmul(train_data, K_p)

        weights = np.matmul(K_p, train_targets)
        # Snorm = repmat(1 - diag(S), 1, train_targets.shape[1])
        Snorm = np.tile((1 - np.diag(S)).reshape(np.diag(S).shape[0], 1),
                        (1, emb_dim))

        #Snorm = np.tile(1-np.diag(S),(1,emb_dim))
        #print(Snorm.shape)

        # leave-one-out residuals: scale each row's error by 1/(1 - S_ii)
        Y_diff = train_targets - np.matmul(train_data, weights)
        Y_diff = Y_diff / Snorm
        # per-dimension CV error: sum the squared residuals over examples
        cv_err[idx, :] = (1 / examples) * np.sum(Y_diff * Y_diff, axis=0)
    disc_pr()
    print(cv_err)
    disc_pr()
    minerridx = cv_err.argmin(axis=0)
    # minerr = np.amin(cv_err)
    reg_dim = np.zeros((1, emb_dim))

    # refit each target dimension with its per-dimension best regularization parameter
    for i in range(emb_dim):
        reg_param = params[minerridx[i]]
        reg_dim[0, i] = reg_param

        dlambda = D + reg_param * np.eye(D.shape[0], D.shape[1])
        dlambdaInv = np.diag(1 / np.diag(dlambda))
        klambdainv = np.matmul(np.matmul(V, dlambdaInv), U.T)

        weights[:, i] = np.matmul(np.matmul(train_data.T, klambdainv),
                                  train_targets[:, i])

    return weights, reg_dim
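A minimal usage sketch (assumed, not from the source): applying the trained decoder to held-out data by appending the same bias column used during training; train_data, train_targets and test_data are placeholder arrays.

weights, reg_dim = regression_decoder(train_data, train_targets)
# test_data: #test_examples x #voxels; the appended ones column matches the
# intercept stored in the last row of weights
test_aug = np.hstack([test_data, np.ones((test_data.shape[0], 1))])
predictions = np.matmul(test_aug, weights)   # #test_examples x #dimensions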