Пример #1
0
def main():
    """Run the term-classification pipeline and store the scores as JSON.

    Steps, in order:

    1. Load the per-study term features from ``data/features.txt`` and the
       terms of interest from ``data/terms.json``.
    2. Replace each study's features with the list of terms of interest
       whose value is strictly greater than ``THRESH``; studies left with
       no term are dropped.
    3. Filter the study coordinates with
       ``ex.filter_studies_active_voxels`` and make sure their keys are
       ``int``.
    4. Keep only the studies present in both dicts, build ``X`` and ``y``
       and classify them.
    5. Write the results to ``class_scores.json`` and ``label_scores.json``
       in the current working directory.
    """
    mask_file = 'data/MNI152_T1_2mm_brain.nii.gz'

    feature_dict, _ = pp.set_targets('data/features.txt', threshold=-1)
    # consider only the terms of interest
    with open('data/terms.json', 'rb') as f:
        terms = json.load(f)
    # iterate over a snapshot of the keys: entries are deleted in the loop
    for key in list(feature_dict):
        feature_dict[key] = [x for x in terms if feature_dict[key][x] > THRESH]
        if not feature_dict[key]:
            del feature_dict[key]
    # filter coordinates based on voxels
    coord_dict = ex.filter_studies_active_voxels('data/docdict.txt', mask_file,
                                                 threshold=500, radius=6)
    # ensure that the keys are ints
    for key in list(coord_dict):
        if not isinstance(key, int):
            coord_dict[int(key)] = coord_dict[key]
            del coord_dict[key]
    # find intersecting dicts
    coord_dict, feature_dict = ex.get_intersecting_dicts(coord_dict,
                                                         feature_dict)
    # get the respective vectors
    X, y = pp.get_features_targets(coord_dict, feature_dict, labels=terms,
                                   mask=mask_file)
    score_per_class, score_per_label = classify(X, y)
    # json.dump emits str, so the files must be opened in text mode
    # ('wb' raises TypeError on Python 3)
    with open('class_scores.json', 'w') as f:
        json.dump(score_per_class, f)
    with open('label_scores.json', 'w') as f:
        json.dump(score_per_label, f)
Пример #2
0
def filter_studies_terms(feature_file, terms=None, threshold=0.001,
                         set_unique_label=False):
    """
    Drop every study that has more than one of the considered terms with a
    frequency strictly greater than `threshold`.

    Parameters
    ----------
    feature_file : str
        the file with the raw features; it is loaded through
        ``pp.set_targets(feature_file, threshold=-1)``.
    terms : list of str, optional
        the terms that are being considered as labels. They are lower-cased
        and those that are not among the target names returned by
        ``pp.set_targets`` are ignored. If not specified, a built-in list
        of 25 terms is used.
    threshold : real, optional
        a term counts as significant for a study when its frequency is
        strictly greater than this value. Defaults to 0.001.
    set_unique_label : bool, optional
        defaults to False. When True, the entry of each remaining study is
        replaced by the considered term with the highest frequency, and
        studies where no considered term has a frequency above 0 are
        removed.

    Returns
    -------
    feature_dict : dict
        the dict returned by ``pp.set_targets``, modified in place so that
        all studies with conflicting labels are eliminated.
    """
    if terms is None:
        terms = [
            'Semantic', 'Encoding', 'Executive', 'Language', 'Verbal',
            'Phonological', 'Visual', 'Inference', 'Working Memory',
            'Conflict', 'Spatial', 'Attention', 'Imagery', 'Action',
            'Sensory', 'Perception', 'Auditory', 'Pain', 'Reward',
            'Arousal', 'Emotion', 'Social', 'Episodic', 'Retrieval',
            'Recognition',
        ]
    feature_dict, target_names = pp.set_targets(feature_file, threshold=-1)

    # keep only the terms that are actual features, in lower case
    candidates = []
    for term in terms:
        lowered = term.lower()
        if lowered in target_names:
            candidates.append(lowered)

    # remove all studies that have more than one major term; iterate over
    # a snapshot of the keys because entries are deleted along the way
    for study in list(feature_dict):
        frequencies = feature_dict[study]
        n_major = sum(1 for term in candidates
                      if frequencies[term] > threshold)
        if n_major > 1:
            del feature_dict[study]

    if not set_unique_label:
        return feature_dict

    for study in list(feature_dict):
        frequencies = feature_dict[study]
        best_term, best_freq = None, 0
        for term in candidates:
            # strict comparison: on ties the earliest term is kept
            if frequencies[term] > best_freq:
                best_freq = frequencies[term]
                best_term = term
        if best_term is None:
            # no considered term has a positive frequency for this study
            del feature_dict[study]
        else:
            feature_dict[study] = best_term
    return feature_dict
Пример #3
0
def filter_studies_terms(feature_file, terms=None, threshold=0.001,
                         set_unique_label=False):
    """
    Given the frequency of terms corresponding to each study, as well as the
    terms to consider, eliminate all studies that have more than one term
    occurring at a frequency > threshold.

    Parameters
    ----------
    feature_file : str
        the file with the raw features, read with
        ``pp.set_targets(feature_file, threshold=-1)``.
    terms : list of str, optional
        the terms that are being considered as labels; matching against the
        target names is done on their lower-cased form and unknown terms
        are skipped. If not specified, a default list of 25 terms is used.
    threshold : real, optional
        the frequency a term must strictly exceed to be considered
        significant with respect to the study. Defaults to 0.001.
    set_unique_label : bool, optional
        defaults to False; when True each kept study maps to a single
        label, the considered term with the largest frequency. Studies
        without any considered term of frequency above 0 are dropped.

    Returns
    -------
    feature_dict : dict
        the dict obtained from ``pp.set_targets`` (mutated in place) such
        that all studies with conflicting labels are eliminated.
    """
    default_terms = [
        'Semantic', 'Encoding', 'Executive', 'Language', 'Verbal',
        'Phonological', 'Visual', 'Inference', 'Working Memory', 'Conflict',
        'Spatial', 'Attention', 'Imagery', 'Action', 'Sensory', 'Perception',
        'Auditory', 'Pain', 'Reward', 'Arousal', 'Emotion', 'Social',
        'Episodic', 'Retrieval', 'Recognition',
    ]
    if terms is None:
        terms = default_terms

    feature_dict, target_names = pp.set_targets(feature_file, threshold=-1)

    # validate that the terms are actual features and convert to lower case
    lowered_terms = (name.lower() for name in terms)
    labels = [name for name in lowered_terms if name in target_names]

    # first pass: remove all studies that have more than one major term
    # (the key list is copied because the dict shrinks during the loop)
    for study_id in list(feature_dict):
        n_significant = 0
        for name in labels:
            if feature_dict[study_id][name] > threshold:
                n_significant += 1
        if n_significant > 1:
            del feature_dict[study_id]

    # optional second pass: collapse each study to its dominant term
    if set_unique_label:
        for study_id in list(feature_dict):
            top_value = 0
            top_label = None
            for name in labels:
                value = feature_dict[study_id][name]
                # strict '>' keeps the first term in case of equal values
                if value > top_value:
                    top_value, top_label = value, name
            if top_label is not None:
                feature_dict[study_id] = top_label
            else:
                del feature_dict[study_id]

    return feature_dict