Example #1
def frame_chose():
    fs = fn.frames()
    fs_dic = {}
    fs_ID = []
    for f in fs:
        fs_ID.append(f.ID)
        fs_dic[f.name] = []
        lexes = f.lexUnit
        for lex in lexes:
            fs_dic[f.name].append(lexes[lex].name)

    fs_ID_copy = fs_ID[:]  # copy the list; plain assignment would alias it, and removals below would skip frames while iterating fs_ID
    result = []
    for f1 in fs_ID:
        fs_ID_copy.remove(f1)
        f1_name = fn.frame(f1).name
        set1 = set(fs_dic[f1_name])
        for f2 in fs_ID_copy:
            f2_name = fn.frame(f2).name
            set2 = set(fs_dic[f2_name])
            r = list(set1 & set2)
            result.append((f1_name, f2_name, r, len(r)))

    result = sorted(result, key=lambda x: (x[3]), reverse=True)

    frame_chose = []
    for r in result:
        if r[3] >= 10:
            frame_chose.append(r)

    return frame_chose
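A usage sketch (assuming from nltk.corpus import framenet as fn at module level, as in the other examples; note the pairwise scan over all frames is slow):

pairs = frame_chose()
for f1_name, f2_name, shared_lus, n in pairs[:5]:
    print(f1_name, f2_name, n)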
Example #2
File: util.py Project: JessieSalas/Fil
def getFrames(phrase):
    frames = []
    content = [s.translate(str.maketrans('', '', string.punctuation)) for s in phrase.split()]
    for lemma in content:
        frame = fn.frames(lemma)
        if frame:
            frames.append([f.name for f in frame])
    return frames
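Usage sketch (assumes import string and the framenet import from the source file; note that fn.frames() matches each word against frame names, not against lexical units):

print(getFrames('The doctor cured the patient.'))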
Example #3
def set_FN_embeddings():
    embeddings = {}
    frames = fn.frames()
    vector = np.zeros(len(frames))
    for i in range(len(frames)):
        name = frames[i].name
        vector_i = np.copy(vector)
        vector_i[i] = 1.0
        embeddings[name] = vector_i
    return embeddings, len(frames)
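A quick check of the one-hot embeddings (assumes numpy and the framenet import as in the source file; Medical_conditions is a real FrameNet frame):

embeddings, dim = set_FN_embeddings()
vec = embeddings['Medical_conditions']
assert vec.shape == (dim,) and vec.sum() == 1.0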
Example #4
def print_frame(name_re):
    for m_frame in fn.frames(name_re):
        #m_frame = fn.frame(299)
        print('Unincorporated', [x.name for x in m_frame.lexUnit.values() if 'incorporatedFE' not in x])
        for relation in m_frame['frameRelations']:
            print('  ', relation)
        for fe in m_frame['FE']:
            ailment_lus = [x for x in m_frame.lexUnit.values() if 'incorporatedFE' in x and x.incorporatedFE == fe]
            print('  ', fe)
            print('  ', [x.name for x in ailment_lus])
        print('\n')
Example #5
def expandByGraph(mappinglist):
    expandlist = list()
    for item in mappinglist:
        expandlist.append(item)
        for frame in fn.frames():
            if frame.name == item:
                for fr in frame.frameRelations:
                    if fr.type.name == 'Inheritance':
                        if 'Child' in fr and fr.Child.name != item:
                            expandlist.append(fr.Child.name)
                        elif 'Parent' in fr and fr.Parent.name != item:
                            expandlist.append(fr.Parent.name)
                    elif fr.type.name == 'See_also':
                        if 'ReferringEntry' in fr and fr.ReferringEntry.name != item:
                            expandlist.append(fr.ReferringEntry.name)
    return list(set(expandlist))
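Scanning fn.frames() once per item is costly; a variant (a sketch, not the project's code) can fetch each frame directly with fn.frame_by_name and skip names that are not frames:

from nltk.corpus import framenet as fn
from nltk.corpus.reader.framenet import FramenetError

def expandByGraphDirect(mappinglist):
    expandlist = set(mappinglist)
    for item in mappinglist:
        try:
            frame = fn.frame_by_name(item)
        except FramenetError:
            continue  # not a frame name
        for fr in frame.frameRelations:
            if fr.type.name == 'Inheritance':
                if 'Child' in fr and fr.Child.name != item:
                    expandlist.add(fr.Child.name)
                elif 'Parent' in fr and fr.Parent.name != item:
                    expandlist.add(fr.Parent.name)
            elif fr.type.name == 'See_also':
                if 'ReferringEntry' in fr and fr.ReferringEntry.name != item:
                    expandlist.add(fr.ReferringEntry.name)
    return list(expandlist)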
Example #6
def get_frame_to_root_information(di_g, fn, roots, verbose=0):
    """get all the relations from frames to their roots"""
    frame_to_root_information = {}

    for frame_obj in fn.frames():
        frame = frame_obj.name
        if not di_g.has_node(frame):
            root_information = [{
                'subframe': frame,
                'root': frame,
                'the_path': [frame],
                'len_path': 1
            }]
        else:
            root_information = []
            for root in roots:
                if nx.has_path(di_g, root, frame):
                    the_path = nx.shortest_path(di_g, root, frame)
                    len_path = len(the_path)
                    root_info = {
                        'subframe': frame,
                        'root': root,
                        'the_path': the_path,
                        'len_path': len_path
                    }
                    root_information.append(root_info)
        # check for >= 2 root paths
        #chosen_root_info = {}
        #min_path_length = 100000

        #for root_info in root_information:
        #    if root_info['len_path'] < min_path_length:
        #        min_path_length = root_info['len_path']
        #        chosen_root_info = root_info
        #assert chosen_root_info != {}

        frame_to_root_information[frame] = root_information

    assert len(frame_to_root_information) == 1221
    #path_lengths = [root_info['len_path']
    #                for root_info in frame_to_root_information.values()]

    #if verbose >= 1:
    #    print()
    #    print(f'distribution of path lengths: {Counter(path_lengths)}')
    return frame_to_root_information
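di_g and roots come from the caller; one plausible construction (an assumption about the project, not taken from it) is a directed graph over Inheritance relations, with roots being frames that inherit from nothing:

import networkx as nx
from nltk.corpus import framenet as fn

di_g = nx.DiGraph()
for frame in fn.frames():
    for rel in frame.frameRelations:
        if rel.type.name == 'Inheritance' and 'Parent' in rel and 'Child' in rel:
            di_g.add_edge(rel.Parent.name, rel.Child.name)  # edges point parent -> child
roots = [n for n in di_g.nodes() if di_g.in_degree(n) == 0]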
Example #7
def extract_framenet():
    results = []
    frames = fn.frames()
    for f in frames:
        temp = {}
        f_name = f.name
        f_definition = f.definition
        f_lexunit = f.lexUnit
        f_fes = f.FE

        temp['name'] = f_name
        temp['definition'] = f_definition
        temp['lexunit'] = list(f_lexunit.keys())
        temp['fes'] = [[fe, f_fes[fe].coreType, f_fes[fe].definition] for fe in f_fes]

        results.append(temp)

    with open('../data/frame.json', 'w') as file_object:
        json.dump(results, file_object)
Example #8
def getFrameSetForStudent(surname, list_len=5):
    frameList = []
    nof_frames = len(fn.frames())
    base_idx = (
        abs(int(hashlib.sha512(surname.encode('utf-8')).hexdigest(), 16)) %
        nof_frames)
    print('\nstudent: ' + surname)
    framenet_IDs = get_frams_IDs()
    i = 0
    offset = 0
    seed(1)
    while i < list_len:
        fID = framenet_IDs[(base_idx + offset) % nof_frames]
        f = fn.frame(fID)
        fNAME = f.name
        print('\tID: {a:4d}\tframe: {framename}'.format(a=fID,
                                                        framename=fNAME))
        offset = randint(0, nof_frames)
        frameList.append(fID)
        i += 1
    return frameList
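Because seed(1) is called on every invocation, the offset sequence, and hence the frame list, is reproducible per surname; for example (hypothetical surname):

ids_a = getFrameSetForStudent('Rossi')
ids_b = getFrameSetForStudent('Rossi')
assert ids_a == ids_b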
Example #9
def extract_examples():
    results = []
    frames = fn.frames()
    for f in frames:
        for lu in f.lexUnit:
            examples = f.lexUnit[lu].exemplars
            for example in examples:
                temp = {}
                temp['name'] = f.name
                temp['lexunit'] = lu
                temp['text'] = example.text
                if 'Target' in example:
                    temp['target'] = example.Target
                else:
                    print(example.text)
                temp['fe'] = example.FE
                results.append(temp)
                if len(results) % 100 == 0:
                    print('Processing...', len(results))

    with open('../data/frame_examples.json', 'w') as file_object:
        json.dump(results, file_object)
Example #10
def print_frames_with_IDs():
    for x in fn.frames():
        print('{}\t{}'.format(x.ID, x.name))
Example #11
                pickle.dump(lcs, f)
        lcs_feats = ['lcs_eventive', 'lcs_stative']
        type_embedder['lcs'] = lcs

        # Wordnet supersenses(lexicographer names)
        synsets = wordnet.all_synsets()
        supersenses = \
              sorted(list(set(['supersense=' + x.lexname() for x in synsets])))

        # Framenet
        lem2frame = {}
        for lm in framenet.lus():
            for lemma in lm['lexemes']:
                (lem2frame[lemma['name'] + '.' + \
                        framenet_posdict[lemma['POS']]]) = lm['frame']['name']
        frame_names = sorted(['frame=' + x.name for x in framenet.frames()])
        type_embedder['lem2frame'] = lem2frame

        # Verbnet classids
        verbnet_classids = \
                     sorted(['classid=' + vcid for vcid in verbnet.classids()])

        type_hand_features = (verbnet_classids + supersenses + frame_names +
                              lcs_feats + conc_cols)
        input_size += len(type_hand_features)
        for f in type_hand_features:
            type_embedder['embedder'][f] = 0

    # Write all the feature names to a text file
    if args.type and args.token:
        with open('../../data/list_of_all_hand_eng_features.txt', 'w') as f:
Example #12
#! /usr/bin/env python
# Author: Kapil Thadani ([email protected])

from __future__ import division, with_statement
from nltk.corpus import framenet

###############################################################################

# Names of all frames in Framenet (1019 total)
frames = sorted(frame.name for frame in framenet.frames())

# Names of all possible FEs (1170 total)
fes = sorted(set(fe for frame in framenet.frames() for fe in frame.FE.keys()))

# Names of all possible frames and FEs (9633 total)
frame_fes = sorted([(frame.name, fe) for frame in framenet.frames()
                    for fe in frame.FE.keys()],
                   key=lambda x: x[0] + x[1])

###############################################################################

# Core types of FEs
coretypes = ['Core', 'Peripheral', 'Extra-Thematic']

# Names of all possible FEs and coretypes (1491 total)
fe_coretypes = sorted(set((fe, frame_element.coreType)
                          for frame in framenet.frames()
                          for fe, frame_element in frame.FE.items()),
                      key=lambda x: x[0] + x[1])

# Names of all possible frames and FEs and coretypes (9633 total)
Example #13
print_common_synsets(documents)

tps = corpus_probability(documents)

frames = extract_frames(documents)
counter = Counter(frames)
counter.most_common(25)

frames = fn.frames(r'Mental_stimulus_stimulus_focus')
for frame in frames:
    print(set(frame.lexUnit.keys()))
    lus = [x for x in frame.lexUnit.values() if 'incorporatedFE' in x]
    print('   ', [x.name for x in lus])

print_frame(r'Emotions_of_mental_activity')

frames = []
frames += fn.frames(r'.*(?i)mental.*')
frames += fn.frames(r'.*(?i)medical.*')
Example #14
        f.close()

        # LCS eventivity
        from lcsreader import LexicalConceptualStructureLexicon
        lcs = LexicalConceptualStructureLexicon(home + '/Desktop/protocols/data/verbs-English.lcs')
        lcs_feats = ['lcs_eventive', 'lcs_stative']

        # Wordnet supersenses(lexicographer names)
        supersenses = list(set(['supersense=' + x.lexname() for x in wordnet.all_synsets()]))

        # Framenet
        lem2frame = {}
        for lm in framenet.lus():
            for lemma in lm['lexemes']:
                lem2frame[lemma['name'] + '.' + framnet_posdict[lemma['POS']]] = lm['frame']['name']
        frame_names = ['frame=' + x.name for x in framenet.frames()]

        # Verbnet classids
        verbnet_classids = ['classid=' + vcid for vcid in verbnet.classids()]

        dict_feats = {}
        for f in verbnet_classids + lexical_feats + supersenses + frame_names + lcs_feats + all_ud_feature_cols + conc_cols:
            dict_feats[f] = 0

        x_pd = pd.DataFrame([features_func(sent_feat=sent, token=token, lemma=lemma, dict_feats=dict_feats.copy(), prot=args.prot, concreteness=concreteness, lcs=lcs, l2f=lem2frame) for sent, token, lemma in zip(raw_x, tokens, lemmas)])

        dev_x_pd = pd.DataFrame([features_func(sent_feat=sent, token=token, lemma=lemma, dict_feats=dict_feats.copy(), prot=args.prot, concreteness=concreteness, lcs=lcs, l2f=lem2frame) for sent, token, lemma in zip(raw_dev_x, dev_tokens, dev_lemmas)])

        test_x_pd = pd.DataFrame([features_func(sent_feat=sent, token=token, lemma=lemma, dict_feats=dict_feats.copy(), prot=args.prot, concreteness=concreteness, lcs=lcs, l2f=lem2frame) for sent, token, lemma in zip(raw_test_x, test_tokens, test_lemmas)])

        feature_names = (verbnet_classids, supersenses, frame_names, lcs_feats, conc_cols, lexical_feats, all_ud_feature_cols)
Example #15
framenetRoot=generalThing.find("framenet")
frameElement=framenetRoot.find("frame element")
lexicalUnit=framenetRoot.find("lexical unit")
semType=framenetRoot.find("semantic type")
id_=framenetRoot.find("id")
frames=framenetRoot.find("frame")

for fE in fn.fes():
	if fE.semType!=None:
		semanticTypeKatum=exactSemType(fE.semType)
		frameElementkatum=exactFE(fE)
		if(semanticTypeKatum!=None and frameElementkatum!=None):
			frameElementkatum._is(semanticTypeKatum,False)

for lU in fn.lus():
	if len(lU.semTypes)!=0:
		for semTypeInstance in lU.semTypes:
			semanticTypeKatum=exactSemType(semTypeInstance)
			lUkatum=exactlU(lU)
			if(semanticTypeKatum!=None and lUkatum!=None):
				lUkatum._is(semanticTypeKatum,False)

for frame in fn.frames():
	if len(frame.semTypes)!=0:
		for semTypeInstance in frame.semTypes:
			semanticTypeKatum=exactSemType(semTypeInstance)
			frameKatum=exactFrame(frame)
			if(semanticTypeKatum!=None and frameKatum!=None):
				frameKatum._is(semanticTypeKatum,False)

generalThing.save('wordnet-verbnet-framenet.datum')
Example #16
bert_embedding = BertEmbeddings('bert-base-cased')

from flair.embeddings import StackedEmbeddings

# now create the StackedEmbedding object that combines all embeddings
stacked_embeddings = StackedEmbeddings(
    embeddings=[
        #flair_forward_embedding,
        #flair_backward_embedding,
        bert_embedding])

import nltk
nltk.download('framenet_v17')

from nltk.corpus import framenet as fn
len(fn.frames())

txt=preprocess.read_pg(data_root + r'\EN_1818_Shelley,Mary_Frankenstein_Novel.txt')
print(len(txt), 'chars')

from segtok.segmenter import split_single
sentences = [Sentence(s, use_tokenizer=True) for s in split_single(txt)]
print(len(sentences), 'sentences')

import random as rand

t = range(100)  # rand.sample(range(len(sentences)), 100)
sents_sample = [sentences[i] for i in sorted(t)]

t = np.array(t)
_ = bert_embedding.embed(sents_sample)
Example #17
    else:
        reverse_dict[word] = {}
        reverse_dict[word][event] = features_dict


reverse_dict = {}
nominal_dict = {}
for word in word_list:
    event = event_verb_mapping[word]
    nominal_dict[word] = []
    nominals = {}
    #added_nouns = set()
    #frames = fn.frames_by_lemma(word)
    if use_framenet and word in frame_dict:
        for frame_index in frame_dict[word]:
            matched = fn.frames(frame_index)  # look up once instead of twice
            if not matched:
                print("no frame for: ", frame_index)
                continue
            frame = matched[0]
            for potential_noun in frame.lexUnit.keys():
                lemma = potential_noun.split('.')[0]
                pos = potential_noun.split('.')[1]
                if pos == 'n' or add_verbs:
                    features_dict = {}
                    features_dict['event'] = event
                    features_dict['word'] = lemma
                    features_dict['pos'] = pos
                    #features_dict['synset'] = float('nan')
                    features_dict['fn'] = 1
                    features_dict['num_wordnet'] = 0
                    features_dict['synset_percent'] = 0
Example #18
"""Yields a graph for FN instead of the unwieldy labyrinth of nested dicts
We use a philosophy similar to conllreader and put stuff in the nodes, maybe as dicts or maybe as a class"""

from nltk.corpus import framenet as fn
fn.propagate_semtypes()


framekeys = set()
frametypes = set()

for fx in fn.frames():
    for k in fx.keys():
        framekeys.add(k)
    if fx['semTypes']:
        for t in fx['semTypes']:
            frametypes.add(t['name'])
#We could read straight from the
print(frametypes)


#for k in framekeys:
#    print(k,fn.frames()[0][k])
#    print(k,fn.frames()[1][k])
#    print(k,fn.frames()[2][k])

Example #19
def closure_graph(synset, fn):
    seen = set()
    graph = nx.DiGraph()

    def recurse(s):
        if not s in seen:
            seen.add(s)
            graph.add_node(s.name())
            for s1 in fn(s):
                graph.add_node(s1.name())
                graph.add_edge(s.name(), s1.name())
                recurse(s1)

    recurse(synset)
    return graph


# fn.frames() returns a list; take the first matching frame to inspect it
dog = fn.frames(r'(?i)medical')[0]
print(dog.name)

# closure_graph expects a WordNet synset and a relation accessor
# (frames have no hypernyms), so build the example graph from a synset
from nltk.corpus import wordnet as wn
G = closure_graph(wn.synset('dog.n.01'), lambda s: s.hypernyms())
index = nx.betweenness_centrality(G)
plt.rc('figure', figsize=(12, 7))
node_size = [index[n] * 1000 for n in G]
pos = nx.spring_layout(G)
nx.draw_networkx(G,
                 pos,
                 node_size=node_size,
                 edge_color='r',
                 alpha=.3,
                 linewidths=0)
plt.show()
Example #20
    def load_framenet():
        edges = []
        for frm in fn.frames():
            # frame-frame relations
            for fe in frm.frameRelations:
                edges = pretty_frame_edge(edges, frm_id(fe.superFrameName),
                                          frm_id(fe.subFrameName),
                                          ncheck(fe.type.name))

            # lexical units
            for lu in frm.lexUnit.keys():
                edges.append([
                    frm_id(frm.name), 'fn:HasLexicalUnit',
                    lu_format(lu, frm.name)
                ])

            # FE
            for fe in frm.FE.values():
                if isinstance(fe.semType,
                              nltk.corpus.reader.framenet.AttrDict):

                    edges.append([
                        fe_id(fe.name), 'fn:HasSemType',
                        fe_semtype_id(fe.semType.name)
                    ])

                    edges.append([
                        fe_semtype_id(fe.semType.name), 'fn:st:RootType',
                        fe_semtype_id(fe.semType.rootType.name)
                    ])

                    edges.append([
                        fe_semtype_id(fe.semType.name), 'fn:st:SuperType',
                        fe_semtype_id(fe.semType.superType.name)
                    ])

                    for fesub in fe.semType.subTypes:
                        edges.append([
                            fe_semtype_id(fe.semType.name), 'fn:st:SubType',
                            fe_semtype_id(fesub.name)
                        ])

                if isinstance(fe.requiresFE,
                              nltk.corpus.reader.framenet.AttrDict):
                    edges.append([
                        fe_id(fe.name), 'fn:fe:RequiresFE',
                        fe_id(fe.requiresFE.name)
                    ])

                if isinstance(fe.excludesFE,
                              nltk.corpus.reader.framenet.AttrDict):
                    edges.append([
                        fe_id(fe.name), 'fn:fe:ExcludesFE',
                        fe_id(fe.excludesFE.name)
                    ])

                # coreType as edge feature
                edges.append(
                    [frm_id(frm.name), 'fn:HasFrameElement',
                     fe_id(fe.name)])
        return edges
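The helpers frm_id, fe_id, fe_semtype_id, lu_format, ncheck and pretty_frame_edge are project-local; minimal stand-ins (assumptions about their intent, only so the snippet can run) might look like:

def frm_id(name):
    return 'fn:' + name.replace(' ', '_')

def fe_id(name):
    return 'fn:fe:' + name.replace(' ', '_')

def fe_semtype_id(name):
    return 'fn:st:' + name.replace(' ', '_')

def lu_format(lu_name, frame_name):
    return 'fn:lu:' + frame_name + ':' + lu_name.replace(' ', '_')

def ncheck(value):
    return value if value is not None else 'None'

def pretty_frame_edge(edges, src, dst, rel_name):
    edges.append([src, 'fn:' + rel_name, dst])
    return edges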
Example #21
def get_global_frame_dictionary():
    frame_dict = {f["name"]: i for i, f in enumerate(fn.frames())}
    return frame_dict
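For example:

frame_dict = get_global_frame_dictionary()
print(frame_dict['Causation'])  # integer index of the Causation frame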
Example #22
def get_frams_IDs():
    return [f.ID for f in fn.frames()]
Example #23
    def load_framenet():
        edges = []
        for frm in fn.frames():
            # frame-frame relations
            for fe in frm.frameRelations:
                edges = pretty_frame_edge(edges, frm_id(fe.superFrameName),
                                          frm_id(fe.subFrameName),
                                          ncheck(fe.type.name))

            # lexical units
            for lu in frm.lexUnit.keys():
                edges.append([
                    frm_id(frm.name), 'fn:HasLexicalUnit',
                    lu_format(lu, frm.name)
                ])

            # FE
            for fe in frm.FE.values():
                if isinstance(fe.semType,
                              nltk.corpus.reader.framenet.AttrDict):

                    # Sem type
                    semtype_edge = [
                        fe_id(fe.name),
                        '/r/IsA',  #'fn:HasSemType',
                        fe_semtype_id(fe.semType.name)
                    ]
                    if semtype_edge not in edges:
                        edges.append(semtype_edge)

                    # Root type
                    root_edge = [
                        fe_semtype_id(fe.semType.name),
                        '/r/IsA',  # 'fn:st:RootType'
                        fe_semtype_id(fe.semType.rootType.name)
                    ]
                    if root_edge not in edges:
                        edges.append(root_edge)

                    # Super type
                    super_edge = [
                        fe_semtype_id(fe.semType.name),
                        '/r/IsA',  #'fn:st:SuperType',
                        fe_semtype_id(fe.semType.superType.name)
                    ]
                    if super_edge not in edges:
                        edges.append(super_edge)

                    # Sub type
                    for fesub in fe.semType.subTypes:
                        sub_edge = [
                            fe_semtype_id(fesub.name), '/r/IsA',
                            fe_semtype_id(fe.semType.name)
                        ]
                        if sub_edge not in edges:
                            edges.append(sub_edge)
                        #edges.append([fe_semtype_id(fe.semType.name),
                        #            'fn:st:SubType',
                        #            fe_semtype_id(fesub.name)])

                # Requires FE
                if isinstance(fe.requiresFE,
                              nltk.corpus.reader.framenet.AttrDict):
                    req_edge = [
                        fe_id(fe.name), '/r/HasPrerequisite',
                        fe_id(fe.requiresFE.name)
                    ]
                    if req_edge not in edges:
                        edges.append(req_edge)
                    #edges.append([fe_id(fe.name), 'fn:fe:RequiresFE', fe_id(fe.requiresFE.name)])

                # Excludes FE
                if isinstance(fe.excludesFE,
                              nltk.corpus.reader.framenet.AttrDict):
                    excl_edge = [
                        fe_id(fe.name), '/r/RelatedTo',
                        fe_id(fe.excludesFE.name)
                    ]
                    if excl_edge not in edges:
                        edges.append(excl_edge)
                    #edges.append([fe_id(fe.name), 'fn:fe:ExcludesFE', fe_id(fe.excludesFE.name)])

                # HasFrameElement - coreType as edge feature
                hasfe_edge = [
                    frm_id(frm.name),
                    '/r/HasA',  #'fn:HasFrameElement',
                    fe_id(fe.name)
                ]
                if hasfe_edge not in edges:
                    edges.append(hasfe_edge)
        return edges
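The "not in edges" membership tests above are linear scans over a growing list; a set of tuples (a small sketch of the alternative) keeps deduplication constant-time while preserving insertion order:

def add_unique(edges, seen, edge):
    key = tuple(edge)
    if key not in seen:
        seen.add(key)
        edges.append(edge)
    return edges

# inside load_framenet: initialize seen = set() next to edges = [],
# then call add_unique(edges, seen, semtype_edge) instead of the if-checks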
Example #25
def demo():
    from pprint import pprint
    from nltk.corpus import framenet as fn

    #
    # It is not necessary to explicitly build the indexes by calling
    # buildindexes(). We do this here just for demo purposes. If the
    # indexes are not built explicitly, they will be built as needed.
    #
    print('Building the indexes...')
    fn.buildindexes()

    #
    # Get some statistics about the corpus
    #
    print('Number of Frames:', len(fn.frames()))
    print('Number of Lexical Units:', len(fn.lexical_units()))
    print('Number of annotated documents:', len(fn.documents()))
    print()

    #
    # Frames
    #
    print(
        'getting frames whose name matches the (case insensitive) regex: "(?i)medical"'
    )
    medframes = fn.frames(r'(?i)medical')
    print('Found {0} Frames whose name matches "(?i)medical":'.format(
        len(medframes)))
    print([(f.name, f.ID) for f in medframes])

    #
    # store the first frame in the list of frames
    #
    tmp_id = medframes[0].ID
    m_frame = fn.frame(tmp_id)  # reads all info for the frame

    #
    # get the frame relations
    #
    print(
        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(
            m_frame.name, m_frame.ID), len(m_frame.frameRelation))
    for fr in m_frame.frameRelation:
        print('   ', fr.type + ":", fr.relatedFrame)

    #
    # get the names of the Frame Elements
    #
    print(
        '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
        len(m_frame.FE))
    print('   ', [x.name for x in m_frame.FE])

    #
    # get the names of the "Core" Frame Elements
    #
    print('\nThe "core" Frame Elements in the "{0}" frame:'.format(
        m_frame.name))
    print('   ', [x.name for x in m_frame.FE if x.coreType == "Core"])

    #
    # get all of the Lexical Units that are incorporated in the
    # 'Ailment' FE of the 'Medical_conditions' frame (id=239)
    #
    print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
    m_frame = fn.frame(239)
    ailment_lus = [x for x in m_frame.lexUnit if x.incorporatedFE == 'Ailment']
    print([x.name for x in ailment_lus])

    #
    # get all of the Lexical Units for the frame
    #
    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.lexUnit))
    print('  ', [x.name for x in m_frame.lexUnit[:5]], '...')

    #
    # get basic info on the second LU in the frame
    #
    tmp_id = m_frame.lexUnit[1].ID  # grab the id of the second LU
    luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
    print('\nInformation on the LU: {0}'.format(luinfo.name))
    pprint(luinfo)

    #
    # Get a list of all of the corpora used for fulltext annotation
    #
    print('\nNames of all of the corpora used for fulltext annotation:')
    allcorpora = set([x.corpname for x in fn.documents()])
    pprint(list(allcorpora))

    #
    # Get the names of the annotated documents in the first corpus
    #
    firstcorp = list(allcorpora)[0]
    firstcorp_docs = fn.documents(firstcorp)
    print('\nNames of the annotated documents in the "{0}" corpus:'.format(
        firstcorp))
    pprint([x.filename for x in firstcorp_docs])

    #
    # Search for frames containing LUs whose name attribute matches a
    # regexp pattern.
    #
    # Note: if you were going to be doing a lot of this type of
    #       searching, you'd want to build an index that maps from
    #       lemmas to frames because each time frames_by_lemma() is
    #       called, it has to search through ALL of the frame XML files
    #       in the db.
    print(
        '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":'
    )
    pprint(fn.frames_by_lemma(r'^run.v$'))
Example #26
File: frames.py Project: rsteckel/EDA
fn.lu(3238).frame.lexUnit['glint.v'] is fn.lu(3238)

fn.frame_by_name('Replacing') is fn.lus('replace.v')[0].frame

fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality')


fn.lus('look.n')[0].frame
fn.lus('look.n')[1].frame


for f in fn.lus('look.n'):
    print(f.frame.name)


result = fn.frames(r'(?i)perception')

print(result)
f = fn.frame(1301)

f.ID
f.definition
for u in f.lexUnit:
    print(u)

fn.lexical_units(r'(?i)look')


from pattern.en import wordnet

Example #28
from nltk.corpus import framenet as fn

import pattern.search as PS
from pattern.search import Pattern, Classifier, search
from pattern.en import parse, parsetree
from pattern.en import wordnet as pwn
from nltk.corpus import wordnet as wn
import pandas as pd
import numpy as np

from datasets.customers.tufamilia_dataset import TuFamilia



frames = fn.frames('Medical_conditions')
frames = fn.frames('Causation')
frame = frames[0]  #Take first match

lus = frame['lexUnit'].values()
for lu in lus:
    if 'incorporatedFE' in lu:
        print('%20s %10s' % (lu.name, lu['incorporatedFE']))
    else:
        print('%20s %10s' % (lu.name, 'No IFE'))

for relation in frame['frameRelations']:
    print('  ', relation)

for fe in frame['FE']:
Example #29
def hand_engineering(prot, batch_size, data, data_dev):
    '''
        Hand engineered feature extraction. Supports the following - UD,
        Verbnet classids, Wordnet supersenses, concreteness ratings, LCS
        eventivity scores
    '''
    home = expanduser("~")
    framnet_posdict = {
        'V': 'VERB',
        'N': 'NOUN',
        'A': 'ADJ',
        'ADV': 'ADV',
        'PREP': 'ADP',
        'NUM': 'NUM',
        'INTJ': 'INTJ',
        'ART': 'DET',
        'C': 'CCONJ',
        'SCON': 'SCONJ',
        'PRON': 'PRON',
        'IDIO': 'X',
        'AVP': 'ADV'
    }
    # Load the features
    features = {}
    with open(home + '/Desktop/protocols/data/features-2.tsv', 'r') as f:
        for line in f.readlines():
            feats = line.split('\t')
            features[feats[0]] = (feats[1].split(), feats[2].split())

    # Load the predpatt objects for creating features
    files = [
        '/Downloads/UD_English-r1.2/en-ud-train.conllu',
        '/Downloads/UD_English-r1.2/en-ud-dev.conllu',
        '/Downloads/UD_English-r1.2/en-ud-test.conllu'
    ]
    home = expanduser("~")
    options = PredPattOpts(resolve_relcl=True,
                           borrow_arg_for_relcl=True,
                           resolve_conj=False,
                           cut=True)  # Resolve relative clause
    patt = {}

    for file in files:
        path = home + file
        with open(path, 'r') as infile:
            for sent_id, ud_parse in load_conllu(infile.read()):
                patt[file[33:][:-7] + " " + sent_id] = PredPatt(ud_parse,
                                                                opts=options)

    data['Structure'] = data['Split.Sentence.ID'].map(lambda x:
                                                      (patt[x], features[x]))
    data_dev['Structure'] = data_dev['Split.Sentence.ID'].map(
        lambda x: (patt[x], features[x]))

    raw_x = data['Structure'].tolist()
    raw_dev_x = data_dev['Structure'].tolist()

    all_x = raw_x + raw_dev_x
    all_feats = '|'.join(['|'.join(all_x[i][1][0]) for i in range(len(all_x))])
    feature_cols = Counter(all_feats.split('|'))

    # All UD dataset features
    all_ud_feature_cols = list(
        feature_cols.keys()) + [(a + "_dep") for a in feature_cols.keys()]

    # Concreteness
    f = open(home + '/Desktop/protocols/data/concrete.pkl', 'rb')
    concreteness = pickle.load(f)
    if prot == 'arg':
        conc_cols = ['concreteness']
    else:
        conc_cols = ['concreteness', 'max_conc', 'min_conc']
    f.close()

    # LCS eventivity
    from lcsreader import LexicalConceptualStructureLexicon
    lcs = LexicalConceptualStructureLexicon(
        home + '/Desktop/protocols/data/verbs-English.lcs')
    lcs_feats = ['lcs_eventive', 'lcs_stative']

    # Wordnet supersenses(lexicographer names)
    supersenses = list(
        set(['supersense=' + x.lexname() for x in wordnet.all_synsets()]))

    # Framenet
    lem2frame = {}
    for lm in framenet.lus():
        for lemma in lm['lexemes']:
            lem2frame[lemma['name'] + '.' +
                      framnet_posdict[lemma['POS']]] = lm['frame']['name']
    frame_names = ['frame=' + x.name for x in framenet.frames()]

    # Verbnet classids
    verbnet_classids = ['classid=' + vcid for vcid in verbnet.classids()]

    # Lexical features
    lexical_feats = [
        'can', 'could', 'should', 'would', 'will', 'may', 'might', 'must',
        'ought', 'dare', 'need'
    ] + [
        'the', 'an', 'a', 'few', 'another', 'some', 'many', 'each', 'every',
        'this', 'that', 'any', 'most', 'all', 'both', 'these'
    ]

    dict_feats = {}
    for f in verbnet_classids + lexical_feats + supersenses + frame_names + lcs_feats + all_ud_feature_cols + conc_cols:
        dict_feats[f] = 0

    x_pd = pd.DataFrame([
        features_func(sent_feat=sent,
                      token=token,
                      lemma=lemma,
                      dict_feats=dict_feats.copy(),
                      prot=prot,
                      concreteness=concreteness,
                      lcs=lcs,
                      l2f=lem2frame) for sent, token, lemma in
        zip(raw_x, data['Root.Token'].tolist(), data['Lemma'].tolist())
    ])

    dev_x_pd = pd.DataFrame([
        features_func(sent_feat=sent,
                      token=token,
                      lemma=lemma,
                      dict_feats=dict_feats.copy(),
                      prot=prot,
                      concreteness=concreteness,
                      lcs=lcs,
                      l2f=lem2frame)
        for sent, token, lemma in zip(raw_dev_x, data_dev['Root.Token'].tolist(
        ), data_dev['Lemma'].tolist())
    ])

    # Figure out which columns to drop(they're always zero)
    todrop1 = dev_x_pd.columns[(dev_x_pd == 0).all()].values.tolist()
    todrop = x_pd.columns[(x_pd == 0).all()].values.tolist()
    intdrop = [a for a in todrop if a not in todrop1]
    cols_to_drop = list(set(todrop) - set(intdrop))

    x = x_pd.drop(cols_to_drop, axis=1).values.tolist()
    dev_x = dev_x_pd.drop(cols_to_drop, axis=1).values.tolist()

    x = [[a[:] for a in x[i:i + batch_size]]
         for i in range(0, len(data), batch_size)]
    dev_x = [[a[:] for a in dev_x[i:i + batch_size]]
             for i in range(0, len(data_dev), batch_size)]
    return x, dev_x
Example #30
File: info_ext.py Project: rsteckel/EDA


doccollections = ['NYT_19980407','NYT_19980403','NYT_19980315','APW_19980429','APW_19980424','APW_19980314']

IN = re.compile(r'.*\bin\b(?!\b.+ing)')

for doccol in doccollections:
    for doc in nltk.corpus.ieer.parsed_docs(doccol):
        relations = nltk.sem.extract_rels('PER', 'LOC', doc, corpus='ieer', pattern = IN)
        for relation in relations:
            print(nltk.sem.relextract.rtuple(relation))
            


f = fn.frames(r'(?i)perception')
len(fn.frames())
f = fn.frame(66)

f.ID
f.definition
set(f.lexUnit.keys())

[x.name for x in f.FE]

f.frameRelations


fn.frames_by_lemma(r'(?i)a little')

Example #31
                     '\t_\t' + \
                     tag + \
                     '\t_\t' + \
                     str(head_idx) + '\t' + \
                     dep + \
                     '\t_\t_\n'
        if len(s) > 0:
            conll += '\n'

    return conll


# def compile_framenet_starters():
print('loading dub frames')
dub_frames = [
    full_frame.name for full_frame in fn.frames()
    if len(full_frame.name.split('_')) > 1
]
FDD = defaultdict(list)
for dub_frame in dub_frames:
    FDD[dub_frame.split('_')[0]].append(dub_frame)
    # return fdd


# @clock
def get_frame_from_name(frame_name):
    try:
        frame = fn.frame_by_name(frame_name)
    except:
        if len(FDD[frame_name]) == 1:
            frame = fn.frame_by_name(FDD[frame_name][0])
Example #32
from nltk.corpus import framenet as fn

fs = fn.frames()

for i in fs:
    print(i.FE)
    break
Example #33
__author__ = 'juliewe'

from nltk.corpus import framenet as fn

if __name__=='__main__':
    print(len(fn.frames()))