Example No. 1
# encoding: utf-8
from nltk.corpus.reader.wordnet import WordNetCorpusReader

wn = WordNetCorpusReader(YOUR_WORDNET_PATH, '.*')  # constructing the reader directly like this gives IDE function auto-completion
print('wordnet version %s: %s' % (wn.get_version(), YOUR_WORDNET_PATH))

print('get gloss from sense key...')
# WordNet sense key format: lemma%ss_type:lex_filenum:lex_id:head_word:head_id
key = 'dance%1:04:00::'
lemma = wn.lemma_from_key(key)
synset = lemma.synset()
print(synset.definition())
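
The reverse mapping is also available: each Lemma carries its sense key. A minimal sketch, reusing `synset` from the snippet above and NLTK's `Lemma.name()` / `Lemma.key()` accessors:

# Reverse lookup: print the sense key of every lemma attached to the synset.
for lem in synset.lemmas():
    print('%s -> %s' % (lem.name(), lem.key()))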
Example No. 2
import math
import numpy as np
import collections
import re
import random
from bs4 import BeautifulSoup
from bs4 import NavigableString
import pickle
from utils import path
from nltk.corpus.reader.wordnet import WordNetCorpusReader
from nltk.stem import WordNetLemmatizer
wordnet_lemmatizer = WordNetLemmatizer()  # requires the WordNet data: import nltk; nltk.download('wordnet') (see readme.txt)

_path = path.WSD_path()
wn = WordNetCorpusReader(_path.WORDNET_PATH, '.*')
print('wordnet version %s: %s' % (wn.get_version(), _path.WORDNET_PATH))

path_words_notin_vocab = '../tmp/words_notin_vocab_{}.txt'

pos_dic = {
    'ADJ': u'a',
    'ADV': u'r',
    'NOUN': u'n',
    'VERB': u'v',
}

POS_LIST = list(pos_dic.values())  # ['a', 'r', 'n', 'v']
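
pos_dic maps universal POS tags onto the single-letter tags that WordNetLemmatizer expects. A minimal sketch of how such a mapping is typically applied (the word/tag pair below is illustrative, not taken from the dataset):

# Illustrative only: lemmatize a (word, universal POS tag) pair through pos_dic.
word, universal_tag = 'dancing', 'VERB'   # hypothetical input
wn_pos = pos_dic.get(universal_tag)
if wn_pos is not None:
    print(wordnet_lemmatizer.lemmatize(word, pos=wn_pos))  # -> dance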


def load_train_data(dataset):
    """Load the training data for `dataset`; lexical-sample sets are read from LS_TRAIN_PATH."""
    if dataset in _path.LS_DATASET:
        return load_lexical_sample_data(_path.LS_TRAIN_PATH.format(dataset), True)