# encoding: utf-8
#
# Example: querying WordNet directly through NLTK's WordNetCorpusReader.
#
# NOTE(review): these lines originally sat here as live code, but they mixed
# Python 2 print statements (`print 'x'`) with Python 3 `print(...)` calls and
# referenced an undefined YOUR_WORDNET_PATH placeholder, so the module could
# never be imported. Kept as a commented usage example instead; the real
# reader is initialized below from `path.WSD_path().WORDNET_PATH`.
#
#   from nltk.corpus.reader.wordnet import WordNetCorpusReader
#   # Constructing the reader explicitly gives IDE function autocompletion.
#   wn = WordNetCorpusReader(YOUR_WORDNET_PATH, '.*')
#   print('wordnet version %s: %s' % (wn.get_version(), YOUR_WORDNET_PATH))
#   print('get gloss from sensekey......')
#   key = 'dance%1:04:00::'
#   lemma = wn.lemma_from_key(key)
#   synset = lemma.synset()
#   print(synset.definition())
import math
import numpy as np
import collections
import re
import random
from bs4 import BeautifulSoup
from bs4 import NavigableString
import pickle
from utils import path
from nltk.corpus.reader.wordnet import WordNetCorpusReader
from nltk.stem import WordNetLemmatizer

# Shared module-level lemmatizer used when normalizing words to WordNet lemmas.
wordnet_lemmatizer = WordNetLemmatizer()
# download wordnet: import nltk; nltk.download("wordnet") in readme.txt

# Project path configuration; provides WORDNET_PATH, dataset lists, and
# the *_PATH templates used below.
_path = path.WSD_path()
# WordNet reader pointed at the project-local database ('.*' loads all files).
wn = WordNetCorpusReader(_path.WORDNET_PATH, '.*')
print('wordnet version %s: %s' % (wn.get_version(), _path.WORDNET_PATH))

# Template for the dump of out-of-vocabulary words; formatted with a
# dataset/vocab identifier by the code that writes it.
path_words_notin_vocab = '../tmp/words_notin_vocab_{}.txt'

# Map from coarse POS tag names to WordNet's single-letter POS codes
# (a=adjective, r=adverb, n=noun, v=verb).
pos_dic = {
    'ADJ': u'a',
    'ADV': u'r',
    'NOUN': u'n',
    'VERB': u'v',
}
POS_LIST = pos_dic.values()  # ['a', 'r', 'n', 'v']


def load_train_data(dataset):
    # Load training data for `dataset`. For lexical-sample datasets this
    # delegates to load_lexical_sample_data (defined elsewhere in this file)
    # with the train split path; the True flag's meaning is defined there.
    # NOTE(review): this function appears truncated in this chunk — handling
    # for non-lexical-sample (all-words) datasets likely continues beyond it.
    if dataset in _path.LS_DATASET:
        return load_lexical_sample_data(_path.LS_TRAIN_PATH.format(dataset), True)