Пример #1
0
class AcronymProcessor(Processor):
    """Turn an acronym into a phrase built from looked-up candidate words.

    The input is cut into chunks that each start with an upper-case letter;
    every chunk becomes a grammar symbol whose rules are the words returned
    by ``self.api.words`` for the wildcard pattern ``<chunk>*``.
    """

    # One upper-case letter followed by any run of non-upper-case characters.
    splitting_pattern = re.compile(r'[A-Z][^A-Z]*')

    def __init__(self):
        """Create the API client and an empty grammar with English modifiers."""
        self.api = Datamuse()

        grammar = Grammar({})
        grammar.add_modifiers(base_english)
        self.grammar = grammar

    def process_text(self, input_text: str, **kwargs) -> str:
        """Expand ``input_text`` chunk by chunk and return the flattened phrase.

        Args:
            input_text: Text to split with ``splitting_pattern``.
            **kwargs: Only ``topics`` is read; when it is not ``None`` it is
                forwarded to the word lookup.

        Returns:
            The result of flattening one ``#<chunk>.capitalize#`` reference
            per chunk, joined by single spaces.
        """
        topics = kwargs.get('topics', None)
        chunks = self.splitting_pattern.findall(input_text)

        # Phase 1: look up every chunk the grammar does not know yet.  The
        # grammar is deliberately left untouched until all lookups are done.
        looked_up: Dict[str, List[str]] = {}
        for chunk in chunks:
            if chunk in self.grammar.symbols:
                continue

            query = {'sp': '{}*'.format(chunk)}
            if topics is not None:
                query['topics'] = topics
            query['max'] = 1000

            matches = self.api.words(**query)
            looked_up[chunk] = [entry['word'] for entry in matches]

        # Phase 2: register the collected words as rules.
        for symbol, words in looked_up.items():
            self.grammar.push_rules(symbol, words)

        template = '.capitalize# #'.join(chunks)
        return self.grammar.flatten('#{}.capitalize#'.format(template))
Пример #2
0
    def setUp(self):
        """Create the client and register canned HTTP responses for the tests.

        Each JSON fixture is loaded once and registered with ``responses`` as
        the body for every query string listed next to it.
        """
        self.max = 5
        self.api = Datamuse()
        _api_url = 'https://api.datamuse.com/words'

        # (fixture file, query strings answered with that fixture).
        # Parameters are separated with '&'; a second '?' would make the whole
        # tail part of the first parameter's value.
        fixtures = (
            ('fixtures/orange.json', ('?sl=orange&max=5',
                                      '?rel_rhy=orange&max=5',
                                      '?rel_nry=orange&max=5')),
            ('fixtures/ringing.json', ('?ml=ringing+in+the+ears',)),
            ('fixtures/por.json', ('?s=por&max=3',)),
        )

        for fixture, queries in fixtures:
            _fp = pkg_resources.resource_filename(__name__, fixture)
            with open(_fp) as fixture_file:
                payload = json.load(fixture_file)

            for query in queries:
                responses.add(responses.GET,
                              urljoin(_api_url, query),
                              json=payload, status=200)
Пример #3
0
class DatamuseTestCase(unittest.TestCase):
    """Tests for ``Datamuse.words`` using the query word 'orange'."""

    def setUp(self):
        """Create a fresh client and the result cap used by every test."""
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        """An ``sl`` query returns a list."""
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # assertTrue(type(data), list) used ``list`` as the failure message
        # and could never fail; check the type for real.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        """A ``rel_rhy`` query returns at most ``self.max`` results."""
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        """A ``rel_nry`` query returns at most ``self.max`` results."""
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        """An unknown query parameter raises ``ValueError``."""
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)
Пример #4
0
class DatamuseTestCase(unittest.TestCase):
    """Checks of the Datamuse client's ``words`` call for the word 'orange'."""

    def setUp(self):
        """Build the client under test and the ``max`` value sent with queries."""
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        """The ``sl`` query result is a list."""
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # The previous assertTrue(type(data), list) always passed because the
        # second argument is only the failure message.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        """The ``rel_rhy`` query result does not exceed ``self.max`` entries."""
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        """The ``rel_nry`` query result does not exceed ``self.max`` entries."""
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        """Passing an unsupported parameter raises ``ValueError``."""
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)
Пример #5
0
    def generate_word(self):
        """Return a random word of more than five characters for the game.

        A topic is drawn at random from a fixed list, the Datamuse client is
        queried with it, and one of the returned words longer than five
        characters is drawn at random.
        """
        topics = [
            "animals",
            "plants",
            "buildings",
            "places",
            "farming",
            "school",
            "food",
        ]
        api = Datamuse()
        entries = api.words(topics=choice(topics))

        all_words = [entry["word"] for entry in entries]
        long_words = [word for word in all_words if len(word) > 5]
        return choice(long_words)
Пример #6
0
def sologame():
    """Set up and run one solo round, retrying until a round can be played.

    Resets the global ``guess`` list, draws a random hint word, asks Datamuse
    for words related to it (``ml``, ``rel_par`` and ``rel_trg`` queries) and
    keeps the purely alphabetic ones whose length fits the current level.
    Up to three of those words are handed to ``screen.Solo.game`` and
    ``main``.

    The length window is shifted up by two once the global ``score``
    exceeds 5.
    """
    global guess
    guess = []
    #  from https://pypi.python.org/pypi/RandomWords/0.1.5
    rw = RandomWords()
    hint = rw.random_word()

    #  from https://www.datamuse.com/api/
    api = Datamuse()

    foo_df = scripts.dm_to_df(api.words(ml=hint, max=4))
    loo_df = scripts.dm_to_df(api.words(rel_par=hint, max=1))
    maybe_df = scripts.dm_to_df(api.words(rel_trg=hint, max=1))

    level = 2 if score > 5 else 0
    # Both bounds are exclusive.
    shortest, longest = 3 + level, 7 + level

    candidates = set()
    for frame in (foo_df, loo_df, maybe_df):
        for word in frame['word'].values:
            if shortest < len(word) < longest and word.isalpha():
                candidates.add(word)

    # Keep at most three words.
    word_list = list(candidates)[:3]

    if len(word_list) < 2:
        # Not enough material for a round: start over and stop here, so the
        # too-short list is never played once the retried round finishes.
        return sologame()

    soloB = screen.Solo.game(word_list)
    try:
        main(word_list, soloB, hint, "solo")
    except Exception:
        # Best-effort restart when a round crashes.  SystemExit and
        # KeyboardInterrupt are allowed through so the game can be quit.
        sologame()
Пример #7
0
def game():
    """Pick a hint and a related word to guess, and reset the guess list.

    A random hint word is drawn and Datamuse is queried for words related to
    it (``ml``, ``rel_par`` and ``rel_trg`` queries).  Purely alphabetic
    results of 4 to 7 characters are candidates.  If a hint produces no
    candidate, a new hint is drawn and the lookup is repeated.

    Returns:
        tuple: ``(word, hint)``.  A tuple is always returned; the retry
        paths used to discard the result of the recursive call and return
        ``None`` to the caller.
    """
    global guess

    rw = RandomWords()
    api = Datamuse()

    while True:
        hint = rw.random_word()

        foo_df = scripts.dm_to_df(api.words(ml=hint, max=3))
        loo_df = scripts.dm_to_df(api.words(rel_par=hint, max=1))
        maybe_df = scripts.dm_to_df(api.words(rel_trg=hint, max=1))

        words = set()
        for frame in (foo_df, loo_df, maybe_df):
            for word in frame['word'].values:
                if 3 < len(word) < 8 and word.isalpha():
                    words.add(word)

        if words:
            guess = []
            return list(words)[0], hint
        # No usable word for this hint: try again with a new one.
Пример #8
0
    def __init__(self, acronym: str):
        """Prepare a grammar that can expand ``acronym`` into a phrase.

        The acronym is split with ``self.splitting_pattern``; for each chunk
        the Datamuse client is asked for words matching ``<chunk>*`` and the
        results become that chunk's grammar rules.

        Args:
            acronym: The acronym to expand.
        """
        self.acronym = acronym

        api = Datamuse()

        chunks = self.splitting_pattern.findall(acronym)
        self.length = len(chunks)

        # chunk -> candidate words (a repeated chunk is looked up again and
        # the later result wins).
        words_by_chunk: Dict[str, List[str]] = {
            chunk: [entry['word']
                    for entry in api.words(sp='{}*'.format(chunk))]
            for chunk in chunks
        }

        grammar = Grammar(words_by_chunk)
        grammar.add_modifiers(base_english)
        self.grammar = grammar

        # One capitalised symbol reference per chunk, separated by spaces.
        joined = '.capitalize# #'.join(chunks)
        self.rule = '#{}.capitalize#'.format(joined)
Пример #9
0
 def setUp(self):
     """Create the Datamuse client and the result cap used by the tests."""
     self.api, self.max = Datamuse(), 5
Пример #10
0
 def setUp(self):
     """Prepare a fresh API client and set ``self.max`` to 5."""
     client = Datamuse()
     self.api = client
     self.max = 5
Пример #11
0
import random
from datamuse import Datamuse

# Module-wide Datamuse client.
api = Datamuse()

# Default syllable count (presumably per generated line -- confirm at the
# point of use).
default_syllables = 8

# Topics available by default.
default_topics = [
    'cars', 'movies', 'books', 'music', 'people', 'earth', 'buildings',
    'games', 'animals', 'fruits', 'vegetables', 'adventures', 'planets',
    'government', 'enlightenment', 'peace', 'gangs', 'war', 'soul',
    'religion', 'politics',
]

common_words = [
	'the',
Пример #12
0
class DatamuseTestCase(unittest.TestCase):
    """Tests for the Datamuse client: ``words`` queries and the max default."""

    def setUp(self):
        """Create a fresh client and the result cap used by the word tests."""
        self.api = Datamuse()
        self.max = 5

    # words endpoint
    def test_sounds_like(self):
        """An ``sl`` query returns a list."""
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # assertTrue(type(data), list) treated ``list`` as the failure
        # message and could never fail.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        """A ``rel_rhy`` query returns at most ``self.max`` results."""
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        """A ``rel_nry`` query returns at most ``self.max`` results."""
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("near rhyme", data)

    def test_bad_request(self):
        """An unknown query parameter raises ``ValueError``."""
        args = {'foo': 42}
        with self.assertRaises(ValueError):
            self.api.words(**args)

    def test_set_max(self):
        """``set_max_default`` changes ``api.max`` and the result count."""
        # assertTrue(self.api.max, 100) only checked truthiness; compare the
        # value so a changed default is noticed.
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        """Each out-of-range default is rejected with ``ValueError``."""
        # One assertRaises block per value: inside a shared block the first
        # raise ends the block and the remaining calls would never run.
        for invalid in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(invalid)
Пример #13
0
import unittest
import datamuse
from datamuse import Datamuse
from glove import loadGloveModel
import Queue
import unicodedata
from tsne import projectWordsNoFile
from tsne import projectWordsNoFileDoubleProject
import numpy as np

# Datamuse API client used by this script.
dm = Datamuse()



# Load the GloVe embeddings from the data directory.
model,vectors,keys = loadGloveModel("../data/glove.6B.50d.txt")

# print keys[0:100]

# Words of interest.  Every entry must be followed by a comma: adjacent
# string literals are silently concatenated ("king" "beheaded" used to
# become the single entry "kingbeheaded").
cryptic = ['mother','ordered','sail','fabrics','materials',"mater",
     "they", "trade", "in", "french","sea","songs","merchants","mer","chants",
     "chew","honeydew","fruit","melon","lemon",
     "lap", "dancing", "friend", "pal",
     "outlaw", "leader", "managing", "money","banking","ban","king",
     "beheaded", "celebrity", "is", "sailor", "tar","star",
     "challenging", "sweetheart", "heartlessly", "daring","darling",
     "found", "ermine", "deer", "hides", "damaged","undermined"
     ]


# Start out empty here.
crypticKeys = []
crypticVectors = []
Пример #14
0
class DatamuseTestCase(unittest.TestCase):
    """Tests for the Datamuse client backed by canned JSON fixtures."""

    # NOTE(review): ``responses.activate`` wraps only ``setUp`` here, so the
    # registrations below may no longer be active when the test methods run
    # -- confirm whether the tests are meant to hit the fixtures or the live
    # service.
    @responses.activate
    def setUp(self):
        """Create the client and register one canned response per query."""
        self.max = 5
        self.api = Datamuse()
        _api_url = 'https://api.datamuse.com/words'

        # (fixture file, query strings answered with that fixture).
        # Parameters are separated with '&'; a second '?' would make the
        # whole tail part of the first parameter's value.
        fixtures = (
            ('fixtures/orange.json', ('?sl=orange&max=5',
                                      '?rel_rhy=orange&max=5',
                                      '?rel_nry=orange&max=5')),
            ('fixtures/ringing.json', ('?ml=ringing+in+the+ears',)),
            ('fixtures/por.json', ('?s=por&max=3',)),
        )

        for fixture, queries in fixtures:
            _fp = pkg_resources.resource_filename(__name__, fixture)
            with open(_fp) as fixture_file:
                payload = json.load(fixture_file)

            for query in queries:
                responses.add(responses.GET,
                              urljoin(_api_url, query),
                              json=payload, status=200)

    def test_sounds_like(self):
        """An ``sl`` query returns a list."""
        args = {'sl': 'orange', 'max': self.max}
        data = self.api.words(**args)
        # assertTrue(type(data), list) treated ``list`` as the failure
        # message and could never fail.
        self.assertIsInstance(data, list)
        print("sounds like", data)

    def test_rhymes(self):
        """A ``rel_rhy`` query returns at most ``self.max`` results."""
        args = {'rel_rhy': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("rhyme", data)

    def test_near_rhymes(self):
        """A ``rel_nry`` query returns at most ``self.max`` results."""
        args = {'rel_nry': 'orange', 'max': self.max}
        data = self.api.words(**args)
        self.assertLessEqual(len(data), self.max)
        print("near rhyme", data)

    def test_set_max(self):
        """``set_max_default`` changes ``api.max`` and the result count."""
        # assertTrue(self.api.max, 100) only checked truthiness.
        self.assertEqual(self.api.max, 100)
        self.api.set_max_default(10)
        self.assertEqual(self.api.max, 10)
        data = self.api.words(ml='ringing in the ears')
        self.assertEqual(len(data), 10)

    def test_set_max_error(self):
        """Each out-of-range default is rejected with ``ValueError``."""
        # One assertRaises block per value: inside a shared block the first
        # raise ends the block and the remaining calls would never run.
        for invalid in (-2, 0, 1001):
            with self.assertRaises(ValueError):
                self.api.set_max_default(invalid)

    def test_suggest(self):
        """``suggest`` returns a three-item list whose second word is 'porque'."""
        response = self.api.suggest(s='por', max_results=3, vocabulary='es')
        # unittest assertions are used because bare ``assert`` statements are
        # stripped when Python runs with -O.
        self.assertEqual(len(response), 3)
        self.assertIsInstance(response, list)
        self.assertEqual(response[1]['word'], 'porque')
Пример #15
0
    def __init__(self):
        """Create the Datamuse client and an empty grammar with English modifiers."""
        self.api = Datamuse()

        grammar = Grammar({})
        grammar.add_modifiers(base_english)
        self.grammar = grammar
Пример #16
0
    def __init__(self, input_texts: str):
        """Load NLP resources, build the replacement patterns and warm the synonym cache.

        Steps, in order:

        1. Download the NLTK ``punkt`` resource and load the spaCy model
           ``en_core_web_lg``.
        2. Configure an ``LsaSummarizer`` with an English stemmer and stop words.
        3. Load the synonym cache from ``src/syns.yaml`` when that file exists.
        4. Build ``self.patterns`` from ``src/spreadr_shreddr/data.yaml`` and
           pycorpora data, then add capitalised variants of every entry.
        5. Walk every token of every input text and, for each
           ``(lower-cased form, tag)`` key not cached yet, look up synonyms and
           append the result to ``src/syns.yaml``.

        Args:
            input_texts: The texts to scan.
                NOTE(review): annotated as ``str`` but iterated as a
                collection of texts -- confirm the intended type.
        """
        nltk.download('punkt')

        self.nlp = spacy.load('en_core_web_lg')

        self.summarizer = LsaSummarizer(Stemmer('english'))
        self.summarizer.stop_words = get_stop_words('english')

        self.cleaner = CleaningProcessor()

        # Synonym cache: key -> list of synonyms, or None when none were kept.
        self.synonyms: Dict[str, Optional[List[str]]] = {}
        if path.isfile('src/syns.yaml'):
            with open('src/syns.yaml', 'r') as f:
                self.synonyms = yaml.safe_load(f)

        # safe_load yields None for an empty file; fall back to an empty cache.
        if self.synonyms is None:
            self.synonyms = {}

        self.patterns: Dict[str, str] = OrderedDict()
        # Left empty in this method.
        self.rev_patterns: Dict[str, str] = OrderedDict()

        with open('src/spreadr_shreddr/data.yaml', 'r') as f:
            data = yaml.safe_load(f)

        # 'expand' is applied after 'shorten', so it wins on duplicate keys.
        self.patterns.update(data['shorten'])
        self.patterns.update(data['expand'])

        data['filler'].extend(
            pycorpora.get_file('humans', 'prefixes')['prefixes'])

        # Every filler maps to the empty string.
        self.patterns.update({k: '' for k in data['filler']})

        # "first second" -> compound word, unless a pattern already uses the key.
        for obj in pycorpora.get_file('words', 'compounds')['compounds']:
            key = '{} {}'.format(obj['firstWord'], obj['secondWord'])
            if key not in self.patterns:
                self.patterns[key] = obj['compoundWord']

        # Add a capitalised twin for every pattern; the dict comprehension is
        # fully built before update() runs.
        self.patterns.update(
            {k.capitalize(): v.capitalize()
             for k, v in self.patterns.items()})

        self.brits = data['brit_am']
        # Inverse mapping of self.brits.
        self.murcans = {v: k for k, v in self.brits.items()}

        changed = False
        api = Datamuse()
        for text in input_texts:
            # Presumably CleaningProcessor implements the reflected shift
            # operator to clean the text -- confirm.
            text >>= self.cleaner

            for sent in sent_tokenize(text):
                for index, word in enumerate(self.nlp(sent)):
                    orth = word.orth_.lower()
                    key = self.separator.join((orth, word.tag_))

                    if key not in self.synonyms:
                        changed = True
                        syns: List[str] = []

                        # Query Datamuse only for mapped parts of speech with
                        # at most one WordNet synset.
                        if (word.pos_ in UNIVERSAL_TO_DATAMUSE
                                and len(wn.synsets(orth)) <= 1):
                            res = api.words(ml=orth)

                            if len(res) > 0:
                                # NOTE(review): ``sent`` is a string here, so
                                # ' '.join(sent) puts a space between every
                                # character -- confirm this is intended.
                                syns = self._get_synonyms(
                                    ' '.join(sent), (index, word), res)

                        # Fewer than two synonyms are recorded as None.
                        if len(syns) > 1:
                            self.synonyms[key] = syns
                        else:
                            self.synonyms[key] = None

                    # Append each newly resolved key to the cache file
                    # straight away.
                    if changed:
                        changed = False
                        with open('src/syns.yaml', 'a') as f:
                            f.write(yaml.dump({key: self.synonyms[key]}))
Пример #17
0
from file_manager import File_manager
from naive import Naive
from datamuse import Datamuse
import asyncio

# Input files: training instances and the instances to classify.
TRAIN_FILENAME = "train_data.tsv"
TEST_FILENAME = "data_estag_ds.tsv"

if __name__ == '__main__':

    # Load the training and test instances.
    filemng = File_manager()
    train_instances = filemng.get_instances_of_file(TRAIN_FILENAME)
    test_instances = filemng.get_instances_of_file(TEST_FILENAME)

    nv = Naive()
    dic = filemng.read_dictionary()
    dtmuse = Datamuse()

    # Run the asynchronous synonym lookup to completion.  The loop is closed
    # in ``finally`` so it is released even when the lookup raises.
    loop = asyncio.get_event_loop()
    try:
        category_synonymous = loop.run_until_complete(
            dtmuse.get_synonymous('smartphone'))
    finally:
        loop.close()

    # SECURITY NOTE(review): eval() executes whatever text the lookup
    # returned.  If that text comes from a remote service it is untrusted
    # input; consider ast.literal_eval or json.loads once the payload format
    # is confirmed.
    category_synonymous = eval(category_synonymous)

    nv.naive_bayes(train_instances, dic, test_instances, category_synonymous)