Example #1
def synonymsDatamuse(term, max):
    api = datamuse.Datamuse()
    response = api.words(ml=term.lower())
    resultslist = []
    counter = 0
    for i in response:
        if (i.get('tags') and ('syn' in i['tags'])):
            if (counter < max):
                word = i['word'].lower().replace("_", " ")
                resultslist.append(word)
                counter += 1
            else:
                break
    if (len(resultslist) == 0 and len(response) > 0):
        counter2 = 0
        maxwords = min(5, len(response))
        for i in response:
            if (i.get('tags') and ('ant' not in i['tags'])):
                if (counter2 < maxwords):
                    word = i['word'].lower().replace("_", " ")
                    resultslist.append(word)
                    counter2 += 1
                else:
                    break
    return resultslist
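A minimal usage sketch, assuming the python-datamuse package is installed (pip install python-datamuse); the sample word and limit are arbitrary:

from datamuse import datamuse  # module referenced inside synonymsDatamuse

print(synonymsDatamuse("happy", 5))
# e.g. ['glad', 'content', 'pleased', ...] -- actual words come from the live API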
Example #2
def give_pickup_lines(key_word, num_lines_weight=2, num_lines_random=1):
    """
    Inputs:
    key_word - the category for which you want related pickuplines
    num_lines_weight - how many relevant pickup lines you want based on weights
    num_lines_random - how many relevant pickup lines you want that are random
    """
    api = datamuse.Datamuse()
    related_words = get_related_words(key_word)
    category_dict = find_category(related_words)

    if category_dict is False:
        # No matching category yet: build one from trigger words.
        # initialize_all_dicts() (see Example #26) loads the pickled
        # dictionaries that create_category() expects.
        all_dicts = initialize_all_dicts()
        trigger_words = get_trigger_words(related_words, key_word)
        category_dict, all_dicts = create_category(key_word, trigger_words,
                                                   all_dicts)

    num_lines_random, num_lines_weight = check_num_lines(
        category_dict, num_lines_weight, num_lines_random)
    pul_list, temp_dict = find_top_weights(category_dict, num_lines_weight)

    return pul_list, category_dict
Example #3
def getDatamuse():
    api = datamuse.Datamuse()
    source = 'datamuse'

    def callApi(terms, gender):
        # to store words from this source and check for multiple definitions of the same word
        words = []
        for term in terms:
            results = api.words(ml=term, max=1000, md='dp')
            for result in results:
                word = result['word'].lower()
                # check if it's a noun
                if word not in discardSet:
                    if ('tags' in result and 'n' in result['tags']):
                        if ('defs' in result):
                            definition = result['defs']
                        else:
                            definition = getWordDefinition(word)
                        if (definition != ' ' and definition is not None
                                and len(definition) > 0):
                            if word in wordSet:
                                entry = findWordInArray(word, words)
                                if entry is not None:
                                    addDefinition(entry, definition)
                                continue
                            elif isValidWord(word):  # word is guaranteed not in wordSet here
                                processWord(word, definition, source, words,
                                            gender)
        allWords.extend(words)

    callApi(femaleTermsArr, 'female')
    callApi(maleTermsArr, 'male')
    print('datamuse done')
Example #4
def get_rhymes(input_lyric):
    # Get words that rhyme with input word
    api = datamuse.Datamuse()
    rhymes = api.words(rel_rhy=input_lyric, max=75)
    sorted_output = sorted(rhymes, key=lambda i: i['numSyllables'])
    rhymes = [x['word'] for x in sorted_output]
    return [word.replace('"', '') for word in rhymes]
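Since the function sorts by numSyllables before extracting the words, callers get the shortest rhymes first. A hedged usage sketch (python-datamuse assumed installed):

from datamuse import datamuse

print(get_rhymes("fire")[:10])  # e.g. ['buyer', 'liar', 'wire', ...]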
Example #5
def suggest_word_datamuse(w, topics, max_res=100):
    datamuseapi = datamuse.Datamuse()
    # Every relation is queried the same way, so loop over the Datamuse
    # parameter names and the tag each one should carry in the output.
    relations = [
        ('ml', 'means'),              # "means like"
        ('rel_syn', 'synonym'),       # synonyms
        ('rel_spc', 'type'),          # "kind of" relations
        ('rel_gen', 'exampleOf'),     # "more general than" relations
        ('rel_com', 'hasComponent'),  # "comprises" relations
        ('rel_par', 'partOf'),        # "part of" relations
    ]
    results = []
    for param, tag in relations:
        frame = pd.DataFrame.from_dict(
            datamuseapi.words(**{param: w, 'topics': topics, 'max': max_res}))
        frame['tag'] = tag
        results.append(frame)

    output = pd.concat(results, ignore_index=True, sort=True)

    return output
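A usage sketch for the refactored function above; 'engine' and the 'car' topic hint are arbitrary examples, and the exact rows depend on the live API:

import pandas as pd
from datamuse import datamuse

df = suggest_word_datamuse('engine', 'car', max_res=20)
print(df.head())  # one DataFrame, with a 'tag' column naming each relation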
Example #6
def get_related_words(key_word, n=10):
    """Input: keyword, n
       Output: list containing keyword and up to n related words (10 by default)"""
    api = datamuse.Datamuse()
    related_words = [key_word]
    related_words += get_list(api.words(ml=key_word, max=n), key_word)
    return related_words
Example #7
def get_trigger_words(related_words, key_word, n=10):
    """Input: list of related_words, n
       Output: list containing all of the related words, as well as n trigger words per related words
       """
    api = datamuse.Datamuse()
    trigger_words = []
    trigger_words += related_words
    trigger_words += (get_list(api.words(rel_trg=key_word, max=n), key_word))
    return trigger_words
Example #8
def create_list_of_near_names(name):
    api = datamuse.Datamuse()
    list_of_dictionaries = api.words(sl=name, max=10)
    list_of_names = []
    for i in list_of_dictionaries:
        if i['score'] == 100:
            list_of_names.append(i['word'])
    if name not in list_of_names:
        list_of_names.append(name)
    return list_of_names
Example #9
def get_datamuse_sysnonymous(word):
    api = datamuse.Datamuse()
    terms = api.words(ml=word)
    # keep only the results tagged as nouns
    syns = []
    for t in terms:
        if 'tags' in t and 'n' in t['tags']:
            syns.append(t)
    return syns
Example #10
def findNearNames(name):
    api = datamuse.Datamuse()
    listofDictionaries = api.words(sl=name, max=10)
    nameSet = set()
    for i in listofDictionaries:
        if i['score'] == 100:
            nameSet.add(i['word'])
    nameSet.add(name)
    nameList = list(nameSet)
    if len(nameSet) > 5:
        nameList = nameList[:4]
    return nameList
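A quick sketch of calling the sounds-like lookup above (python-datamuse assumed installed); only perfect phonetic matches (score == 100) are kept:

from datamuse import datamuse

print(findNearNames("kris"))  # e.g. ['kris', 'chris', ...]; set order is not guaranteed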
Example #11
    def __init__(self, lyrics: str):
        """Initialize song object.

        Arguments:
            lyrics {str} -- lyrics string to analyze
        """
        self.api = datamuse.Datamuse()
        self.lyrics = lyrics
        # strip punctuation, then split on spaces and newlines
        replace_all_punc = re.sub(r"[.,:?!;\-()']", "", self.lyrics)
        self.lyrics_array = re.split(r"[ \n]", replace_all_punc)
        self.rhymes = self.generate_rhymes(self.lyrics_array)

        self.blacklist = ['a', 'the', 'can', 'an']
Example #12
def get_rhyme_dict(top_words, api=None):
	print('Getting rhyming dictionaries...')
	rhyme_dict = {}
	if not api:
		api = datamuse.Datamuse()
	for word in tqdm(top_words):
		rhymes = api.words(rel_rhy=word, max=20)
		rhymes = [x['word'] for x in rhymes]
		near_rhymes = api.words(rel_nry=word, max=20)
		near_rhymes = [x['word'] for x in near_rhymes]
		all_rhymes = rhymes + near_rhymes
		rhyme_dict[word] = all_rhymes
	return rhyme_dict
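The excerpt assumes tqdm and the datamuse module are imported at module level; a minimal sketch of driving it:

from datamuse import datamuse
from tqdm import tqdm  # progress bar used inside get_rhyme_dict

rhyme_dict = get_rhyme_dict(['cat', 'moon'])
print(rhyme_dict['cat'][:5])  # exact rhymes first, then near rhymes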
Example #13
def get_datamuse_sysnonymous(word, size):
    api = datamuse.Datamuse()
    terms = api.words(ml=word)
    # keep only the words tagged as nouns
    syns = []
    for t in terms:
        if 'tags' in t and 'n' in t['tags']:
            syns.append(t['word'])
    # size == -1 means "return everything"
    if size != -1 and len(syns) > size:
        return syns[0:size]
    return syns
Example #14
    def __init__(self, context=None):
        """
        context -- text to be parsed and used as context
                   (can be raw or nltk.Text)
        """
        self.datamuse = datamuse.Datamuse()
        self.syllable_dict = {}
        self.last_seen_word = None
        self.contextual = False

        if context:
            self.context = context
            self.contextual = True
            self._prepare_for_poet()
        print("contextual mode" if self.contextual else "free mode")
Example #15
def downloadData():
    api = datamuse.Datamuse()
    try:
        words = open("words_alpha.txt", encoding='utf-8')
        rhymes = open("rhymes.txt", 'a', encoding='utf-8')
        progress = open("progress.txt", 'r', encoding='utf-8')

        line_no = int(progress.readline())
        progress.close()
        line_begin = line_no
        print(line_no)
        for i, line in enumerate(words):
            if (i == line_no):
                if (line_no < line_begin + 75000):
                    line = line.rstrip()
                    api_rhymes = []  # so the check below works even if the call fails
                    try:
                        api_rhymes = api.words(rel_rhy=line, max=5)
                        for rh in api_rhymes:
                            rhymes.write(line + ';' + rh["word"] + '\n')
                        print(line)
                    except Exception:
                        print("json decode on " + line)

                    # fall back to near rhymes when fewer than 5 exact ones exist
                    if len(api_rhymes) < 5:
                        try:
                            near_rhymes = api.words(rel_nry=line, max=5)
                            for rh in near_rhymes:
                                rhymes.write(line + ';' + rh["word"] + '\n')
                            print(line)
                        except Exception:
                            print("json decode on " + line)
                    rhymes.flush()
                    line_no = line_no + 1
                else:
                    break
    finally:
        progress = open("progress.txt", 'w', encoding='utf-8')
        progress.write(str(line_no))
        words.close()
        rhymes.close()
        progress.close()
Example #16
def get_rhyming_words(to_rhyme):
    """
    params: str

    This function uses Datamuse to check for rhyming words
    with the last syllable of the user's name.
    It checks against the original last syllable,
    calls get_alternate_syllable if that yields no matches,
    and returns empty list if neither yields matches.

    returns: list
    """
    api = datamuse.Datamuse()

    # Does first check for exact rhymes for last syllable
    rhyme_matches = api.words(rel_rhy=to_rhyme, md='ps')
    match_list = get_list(rhyme_matches)

    # Does second check for exact rhymes for alternate syllable
    if match_list == []:
        alt_syll = get_alternate_syllable(to_rhyme)
        rhyme_matches = api.words(rel_rhy=alt_syll, md='ps')
        match_list = get_list(rhyme_matches)

    # Does third check for common English words
    url = "http://www.desiquintans.com/downloads/nounlist/nounlist.txt"
    file = urllib.request.urlopen(url)

    text_list = []
    for line in file:
        decoded_line = line.decode("utf-8")
        decoded_line = decoded_line.strip()
        text_list.append(decoded_line)

    # keep only the rhymes that appear in the common-noun list
    noun_set = set(text_list)
    matched_list = [rhyme for rhyme in match_list if rhyme in noun_set]

    if matched_list == []:
        return match_list
    else:
        return matched_list
Example #17
def bfs(u, v) -> list:
    api = datamuse.Datamuse()
    SFMap = {}
    q = deque()
    q.append(u)
    SFMap[u.title] = NodeData(0, None)

    lst = createList(v, api)

    while q:
        u = q.popleft()

        print("Popping " + u.title + "...")
        if u == v:
            return pathify(SFMap, v)

        for pageTitle in u.links:
            if (len(lst) == 1 or pageTitle.lower() in lst):

                try:
                    wikiPage = wikipedia.page(pageTitle, auto_suggest=False)
                except wikipedia.DisambiguationError:
                    continue
                except wikipedia.PageError:
                    continue
                pageDist = 1 + SFMap.get(u.title).dist

                if (wikiPage.title not in SFMap):
                    print("Adding " + pageTitle + "...")
                    SFMap[wikiPage.title] = NodeData(pageDist, u)
                    q.append(wikiPage)
                    if (len(q) % 100 == 0):
                        print("Queue Size: " + str(len(q)) + " articles!")
                        print("\n")

                if (wikiPage == v):
                    q.appendleft(wikiPage)
                    break

    return []
Example #18
def api(word):
    """
    Requires: word is a string
    Modifies: nothing
    Effects:  uses datamuse api to return list
              of words that rhyme with word
    """

    # makes rhymer an instance of Datamuse class
    rhymer = datamuse.Datamuse()

    # makes rhymes_dict a list of dictionaries
    # that contain the rhyming words and some
    # extra info that we don't need
    perfect_rhymes = rhymer.words(rel_rhy=word)
    near_rhymes = rhymer.words(rel_nry=word)
    rhymes_dict = perfect_rhymes
    rhymes_dict += near_rhymes

    # strips the unneeded keys ('score', 'numSyllables') from each
    # result, leaving just the list of rhyming words
    rhymes = [entry['word'] for entry in rhymes_dict]

    return rhymes
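With the cleanup above, api() returns a plain list of strings (perfect rhymes followed by near rhymes). A hedged usage sketch:

from datamuse import datamuse

print(api('orange')[:5])  # e.g. ['door hinge', ...] once near rhymes are included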
Example #19
def give_pickup_lines(key_word, num_lines_weight=2, num_lines_random=3):
    """
    Inputs:
    key_word - the category for which you want related pickuplines
    num_lines_weight - how many relevant pickup lines you want based on weights
    num_lines_random - how many relevant pickup lines you want that are random
    """
    #setting up datamuse api

    api = datamuse.Datamuse()

    #pickling in the database of pickup lines
    pickle_files = find_pickled_files()
    all_dicts = get_dicts(pickle_files)

    # print(all_dicts)

    while True:
        # reset the requested line counts for each new keyword
        num_lines_weight = 2
        num_lines_random = 1
        #taking a keyword and searching for relevant categories
        key_word = standardize_format(
            input(
                "Beep boop! Give me one key word and I'll give you pickup lines! Beep boop! \n"
            ))
        related_words = get_related_words(key_word)
        category_dict = find_category(related_words)

        #if no categories exist, the script makes its own and returns it
        if category_dict is False:
            print(
                "Hmm... I don't think anyone's asked for that key word before! Let me search around in my bigger database."
            )
            trigger_words = get_trigger_words(related_words, key_word)
            new_category, all_dicts = create_category(key_word, trigger_words,
                                                      all_dicts)

            num_lines_random, num_lines_weight = check_num_lines(
                new_category, num_lines_weight, num_lines_random)
            if num_lines_random + num_lines_weight == 0:
                print(
                    "Whoops, didn't find any relevant pickup lines for that. Feel free to try other words though!"
                )

            pul_list, temp_dict = find_top_weights(new_category,
                                                   num_lines_weight)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelevant \n"
                )
                new_category, all_dicts = adjust_weight(
                    user_input, key_word, new_category, pul, all_dicts)
                serialize_dicts(all_dicts)
            pul_list, temp_dict = find_random(temp_dict, num_lines_random)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelevant \n"
                )
                new_category, all_dicts = adjust_weight(
                    user_input, key_word, new_category, pul, all_dicts)
                serialize_dicts(all_dicts)

        #if a category does exist, the script returns the relevant category
        else:
            num_lines_random, num_lines_weight = check_num_lines(
                category_dict, num_lines_weight, num_lines_random)
            if num_lines_random + num_lines_weight == 0:
                print(
                    "Whoops, didn't find any relevant pickup lines for that. Feel free to try other words though!"
                )

            pul_list, temp_dict = find_top_weights(category_dict,
                                                   num_lines_weight)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelevant \n"
                )
                category_dict, all_dicts = adjust_weight(
                    user_input, key_word, category_dict, pul, all_dicts)
                serialize_dicts(all_dicts)
            pul_list, temp_dict = find_random(temp_dict, num_lines_random)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelevant \n"
                )
                category_dict, all_dicts = adjust_weight(
                    user_input, key_word, category_dict, pul, all_dicts)
                serialize_dicts(all_dicts)
Example #20
 def __init__(self, bot, db):
     self.bot = bot
     self.db = db
     self.api = datamuse.Datamuse()
Example #21
 def __init__(self):
     self.api = datamuse.Datamuse()
     self.app = ClarifaiApp(api_key=config.get_api_key())
     self.workflow = Workflow(self.app.api, workflow_id="rebus-workflow")
Example #22
#print(define_word('food'))
#print('food' in words.words())

#from datamuse import datamuse
#dm = datamuse.Datamuse()
#orange_rhymes = dm.words(rel_rhy='orange', max=5)
#print(orange_rhymes)

#import requests
#import re
#import xml
#r = requests.get('http://en.wiktionary.org/w/index.php?title=test&printable=yes')
#c = requests.get('http://en.wiktionary.org/w/index.php', params={'title': 'test', 'printable': 'yes'})
#print(r.text)


#import requests
# we will be using requests to query the unofficial Google Dictionary API 
# from https://googledictionaryapi.eu-gb.mybluemix.net/

#r = requests.get('https://mydictionaryapi.appspot.com', params={'define': 'test'})
#print(r.text["meaning"])



from datamuse import datamuse
dm = datamuse.Datamuse()
w = dm.words(rel_jjb='industrialization', md='d,f', max=10)
for word in w:
    print(word['word'])
    print(word['tags'])
Example #23
import tweepy
from datamuse import datamuse
from random import randint
# credentials redacted; substitute your own Twitter API keys
CONSUMER_KEY = "YOUR_CONSUMER_KEY"
CONSUMER_SECRET = "YOUR_CONSUMER_SECRET"
ACCESS_TOKEN = "YOUR_ACCESS_TOKEN"
ACCESS_TOKEN_SECRET = "YOUR_ACCESS_TOKEN_SECRET"

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)

api = tweepy.API(auth)
api2 = datamuse.Datamuse()

#public_tweets = api.home_timeline()
#for tweet in public_tweets:
#    print (tweet.text)


def word(term):  # renamed parameter: 'str' shadowed the builtin
    b = api2.words(rel_rhy=term, max=10)
    return b


print("Enter a word fam")
userWord = input("")
anyword = word(userWord)

#dict1 = word[2]
#print(dict1)
#del dict1['score']
Example #24
def uploaded_file(filename, s, e):
    import fitz
    import pytesseract
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
    pdffile = filename
    doc = fitz.open('static' + '/' + filename)
    for i in range(int(s) - 1, int(e)):
        page = doc.loadPage(i)  # number of page
        pix = page.getPixmap()
        output = "outfile" + str(i) + ".png"
        pix.writePNG(output)
    x = ''
    for i in range(int(s) - 1, int(e)):
        x += pytesseract.image_to_string(f'outfile{str(i)}.png')
    from PyDictionary import PyDictionary
    from summa import keywords
    from summa.summarizer import summarize
    import nltk
    from nltk.tokenize import sent_tokenize
    from docx import Document
    f = x
    b = str(filename.replace('.pdf', ''))
    a = x
    a = keywords.keywords(a)
    dictionary = PyDictionary()
    a = a.split('\n')
    a1 = []
    for i in a:
        x = i.split(' ')
        for j in x:
            a1.append(j)
    a1.sort(key=lambda s: len(s))
    a1.reverse()
    # keep at most the 20 longest keywords (slicing never raises)
    a1 = a1[:20]
    # deduplicate, then keep a tuple for indexed access
    a = tuple(set(a1))
    a1 = []
    for i in range(10):
        try:
            a1.append(a[i])
        except:
            pass
    from nltk.stem import WordNetLemmatizer
    lemmatizer = WordNetLemmatizer()
    a = a1
    a1 = []
    for i in a:
        a1.append(lemmatizer.lemmatize(i))
    a = list(set(a1))
    a1 = a
    a = [dictionary.meaning(i) for i in a1]

    z = sent_tokenize(summarize(f, ratio=0.25))

    doc = Document()
    doc.add_heading('Notes for ' + b, 0)
    for i in z:
        doc.add_paragraph(i)
    doc.add_heading('Vocab Words from ' + b, 0)
    for i in range(len(a)):
        c = doc.add_paragraph(str(i + 1) + ') ')
        c.add_run(a1[i]).bold = True
        c.add_run(': ')
        d = str(list(a[i].values()))
        d = d.replace('[', '')
        d = d.replace(']', '')
        c.add_run(d)
        g = doc.add_paragraph('')
        g.add_run('Synonyms for ')
        g.add_run(a1[i].upper() + ': ').bold = True
        from datamuse import datamuse
        api = datamuse.Datamuse()
        s = api.words(ml=a1[i], max=10)
        s1 = []
        for entry in s:
            # collect just the 'word' field from each Datamuse result
            if 'word' in entry:
                s1.append(entry['word'])
        g.add_run(str(s1).replace('[',
                                  '').replace(']',
                                              '').replace("'",
                                                          '')).italic = True
    whitelist = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    fileName = b.replace(' ', '')
    fileName = ''.join(filter(whitelist.__contains__, fileName))
    fileName += '.docx'
    doc.save(fileName)
    import cloudmersive_convert_api_client
    from cloudmersive_convert_api_client.rest import ApiException
    configuration = cloudmersive_convert_api_client.Configuration()
    configuration.api_key['Apikey'] = 'YOUR_CLOUDMERSIVE_API_KEY'  # key redacted
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        cloudmersive_convert_api_client.ApiClient(configuration))
    try:
        # Convert Word DOCX Document to PDF
        api_response = api_instance.convert_document_docx_to_pdf(fileName)
        file = open('static/' + fileName.replace('.docx', '.pdf'), 'wb')
        file.write(api_response)
        file.close()
    except ApiException as e:
        print(
            "Exception when calling ConvertDocumentApi->convert_document_docx_to_pdf: %s\n"
            % e)
    myFile = fileName.replace('.docx', '.pdf')
    myFile2 = myFile
    note = Note(noteFile=str(myFile2), creator=current_user)
    db.session.add(note)
    db.session.commit()
    myFile = url_for('.static', filename=myFile)
    return render_template('notes.html', myFile=myFile)
Example #25
def predict(word):
    api = datamuse.Datamuse()
    return api.suggest(s=word, max=6)
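predict() wraps the Datamuse autocomplete endpoint (/sug), which python-datamuse exposes as Datamuse.suggest; a minimal sketch with an arbitrary prefix:

from datamuse import datamuse

print(predict('rhy'))  # e.g. [{'word': 'rhyme', 'score': ...}, ...]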
Example #26
def initialize_all_dicts():
    api = datamuse.Datamuse()
    pickle_files = find_pickled_files()
    all_dicts = get_dicts(pickle_files)
    return all_dicts
Example #27
from googletrans import Translator as GoogleTrans
from datamuse import datamuse

translator = GoogleTrans()
similar_finder = datamuse.Datamuse()


def get_translation(word, src, dest='en'):
    if src:
        res = translator.translate(word, src=src, dest=dest)
    else:
        res = translator.translate(word, dest=dest)
    translation = {}
    translation['target'] = word
    translation['result'] = res.text
    translation['from_google'] = []
    translation['extra_similarity'] = []
    if res.extra_data['all-translations']:
        for item in res.extra_data['all-translations']:
            translation['from_google'].append(
                (item[0], [i[:2] + i[3:] for i in item[2]]))
    if len(translation['from_google']) <= 1:
        text = res.text
        # datamuse only support english similar words right now
        if dest == 'en':
            similars = similar_finder.words(ml=text, max=4)
            for item in similars:
                translation['extra_similarity'].append([
                    item['word'],
                    [i for i in item.get("tags", []) if i != 'syn'],
                    item['score']
Example #28
from googletrans import Translator
from datamuse import datamuse

translator = Translator()
api = datamuse.Datamuse()

wlFinalMl = set()
wlFinalTrg = set()
wlTranslatedMl = set()
wlTranslatedTrg = set()


def generatingWordListMl(wordList):

    for word in wordList:
        #print(len(wordList))
        if len(wordList) >= 15:
            break

        mlResults = api.words(ml=word, max=10)

        for result in mlResults:
            res = result.get("word")
            if res not in wordList:
                wlFinalMl.add(res)
                wordList.append(res)

    # return after the loop, not inside it, so every seed word gets expanded
    return wordList


def generatingWordListTrg(wordList):
Example #29
def search_products(request):
    products = []

    serializer = ProductSearchSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    query = serializer.validated_data['query']
    if not query:
        return Response(data={'error': 'Query is not valid'},
                        status=status.HTTP_400_BAD_REQUEST)

    porter_stemmer = PorterStemmer()
    query_list = [q.strip() for q in query.split()]
    stem_list = [porter_stemmer.stem(q) for q in query_list]

    Q_strict = Q()
    for index in range(len(query_list)):
        Q_product = Q(name__icontains=query_list[index]) | Q(
            description__icontains=query_list[index]) | Q(
                name__icontains=stem_list[index]) | Q(
                    description__icontains=stem_list[index])
        Q_vendor = Q(vendor__user__username__icontains=query_list[index])
        Q_strict &= Q_product | Q_vendor

    if bool(Q_strict):
        products_strict = Product.objects.filter(Q_strict)
        for product in products_strict:
            if product not in products:
                products.append(product)

    Q_soft = Q()
    for index in range(len(query_list)):
        Q_product = Q(name__icontains=query_list[index]) | Q(
            description__icontains=query_list[index]) | Q(
                name__icontains=stem_list[index]) | Q(
                    description__icontains=stem_list[index])
        Q_category = Q(subcategory__name__icontains=query_list[index]) | Q(
            subcategory__category__name__icontains=query_list[index])
        Q_vendor = Q(vendor__user__username__icontains=query_list[index])
        Q_soft |= Q_product | Q_category | Q_vendor

    if bool(Q_soft):
        products_soft = Product.objects.filter(Q_soft)
        for product in products_soft:
            if product not in products:
                products.append(product)

    datamuse_api = datamuse.Datamuse()
    keyword_list = datamuse_api.words(ml=query, max=5)
    Q_datamuse = Q()
    for keyword in keyword_list:
        word = keyword['word']
        Q_product = Q(name__icontains=word) | Q(
            description__icontains=word) | Q(
                subcategory__name__icontains=word) | Q(
                    subcategory__category__name__icontains=word)
        Q_vendor = Q(vendor__user__username__icontains=word)
        Q_datamuse |= Q_product | Q_vendor

    if bool(Q_datamuse):
        products_datamuse = Product.objects.filter(Q_datamuse)
        for product in products_datamuse:
            if product not in products:
                products.append(product)

    # Add products to search history if user is authenticated
    if (not request.user.is_anonymous) and products:
        user = request.user

        search_history = SearchHistory.objects.filter(user=user).first()
        if search_history:
            search_history.delete()

        search_history = SearchHistory(user=user)
        search_history.save()
        for product in products:
            search_history.products.add(product)

    content = ProductSerializer(products, many=True)
    return Response(data=content.data, status=status.HTTP_200_OK)
Example #30
def rap(sentences):
    used = set()
    rhymes = []
    api = datamuse.Datamuse()
    # drop empty sentences; build a new list, since removing items
    # from a list while iterating over it skips elements
    sentences = [s for s in sentences if s]
    ending_words = get_end_words(sentences)
    seen = set()
    threads = [None] * len(ending_words)
    results = [[] for _ in range(len(ending_words))]
    for x, word1 in enumerate(ending_words):
        threads[x] = Thread(
            target=thread_perf,
            args=(word1, x, ending_words, results[x], seen, api, used),
        )
        threads[x].start()
        sleep(THREAD_WAIT)

    for i in range(len(threads)):
        threads[i].join()
    for r in results:
        if r:
            for e in r:
                rhymes.append(e)
    print(rhymes)

    seen2 = set()
    if len(rhymes) < USE_MORE_MAX:
        print("USING MORE")
        threads2 = [None] * len(ending_words)
        results2 = [[] for _ in range(len(ending_words))]
        for x, word1 in enumerate(ending_words):
            threads2[x] = Thread(
                target=thread_almost,
                args=(word1, x, ending_words, results2[x], seen2, api, used),
            )
            threads2[x].start()
            sleep(THREAD_WAIT)

        for i in range(len(threads2)):
            threads2[i].join()
        for r in results2:
            if r:
                for e in r:
                    rhymes.append(e)

    print(rhymes)

    rhymes.sort(key=lambda x: abs(x[2] - x[3]))
    rhymes.sort(key=lambda x: x[2])

    couplets = []

    for r in rhymes:
        couplets.append((sentences[r[2]].replace(u'\xa0', u' '),
                         sentences[r[3]].replace(u'\xa0', u' ')))

    # couplets.sort(key=lambda x: abs(len(x[0]) - len(x[1])))
    # couplets.sort(key=lambda x: len(x[0]))
    couplets = [
        c for c in couplets
        if len(c[0]) < MAX_LINE_LENGTH and len(c[1]) < MAX_LINE_LENGTH
    ]
    return couplets