def main(): """Requests the PhraseFinder web service and prints out the result.""" # Set up your query. query = 'I like' # Optional: set the maximum number of phrases to return. options = pf.SearchOptions() options.topk = 10 # Send the request. try: result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options) if result.status != pf.Status.OK: print('Request was not successful: {}'.format(result.status)) return # Print phrases line by line. for phrase in result.phrases: print("{0:6f}".format(phrase.score), end="") for token in phrase.tokens: print(" {}".format(token.text), end="") print() except Exception as error: print('Some error occurred: {}'.format(error))
def main(query, resultdict): # Set up your query. #set the maximum number of phrases to return. options = pf.SearchOptions() options.topk = 1 # Send the request. try: result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options) if result.status != pf.Status.OK: resultdict[query] = 0 return for phrase in result.phrases: if query in resultdict.keys(): resultdict[query] = resultdict[query] + phrase.match_count else: resultdict[query] = phrase.match_count if query not in resultdict.keys(): resultdict[query] = 0 except Exception as error: resultdict[query] = 0 return
def pf_query(query='I like ?'):
    """Queries the PhraseFinder web service and returns the result.

    NOTE(review): `result` is initialized to [] and never populated, so
    this function currently always returns an empty list regardless of
    the response — presumably the TODO below was meant to fill it.
    Relies on module-level globals `params` and `ngram_totals` defined
    elsewhere in the file — TODO confirm their shape (ngram_totals is
    indexed by n-gram length minus one).
    """
    q = query
    result = []
    # Perform a request.
    try:
        response = phrasefinder.search(q, params)
        if response.status != phrasefinder.Status.Ok:
            print('Request was not successful: {}'.format(response.status))
            return result
        # Print phrases line by line.
        # TODO light pre-processing (relFreq, organizing, etc)
        for phrase in response.phrases:
            # Relative frequency: absolute match count divided by the
            # total count for n-grams of this length.
            phrase_relFreq = phrase.match_count / ngram_totals[len(phrase.tokens) - 1]
            print("{0} {1:6f} {2:6f}".format(phrase.match_count, phrase.score, phrase_relFreq), end="")
            for token in phrase.tokens:
                print(' {}_{}'.format(token.text, token.tag), end="")
            print()
            # Example output:
            #   1065105 0.268530 0.002543 I_0 like_0 to_1
            #   484768 0.122218 0.001157 I_0 like_0 the_1
            #   ...
            # Token tag meaning:
            #   0 => Given
            #   1 => Inserted
            #   2 => Alternative
            #   3 => Completed
    except Exception as error:
        # Catch-all for connection issues, malformed query, something else unforseen
        print('Some error occurred: {}'.format(error))
    return result
def ngrams(words, quiet=True):
    """Look up each word in *words* via PhraseFinder's Google Ngrams data.

    Returns a pair of parallel lists ``(match, volume)``, one entry per
    word: the top phrase's match_count and volume_count on success,
    the string '1' when the lookup returns no usable phrase, and the
    string '-1' when the request raises.  (The mixed str/int element
    types mirror the original contract and are preserved for callers.)
    When *quiet* is false, a progress counter is written to stdout.
    """
    match = []
    volume = []
    word_count = len(words)
    for counter, x in enumerate(words, start=1):
        if not quiet:
            # \r keeps the progress counter on one line.
            sys.stdout.write("\r%d/%d" % (counter, word_count))
            sys.stdout.flush()
        # Sentinel values: '1' = no data, '-1' = request failed.
        match_str = '1'
        vol_str = '1'
        try:
            # Search for term x through Google Ngrams using phrasefinder.
            result = phrasefinder.search(x)
            if result.status == phrasefinder.Status.Ok and result.phrases:
                match_str = result.phrases[0].match_count
                vol_str = result.phrases[0].volume_count
        except Exception:
            # Narrowed from a bare except: so Ctrl-C / SystemExit still work.
            match_str = '-1'
            vol_str = '-1'
        match.append(match_str)
        volume.append(vol_str)
    return match, volume
def qryGoogle(qryStr):
    """Return the Google Ngrams match count for *qryStr* via PhraseFinder.

    Returns the match_count of the top phrase, or 0 when the request
    reports a non-OK status or no phrase is found.  Re-raises any
    exception after logging it, so callers can distinguish transport
    failures from a genuine zero count.
    """
    query = qryStr
    # Only the single best phrase is needed.
    options = pf.SearchOptions()
    #options.format='tsv'
    options.topk = 1
    retval = 0
    try:
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        if result.status != pf.Status.OK:
            print('Request was not successful: {}'.format(result.status))
            # Fixed: was a bare `return` (None), breaking the int contract.
            return retval
        if result.phrases:
            retval = result.phrases[0].match_count
    except Exception as error:
        print('Some error occurred: {}'.format(error))
        raise
    return retval
def frequency_score(temp):
    """Return a corpus-frequency score for the string *temp*.

    Queries PhraseFinder for *temp* and weights the best phrase score by
    2 ** len(temp).  Returns 0 when no phrase is found (PhraseFinder
    returns no phrases).  Raises whatever pf.search raises on failure.
    """
    total = 0  # renamed from `sum`, which shadowed the builtin
    # A single query; the substring-expansion variant lives in
    # google_corpus_freq.
    for word in [temp]:
        options = pf.SearchOptions()
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, word, options)
        # Best score among returned phrases; -1 marks "no phrases".
        best = -1
        for phrase in result.phrases:
            best = max(phrase.score, best)
        if best < 0:
            continue
        # Longer strings weigh exponentially more.
        total += pow(2, len(word)) * best
    return total
def google_corpus_freq(temp):
    """Return a corpus-frequency score for *temp* from its substrings.

    Takes every substring of length 3 up to min(len(temp), 5) - 1,
    queries each against PhraseFinder, and accumulates the best phrase
    score weighted by 2 ** len(substring).  Returns 0 for strings too
    short to yield any substring.  Raises whatever pf.search raises.
    """
    total = 0  # renamed from `sum`, which shadowed the builtin
    substrings = []
    for length in range(3, min(len(temp), 5)):
        # `+ 1` so the substring ending at the last character is included
        # (the original range(0, len(temp) - length) dropped it; the
        # commented-out sibling code in frequency_score used + 1).
        for i in range(0, len(temp) - length + 1):
            substrings.append(temp[i:i + length])
    for word in substrings:
        options = pf.SearchOptions()
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, word, options)
        # Best score among returned phrases; -1 marks "no phrases".
        best = -1
        for phrase in result.phrases:
            best = max(phrase.score, best)
        if best < 0:
            continue
        total += pow(2, len(word)) * best
    return total
def new_main(query, resultdict): # Set up your query. #set the maximum number of phrases to return. options = pf.SearchOptions() options.topk = 30 # Send the request. try: result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options) if result.status != pf.Status.OK: return for phrase in result.phrases: skey = "" for token in phrase.tokens: skey = skey + token.text + " " resultdict[skey] = phrase.match_count except Exception as error: return
def ngram_search(word1, word2): options = pf.SearchOptions() options.topk = 100 # the maximum number of phrases to return. query = "*" + word1 + "*" + word2 + "*" query_rev = "*" + word2 + "*" + word1 + "*" langs = [pf.Corpus.AMERICAN_ENGLISH, pf.Corpus.BRITISH_ENGLISH] queries = [query, query_rev] counts = [] try: for query in queries: for lang in langs: result = pf.search(lang, query, options) if result.status != pf.Status.OK: print('Request was not successful: {}'.format( result.status)) return for phrase in result.phrases: counts.append(phrase.match_count) except Exception as error: print('Some error in querrying occurred: {}'.format(error)) return np.sum(np.array(counts))
# Script fragment: scores each line of `lines` (defined earlier in the
# file — presumably a list of sentences; verify against the surrounding
# code) and writes one score per line to the open file `outF`.  Also
# depends on an outer `count` counter already initialized.
for s in lines:
    # Truncate the line at its first '.', if any.
    j = 0
    for j in range(0, len(s)):
        if s[j] == '.':
            break
    temp = s[0:j]
    # Collect substrings of length 3 .. min(len(temp), 5) - 1.
    # NOTE(review): `sum` shadows the builtin, and the inner range drops
    # the substring ending at the last character (same off-by-one as
    # google_corpus_freq) — confirm whether that is intentional.
    repo = []
    sum = 0
    for l in range(3, min(len(temp), 5)):
        for i in range(0, len(temp) - l):
            substr = temp[i:i + l]
            repo.append(substr)
    # Score each substring by its best PhraseFinder phrase score,
    # weighted by 10 ** len (base 10 here vs base 2 in the sibling
    # scoring functions — TODO confirm the discrepancy is deliberate).
    for word in repo:
        query = word
        options = pf.SearchOptions()
        result = pf.search(pf.Corpus.AMERICAN_ENGLISH, query, options)
        maxno = -1  # -1 marks "no phrases returned"
        for phrase in result.phrases:
            maxno = max(phrase.score, maxno)
        if (maxno < 0):
            continue
        sum += pow(10, len(word)) * maxno
    outF.write(str(sum))
    outF.write("\n")
    count = count + 1
    print(count)
    # Hard cap: process at most 1000 lines.
    if count == 1000:
        break