import fileinput

# Read queries from stdin (or files given on the command line) and print
# every quote whose tokens all match some token of the quote.
for query in fileinput.input():
    query_tokens = query[:-1].split(' ')   # drop the trailing newline
    print(green('QUERY: ') + query)
    for quote in quotes:
        good = True
        for tok in query_tokens:
            # a query token matches if it matches any token of the quote
            good_tok = False
            for quot_tok in quote:
                if match_tokens(tok, quot_tok):
                    good_tok = True
            if not good_tok:
                good = False
                break
        if good:
            print(' ', my_highlight(query_tokens, quote))
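# The snippet above relies on helpers that are not shown at this point.
# Below is a minimal sketch of what they might look like: it assumes colorama
# for coloring and a plain case-insensitive comparison for token matching
# (the commented-out lemma print in the original hints that the real
# match_tokens may also compare lemmas). These are assumptions, not the
# original definitions.

from colorama import Fore, Style


def green(s):
    # assumed colorama-based helper: wrap a string in green ANSI codes
    return Fore.GREEN + s + Style.RESET_ALL


def match_tokens(tok, quot_tok):
    # assumed: case-insensitive exact match of surface forms
    return tok.lower() == quot_tok.lower()


# assumed: the quote corpus, token lists loaded from the same
# tokenized_quotes.txt file that the last snippet below reads
quotes = [line.split() for line in open('tokenized_quotes.txt')]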
from colorama import Fore, Style
from nltk.tokenize import word_tokenize   # assuming NLTK's tokenizer
import fileinput


def my_highlight(query_tokens, L):
    # Join the tokens of L, coloring every span that matches a query phrase.
    # (The head of this function was reconstructed; only its tail survived.)
    r = []
    i = 0
    while i < len(L):
        w = L[i]
        good = False
        for q in query_tokens:
            if i + len(q) <= len(L):
                good_token = True
                for j in range(len(q)):
                    if not match_tokens(q[j], L[i + j]):
                        good_token = False
            else:
                good_token = False
            if good_token:
                good = True
                break
        if good:
            for j in range(len(q)):
                r.append(Fore.RED + Style.BRIGHT + L[i + j] + Fore.BLACK + Style.RESET_ALL)
            i += len(q)
        else:
            r.append(w)
            i += 1
    return ' '.join(r)


print("Ready for input\n")
for query in fileinput.input():
    # Split on double quotes: pieces at even indices are single words, pieces
    # at odd indices are whole phrases that must match as a contiguous span.
    query_tokens = []
    split_res = query[:-1].split('"')
    for i in range(len(split_res)):
        if i % 2 == 0:
            query_tokens += [[w] for w in split_res[i].split(' ') if w != '']
        else:
            query_tokens += [[w for w in split_res[i].split(' ') if w != '']]
    print(query_tokens)
    print(green('QUERY: ') + query)
    # Rank the matching documents and print them best-first.
    good_documents = sorted([get_dokument(f) for f in find_quotes(query_tokens)],
                            key=lambda x: rank_document(query_tokens, x),
                            reverse=True)
    good_documents = [word_tokenize(x) for x in good_documents]
    for quote in good_documents:
        print(' ', my_highlight(query_tokens, quote))
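# find_quotes, get_dokument and rank_document are defined elsewhere in the
# original code. The sketch below only illustrates the contract they appear
# to satisfy; the implementations, the documents.txt path, and the `documents`
# list are all hypothetical, and the real version presumably uses an inverted
# index rather than a linear scan.

from collections import Counter

# assumed corpus: raw document strings, one per line of a hypothetical file
documents = [line.rstrip('\n') for line in open('documents.txt')]


def find_quotes(query_tokens):
    # assumed: return indices of documents containing every query phrase
    # as a contiguous token span
    hits = []
    for idx, doc in enumerate(documents):
        toks = doc.split()
        if all(any(toks[i:i + len(p)] == p for i in range(len(toks) - len(p) + 1))
               for p in query_tokens):
            hits.append(idx)
    return hits


def get_dokument(i):
    # assumed: fetch the raw text of document i
    return documents[i]


def rank_document(query_tokens, doc):
    # assumed ranking: total occurrence count of the query words in the document
    words = Counter(doc.split())
    return sum(words[w] for phrase in query_tokens for w in phrase)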
import sys
import time

tokens = []


def match(query, line):
    # every query word must occur somewhere in the line
    return all(w in line for w in query)


def search(q):
    return [line for line in tokens if match(q, line)]


# Load the pre-tokenized quotes, one quote per line.
for line in open('tokenized_quotes.txt'):
    L = line.split()
    tokens.append(L)

print('Go ahead!')
t0 = time.time()
for x in sys.stdin:
    L = x.split()
    print(green('QUERY: ') + ' '.join(L))
    for res in search(L):   # the original ran search(L) twice; once is enough
        print(' ', highlight(L, res))
    yellow_line()
    print()
print(time.time() - t0)
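# highlight and yellow_line come from an earlier part of the code. A minimal
# sketch, assuming colorama and the same red-on-match convention as
# my_highlight above; these are assumptions, not the original definitions.

from colorama import Fore, Style


def highlight(query, line):
    # color every token of the result line that literally appears in the query
    return ' '.join(Fore.RED + w + Style.RESET_ALL if w in query else w
                    for w in line)


def yellow_line():
    # print a yellow separator between results
    print(Fore.YELLOW + '-' * 40 + Style.RESET_ALL)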