def _search_pharse_func_tester(pharse, doc_id):
    """Ad-hoc tester: tokenize *pharse*, run a phrase search against *doc_id*.

    Each whitespace-separated token is normalized (stemming/lowercasing) by
    Token_Preprocessing_Engine before the phrase search, and the raw result
    is printed via send_stdout.

    NOTE(review): "pharse" is a typo for "phrase", but the name is kept to
    preserve the function's external interface.
    """
    t_st = Token_Preprocessing_Engine()
    # Comprehension instead of a manual append loop (same order, same tokens).
    terms = [t_st.process_token(token) for token in pharse.split()]
    result = search_pharse(terms, doc_id)
    send_stdout(result)
def process_query(query):
    """Normalize a raw query string into a list of index terms.

    When the module-level STEMMER flag is set, every whitespace-separated
    token is passed through Token_Preprocessing_Engine (stemming /
    lemmatization); otherwise tokens are simply lowercased.

    Args:
        query: the raw query string.

    Returns:
        list[str]: normalized terms, in original token order.
    """
    # STEMMER is loop-invariant, so branch once instead of per token.
    if STEMMER:
        st = Token_Preprocessing_Engine()
        return [st.process_token(token) for token in query.split()]
    return [token.lower() for token in query.split()]
def main():
    """Entry point: parse args, load the index, and print the top-k documents.

    Reads the inverted index from INDEX_FILE under the user-supplied path,
    normalizes the query terms (stemmed when STEMMER is set, lowercased
    otherwise), ranks documents by cosine similarity, and prints the best
    args.k document ids (with scores when args.score == 'y').

    Exits via sys.exit() on bad arguments, a missing index file, or an
    unparsable index.
    """
    # read arguments
    args = parse_arguments()
    if args.score not in ('y', 'n'):
        send_stdout('Error! arg "scores" should be either y or n')
        sys.exit()

    # Open the index file; `with` guarantees the handle is closed even if a
    # later step raises (the original leaked the handle on error paths).
    path = join(args.path, INDEX_FILE)
    try:
        f = open(path)
    except FileNotFoundError:
        send_stdout('Error! Index file "{}" does not exist.'.format(path))
        sys.exit()

    with f:
        # initialize query stemmer (Lemmatizer)
        if STEMMER:
            st = Token_Preprocessing_Engine()
            query = [st.process_token(t) for t in args.terms]
        else:
            query = [t.lower() for t in args.terms]

        # Read the index. `except Exception` (not bare `except:`) so
        # SystemExit / KeyboardInterrupt are not swallowed.
        try:
            read_index(f)
        except Exception:
            send_stdout('Error! Invalid index file format.')
            sys.exit()

        # compute vector space scores and report the top-k documents
        score = cosine_score(query)
        k_score = sorted(score.items(), key=lambda x: x[1], reverse=True)
        for d, s in k_score[:min(args.k, len(k_score))]:
            if args.score == 'y':
                send_stdout('{id} \t {score}'.format(id=d, score=s))
            else:
                send_stdout('{id}'.format(id=d))