def multi_word_query(query):
    """Return the index keys shared by every non-stop word of *query*.

    The query is tokenised with ``word_split(query, symbol)`` and tokens
    found in ``stop_words`` are dropped. For each remaining word the keys of
    ``inverted[word]`` are collected into a set, and the intersection of
    those sets is returned (presumably document ids -- confirm against the
    code that builds ``inverted``).

    Args:
        query: Raw query text handed to ``word_split``.

    Returns:
        A list, in arbitrary set order, of the keys common to the
        ``inverted`` entries of all remaining words. The list is empty when
        no word survives stop-word removal or when any surviving word is
        missing from ``inverted``.
    """
    words = [word for word in word_split(query, symbol) if word not in stop_words]
    if not words:
        return []

    postings = []
    for word in words:
        # A word that is not indexed cannot co-occur with anything, so the
        # intersection is necessarily empty; answer directly instead of
        # letting the lookup raise KeyError.
        if word not in inverted:
            return []
        postings.append(set(inverted[word].keys()))

    # `postings` holds at least one set here, so unpacking is safe.
    return list(set.intersection(*postings))
def get_distance(query, is_close=False):
    """Split *query* into its non-stop words and the gaps between them.

    Args:
        query: Raw query text handed to ``word_split(query, symbol)``.
        is_close: When false, every token is treated as a word and adjacent
            tokens are one position apart. When true, tokens are read as
            alternating ``word, distance, word, ...``: tokens at even
            positions are words and tokens at odd positions are parsed with
            ``int`` as the gap to the next word.

    Returns:
        A ``(words, distances)`` tuple. ``words`` lists the tokens that are
        not in ``stop_words``, in their original order. ``distances[i]`` is
        the sum of the gaps between ``words[i]`` and ``words[i + 1]``, which
        includes the gaps contributed by any stop words removed in between.
        ``distances`` is empty when fewer than two words are kept.

    Raises:
        ValueError: If ``is_close`` is true and a distance token is not a
            valid integer literal.
    """
    tokens = word_split(query, symbol)
    if is_close:
        words = tokens[::2]
        # Built as a real list because it is sliced below; a lazy `map`
        # object does not support slicing.
        gaps = [int(token) for token in tokens[1::2]]
    else:
        words = tokens
        gaps = [1] * (len(words) - 1)

    kept = [index for index, word in enumerate(words) if word not in stop_words]
    # gaps[i] separates words[i] from words[i + 1], so the distance between
    # two kept words is the sum of every gap lying between their positions.
    distances = [sum(gaps[start:end]) for start, end in zip(kept, kept[1:])]
    return [words[index] for index in kept], distances