def get_splits(run_on_word, num_splits, lexicon):
    """Return the splits of run_on_word whose pieces are all known words.

    Every combination of `num_splits` distinct interior positions of the
    string is handed to partition() (defined elsewhere; presumably it cuts
    the string at those positions), and a resulting split is kept only if
    lexicon.is_known_word() accepts each of its pieces.

    TODO: Use DP and memoize valid word splits for substrings.

    Arguments:
    - `run_on_word`: string to be split
    - `num_splits`: number of cut positions to choose for each split
    - `lexicon`: object providing is_known_word(word)
    """
    # Interior positions only: 1 .. len(run_on_word) - 1.
    cut_positions = range(1, len(run_on_word))

    # First build every candidate split, whether or not its pieces are
    # real words.
    candidates = []
    for chosen in itertools.combinations(cut_positions, num_splits):
        candidates.append(partition(run_on_word, list(chosen)))

    # Then keep only the candidates made up entirely of known words.
    known_splits = []
    for pieces in candidates:
        if all(lexicon.is_known_word(piece) for piece in pieces):
            known_splits.append(pieces)
    return known_splits
def get_splits(run_on_word, num_splits, lexicon):
    """Return all splits of run_on_word that consist only of known words.

    Candidate splits come from calling partition() (defined elsewhere) with
    each combination of `num_splits` indices drawn from
    1 .. len(run_on_word) - 1. A candidate survives only when every piece
    passes lexicon.is_known_word().

    TODO: Use DP and memoize valid word splits for substrings.

    Arguments:
    - `run_on_word`: string to be split
    - `num_splits`: how many split indices each candidate uses
    - `lexicon`: object providing is_known_word(word)
    """
    def every_piece_known(pieces):
        # Stop at the first piece the lexicon rejects.
        for piece in pieces:
            if not lexicon.is_known_word(piece):
                return False
        return True

    # All indices at which the string may be cut (never before the first
    # character or after the last one).
    interior = range(1, len(run_on_word))
    index_lists = [list(combo)
                   for combo in itertools.combinations(interior, num_splits)]
    candidates = [partition(run_on_word, indices) for indices in index_lists]
    return [pieces for pieces in candidates if every_piece_known(pieces)]
def get_corrected_split_queries(query, lexicon):
    """Undo a wrongly split word by joining adjacent words of the query.

    For every pair of neighbouring words whose concatenation is a known
    word, build a new Suggestion in which that pair is replaced by the
    joined word and all other words are left untouched.

    Assumption: a word has been split only once, so each returned
    suggestion joins exactly one pair.

    Note: The original query is NOT part of the returned list.

    Arguments:
    - `query`: Suggestion object
    - `lexicon`: lexicon of the spell checker

    Returns a list of Suggestion objects carrying the query's
    suggestion_type; the list is empty when no joined pair is a known word.
    """
    # TODO: Should probably check to see if the resultant suggestion
    # is a word/phrase/suggestion and then set its suggestion_type.
    # eg. 'No w.' (sentence) -> 'Now.' (word)
    suggestions = []
    for left in range(len(query) - 1):
        merged = query[left] + query[left + 1]
        if not lexicon.is_known_word(merged):
            continue
        # Words before the pair, the joined word, then words after the pair.
        suggestions.append(
            Suggestion(query[:left] + [merged] + query[left + 2:],
                       suggestion_type=query.suggestion_type))
    return suggestions
def get_corrected_split_queries(query, lexicon):
    """Return variants of the query with one adjacent word pair joined.

    A variant is produced for each position where the word at that position
    concatenated with its right-hand neighbour is accepted by
    lexicon.is_known_word(). Each variant is a Suggestion with the same
    suggestion_type as the query.

    Assumption: a word has been split only once.

    Note: The original query is NOT part of the returned list.

    Arguments:
    - `query`: Suggestion object
    - `lexicon`: lexicon of the spell checker
    """
    # TODO: Should probably check to see if the resultant suggestion
    # is a word/phrase/suggestion and then set its suggestion_type.
    # eg. 'No w.' (sentence) -> 'Now.' (word)
    def joined_at(pos):
        # The word at `pos` glued to the word that follows it.
        return query[pos] + query[pos + 1]

    # Lazy on purpose: each position is checked right before its
    # Suggestion is built.
    join_positions = (pos for pos in range(len(query) - 1)
                      if lexicon.is_known_word(joined_at(pos)))
    return [
        Suggestion(query[:pos] + [joined_at(pos)] + query[pos + 2:],
                   suggestion_type=query.suggestion_type)
        for pos in join_positions
    ]