示例#1
0
def get_splits(run_on_word, num_splits, lexicon):
    """Return the splits of run_on_word whose pieces are all known words.

    A candidate split is made by choosing num_splits distinct cut
    positions strictly inside run_on_word and passing them, as a list,
    to partition(). A candidate is kept only when
    lexicon.is_known_word() accepts every piece it contains.

    Arguments:
    - `run_on_word`: the string to break up
    - `num_splits`: how many cut positions each candidate uses
    - `lexicon`: object providing is_known_word(word)

    Returns a list of the surviving candidates, in the order that
    itertools.combinations generated their cut positions.

    TODO: Use DP and memoize valid word splits for substrings.
    """
    # Positions 1 .. len - 1: a cut never falls at either end of the string.
    cut_positions = range(1, len(run_on_word))

    # Build every candidate first; filtering happens in a second pass.
    candidates = []
    for chosen_cuts in itertools.combinations(cut_positions, num_splits):
        candidates.append(partition(run_on_word, list(chosen_cuts)))

    known_splits = []
    for pieces in candidates:
        if all(lexicon.is_known_word(piece) for piece in pieces):
            known_splits.append(pieces)
    return known_splits
示例#2
0
def get_splits(run_on_word, num_splits, lexicon):
    """Return the splits of run_on_word made up only of known words.

    Every way of picking num_splits cut positions from the interior of
    run_on_word is turned into a candidate split via partition(); the
    candidates whose pieces all pass lexicon.is_known_word() are
    returned as a list.

    Arguments:
    - `run_on_word`: the string to break up
    - `num_splits`: how many cut positions each candidate uses
    - `lexicon`: object providing is_known_word(word)

    TODO: Use DP and memoize valid word splits for substrings.
    """

    def _all_pieces_known(pieces):
        # True when the lexicon recognises every piece of one candidate.
        return all(lexicon.is_known_word(piece) for piece in pieces)

    # Interior positions only, so no cut lands at index 0 or at the end.
    interior = range(1, len(run_on_word))
    cut_lists = [
        list(combo)
        for combo in itertools.combinations(interior, num_splits)
    ]

    # All candidates are built before any of them is checked.
    candidates = [partition(run_on_word, cuts) for cuts in cut_lists]
    return list(filter(_all_pieces_known, candidates))
示例#3
0
def get_corrected_split_queries(query, lexicon):
    """Return suggestions in which one adjacent pair of words is joined.

    For each pair of neighbouring items in query, the two are
    concatenated; when lexicon.is_known_word() accepts the result, a
    new Suggestion is built with that pair replaced by the joined word.
    Each new Suggestion carries query.suggestion_type unchanged.

    Assumption: a word has been split only once, so only one pair is
    joined per suggestion.
    Note: The original query is NOT part of the returned list.

    Arguments:
    - `query`: Suggestion object
    - `lexicon`: lexicon of the spell checker
    """
    # TODO: Should probably check to see if the resultant suggestion
    # is a word/phrase/suggestion and then set its suggestion_type.
    # eg. 'No w.' (sentence) -> 'Now.' (word)
    merged_suggestions = []
    for i in range(len(query) - 1):
        joined_word = query[i] + query[i + 1]
        if not lexicon.is_known_word(joined_word):
            continue
        # Everything before the pair, the joined word, everything after it.
        rejoined_words = query[:i] + [joined_word] + query[i + 2:]
        merged_suggestions.append(
            Suggestion(rejoined_words,
                       suggestion_type=query.suggestion_type))
    return merged_suggestions
示例#4
0
def get_corrected_split_queries(query, lexicon):
    """Undo a single word split in query by joining neighbouring words.

    Returns a list of Suggestion objects, one for each adjacent pair in
    query whose concatenation is accepted by lexicon.is_known_word().
    In each, the pair is replaced by the joined word and the
    suggestion_type is copied from query.

    Assumption: a word has been split only once.
    Note: The original query is NOT part of the returned list.

    Arguments:
    - `query`: Suggestion object
    - `lexicon`: lexicon of the spell checker
    """
    # TODO: Should probably check to see if the resultant suggestion
    # is a word/phrase/suggestion and then set its suggestion_type.
    # eg. 'No w.' (sentence) -> 'Now.' (word)

    def _joined_word_lists():
        # Lazily yield the word list for every join the lexicon accepts.
        for left in range(len(query) - 1):
            right = left + 1
            candidate = query[left] + query[right]
            if lexicon.is_known_word(candidate):
                yield query[:left] + [candidate] + query[right + 1:]

    return [
        Suggestion(words, suggestion_type=query.suggestion_type)
        for words in _joined_word_lists()
    ]