Пример #1
0
def fast_semantic_similarity(word1, word2):
    """Return 1 if the two words overlap through their synonyms, else 0.

    Each word is expanded to its synonyms (as returned by
    ``cached_synonyms``) plus the word itself; the result is 1 when the two
    expanded collections have at least one member in common and 0 otherwise.

    The synonym collections are copied into fresh sets before the word is
    added, so the object handed back by ``cached_synonyms`` is never
    modified.  (The previous implementation appended to it in place, which
    would alter a shared cached list on every call.)
    """
    candidates1 = set(cached_synonyms(word1))
    candidates1.add(word1)
    candidates2 = set(cached_synonyms(word2))
    candidates2.add(word2)
    # Binary score: any shared member counts as a match.
    return 0 if candidates1.isdisjoint(candidates2) else 1
def solve_structured_clue(clue):
    """Solve a structured clue and return candidate answers, best first.

    ``clue`` is a mutable list whose last item is the answer pattern and
    whose second-to-last item is the answer length.  Both are popped off, so
    the caller's list is shortened by two items.  The remaining items are
    ``(phrase, kind)`` pairs; the first pair whose kind is ``'d'`` supplies
    the definition.

    Returns a list of ``(answer, score)`` tuples sorted by descending score,
    where the score is ``semantic_similarity(answer, definition)``.  Returns
    ``[]`` if the shortest partial answer built so far is already longer
    than ``length``.

    Raises ``ClueUnsolvableException`` (carrying a prefix of the clue) when a
    group produces no candidates or when no partial answer survives
    filtering.  ``list.index`` raises ``ValueError`` if no group has kind
    ``'d'``.
    """
    # The pattern and length ride along as the last two items of `clue`.
    pattern = clue.pop()
    length = clue.pop()
    # Pick the first group whose kind is 'd'; `d` itself is not used again.
    definition, d = clue[[x[1] for x in clue].index('d')]
    if len(clue) == 1:
        # this must be just a regular crossword clue (no wordplay)
        answers = [(s, semantic_similarity(s, definition)) for s in cached_synonyms(definition) if len(s) == length and matches_pattern(s, pattern)]
        return sorted(answers, key=lambda x: x[1], reverse=True)
    # NOTE(review): presumably the indices of groups whose output is consumed
    # elsewhere rather than added to the answer directly -- confirm against
    # find_skipped_groups.
    groups_to_skip = find_skipped_groups(clue)
    # One (initially empty) set of candidate strings per group.
    answer_subparts = [set([]) for x in clue]
    # Number of leading groups already folded into active_set.
    groups_added = 0
    # Partial answers built so far; starts with the empty string.
    active_set = set([''])
    count = 0
    # At most two passes over the groups, and only while some group still has
    # no candidates.  A second pass retries function groups that hit the
    # `continue` below because their arguments were not yet filled in.
    while any(len(s) == 0 for s in answer_subparts) and count < 2:
        count += 1
        for i, group in enumerate(clue):
            # Letter budget left, measured against the shortest partial answer.
            remaining_letters = length - min(len(s) for s in active_set)
            if remaining_letters < 0:
                return []
            if len(answer_subparts[i]) == 0:
                phrase, kind = group
                # Function groups are recognised by the first three
                # characters of their kind.
                if kind[:3] in FUNCTIONS:
                    func, arg_offsets = compute_arg_offsets(i, clue)
                    # Offsets are relative to i; convert to absolute indices.
                    arg_indices = [i + x for x in arg_offsets]
                    if any(len(answer_subparts[j]) == 0 for j in arg_indices):
                        # An argument has no candidates yet; this also skips
                        # the active-set update below for this group.
                        continue
                    # NOTE(review): presumably yields one argument list per
                    # combination of argument candidates -- confirm against
                    # tree_search.
                    arg_sets = tree_search([[]],
                                           [answer_subparts[ai] for ai in arg_indices],
                                           combination_func=lambda s, w: s + [w])
                    for arg_set in arg_sets:
                        # The letter budget is passed as the last positional
                        # argument of the function.
                        arg_set += [remaining_letters]
                        answer_subparts[i].update(list(FUNCTIONS[func](*arg_set)))
                else:
                    answer_subparts[i] = set(TRANSFORMS[kind](phrase, remaining_letters))
                if len(answer_subparts[i]) == 0:
                    # No candidates for this group: report the clue prefix up
                    # to this group or its furthest argument, whichever is
                    # later.
                    if kind[:3] in FUNCTIONS:
                        base_clue = clue[:i + max(0, *arg_offsets) + 1]
                    else:
                        base_clue = clue[:i + 1]
                    raise ClueUnsolvableException(base_clue)
            # print "index and subparts", i, answer_subparts
            # Extend the partial answers only once every group up to i has
            # candidates and group i is not in groups_to_skip.
            if all(len(s) > 0 for s in answer_subparts[:i + 1]) and i not in groups_to_skip:
                if i >= groups_added:
                    if VERBOSE:
                        print "updating"
                        print "current active set:", active_set
                        print "branching list:", answer_subparts[groups_added:i + 1]
                    # The first lambda tests a group index (relative to
                    # groups_added) against groups_to_skip.  The second keeps
                    # strings that fit within `length`, appear in
                    # INITIAL_NGRAMS[length][len(x)] and match the pattern.
                    # NOTE(review): how tree_search applies these two
                    # positional callbacks is not visible here -- confirm.
                    active_set = set(tree_search(active_set, answer_subparts[groups_added:i + 1], lambda x: (x + groups_added) not in groups_to_skip, lambda x: len(x) <= length and x in INITIAL_NGRAMS[length][len(x)] and matches_pattern(x, pattern)))
                    if VERBOSE:
                        print "new active set:", active_set
                    groups_added = i + 1
                if len(active_set) == 0:
                    raise ClueUnsolvableException(clue[:i + 3])

    # Keep only full-length strings found in WORDS, then rank them against
    # the definition.
    wordplay_answers = active_set
    answers = [(s, semantic_similarity(s, definition)) for s in wordplay_answers if s in WORDS and len(s) == length]
    return sorted(answers, key=lambda x: x[1], reverse=True)
Пример #3
0
def solve_structured_clue(clue):
    """Solve a structured clue and return candidate answers, best first.

    ``clue`` is a mutable list whose last item is the answer pattern and
    whose second-to-last item is the answer length.  Both are popped off, so
    the caller's list is shortened by two items.  The remaining items are
    ``(phrase, kind)`` pairs; the first pair whose kind is ``'d'`` supplies
    the definition.

    Returns a list of ``(answer, score)`` tuples sorted by descending score,
    where the score is ``semantic_similarity(answer, definition)``.  Returns
    ``[]`` if the shortest partial answer built so far is already longer
    than ``length``.

    Raises ``ClueUnsolvableException`` (carrying a prefix of the clue) when a
    group produces no candidates or when no partial answer survives
    filtering.  ``list.index`` raises ``ValueError`` if no group has kind
    ``'d'``.
    """
    # The pattern and length ride along as the last two items of `clue`.
    pattern = clue.pop()
    length = clue.pop()
    # Pick the first group whose kind is 'd'; `d` itself is not used again.
    definition, d = clue[[x[1] for x in clue].index('d')]
    if len(clue) == 1:
        # this must be just a regular crossword clue (no wordplay)
        answers = [(s, semantic_similarity(s, definition))
                   for s in cached_synonyms(definition)
                   if len(s) == length and matches_pattern(s, pattern)]
        return sorted(answers, key=lambda x: x[1], reverse=True)
    # NOTE(review): presumably the indices of groups whose output is consumed
    # elsewhere rather than added to the answer directly -- confirm against
    # find_skipped_groups.
    groups_to_skip = find_skipped_groups(clue)
    # One (initially empty) set of candidate strings per group.
    answer_subparts = [set([]) for x in clue]
    # Number of leading groups already folded into active_set.
    groups_added = 0
    # Partial answers built so far; starts with the empty string.
    active_set = set([''])
    count = 0
    # At most two passes over the groups, and only while some group still has
    # no candidates.  A second pass retries function groups that hit the
    # `continue` below because their arguments were not yet filled in.
    while any(len(s) == 0 for s in answer_subparts) and count < 2:
        count += 1
        for i, group in enumerate(clue):
            # Letter budget left, measured against the shortest partial answer.
            remaining_letters = length - min(len(s) for s in active_set)
            if remaining_letters < 0:
                return []
            if len(answer_subparts[i]) == 0:
                phrase, kind = group
                # Function groups are recognised by the first three
                # characters of their kind.
                if kind[:3] in FUNCTIONS:
                    func, arg_offsets = compute_arg_offsets(i, clue)
                    # Offsets are relative to i; convert to absolute indices.
                    arg_indices = [i + x for x in arg_offsets]
                    if any(len(answer_subparts[j]) == 0 for j in arg_indices):
                        # An argument has no candidates yet; this also skips
                        # the active-set update below for this group.
                        continue
                    # NOTE(review): presumably yields one argument list per
                    # combination of argument candidates -- confirm against
                    # tree_search.
                    arg_sets = tree_search(
                        [[]], [answer_subparts[ai] for ai in arg_indices],
                        combination_func=lambda s, w: s + [w])
                    for arg_set in arg_sets:
                        # The letter budget is passed as the last positional
                        # argument of the function.
                        arg_set += [remaining_letters]
                        answer_subparts[i].update(
                            list(FUNCTIONS[func](*arg_set)))
                else:
                    answer_subparts[i] = set(TRANSFORMS[kind](
                        phrase, remaining_letters))
                if len(answer_subparts[i]) == 0:
                    # No candidates for this group: report the clue prefix up
                    # to this group or its furthest argument, whichever is
                    # later.
                    if kind[:3] in FUNCTIONS:
                        base_clue = clue[:i + max(0, *arg_offsets) + 1]
                    else:
                        base_clue = clue[:i + 1]
                    raise ClueUnsolvableException(base_clue)
            # print "index and subparts", i, answer_subparts
            # Extend the partial answers only once every group up to i has
            # candidates and group i is not in groups_to_skip.
            if all(len(s) > 0
                   for s in answer_subparts[:i +
                                            1]) and i not in groups_to_skip:
                if i >= groups_added:
                    if VERBOSE:
                        print "updating"
                        print "current active set:", active_set
                        print "branching list:", answer_subparts[
                            groups_added:i + 1]
                    # The first lambda tests a group index (relative to
                    # groups_added) against groups_to_skip.  The second keeps
                    # strings that fit within `length`, appear in
                    # INITIAL_NGRAMS[length][len(x)] and match the pattern.
                    # NOTE(review): how tree_search applies these two
                    # positional callbacks is not visible here -- confirm.
                    active_set = set(
                        tree_search(
                            active_set, answer_subparts[groups_added:i + 1],
                            lambda x: (x + groups_added) not in groups_to_skip,
                            lambda x: len(x) <= length and x in INITIAL_NGRAMS[
                                length][len(x)] and matches_pattern(
                                    x, pattern)))
                    if VERBOSE:
                        print "new active set:", active_set
                    groups_added = i + 1
                if len(active_set) == 0:
                    raise ClueUnsolvableException(clue[:i + 3])

    # Keep only full-length strings found in WORDS, then rank them against
    # the definition.
    wordplay_answers = active_set
    answers = [(s, semantic_similarity(s, definition))
               for s in wordplay_answers if s in WORDS and len(s) == length]
    return sorted(answers, key=lambda x: x[1], reverse=True)