def _process_internal(question):
    """Turn a "Which ..." question span into a @placeholder cloze statement.

    Dispatches to a type-specific handler when the question matches a known
    WHICH_* pattern; otherwise falls back to a generic rewrite that replaces
    the leading "Which" with "@placeholder" and the trailing "?" with ".".
    """
    assert (isinstance(question, spacy.tokens.span.Span))

    # Degenerate single-token question: nothing to restructure.
    if len(question) < 2:
        return str(question) + "@placeholder"

    # Re-parse with the full pipeline so the question carries fresh annotations.
    nlp = SLoader.get_full_spacy_nlp()
    reparsed = list(nlp("Which " + str(question) + " ?").sents)[0]

    # Dispatch table: question type -> specialized handler.
    # IN_WHICH_OF / IN_WHICH_NOUN: no examples in the entire SQuAD dataset.
    handlers = {
        QType.WHICH_OF: which_of.process,
        QType.IN_WHICH_OF: in_which_of.process,
        QType.WHICH_NOUN: which_noun.process,
        QType.WHICH_BE: which_be.process,
        QType.IN_WHICH_NOUN: in_which_noun.process,
        QType.WHICH_VERB: which_verb.process,
    }
    handler = handlers.get(get_question_type(reparsed))
    if handler is not None:
        return handler(reparsed)

    # Generic fallback: drop "Which", prepend "@placeholder", end with ".".
    tokens = ["@placeholder"] + [str(tok) for tok in reparsed[1:]]
    tokens[-1] = '.'  # Replace "?" with ".".
    return ' '.join(tokens)
def _process_internal(question):
    """Turn a "What ..." question span into a @placeholder cloze statement.

    Dispatches to a type-specific handler when the question matches a known
    WHAT_* pattern; otherwise falls back to a generic rewrite that replaces
    the leading "What" with "@placeholder" and the trailing "?" with ".".
    """
    assert (isinstance(question, spacy.tokens.span.Span))

    # Degenerate single-token question: nothing to restructure.
    if len(question) < 2:
        return str(question) + "@placeholder"

    # Re-parse with the full pipeline so the question carries fresh annotations.
    nlp = SLoader.get_full_spacy_nlp()
    reparsed = list(nlp("What " + str(question) + " ?").sents)[0]

    # Dispatch table: question type -> specialized handler.
    handlers = {
        QType.WHAT_BE: what_be.process,
        QType.WHAT_DO: what_do.process,
        QType.IN_WHAT: in_what.process,
        QType.WHAT_NOUN: what_noun.process,
        QType.WHAT_VERB: what_verb.process,
    }
    handler = handlers.get(get_question_type(reparsed))
    if handler is not None:
        return handler(reparsed)

    # Generic fallback: drop "What", prepend "@placeholder", end with ".".
    tokens = ["@placeholder"] + [str(tok) for tok in reparsed[1:]]
    tokens[-1] = '.'  # Replace "?" with ".".
    return ' '.join(tokens)
def process(question):
    """Rewrite an "In <X>, ..." question by moving the "in <X>" clause to the
    end ("<main question>, in <X> ?") and delegating to _process_internal.

    Fixes over the previous version:
    - Guard against an empty token list before indexing question[0]: the
      punctuation-stripping loop below can, in principle, consume every token.
    - Capitalize only the FIRST character of the first word. The old check
      used whole-word str.isupper() + str.capitalize(), which lowercased
      interior capitals (e.g. "iPhone" -> "Iphone", "McDonald" -> "Mcdonald").
    """
    assert (isinstance(question, spacy.tokens.span.Span))
    assert (len(question) >= 3)

    # _split_question separates the leading "In ..." clause (skipped) from the
    # main question span. (Assumes skipped tokens are plain strings — the
    # assignment of "in" below relies on that.)
    skipped, question = _split_question(question)
    assert (isinstance(skipped, list))
    assert (isinstance(question, spacy.tokens.span.Span))

    if len(skipped) >= 1:
        skipped[0] = "in"  # Not "In" — it will no longer start the sentence.

    # Strip trailing punctuation (usually the "?").
    while len(question) >= 1 and question[-1].is_punct:
        question = question[:-1]

    question = [str(x) for x in question]
    # Ensure a leading capital on the (new) sentence start, touching only the
    # first character so interior capitalization is preserved.
    if question and not question[0][:1].isupper():
        question[0] = question[0][:1].upper() + question[0][1:]

    # Swap main question and "in ..." clause, then re-parse so downstream code
    # receives a properly annotated Span.
    rebuilt = ' '.join(question) + ", " + ' '.join(skipped) + " ?"
    nlp = SLoader.get_full_spacy_nlp()
    doc = nlp(rebuilt)
    return _process_internal(doc[0:len(doc)])  # Span (not Doc), as _process_internal requires.
def split_in_sentences(text):
    """Run the full spaCy pipeline on *text* and return its sentence spans as a list."""
    assert (isinstance(text, str))
    pipeline = SLoader.get_full_spacy_nlp()
    parsed = pipeline(text)
    return [sentence for sentence in parsed.sents]