示例#1
0
def get_phrases_free(question, model, nlp_path, java_path):
    """Label the words of a free-text question and fetch entity candidates.

    Keyword arguments:
    question -- question in string form
    model -- path to the pickled model for phrase detection
    nlp_path -- path to Stanford NLP tagger
    java_path -- path to Java installation

    Returns a tuple (labels, pos, q, candidates): one predicted label per
    whitespace-separated word, the output of pos_tag, the Question object
    built from the input string, and the result of
    el.obtain_entity_candidates for the parsed phrases.

    """
    # NOTE(review): unpickling can execute arbitrary code; only load model
    # files that come from a trusted source.
    # pickle needs a binary stream on Python 3, and the with-block makes
    # sure the file handle is closed as soon as the model is loaded.
    with open(model, "rb") as model_file:
        pd_model = pickle.load(model_file)

    q = Question(question, "")
    utterance = question.split()

    pos = pos_tag([q])
    ner = ner_tag([q], nlp_path, java_path)

    # Two empty slots are added on each side so the loop below can start at
    # index 2 and stop two before the end; presumably construct_feature
    # looks at neighbours of position j -- TODO confirm.
    u = ["", ""] + utterance + ["", ""]
    p = ["", ""] + [pp[1] for pp in pos[0]] + ["", ""]
    n = ["", ""] + [nn[1] for nn in ner[0]] + ["", ""]

    labels = []
    # Label passed along for the first word; every later word receives the
    # label predicted for its predecessor.
    # NOTE(review): the meaning of the value 4 is not visible in this block.
    previous = 4
    for j in range(2, len(u) - 2):
        feature = construct_feature(p, u, n, j, previous)
        previous = predict(pd_model, feature, 4)
        labels.append(previous)

    # Only the phrases are used; the second return value is discarded.
    phr, _ = sr.parse_to_phrases([q], [labels], pos)
    candidates = el.obtain_entity_candidates(phr, 5)
    return labels, pos, q, candidates
示例#2
0
def get_phrases_free(question, model, nlp_path, java_path):
    """Convert a string question to per-word labels and entity candidates.

    Keyword arguments:
    question -- question in string form
    model -- path to the pickled model for phrase detection
    nlp_path -- path to Stanford NLP tagger
    java_path -- path to Java installation

    Returns a tuple (labels, pos, q, candidates): the list of predicted
    labels (one per whitespace-separated word), the value returned by
    pos_tag, the Question object created from the input, and the value
    returned by el.obtain_entity_candidates for the parsed phrases.

    """
    # NOTE(review): pickle.load runs code embedded in the file; the model
    # path must point at trusted data.
    # Open in binary mode (required by pickle on Python 3) and close the
    # handle deterministically once the model is in memory.
    with open(model, "rb") as handle:
        pd_model = pickle.load(handle)

    q = Question(question, "")
    words = question.split()

    pos = pos_tag([q])
    ner = ner_tag([q], nlp_path, java_path)

    # Pad each sequence with two empty entries at both ends so real words
    # occupy indices 2 .. len - 3; presumably construct_feature reads a
    # window around j -- TODO confirm.
    padding = ["", ""]
    u = padding + words + padding
    p = padding + [pp[1] for pp in pos[0]] + padding
    n = padding + [nn[1] for nn in ner[0]] + padding

    labels = []
    # Starting value handed to construct_feature for the first word; after
    # that the previously predicted label is fed forward.
    # NOTE(review): what label 4 stands for is not shown in this block.
    last_label = 4
    for j in range(2, len(u) - 2):
        feature = construct_feature(p, u, n, j, last_label)
        last_label = predict(pd_model, feature, 4)
        labels.append(last_label)

    # The second element returned by parse_to_phrases is not needed here.
    phr, _ = sr.parse_to_phrases([q], [labels], pos)
    candidates = el.obtain_entity_candidates(phr, 5)
    return labels, pos, q, candidates
示例#3
0
def get_phrases(phrase, features):
    """Predict one label for every feature vector.

    Keyword arguments:
    phrase -- model handed to predict() as its first argument
    features -- iterable of feature vectors, one prediction each

    Returns the labels as a list in input order; a predicted label of 2
    is stored as 4.

    """
    # Lazily predict in input order, then apply the 2 -> 4 remapping.
    predicted = (predict(phrase, feature, 4) for feature in features)
    return [4 if label == 2 else label for label in predicted]
示例#4
0
def get_phrases(phrase, features):
    """Return the list of labels predicted for the given feature vectors.

    Keyword arguments:
    phrase -- model passed as the first argument to predict()
    features -- iterable of feature vectors to label

    The result has one entry per feature vector, in order. Whenever
    predict() yields 2, the value 4 is recorded instead.

    """
    def remap(label):
        # A predicted 2 is recorded as 4; any other label is kept as is.
        return 4 if label == 2 else label

    return [remap(predict(phrase, vector, 4)) for vector in features]
示例#5
0
def label_phrases(questions, pos_tagged, ner_tagged, weights):
    """Predict a phrase label for each word of each question.

    Keyword arguments:
    questions -- list of Question objects
    pos_tagged -- list of lists of POS tagged words
    ner_tagged -- list of lists of NER tagged words
    weights -- model trained for phrase detection

    Returns (features, labels): two flat lists, filled in step, holding
    the feature built for every word and the label predicted from it.

    """
    pad = ["", ""]
    features = []
    labels = []
    for idx, question in enumerate(questions):
        # Each sequence gets two empty entries at both ends, so the real
        # words sit at positions 2 .. len - 3.
        tokens = pad + question.utterance.split() + pad
        tags = pad + [item[1] for item in pos_tagged[idx]] + pad
        ents = pad + [item[1] for item in ner_tagged[idx]] + pad

        # Every question starts from label 4; afterwards the label just
        # predicted is fed into the next word's feature.
        prev = 4
        for k in range(2, len(tokens) - 2):
            feat = construct_feature(tags, tokens, ents, k, prev)
            prev = predict(weights, feat, 5)
            features.append(feat)
            labels.append(prev)
    return features, labels
示例#6
0
def label_phrases(questions, pos_tagged, ner_tagged, weights):
    """Build features and predict labels for all words in all questions.

    Keyword arguments:
    questions -- list of Question objects
    pos_tagged -- list of lists of POS tagged words
    ner_tagged -- list of lists of NER tagged words
    weights -- model trained for phrase detection

    Returns a (features, labels) pair of flat lists; entry k of labels is
    the prediction made from entry k of features.

    """
    def padded(items):
        # Surround the sequence with two empty entries on each side.
        return ["", ""] + items + ["", ""]

    all_features, all_labels = [], []
    for number, question in enumerate(questions):
        words = padded(question.utterance.split())
        pos_seq = padded([pair[1] for pair in pos_tagged[number]])
        ner_seq = padded([pair[1] for pair in ner_tagged[number]])

        # The label chain restarts at 4 for every question and then
        # carries the latest prediction forward word by word.
        label = 4
        first, stop = 2, len(words) - 2
        for where in range(first, stop):
            vector = construct_feature(pos_seq, words, ner_seq, where, label)
            label = predict(weights, vector, 5)
            all_features.append(vector)
            all_labels.append(label)
    return all_features, all_labels