def concept_features_for_sentence(sentence, chunk_inds):
    """
    concept_features()

    @param  sentence.   A sentence in list of chunk format
    @param  chunk_inds. A list of indices for non-None-labeled chunks
    @return             A list of feature dictionaries
    """

    # Base word-level feature dictionary, one per selected chunk
    features_list = [feat_word.concept_features_for_chunk(sentence, ind)
                     for ind in chunk_inds]

    # Layer in any optional feature groups that have been switched on
    for feature in enabled_concept_features:

        # Features: UMLS features
        if feature == "UMLS" and enabled['UMLS']:
            umls_features = feat_umls.concept_features_for_chunks(
                sentence, chunk_inds)
            # umls_features is parallel to chunk_inds / features_list
            for feats, umls_feats in zip(features_list, umls_features):
                feats.update(umls_feats)

    return features_list
# Example #2 (scraped sample separator; the stray "0" was a vote count)
def concept_features_for_sentence(sentence, chunk_inds):
    """
    concept_features()

    @param  sentence.   A sentence in list of chunk format
    @param  chunk_inds. A list of indices for non-None-labeled chunks
    @return             A list of feature dictionaries
    """

    global dependency_parser

    # Base word-level feature dictionary, one per selected chunk
    features_list = [feat_word.concept_features_for_chunk(sentence, ind)
                     for ind in chunk_inds]

    # Parse collapsed dependencies once per sentence (reused below by the
    # grammar-feature pass); stays None when no parser is configured
    dependencies = None
    if dependency_parser is not None:
        dependencies = dependency_parser.get_collapsed_dependencies(sentence)

    # Allow for particular features to be enabled
    for feature in enabled_concept_features:

        # Features: UMLS features
        if (feature == "UMLS") and enabled['UMLS']:
            umls_features = feat_umls.concept_features_for_chunks(
                sentence, chunk_inds)
            # umls_features is parallel to chunk_inds / features_list
            for feats, umls_feats in zip(features_list, umls_features):
                feats.update(umls_feats)

        # Features: dependency-grammar features — need both the PY4J bridge
        # and a successful dependency parse.  The None check is hoisted out
        # of the per-chunk loop (it is loop-invariant).
        if (feature == "grammar_features" and enabled["PY4J"]
                and dependencies is not None):
            # print(...) with a single argument is valid Python 2 and 3
            # (the original bare print statement is a SyntaxError on py3)
            print("getting grammar features")
            for i, target_index in enumerate(chunk_inds):
                features_list[i].update(
                    dependency_parser.get_related_tokens(
                        target_index, sentence, dependencies))

    # Features: word2vec cluster id for each chunk
    if enabled_modules()["WORD2VEC"]:
        print("getting vectors...")
        for i, chunk_index in enumerate(chunk_inds):

            chunk = sentence[chunk_index]
            cluster = predict_sequence_cluster(chunk)

            features_list[i].update({("cluster", cluster): 1})

    return features_list