Python Feature.get_stem_target示例

    def load_stems(self, data):
        """Return the distinct word stems used in the parsed XML data.

        Args:
            data: Parsed dataset. It must expose ``textfiles``; each text file
                must provide ``relations`` as well as the methods
                ``compute_union_relations()`` and
                ``compute_intersection_relations()``.

        Returns:
            numpy.ndarray: The result of ``np.unique`` over the target and
            source stems of every relation (sorted, without duplicates).
            An empty float array is returned when there are no relations.
        """
        # Gather the stems in a plain list and convert once at the end.
        # np.append returns a fresh copy of the whole array on every call,
        # so appending inside the loop was quadratic in the relation count.
        # NOTE(review): assumes get_stem_target()/get_stem_source() return
        # scalar values (presumably strings) -- confirm against Feature.
        collected = []
        for txt in data.textfiles:
            # Any value of self.annotations other than "union" or
            # "intersected" leaves txt.relations as it already is.
            if self.annotations == "union":
                txt.compute_union_relations()
            elif self.annotations == "intersected":
                txt.compute_intersection_relations()

            for rel in txt.relations:
                f = Feature(rel)
                collected.append(f.get_stem_target())
                collected.append(f.get_stem_source())

        # np.array([]) is an empty float64 array, which matches what the
        # previous implementation returned when nothing was collected.
        return np.unique(np.array(collected))

示例#2

显示文件

文件： train.py 项目： BeenleTian/Learning-of-Event-Timelines

def parse_Features(data, new=False, annotations="union", features=["pos", "stem", "aspect", "tense", "distance", "similarity", "polarity", "modality"], distance=False):
    """Extract a feature vector and a class label for every relation in the dataset.

    Args:
        data: The parsed data from fables-100-temporal-dependency.xml. It must
            expose ``textfiles``, each providing ``relations``.
        new (bool): With new=True a new calculation of Pos() and Stem() is
            enforced. Otherwise they are loaded from the cache file "save.p"
            when that file exists.
        annotations (str): Looking on all relations ("union") or at all
            relations in common between the annotators ("intersected").
        features (list): Determines which features should be activated.
            Possible values: "pos", "stem", "aspect", "tense", "distance",
            "similarity", "polarity", "modality". The default list is only
            read, never mutated, by this function.
        distance (bool): If set to True, distance information for the data is
            returned as well (needed for evaluation).

    Returns:
        tuple: ``(X, y)`` where ``X`` is a list holding one feature vector per
        relation and ``y`` is a NumPy array with the corresponding classes.
        When ``distance`` is True the tuple is ``(X, y, distance_diff)`` with
        ``distance_diff`` a list of ``Feature.get_distance_diff()`` values.
    """
    cache_path = "save.p"

    # Only compute pos and stem if the new flag is set or no cache exists.
    # The existence check must look at the same file that is written and read
    # below; it previously tested "set.p", so the cache was never reused.
    if "pos" in features or "stem" in features:
        if new or not os.path.isfile(cache_path):
            pos = Pos(data, 6, annotations)
            stem = Stem(data, annotations)
            with open(cache_path, "wb") as cache_file:
                pickle.dump((pos, stem), cache_file)
        else:
            # NOTE: unpickling can execute arbitrary code, so "save.p" must
            # come from a trusted source (i.e. be written by this function).
            # NOTE(review): the cached objects were built for whatever
            # `annotations` value was active when the cache was written;
            # pass new=True after switching annotation modes.
            with open(cache_path, "rb") as cache_file:
                pos, stem = pickle.load(cache_file)

    if distance:
        distance_diff = []

    X = []
    # Class labels are gathered in a list and converted once after the loop;
    # np.append copies the whole array on each call.
    labels = []

    for txt in data.textfiles:
        # Union or intersected relations?
        if annotations == "union":
            txt.compute_union_relations()
        elif annotations == "intersected":
            txt.compute_intersection_relations()

        for rel in txt.relations:
            f = Feature(rel)

            feature = []

            # Make polarity feature
            if "polarity" in features:
                feature = np.concatenate((feature, [f.get_polarity()]))

            # Make distance feature
            if "distance" in features:
                feature = np.concatenate((feature, f.get_distance()))

            # Make POS feature
            if "pos" in features:
                pos_feature = pos.transform(f.get_pos_target(), f.get_pos_source())
                pos_feature = pos_feature.toarray()[0]
                feature = np.concatenate((feature, pos_feature))

            # Make Stem feature
            # NOTE(review): arguments are passed as (source, target) here but
            # as (target, source) for the POS feature above -- confirm this
            # matches the signature of Stem.transform.
            if "stem" in features:
                stem_feature = stem.transform(f.get_stem_source(), f.get_stem_target())
                stem_feature = stem_feature[0]
                feature = np.concatenate((feature, stem_feature))

            # Make similarity feature
            if "similarity" in features:
                feature = np.concatenate((feature, [f.get_similarity_of_words()]))

            # Make modality feature
            if "modality" in features:
                feature = np.concatenate((feature, [f.get_modality()]))

            # Make aspect feature
            if "aspect" in features:
                feature = np.concatenate((feature, f.get_aspect()))

            # Make tense feature
            if "tense" in features:
                feature = np.concatenate((feature, f.get_tense()))

            # Append feature to X
            X.append(feature)
            labels.append(f.get_class())

            # Append distance information if needed
            if distance:
                distance_diff.append(f.get_distance_diff())

    # Start from an empty integer array, as before, so that the result is an
    # integer array when there are no relations; appending only when labels
    # exist avoids promoting that empty result to float.
    y = np.array([], dtype=int)
    if labels:
        y = np.append(y, labels)

    if distance:
        return (X, y, distance_diff)
    return (X, y)