Exemplo n.º 1
0
    def IOB_prose_features(self, word):
        """
        IOB_prose_features()

        Purpose: Build the feature dictionary for a single word found in prose.

        Keys are (feature name, value) tuples. A constant dummy entry is always
        present; every other entry is produced only when its feature name is
        listed in self.enabled_IOB_prose_word_features. Names that are not
        recognised are silently skipped.

        @param word. A string
        @return      A dictionary of features

        >>> wf = WordFeatures()
        >>> wf.IOB_prose_features('test') is not None
        True
        """
        # The dummy entry guarantees the result is never empty.
        feats = {('dummy', None): 1}

        for name in self.enabled_IOB_prose_word_features:

            if name == "word":
                # Lower-cased surface form of the word
                feats[(name, word.lower())] = 1

            elif name == "stem_lancaster":
                # Stem of the lower-cased word, as returned by lancaster_st
                lowered = word.lower()
                feats[(name, lancaster_st.stem(lowered))] = 1

            elif name == 'Generic#':
                # The word with every digit 0-9 replaced by '0'
                feats[('Generic#', re.sub('[0-9]', '0', word))] = 1

            elif name == 'last_two_letters':
                # Final two characters (the whole word when it is shorter)
                feats[('last_two_letters', word[-2:])] = 1

            elif name == "length":
                # Unlike the indicator features, the value is the length itself
                feats[(name, None)] = len(word)

            elif name == "stem_porter":
                # Stem of the word exactly as given (not lower-cased)
                feats[(name, porter_st.stem(word))] = 1

            elif name == "mitre":
                # One indicator per mitre pattern found anywhere in the word
                for label, pattern in self.mitre_features.items():
                    if re.search(pattern, word):
                        feats[(name, label)] = 1

            elif name == "word_shape":
                # One indicator per shape reported by getWordShapes
                for shape in getWordShapes(word):
                    feats[(name, shape)] = 1

        return feats
Exemplo n.º 2
0
    def IOB_prose_features(self, word):
        """
        IOB_prose_features()

        Purpose: Build the feature dictionary for a single word found in prose.

        Keys are (feature name, value) tuples. A constant dummy entry is always
        present; every other entry is produced only when its feature name is
        listed in self.enabled_IOB_prose_word_features. Names that are not
        recognised are silently skipped.

        @param word. A string
        @return      A dictionary of features

        >>> wf = WordFeatures()
        >>> wf.IOB_prose_features('test') is not None
        True
        """
        # The dummy entry guarantees the result is never empty.
        feats = {('dummy', None): 1}

        for name in self.enabled_IOB_prose_word_features:

            if name == "word":
                # Lower-cased surface form of the word
                feats[(name, word.lower())] = 1

            elif name == "stem_lancaster":
                # Stem of the lower-cased word, as returned by lancaster_st
                lowered = word.lower()
                feats[(name, lancaster_st.stem(lowered))] = 1

            elif name == 'Generic#':
                # The word with every digit 0-9 replaced by '0'
                feats[('Generic#', re.sub('[0-9]', '0', word))] = 1

            elif name == 'last_two_letters':
                # Final two characters (the whole word when it is shorter)
                feats[('last_two_letters', word[-2:])] = 1

            elif name == "length":
                # Unlike the indicator features, the value is the length itself
                feats[(name, None)] = len(word)

            elif name == "stem_porter":
                # Stem of the word exactly as given (not lower-cased)
                feats[(name, porter_st.stem(word))] = 1

            elif name == "mitre":
                # One indicator per mitre pattern found anywhere in the word
                for label, pattern in self.mitre_features.items():
                    if re.search(pattern, word):
                        feats[(name, label)] = 1

            elif name == "word_shape":
                # One indicator per shape reported by getWordShapes
                for shape in getWordShapes(word):
                    feats[(name, shape)] = 1

        return feats
Exemplo n.º 3
0
    def IOB_nonprose_features(self, word):
        """
        IOB_nonprose_features()

        Purpose: Build the feature dictionary for a single word found in
                 nonprose text.

        Keys are (feature name, value) tuples. The lower-cased word is always
        included; the remaining entries are produced only when their feature
        name is listed in self.enabled_IOB_nonprose_word_features. Names that
        are not recognised are silently skipped.

        @param word. A string
        @return      A dictionary of features

        >>> wf = WordFeatures()
        >>> wf.IOB_nonprose_features('test') is not None
        True
        """
        # The word itself is a feature regardless of what is enabled.
        feats = {('word', word.lower()): 1}

        for name in self.enabled_IOB_nonprose_word_features:

            if name == "mitre":
                # One indicator per mitre pattern found anywhere in the word
                for label, pattern in self.mitre_features.items():
                    if re.search(pattern, word):
                        feats[('mitre', label)] = 1

            elif name == "word_shape":
                # One indicator per shape reported by getWordShapes
                for shape in getWordShapes(word):
                    feats[('word_shape', shape)] = 1

            elif name == 'QANN':
                # Merge in whatever QANN_features returns; on a key clash the
                # QANN value replaces the one already stored.
                feats.update(self.QANN_features(word))

        return feats
Exemplo n.º 4
0
    def IOB_nonprose_features(self, word):
        """
        IOB_nonprose_features()

        Purpose: Build the feature dictionary for a single word found in
                 nonprose text.

        Keys are (feature name, value) tuples. The lower-cased word is always
        included; the remaining entries are produced only when their feature
        name is listed in self.enabled_IOB_nonprose_word_features. Names that
        are not recognised are silently skipped.

        @param word. A string
        @return      A dictionary of features

        >>> wf = WordFeatures()
        >>> wf.IOB_nonprose_features('test') is not None
        True
        """
        # The word itself is a feature regardless of what is enabled.
        feats = {('word', word.lower()): 1}

        for name in self.enabled_IOB_nonprose_word_features:

            if name == "mitre":
                # One indicator per mitre pattern found anywhere in the word
                for label, pattern in self.mitre_features.items():
                    if re.search(pattern, word):
                        feats[('mitre', label)] = 1

            elif name == "word_shape":
                # One indicator per shape reported by getWordShapes
                for shape in getWordShapes(word):
                    feats[('word_shape', shape)] = 1

            elif name == 'QANN':
                # Merge in whatever QANN_features returns; on a key clash the
                # QANN value replaces the one already stored.
                feats.update(self.QANN_features(word))

        return feats
Exemplo n.º 5
0
def feature_word_shape(word):
    """
    Build the word-shape features for a word.

    @param word. The word handed to getWordShapes
    @return      A dictionary with one ('word_shape', shape) key, mapped to 1,
                 for every shape that getWordShapes yields for the word
    """
    return {('word_shape', shape): 1 for shape in getWordShapes(word)}
Exemplo n.º 6
0
def feature_word_shape(word):
    """
    Build the word-shape features for a word.

    @param word. The word handed to getWordShapes
    @return      A dictionary with one ('word_shape', shape) key, mapped to 1,
                 for every shape that getWordShapes yields for the word
    """
    return {('word_shape', shape): 1 for shape in getWordShapes(word)}