Пример #1
0
def add_general_suffix_to_dict(self, word, word_tag):
    """Count ``word_tag`` under two suffixes of ``word``.

    The suffixes are obtained from ``Utilities.get_suffix(word, 3)`` and
    ``Utilities.get_suffix(word, 2)``, in that order. ``feature_suffix_dict``
    maps each suffix to a ``{tag: count}`` dict; the count for ``word_tag`` is
    incremented, and ``self.num_features`` grows by one every time a
    (suffix, tag) pair is seen for the first time. If both calls return the
    same suffix, the same entry is simply updated twice.

    NOTE(review): ``feature_suffix_dict`` is resolved as a module-level name,
    not as an attribute of ``self`` -- confirm that this is intended.

    Args:
        word: The word whose suffixes are counted.
        word_tag: The tag observed for ``word``.
    """
    for suffix_length in (3, 2):
        suffix = Utilities.get_suffix(word, suffix_length)
        tag_counts = feature_suffix_dict.setdefault(suffix, {})

        # A pair that was never counted before is a new feature.
        is_new_feature = word_tag not in tag_counts
        tag_counts[word_tag] = tag_counts.get(word_tag, 0) + 1
        if is_new_feature:
            self.num_features += 1
Пример #2
0
def calculate_all_dot_f_for_tuple(self):
    """Precompute the active feature indices for every (history, tag) pair.

    For each entry of ``self.history_tag_tuples`` the history (element 0) is
    combined with every tag in ``self.tags``. The indices of all features in
    ``self.features`` that fire for that pair are stored in
    ``self.calculated_features[(history, tag)]``, and the pair is also
    appended to ``self.tuples_per_feature[index]`` for each such index.

    A history is indexed as: ``[0]`` and ``[1]`` the two context tags used by
    the bigram/trigram features (presumably the tags two and one positions
    back -- confirm against the caller), ``[2]`` the sentence as a
    whitespace-separated string, ``[3]`` the index of the current word in it.

    Everything that depends only on the history (word, affixes, number /
    capital / bar checks) is computed once per history instead of once per
    tag. This assumes the ``Utilities`` helpers are pure functions.
    """
    features = self.features

    for data_tuple in self.history_tag_tuples:
        history = data_tuple[0]
        tag_minus2 = history[0]
        tag_minus = history[1]
        word_index = history[3]
        word = history[2].split()[word_index]

        # Left-hand sides of the (context, tag) feature keys, in the order
        # their indices must appear in the result.
        contexts = (
            word,
            tag_minus,
            (tag_minus2, tag_minus),
            Utilities.get_suffix(word, 3),
            Utilities.get_suffix(word, 2),
            Utilities.get_prefix(word, 2),
        )
        is_number = Utilities.check_number(word)
        is_capital = Utilities.check_capital(word, word_index)
        is_bar = Utilities.check_bar(word)

        for word_tag in self.tags:
            active = [
                features[(context, word_tag)]
                for context in contexts
                if (context, word_tag) in features
            ]

            # Shape features are tied to one specific tag each and are
            # looked up unconditionally once their condition holds.
            if is_number and word_tag == 'CD':
                active.append(features[('number', 'number')])
            if is_capital and word_tag == 'NNP':
                active.append(features[('capital', 'capital')])
            if is_bar and word_tag == 'JJ':
                active.append(features[('bar', 'bar')])

            key = (history, word_tag)
            self.calculated_features[key] = active

            # Reverse index: feature index -> every pair it fires for.
            for feature_index in active:
                self.tuples_per_feature.setdefault(feature_index, []).append(key)
Пример #3
0
    def get_num_features_for_given_tuple(self, data_tuple):
        """Return the indices of all features that fire for ``data_tuple``.

        Args:
            data_tuple: Indexable whose element 0 is the history and element 1
                the candidate tag. The history is indexed as: ``[0]`` and
                ``[1]`` the two context tags, ``[2]`` the sentence as a
                whitespace-separated string, ``[3]`` the index of the current
                word in that sentence.

        Returns:
            list: Values from ``self.features`` in this order: word/tag,
            tag-bigram and tag-trigram features; then, for modes
            ``'Improved'`` and ``'Comp'``, the suffix-2, suffix-3 and prefix-2
            features followed by the number / capital / bar features; then,
            for mode ``'Improved'`` only, the suffix-1, suffix-4, prefix-3,
            prefix-4 and prefix-1 features and the ``(tag, '')`` feature.

        Each affix is computed once rather than once for the membership test
        and again for the lookup; this assumes the ``Utilities`` helpers are
        pure functions.
        """
        history = data_tuple[0]
        word_tag = data_tuple[1]
        tag_minus2 = history[0]
        tag_minus = history[1]
        word_index = history[3]
        word = history[2].split()[word_index]

        features = self.features
        num_f = []

        def add_known(contexts):
            # Append the index of every (context, tag) key that is a feature.
            num_f.extend(
                features[(context, word_tag)]
                for context in contexts
                if (context, word_tag) in features
            )

        add_known((word, tag_minus, (tag_minus2, tag_minus)))

        is_improved = self.mode == 'Improved'

        if is_improved or self.mode == 'Comp':
            add_known((
                Utilities.get_suffix(word, 2),
                Utilities.get_suffix(word, 3),
                Utilities.get_prefix(word, 2),
            ))

            # Shape features are tied to one specific tag each and are
            # looked up unconditionally once their condition holds.
            if Utilities.check_number(word) and word_tag == 'CD':
                num_f.append(features['number', 'number'])
            if Utilities.check_capital(word, word_index) and word_tag == 'NNP':
                num_f.append(features['capital', 'capital'])
            if Utilities.check_bar(word) and word_tag == 'JJ':
                num_f.append(features['bar', 'bar'])

        # Features only for improved
        if is_improved:
            add_known((
                Utilities.get_suffix(word, 1),
                Utilities.get_suffix(word, 4),
                Utilities.get_prefix(word, 3),
                Utilities.get_prefix(word, 4),
                Utilities.get_prefix(word, 1),
            ))

            # Tag-only feature: note the key is (tag, ''), tag first.
            if (word_tag, '') in features:
                num_f.append(features[(word_tag, '')])

        return num_f