Example #1
    def get_accuracy_data(self):
        classification = Classification()
        # doc_features_glob is the module-level list that extract_doc_features appends to
        data = doc_features_glob
        # essays 80-89 serve as the evaluation split here
        for i in range(80, 90):
            document = self.brat_reader.documents['essay' + str(i)]
            self.extract_doc_features(document, 'essay' + str(i))
        # hand the collected features to the classifier once, after all essays are processed
        classification.set_data(data)
        arguments = classification.divided_args
        links = classification.divided_links

        arguments_features = classification.getFeatures(arguments)
        links_features = classification.getFeatures(links)

        # unused helper: superseded by extract_features below, candidate for deletion
        def argument_extract_features(document):
            argument_features = {}
            for word in arguments_features:
                argument_features['hold(%s)' % word] = (word in set(document))
            return argument_features

        def extract_features(document):
            features = {}
            # link samples carry a 'Support' or 'Attack' label, argument samples do not;
            # pick the matching feature vocabulary
            if 'Support' in document or 'Attack' in document:
                src = links_features
            else:
                src = arguments_features
            document_words = set(document)
            for word in src:
                features['hold(%s)' % word] = (word in document_words)
            return features

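        # extract_features maps a token sequence to a dict of boolean indicator
        # features, e.g. {'hold(school)': True, 'hold(tax)': False, ...}
        # (the words shown are illustrative, not from the original data)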
        arguments_test_set = nltk.classify.apply_features(
            extract_features, arguments)
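        # the link test set is left empty for now; the commented call below would build it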
        links_test_set = []
        # links_test_set = nltk.classify.apply_features(extract_features, links)
        return [arguments_test_set, links_test_set]
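
A minimal usage sketch, not part of the original example: extractor stands in for an instance of the class above, and arguments_train_set for a training split assumed to be built elsewhere with apply_features in the same way as the test set.

import nltk

arguments_test_set, links_test_set = extractor.get_accuracy_data()
classifier = nltk.NaiveBayesClassifier.train(arguments_train_set)  # assumed training split
print('argument accuracy:', nltk.classify.accuracy(classifier, arguments_test_set))
classifier.show_most_informative_features(10)  # inspect the strongest indicator features
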
Example #2
for word in all_premices_words:
    if word not in already_in_premices:
        already_in_premices.append(word)
        premices_freq_list.append((word, all_premices_words.count(word)))
for word in all_claims_words:
    if word not in already_in_claims:
        already_in_claims.append(word)
        claims_freq_list.append((word, all_claims_words.count(word)))

# order both frequency lists from most to least frequent
premices_freq_list.sort(key=operator.itemgetter(1), reverse=True)
claims_freq_list.sort(key=operator.itemgetter(1), reverse=True)
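
# An equivalent sketch of the frequency counting above using collections.Counter:
# list.count() inside the loops is quadratic, whereas Counter counts in a single
# pass and most_common() already returns (word, count) pairs ordered from most
# to least frequent.
from collections import Counter

premices_freq_list = Counter(all_premices_words).most_common()
claims_freq_list = Counter(all_claims_words).most_common()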

links = classifier.get_divided_links()
# TODO: how should the results be visualised?
arguments_features = classifier.getFeatures(arguments)
links_features = classifier.getFeatures(links)


def argument_extract_features(document):
    argument_features = {}
    for word in arguments_features:
        argument_features['hold(%s)' % word] = (word in set(document))
    return argument_features


def extract_features(document):
    features = {}
    # link samples carry a 'Support' or 'Attack' label
    if 'Support' in document or 'Attack' in document:
        src = links_features
    else: