def get_accuracy_data(self):
    """Build the held-out feature sets used for accuracy measurement.

    Runs ``self.extract_doc_features`` over the documents ``essay80`` to
    ``essay89`` of ``self.brat_reader``, hands the module-level
    ``doc_features_glob`` collection to a fresh ``Classification`` and
    wraps its argument split with ``nltk.classify.apply_features``.

    Returns:
        A two-item list ``[arguments_test_set, links_test_set]``. The
        links entry is always an empty list: link feature extraction is
        not enabled in this method.
    """
    classification = Classification()

    # Alias the shared collection before extraction runs, exactly as the
    # original did. Presumably ``extract_doc_features`` appends to
    # ``doc_features_glob`` in place -- TODO confirm against its definition.
    data = doc_features_glob
    for i in range(80, 90):
        name = 'essay' + str(i)
        document = self.brat_reader.documents[name]
        self.extract_doc_features(document, name)
    classification.set_data(data)

    arguments = classification.divided_args
    links = classification.divided_links
    arguments_features = classification.getFeatures(arguments)
    links_features = classification.getFeatures(links)

    def extract_features(document):
        """Return the ``hold(<word>)`` boolean features of *document*."""
        # NOTE(review): this was written as
        # ``'Support' and 'Attack' in document``. A non-empty string
        # literal is always truthy, so that expression only ever tested
        # ``'Attack' in document``. The effective behaviour is kept here;
        # confirm whether 'Support' was meant to be checked as well.
        if 'Attack' in document:
            vocabulary = links_features
        else:
            vocabulary = arguments_features
        # Build the membership set once instead of once per vocabulary word.
        present = set(document)
        return {'hold(%s)' % word: word in present for word in vocabulary}

    arguments_test_set = nltk.classify.apply_features(
        extract_features, arguments)
    # Placeholder: no feature set is produced for links here.
    links_test_set = []
    return [arguments_test_set, links_test_set]
if word not in already_in_premices: already_in_premices.append(word) premices_freq_list.append((word, all_premices_words.count(word))) for word in all_claims_words: if word not in already_in_claims: already_in_claims.append(word) claims_freq_list.append((word, all_claims_words.count(word))) premices_freq_list.sort(key=operator.itemgetter(1)) premices_freq_list = premices_freq_list[::-1] claims_freq_list.sort(key=operator.itemgetter(1)) claims_freq_list = claims_freq_list[::-1] links = classifier.get_divided_links() # how visualise results? arguments_features = classifier.getFeatures(arguments) links_features = classifier.getFeatures(links) def argument_extract_features(document): argument_features = {} for word in arguments_features: argument_features['hold(%s)' % word] = (word in set(document)) return argument_features def extract_features(document): features = {} if 'Support' and 'Attack' in document: src = links_features else: