def get_train_data_from_lang(lang):
    """Return the parsed training sentences for the requested language.

    ``lang`` is compared, in order, against 'swedish', 'danish' and
    'english'.  On the first match the corresponding
    ``dataset.get_<language>_train_corpus()`` accessor is called and the
    result of its ``parsed_sents()`` is returned.

    Raises:
        ValueError: if ``lang`` equals none of the three supported names.
    """
    # (language name, name of the ``dataset`` accessor for its train corpus)
    corpus_accessors = (
        ('swedish', 'get_swedish_train_corpus'),
        ('danish', 'get_danish_train_corpus'),
        ('english', 'get_english_train_corpus'),
    )

    for language, accessor_name in corpus_accessors:
        if lang == language:
            load_corpus = getattr(dataset, accessor_name)
            return load_corpus().parsed_sents()

    # Nothing matched: reject the request without touching ``dataset``.
    raise ValueError(
        "Please don't use {}, only use english, swedish or danish".format(
            lang))
# Script: parse the Swedish test corpus with the saved model
# "badfeatures.model", write the parses to "test.conll" and print the
# scores returned by DependencyEvaluator.eval().
import random

from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == "__main__":
    data = dataset.get_swedish_train_corpus().parsed_sents()

    # Fixed seed so the 200-sentence training sample is reproducible.
    random.seed(1234)
    subdata = random.sample(data, 200)

    # NOTE: the original wrapped everything below in a `try:` that had no
    # `except`/`finally` clause, which is a SyntaxError.  No handler exists
    # to preserve, so the statements run unguarded and errors propagate.

    # Training is currently disabled; `subdata` is only needed when the
    # three lines below are re-enabled.
    # tp = TransitionParser(Transition, FeatureExtractor)
    # tp.train(subdata)
    # tp.save('swedish.model')

    testdata = dataset.get_swedish_test_corpus().parsed_sents()
    tp = TransitionParser.load("badfeatures.model")

    parsed = tp.parse(testdata)

    # NOTE(review): this writes UTF-8 *encoded* output to a text-mode file.
    # That works on Python 2; on Python 3 a text-mode file rejects bytes --
    # confirm the target interpreter before porting.
    with open("test.conll", "w") as f:
        for p in parsed:
            f.write(p.to_conll(10).encode("utf-8"))
            f.write("\n")

    ev = DependencyEvaluator(testdata, parsed)
    # Parenthesised single-argument form prints the same text on Python 2
    # and is also valid Python 3 syntax.
    # Assumes ev.eval() yields (LAS, UAS) in that order -- TODO confirm.
    print("LAS: {} \nUAS: {}".format(*ev.eval()))

    # parsing arbitrary sentences (english):
    # TODO: no example follows this heading in the script.
# Script: train a transition parser on a 200-sentence sample of the Swedish
# training corpus, save it as 'swedish.model', reload it, parse the Swedish
# test corpus and write the parses to 'test.conll'.
#import os
#os.chdir("/home/sidvash/NLP_coursera/Assignment1/code/")
import random

from providedcode import dataset
from providedcode.transitionparser import TransitionParser
from providedcode.evaluate import DependencyEvaluator
from featureextractor import FeatureExtractor
from transition import Transition

if __name__ == '__main__':
    data = dataset.get_swedish_train_corpus().parsed_sents()

    # Fixed seed so the 200-sentence training sample is reproducible.
    random.seed(1234)
    subdata = random.sample(data, 200)

    # NOTE: the original wrapped everything below in a `try:` that had no
    # `except`/`finally` clause, which is a SyntaxError.  No handler exists
    # to preserve, so the statements run unguarded and errors propagate.

    # Train on the sample and persist the model.
    tp = TransitionParser(Transition, FeatureExtractor)
    tp.train(subdata)
    tp.save('swedish.model')

    # Reload the saved model and parse the held-out test sentences.
    testdata = dataset.get_swedish_test_corpus().parsed_sents()
    tp = TransitionParser.load('swedish.model')

    parsed = tp.parse(testdata)

    # NOTE(review): this writes UTF-8 *encoded* output to a text-mode file.
    # That works on Python 2; on Python 3 a text-mode file rejects bytes --
    # confirm the target interpreter before porting.
    with open('test.conll', 'w') as f:
        for p in parsed:
            f.write(p.to_conll(10).encode('utf-8'))
            f.write('\n')
# Script: pick a random sentence from the Swedish training corpus and show
# its dependency graph in a matplotlib window.
import matplotlib.pyplot as plt
from providedcode.dataset import get_swedish_train_corpus
import networkx as nx
import random


def _show_random_dependency_graph():
    """Draw one randomly chosen parsed sentence and display the figure."""
    sentences = get_swedish_train_corpus().parsed_sents()
    chosen = random.choice(sentences)

    # nx_graph() yields the graph plus the labels to draw on its nodes.
    graph, node_labels = chosen.nx_graph()
    layout = nx.spring_layout(graph)

    # Same draw order as before: nodes, then labels, then edges.
    nx.draw_networkx_nodes(graph, layout, node_size=1000)
    nx.draw_networkx_labels(graph, layout, labels=node_labels)
    nx.draw_networkx_edges(graph, layout, edge_color='k', width=1)

    plt.show()


if __name__ == '__main__':
    _show_random_dependency_graph()