import logging
import pickle

from cort.core import mention_extractor
from cort.coreference import (cost_functions, experiments, features,
                              instance_extractors)
from cort.coreference.approaches.mention_ranking import (RankingPerceptron,
                                                         extract_substructures)
from cort.coreference.clusterer import all_ante
from cort.preprocessing import pipeline


def call_cort(text_blob):
    """Run the cort mention-ranking pipeline on a raw text blob and
    return one simple-format output string per document."""
    mention_features = [
        features.fine_type,
        features.gender,
        features.number,
        features.sem_class,
        features.deprel,
        features.head_ner,
        features.length,
        features.head,
        features.first,
        features.last,
        features.preceding_token,
        features.next_token,
        features.governor,
        features.ancestry
    ]

    pairwise_features = [
        features.exact_match,
        features.head_match,
        features.same_speaker,
        features.alias,
        features.sentence_distance,
        features.embedding,
        features.modifier,
        features.tokens_contained,
        features.head_contained,
        features.token_distance
    ]

    # TODO: make sure these feature lists exactly match the ones the
    # model was trained with!
    model_abs = '/Users/ryanpanos/Documents/code/cort_experiments/models/model-pair-train+dev.obj'  # OMG evil! (hard-coded path)
    corenlp_path = '/Users/ryanpanos/Documents/code/StanfordNLP/stanford-corenlp-full-2016-10-31/'  # OMG evil! (hard-coded path)

    # Dotted paths usable with import_helper.import_from_path; the direct
    # imports above are used instead, so these are kept only for reference.
    perceptron_path = 'cort.coreference.approaches.mention_ranking.RankingPerceptron'
    extractor_path = 'cort.coreference.approaches.mention_ranking.extract_substructures'
    clusterer_path = 'cort.coreference.clusterer.all_ante'

    print("Loading model ... (this takes a while)")
    priors, weights = pickle.load(open(model_abs, "rb"))
    print("Model loaded.")

    perceptron = RankingPerceptron(
        priors=priors,
        weights=weights,
        cost_scaling=0
    )

    extractor = instance_extractors.InstanceExtractor(
        extract_substructures,
        mention_features,
        pairwise_features,
        cost_functions.null_cost,
        perceptron.get_labels()
    )

    logging.info("Reading in and preprocessing data.")
    p = pipeline.Pipeline(corenlp_path)
    testing_corpus = p.run_on_blob("corpus", text_blob)

    logging.info("Extracting system mentions.")
    for doc in testing_corpus:
        doc.system_mentions = mention_extractor.extract_system_mentions(doc)

    mention_entity_mapping, antecedent_mapping = experiments.predict(
        testing_corpus,
        extractor,
        perceptron,
        all_ante
    )

    testing_corpus.read_coref_decisions(mention_entity_mapping,
                                        antecedent_mapping)

    logging.info("Collecting output.")
    output_ls = []
    for doc in testing_corpus:
        output = doc.to_simple_output()
        # The original script wrote each document to its own file here, via
        # codecs.open(doc.identifier + "." + args.suffix, "w", "utf-8").
        print("output:\n" + output)
        output_ls.append(output)
    logging.info("Done.")

    return output_ls
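
# Usage sketch for call_cort. It assumes the hard-coded model and CoreNLP
# paths above exist on your machine; the sample text is illustrative only.
if __name__ == "__main__":
    sample = ("John Lennon was born in Liverpool. "
              "He founded the Beatles there.")
    for doc_output in call_cort(sample):
        print(doc_output)
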
# Script-style variant of the same pipeline: everything configurable comes
# from an argparse namespace `args` (see the sketch after this excerpt),
# the corpus is read from a CoNLL file rather than raw text, and
# `mention_features` is defined as in call_cort above. Additionally needs
# codecs, cort.core.corpora, and cort.util.import_helper.
pairwise_features = [
    features.exact_match,
    features.head_match,
    features.same_speaker,
    features.alias,
    features.sentence_distance,
    features.embedding,
    features.modifier,
    features.tokens_contained,
    features.head_contained,
    features.token_distance
]

logging.info("Loading model.")
priors, weights = pickle.load(open(args.model, "rb"))

perceptron = import_helper.import_from_path(args.perceptron)(
    priors=priors,
    weights=weights,
    cost_scaling=0
)

extractor = instance_extractors.InstanceExtractor(
    import_helper.import_from_path(args.extractor),
    mention_features,
    pairwise_features,
    cost_functions.null_cost,
    perceptron.get_labels()
)

logging.info("Reading in data.")
testing_corpus = corpora.Corpus.from_file(
    "testing",
    codecs.open(args.input_filename, "r", "utf-8")
)

logging.info("Extracting system mentions.")
for doc in testing_corpus:
    doc.system_mentions = mention_extractor.extract_system_mentions(doc)

mention_entity_mapping, antecedent_mapping = experiments.predict(
    testing_corpus,
    extractor,
    perceptron,
    import_helper.import_from_path(args.clusterer)
)

testing_corpus.read_coref_decisions(mention_entity_mapping,
                                    antecedent_mapping)
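
# A minimal argparse setup supplying the `args` fields used above -- a
# sketch, not cort's actual CLI (cort's own prediction scripts define more
# options). It would run before the excerpt above. The default dotted
# paths mirror the ones hard-coded in call_cort.
import argparse

parser = argparse.ArgumentParser(description="cort prediction (sketch)")
parser.add_argument("-in", dest="input_filename", required=True,
                    help="test corpus in CoNLL format")
parser.add_argument("-model", dest="model", required=True,
                    help="pickled (priors, weights) model file")
parser.add_argument("-perceptron", dest="perceptron",
                    default="cort.coreference.approaches."
                            "mention_ranking.RankingPerceptron")
parser.add_argument("-extractor", dest="extractor",
                    default="cort.coreference.approaches."
                            "mention_ranking.extract_substructures")
parser.add_argument("-clusterer", dest="clusterer",
                    default="cort.coreference.clusterer.all_ante")
args = parser.parse_args()
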
# __init__ of cort's interactive demo class: builds the mention-ranking
# extractor and perceptron, starts a CoreNLP pipeline, and sets up a Tk
# window with a text area and a "Resolve Coreference" button. Requires
# `import tkinter as tki` (Tkinter on Python 2) plus the cort imports
# used above, with mention_ranking imported as a module.
def __init__(self):
    mention_features = [
        features.fine_type,
        features.gender,
        features.number,
        features.sem_class,
        features.deprel,
        features.head_ner,
        features.length,
        features.head,
        features.first,
        features.last,
        features.preceding_token,
        features.next_token,
        features.governor,
        features.ancestry
    ]

    pairwise_features = [
        features.exact_match,
        features.head_match,
        features.same_speaker,
        features.alias,
        features.sentence_distance,
        features.embedding,
        features.modifier,
        features.tokens_contained,
        features.head_contained,
        features.token_distance
    ]

    self.extractor = instance_extractors.InstanceExtractor(
        mention_ranking.extract_substructures,
        mention_features,
        pairwise_features,
        cost_functions.null_cost
    )

    logging.info("Loading model.")
    priors, weights = pickle.load(open("latent-model-train.obj", "rb"))
    self.perceptron = mention_ranking.RankingPerceptron(
        priors=priors,
        weights=weights,
        cost_scaling=0
    )

    logging.info("Loading CoreNLP models.")
    self.p = pipeline.Pipeline(
        "/home/sebastian/Downloads/stanford-corenlp-full-2015-04-20")

    self.root = tki.Tk()
    self.root.title("cort Demo")

    # create a Frame for the Text and Scrollbar
    self.txt_frm = tki.Frame(self.root, width=400, height=200)
    self.txt_frm.pack(fill="both", expand=True)

    # ensure a consistent GUI size
    self.txt_frm.grid_propagate(False)

    # implement stretchability
    self.txt_frm.grid_rowconfigure(0, weight=1)
    self.txt_frm.grid_columnconfigure(0, weight=1)

    # create a Text widget
    self.txt = tki.Text(self.txt_frm, borderwidth=3, relief="sunken")
    self.txt.config(font=("consolas", 12), undo=True, wrap='word')
    self.txt.grid(row=0, column=0, sticky="nsew", padx=2, pady=2)

    # create a Scrollbar and associate it with txt
    scrollb = tki.Scrollbar(self.txt_frm, command=self.txt.yview)
    scrollb.grid(row=0, column=1, sticky='nsew')
    self.txt['yscrollcommand'] = scrollb.set

    self.button = tki.Button(self.root, text='Resolve Coreference',
                             command=self.do_coreference)
    self.button.pack()
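
# A sketch of the do_coreference callback wired to the button above; it
# belongs inside the same class as __init__. The method is referenced but
# not shown in this excerpt, so the body below simply replays the
# prediction flow from call_cort (run_on_blob, mention extraction,
# predict) on the Text widget's contents and prints the result -- the
# real demo presumably displays it instead. `all_ante` is the clusterer
# imported earlier from cort.coreference.clusterer.
def do_coreference(self):
    text_blob = self.txt.get("1.0", "end-1c")  # entire Text widget contents
    testing_corpus = self.p.run_on_blob("demo", text_blob)
    for doc in testing_corpus:
        doc.system_mentions = mention_extractor.extract_system_mentions(doc)
    mention_entity_mapping, antecedent_mapping = experiments.predict(
        testing_corpus, self.extractor, self.perceptron, all_ante)
    testing_corpus.read_coref_decisions(mention_entity_mapping,
                                        antecedent_mapping)
    for doc in testing_corpus:
        print(doc.to_simple_output())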