        # NOTE(review): this chunk was recovered from a flattened source line;
        # the indentation below is inferred from the control flow (the
        # `continue` statements imply an enclosing per-NP loop nested in a
        # per-document loop) -- confirm against the original layout.

        # Normalized surface form of the NP: cleaned, lowercased, stripped.
        text = utils.textClean(np.getText().lower()).strip()
        # Skip pronouns entirely; this pass only tallies non-pronominal NPs.
        if (text in data.ALL_PRONOUNS):
            continue
        #if specificity_utils.isProper(np):
        #    continue
        # Gold annotation covering the exact same character span as this NP.
        anaphor_np = gold_nps.getAnnotBySpan(np.getStart(), np.getEnd())
        # Keep only mentions flagged as proper name / proper noun in the gold
        # annotation -- TODO(review): confirm this filter is intentional for a
        # dict named `nominals`.
        if anaphor_np["PROPER_NAME"] != "true" and anaphor_np["PROPER_NOUN"] != "true":
            continue
        #print text
        add_stats(nominals, doc, np, text)

    #true singletons -- TODO: double check that these numbers are correct
    # Fold this document's singleton counts into each tracked word's stats.
    for key in list(nominals.keys()):
        nominals[key].singletons += doc.getSingletonCount(key)

# Erase the progress line left on stdout by the per-document loop.
sys.stdout.write("\r                                                  \r\n")

#histogram for sentence distance
# Sum the per-word sentence-distance histograms into one overall histogram
# keyed by distance (in sentences) between anaphor and antecedent.
nominals_total_sent_histo = {}
for key in list(nominals.keys()):
    h = nominals[key].sent_distance_histogram()
    for dist in list(h.keys()):
        nominals_total_sent_histo[dist] = nominals_total_sent_histo.get(dist, 0) + h[dist]
#combine histograms
make_chart(nominals_total_sent_histo, DATASET, "sentence_distance", "Sentence Distance", max_x=10)
#histogram for word distance
        # NOTE(review): recovered from a flattened source line; indentation is
        # inferred from the control flow (the `continue` implies an enclosing
        # per-NP loop inside a per-document loop) -- confirm against the
        # original layout.

        # Normalized surface form of the NP: cleaned, lowercased, stripped.
        text = utils.textClean(np.getText().lower()).strip()
        # Bucket the pronoun into one of the three tracked classes and record
        # its statistics; anything else is ignored.
        if text in data.THIRD_PERSON:
            #then it is he, him, she
            add_stats(third_person, doc, np, text)
        elif (text in data.IT) and (text != "i"):
            #then we have 'it' or 'its'
            # NOTE(review): "i" is explicitly excluded here -- presumably
            # data.IT can contain it; verify against data's definition.
            add_stats(it, doc, np, text)
        elif text in data.THIRD_PERSON_PLURAL:
            #we have 'they' or 'them'
            add_stats(third_person_plural, doc, np, text)
        else:
            # Not a pronoun class this pass tracks.
            continue

    #true singletons -- TODO: double check that these numbers are correct
    # Fold this document's singleton counts into each tracked word's stats,
    # one dict per pronoun class.
    for key in list(it.keys()):
        it[key].singletons += doc.getSingletonCount(key)
    for key in list(third_person.keys()):
        third_person[key].singletons += doc.getSingletonCount(key)
    for key in list(third_person_plural.keys()):
        third_person_plural[key].singletons += doc.getSingletonCount(key)
    #this word exists outside of annotations
    #This needs to take place at the document level and cycle over all the
    #pronouns we want to check

# Erase the progress line left on stdout by the per-document loop.
sys.stdout.write("\r                                                  \r\n")

#print "{0:5} : {1}".format("it", it["it"].getCount())
#print it["it"].wd_distance_histogram()
#print it["it"].sent_distance_histogram()
#print "{0} : total antecedents".format(sum(it["it"].sent_distance_histogram().values()))
#print "{0} : total antecedents".format(sum(it["it"].wd_distance_histogram().values()))
        # NOTE(review): recovered from a flattened source line; this chunk
        # begins mid-`if/elif` chain (the opening branch is outside this view)
        # and the indentation is inferred -- confirm against the original
        # layout.

        # Dispatch the NP to its noun class and record its statistics.
        elif specificity_utils.isNominal(np):
            add_stats(noun_classes["nominal"], doc, np, text)
            #sys.stderr.write("{0}\n".format(text))
        elif specificity_utils.isProper(np):
            add_stats(noun_classes["proper"], doc, np, text)
            #sys.stderr.write("{0}\n".format(text))
        else:
            #sys.stderr.write("Word not found: {0}\n".format(text))
            continue

    #true singletons -- TODO double check that these numbers are correct
    #this word exists outside of annotations
    #This needs to take place at the document level and cycle over all the
    # Fold this document's singleton counts into each class/word's stats.
    for cls in list(noun_classes.keys()):
        for word in noun_classes[cls]:
            noun_classes[cls][word].singletons += doc.getSingletonCount(word)

# Erase the progress line left on stdout by the per-document loop.
sys.stdout.write("\r                                                  \r\n")

#TODO: print out the stats
#with open("nouns.stats", "a") as outFile:
# Per-class aggregate accumulators; the summation over words happens later in
# this loop body, outside the visible chunk. NOTE(review): "antecendents" is
# misspelled in the original variable name -- kept as-is to avoid breaking
# later (unseen) uses.
for cls in sorted(noun_classes.keys()):
    total_antecendents = 0
    total_productivity = 0
    total_nominal_antecedents = 0
    total_proper_antecedents = 0
    total_pronoun_antecedents = 0
    total_subject_antecedents = 0
    total_object_antecedents = 0
    total_self_subject = 0
    total_self_object = 0