def count_training():
    """Print how many training thesaurus nodes fall at each level.

    Walks every sense yielded by a ``PickleLoader`` over ``training_dir``,
    resolves each entry of the sense's ``thesaurus_nodes`` through
    ``tdb.get_thesclass`` and tallies the resulting ``level``. One
    tab-separated ``level<TAB>count`` line is then printed for each of
    the levels 0 through 16.

    Raises:
        KeyError: if a resolved class has a level outside 0-16, since
            only those keys are pre-seeded in the tally.
    """
    levels = range(17)
    tally = dict.fromkeys(levels, 0)

    loader = PickleLoader(training_dir)
    for sense in loader.iterate():
        for node in sense.thesaurus_nodes:
            level = tdb.get_thesclass(node).level
            tally[level] += 1

    for level in levels:
        print('%d\t%d' % (level, tally[level]))
def count_classified():
    """Print how many classified senses fall at each thesaurus level.

    For every entry of ``parent_directories``, loads the senses stored in
    its ``classified`` subdirectory. Each sense that has a ``class_id``
    attribute is resolved through ``tdb.get_thesclass`` and counted under
    the resulting ``level``; senses without the attribute are ignored.
    One tab-separated ``level<TAB>count`` line is then printed for each
    of the levels 0 through 16.

    Raises:
        KeyError: if a resolved class has a level outside 0-16, since
            only those keys are pre-seeded in the tally.
    """
    levels = range(17)
    tally = dict.fromkeys(levels, 0)

    for parent in parent_directories:
        loader = PickleLoader(os.path.join(parent, 'classified'))
        for sense in loader.iterate():
            try:
                class_id = sense.class_id
            except AttributeError:
                # No class_id attribute on this sense: leave it uncounted.
                continue
            # Kept outside the try so that errors raised by the lookup
            # itself are not mistaken for a missing class_id.
            level = tdb.get_thesclass(class_id).level
            tally[level] += 1

    for level in levels:
        print('%d\t%d' % (level, tally[level]))
# Interactive lookup: prompt for a lemma, then print a trace of every
# matching sense found under each entry of parent_directories.
print(""" =========================================================== Enter lemma (optionally followed by '-c' or '-u' to specify classified or unclassified): """)
lemma = raw_input(">>>").strip()

# A trailing " -c" or " -u" restricts the search to that one subdirectory;
# with any other input both subdirectories are searched.
if lemma.endswith(" -c"):
    dirs = ["classified"]
elif lemma.endswith(" -u"):
    dirs = ["unclassified"]
else:
    dirs = ["classified", "unclassified"]

# Remove a trailing single-character flag (recognised or not), together
# with the spaces before it, so only the lemma itself remains.
lemma = re.sub(r" +-.$", "", lemma)

# Slice instead of indexing: blank input yields "" rather than raising
# IndexError. The explicit non-empty test is needed as well, because an
# empty string would pass the membership test if `letters` is a string.
initial = lemma[:1].upper()
if initial and initial in letters:
    # node_ids already printed, so a sense that turns up in more than one
    # directory is only traced once.
    seen = set()
    for p in parent_directories:
        for d in dirs:
            subdir = os.path.join(p, d)
            pl = PickleLoader(subdir, letters=initial)
            for sense in pl.iterate():
                if sense.lemma == lemma and sense.node_id not in seen:
                    print("----------------------------------------")
                    print(trace_sense(sense))
                    seen.add(sense.node_id)