# Announce which corpus is about to be loaded when tracing is enabled
# (loading a large chunked corpus can be slow).
if args.trace:
    print('loading %s' % args.corpus)

##############
## counting ##
##############

# Accumulators for the statistics gathered below: total token count, the
# vocabulary, frequency tables keyed by POS tag and by chunk (IOB) label,
# and a tag-by-label cross tabulation.
wc = 0
word_set = set()
tag_counts, iob_counts = collections.defaultdict(int), collections.defaultdict(int)
tag_iob_counts = collections.defaultdict(lambda: collections.defaultdict(int))

for entry in chunked_corpus.chunked_words():
    # chunked_words() yields either a Tree (a chunk spanning tagged words)
    # or a bare (word, tag) pair for tokens outside any chunk.
    if not isinstance(entry, Tree):
        word, tag = entry
        wc += 1
        word_set.add(word)
        tag_counts[tag] += 1
    else:
        label = node_label(entry)
        iob_counts[label] += 1
        for word, tag in entry.leaves():
            wc += 1
            word_set.add(word)
            tag_counts[tag] += 1
            tag_iob_counts[tag][label] += 1

############
## output ##
# NOTE(review): the 'output' header above is truncated and the lines
# below repeat the corpus-loading trace from the top of the file; this
# section looks duplicated by a bad merge — confirm against upstream.
if args.trace:
	print('loading %s' % args.corpus)

##############
## counting ##
##############

# NOTE(review): this re-initializes the same statistics set up earlier in
# the file — the section appears to be duplicated by a bad merge.
wc = 0
word_set = set()
iob_counts = collections.defaultdict(int)
tag_counts = collections.defaultdict(int)
tag_iob_counts = collections.defaultdict(lambda: collections.defaultdict(int))

for token in chunked_corpus.chunked_words():
	# Anything that is not a Tree is a lone (word, tag) pair sitting
	# outside every chunk.
	if not isinstance(token, Tree):
		word, tag = token
		wc += 1
		word_set.add(word)
		tag_counts[tag] += 1
		continue

	# A Tree is a chunk: tally its label, then every tagged word inside.
	label = node_label(token)
	iob_counts[label] += 1
	for word, tag in token.leaves():
		wc += 1
		word_set.add(word)
		tag_counts[tag] += 1
		tag_iob_counts[tag][label] += 1

############
## output ##
# NOTE(review): the indented lines below begin mid-scope — the `if` and
# enclosing suite they belong to are missing, so this fragment cannot
# parse as-is; the file appears corrupted/truncated here.
		chunked_sents = chunked_sents[:cutoff]
	
	print(chunker.evaluate(chunked_sents))
	print('\n')

# When tracing, report which corpus is being analyzed and with which
# chunker class.
if args.trace:
	print('analyzing chunker coverage of %s with %s\n' % (args.corpus, chunker.__class__.__name__))

# Per-label counts of the chunks the chunker actually produced.
iobs_found = collections.defaultdict(int)
sents = corpus.sents()

# Optionally analyze only a leading fraction of the corpus sentences.
# `cutoff` stays module-level; other top-level code in this file reads it.
if args.fraction != 1.0:
	cutoff = int(math.ceil(len(sents) * args.fraction))
	sents = sents[:cutoff]

# Tag each raw sentence, chunk it, and tally every chunk label the
# chunker produced (every subtree except the 'S' root).
for sentence in sents:
	parsed = chunker.parse(tagger.tag(sentence))
	for subtree in parsed.subtrees(lambda t: node_label(t) != 'S'):
		iobs_found[node_label(subtree)] += 1

iobs = iobs_found.keys()
# Column width: at least 7 chars, or the longest label found. Folding 7
# into the list (rather than `max(7, *[...])`) avoids the TypeError that
# `max(7)` raises when the chunker found no chunks at all.
justify = max([7] + [len(iob) for iob in iobs])

print('IOB'.center(justify) + '    Found  ')
print('='*justify + '  =========')

# One row per label, sorted alphabetically, counts right-justified.
for iob in sorted(iobs):
	print('  '.join([iob.ljust(justify), str(iobs_found[iob]).rjust(9)]))

print('='*justify + '  =========')
		# NOTE(review): fragment begins mid-scope (its enclosing `if` header
		# is missing); duplicated from the corrupted section earlier in the
		# file — confirm against upstream before relying on this copy.
		chunked_sents = chunked_sents[:cutoff]
	
	print(chunker.evaluate(chunked_sents))
	print('\n')

# NOTE(review): this coverage-analysis section duplicates the one above
# verbatim — almost certainly a bad merge; confirm against upstream.
if args.trace:
	print('analyzing chunker coverage of %s with %s\n' % (args.corpus, chunker.__class__.__name__))

# Per-label counts of chunks produced by the chunker on raw sentences.
iobs_found = collections.defaultdict(int)
sents = corpus.sents()

# Optionally restrict analysis to a leading fraction of the sentences.
if args.fraction != 1.0:
	cutoff = int(math.ceil(len(sents) * args.fraction))
	sents = sents[:cutoff]

def _is_chunk(t):
	# Every subtree except the 'S' root counts as a chunk.
	return node_label(t) != 'S'

# Chunk every tagged sentence and count each non-root subtree label.
for sent in sents:
	tree = chunker.parse(tagger.tag(sent))
	for chunk in tree.subtrees(_is_chunk):
		iobs_found[node_label(chunk)] += 1

iobs = iobs_found.keys()
# Column width: the longest label, but never narrower than 7. Appending 7
# to the list avoids the TypeError that `max(7, *[])` would raise when the
# chunker produced no chunks at all (it collapses to `max(7)`).
justify = max([len(iob) for iob in iobs] + [7])

print('IOB'.center(justify) + '    Found  ')
print('='*justify + '  =========')

# Alphabetical rows: label left-justified, count right-justified.
for iob in sorted(iobs):
	print('  '.join([iob.ljust(justify), str(iobs_found[iob]).rjust(9)]))

print('='*justify + '  =========')