Example #1
    def get_meter(self, meter=None):
        """Resolve which meter to use: the explicit argument, the meter already
        attached to this text, the meter of the best parse so far, or the
        library default, in that order."""
        if not meter:
            if self.meter:
                meter = self.meter
            elif hasattr(self, '_Text__bestparses') and self.__bestparses:
                # reuse the meter of the best-scoring parse found so far
                return self.get_meter(sorted(self.__bestparses.keys())[0])
            else:
                # nothing specified anywhere: fall back to the default meter
                import Meter
                meter = Meter.genDefault()
        elif type(meter) in [str, unicode]:
            # a meter name was passed: look it up in the configuration
            meter = self.config['meters'][meter]

        return meter
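
The fallback order above (explicit argument, then the meter already attached to the text, then the meter of the best parse found so far, then the library default) can be summarized in a standalone sketch. The names below are illustrative stand-ins, not part of prosodic:

def resolve_meter(requested=None, cached=None, best_parsed=None,
                  meters=None, gen_default=lambda: 'default-meter'):
    # mirrors get_meter(): argument > self.meter > best parse's meter > default
    if not requested:
        if cached:
            return cached
        if best_parsed:
            return best_parsed
        return gen_default()
    if isinstance(requested, str):
        return (meters or {})[requested]   # stand-in for self.config['meters'][name]
    return requested

print(resolve_meter())                                        # -> 'default-meter'
print(resolve_meter('iambic', meters={'iambic': 'IAMBIC'}))   # -> 'IAMBIC'
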
Example #2
	def get_meter(self,meter=None):
		if not meter:
			if self.meter:
				meter=self.meter
			elif hasattr(self,'_Text__bestparses') and self.__bestparses:
				return self.get_meter(sorted(self.__bestparses.keys())[0])
			else:
				import Meter
				meter=Meter.genDefault()
				#print '>> no meter specified. defaulting to this meter:'
				#print meter
		elif type(meter) in [str,unicode]:
			meter= self.config['meters'][meter]
		else:
			pass

		return meter
Example #3
	def report(self,meter=None,include_bounded=False,reverse=True):
		""" Print all parses and their violations in a structured format. """

		ReportStr = ''
		if not meter:
			# no meter given: fall back to the library default
			from Meter import Meter
			meter=Meter.genDefault()
		if hasattr(self,'allParses'):
			# this object holds parses directly: print one block per line
			self.om(unicode(self))
			allparses=self.allParses(meter=meter,include_bounded=include_bounded)
			numallparses=len(allparses)
			for pi,parseList in enumerate(allparses):
				line=self.iparse2line(pi).txt
				hdr="\n\n"+'='*30+'\n[line #'+str(pi+1)+' of '+str(numallparses)+']: '+line+'\n\n\t'
				ftr='='*30+'\n'
				ReportStr+=self.om(hdr+meter.printParses(parseList,reverse=reverse).replace('\n','\n\t')[:-1]+ftr,conscious=False)
		else:
			# no parses here: recurse into children and concatenate their reports
			for child in self.children:
				if type(child)==type([]): continue
				ReportStr+=child.report()

		return ReportStr
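
A typical call, assuming the Python 2-era prosodic API these snippets come from (the sample line is illustrative, and parse() is assumed to default its meter the same way report() does):

import prosodic as p

p.config['print_to_screen'] = 0     # quiet per-line output, as in the assess() examples below
t = p.Text("Shall I compare thee to a summer's day?")
t.parse()                           # assumption: parse() defaults its meter; it fills allParses()
print(t.report(include_bounded=False, reverse=True))
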
Example #4
def assess(fn,meter=None,key_meterscheme=None, key_line='line',key_parse='parse'):
	"""Evaluate PROSODIC's metrical parses against human scansions.

	Each record in the data file at `fn` must contain the line text (key_line)
	and a human parse of 'w'/'s' codes (key_parse). Every line is parsed with
	the given meter (or the default); per-line accuracy records are written to
	a derived '.evaluated.' file, an OT-style constraint tableau to a second
	file, and a percent-correct summary is printed at the end."""
	import prosodic as p
	Text=p.Text
	if not meter:
		import Meter
		meter=Meter.genDefault()

	p.config['print_to_screen']=0

	def parse2list(parse):
		l=[]
		for i,x in enumerate(parse):
			if not l or l[-1]!=x:
				l+=[x]
			else:
				l[-1]+=x
		return l

	def get_num_sylls_correct(parse_human,parse_comp):
		maxlen=max([len(parse_comp),len(parse_human)])
		#parse_human=parse2list(parse_human)
		#parse_comp=parse2list(parse_comp)
		#parse_comp_forzip = parse_comp + ['x' for x in range(maxlen-len(parse_comp))]
		#parse_human_forzip = parse_human + ['x' for x in range(maxlen-len(parse_human))]
		parse_comp_forzip = parse_comp + ''.join(['x' for x in range(maxlen-len(parse_comp))])
		parse_human_forzip = parse_human + ''.join(['x' for x in range(maxlen-len(parse_human))])

		## sylls correct?
		_sylls_iscorrect=[]
		#print '\t'.join(parse_human_forzip)
		#print '\t'.join(parse_comp_forzip)
		for syll1,syll2 in zip(parse_human_forzip,parse_comp_forzip):
			syll_iscorrect = int(syll1==syll2)
			_sylls_iscorrect+=[syll_iscorrect]
		return _sylls_iscorrect

	import codecs
	ld=read_ld(fn)
	fn_split = fn.split('.')
	ofn_split=fn_split[:-1] + ['evaluated','meter='+meter.id] + [fn_split[-1]]
	ofn_split_ot=fn_split[:-1] + ['evaluated', 'ot','meter='+meter.id] + [fn_split[-1]]
	ofn='.'.join(ofn_split)
	ofn_ot='.'.join(ofn_split_ot)

	def _print(dx):
		print
		for k,v in sorted(dx.items()):
			print k,'\t',v
		print 'HUMAN   :','\t'.join(dx['parse_human'])
		print 'PROSODIC:','\t'.join(dx['parse_comp'])
		print '         ','\t'.join(['*' if x!=y else ' ' for x,y in zip(dx['parse_human'],dx['parse_comp'])])
		print

	def _recapitalize(parse,code):
		code=' '.join([x for x in code])
		parse=parse.replace('|',' ').replace('.',' ')
		newparse=[]
		for s,c in zip(parse.split(),code.split()):
			if c=='w':
				newparse+=[s.lower()]
			else:
				newparse+=[s.upper()]
		return '  '.join(newparse)
	
	def _writegen():
		lines_iscorrect=[]
		lines_iscorrect_control=[]
		lines_iscorrect_control2=[]
		lines_iscorrect_human2=[]
		sylls_iscorrect_control=[]
		sylls_iscorrect_control2=[]
		sylls_iscorrect_human2=[]
		sylls_iscorrect=[]
		lines_iscorrect_nonbounded=[]

		otf=open(ofn_ot,'w')
		otf_nl=0

		for di,d in enumerate(ld):
			line=d[key_line]
			parse_human=''.join([x for x in d[key_parse].lower() if x in ['s','w']])
			if not parse_human: continue
			t=Text(line)
			t.parse(meter=meter)
			#if not t.isParsed: continue

			parse_comp=t.parse_str(viols=False, text=False).replace('|','')

			#if len(parse_comp) != len(parse_human): continue

			parse_str=t.parse_str(viols=False, text=True)
			parses_comp = [x.replace('|','') for x in t.parse_strs(viols=False,text=False)]

			parse_human2=''.join([x for x in d.get('parse_human2','').lower() if x in ['s','w']])

			#parse_human,parse_human2=parse_human2,parse_human

			### OT
			if not otf_nl:
				header=['','','']
				for c in meter.constraints: header+=['[*'+c.name+']']
				otf.write('\t'.join(header)+'\n')
			
			humans = [parse_human]
			if parse_human2: humans+=[parse_human2]
			for _i,_parses in enumerate(t.allParses()):
				if not _parses: continue
				_parses.sort(key=lambda _P: (-humans.count(_P.str_meter()), _P.totalCount))
				if not humans.count(_parses[0].str_meter()):
					# is the good parse in the bounded ones?
					for _bndp in t.boundParses()[_i]:
						if _bndp.str_meter() in humans:
							_parses.insert(0,_bndp)

				for _pi,_parse in enumerate(_parses):
					otf_nl+=1
					code=_parse.str_meter()
					row=[line.encode('utf-8',errors='ignore') if not _pi else '', str(_parse) + (' [*Bounded]' if _parse.isBounded else ''), str(humans.count(code)) if code in humans else '']
					for c in meter.constraints: row+=[str(_parse.constraintCounts[c]) if _parse.constraintCounts[c] else '']
					otf.write('\t'.join(row)+'\n')

			# control baselines: a strict w/s alternation, and (if a meter-scheme
			# column is given) the template implied by that nominal meter
			parse_comp_dummy2 = ''.join(['w' if not i%2 else 's' for i in range(len(parse_comp))])
			if key_meterscheme:
				if d[key_meterscheme]=='iambic':
					parse_comp_dummy = ('ws'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='trochaic':
					parse_comp_dummy = ('sw'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='anapestic':
					parse_comp_dummy = ('wws'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='dactylic':
					parse_comp_dummy = ('sww'*100)[:len(parse_comp)]
				else:
					parse_comp_dummy=parse_comp_dummy2
			else:
				parse_comp_dummy=parse_comp_dummy2

			## sylls correct?
			this_sylls_correct = get_num_sylls_correct(parse_human, parse_comp)
			this_sylls_correct_dummy = get_num_sylls_correct(parse_human, parse_comp_dummy)
			this_sylls_correct_dummy2 = get_num_sylls_correct(parse_human, parse_comp_dummy2)
			if parse_human2: this_sylls_correct_human2 = get_num_sylls_correct(parse_human, parse_human2)
			num_sylls_correct=sum(this_sylls_correct)
			num_sylls_correct_dummy = sum(this_sylls_correct_dummy)
			num_sylls_correct_dummy2 = sum(this_sylls_correct_dummy2)
			if parse_human2: num_sylls_correct_human2 = sum(this_sylls_correct_human2)
			sylls_iscorrect+=this_sylls_correct
			sylls_iscorrect_control+=this_sylls_correct_dummy
			sylls_iscorrect_control2+=this_sylls_correct_dummy2
			if parse_human2: sylls_iscorrect_human2+=this_sylls_correct_human2


			# line correct?
			line_iscorrect=int(parse_comp == parse_human)
			lines_iscorrect+=[line_iscorrect]
			line_iscorrect_dummy = int(parse_comp_dummy == parse_human)
			line_iscorrect_dummy2 = int(parse_comp_dummy2 == parse_human)
			if parse_human2: line_iscorrect_human2 = int(parse_human2 == parse_human)
			lines_iscorrect_control+=[line_iscorrect_dummy]
			lines_iscorrect_control2+=[line_iscorrect_dummy2]
			if parse_human2: lines_iscorrect_human2+=[line_iscorrect_human2]

			# line at least in list of nonbounded parses?
			line_iscorrect_nonbounded=int(parse_human in parses_comp)
			lines_iscorrect_nonbounded+=[line_iscorrect_nonbounded]

			parse_stress = []
			for w in t.words():
				for x in w.stress:
					parse_stress += ['w' if x=='U' else 's']
			parse_stress=''.join(parse_stress)
			

			odx=d
			odx['parse_human']=parse_human
			if parse_human2: odx['parse_human2']=parse_human2
			odx['parse_comp']=parse_comp
			odx['parses_comp_nonbounded']=' | '.join(parses_comp)
			odx['num_sylls']=len(parse_human)
			odx['num_sylls_correct']=num_sylls_correct
			odx['num_sylls_correct_control']=num_sylls_correct_dummy
			odx['num_sylls_correct_control_iambic']=num_sylls_correct_dummy2
			if parse_human2:
				odx['num_sylls_correct_human2']=num_sylls_correct_human2
				odx['perc_sylls_correct_human2']=num_sylls_correct_human2 / float(len(parse_human))
				odx['line_iscorrect_human2']=line_iscorrect_human2
			odx['perc_sylls_correct']=num_sylls_correct / float(len(parse_human))
			odx['perc_sylls_correct_control']=num_sylls_correct_dummy  / float(len(parse_human))
			odx['perc_sylls_correct_control_iambic']=num_sylls_correct_dummy2 / float(len(parse_human))
			odx['line_iscorrect']=line_iscorrect
			odx['line_iscorrect_dummy']=line_iscorrect_dummy
			odx['line_iscorrect_dummy_iambic']=line_iscorrect_dummy2
			odx['line_is_in_nonbounded_parses']=line_iscorrect_nonbounded
			odx['parse_str_human']=_recapitalize(parse_str, parse_human)
			odx['parse_str_compu']=_recapitalize(parse_str, parse_comp)
			odx['parse_str_stress']=_recapitalize(parse_str, parse_stress)
			odx['prosody_ipa']=' '.join([w.str_ipasyllstress() for w in t.words()])
			odx['prosody_stress']=' '.join([w.stress for w in t.words()])
			odx['meter_info']=str(t.meter).replace('\n',' ').replace('\t',' ')
			sumconstr=0
			for k,v in t.constraintViolations(use_weights=False,normalize=False).items():
				odx['constraint_'+k]=v
				sumconstr+=v
			odx['constraint_SUM_VIOL']=sumconstr

			#if not line_iscorrect and line_iscorrect_dummy:
			#if len(parse_comp) != len(parse_human):
			#if len(parse_human)>len(parse_comp):
			_print(odx)
			yield odx

		print
		print '##'*10
		print 'RESULTS SUMMARY'
		print '##'*10
		perc_sylls_correct = sum(sylls_iscorrect) / float(len(sylls_iscorrect)) * 100
		perc_lines_correct = sum(lines_iscorrect) / float(len(lines_iscorrect)) * 100
		perc_lines_correct_control = sum(lines_iscorrect_control) / float(len(lines_iscorrect_control)) * 100
		perc_sylls_correct_control = sum(sylls_iscorrect_control) / float(len(sylls_iscorrect_control)) * 100
		perc_lines_correct_nonbound = sum(lines_iscorrect_nonbounded) / float(len(lines_iscorrect_nonbounded)) * 100
		print 'PERCENT SYLLABLES CORRECT:',round(perc_sylls_correct,2),'% [vs.',round(perc_sylls_correct_control,2),'% for control]'
		print 'PERCENT LINES CORRECT:',round(perc_lines_correct,2),'% [vs.',round(perc_lines_correct_control,2),'% for control]'
		print 'PERCENT LINES IN AVAILABLE NONBOUNDED PARSES:',round(perc_lines_correct_nonbound,2),'%'
	
	writegen(ofn, _writegen)
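
A hypothetical invocation: assess() expects a data file that read_ld() can load, with one record per line containing the raw text under key_line and a human scansion of 'w'/'s' codes under key_parse (e.g. 'wswswswsws'). The path and column names below are placeholders:

# placeholder path and column names; meter=None falls back to Meter.genDefault()
assess('data/scansions.txt',
       key_meterscheme='meter_scheme',   # optional column naming the nominal meter
       key_line='line', key_parse='parse')
# -> writes data/scansions.evaluated.meter=<id>.txt plus an '...ot...' constraint tableau,
#    and prints the percent-syllables / percent-lines-correct summary
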
Example #5
def assess(fn,meter=None,key_meterscheme=None, key_line='line',key_parse='parse'):
	#from prosodic import Text
	import prosodic as p
	Text=p.Text
	if not meter:
		import Meter
		meter=Meter.genDefault()

	p.config['print_to_screen']=0

	def parse2list(parse):
		l=[]
		for i,x in enumerate(parse):
			if not l or l[-1]!=x:
				l+=[x]
			else:
				l[-1]+=x
		return l

	def get_num_sylls_correct(parse_human,parse_comp):
		maxlen=max([len(parse_comp),len(parse_human)])
		#parse_human=parse2list(parse_human)
		#parse_comp=parse2list(parse_comp)
		#parse_comp_forzip = parse_comp + ['x' for x in range(maxlen-len(parse_comp))]
		#parse_human_forzip = parse_human + ['x' for x in range(maxlen-len(parse_human))]
		parse_comp_forzip = parse_comp + ''.join(['x' for x in range(maxlen-len(parse_comp))])
		parse_human_forzip = parse_human + ''.join(['x' for x in range(maxlen-len(parse_human))])

		## sylls correct?
		_sylls_iscorrect=[]
		#print '\t'.join(parse_human_forzip)
		#print '\t'.join(parse_comp_forzip)
		for syll1,syll2 in zip(parse_human_forzip,parse_comp_forzip):
			syll_iscorrect = int(syll1==syll2)
			_sylls_iscorrect+=[syll_iscorrect]
		return _sylls_iscorrect

	import codecs
	ld=read_ld(fn)
	fn_split = fn.split('.')
	ofn_split=fn_split[:-1] + ['evaluated','meter='+meter.id] + [fn_split[-1]]
	ofn_split_ot=fn_split[:-1] + ['evaluated', 'ot','meter='+meter.id] + [fn_split[-1]]
	ofn='.'.join(ofn_split)
	ofn_ot='.'.join(ofn_split_ot)

	def _print(dx):
		print()
		for k,v in sorted(dx.items()):
			print(k,'\t',v)
		print('HUMAN   :','\t'.join(dx['parse_human']))
		print('PROSODIC:','\t'.join(dx['parse_comp']))
		print('         ','\t'.join(['*' if x!=y else ' ' for x,y in zip(dx['parse_human'],dx['parse_comp'])]))
		print()

	def _recapitalize(parse,code):
		code=' '.join([x for x in code])
		parse=parse.replace('|',' ').replace('.',' ')
		newparse=[]
		for s,c in zip(parse.split(),code.split()):
			if c=='w':
				newparse+=[s.lower()]
			else:
				newparse+=[s.upper()]
		return '  '.join(newparse)

	def _writegen():
		lines_iscorrect=[]
		lines_iscorrect_control=[]
		lines_iscorrect_control2=[]
		lines_iscorrect_human2=[]
		sylls_iscorrect_control=[]
		sylls_iscorrect_control2=[]
		sylls_iscorrect_human2=[]
		sylls_iscorrect=[]
		lines_iscorrect_nonbounded=[]

		otf=open(ofn_ot,'w')
		otf_nl=0

		for di,d in enumerate(ld):
			line=d[key_line]
			parse_human=''.join([x for x in d[key_parse].lower() if x in ['s','w']])
			if not parse_human: continue
			t=Text(line)
			t.parse(meter=meter)
			#if not t.isParsed: continue

			parse_comp=t.parse_str(viols=False, text=False).replace('|','')

			#if len(parse_comp) != len(parse_human): continue

			parse_str=t.parse_str(viols=False, text=True)
			parses_comp = [x.replace('|','') for x in t.parse_strs(viols=False,text=False)]

			parse_human2=''.join([x for x in d.get('parse_human2','').lower() if x in ['s','w']])

			#parse_human,parse_human2=parse_human2,parse_human

			### OT
			if not otf_nl:
				header=['','','']
				for c in meter.constraints: header+=['[*'+c.name+']']
				otf.write('\t'.join(header)+'\n')

			humans = [parse_human]
			if parse_human2: humans+=[parse_human2]
			for _i,_parses in enumerate(t.allParses()):
				if not _parses: continue
				_parses.sort(key=lambda _P: (-humans.count(_P.str_meter()), _P.totalCount))
				if not humans.count(_parses[0].str_meter()):
					# is the good parse in the bounded ones?
					for _bndp in t.boundParses()[_i]:
						if _bndp.str_meter() in humans:
							_parses.insert(0,_bndp)

				for _pi,_parse in enumerate(_parses):
					otf_nl+=1
					code=_parse.str_meter()
					# keep the row as str throughout: joining bytes into a str would raise in Python 3
					row=[line if not _pi else '', str(_parse) + (' [*Bounded]' if _parse.isBounded else ''), str(humans.count(code)) if code in humans else '']
					for c in meter.constraints: row+=[str(_parse.constraintCounts[c]) if _parse.constraintCounts[c] else '']
					otf.write('\t'.join(row)+'\n')

			parse_comp_dummy2 = ''.join(['w' if not i%2 else 's' for i in range(len(parse_comp))])
			if key_meterscheme:
				if d[key_meterscheme]=='iambic':
					parse_comp_dummy = ('ws'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='trochaic':
					parse_comp_dummy = ('sw'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='anapestic':
					parse_comp_dummy = ('wws'*100)[:len(parse_comp)]
				elif d[key_meterscheme]=='dactylic':
					parse_comp_dummy = ('sww'*100)[:len(parse_comp)]
				else:
					parse_comp_dummy=parse_comp_dummy2
			else:
				parse_comp_dummy=parse_comp_dummy2

			## sylls correct?
			this_sylls_correct = get_num_sylls_correct(parse_human, parse_comp)
			this_sylls_correct_dummy = get_num_sylls_correct(parse_human, parse_comp_dummy)
			this_sylls_correct_dummy2 = get_num_sylls_correct(parse_human, parse_comp_dummy2)
			if parse_human2: this_sylls_correct_human2 = get_num_sylls_correct(parse_human, parse_human2)
			num_sylls_correct=sum(this_sylls_correct)
			num_sylls_correct_dummy = sum(this_sylls_correct_dummy)
			num_sylls_correct_dummy2 = sum(this_sylls_correct_dummy2)
			if parse_human2: num_sylls_correct_human2 = sum(this_sylls_correct_human2)
			sylls_iscorrect+=this_sylls_correct
			sylls_iscorrect_control+=this_sylls_correct_dummy
			sylls_iscorrect_control2+=this_sylls_correct_dummy2
			if parse_human2: sylls_iscorrect_human2+=this_sylls_correct_human2


			# line correct?
			line_iscorrect=int(parse_comp == parse_human)
			lines_iscorrect+=[line_iscorrect]
			line_iscorrect_dummy = int(parse_comp_dummy == parse_human)
			line_iscorrect_dummy2 = int(parse_comp_dummy2 == parse_human)
			if parse_human2: line_iscorrect_human2 = int(parse_human2 == parse_human)
			lines_iscorrect_control+=[line_iscorrect_dummy]
			lines_iscorrect_control2+=[line_iscorrect_dummy2]
			if parse_human2: lines_iscorrect_human2+=[line_iscorrect_human2]

			# line at least in list of nonbounded parses?
			line_iscorrect_nonbounded=int(parse_human in parses_comp)
			lines_iscorrect_nonbounded+=[line_iscorrect_nonbounded]

			parse_stress = []
			for w in t.words():
				for x in w.stress:
					parse_stress += ['w' if x=='U' else 's']
			parse_stress=''.join(parse_stress)


			odx=d
			odx['parse_human']=parse_human
			if parse_human2: odx['parse_human2']=parse_human2
			odx['parse_comp']=parse_comp
			odx['parses_comp_nonbounded']=' | '.join(parses_comp)
			odx['num_sylls']=len(parse_human)
			odx['num_sylls_correct']=num_sylls_correct
			odx['num_sylls_correct_control']=num_sylls_correct_dummy
			odx['num_sylls_correct_control_iambic']=num_sylls_correct_dummy2
			if parse_human2:
				odx['num_sylls_correct_human2']=num_sylls_correct_human2
				odx['perc_sylls_correct_human2']=num_sylls_correct_human2 / float(len(parse_human))
				odx['line_iscorrect_human2']=line_iscorrect_human2
			odx['perc_sylls_correct']=num_sylls_correct / float(len(parse_human))
			odx['perc_sylls_correct_control']=num_sylls_correct_dummy  / float(len(parse_human))
			odx['perc_sylls_correct_control_iambic']=num_sylls_correct_dummy2 / float(len(parse_human))
			odx['line_iscorrect']=line_iscorrect
			odx['line_iscorrect_dummy']=line_iscorrect_dummy
			odx['line_iscorrect_dummy_iambic']=line_iscorrect_dummy2
			odx['line_is_in_nonbounded_parses']=line_iscorrect_nonbounded
			odx['parse_str_human']=_recapitalize(parse_str, parse_human)
			odx['parse_str_compu']=_recapitalize(parse_str, parse_comp)
			odx['parse_str_stress']=_recapitalize(parse_str, parse_stress)
			odx['prosody_ipa']=' '.join([w.str_ipasyllstress() for w in t.words()])
			odx['prosody_stress']=' '.join([w.stress for w in t.words()])
			odx['meter_info']=str(t.meter).replace('\n',' ').replace('\t',' ')
			sumconstr=0
			for k,v in list(t.constraintViolations(use_weights=False,normalize=False).items()):
				odx['constraint_'+k]=v
				sumconstr+=v
			odx['constraint_SUM_VIOL']=sumconstr

			#if not line_iscorrect and line_iscorrect_dummy:
			#if len(parse_comp) != len(parse_human):
			#if len(parse_human)>len(parse_comp):
			_print(odx)
			yield odx

		print()
		print('##'*10)
		print('RESULTS SUMMARY')
		print('##'*10)
		perc_sylls_correct = sum(sylls_iscorrect) / float(len(sylls_iscorrect)) * 100
		perc_lines_correct = sum(lines_iscorrect) / float(len(lines_iscorrect)) * 100
		perc_lines_correct_control = sum(lines_iscorrect_control) / float(len(lines_iscorrect_control)) * 100
		perc_sylls_correct_control = sum(sylls_iscorrect_control) / float(len(sylls_iscorrect_control)) * 100
		perc_lines_correct_nonbound = sum(lines_iscorrect_nonbounded) / float(len(lines_iscorrect_nonbounded)) * 100
		print('PERCENT SYLLABLES CORRECT:',round(perc_sylls_correct,2),'% [vs.',round(perc_sylls_correct_control,2),'% for control]')
		print('PERCENT LINES CORRECT:',round(perc_lines_correct,2),'% [vs.',round(perc_lines_correct_control,2),'% for control]')
		print('PERCENT LINES IN AVAILABLE NONBOUNDED PARSES:',round(perc_lines_correct_nonbound,2),'%')

	writegen(ofn, _writegen)
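
The per-syllable scoring in get_num_sylls_correct (Examples #4 and #5) pads the shorter parse with 'x' and counts position-by-position matches, so length mismatches are penalized rather than skipped. A quick standalone check of that logic, reimplemented here rather than imported:

def sylls_correct(parse_human, parse_comp):
    # pad the shorter string with 'x' so extra or missing syllables count as wrong
    maxlen = max(len(parse_human), len(parse_comp))
    human = parse_human + 'x' * (maxlen - len(parse_human))
    comp = parse_comp + 'x' * (maxlen - len(parse_comp))
    return [int(a == b) for a, b in zip(human, comp)]

print(sylls_correct('wswsws', 'wswwss'))      # [1, 1, 1, 0, 0, 1]
print(sum(sylls_correct('wswsws', 'wsws')))   # 4: the two unmatched syllables score 0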