Пример #1
0
def fixstring(str):
    """Replace mis-decoded quote characters, then run the clean-up helpers.

    Per the original note, the replaced sequences are leftovers from UTF-8
    text that was interpreted using a Microsoft code page.

    Steps, in order:
      1. Substitute three known sequences with plain ASCII quotes.
      2. Pass the text through ``cf.convert_entities``.
      3. Pass the text through ``cf.convert_unicode_u``.
      4. Pass the text through ``html_to_segments``.
      5. Call ``.strip()`` on the final value and return it.

    NOTE(review): the helpers in steps 2-4 are defined outside this block;
    they are presumed to take and return text -- confirm against their
    definitions.
    """
    # (sequence to find, ASCII replacement); applied top to bottom.
    quote_fixes = (
        (u"“", u'"'),
        (u"’", u"'"),
        (u"â€", u'"'),
    )

    text = str
    for garbled, plain in quote_fixes:
        text = text.replace(garbled, plain)

    text = cf.convert_entities(text)
    text = cf.convert_unicode_u(text)
    text = html_to_segments(text)
    return text.strip()
Пример #2
0
def fixstring(str):
	"""Replace mis-decoded quote characters, then run the clean-up helpers.

	Per the original note, the replaced sequences are leftovers from UTF-8
	text that was interpreted using a Microsoft code page.

	The text is first rewritten so that three known sequences become plain
	ASCII quotes, then handed in turn to ``cf.convert_entities``,
	``cf.convert_unicode_u`` and ``html_to_segments``; the final value has
	``.strip()`` applied before it is returned.

	NOTE(review): those three helpers live outside this block; they are
	presumed to take and return text -- confirm against their definitions.
	"""
	# (sequence to find, ASCII replacement); applied top to bottom.
	quote_fixes = (
		(u"“", u'"'),
		(u"’", u"'"),
		(u"â€", u'"'),
	)

	text = str
	for garbled, plain in quote_fixes:
		text = text.replace(garbled, plain)

	text = cf.convert_entities(text)
	text = cf.convert_unicode_u(text)
	text = html_to_segments(text)
	return text.strip()
Пример #3
0
	def cleansentence(self):
		"""Return ``self.sentence`` after passing it through ``html_to_segments``.

		NOTE(review): ``html_to_segments`` is defined outside this block, so
		the exact shape of the returned value should be checked there.
		"""
		raw_sentence = self.sentence
		return html_to_segments(raw_sentence)
Пример #4
0
 def cleansentence(self):
     """Return ``self.sentence`` after passing it through ``html_to_segments``.

     NOTE(review): ``html_to_segments`` is defined outside this block, so
     the exact shape of the returned value should be checked there.
     """
     raw_sentence = self.sentence
     return html_to_segments(raw_sentence)