def fixstring(str):
    """Clean up text whose UTF-8 quotes were read as a Microsoft code page.

    The garbled quote sequences are swapped for plain ASCII quotes, then the
    text is passed through ``cf.convert_entities``, ``cf.convert_unicode_u``
    and ``html_to_segments`` (all defined outside this block), in that order.

    Returns the final value with ``.strip()`` applied.
    """
    # Order matters: the two three-character sequences must be handled
    # before the two-character prefix they both start with.
    mojibake_fixes = (
        (u"“", u'"'),
        (u"’", u"'"),
        (u"â€", u'"'),
    )
    text = str
    for garbled, plain in mojibake_fixes:
        text = text.replace(garbled, plain)

    text = cf.convert_entities(text)
    text = cf.convert_unicode_u(text)
    text = html_to_segments(text)
    return text.strip()
def fixstring(str):
    """Replace mis-decoded quote sequences and normalise the text.

    Quote characters that were mangled by reading UTF-8 as a Microsoft code
    page are replaced with ASCII quotes. The result is then fed through
    ``cf.convert_entities``, ``cf.convert_unicode_u`` and
    ``html_to_segments`` (helpers defined elsewhere) and finally stripped.
    """
    # Longer garbled sequences first; the last pattern is a prefix of both.
    repaired = str.replace(u"“", u'"')
    repaired = repaired.replace(u"’", u"'")
    repaired = repaired.replace(u"â€", u'"')

    return html_to_segments(
        cf.convert_unicode_u(cf.convert_entities(repaired))
    ).strip()
def cleanup(claim):
    """Run *claim* through ``cf.convert_entities`` then ``cf.convert_unicode``.

    NOTE(review): as visible here the converted value is only rebound to the
    local name and never returned, so the call yields ``None``. The
    definition may be cut short in this view; confirm whether a ``return``
    is intended.
    """
    claim = cf.convert_unicode(cf.convert_entities(claim))
def fix_string(txt):
    """Convert entities and unicode escapes in *txt*, then decode as UTF-8.

    The value is passed through ``cf.convert_entities`` and then
    ``cf.convert_unicode`` (helpers defined outside this block); the result
    of calling ``.decode('utf-8')`` on that output is returned.

    NOTE(review): ``.decode`` implies the converted value is a byte string
    (Python 2 ``str`` or ``bytes``) -- confirm against ``cf``.
    """
    converted = cf.convert_unicode(cf.convert_entities(txt))
    return converted.decode('utf-8')
def fix_string(txt):
    """Apply the ``cf`` converters to *txt* and return it decoded as UTF-8.

    ``cf.convert_entities`` runs first, followed by ``cf.convert_unicode``
    (both defined elsewhere). The outcome is decoded with ``"utf-8"``.

    NOTE(review): presumably the converters return a byte string, since
    ``.decode`` is called on the result -- verify against ``cf``.
    """
    for convert in (cf.convert_entities, cf.convert_unicode):
        txt = convert(txt)
    return txt.decode("utf-8")