示例#1
0
    def loadText(self, filename):
        """Run U2A on ``filename`` and load the converted text into ``self.text``.

        ``U2A.processFile`` is called with a one-element list holding
        ``filename``.  The text is then read, decoded as UTF-8, from the file
        named ``<filename without its extension>-ascii.txt``.
        NOTE(review): presumably ``U2A.processFile`` is what writes that
        "-ascii.txt" file -- confirm against the U2A module.

        :param filename: path of the input text file.
        """
        # Perform the replacement of Unicode characters.
        argv = [filename]
        # Parenthesised form prints the same under Python 2 and also parses
        # under Python 3.
        print(argv)

        U2A.processFile(argv)

        ascii_path = os.path.splitext(filename)[0] + "-ascii.txt"
        # The context manager closes the file even if read()/decoding raises.
        with codecs.open(ascii_path, "rt", "utf-8") as f:
            self.text = f.read()
示例#2
0
def u2a_convert(pmid, in_str, tmp_suffix):
    """Pass ``in_str`` through ``U2A.processFile`` via a temporary file.

    The text is written to ``/tmp/<pmid>.<tmp_suffix>``, that path is handed
    to ``U2A.processFile``, and the first line of the same file is returned.
    NOTE(review): presumably ``U2A.processFile`` rewrites the file in place --
    confirm against the U2A module.  The temporary file is left on disk, as
    in the original code.

    :param pmid: identifier used in the temp file name and in the
        empty-input result.
    :param in_str: text to convert; an empty (or ``None``) value
        short-circuits.
    :param tmp_suffix: extension-like suffix for the temp file name.
    :return: ``'<pmid>|a|'`` for empty input, otherwise the first line
        (including its trailing newline, if any) of the processed file.
    :raises IndexError: if the processed file contains no lines.
    """
    if not in_str:
        return '%s|a|' % pmid

    ftmp_name = '/tmp/%s.%s' % (pmid, tmp_suffix)

    # `with` guarantees the handle is flushed and closed before U2A reads it,
    # even if write() raises.
    with open(ftmp_name, 'w') as ftmp:
        ftmp.write(in_str)

    U2A.processFile(ftmp_name)

    # The original never closed this read handle; close it deterministically.
    with open(ftmp_name, 'r') as ftmp_read:
        return ftmp_read.readlines()[0]
示例#3
0
def u2a_convert(id, in_str, tmp_suffix):
    """Convert ``in_str`` to ASCII and collapse runs of whitespace.

    A ``unicode`` input is first NFKD-normalised and encoded to ASCII,
    dropping characters with no ASCII form.  The text is then written to
    ``/tmp/<id>.<tmp_suffix>``, that path is handed to ``U2A.processFile``,
    and the first line of the same file is read back.  Every run of two or
    more whitespace characters in that line is replaced by a single space.
    NOTE(review): presumably ``U2A.processFile`` rewrites the file in place --
    confirm against the U2A module.  The temporary file is left on disk, as
    in the original code.

    :param id: identifier used in the temp file name (the name shadows the
        builtin but is kept for caller compatibility).
    :param in_str: text to convert; an empty (or ``None``) value yields ``''``.
    :param tmp_suffix: extension-like suffix for the temp file name.
    :return: the whitespace-collapsed first line of the processed file, or
        ``''`` for empty input.
    :raises IndexError: if the processed file contains no lines.
    """
    if not in_str:
        return ''

    ftmp_name = '/tmp/%s.%s' % (id, tmp_suffix)
    if isinstance(in_str, unicode):
        # Decompose accented characters, then drop whatever is still non-ASCII.
        in_str = unicodedata.normalize('NFKD',
                                       in_str).encode('ascii', 'ignore')

    # `with` guarantees the handle is flushed and closed before U2A reads it,
    # even if write() raises.
    with open(ftmp_name, 'w') as ftmp:
        ftmp.write(in_str)

    U2A.processFile(ftmp_name)

    # The original never closed this read handle; close it deterministically.
    with open(ftmp_name, 'r') as ftmp_read:
        new_str = ftmp_read.readlines()[0]

    return re.sub(r'\s\s+', ' ', new_str)