示例#1
0
class TestParsing(unittest.TestCase):
    """Checks how SyllableParser classifies individual Thai code points."""

    def setUp(self):
        """Give each test its own SyllableParser via ``self.parser``."""
        fresh_parser = SyllableParser()
        self.parser = fresh_parser

    def test_identify_characters(self):
        # U+0E2F (Unicode name: THAI CHARACTER PAIYANNOI) is expected to be
        # classified as a vowel and not as a consonant; U+0E01 (THAI CHARACTER
        # KO KAI) is expected to be a consonant and not a vowel.
        paiyannoi = u'\u0e2F'
        ko_kai = u'\u0e01'
        classifier = self.parser

        self.assertTrue(classifier.is_vowel(paiyannoi))
        self.assertFalse(classifier.is_consonant(paiyannoi))

        self.assertTrue(classifier.is_consonant(ko_kai))
        self.assertFalse(classifier.is_vowel(ko_kai))
class ValidationParser(object):
    """Scrape a Thai word table into a tab-separated file and check it.

    ``parse`` reads an HTML document, picks the table rows that describe a
    word with a single tone span, and writes them out as
    ``word<TAB>definition<TAB>tone`` lines. ``validate`` reads such a file
    back and prints, for every character of each word, the result of
    ``self.parser.is_vowel``.
    """

    # Class-level placeholders. ``parse`` rebinds ``page`` and ``soup`` on the
    # instance; ``url`` is not used by any method of this class.
    url = ""
    page = ""
    soup = ""

    def __init__(self):
        """Create the SyllableParser used by ``validate``."""
        self.parser = SyllableParser()

    def parse(self,
              source="/Users/sutee/src/thaiforengineers/src/tools/thaiwords.html",
              out_path="newfile.txt"):
        """Write one TSV line per usable table row found in ``source``.

        A row is used only when it has exactly three ``<td>`` cells, the
        third cell (definition) has a non-empty string, and the second cell
        holds exactly one ``<span>`` (the original author's criterion for a
        one-syllable word). Rows whose word or tone text cannot be reduced
        to a single string are skipped instead of aborting the whole run.

        Args:
            source: Path or URL handed to ``urllib.urlopen``. Defaults to the
                location that used to be hard-coded.
            out_path: File to (over)write with the UTF-8 encoded records.
                Defaults to ``newfile.txt`` in the current directory.

        Progress is printed to stdout as the rows are visited.
        """
        print(os.getcwd())
        # ``with`` guarantees the output file is flushed and closed even if
        # fetching or parsing the document raises.
        with open(out_path, "w") as vfile:
            self.page = urllib.urlopen(source)
            try:
                # BeautifulSoup consumes the whole stream while constructing
                # the tree, so the handle can be released right afterwards.
                self.soup = BeautifulSoup(self.page, 'html.parser')
            finally:
                self.page.close()
            print("initialized")

            for row in self.soup.findAll('tr'):
                print("-----")
                tds = row.findAll('td')
                print(str(tds))
                if len(tds) != 3:
                    continue

                definition = tds[2].string
                if not definition:
                    continue

                tones = tds[1].findAll('span')
                if len(tones) != 1:  # only care about one syllable for now
                    continue

                thaiword = tds[0].string
                tone = tones[0].string
                # ``.string`` is None when a tag has several children; such
                # rows used to crash on ``None.encode``.
                if thaiword is None or tone is None:
                    continue

                anchors = tds[0].findAll('a')
                if anchors:  # the first cell does not always carry a link
                    print(anchors[0].string)

                record = u"\t".join([thaiword, definition, tone]) + u"\n"
                vfile.write(record.encode('utf8'))
                print(tone)
                print(definition)

    def validate(self, path="/Users/sutee/src/thaiforengineers/src/newfile2.txt"):
        """Print the vowel classification of every character of each word.

        Each line of ``path`` is expected to hold three tab-separated fields
        (word, definition, tone). Blank or malformed lines are skipped rather
        than raising ``ValueError`` during unpacking.

        Args:
            path: TSV file to read. Defaults to the location that used to be
                hard-coded.
        """
        print("validate")
        with open(path, "r") as infile:
            for line in infile:
                print("---")
                fields = line.rstrip("\r\n").split("\t")
                if len(fields) != 3:
                    continue

                raw_word = fields[0]
                print(raw_word)

                # On Python 2 the file yields byte strings; iterating those
                # would feed single UTF-8 bytes to the parser instead of whole
                # Thai characters, so decode before walking the word.
                if isinstance(raw_word, bytes):
                    word = raw_word.decode('utf8')
                else:
                    word = raw_word

                for char in word:
                    print(self.parser.is_vowel(char))
示例#3
0
 def setUp(self):
     """Bind a newly constructed SyllableParser to ``self.parser``."""
     fresh_parser = SyllableParser()
     self.parser = fresh_parser
 def __init__(self):
     """Store a newly constructed SyllableParser on ``self.parser``."""
     syllable_parser = SyllableParser()
     self.parser = syllable_parser