def handle_text(text):
    """Clip the first ``{{MatchList`` template out of *text* and parse it.

    The start of the template is found with a substring search. The end is
    found by feeding the rest of the text to the module-level ``lexer`` and
    counting ``L2BRACE`` / ``R2BRACE`` tokens until the braces balance.

    Args:
        text: The text to search.

    Returns:
        Whatever ``yaccer.parse`` returns for the clipped section.

    Raises:
        ValueError: If *text* has no ``{{MatchList`` marker, or if the token
            stream ends before the template's braces balance.
    """
    # figure out where the match list starts
    start_index = text.find("{{MatchList")
    if start_index == -1:
        # Without this guard, text[-1:] would silently lex only the last
        # character of the input.
        raise ValueError("no {{MatchList section found in text")

    # find the end of the match list using the token stream
    lexer.input(text[start_index:])
    depth = 0
    end_token = None
    while True:
        token = lexer.token()
        if token is None:
            break
        if token.type == "L2BRACE":
            depth += 1
        elif token.type == "R2BRACE":
            depth -= 1
            if depth == 0:
                end_token = token
                break

    if end_token is None:
        raise ValueError("unterminated {{MatchList section: braces never balance")

    # pick out the match list section; the token position is added to
    # start_index because the lexer was fed text[start_index:]
    end_index = start_index + end_token.lexpos + len(end_token.value)
    clipped_text = text[start_index:end_index]

    # generate parse tree
    return yaccer.parse(clipped_text)
|R4W2=herO |R4W2race=p |R4W2flag=kr |R4W2score=0 |R4W2win= |R4G1details={{BracketMatchSummary |date=April 12th, 2013 |lrthread=http://www.teamliquid.net/forum/viewmessage.php?topic_id=407546 |map1=Cloud Kingdom |map1win=1 |vodgame1=http://www.youtube.com/watch?v=aQGNo5Dyv4E&list=PLn9kCgJGjpyLgB-r9PNJaRsaK3jDOqBi8 |map2=Neo Planet S |map2win=1 |vodgame2=http://www.youtube.com/watch?v=lrCTSNsJJ6g&list=PLn9kCgJGjpyLgB-r9PNJaRsaK3jDOqBi8 |map3=Daybreak |map3win=1 |vodgame3=http://www.youtube.com/watch?v=fpmf6675w5w&list=PLn9kCgJGjpyLgB-r9PNJaRsaK3jDOqBi8 }} }}''' text_stripped = strip_comments(text) # lexer.input(text_stripped) # while True: # token = lexer.token() # if token is None: break # print token tree = yaccer.parse(text_stripped) conn = sqlite3.connect('matches.db') cursor = conn.cursor() delete_db(cursor) create_db(cursor) conn.commit() handle_bracket(cursor, tree) conn.commit() conn.close()