# NOTE(review): the expression below is the tail of a definition that begins
# before this chunk; only its visible tokens are reproduced here.
    QuotedString(quoteChar='(', endQuoteChar=')').setResultsName("subgroup") +
    Suppress("]")
# The parse action builds a tokens.Context whose label list is
# [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])] and whose second argument
# is bool(m.certain). The "certain" results name is presumably set by the part
# of the expression that is not visible here -- confirm.
).setParseAction(lambda m: tokens.Context(
    [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])], bool(m.certain)))

# Phrases like '“Nonimmigrant visa”' become 'p12345678'
# The text between curly double quotes is matched, and the first resulting
# token is replaced by "p" followed by hash_for_paragraph() of that token.
_double_quote_label = QuotedString(
    quoteChar=u'“', endQuoteChar=u'”'
).setParseAction(lambda m: "p{0}".format(hash_for_paragraph(m[0])))

# Phrases like "definition for the term “Nonimmigrant visa”" become a
# paragraph token with the appropriate paragraph label set
# A copy of _double_quote_label is used so that the "paragraph" results name
# is attached to the copy rather than to the shared expression.
definition = (
    Marker("definition") +
    (Marker("of") | Marker("for")) +
    Optional(Marker("the") + Marker("term")) +
    _double_quote_label.copy().setResultsName("paragraph")
).setParseAction(lambda m: tokens.Paragraph.make(paragraphs=[m.paragraph]))

# grammar which captures all of these possibilities
# NOTE(review): this alternation continues past the end of this chunk; the
# trailing "|" is intentional and the remaining alternatives follow it.
token_patterns = QuickSearchable(
    put_active | put_passive | post_active | post_passive |
    delete_active | delete_passive | move_active | move_passive |
    designate_active | reserve_active | insert_in_order | interp |
    marker_subpart | appendix |
    comment_context_with_section | comment_context_without_section |
    comment_context_under_with_section |
    paragraph_heading_of | section_heading_of |
    multiple_intro_text_of | intro_text_of |
    appendix_section_heading_of |
# NOTE(review): the expression below is the tail of a definition that begins
# before this chunk; only its visible tokens are reproduced here.
    QuotedString(quoteChar='(', endQuoteChar=')').setResultsName("subgroup") +
    Suppress("]")
# The parse action builds a tokens.Context whose label list is
# [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])] and whose second argument
# is bool(m.certain). The "certain" results name is presumably set by the part
# of the expression that is not visible here -- confirm.
).setParseAction(lambda m: tokens.Context(
    [None, 'Subjgrp:' + subjgrp_label(m.subgroup, [])], bool(m.certain)))

# Phrases like '“Nonimmigrant visa”' become 'p12345678'
# The text between curly double quotes is matched, and the first resulting
# token is replaced by "p" followed by hash_for_paragraph() of that token.
_double_quote_label = QuotedString(
    quoteChar=u'“', endQuoteChar=u'”'
).setParseAction(lambda m: "p{}".format(hash_for_paragraph(m[0])))

# Phrases like "definition for the term “Nonimmigrant visa”" become a
# paragraph token with the appropriate paragraph label set
# A copy of _double_quote_label is used so that the "paragraph" results name
# is attached to the copy rather than to the shared expression.
definition = (
    Marker("definition") +
    (Marker("of") | Marker("for")) +
    Optional(Marker("the") + Marker("term")) +
    _double_quote_label.copy().setResultsName("paragraph")
).setParseAction(lambda m: tokens.Paragraph(paragraphs=[m.paragraph]))

# grammar which captures all of these possibilities
# NOTE(review): this alternation continues past the end of this chunk; the
# trailing "|" is intentional and the remaining alternatives follow it.
token_patterns = QuickSearchable(
    put_active | put_passive | post_active | post_passive |
    delete_active | delete_passive | move_active | move_passive |
    designate_active | reserve_active | insert_in_order | interp |
    marker_subpart | appendix |
    comment_context_with_section | comment_context_without_section |
    comment_context_under_with_section |
    paragraph_heading_of | section_heading_of |
    multiple_intro_text_of | intro_text_of |
    appendix_section_heading_of |
elif palavra in _PALAVRAS_PADRAO: palavra = _PALAVRAS_PADRAO[palavra] else: palavra = tira_acentos(palavra) return palavra palavras_pt_BR = Word(alphanums + alphas8bit + '_=!') palavras_pt_BR.setParseAction(_converte_ingles) tripleQuote = QuotedString('"""', multiline=True, unquoteResults=False) | \ QuotedString("'''", multiline=True, unquoteResults=False) unicodeTripleQuote = Combine(Literal('u') + tripleQuote.copy()).setName( "unicode string triple quoted") python_brasil = unicodeTripleQuote | tripleQuote | unicodeString | \ quotedString | pythonStyleComment | \ palavras_pt_BR def python_pt_BR(texto, dicionario={}): global PALAVRAS_BRASIL PALAVRAS_BRASIL.update(dicionario) if type(texto) != unicode: texto = unicode(texto) return python_brasil.transformString(texto)