class AdmGatherer(regexp.RegexpGatherer):
  '''Gatherer for the translateable portions of an admin template.

  This gatherer currently makes the following assumptions:
  - there is only one [strings] section and it is always the last section
    of the file
  - translateable strings do not need to be escaped.
  '''

  # Finds the strings section as the group named 'strings'.  Raw string
  # literal so that \[ and \Z reach the regex engine unmangled.
  _STRINGS_SECTION = lazy_re.compile(
      r'(?P<first_part>.+^\[strings\])(?P<strings>.+)\Z',
      re.MULTILINE | re.DOTALL)

  # Finds the translateable sections from within the [strings] section.
  _TRANSLATEABLES = lazy_re.compile(
      r'^\s*[A-Za-z0-9_]+\s*=\s*"(?P<text>.+)"\s*$',
      re.MULTILINE)

  def __init__(self, text):
    regexp.RegexpGatherer.__init__(self, text)

  def Escape(self, text):
    # Admin templates represent newlines as the literal two-character
    # sequence backslash-n.
    return text.replace('\n', '\\n')

  def UnEscape(self, text):
    return text.replace('\\n', '\n')

  def Parse(self):
    '''Splits the template into a nontranslateable skeleton part and the
    translateable [strings] section.'''
    # NOTE(review): have_parsed_ is checked but never set in this method;
    # presumably a base-class hook sets it -- confirm against
    # regexp.RegexpGatherer.
    if self.have_parsed_:
      return
    m = self._STRINGS_SECTION.match(self.text_)
    if not m:
      raise MalformedAdminTemplateException()
    # Add the first part, which is all nontranslateable, to the skeleton
    self._AddNontranslateableChunk(m.group('first_part'))
    # Then parse the rest using the _TRANSLATEABLES regexp.
    self._RegExpParse(self._TRANSLATEABLES, m.group('strings'))

  # static method
  def FromFile(adm_file, ext_key=None, encoding='cp1252'):
    '''Loads the contents of 'adm_file' in encoding 'encoding' and creates
    an AdmGatherer instance that gathers from those contents.

    The 'ext_key' parameter is ignored.

    Args:
      adm_file: file('bingo.rc') | 'filename.rc'
      encoding: 'utf-8'

    Return:
      AdmGatherer(contents_of_file)
    '''
    if isinstance(adm_file, types.StringTypes):
      adm_file = util.WrapInputStream(file(adm_file, 'r'), encoding)
    return AdmGatherer(adm_file.read())
  FromFile = staticmethod(FromFile)
class Accelerators(Section):
  '''An ACCELERATORS table.
  '''

  # A typical ACCELERATORS section looks like this:
  #
  # IDR_ACCELERATOR1 ACCELERATORS
  # BEGIN
  #   "^C",           ID_ACCELERATOR32770,    ASCII,  NOINVERT
  #   "^V",           ID_ACCELERATOR32771,    ASCII,  NOINVERT
  #   VK_INSERT,      ID_ACCELERATOR32772,    VIRTKEY, CONTROL, NOINVERT
  # END

  # Raw string literal so \s et al. reach the regex engine unescaped.
  accelerators_re_ = lazy_re.compile(r'''
    # Match the ID on the first line
    ^(?P<id1>[A-Z0-9_]+)\s+ACCELERATORS\s+
    |
    # Match accelerators specified as VK_XXX
    \s+VK_[A-Z0-9_]+,\s*(?P<id2>[A-Z0-9_]+)\s*,
    |
    # Match accelerators specified as e.g. "^C"
    \s+"[^"]*",\s+(?P<id3>[A-Z0-9_]+)\s*,
    ''', re.MULTILINE | re.VERBOSE)

  def Parse(self):
    '''Knows how to parse ACCELERATORS resource sections.'''
    self.ReadSection()
    self._RegExpParse(self.accelerators_re_, self.text_)
def _AddTextWithLinks(self, parent, text):
  """Parse a string for URLs and add it to a DOM node with the URLs replaced
  with <a> HTML links.

  Args:
    parent: The DOM node to which the text will be added.
    text: The string to be added.
  """
  # A simple regexp to search for URLs. It is enough for now.
  url_matcher = lazy_re.compile("(http://[^\\s]*[^\\s\\.])")

  # Walk the string, splitting out each URL as a link element.
  match = url_matcher.search(text)
  while match:
    url = match.group(0)
    # Emit the plain text preceding the URL, then the link itself.
    self.AddText(parent, text[:match.start(0)])
    self.AddElement(parent, "a", {"href": url}, url)
    # Continue scanning after the URL.
    text = text[match.end(0):]
    match = url_matcher.search(text)
  # Whatever remains contains no URL.
  self.AddText(parent, text)
def Init(self):
  '''Builds the skeleton HTML document and the lookup tables used while
  generating the documentation.'''
  dom_factory = minidom.getDOMImplementation('')
  self._doc = dom_factory.createDocument(None, 'html', None)
  doc_body = self.AddElement(self._doc.documentElement, 'body')
  self._main_div = self.AddElement(doc_body, 'div')
  self._indent_level = 0

  # Human-readable names of supported platforms.
  self._PLATFORM_MAP = {
      'win': 'Windows',
      'mac': 'Mac',
      'linux': 'Linux',
      'chrome_os': self.config['os_name'],
  }
  # Human-readable names of supported products.
  self._PRODUCT_MAP = {
      'chrome': self.config['app_name'],
      'chrome_frame': self.config['frame_name'],
      'chrome_os': self.config['os_name'],
  }
  # Human-readable names of supported features. Each supported feature has
  # a 'doc_feature_X' entry in |self.messages|.
  feature_prefix = 'doc_feature_'
  self._FEATURE_MAP = dict(
      (name[len(feature_prefix):], self.messages[name]['text'])
      for name in self.messages if name.startswith(feature_prefix))
  # Human-readable names of types.
  self._TYPE_MAP = {
      'string': 'String (REG_SZ)',
      'int': 'Integer (REG_DWORD)',
      'main': 'Boolean (REG_DWORD)',
      'int-enum': 'Integer (REG_DWORD)',
      'string-enum': 'String (REG_SZ)',
      'list': 'List of strings',
      'dict': 'Dictionary (REG_SZ, encoded as a JSON string)',
  }
  # The CSS style-sheet used for the document. It will be used in Google
  # Sites, which strips class attributes from HTML tags. To work around this,
  # the style-sheet is a dictionary and the style attributes will be added
  # "by hand" for each element.
  self._STYLE = {
      'table': 'border-style: none; border-collapse: collapse;',
      'tr': 'height: 0px;',
      'td': 'border: 1px dotted rgb(170, 170, 170); padding: 7px; '
            'vertical-align: top; width: 236px; height: 15px;',
      'thead td': 'font-weight: bold;',
      'td.left': 'width: 200px;',
      'td.right': 'width: 100%;',
      'dt': 'font-weight: bold;',
      'dd dl': 'margin-top: 0px; margin-bottom: 0px;',
      '.monospace': 'font-family: monospace;',
      '.pre': 'white-space: pre;',
      'div.note': 'border: 2px solid black; padding: 5px; margin: 5px;',
      'div.group_desc': 'margin-top: 20px; margin-bottom: 20px;',
      'ul': 'padding-left: 0px; margin-left: 0px;',
  }
  # A simple regexp to search for URLs. It is enough for now.
  self._url_matcher = lazy_re.compile('(http://[^\\s]*[^\\s\\.])')
class Placeholder(grit.extern.tclib.Placeholder):
  '''Modifies constructor to accept a Unicode string
  '''

  # Must match placeholder presentation names
  _NAME_RE = lazy_re.compile('^[A-Za-z0-9_]+$')

  def __init__(self, presentation, original, example):
    '''Creates a new placeholder.

    Args:
      presentation: 'USERNAME'
      original: '%s'
      example: 'Joi'
    '''
    # None of the three fields may be empty.
    for field in (presentation, original, example):
      assert field != ''
    if not self._NAME_RE.match(presentation):
      raise exception.InvalidPlaceholderName(presentation)
    self.presentation = presentation
    self.original = original
    self.example = example

  def GetPresentation(self):
    return self.presentation

  def GetOriginal(self):
    return self.original

  def GetExample(self):
    return self.example
def _AddTextWithLinks(self, parent, text):
  '''Parse a string for URLs and add it to a DOM node with the URLs replaced
  with <a> HTML links.

  Args:
    parent: The DOM node to which the text will be added.
    text: The string to be added.
  '''
  # A simple regexp to search for URLs. It is enough for now.
  url_matcher = lazy_re.compile('(http://[^\\s]*[^\\s\\.])')

  remaining = text
  while True:
    # Find the next URL in the unprocessed tail of the string.
    found = url_matcher.search(remaining)
    if found is None:
      break
    url = found.group(0)
    # Text before the URL is plain; the URL becomes an <a> element.
    self.AddText(parent, remaining[:found.start(0)])
    self.AddElement(parent, 'a', {'href': url}, url)
    remaining = remaining[found.end(0):]
  # Add whatever trailing text had no URL in it.
  self.AddText(parent, remaining)
class ShortcutGroup(object):
  '''Manages a list of cliques that belong together in a single shortcut
  group.  Knows how to detect conflicting shortcut keys.
  '''

  # Matches shortcut keys, e.g. &J
  SHORTCUT_RE = lazy_re.compile('([^&]|^)(&[A-Za-z])')

  def __init__(self, name):
    self.name = name
    # Map of language codes to shortcut keys used (which is a map of
    # shortcut keys to counts).
    self.keys_by_lang = {}
    # List of cliques in this group
    self.cliques = []

  def AddClique(self, c):
    '''Adds clique |c| and records its shortcut keys, unless a clique with
    the same ID is already in the group.'''
    for existing_clique in self.cliques:
      if existing_clique.GetId() == c.GetId():
        # This happens e.g. when we have e.g.
        # <if expr1><structure 1></if> <if expr2><structure 2></if>
        # where only one will really be included in the output.
        return
    self.cliques.append(c)
    for (lang, msg) in c.clique.items():
      keymap = self.keys_by_lang.setdefault(lang, {})
      content = msg.GetRealContent()
      # Count each shortcut key (uppercased) found in the message.
      for groups in self.SHORTCUT_RE.findall(content):
        key = groups[1].upper()
        keymap[key] = keymap.get(key, 0) + 1

  def GenerateWarnings(self, tc_project):
    '''Returns a list of warning lines for shortcut keys that occur more
    than once in any language.'''
    # For any language that has more than one occurrence of any shortcut,
    # make a list of the conflicting shortcuts.
    problem_langs = {}
    for (lang, keys) in self.keys_by_lang.items():
      for (key, count) in keys.items():
        if count > 1:
          problem_langs.setdefault(lang, []).append(key)

    warnings = []
    if problem_langs:
      warnings.append(
          "WARNING - duplicate keys exist in shortcut group %s" % self.name)
      for (lang, keys) in problem_langs.items():
        warnings.append(" %6s duplicates: %s" % (lang, ', '.join(keys)))
    return warnings
def Init(self):
  '''Creates the output document skeleton and the lookup tables used
  during generation.'''
  dom = minidom.getDOMImplementation('')
  self._doc = dom.createDocument(None, 'html', None)
  html_body = self.AddElement(self._doc.documentElement, 'body')
  self._main_div = self.AddElement(html_body, 'div')
  self._indent_level = 0

  # Human-readable names of supported platforms.
  self._PLATFORM_MAP = {
      'win': 'Windows',
      'mac': 'Mac',
      'linux': 'Linux',
      'chrome_os': self.config['os_name'],
  }
  # Human-readable names of supported products.
  self._PRODUCT_MAP = {
      'chrome': self.config['app_name'],
      'chrome_frame': self.config['frame_name'],
      'chrome_os': self.config['os_name'],
  }
  # Human-readable names of supported features.
  self._FEATURE_MAP = {
      'dynamic_refresh':
          self._GetLocalizedMessage('feature_dynamic_refresh'),
      'can_be_recommended':
          self._GetLocalizedMessage('feature_can_be_recommended'),
  }
  # Human-readable names of types.
  self._TYPE_MAP = {
      'string': 'String (REG_SZ)',
      'int': 'Integer (REG_DWORD)',
      'main': 'Boolean (REG_DWORD)',
      'int-enum': 'Integer (REG_DWORD)',
      'string-enum': 'String (REG_SZ)',
      'list': 'List of strings',
      'dict': 'Dictionary (REG_SZ, encoded as a JSON string)',
  }
  # The CSS style-sheet used for the document. It will be used in Google
  # Sites, which strips class attributes from HTML tags. To work around this,
  # the style-sheet is a dictionary and the style attributes will be added
  # "by hand" for each element.
  self._STYLE = {
      'table': 'border-style: none; border-collapse: collapse;',
      'tr': 'height: 0px;',
      'td': 'border: 1px dotted rgb(170, 170, 170); padding: 7px; '
            'vertical-align: top; width: 236px; height: 15px;',
      'thead td': 'font-weight: bold;',
      'td.left': 'width: 200px;',
      'td.right': 'width: 100%;',
      'dt': 'font-weight: bold;',
      'dd dl': 'margin-top: 0px; margin-bottom: 0px;',
      '.monospace': 'font-family: monospace;',
      '.pre': 'white-space: pre;',
      'div.note': 'border: 2px solid black; padding: 5px; margin: 5px;',
      'div.group_desc': 'margin-top: 20px; margin-bottom: 20px;',
      'ul': 'padding-left: 0px; margin-left: 0px;',
  }
  # A simple regexp to search for URLs. It is enough for now.
  self._url_matcher = lazy_re.compile('(http://[^\\s]*[^\\s\\.])')
class Version(Section):
  '''A resource section that contains a VERSIONINFO resource.'''

  # A typical version info resource can look like this:
  #
  # VS_VERSION_INFO VERSIONINFO
  #  FILEVERSION 1,0,0,1
  #  PRODUCTVERSION 1,0,0,1
  #  FILEFLAGSMASK 0x3fL
  # #ifdef _DEBUG
  #  FILEFLAGS 0x1L
  # #else
  #  FILEFLAGS 0x0L
  # #endif
  #  FILEOS 0x4L
  #  FILETYPE 0x2L
  #  FILESUBTYPE 0x0L
  # BEGIN
  #     BLOCK "StringFileInfo"
  #     BEGIN
  #         BLOCK "040904e4"
  #         BEGIN
  #             VALUE "CompanyName", "TODO: <Company name>"
  #             VALUE "FileDescription", "TODO: <File description>"
  #             VALUE "FileVersion", "1.0.0.1"
  #             VALUE "LegalCopyright", "TODO: (c) <Company name>.  All rights reserved."
  #             VALUE "InternalName", "res_format_test.dll"
  #             VALUE "OriginalFilename", "res_format_test.dll"
  #             VALUE "ProductName", "TODO: <Product name>"
  #             VALUE "ProductVersion", "1.0.0.1"
  #         END
  #     END
  #     BLOCK "VarFileInfo"
  #     BEGIN
  #         VALUE "Translation", 0x409, 1252
  #     END
  # END
  #
  # In addition to the above fields, VALUE fields named "Comments" and
  # "LegalTrademarks" may also be translateable.

  # Raw string literal so \s etc. reach the regex engine unescaped.
  version_re_ = lazy_re.compile(r'''
    # Match the ID on the first line
    ^(?P<id1>[A-Z0-9_]+)\s+VERSIONINFO
    |
    # Match all potentially translateable VALUE sections
    \s+VALUE\s+"
    (
      CompanyName|FileDescription|LegalCopyright|
      ProductName|Comments|LegalTrademarks
    )",\s+"(?P<text1>.*?([^"]|""))"\s
    ''', re.MULTILINE | re.VERBOSE)

  def Parse(self):
    '''Knows how to parse VERSIONINFO resource sections.'''
    self.ReadSection()
    self._RegExpParse(self.version_re_, self.text_)
class BasicVariable(Node):
  """Represents a variable. Usually used inside a plural option, but has been
  overloaded to store placeholders as well.
  """

  # Matches '{NAME}' with an optional leading '$' (e.g. '${NAME}').
  pattern = lazy_re.compile(r'^\$?{[a-zA-Z0-9_]+}')

  def GetNumWords(self):
    # A variable always counts as exactly one word.
    return 1
class ToolbarPreProcessor(preprocess_interface.PreProcessor):
  ''' Toolbar PreProcessing class.
  '''

  _IDS_COMMAND_MACRO = lazy_re.compile(
      r'(.*IDS_COMMAND)\s*\(([a-zA-Z0-9_]*)\s*,\s*([a-zA-Z0-9_]*)\)(.*)')
  _LINE_FEED_PH = lazy_re.compile(r'\$lf;')
  _PH_COMMENT = lazy_re.compile(r'PHRWR')
  _COMMENT = lazy_re.compile(r'^(\s*)//.*')

  def Process(self, rctext, rcpath):
    ''' Processes the data in rctext.
    Args:
      rctext: string containing the contents of the RC file being processed
      rcpath: the path used to access the file.

    Return:
      The processed text.
    '''
    # Collect processed lines and join once at the end; repeated string
    # concatenation would be quadratic in the number of lines.
    processed = []
    for line in rctext.splitlines():
      if self._LINE_FEED_PH.search(line):
        # Replace "$lf;" placeholder comments by an empty line.
        # this will not be put into the processed result
        if self._PH_COMMENT.search(line):
          mm = self._COMMENT.search(line)
          if mm:
            line = '%s//' % mm.group(1)
        else:
          # Replace $lf by the right linefeed character
          line = self._LINE_FEED_PH.sub(r'\\n', line)
      # Deal with IDS_COMMAND_MACRO stuff
      mo = self._IDS_COMMAND_MACRO.search(line)
      if mo:
        line = '%s_%s_%s%s' % (mo.group(1), mo.group(2), mo.group(3),
                               mo.group(4))
      processed.append(line + '\n')
    return ''.join(processed)
class ChromeScaledImage(interface.GathererBase):
  '''Represents an image that exists in multiple layout variants
  (e.g. "default", "touch") and multiple scale variants
  (e.g. "100_percent", "200_percent").
  '''

  # Splits a context like "touch_200_percent" into layout and scale.
  split_context_re_ = lazy_re.compile(r'(.+)_(\d+)_percent\Z')

  def _FindInputFile(self):
    '''Finds the image variant matching the current output context.

    Returns:
      (path, scale, req_scale): the grd-relative path of the file found,
      the scale directory it was found in, and the scale the output
      context requested.

    Raises:
      exception.MissingMandatoryAttribute: malformed output context.
      exception.FileNotFound: no variant of the file exists.
    '''
    output_context = self.grd_node.GetRoot().output_context
    match = self.split_context_re_.match(output_context)
    if not match:
      raise exception.MissingMandatoryAttribute(
          'All <output> nodes must have an appropriate context attribute'
          ' (e.g. context="touch_200_percent")')
    req_layout, req_scale = match.group(1), int(match.group(2))

    # Fall back to the "default" layout when the requested one is missing.
    layouts = [req_layout]
    if 'default' not in layouts:
      layouts.append('default')
    # Optionally fall back to 100% assets (to be rescaled later).
    scales = [req_scale]
    try_low_res = self.grd_node.FindBooleanAttribute(
        'fallback_to_low_resolution', default=False, skip_self=False)
    if try_low_res and 100 not in scales:
      scales.append(100)
    for layout in layouts:
      for scale in scales:
        # |dirname| rather than |dir|, which would shadow the builtin.
        dirname = '%s_%s_percent' % (layout, scale)
        path = os.path.join(dirname, self.rc_file)
        if os.path.exists(self.grd_node.ToRealPath(path)):
          return path, scale, req_scale
    # If we get here then the file is missing, so fail.
    dirname = '%s_%s_percent' % (_MakeBraceGlob(layouts),
                                 _MakeBraceGlob(map(str, scales)))
    raise exception.FileNotFound(
        'Tried ' + self.grd_node.ToRealPath(os.path.join(dirname,
                                                         self.rc_file)))

  def GetInputPath(self):
    path, scale, req_scale = self._FindInputFile()
    return path

  def Parse(self):
    pass

  def GetTextualIds(self):
    return [self.extkey]

  def GetData(self, *args):
    '''Returns the image bytes, rescaled from |scale| to |req_scale| if
    a lower-resolution fallback was used.'''
    path, scale, req_scale = self._FindInputFile()
    data = util.ReadFile(self.grd_node.ToRealPath(path), util.BINARY)
    data = _RescaleImage(data, scale, req_scale)
    data = _MoveSpecialChunksToFront(data)
    return data

  def Translate(self, *args, **kwargs):
    # Images are not translated; every language gets the same data.
    return self.GetData()
class Menu(Section):
  '''A resource section that contains a menu resource.'''

  # A typical menu resource section looks something like this:
  #
  # IDC_KLONK MENU
  # BEGIN
  #     POPUP "&File"
  #     BEGIN
  #         MENUITEM "E&xit",                       IDM_EXIT
  #         MENUITEM "This be ""Klonk"" me like",   ID_FILE_THISBE
  #         POPUP "gonk"
  #         BEGIN
  #             MENUITEM "Klonk && is ""good""",    ID_GONK_KLONKIS
  #         END
  #     END
  #     POPUP "&Help"
  #     BEGIN
  #         MENUITEM "&About ...",                  IDM_ABOUT
  #     END
  # END

  # Description used for the messages generated for menus, to explain to
  # the translators how to handle them.
  MENU_MESSAGE_DESCRIPTION = (
      'This message represents a menu. Each of the items appears in sequence '
      '(some possibly within sub-menus) in the menu. The XX01XX placeholders '
      'serve to separate items. Each item contains an & (ampersand) character '
      'in front of the keystroke that should be used as a shortcut for that item '
      'in the menu. Please make sure that no two items in the same menu share '
      'the same shortcut.')

  # A dandy regexp to suck all the IDs and translateables out of a menu
  # resource.  Raw string so \s etc. reach the regex engine unescaped.
  menu_re_ = lazy_re.compile(r'''
    # Match the MENU ID on the first line
    ^(?P<id1>[A-Z0-9_]+)\s+MENU
    |
    # Match the translateable caption for a popup menu
    POPUP\s+"(?P<text1>.*?([^"]|""))"\s
    |
    # Match the caption & ID of a MENUITEM
    MENUITEM\s+"(?P<text2>.*?([^"]|""))"\s*,\s*(?P<id2>[A-Z0-9_]+)
    ''', re.MULTILINE | re.VERBOSE)

  def Parse(self):
    '''Knows how to parse menu resource sections.  Because it is important
    that menu shortcuts are unique within the menu, we return each menu as
    a single message with placeholders to break up the different menu items,
    rather than return a single message per menu item.  we also add an
    automatic description with instructions for the translators.'''
    self.ReadSection()
    self.single_message_ = tclib.Message(
        description=self.MENU_MESSAGE_DESCRIPTION)
    self._RegExpParse(self.menu_re_, self.text_)
class Dialog(Section):
  '''A resource section that contains a dialog resource.'''

  # A typical dialog resource section looks like this:
  #
  # IDD_ABOUTBOX DIALOGEX 22, 17, 230, 75
  # STYLE DS_SETFONT | DS_MODALFRAME | WS_CAPTION | WS_SYSMENU
  # CAPTION "About"
  # FONT 8, "System", 0, 0, 0x0
  # BEGIN
  #     ICON            IDI_KLONK,IDC_MYICON,14,9,20,20
  #     LTEXT           "klonk Version ""yibbee"" 1.0",IDC_STATIC,49,10,119,8,
  #                     SS_NOPREFIX
  #     LTEXT           "Copyright (C) 2005",IDC_STATIC,49,20,119,8
  #     DEFPUSHBUTTON   "OK",IDOK,195,6,30,11,WS_GROUP
  #     CONTROL         "Jack ""Black"" Daniels",IDC_RADIO1,"Button",
  #                     BS_AUTORADIOBUTTON,46,51,84,10
  # END

  # We are using a sorted set of keys, and we assume that the
  # group name used for descriptions (type) will come after the "text"
  # group in alphabetical order. We also assume that there cannot be
  # more than one description per regular expression match.
  # If that's not the case some descriptions will be clobbered.
  # Raw string literal so \s et al. reach the regex engine unescaped.
  dialog_re_ = lazy_re.compile(r'''
    # The dialog's ID in the first line
    (?P<id1>[A-Z0-9_]+)\s+DIALOG(EX)?
    |
    # The caption of the dialog
    (?P<type1>CAPTION)\s+"(?P<text1>.*?([^"]|""))"\s
    |
    # Lines for controls that have text and an ID
    \s+(?P<type2>[A-Z]+)\s+"(?P<text2>.*?([^"]|"")?)"\s*,\s*(?P<id2>[A-Z0-9_]+)\s*,
    |
    # Lines for controls that have text only
    \s+(?P<type3>[A-Z]+)\s+"(?P<text3>.*?([^"]|"")?)"\s*,
    |
    # Lines for controls that reference other resources
    \s+[A-Z]+\s+[A-Z0-9_]+\s*,\s*(?P<id3>[A-Z0-9_]*[A-Z][A-Z0-9_]*)
    |
    # This matches "NOT SOME_STYLE" so that it gets consumed and doesn't get
    # matched by the next option (controls that have only an ID and then just
    # numbers)
    \s+NOT\s+[A-Z][A-Z0-9_]+
    |
    # Lines for controls that have only an ID and then just numbers
    \s+[A-Z]+\s+(?P<id4>[A-Z0-9_]*[A-Z][A-Z0-9_]*)\s*,
    ''', re.MULTILINE | re.VERBOSE)

  def Parse(self):
    '''Knows how to parse dialog resource sections.'''
    self.ReadSection()
    self._RegExpParse(self.dialog_re_, self.text_)
def SetupSystemIdentifiers(ids):
  """Adds ids to a regexp of known system identifiers.

  Can be called many times, ids will be accumulated.

  Args:
    ids: an iterable of strings
  """
  global SYSTEM_IDENTIFIERS
  # Accumulate into the module-level set, then rebuild the matcher.
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  pattern = " | ".join(r"\b%s\b" % i for i in KNOWN_SYSTEM_IDENTIFIERS)
  SYSTEM_IDENTIFIERS = lazy_re.compile(pattern, re.VERBOSE)
class AdmGatherer(regexp.RegexpGatherer):
  '''Gatherer for the translateable portions of an admin template.

  This gatherer currently makes the following assumptions:
  - there is only one [strings] section and it is always the last section
    of the file
  - translateable strings do not need to be escaped.
  '''

  # Finds the strings section as the group named 'strings'
  _STRINGS_SECTION = lazy_re.compile(
      r'(?P<first_part>.+^\[strings\])(?P<strings>.+)\Z',
      re.MULTILINE | re.DOTALL)

  # Finds the translateable sections from within the [strings] section.
  _TRANSLATEABLES = lazy_re.compile(
      r'^\s*[A-Za-z0-9_]+\s*=\s*"(?P<text>.+)"\s*$',
      re.MULTILINE)

  def Escape(self, text):
    # Newlines become literal backslash-n sequences in the template.
    return text.replace('\n', '\\n')

  def UnEscape(self, text):
    # Inverse of Escape().
    return text.replace('\\n', '\n')

  def Parse(self):
    '''Splits the input into a nontranslateable skeleton part and the
    translateable [strings] section; parses only once.'''
    if self.have_parsed_:
      return
    self.have_parsed_ = True
    self.text_ = self._LoadInputFile().strip()
    match = self._STRINGS_SECTION.match(self.text_)
    if match is None:
      raise MalformedAdminTemplateException()
    # Everything before [strings] is nontranslateable skeleton.
    self._AddNontranslateableChunk(match.group('first_part'))
    # The remainder holds the translateable name="value" entries.
    self._RegExpParse(self._TRANSLATEABLES, match.group('strings'))

  def GetTextualIds(self):
    return [self.extkey]
class RawText(Node): """RawText represents regular text able to be translated.""" # Raw text can have a < or $ in it, but only at the very start. # This guarantees that it's already tried and failed to match an HTML tag # and variable. pattern = lazy_re.compile(r'^[^{}][^{}<$]*', re.S) def GetNumWords(self): return len(WORD.findall(self.text)) def Transform(self, fn): self.text = fn(self.text)
def SetupSystemIdentifiers(ids):
  '''Adds ids to a regexp of known system identifiers.

  Can be called many times, ids will be accumulated.

  Args:
    ids: an iterable of strings
  '''
  global SYSTEM_IDENTIFIERS
  KNOWN_SYSTEM_IDENTIFIERS.update(ids)
  # One \b-delimited alternative per accumulated identifier.
  word_patterns = [r'\b%s\b' % i for i in KNOWN_SYSTEM_IDENTIFIERS]
  SYSTEM_IDENTIFIERS = lazy_re.compile(' | '.join(word_patterns),
                                       re.VERBOSE)
class HtmlTag(Node):
  """HTMLTag represents a HTML tag (eg. <a href='...'> or </span>).

  Note that since we don't care about the HTML structure, this does not
  form a tree, has no children, and no linking between open and close tags.

  Lex text so that we don't interfere with html tokens.
  This lexing scheme will handle all well formed tags, html or xhtml.
  It will not handle comments, CDATA sections, or the unescaping tags:
  script, style, xmp or listing.  If any of those appear in messages,
  something is wrong.
  """

  # Case-insensitive (re.I); re.S lets attribute values span newlines.
  pattern = lazy_re.compile(
      r'^</?[a-z]\w*'                      # beginning of tag
      r'(?:\s+\w+(?:\s*=\s*'               # attribute start
      r'(?:[^\s"\'>]+|"[^\"]*"|\'[^\']*\'))?'  # attribute value
      r')*\s*/?>', re.S | re.I)
class WindowsFilename(clique.CustomType):
  '''Validates that messages can be used as Windows filenames, and strips
  illegal characters out of translations.
  '''

  # Characters that may not appear in a Windows filename.  A raw-string
  # character class equivalent to the old escaped alternation
  # '\+|:|\/|\\\\|\*|\?|\"|\<|\>|\|'.
  BANNED = lazy_re.compile(r'[+:/\\*?"<>|]')

  def Validate(self, message):
    '''Returns true if the message contains no banned characters.'''
    return not self.BANNED.search(message.GetPresentableContent())

  def ValidateAndModify(self, lang, translation):
    '''Strips banned characters from |translation| and reports whether it
    was valid to begin with.'''
    is_ok = self.Validate(translation)
    self.ModifyEachTextPart(lang, translation)
    return is_ok

  def ModifyTextPart(self, lang, text):
    # Replace every banned character with a space.
    return self.BANNED.sub(' ', text)
class PluralOption(Node):
  """Represents a single option for a plural selection.

  eg. =1 {singular option here}
  """

  # Matches the option selector: an explicit count ('=N') or 'other',
  # up to and including the opening brace.
  pattern = lazy_re.compile(r'^(=[0-9]+|other)\s*{')
  # Text emitted after this node's children; presumably consumed by the
  # Node serialization machinery -- confirm against the Node base class.
  after = '}\n'

  @classmethod
  def Parse(cls, text):
    # Consume the '=N {' / 'other {' prefix.
    node, text = cls._MatchPattern(text)
    assert node is not None, text
    # Parse the option body (everything up to the closing brace).
    child, text = NodeSequence.Parse(text)
    assert child is not None, text
    # Flatten a NodeSequence into this node's child list.
    node.children = child.children if isinstance(child, NodeSequence) else [child]
    assert text.startswith('}')
    # Drop the closing brace and return the unconsumed remainder.
    return node, text[1:]
class RCData(Section):
  '''A resource section that contains some data .'''

  # A typical rcdata resource section looks like this:
  #
  # IDR_BLAH RCDATA { 1, 2, 3, 4 }

  # Raw string literal so \s and \{ reach the regex engine unescaped.
  dialog_re_ = lazy_re.compile(r'''
    ^(?P<id1>[A-Z0-9_]+)\s+RCDATA\s+(DISCARDABLE)?\s+\{.*?\}
    ''', re.MULTILINE | re.VERBOSE | re.DOTALL)

  def Parse(self):
    '''Implementation for resource types w/braces (not BEGIN/END)
    '''
    rc_text = self._LoadInputFile()

    out = ''
    begin_count = 0
    openbrace_count = 0
    assert self.extkey
    first_line_re = re.compile(r'\s*' + self.extkey + r'\b')
    for line in rc_text.splitlines(True):
      # Start accumulating at the section's first line, then keep going.
      if out or first_line_re.match(line):
        out += line

      # We stop once the braces balance (could happen in one line).
      begin_count_was = begin_count
      if out:
        openbrace_count += line.count('{')
        begin_count += line.count('{')
        begin_count -= line.count('}')
      if ((begin_count_was == 1 and begin_count == 0) or
          (openbrace_count > 0 and begin_count == 0)):
        break

    if not out:
      raise exception.SectionNotFound('%s in file %s' %
                                      (self.extkey, self.rc_file))

    self.text_ = out
    self._RegExpParse(self.dialog_re_, out)
class Plural(Node):
  """Represents a set of options for plurals.

  eg. {VARIABLE, plural, =1 {singular} other {plural}}
  """

  # Matches the '{VAR, plural,' header including an optional 'offset:N'.
  pattern = lazy_re.compile(r'^{[A-Za-z0-9_]+,\s*plural,\s*(offset:\d+\s*)?',
                            re.S)
  # Text emitted after this node's children; presumably consumed by the
  # Node serialization machinery -- confirm against the Node base class.
  after = '}'

  @classmethod
  def Parse(cls, text):
    # Consume the plural header; bail out if this is not a plural.
    node, text = cls._MatchPattern(text)
    if node is None:
      return None, text

    # Parse successive '=N {...}' / 'other {...}' options until the
    # closing brace of the plural itself.
    while not text.startswith('}'):
      child, text = PluralOption.Parse(text)
      assert child is not None, text
      node.children.append(child)
      text = text.lstrip()

    assert text.startswith('}'), text
    # Drop the closing brace and return the unconsumed remainder.
    return node, text[1:]

  def GetNumWords(self):
    # Use the longest option as the word count for the whole plural.
    return max(child.GetNumWords() for child in self.children)
from grit import tclib from grit import util from grit.tool import interface # Used to collapse presentable content to determine if # xml:space="preserve" is needed. _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*') # See XmlEscape below. _XML_QUOTE_ESCAPES = { u"'": u''', u'"': u'"', } # See http://www.w3.org/TR/xml/#charsets _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' u'\u0020-\uD7FF\uE000-\uFFFD' #u'\U00010000-\U0010FFFF' ']') def _XmlEscape(s): """Returns text escaped for XML in a way compatible with Google's internal Translation Console tool. May be used for attributes as well as for contents. """ if not type(s) == unicode: s = unicode(s) result = saxutils.escape(s, _XML_QUOTE_ESCAPES) illegal_chars = _XML_BAD_CHAR_REGEX.search(result) if illegal_chars: raise Exception('String contains characters disallowed in XML: %s' % repr(result))
from grit.gather import rc
from grit.gather import tr_html
from grit.tool import interface
from grit.tool import postprocess_interface
from grit.tool import preprocess_interface

from grit import grd_reader
from grit import lazy_re
from grit import tclib
from grit import util


# Matches files referenced from an .rc file.  Raw string literal so the
# escapes reach the regex engine unmangled.
_FILE_REF = lazy_re.compile(r'''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)


# Matches a dialog section
_DIALOG = lazy_re.compile(
    r'^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)

# Matches a menu section
_MENU = lazy_re.compile(r'^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)

# Matches a versioninfo section
def testCreatedOnlyOnDemand(self):
  '''The wrapped re object must only be compiled on first use.'''
  compiled = lazy_re.compile('bingo')
  self.assertEqual(None, compiled._lazy_re)
  self.assertTrue(compiled.match('bingo'))
  self.assertNotEqual(None, compiled._lazy_re)
'ID_EDIT_PASTE_SPECIAL', 'ID_EDIT_REPEAT', 'ID_EDIT_REPLACE', 'ID_EDIT_SELECT_ALL', 'ID_EDIT_UNDO', 'ID_EDIT_REDO', 'VS_VERSION_INFO', 'IDRETRY', 'ID_APP_ABOUT', 'ID_APP_EXIT', 'ID_NEXT_PANE', 'ID_PREV_PANE', 'ID_WINDOW_NEW', 'ID_WINDOW_ARRANGE', 'ID_WINDOW_CASCADE', 'ID_WINDOW_TILE_HORZ', 'ID_WINDOW_TILE_VERT', 'ID_WINDOW_SPLIT', 'ATL_IDS_SCSIZE', 'ATL_IDS_SCMOVE', 'ATL_IDS_SCMINIMIZE', 'ATL_IDS_SCMAXIMIZE', 'ATL_IDS_SCNEXTWINDOW', 'ATL_IDS_SCPREVWINDOW', 'ATL_IDS_SCCLOSE', 'ATL_IDS_SCRESTORE', 'ATL_IDS_SCTASKLIST', 'ATL_IDS_MDICHILD', 'ATL_IDS_IDLEMESSAGE', 'ATL_IDS_MRU_FILE' )) # Matches character entities, whether specified by name, decimal or hex. _HTML_ENTITY = lazy_re.compile( '&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));', re.IGNORECASE) # Matches characters that should be HTML-escaped. This is <, > and &, but only # if the & is not the start of an HTML character entity. _HTML_CHARS_TO_ESCAPE = lazy_re.compile( '"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)', re.IGNORECASE | re.MULTILINE) def ReadFile(filename, encoding): '''Reads and returns the entire contents of the given file. Args: filename: The path to the file. encoding: A Python codec name or one of two special values: BINARY to read
from grit import clique from grit import exception from grit import lazy_re from grit import util from grit import tclib from grit.gather import regexp # Find portions that need unescaping in resource strings. We need to be # careful that a \\n is matched _first_ as a \\ rather than matching as # a \ followed by a \n. # TODO(joi) Handle ampersands if we decide to change them into <ph> # TODO(joi) May need to handle other control characters than \n _NEED_UNESCAPE = lazy_re.compile(r'""|\\\\|\\n|\\t') # Find portions that need escaping to encode string as a resource string. _NEED_ESCAPE = lazy_re.compile(r'"|\n|\t|\\|\ \;') # How to escape certain characters _ESCAPE_CHARS = { '"' : '""', '\n' : '\\n', '\t' : '\\t', '\\' : '\\\\', ' ' : ' ' } # How to unescape certain strings _UNESCAPE_CHARS = dict([[value, key] for key, value in _ESCAPE_CHARS.items()])
yield ('<?xml version="1.0" encoding="utf-8"?>\n' '<resources ' 'xmlns:android="http://schemas.android.com/apk/res/android">\n') for item in root.ActiveDescendants(): with item: if isinstance(item, message.MessageNode): yield _FormatMessage(item, lang) yield '</resources>\n' # The Android resource name and optional product information are placed # in the grd string name because grd doesn't know about Android product # information. _NAME_PATTERN = lazy_re.compile( 'IDS_(?P<name>[A-Z0-9_]+)(_product_(?P<product>[a-z]+))?\Z') # In most cases we only need a name attribute and string value. _SIMPLE_TEMPLATE = u'<string name="%s">%s</string>\n' # In a few cases a product attribute is needed. _PRODUCT_TEMPLATE = u'<string name="%s" product="%s">%s</string>\n' def _FormatMessage(item, lang): """Writes out a single string as a <resource/> element.""" value = item.ws_at_start + item.Translate(lang) + item.ws_at_end # Replace < > & with < > & to ensure we generate valid XML and
import os import re import sys import base64 import mimetypes from grit.node import base from grit import lazy_re DIST_DEFAULT = 'chromium' DIST_ENV_VAR = 'CHROMIUM_BUILD' DIST_SUBSTR = '%DISTRIBUTION%' # Matches beginning of an "if" block with trailing spaces. _BEGIN_IF_BLOCK = lazy_re.compile( '<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*') # Matches ending of an "if" block with preceding spaces. _END_IF_BLOCK = lazy_re.compile('\s*</if>') def ReadFile(input_filename): """Helper function that returns input_filename as a string. Args: input_filename: name of file to be read Returns: string """ f = open(input_filename, 'rb') file_contents = f.read()
import types import xml.sax.saxutils from grit import lazy_re from grit.node import message # When this environmental variable has value "true", only tagged messages will # be outputted. _TAGGED_ONLY_ENV_VAR = 'ANDROID_JAVA_TAGGED_ONLY' _TAGGED_ONLY_DEFAULT = False # In tagged-only mode, only messages with this tag will be ouputted. _EMIT_TAG = 'android_java' _NAME_PATTERN = lazy_re.compile('IDS_(?P<name>[A-Z0-9_]+)\Z') # Most strings are output as a <string> element. Note the double quotes # around the value to preserve whitespace. _STRING_TEMPLATE = u'<string name="%s">"%s"</string>\n' # Some strings are output as a <plurals> element. _PLURALS_TEMPLATE = '<plurals name="%s">\n%s</plurals>\n' _PLURALS_ITEM_TEMPLATE = ' <item quantity="%s">%s</item>\n' _PLURALS_PATTERN = lazy_re.compile(r'\{[A-Z_]+,\s*plural,(?P<items>.*)\}$', flags=re.S) _PLURALS_ITEM_PATTERN = lazy_re.compile(r'(?P<quantity>\S+)\s*\{(?P<value>.*?)\}') _PLURALS_QUANTITY_MAP = { '=0': 'zero', 'zero': 'zero', '=1': 'one', 'one': 'one',
def testPositionalAndKwargsWork(self):
  '''compile() accepts a positional pattern plus keyword flags.'''
  compiled = lazy_re.compile('BiNgO', flags=re.I)
  self.assertTrue(compiled.match('bingo'))
def testJustKwargsWork(self):
  '''compile() accepts both pattern and flags as keyword arguments.'''
  compiled = lazy_re.compile(flags=re.I, pattern='BiNgO')
  self.assertTrue(compiled.match('bingo'))
import os import re from grit import lazy_re from grit import util from grit.format import html_inline from grit.gather import interface # Distribution string to replace with distribution. DIST_SUBSTR = "%DISTRIBUTION%" # Matches a chrome theme source URL. _THEME_SOURCE = lazy_re.compile("(?P<baseurl>chrome://theme/IDR_[A-Z0-9_]*)(?P<query>\?.*)?") # Pattern for matching CSS url() function. _CSS_URL_PATTERN = "url\((?P<quote>\"|'|)(?P<filename>[^\"'()]*)(?P=quote)\)" # Matches CSS url() functions with the capture group 'filename'. _CSS_URL = lazy_re.compile(_CSS_URL_PATTERN) # Matches one or more CSS image urls used in given properties. _CSS_IMAGE_URLS = lazy_re.compile( "(?P<attribute>content|background|[\w-]*-image):\s*" + "(?P<urls>(" + _CSS_URL_PATTERN + "\s*,?\s*)+)" ) # Matches CSS image sets. _CSS_IMAGE_SETS = lazy_re.compile( "(?P<attribute>content|background|[\w-]*-image):[ ]*" + "-webkit-image-set\((?P<images>" + "(\s*,?\s*url\((?P<quote>\"|'|)[^\"'()]*(?P=quote)\)[ ]*[0-9.]*x)*)\)", re.MULTILINE, )
't': u"\u0163", 'T': u"\u0162", 'w': u"\u0175", 'W': u"\u0174", '$': u"\u20ac", '?': u"\u00bf", 'R': u"\u00ae", r'!': u"\u00a1", } # a character set containing the keys in ACCENTED_STRINGS # We should not accent characters in an escape sequence such as "\n". # To be safe, we assume every character following a backslash is an escaped # character. We also need to consider the case like "\\n", which means # a blackslash and a character "n", we will accent the character "n". TO_ACCENT = lazy_re.compile(r'[%s]|\\[a-z\\]' % ''.join(ACCENTED_STRINGS.keys())) # Lex text so that we don't interfere with html tokens and entities. # This lexing scheme will handle all well formed tags and entities, html or # xhtml. It will not handle comments, CDATA sections, or the unescaping tags: # script, style, xmp or listing. If any of those appear in messages, # something is wrong. TOKENS = [ lazy_re.compile( '^%s' % pattern, # match at the beginning of input re.I | re.S # html tokens are case-insensitive ) for pattern in ( # a run of non html special characters r'[^<&]+', # a tag (
''' import re import types from grit.node import base from grit import clique from grit import exception from grit import lazy_re from grit import tclib from grit import util # Matches exactly three dots ending a line or followed by whitespace. _ELLIPSIS_PATTERN = lazy_re.compile(r'(?<!\.)\.\.\.(?=$|\s)') _ELLIPSIS_SYMBOL = u'\u2026' # Ellipsis # Finds whitespace at the start and end of a string which can be multiline. _WHITESPACE = lazy_re.compile('(?P<start>\s*)(?P<body>.+?)(?P<end>\s*)\Z', re.DOTALL | re.MULTILINE) class MessageNode(base.ContentNode): '''A <message> element.''' # For splitting a list of things that can be separated by commas or # whitespace _SPLIT_RE = lazy_re.compile('\s*,\s*|\s+') def __init__(self): super(MessageNode, self).__init__()
class MessageClique(object):
  '''A message along with all of its translations.  Also code to bring
  translations together with their original message.'''

  # change this to the language code of Messages you add to cliques_.
  # TODO(joi) Actually change this based on the <grit> node's source language
  source_language = 'en'

  # A constant translation we use when asked for a translation into the
  # special language constants.CONSTANT_LANGUAGE.
  CONSTANT_TRANSLATION = tclib.Translation(text='TTTTTT')

  # A pattern to match messages that are empty or whitespace only.
  WHITESPACE_MESSAGE = lazy_re.compile(u'^\s*$')

  def __init__(self, uber_clique, message, translateable=True,
               custom_type=None):
    '''Create a new clique initialized with just a message.

    Note that messages with a body comprised only of whitespace will
    implicitly be marked non-translateable.

    Args:
      uber_clique: Our uber-clique (collection of cliques)
      message: tclib.Message()
      translateable: True | False
      custom_type: instance of clique.CustomType interface
    '''
    # Our parent
    self.uber_clique = uber_clique
    # If not translateable, we only store the original message.
    self.translateable = translateable

    # We implicitly mark messages that have a whitespace-only body as
    # non-translateable.
    if MessageClique.WHITESPACE_MESSAGE.match(message.GetRealContent()):
      self.translateable = False

    # A mapping of language identifiers to tclib.BaseMessage and its
    # subclasses (i.e. tclib.Message and tclib.Translation).
    self.clique = {MessageClique.source_language: message}
    # A list of the "shortcut groups" this clique is
    # part of.  Within any given shortcut group, no shortcut key (e.g. &J)
    # must appear more than once in each language for all cliques that
    # belong to the group.
    self.shortcut_groups = []
    # An instance of the CustomType interface, or None.  If this is set, it
    # will be used to validate the original message and translations thereof,
    # and will also get a chance to modify translations of the message.
    self.SetCustomType(custom_type)

  def GetMessage(self):
    '''Retrieves the tclib.Message that is the source for this clique.'''
    return self.clique[MessageClique.source_language]

  def GetId(self):
    '''Retrieves the message ID of the messages in this clique.'''
    return self.GetMessage().GetId()

  def IsTranslateable(self):
    '''Returns True if this clique's messages may be translated.'''
    return self.translateable

  def AddToShortcutGroup(self, group):
    '''Records that this clique belongs to shortcut group 'group'.'''
    self.shortcut_groups.append(group)

  def SetCustomType(self, custom_type):
    '''Makes this clique use custom_type for validating messages and
    translations, and optionally modifying translations.

    Raises:
      exception.InvalidMessage: if the source message fails validation.
    '''
    self.custom_type = custom_type
    if custom_type and not custom_type.Validate(self.GetMessage()):
      raise exception.InvalidMessage(self.GetMessage().GetRealContent())

  def MessageForLanguage(self, lang, pseudo_if_no_match=True,
                         fallback_to_english=False):
    '''Returns the message/translation for the specified language, providing
    a pseudotranslation if there is no available translation and a pseudo-
    translation is requested.

    The translation of any message whatsoever in the special language
    'x_constant' is the message "TTTTTT".

    Args:
      lang: 'en'
      pseudo_if_no_match: True
      fallback_to_english: False

    Return:
      tclib.BaseMessage
    '''
    if not self.translateable:
      return self.GetMessage()

    if lang == constants.CONSTANT_LANGUAGE:
      return self.CONSTANT_TRANSLATION

    # Return the exact translation if we have one for this language.
    if lang in self.clique:
      return self.clique[lang]

    if lang == constants.FAKE_BIDI:
      return pseudo_rtl.PseudoRTLMessage(self.GetMessage())

    if fallback_to_english:
      # Recorded as a non-fatal missing translation.
      self.uber_clique._AddMissingTranslation(lang, self, is_error=False)
      return self.GetMessage()

    # If we're not supposed to generate pseudotranslations, we add an error
    # report to a list of errors, then fail at a higher level, so that we
    # get a list of all messages that are missing translations.
    if not pseudo_if_no_match:
      self.uber_clique._AddMissingTranslation(lang, self, is_error=True)

    return pseudo.PseudoMessage(self.GetMessage())

  def AllMessagesThatMatch(self, lang_re, include_pseudo=True):
    '''Returns a map of all messages that match 'lang', including the pseudo
    translation if requested.

    NOTE: for a non-translateable clique this returns a one-element *list*
    (not a map) holding only the source message; callers appear to rely on
    this asymmetry, so it is preserved.

    Args:
      lang_re: re.compile('fr|en')
      include_pseudo: True

    Return:
      { 'en' : tclib.Message,
        'fr' : tclib.Translation,
        pseudo.PSEUDO_LANG : tclib.Translation }
    '''
    if not self.translateable:
      return [self.GetMessage()]

    matches = {}
    for msglang in self.clique:
      if lang_re.match(msglang):
        matches[msglang] = self.clique[msglang]

    if include_pseudo:
      matches[pseudo.PSEUDO_LANG] = pseudo.PseudoMessage(self.GetMessage())

    return matches

  def AddTranslation(self, translation, language):
    '''Add a translation to this clique.  The translation must have the same
    ID as the message that is the source for this clique.

    If this clique is not translateable, the function just returns.

    Args:
      translation: tclib.Translation()
      language: 'en'

    Throws:
      grit.exception.InvalidTranslation if the translation you're trying to add
      doesn't have the same message ID as the source message of this clique.
    '''
    if not self.translateable:
      return
    if translation.GetId() != self.GetId():
      raise exception.InvalidTranslation(
        'Msg ID %s, transl ID %s' % (self.GetId(), translation.GetId()))
    # Print the offending ID before the assertion below fires, to make the
    # duplicate-translation failure diagnosable.
    if language in self.clique:
      print(self.GetId())
    assert language not in self.clique

    # Because two messages can differ in the original content of their
    # placeholders yet share the same ID (because they are otherwise the
    # same), the translation we are getting may have different original
    # content for placeholders than our message, yet it is still the right
    # translation for our message (because it is for the same ID).  We must
    # therefore fetch the original content of placeholders from our original
    # English message.
    #
    # See grit.clique_unittest.MessageCliqueUnittest.testSemiIdenticalCliques
    # for a concrete explanation of why this is necessary.
    original = self.MessageForLanguage(self.source_language, False)
    if len(original.GetPlaceholders()) != len(translation.GetPlaceholders()):
      print("ERROR: '%s' translation of message id %s does not match" %
            (language, translation.GetId()))
      assert False

    transl_msg = tclib.Translation(
        id=self.GetId(),
        text=translation.GetPresentableContent(),
        placeholders=original.GetPlaceholders())

    if (self.custom_type and
        not self.custom_type.ValidateAndModify(language, transl_msg)):
      # Use the print() call form for consistency with the rest of this
      # method (the old Python 2 print-statement form broke Py3 compat).
      print("WARNING: %s translation failed validation: %s" % (
          language, transl_msg.GetId()))

    self.clique[language] = transl_msg
  def Init(self):
    '''Builds the skeleton HTML document and the lookup tables used when
    writing out policy documentation.

    Creates the minidom document with an <html><body><div> scaffold, then
    populates human-readable name maps for platforms, products, features and
    policy value types, plus the inline CSS style map.
    '''
    dom_impl = minidom.getDOMImplementation('')
    self._doc = dom_impl.createDocument(None, 'html', None)
    body = self.AddElement(self._doc.documentElement, 'body')
    self._main_div = self.AddElement(body, 'div')
    self._indent_level = 0

    # Human-readable names of supported platforms.
    self._PLATFORM_MAP = {
      'win': 'Windows',
      'mac': 'Mac',
      'linux': 'Linux',
      'chrome_os': self.config['os_name'],
      'android': 'Android',
      'ios': 'iOS',
    }
    # Human-readable names of supported products.
    self._PRODUCT_MAP = {
      'chrome': self.config['app_name'],
      # NOTE(review): 'Aviator' is mapped to the configured app name as well,
      # apparently to avoid a KeyError for Aviator-branded policy definitions
      # — confirm this branding entry is still needed.
      'Aviator': self.config['app_name'],
      'chrome_frame': self.config['frame_name'],
      'chrome_os': self.config['os_name'],
    }
    # Human-readable names of supported features. Each supported feature has
    # a 'doc_feature_X' entry in |self.messages|.
    self._FEATURE_MAP = {}
    for message in self.messages:
      if message.startswith('doc_feature_'):
        # Strip the 'doc_feature_' prefix (12 chars) to get the feature name.
        self._FEATURE_MAP[message[12:]] = self.messages[message]['text']
    # Human-readable names of types.  ('main' is the tag used for boolean
    # policies, as shown by its Boolean/REG_DWORD mappings below.)
    self._TYPE_MAP = {
      'string': 'String',
      'int': 'Integer',
      'main': 'Boolean',
      'int-enum': 'Integer',
      'string-enum': 'String',
      'list': 'List of strings',
      'dict': 'Dictionary',
      'external': 'External data reference',
    }
    # Windows registry value types corresponding to each policy type.
    self._REG_TYPE_MAP = {
      'string': 'REG_SZ',
      'int': 'REG_DWORD',
      'main': 'REG_DWORD',
      'int-enum': 'REG_DWORD',
      'string-enum': 'REG_SZ',
      'dict': 'REG_SZ, encoded as a JSON string',
    }
    # The CSS style-sheet used for the document. It will be used in Google
    # Sites, which strips class attributes from HTML tags. To work around this,
    # the style-sheet is a dictionary and the style attributes will be added
    # "by hand" for each element.
    self._STYLE = {
      'table': 'border-style: none; border-collapse: collapse;',
      'tr': 'height: 0px;',
      'td': 'border: 1px dotted rgb(170, 170, 170); padding: 7px; '
          'vertical-align: top; width: 236px; height: 15px;',
      'thead td': 'font-weight: bold;',
      'td.left': 'width: 200px;',
      'td.right': 'width: 100%;',
      'dt': 'font-weight: bold;',
      'dd dl': 'margin-top: 0px; margin-bottom: 0px;',
      '.monospace': 'font-family: monospace;',
      '.pre': 'white-space: pre;',
      'div.note': 'border: 2px solid black; padding: 5px; margin: 5px;',
      'div.group_desc': 'margin-top: 20px; margin-bottom: 20px;',
      'ul': 'padding-left: 0px; margin-left: 0px;'
    }

    # A simple regexp to search for URLs. It is enough for now.
    self._url_matcher = lazy_re.compile('(http://[^\\s]*[^\\s\\.])')
from xml.dom import Node import xml.dom.minidom import grit.node.empty from grit.node import io from grit.node import message from grit.tool import interface from grit import grd_reader from grit import lazy_re from grit import tclib from grit import util # The name of a string in strings.xml _STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z') # A string's character limit in strings.xml _CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]') # Finds String.Format() style format specifiers such as "%-5.2f". _FORMAT_SPECIFIER = lazy_re.compile( '%' '([1-9][0-9]*\$|<)?' # argument_index '([-#+ 0,(]*)' # flags '([0-9]+)?' # width '(\.[0-9]+)?' # precision '([bBhHsScCdoxXeEfgGaAtT%n])') # conversion class Android2Grd(interface.Tool):
u'a' : u'\u00e5', # a with ring u'e' : u'\u00e9', # e acute u'i' : u'\u00ef', # i diaresis u'o' : u'\u00f4', # o circumflex u'u' : u'\u00fc', # u diaresis u'y' : u'\u00fd', # y acute u'A' : u'\u00c5', # A with ring u'E' : u'\u00c9', # E acute u'I' : u'\u00cf', # I diaresis u'O' : u'\u00d4', # O circumflex u'U' : u'\u00dc', # U diaresis u'Y' : u'\u00dd', # Y acute } # Matches vowels and P _PSUB_RE = lazy_re.compile("(%s)" % '|'.join(_VOWELS.keys() + ['P'])) # Pseudotranslations previously created. This is important for performance # reasons, especially since we routinely pseudotranslate the whole project # several or many different times for each build. _existing_translations = {} def MapVowels(str, also_p = False): '''Returns a copy of 'str' where characters that exist as keys in _VOWELS have been replaced with the corresponding value. If also_p is true, this function will also change capital P characters into a Hebrew character Qof. ''' def Repl(match): if match.group() == 'p':
import xml.dom.minidom import grit.node.empty from grit.node import io from grit.node import message from grit.tool import interface from grit import grd_reader from grit import lazy_re from grit import tclib from grit import util # The name of a string in strings.xml _STRING_NAME = lazy_re.compile(r'[a-z0-9_]+\Z') # A string's character limit in strings.xml _CHAR_LIMIT = lazy_re.compile(r'\[CHAR-LIMIT=(\d+)\]') # Finds String.Format() style format specifiers such as "%-5.2f". _FORMAT_SPECIFIER = lazy_re.compile( '%' '([1-9][0-9]*\$|<)?' # argument_index '([-#+ 0,(]*)' # flags '([0-9]+)?' # width '(\.[0-9]+)?' # precision '([bBhHsScCdoxXeEfgGaAtT%n])') # conversion class Android2Grd(interface.Tool):
'n': u"\u00f1", 'p': u"\u00fe", 'y': u"\u00fd", 'C': u"\u00c7", 'D': u"\u00d0", 'N': u"\u00d1", 'P': u"\u00de", 'Y': u"\u00dd", 'f': u"\u0192", 's': u"\u0161", 'S': u"\u0160", 'z': u"\u017e", 'Z': u"\u017d", 'g': u"\u011d", 'G': u"\u011c", 'h': u"\u0125", 'H': u"\u0124", 'j': u"\u0135", 'J': u"\u0134", 'k': u"\u0137", 'K': u"\u0136", 'l': u"\u013c", 'L': u"\u013b", 't': u"\u0163", 'T': u"\u0162", 'w': u"\u0175", 'W': u"\u0174", '$': u"\u20ac", '?': u"\u00bf", 'R': u"\u00ae", r'!': u"\u00a1", } # a character set containing the keys in ACCENTED_STRINGS # We should not accent characters in an escape sequence such as "\n". # To be safe, we assume every character following a backslash is an escaped # character. We also need to consider the case like "\\n", which means # a blackslash and a character "n", we will accent the character "n". TO_ACCENT = lazy_re.compile( r'[%s]|\\[a-z\\]' % ''.join(ACCENTED_STRINGS.keys())) # Lex text so that we don't interfere with html tokens and entities. # This lexing scheme will handle all well formed tags and entities, html or # xhtml. It will not handle comments, CDATA sections, or the unescaping tags: # script, style, xmp or listing. If any of those appear in messages, # something is wrong. TOKENS = [ lazy_re.compile( '^%s' % pattern, # match at the beginning of input re.I | re.S # html tokens are case-insensitive ) for pattern in ( # a run of non html special characters r'[^<&]+', # a tag
u"a": u"\u00e5", # a with ring u"e": u"\u00e9", # e acute u"i": u"\u00ef", # i diaresis u"o": u"\u00f4", # o circumflex u"u": u"\u00fc", # u diaresis u"y": u"\u00fd", # y acute u"A": u"\u00c5", # A with ring u"E": u"\u00c9", # E acute u"I": u"\u00cf", # I diaresis u"O": u"\u00d4", # O circumflex u"U": u"\u00dc", # U diaresis u"Y": u"\u00dd", # Y acute } # Matches vowels and P _PSUB_RE = lazy_re.compile("(%s)" % "|".join(_VOWELS.keys() + ["P"])) # Pseudotranslations previously created. This is important for performance # reasons, especially since we routinely pseudotranslate the whole project # several or many different times for each build. _existing_translations = {} def MapVowels(str, also_p=False): """Returns a copy of 'str' where characters that exist as keys in _VOWELS have been replaced with the corresponding value. If also_p is true, this function will also change capital P characters into a Hebrew character Qof. """ def Repl(match):
from grit import util # There is a python bug that makes mimetypes crash if the Windows # registry contains non-Latin keys ( http://bugs.python.org/issue9291 # ). Initing manually and blocking external mime-type databases will # prevent that bug and if we add svg manually, it will still give us # the data we need. mimetypes.init([]) mimetypes.add_type('image/svg+xml', '.svg') DIST_DEFAULT = 'chromium' DIST_ENV_VAR = 'CHROMIUM_BUILD' DIST_SUBSTR = '%DISTRIBUTION%' # Matches beginning of an "if" block with trailing spaces. _BEGIN_IF_BLOCK = lazy_re.compile( '<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*') # Matches ending of an "if" block with preceding spaces. _END_IF_BLOCK = lazy_re.compile('\s*</if>') # Used by DoInline to replace various links with inline content. _STYLESHEET_RE = lazy_re.compile( '<link rel="stylesheet"[^>]+?href="(?P<filename>[^"]*)".*?>(\s*</link>)?', re.DOTALL) _INCLUDE_RE = lazy_re.compile( '<include[^>]+?src="(?P<filename>[^"\']*)".*?>(\s*</include>)?', re.DOTALL) _SRC_RE = lazy_re.compile( r'<(?!script)(?:[^>]+?\s)src=(?P<quote>")(?!\[\[|{{)(?P<filename>[^"\']*)\1', re.MULTILINE) _ICON_RE = lazy_re.compile( r'<link rel="icon"\s(?:[^>]+?\s)?'
import os import re import sys import base64 import mimetypes from grit import lazy_re from grit import util DIST_DEFAULT = 'chromium' DIST_ENV_VAR = 'CHROMIUM_BUILD' DIST_SUBSTR = '%DISTRIBUTION%' # Matches beginning of an "if" block with trailing spaces. _BEGIN_IF_BLOCK = lazy_re.compile( '<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*') # Matches ending of an "if" block with preceding spaces. _END_IF_BLOCK = lazy_re.compile('\s*</if>') # Used by DoInline to replace various links with inline content. _STYLESHEET_RE = lazy_re.compile( '<link rel="stylesheet"[^>]+?href="(?P<filename>[^"]*)".*?>(\s*</link>)?', re.DOTALL) _INCLUDE_RE = lazy_re.compile( '<include[^>]+?src="(?P<filename>[^"\']*)".*?>(\s*</include>)?', re.DOTALL) _SRC_RE = lazy_re.compile( r'<(?!script)(?:[^>]+?\s)src=(?P<quote>")(?P<filename>[^"\']*)\1', re.MULTILINE) _ICON_RE = lazy_re.compile(
# found in the LICENSE file. '''Unit tests for grit.gather.chrome_html''' import os import re import sys if __name__ == '__main__': sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) import unittest from grit import lazy_re from grit import util from grit.gather import chrome_html _NEW_LINE = lazy_re.compile('(\r\n|\r|\n)', re.MULTILINE) def StandardizeHtml(text): '''Standardizes the newline format and png mime type in Html text.''' return _NEW_LINE.sub('\n', text).replace('data:image/x-png;', 'data:image/png;') class ChromeHtmlUnittest(unittest.TestCase): '''Unit tests for ChromeHtml.''' def testFileResources(self): '''Tests inlined image file resources with available high DPI assets.''' tmp_dir = util.TempDir({ 'index.html': '''
'''Support for "strings.xml" format used by Muppet plug-ins in Google Desktop.''' import StringIO import xml.sax import xml.sax.handler import xml.sax.saxutils from grit import lazy_re from grit import tclib from grit import util from grit.gather import regexp # Placeholders can be defined in strings.xml files by putting the name of the # placeholder between [![ and ]!] e.g. <MSG>Hello [![USER]!] how are you<MSG> PLACEHOLDER_RE = lazy_re.compile('(\[!\[|\]!\])') class MuppetStringsContentHandler(xml.sax.handler.ContentHandler): '''A very dumb parser for splitting the strings.xml file into translateable and nontranslateable chunks.''' def __init__(self, parent): self.curr_elem = '' self.curr_text = '' self.parent = parent self.description = '' self.meaning = '' self.translateable = True def startElement(self, name, attrs):
import os import re from grit import lazy_re from grit import util from grit.format import html_inline from grit.gather import interface # Distribution string to replace with distribution. DIST_SUBSTR = '%DISTRIBUTION%' # Matches a chrome theme source URL. _THEME_SOURCE = lazy_re.compile('chrome://theme/IDR_[A-Z0-9_]*') # Matches CSS image urls with the capture group 'filename'. _CSS_IMAGE_URLS = lazy_re.compile( '(?P<attribute>content|background|[\w-]*-image):[ ]*' + 'url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)') # Matches CSS image sets. _CSS_IMAGE_SETS = lazy_re.compile( '(?P<attribute>content|background|[\w-]*-image):[ ]*' + '-webkit-image-set\((?P<images>' + '([,\n ]*url\((?P<quote>"|\'|)[^"\'()]*(?P=quote)\)[ ]*[0-9.]*x)*)\)', re.MULTILINE) # Matches a single image in a CSS image set with the capture group scale. _CSS_IMAGE_SET_IMAGE = lazy_re.compile( '[,\n ]*url\((?P<quote>"|\'|)[^"\'()]*(?P=quote)\)[ ]*(?P<scale>[0-9.]*x)', re.MULTILINE) _HTML_IMAGE_SRC = lazy_re.compile(
import os import re import sys if __name__ == '__main__': sys.path.append(os.path.join(os.path.dirname(__file__), '../..')) import unittest from grit import lazy_re from grit import util from grit.gather import chrome_html _NEW_LINE = lazy_re.compile('(\r\n|\r|\n)', re.MULTILINE) def StandardizeHtml(text): '''Standardizes the newline format and png mime type in Html text.''' return _NEW_LINE.sub('\n', text).replace('data:image/x-png;', 'data:image/png;') class ChromeHtmlUnittest(unittest.TestCase): '''Unit tests for ChromeHtml.''' def testFileResources(self): '''Tests inlined image file resources with available high DPI assets.''' tmp_dir = util.TempDir({
import getopt import os from xml.sax import saxutils from grit import grd_reader from grit import lazy_re from grit import tclib from grit import util from grit.tool import interface # Used to collapse presentable content to determine if # xml:space="preserve" is needed. _WHITESPACES_REGEX = lazy_re.compile(ur'\s\s*') # See XmlEscape below. _XML_QUOTE_ESCAPES = { u"'": u''', u'"': u'"', } _XML_BAD_CHAR_REGEX = lazy_re.compile(u'[^\u0009\u000A\u000D' u'\u0020-\uD7FF\uE000-\uFFFD]') def _XmlEscape(s): """Returns text escaped for XML in a way compatible with Google's internal Translation Console tool. May be used for attributes as well as for contents.
"ATL_IDS_SCMINIMIZE", "ATL_IDS_SCMAXIMIZE", "ATL_IDS_SCNEXTWINDOW", "ATL_IDS_SCPREVWINDOW", "ATL_IDS_SCCLOSE", "ATL_IDS_SCRESTORE", "ATL_IDS_SCTASKLIST", "ATL_IDS_MDICHILD", "ATL_IDS_IDLEMESSAGE", "ATL_IDS_MRU_FILE", ) ) # Matches character entities, whether specified by name, decimal or hex. _HTML_ENTITY = lazy_re.compile("&(#(?P<decimal>[0-9]+)|#x(?P<hex>[a-fA-F0-9]+)|(?P<named>[a-z0-9]+));", re.IGNORECASE) # Matches characters that should be HTML-escaped. This is <, > and &, but only # if the & is not the start of an HTML character entity. _HTML_CHARS_TO_ESCAPE = lazy_re.compile('"|<|>|&(?!#[0-9]+|#x[0-9a-z]+|[a-z]+;)', re.IGNORECASE | re.MULTILINE) def ReadFile(filename, encoding): """Reads and returns the entire contents of the given file. Args: filename: The path to the file. encoding: A Python codec name or one of two special values: BINARY to read the file in binary mode, or RAW_TEXT to read it with newline conversion but without decoding to Unicode. """
import os import re from grit import lazy_re from grit import util from grit.format import html_inline from grit.gather import interface # Distribution string to replace with distribution. DIST_SUBSTR = '%DISTRIBUTION%' # Matches a chrome theme source URL. _THEME_SOURCE = lazy_re.compile( '(?P<baseurl>chrome://theme/IDR_[A-Z0-9_]*)(?P<query>\?.*)?') # Pattern for matching CSS url() function. _CSS_URL_PATTERN = 'url\((?P<quote>"|\'|)(?P<filename>[^"\'()]*)(?P=quote)\)' # Matches CSS url() functions with the capture group 'filename'. _CSS_URL = lazy_re.compile(_CSS_URL_PATTERN) # Matches one or more CSS image urls used in given properties. _CSS_IMAGE_URLS = lazy_re.compile( '(?P<attribute>content|background|[\w-]*-image):\s*' + '(?P<urls>(' + _CSS_URL_PATTERN + '\s*,?\s*)+)') # Matches CSS image sets. _CSS_IMAGE_SETS = lazy_re.compile( '(?P<attribute>content|background|[\w-]*-image):[ ]*' + '-webkit-image-set\((?P<images>' + '(\s*,?\s*url\((?P<quote>"|\'|)[^"\'()]*(?P=quote)\)[ ]*[0-9.]*x)*)\)', re.MULTILINE) # Matches a single image in a CSS image set with the capture group scale.
# names for those tags. This will be used when generating placeholders
# representing these tags.
_HTML_PLACEHOLDER_NAMES = { 'a' : 'link', 'br' : 'break', 'b' : 'bold',
  'i' : 'italic', 'li' : 'item', 'ol' : 'ordered_list', 'p' : 'paragraph',
  'ul' : 'unordered_list', 'img' : 'image', 'em' : 'emphasis' }

# We append each of these characters in sequence to distinguish between
# different placeholders with basically the same name (e.g. BOLD1, BOLD2).
# Keep in mind that a placeholder name must not be a substring of any other
# placeholder name in the same message, so we can't simply count (BOLD_1
# would be a substring of BOLD_10).
_SUFFIXES = '123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# Matches whitespace in an HTML document.  Also matches HTML comments, which
# are treated as whitespace.  The patterns below operate on raw HTML source
# text, so non-breaking spaces must be matched as the literal entity text
# '&nbsp;' (the entity references had been accidentally decoded to literal
# characters, contradicting the comments that describe them).
_WHITESPACE = lazy_re.compile(r'(\s|&nbsp;|\\n|\\r|<!--\s*desc\s*=.*?-->)+',
                              re.DOTALL)

# Matches whitespace sequences which can be folded into a single whitespace
# character.  This matches single characters so that non-spaces are replaced
# with spaces.
_FOLD_WHITESPACE = lazy_re.compile(r'\s+')

# Finds a non-whitespace character
_NON_WHITESPACE = lazy_re.compile(r'\S')

# Matches two or more &nbsp; in a row (a single &nbsp; is not changed into
# placeholders because different languages require different numbers of spaces
# and placeholders must match exactly; more than one is probably a "special"
# whitespace sequence and should be turned into a placeholder).
_NBSP = lazy_re.compile(r'&nbsp;(&nbsp;)+')
import re import types from grit.node import base import grit.format.rc_header import grit.format.rc from grit import clique from grit import exception from grit import lazy_re from grit import tclib from grit import util # Finds whitespace at the start and end of a string which can be multiline. _WHITESPACE = lazy_re.compile('(?P<start>\s*)(?P<body>.+?)(?P<end>\s*)\Z', re.DOTALL | re.MULTILINE) class MessageNode(base.ContentNode): '''A <message> element.''' # For splitting a list of things that can be separated by commas or # whitespace _SPLIT_RE = lazy_re.compile('\s*,\s*|\s+') def __init__(self): super(MessageNode, self).__init__() # Valid after EndParsing, this is the MessageClique that contains the # source message and any translations of it that have been loaded. self.clique = None