def untag_fields(fields, catalog):
    """Untags fields, handling translation priority.

    Fields tagged `key@` are untagged to `key`. A field may also be tagged
    with extra `@`s (`key@@`, `key@@@`, ...) to mark higher translation
    priority: a higher-priority value only replaces the base value when the
    catalog actually contains a translation for it.

    Args:
        fields: Nested dict/list structure of tagged fields; mutated in place.
        catalog: Translation catalog supporting `in` membership tests.
    Returns:
        The same `fields` structure, with tags stripped.
    """
    # Keys already scheduled for untagging (priority-1 keys land here first,
    # because utils.walk visits `key@` before `key@@` in sorted dict order —
    # NOTE(review): assumes walk ordering; verify against utils.walk).
    untagged_keys_to_add = {}
    nodes_and_keys_to_add = []
    nodes_and_keys_to_remove = []

    def callback(item, key, node):
        # List indices and other non-string keys can never carry a tag.
        if not isinstance(key, basestring):
            return
        if key.endswith('@'):
            # Strip ALL trailing '@'s; their count is the priority level.
            untagged_key = key.rstrip('@')
            priority = len(key) - len(untagged_key)
            content = item
            # The tagged key is always removed, even if its content loses.
            nodes_and_keys_to_remove.append((node, key))
            if priority > 1 and untagged_key in untagged_keys_to_add:
                # Higher-priority value wins only when the catalog has a
                # translation for it.
                try:
                    has_translation_for_higher_priority_key = content in catalog
                except AttributeError:
                    # Catalog cannot test membership for this content type.
                    has_translation_for_higher_priority_key = False
                if has_translation_for_higher_priority_key:
                    untagged_keys_to_add[untagged_key] = True
                    nodes_and_keys_to_add.append((node, untagged_key, content))
            elif priority <= 1:
                # Base (single '@') value: always untagged.
                untagged_keys_to_add[untagged_key] = True
                nodes_and_keys_to_add.append((node, untagged_key, content))
    utils.walk(fields, callback)
    # Apply removals and additions after the walk so the traversal never
    # sees a structure mutated underneath it.
    for node, key in nodes_and_keys_to_remove:
        if isinstance(node, dict):
            del node[key]
    for node, untagged_key, content in nodes_and_keys_to_add:
        if isinstance(node, dict):
            node[untagged_key] = content
    return fields
def untag_fields(fields):
    """Strip the trailing '@' marker from tagged field names, in place."""
    def _strip_tag(item, key, node):
        # Only string keys can carry the '@' tag; skip list indices etc.
        if not isinstance(key, basestring):
            return
        if not key.endswith('@'):
            return
        # Move the value from the tagged key to its untagged name.
        node[key[:-1]] = node.pop(key)
    utils.walk(fields, _strip_tag)
    return fields
def organize_fields(self, fields):
    """Structure the fields data to keep some minimal structure.

    Args:
        fields: Mapping of field names to values (nested dicts/lists).
    Returns:
        A new `collections.OrderedDict` with `$`-prefixed keys first,
        other keys sorted alphabetically, and `partials` last (each
        partial's own `partial` key ordered first within it).
    """
    new_fields = collections.OrderedDict()

    # Deep sort all fields by default.
    def _walk_field(item, key, node, parent_node):
        try:
            value = node[key]
            new_value = collections.OrderedDict()
            for sub_key in sorted(value.keys()):
                new_value[sub_key] = value[sub_key]
            node[key] = new_value
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Non-dict values (no .keys()) and unsortable
            # keys are deliberately left untouched.
            pass
    utils.walk(fields, _walk_field)

    # Organization rules:
    #   $ prefixed fields should come first.
    #   Partials key is last.
    #   Partials' partial key should be first in partial data.

    # Sort the fields to keep consistent between saves.
    other_keys = []
    for key in sorted(fields.keys()):
        if key.startswith('$'):
            new_fields[key] = fields[key]
        elif key == 'partials':
            # Deferred until the very end (see rules above).
            pass
        else:
            other_keys.append(key)
    for key in other_keys:
        new_fields[key] = fields[key]

    if 'partials' in fields:
        new_partials = []
        for partial in fields['partials']:
            new_partial = collections.OrderedDict()
            try:
                # Put the partial name first for easy readability.
                if 'partial' in partial:
                    new_partial['partial'] = partial['partial']
                for key in sorted(partial.keys()):
                    if key != 'partial':
                        new_partial[key] = partial[key]
                new_partials.append(new_partial)
            except TypeError:
                # When unable to sort the partial keys, use original.
                new_partials.append(partial)
        new_fields['partials'] = new_partials
    return new_fields
def convert_fields(fields):
    """Convert raw field data from submission to use objects when needed.

    Date/datetime-formatted strings become `datetime.date`/`datetime.datetime`
    objects, and `{'tag': '!g...', 'value': ...}` mappings become
    `ConstructorReference` objects (or None if the value was cleared).

    Args:
        fields: Nested dict/list structure; mutated in place.
    Returns:
        The same `fields` structure.
    """
    def _walk_field(item, key, node, parent_node):
        # Convert dates.
        try:
            value = node[key]
            if DATETIME_RE.match(value):
                node[key] = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M")
            elif DATE_RE.match(value):
                tempValue = datetime.datetime.strptime(value, "%Y-%m-%d")
                node[key] = datetime.date(
                    tempValue.year, tempValue.month, tempValue.day)
        except Exception:
            # Fix: was a bare `except:` (also trapped SystemExit and
            # KeyboardInterrupt). Values that are not date-formatted
            # strings are intentionally left as-is.
            pass

        # Convert the !g constructors into their objects.
        # Fix: guard against a non-string 'tag' value, which previously
        # raised an uncaught AttributeError on .startswith().
        if (key == 'tag' and isinstance(item, basestring)
                and item.startswith('!g.') and 'value' in node):
            # If the value was removed, remove the constructor.
            if not node['value']:
                newValue = None
            else:
                newValue = ConstructorReference(item, node['value'])
            try:
                # Try as an array.
                for index, parent_key in enumerate(parent_node):
                    if parent_node[index] == node:
                        parent_node[index] = newValue
                        break
            except KeyError:
                # Integer indexing raised KeyError, so the parent is a dict.
                for parent_key in parent_node:
                    if parent_node[parent_key] == node:
                        parent_node[parent_key] = newValue
    utils.walk(fields, _walk_field)
    return fields
def extract(self, include_obsolete=None, localized=None, paths=None,
            include_header=None, locales=None, use_fuzzy_matching=None,
            audit=False, out_path=None):
    """Extracts translatable strings from the pod into catalogs.

    Walks blueprints, docs (front matter and body), CSVs, root content
    YAML, views, partials and the podspec, collecting tagged strings.

    Args:
        include_obsolete: Whether to keep obsolete messages (defaults from
            pod config via get_extract_config).
        localized: Save one catalog per locale instead of one template.
        paths: Optional list of path patterns to restrict extraction.
        include_header: Whether to write catalog headers.
        locales: Optional whitelist of locales to save.
        use_fuzzy_matching: Passed through to get_extract_config.
        audit: When True, collect strings but skip saving and skip
            template/view extraction.
        out_path: When set, always collect into the single template catalog.
    Returns:
        Tuple of (untagged_strings, catalogs) where catalogs is either
        (locale, catalog) items or a one-element list with the template.
    """
    include_obsolete, localized, include_header, use_fuzzy_matching, = \
        self.get_extract_config(include_header=include_header,
                                include_obsolete=include_obsolete,
                                localized=localized,
                                use_fuzzy_matching=use_fuzzy_matching)
    env = self.pod.get_jinja_env()
    # {
    #     locale1: locale1_catalog,
    #     locale2: locale2_catalog,
    #     ...
    # }
    # This is built up as we extract
    localized_catalogs = {}
    untagged_strings = []
    unlocalized_catalog = catalogs.Catalog()  # for localized=False case
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }

    def _add_to_catalog(message, locales):
        # Add to all relevant catalogs
        for locale in locales:
            if locale not in localized_catalogs:
                # Start with a new catalog so we can track what's obsolete:
                # we'll merge it with existing translations later.
                # *NOT* setting `locale` kwarg here b/c that will load
                # existing translations.
                localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
            localized_catalogs[locale][message.id] = message
        unlocalized_catalog[message.id] = message

    def _handle_field(path, locales, msgid, key, node, parent_node=None):
        if (not key
                or not isinstance(msgid, basestring)
                or not isinstance(key, basestring)):
            return
        if not key.endswith('@'):
            # Untagged strings are reported (e.g. for auditing), not
            # extracted.
            if msgid:
                untagged_strings.append((path, msgid))
            return
        # Support gettext "extracted comments" on tagged fields:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            if isinstance(key, unicode):
                key = key.encode('utf-8')
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        elif isinstance(node, list) and parent_node:
            # For list items, the comment lives on the enclosing mapping.
            auto_comment = parent_node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        message = babel_catalog.Message(msgid, None,
                                        auto_comments=auto_comments,
                                        locations=[(path, 0)])
        if msgid:
            _add_to_catalog(message, locales)

    def _babel_extract(fp, locales, path):
        try:
            all_parts = extract.extract('jinja2.ext.babel_extract', fp,
                                        options=options,
                                        comment_tags=comment_tags)
            for parts in all_parts:
                lineno, msgid, comments, context = parts
                message = babel_catalog.Message(msgid, None,
                                                auto_comments=comments,
                                                locations=[(path, lineno)])
                _add_to_catalog(message, locales)
        except tokenize.TokenError:
            self.pod.logger.error(
                'Problem extracting body: {}'.format(path))
            raise

    # Extract from collections in /content/:
    # Strings only extracted for relevant locales, determined by locale
    # scope (pod > collection > document > document part)
    last_pod_path = None
    for collection in self.pod.list_collections():
        if utils.fnmatches_paths(collection.blueprint_path, paths):
            text = 'Extracting: {}'.format(collection.blueprint_path)
            self.pod.logger.info(text)
            # Extract from blueprint.
            utils.walk(
                collection.tagged_fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    collection.blueprint_path, collection.locales, msgid,
                    key, node, **kwargs))
        for doc in collection.list_docs(include_hidden=True):
            if not utils.fnmatches_paths(doc.pod_path, paths):
                continue
            if doc.pod_path != last_pod_path:
                self.pod.logger.info('Extracting: {} ({} locale{})'.format(
                    doc.pod_path,
                    len(doc.locales),
                    's' if len(doc.locales) != 1 else '',
                ))
                last_pod_path = doc.pod_path
            # If doc.locale is set, this is a doc part: only extract for
            # its own locales (not those of base doc).
            if doc.locale:
                doc_locales = [doc.locale]
            # If not is set, this is a base doc (1st or only part): extract
            # for all locales declared for this doc
            elif doc.locales:
                doc_locales = doc.locales
            # Otherwise only include in template (--no-localized)
            else:
                doc_locales = [None]
            # Fix: removed an unconditional `doc_locales = [doc.locale]`
            # here, which clobbered the branch above and dropped the
            # declared locales for base docs.
            # Extract yaml fields: `foo@: Extract me`
            # ("tagged" = prior to stripping `@` suffix from field names)
            tagged_fields = doc.format.front_matter.data
            utils.walk(
                tagged_fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    doc.pod_path, doc_locales, msgid, key, node, **kwargs))
            # Extract body: {{_('Extract me')}}
            if doc.body:
                doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                _babel_extract(doc_body, doc_locales, doc.pod_path)
        # Extract from CSVs for this collection's locales
        for filepath in self.pod.list_dir(collection.pod_path):
            if not utils.fnmatches_paths(filepath, paths):
                continue
            if filepath.endswith('.csv'):
                pod_path = os.path.join(collection.pod_path,
                                        filepath.lstrip('/'))
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                rows = self.pod.read_csv(pod_path)
                for i, row in enumerate(rows):
                    for key, msgid in row.iteritems():
                        _handle_field(pod_path, collection.locales, msgid,
                                      key, row)

    # Extract from root of /content/:
    for path in self.pod.list_dir('/content/', recursive=False):
        if not utils.fnmatches_paths(path, paths):
            continue
        if path.endswith(('.yaml', '.yml')):
            pod_path = os.path.join('/content/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            utils.walk(
                self.pod.get_doc(pod_path).format.front_matter.data,
                lambda msgid, key, node, **kwargs: _handle_field(
                    pod_path, self.pod.list_locales(), msgid, key, node,
                    **kwargs))

    # Extract from /views/:
    # Not discriminating by file extension, because people use all sorts
    # (htm, html, tpl, dtml, jtml, ...)
    if not audit:
        for path in self.pod.list_dir('/views/'):
            if not utils.fnmatches_paths(path, paths) \
                    or path.startswith('.'):
                continue
            pod_path = os.path.join('/views/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            with self.pod.open_file(pod_path) as f:
                _babel_extract(f, self.pod.list_locales(), pod_path)

    # Extract from /partials/:
    if not audit:
        for path in self.pod.list_dir('/partials/'):
            if not utils.fnmatches_paths(path, paths) \
                    or path.startswith('.'):
                continue
            if path.endswith(('.yaml', '.yml', '.html', '.htm')):
                pod_path = os.path.join('/partials/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path) as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

    # Extract from podspec.yaml:
    if utils.fnmatches_paths('/podspec.yaml', paths):
        self.pod.logger.info('Extracting: /podspec.yaml')
        utils.walk(
            self.pod.get_podspec().get_config(),
            lambda msgid, key, node, **kwargs: _handle_field(
                '/podspec.yaml', self.pod.list_locales(), msgid, key, node,
                **kwargs))

    # Save it out: behavior depends on --localized and --locale flags
    # If an out path is specified, always collect strings into the one
    # catalog.
    if localized and not out_path:
        # Save each localized catalog
        for locale, new_catalog in localized_catalogs.items():
            # Skip if `locales` defined but doesn't include this locale
            if locales and locale not in locales:
                continue
            existing_catalog = self.get(locale)
            existing_catalog.update_using_catalog(
                new_catalog, include_obsolete=include_obsolete)
            if audit:
                continue
            existing_catalog.save(include_header=include_header)
            missing = existing_catalog.list_untranslated()
            num_messages = len(existing_catalog)
            self.pod.logger.info(
                'Saved: /{path} ({num_translated}/{num_messages})'.format(
                    path=existing_catalog.pod_path,
                    num_translated=num_messages - len(missing),
                    num_messages=num_messages))
        return untagged_strings, localized_catalogs.items()
    else:
        # --localized omitted / --no-localized
        template_catalog = self.get_template(self.template_path)
        template_catalog.update_using_catalog(
            unlocalized_catalog, include_obsolete=include_obsolete)
        if not audit:
            template_catalog.save(include_header=include_header)
        text = 'Saved: {} ({} messages)'
        self.pod.logger.info(
            text.format(template_catalog.pod_path, len(template_catalog)))
        return untagged_strings, [template_catalog]
def extract(
    self,
    include_obsolete=False,
    localized=False,
    paths=None,
    include_header=False,
    locales=None,
    use_fuzzy_matching=False,
):
    """Extracts translatable messages from the pod into catalogs.

    Collects tagged front-matter fields, podspec fields, and template
    strings into a message map, then either updates one catalog per locale
    (`localized=True`) or writes a single template catalog.
    """
    env = self.pod.create_template_env()
    all_locales = set(list(self.pod.list_locales()))
    # message id -> babel Message (locations accumulated across paths).
    message_ids_to_messages = {}
    # pod path -> set of Messages found at that path.
    paths_to_messages = collections.defaultdict(set)
    # pod path -> set of locales the doc at that path declares.
    paths_to_locales = collections.defaultdict(set)
    comment_tags = [":"]
    options = {"extensions": ",".join(env.extensions.keys()), "silent": "false"}

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        # Only string values under '@'-tagged keys are translatable.
        if not key.endswith("@") or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get("{}#".format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        locations = [(path, 0)]
        existing_message = message_ids_to_messages.get(item)
        if existing_message:
            # Same msgid seen before: just record the extra location.
            message_ids_to_messages[item].locations.extend(locations)
            paths_to_messages[path].add(existing_message)
        else:
            message = catalog.Message(item, None, auto_comments=auto_comments, locations=locations)
            message_ids_to_messages[message.id] = message
            paths_to_messages[path].add(message)

    for collection in self.pod.list_collections():
        text = "Extracting collection: {}".format(collection.pod_path)
        self.pod.logger.info(text)
        for doc in collection.list_docs(include_hidden=True):
            if not self._should_extract(paths, doc.pod_path):
                continue
            tagged_fields = doc.get_tagged_fields()
            utils.walk(tagged_fields, lambda *args: callback(doc, *args))
            paths_to_locales[doc.pod_path].update(doc.locales)
            all_locales.update(doc.locales)

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = "/podspec.yaml"
    if self._should_extract(paths, podspec_path):
        self.pod.logger.info("Extracting podspec: {}".format(podspec_path))
        utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Extract messages from content and views.
    pod_files = [os.path.join("/views", path) for path in self.pod.list_dir("/views/")]
    pod_files += [os.path.join("/content", path) for path in self.pod.list_dir("/content/")]
    for pod_path in pod_files:
        if self._should_extract(paths, pod_path):
            pod_locales = paths_to_locales.get(pod_path)
            if pod_locales:
                text = "Extracting: {} ({} locales)"
                text = text.format(pod_path, len(pod_locales))
                self.pod.logger.info(text)
            else:
                self.pod.logger.info("Extracting: {}".format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                all_parts = extract.extract(
                    "jinja2.ext.babel_extract", fp, options=options, comment_tags=comment_tags
                )
                for parts in all_parts:
                    lineno, string, comments, context = parts
                    locations = [(pod_path, lineno)]
                    existing_message = message_ids_to_messages.get(string)
                    if existing_message:
                        message_ids_to_messages[string].locations.extend(locations)
                    else:
                        message = catalog.Message(
                            string, None, auto_comments=comments, context=context, locations=locations
                        )
                        paths_to_messages[pod_path].add(message)
                        message_ids_to_messages[message.id] = message
            except tokenize.TokenError:
                self.pod.logger.error("Problem extracting: {}".format(pod_path))
                raise

    # Localized message catalogs.
    if localized:
        for locale in all_locales:
            # Skip locales excluded by the explicit `locales` whitelist.
            if locales and locale not in locales:
                continue
            localized_catalog = self.get(locale)
            if not include_obsolete:
                localized_catalog.obsolete = babel_util.odict()
                # Drop messages no longer present in any extracted source.
                for message in list(localized_catalog):
                    if message.id not in message_ids_to_messages:
                        localized_catalog.delete(message.id, context=message.context)
            catalog_to_merge = catalog.Catalog()
            for path, message_items in paths_to_messages.iteritems():
                locales_with_this_path = paths_to_locales.get(path)
                # Only merge messages from paths relevant to this locale.
                if locales_with_this_path and locale not in locales_with_this_path:
                    continue
                for message in message_items:
                    # Preserve any existing translation for this message.
                    translation = None
                    existing_message = localized_catalog.get(message.id)
                    if existing_message:
                        translation = existing_message.string
                    catalog_to_merge.add(
                        message.id,
                        translation,
                        locations=message.locations,
                        auto_comments=message.auto_comments,
                        flags=message.flags,
                        user_comments=message.user_comments,
                        context=message.context,
                        lineno=message.lineno,
                        previous_id=message.previous_id,
                    )
            localized_catalog.update_using_catalog(catalog_to_merge, use_fuzzy_matching=use_fuzzy_matching)
            localized_catalog.save(include_header=include_header)
            missing = localized_catalog.list_untranslated()
            num_messages = len(localized_catalog)
            num_translated = num_messages - len(missing)
            text = "Saved: /{path} ({num_translated}/{num_messages})"
            self.pod.logger.info(
                text.format(
                    path=localized_catalog.pod_path, num_translated=num_translated, num_messages=num_messages
                )
            )
        return

    # Global (or missing, specified by -o) message catalog.
    template_path = self.template_path
    catalog_obj, _ = self._get_or_create_catalog(template_path)
    if not include_obsolete:
        catalog_obj.obsolete = babel_util.odict()
        for message in list(catalog_obj):
            catalog_obj.delete(message.id, context=message.context)
    for message in message_ids_to_messages.itervalues():
        catalog_obj.add(message.id, None, locations=message.locations, auto_comments=message.auto_comments)
    return self.write_template(
        template_path, catalog_obj, include_obsolete=include_obsolete, include_header=include_header
    )
def extract(self):
    """Extracts translatable messages into the pod's PO template.

    Reads the existing template (to preserve 'requested' flags), extracts
    strings from views/content templates, tagged content fields, and the
    podspec, then rewrites messages.pot.

    Returns:
        The babel catalog object that was written to the template.
    """
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(Catalogs.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
        existing = False
    else:
        existing = pofile.read_po(self.pod.open_file(template_path))
    template = self.pod.open_file(template_path, mode='w')
    catalog_obj = pofile.read_po(self.pod.open_file(template_path))
    extracted = []
    self.pod.logger.info(
        'Updating translation template: {}'.format(template_path))
    options = {
        'extensions': ','.join(
            self.pod.get_template_env().extensions.keys()),
        'silent': 'false',
    }
    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path)
                 for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path)
                  for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp,
                                           options=options)
                for message in messages:
                    lineno, string, comments, context = message
                    flags = set()
                    # Carry over the 'requested' flag from the previous
                    # template, if this msgid already had it.
                    if existing and string in existing:
                        existing_message = existing.get(string)
                        if existing_message and \
                                'requested' in existing_message.flags:
                            flags.add('requested')
                    added_message = catalog_obj.add(
                        string, None, [(pod_path, lineno)],
                        auto_comments=comments, context=context, flags=flags)
                    extracted.append(added_message)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, unused_node):
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Fix: use the `path` argument rather than `doc.pod_path` — `doc`
        # is not defined in this scope; when called for the podspec it was
        # either a stale loop variable (wrong path) or a NameError.
        comments = ['{}:{}'.format(path, key)]
        added_message = catalog_obj.add(item, None, [(path, 0)],
                                        auto_comments=comments, context=None)
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info('Extracting from collection: {}'.format(
            collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info(
        'Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    self.pod.logger.info(
        'Writing {} messages to translation template.'.format(
            len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, no_location=True,
                    omit_header=True, sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj
def extract(self, include_obsolete=None, localized=None, paths=None,
            include_header=None, locales=None, use_fuzzy_matching=None,
            audit=False):
    """Extracts translatable strings from the pod into catalogs.

    Walks blueprints, docs (front matter and body), CSVs, root content
    YAML, views and the podspec, collecting tagged strings.

    Args:
        include_obsolete: Whether to keep obsolete messages (defaults from
            pod config via get_extract_config).
        localized: Save one catalog per locale instead of one template.
        paths: Optional list of path patterns to restrict doc extraction.
        include_header: Whether to write catalog headers.
        locales: Optional whitelist of locales to save.
        use_fuzzy_matching: Passed through to get_extract_config.
        audit: When True, collect strings but skip saving and skip
            view extraction.
    Returns:
        Tuple of (untagged_strings, catalogs) where catalogs is either
        (locale, catalog) items or a one-element list with the template.
    """
    include_obsolete, localized, include_header, use_fuzzy_matching, = \
        self.get_extract_config(include_header=include_header,
                                include_obsolete=include_obsolete,
                                localized=localized,
                                use_fuzzy_matching=use_fuzzy_matching)
    env = self.pod.get_jinja_env()
    # {
    #     locale1: locale1_catalog,
    #     locale2: locale2_catalog,
    #     ...
    # }
    # This is built up as we extract
    localized_catalogs = {}
    untagged_strings = []
    unlocalized_catalog = catalogs.Catalog()  # for localized=False case
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }

    def _add_to_catalog(message, locales):
        # Add to all relevant catalogs
        for locale in locales:
            if locale not in localized_catalogs:
                # Start with a new catalog so we can track what's obsolete:
                # we'll merge it with existing translations later.
                # *NOT* setting `locale` kwarg here b/c that will load
                # existing translations.
                localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
            localized_catalogs[locale][message.id] = message
        unlocalized_catalog[message.id] = message

    def _handle_field(path, locales, msgid, key, node, parent_node=None):
        if (not key
                or not isinstance(msgid, basestring)
                or not isinstance(key, basestring)):
            return
        if not key.endswith('@'):
            # Untagged strings are reported (e.g. for auditing), not
            # extracted.
            if msgid:
                untagged_strings.append((path, msgid))
            return
        # Support gettext "extracted comments" on tagged fields:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        elif isinstance(node, list) and parent_node:
            # For list items, the comment lives on the enclosing mapping.
            auto_comment = parent_node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        message = babel_catalog.Message(
            msgid, None, auto_comments=auto_comments,
            locations=[(path, 0)])
        if msgid:
            _add_to_catalog(message, locales)

    def _babel_extract(fp, locales, path):
        try:
            all_parts = extract.extract(
                'jinja2.ext.babel_extract', fp, options=options,
                comment_tags=comment_tags)
            for parts in all_parts:
                lineno, msgid, comments, context = parts
                message = babel_catalog.Message(
                    msgid, None, auto_comments=comments,
                    locations=[(path, lineno)])
                _add_to_catalog(message, locales)
        except tokenize.TokenError:
            self.pod.logger.error(
                'Problem extracting body: {}'.format(path))
            raise

    # Extract from collections in /content/:
    # Strings only extracted for relevant locales, determined by locale
    # scope (pod > collection > document > document part)
    last_pod_path = None
    for collection in self.pod.list_collections():
        text = 'Extracting: {}'.format(collection.blueprint_path)
        self.pod.logger.info(text)
        # Extract from blueprint.
        utils.walk(collection.tagged_fields,
                   lambda msgid, key, node, **kwargs: _handle_field(
                       collection.blueprint_path, collection.locales,
                       msgid, key, node, **kwargs))
        # Extract from docs in collection.
        # Fix: collapsed a broken doubly-nested loop (`for doc in
        # collection.docs(...)` wrapping `for doc in
        # collection.list_docs(...)`) that re-iterated every doc per doc
        # and shadowed the loop variable; the path filter now applies
        # directly to each listed doc.
        for doc in collection.list_docs(include_hidden=True):
            if not self._should_extract_as_babel(paths, doc.pod_path):
                continue
            if doc.pod_path != last_pod_path:
                self.pod.logger.info(
                    'Extracting: {} ({} locale{})'.format(
                        doc.pod_path,
                        len(doc.locales),
                        's' if len(doc.locales) != 1 else '',
                    )
                )
                last_pod_path = doc.pod_path
            # If doc.locale is set, this is a doc part: only extract for
            # its own locales (not those of base doc).
            if doc.locale:
                doc_locales = [doc.locale]
            # If not is set, this is a base doc (1st or only part): extract
            # for all locales declared for this doc
            elif doc.locales:
                doc_locales = doc.locales
            # Otherwise only include in template (--no-localized)
            else:
                doc_locales = [None]
            # Fix: removed an unconditional `doc_locales = [doc.locale]`
            # here, which clobbered the branch above and dropped the
            # declared locales for base docs.
            # Extract yaml fields: `foo@: Extract me`
            # ("tagged" = prior to stripping `@` suffix from field names)
            tagged_fields = doc.format.front_matter.data
            utils.walk(tagged_fields,
                       lambda msgid, key, node, **kwargs: _handle_field(
                           doc.pod_path, doc_locales, msgid, key, node,
                           **kwargs))
            # Extract body: {{_('Extract me')}}
            if doc.body:
                doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                _babel_extract(doc_body, doc_locales, doc.pod_path)
        # Extract from CSVs for this collection's locales
        for filepath in self.pod.list_dir(collection.pod_path):
            if filepath.endswith('.csv'):
                pod_path = os.path.join(
                    collection.pod_path, filepath.lstrip('/'))
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                rows = self.pod.read_csv(pod_path)
                for i, row in enumerate(rows):
                    for key, msgid in row.iteritems():
                        _handle_field(
                            pod_path, collection.locales, msgid, key, row)

    # Extract from root of /content/:
    for path in self.pod.list_dir('/content/', recursive=False):
        if path.endswith(('.yaml', '.yml')):
            pod_path = os.path.join('/content/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            utils.walk(
                self.pod.get_doc(pod_path).format.front_matter.data,
                lambda msgid, key, node, **kwargs: _handle_field(
                    pod_path, self.pod.list_locales(), msgid, key, node,
                    **kwargs)
            )

    # Extract from /views/:
    # Not discriminating by file extension, because people use all sorts
    # (htm, html, tpl, dtml, jtml, ...)
    if not audit:
        for path in self.pod.list_dir('/views/'):
            if path.startswith('.'):
                continue
            pod_path = os.path.join('/views/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            with self.pod.open_file(pod_path) as f:
                _babel_extract(f, self.pod.list_locales(), pod_path)

    # Extract from podspec.yaml:
    self.pod.logger.info('Extracting: /podspec.yaml')
    utils.walk(
        self.pod.get_podspec().get_config(),
        lambda msgid, key, node, **kwargs: _handle_field(
            '/podspec.yaml', self.pod.list_locales(), msgid, key, node,
            **kwargs)
    )

    # Save it out: behavior depends on --localized and --locale flags
    if localized:
        # Save each localized catalog
        for locale, new_catalog in localized_catalogs.items():
            # Skip if `locales` defined but doesn't include this locale
            if locales and locale not in locales:
                continue
            existing_catalog = self.get(locale)
            existing_catalog.update_using_catalog(
                new_catalog, include_obsolete=include_obsolete)
            if audit:
                continue
            existing_catalog.save(include_header=include_header)
            missing = existing_catalog.list_untranslated()
            num_messages = len(existing_catalog)
            self.pod.logger.info(
                'Saved: /{path} ({num_translated}/{num_messages})'.format(
                    path=existing_catalog.pod_path,
                    num_translated=num_messages - len(missing),
                    num_messages=num_messages)
            )
        return untagged_strings, localized_catalogs.items()
    else:
        # --localized omitted / --no-localized
        template_catalog = self.get_template()
        template_catalog.update_using_catalog(
            unlocalized_catalog, include_obsolete=include_obsolete)
        if not audit:
            template_catalog.save(include_header=include_header)
        text = 'Saved: {} ({} messages)'
        self.pod.logger.info(
            text.format(template_catalog.pod_path, len(template_catalog))
        )
        return untagged_strings, [template_catalog]
def extract(self):
    """Extracts translatable messages from views, content and podspec
    into the pod's PO template (messages.pot).

    Returns:
        The babel catalog object that was written to the template.
    """
    catalog_obj = catalog.Catalog()
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(self.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
    template = self.pod.open_file(template_path, mode='w')
    extracted = []
    logging.info('Updating translation template: {}'.format(template_path))

    # Extract messages from views.
    pod_files = self.pod.list_dir('/views/')
    for path in pod_files:
        pod_path = os.path.join('/views', path)
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            logging.info('Extracting from view: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp)
                for message in messages:
                    lineno, string, comments, context = message
                    added_message = catalog_obj.add(
                        string, None, [(pod_path, lineno)],
                        auto_comments=comments, context=context)
                    extracted.append(added_message)
            except tokenize.TokenError:
                logging.error('Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, unused_node):
        # Only string values under '@'-tagged keys are translatable.
        # NOTE(review): assumes `key` is always a string here — a non-string
        # key (e.g. a list index) would raise on .endswith; verify against
        # utils.walk's callback contract.
        if not isinstance(item, basestring):
            return
        if key.endswith('@'):
            comments = []
            context = None
            added_message = catalog_obj.add(
                item, None, [(path, 0)], auto_comments=comments,
                context=context)
            if added_message not in extracted:
                extracted.append(added_message)

    for collection in self.pod.list_collections():
        logging.info('Extracting from collection: {}'.format(
            collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    logging.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    logging.info('Writing {} messages to translation template.'.format(
        len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, no_location=True,
                    omit_header=True, sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj
def extract(self):
    """Extracts translatable messages from views, content and podspec
    into the pod's PO template.

    Returns:
        The result of write_template() for the populated catalog.
    """
    env = self.pod.create_template_env()
    template_path = self.template_path
    catalog_obj, exists = self._get_or_create_catalog(template_path)
    extracted = []
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }
    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path)
                 for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path)
                  for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp,
                                           options=options,
                                           comment_tags=comment_tags)
                for message in messages:
                    added_message = self._add_message(catalog_obj, message)
                    extracted.append(added_message)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        # Only string values under '@'-tagged keys are translatable.
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        added_message = catalog_obj.add(item, None,
                                        auto_comments=auto_comments)
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info('Extracting from collection: {}'.format(
            collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    return self.write_template(template_path, catalog_obj)
def extract(self):
    """Regenerate the pod's `messages.pot` translation template.

    Extracts messages from `/views/` and `/content/` templates via Babel's
    Jinja2 extractor, from tagged (`@`-suffixed) document fields, and from
    the podspec, then writes the merged catalog back to the template file.
    Messages that carried a `requested` flag in the previous template keep
    that flag.

    Returns:
        The babel catalog object that was written to the template.

    Raises:
        tokenize.TokenError: re-raised (after logging) when Babel fails to
            tokenize a template file.
    """
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(Catalogs.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
        # Falsy sentinel: no prior template to carry flags over from.
        existing = False
    else:
        existing = pofile.read_po(self.pod.open_file(template_path))
    # NOTE(review): the template is opened with mode='w' *before* it is
    # re-read into `catalog_obj` below. On a plain filesystem an eager 'w'
    # open would truncate the file and the second read_po would see an empty
    # catalog — presumably the pod storage layer defers the truncation;
    # confirm against the pod file-storage implementation.
    template = self.pod.open_file(template_path, mode='w')
    catalog_obj = pofile.read_po(self.pod.open_file(template_path))
    extracted = []
    self.pod.logger.info('Updating translation template: {}'.format(template_path))
    # "{#: ... #}" template comments become gettext extracted comments.
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(self.pod.get_template_env().extensions.keys()),
        'silent': 'false',
    }
    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path) for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path) for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp, options=options, comment_tags=comment_tags)
                for message in messages:
                    lineno, string, comments, context = message
                    flags = set()
                    # Preserve the 'requested' flag from the prior template
                    # so re-extraction does not drop translation requests.
                    if existing and string in existing:
                        existing_message = existing.get(string)
                        if existing_message and 'requested' in existing_message.flags:
                            flags.add('requested')
                    added_message = catalog_obj.add(
                        string, None, auto_comments=comments, context=context,
                        flags=flags)
                    extracted.append(added_message)
            except tokenize.TokenError:
                # Log which file failed, then let the caller see the error.
                self.pod.logger.error('Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # NOTE(review): a comment in a sibling implementation claims this
        # filters to default-locale documents, but no locale check happens
        # here — every tagged field is forwarded. Confirm intent.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        # Only extract string values whose key is tagged for translation
        # with a trailing `@` (e.g. `title@: Hello`).
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        added_message = catalog_obj.add(item, None, auto_comments=auto_comments)
        # Avoid recording the same message object twice across documents.
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info('Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            # Bind `doc` per call; `utils.walk` only supplies (item, key, node).
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))
    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))
    # Write to PO template.
    self.pod.logger.info('Writing {} messages to translation template.'.format(len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, omit_header=True, sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj
def extract(self, include_obsolete=False, localized=False, paths=None,
            include_header=False, locales=None, use_fuzzy_matching=False):
    """Extract messages pod-wide into the template or per-locale catalogs.

    Gathers messages from collection blueprints, document tagged fields,
    the podspec, `/data/` YAML files, CSV files with `@`-tagged columns,
    and Jinja2 templates under `/views/`, `/content/`, and `/data/`.

    Args:
        include_obsolete: Keep obsolete messages; when False, obsolete
            entries are cleared and stale messages are deleted.
        localized: When True, write one catalog per locale and return None
            instead of writing the single PO template.
        paths: Optional list restricting extraction to specific pod paths
            (consulted via `self._should_extract_as_babel` /
            `self._should_extract_as_csv`).
        include_header: Forwarded to catalog save / template write.
        locales: When `localized`, restrict output to these locales.
        use_fuzzy_matching: Forwarded to `update_using_catalog` for the
            localized merge.

    Returns:
        The written PO template (via `self.write_template`) in the global
        case, or None in the localized case.

    Raises:
        tokenize.TokenError: re-raised (after logging) when Babel fails to
            tokenize a template file.
    """
    env = self.pod.get_jinja_env()
    all_locales = set(list(self.pod.list_locales()))
    # Shared mutable state for the closures below:
    #   message_ids_to_messages: msgid -> catalog.Message (dedup + merged locations)
    #   paths_to_messages: pod path -> set of messages found there
    #   paths_to_locales: doc pod path -> locales that doc is available in
    message_ids_to_messages = {}
    paths_to_messages = collections.defaultdict(set)
    paths_to_locales = collections.defaultdict(set)
    # "{#: ... #}" template comments become gettext extracted comments.
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }

    # Extract from content files.
    def callback(doc, item, key, unused_node):
        # NOTE(review): the original comment claimed this verifies the
        # document is in the default locale, but no locale check is made —
        # every tagged field is forwarded. Confirm intent.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _add_existing_message(msgid, locations, auto_comments=None,
                              context=None, path=None):
        # Record a message once per msgid; repeated sightings only extend
        # its location list and per-path membership.
        existing_message = message_ids_to_messages.get(msgid)
        auto_comments = [] if auto_comments is None else auto_comments
        if existing_message:
            message_ids_to_messages[msgid].locations.extend(locations)
            paths_to_messages[path].add(existing_message)
        else:
            message = catalog.Message(
                msgid,
                None,
                auto_comments=auto_comments,
                context=context,
                locations=locations)
            paths_to_messages[path].add(message)
            message_ids_to_messages[message.id] = message

    def _handle_field(path, item, key, node):
        # Only extract string values whose (string) key is tagged for
        # translation with a trailing `@` (e.g. `title@: Hello`).
        if (not key
                or not isinstance(item, basestring)
                or not isinstance(key, basestring)
                or not key.endswith('@')):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        # YAML fields carry no line numbers; use 0 as the location line.
        locations = [(path, 0)]
        _add_existing_message(
            msgid=item,
            auto_comments=auto_comments,
            locations=locations,
            path=path)

    for collection in self.pod.list_collections():
        text = 'Extracting collection: {}'.format(collection.pod_path)
        self.pod.logger.info(text)
        # Extract from blueprint.
        utils.walk(collection.tagged_fields, lambda *args: callback(collection, *args))
        # Extract from docs in collection.
        for doc in collection.docs(include_hidden=True):
            if not self._should_extract_as_babel(paths, doc.pod_path):
                continue
            tagged_fields = doc.get_tagged_fields()
            utils.walk(tagged_fields, lambda *args: callback(doc, *args))
            # Track the doc's locales so localized catalogs can filter by path.
            paths_to_locales[doc.pod_path].update(doc.locales)
            all_locales.update(doc.locales)

    # Extract from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    if self._should_extract_as_babel(paths, podspec_path):
        self.pod.logger.info('Extracting podspec: {}'.format(podspec_path))
        utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Extract from content and views.
    pod_files = [os.path.join('/views', path) for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path) for path in self.pod.list_dir('/content/')]
    pod_files += [os.path.join('/data', path) for path in self.pod.list_dir('/data/')]
    for pod_path in pod_files:
        if self._should_extract_as_csv(paths, pod_path):
            rows = utils.get_rows_from_csv(self.pod, pod_path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            for row in rows:
                # `@`-suffixed CSV columns are translatable; the column
                # index stands in for a line number in the location.
                for i, parts in enumerate(row.iteritems()):
                    key, val = parts
                    if key.endswith('@'):
                        locations = [(pod_path, i)]
                        _add_existing_message(
                            msgid=val,
                            locations=locations,
                            path=pod_path)
        elif self._should_extract_as_babel(paths, pod_path):
            # YAML data files are walked for tagged fields instead of being
            # fed through the Jinja2 extractor.
            if pod_path.startswith('/data') and pod_path.endswith(('.yaml', '.yml')):
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                content = self.pod.read_file(pod_path)
                fields = utils.load_yaml(content, pod=self.pod)
                utils.walk(fields, lambda *args: _handle_field(pod_path, *args))
                continue
            pod_locales = paths_to_locales.get(pod_path)
            if pod_locales:
                text = 'Extracting: {} ({} locales)'
                text = text.format(pod_path, len(pod_locales))
                self.pod.logger.info(text)
            else:
                self.pod.logger.info('Extracting: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                all_parts = extract.extract(
                    'jinja2.ext.babel_extract', fp, options=options,
                    comment_tags=comment_tags)
                for parts in all_parts:
                    lineno, string, comments, context = parts
                    locations = [(pod_path, lineno)]
                    _add_existing_message(
                        msgid=string,
                        auto_comments=comments,
                        context=context,
                        locations=locations,
                        path=pod_path)
            except tokenize.TokenError:
                # Log which file failed, then let the caller see the error.
                self.pod.logger.error('Problem extracting: {}'.format(pod_path))
                raise

    # Localized message catalogs.
    if localized:
        for locale in all_locales:
            if locales and locale not in locales:
                continue
            localized_catalog = self.get(locale)
            if not include_obsolete:
                localized_catalog.obsolete = babel_util.odict()
                # Drop messages no longer found anywhere in the pod.
                for message in list(localized_catalog):
                    if message.id not in message_ids_to_messages:
                        localized_catalog.delete(message.id, context=message.context)
            catalog_to_merge = catalog.Catalog()
            for path, message_items in paths_to_messages.iteritems():
                locales_with_this_path = paths_to_locales.get(path)
                # Skip paths whose documents don't exist in this locale.
                if locales_with_this_path and locale not in locales_with_this_path:
                    continue
                for message in message_items:
                    # Carry over any existing translation for this msgid.
                    translation = None
                    existing_message = localized_catalog.get(message.id)
                    if existing_message:
                        translation = existing_message.string
                    catalog_to_merge.add(
                        message.id, translation, locations=message.locations,
                        auto_comments=message.auto_comments,
                        flags=message.flags,
                        user_comments=message.user_comments,
                        context=message.context,
                        lineno=message.lineno,
                        previous_id=message.previous_id)
            localized_catalog.update_using_catalog(
                catalog_to_merge, use_fuzzy_matching=use_fuzzy_matching)
            localized_catalog.save(include_header=include_header)
            missing = localized_catalog.list_untranslated()
            num_messages = len(localized_catalog)
            num_translated = num_messages - len(missing)
            text = 'Saved: /{path} ({num_translated}/{num_messages})'
            self.pod.logger.info(
                text.format(path=localized_catalog.pod_path,
                            num_translated=num_translated,
                            num_messages=num_messages))
        # Localized mode writes per-locale catalogs and returns nothing.
        return

    # Global (or missing, specified by -o) message catalog.
    template_path = self.template_path
    catalog_obj, _ = self._get_or_create_catalog(template_path)
    if not include_obsolete:
        catalog_obj.obsolete = babel_util.odict()
        for message in list(catalog_obj):
            catalog_obj.delete(message.id, context=message.context)
    for message in message_ids_to_messages.itervalues():
        if message.id:
            catalog_obj.add(message.id, None, locations=message.locations,
                            auto_comments=message.auto_comments)
    return self.write_template(
        template_path, catalog_obj, include_obsolete=include_obsolete,
        include_header=include_header)