示例#1
0
def untag_fields(fields, catalog):
  """Strip trailing ``@`` tags from field keys, honoring tag priority.

  A key's priority is its number of trailing ``@`` characters. A key with a
  single ``@`` always supplies the untagged value. A key with more than one
  ``@`` supplies it only when the same untagged name was already recorded
  during this walk and its content is found in ``catalog``.

  Every tagged key is removed from dict nodes; ``fields`` is modified in
  place and also returned.
  """
  seen_untagged = set()
  pending_adds = []
  pending_removals = []

  def _visit(content, key, node):
    if not isinstance(key, basestring) or not key.endswith('@'):
      return
    bare_key = key.rstrip('@')
    priority = len(key) - len(bare_key)
    pending_removals.append((node, key))
    if priority > 1:
      # Higher-priority tags only apply on top of an already-seen key.
      if bare_key not in seen_untagged:
        return
      try:
        is_translated = content in catalog
      except AttributeError:
        is_translated = False
      if not is_translated:
        return
    seen_untagged.add(bare_key)
    pending_adds.append((node, bare_key, content))

  utils.walk(fields, _visit)

  # Mutations are applied only after the walk: removals first, then
  # additions. Non-dict nodes are left untouched.
  for target, tagged_key in pending_removals:
    if isinstance(target, dict):
      del target[tagged_key]
  for target, bare_key, content in pending_adds:
    if isinstance(target, dict):
      target[bare_key] = content
  return fields
示例#2
0
def untag_fields(fields):
  """Rename every key ending in ``@`` to the same key minus that last ``@``.

  Renames are collected while walking and applied afterwards, so no
  container is mutated while ``utils.walk`` may still be traversing it.
  Only dict nodes are renamed; the callback can also be handed non-dict
  nodes, which have no keyed ``pop`` and are skipped.

  Args:
    fields: Nested structure of tagged fields; modified in place.

  Returns:
    The same ``fields`` object.
  """
  pending_renames = []

  def callback(item, key, node):
    if not isinstance(key, basestring) or not key.endswith('@'):
      return
    if isinstance(node, dict):
      pending_renames.append((node, key))

  utils.walk(fields, callback)

  for node, key in pending_renames:
    # The same dict can be reachable more than once (shared references);
    # only rename while the tagged key is still present.
    if key in node:
      node[key[:-1]] = node.pop(key)
  return fields
示例#3
0
    def organize_fields(self, fields):
        """Return ``fields`` re-ordered into a stable, readable layout.

        Top-level ordering of the result:

        * keys starting with ``$`` come first, sorted;
        * all other keys except ``partials`` follow, sorted;
        * ``partials`` comes last. Inside each partial, the ``partial`` key
          is placed first and the remaining keys are sorted.

        Before re-ordering, ``utils.walk`` is run over ``fields`` with a
        callback that replaces ``node[key]`` by a key-sorted ``OrderedDict``
        whenever that value exposes ``keys()``. This mutates ``fields``.

        Args:
            fields: Mapping of field names to values. Top-level keys must
                support ``startswith``.

        Returns:
            A new ``collections.OrderedDict`` with the organized fields.
        """
        new_fields = collections.OrderedDict()

        # Deep sort all fields by default.
        def _walk_field(item, key, node, parent_node):
            try:
                value = node[key]
                new_value = collections.OrderedDict()
                for sub_key in sorted(value.keys()):
                    new_value[sub_key] = value[sub_key]
                node[key] = new_value
            except Exception:
                # Best effort: values that are not sortable mappings (or
                # nodes that cannot be indexed by ``key``) are left as-is.
                # Unlike a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                pass

        utils.walk(fields, _walk_field)

        # Organization rules:
        # $ prefixed fields should come first.
        # Partials key is last.
        # Partials' partial key should be first in partial data.
        # Sort the fields to keep consistent between saves.

        other_keys = []
        for key in sorted(fields.keys()):
            if key.startswith('$'):
                new_fields[key] = fields[key]
            elif key != 'partials':
                other_keys.append(key)

        for key in other_keys:
            new_fields[key] = fields[key]

        if 'partials' in fields:
            new_partials = []

            for partial in fields['partials']:
                new_partial = collections.OrderedDict()

                try:
                    # Put the partial name first for easy readability.
                    if 'partial' in partial:
                        new_partial['partial'] = partial['partial']

                    for key in sorted(partial.keys()):
                        if key != 'partial':
                            new_partial[key] = partial[key]
                except TypeError:
                    # When unable to sort the partial keys, use original.
                    new_partials.append(partial)
                else:
                    new_partials.append(new_partial)

            new_fields['partials'] = new_partials

        return new_fields
示例#4
0
def convert_fields(fields):
    """Convert raw field data from submission to use objects when needed.

    Two conversions are applied in place while walking ``fields``:

    * Values matching ``DATETIME_RE`` become ``datetime.datetime`` objects
      (format ``%Y-%m-%dT%H:%M``); values matching ``DATE_RE`` become
      ``datetime.date`` objects (format ``%Y-%m-%d``). Values that cannot be
      matched or parsed are left unchanged.
    * A node holding ``tag: '!g.…'`` together with a ``value`` key is
      replaced in its parent by ``ConstructorReference(tag, value)``, or by
      ``None`` when the value is empty.

    Args:
        fields: Nested structure of submitted field data; modified in place.

    Returns:
        The same ``fields`` object.
    """
    def _replace_in_parent(parent_node, node, new_value):
        """Swap ``node`` for ``new_value`` inside its parent container."""
        if parent_node is None:
            # No parent container to update (e.g. the node is the root).
            return
        if isinstance(parent_node, dict):
            # Every entry equal to the node is replaced.
            for parent_key in parent_node:
                if parent_node[parent_key] == node:
                    parent_node[parent_key] = new_value
        else:
            # Sequence parent: only the first matching entry is replaced.
            for index, entry in enumerate(parent_node):
                if entry == node:
                    parent_node[index] = new_value
                    break

    def _walk_field(item, key, node, parent_node):
        # Convert dates.
        try:
            value = node[key]
            if DATETIME_RE.match(value):
                node[key] = datetime.datetime.strptime(value, "%Y-%m-%dT%H:%M")
            elif DATE_RE.match(value):
                node[key] = datetime.datetime.strptime(
                    value, "%Y-%m-%d").date()
        except Exception:
            # Best effort: non-string values, unindexable nodes and strings
            # that look like dates but do not parse are left untouched.
            pass

        # Convert the !g constructors into their objects.
        if key == 'tag' and item.startswith('!g.') and 'value' in node:
            # If the value was removed, remove the constructor.
            if not node['value']:
                new_value = None
            else:
                new_value = ConstructorReference(item, node['value'])
            _replace_in_parent(parent_node, node, new_value)

    utils.walk(fields, _walk_field)

    return fields
示例#5
0
    def extract(self,
                include_obsolete=None,
                localized=None,
                paths=None,
                include_header=None,
                locales=None,
                use_fuzzy_matching=None,
                audit=False,
                out_path=None):
        """Extract translatable strings from the pod into message catalogs.

        Sources scanned, in order: collection blueprints, collection docs
        (front matter and body), CSV files in each collection, YAML files at
        the root of ``/content/``, ``/views/``, ``/partials/`` and
        ``/podspec.yaml``.

        Args:
            include_obsolete: Forwarded to ``get_extract_config`` and then to
                ``update_using_catalog``.
            localized: Forwarded to ``get_extract_config``. When the resolved
                value is truthy and ``out_path`` is falsy, one catalog per
                locale is updated; otherwise a single template catalog is.
            paths: Passed to ``utils.fnmatches_paths`` to decide which pod
                paths are processed.
            include_header: Forwarded to ``get_extract_config`` and then to
                the catalogs' ``save``.
            locales: In the localized branch, catalogs whose locale is not in
                this collection are skipped. Ignored when falsy.
            use_fuzzy_matching: Forwarded to ``get_extract_config``; the
                resolved value is not referenced again in this method.
            audit: When true, ``/views/`` and ``/partials/`` are not scanned
                and no catalog is saved.
            out_path: When truthy, forces the single-template branch.

        Returns:
            A 2-tuple ``(untagged_strings, catalogs)``. ``untagged_strings``
            is a list of ``(path, msgid)`` pairs for non-empty string fields
            whose key does not end in ``@``. ``catalogs`` is
            ``localized_catalogs.items()`` in the localized branch and
            ``[template_catalog]`` otherwise.

        Raises:
            tokenize.TokenError: Re-raised after logging when template
                extraction fails.
        """
        include_obsolete, localized, include_header, use_fuzzy_matching, = \
            self.get_extract_config(include_header=include_header,
                                    include_obsolete=include_obsolete, localized=localized,
                                    use_fuzzy_matching=use_fuzzy_matching)

        env = self.pod.get_jinja_env()
        # {
        #    locale1: locale1_catalog,
        #    locale2: locale2_catalog,
        #    ...
        # }
        # This is built up as we extract
        localized_catalogs = {}
        untagged_strings = []
        unlocalized_catalog = catalogs.Catalog()  # for localized=False case

        # Passed to the babel extractor: comments starting with ':' are
        # collected as extracted comments.
        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        def _add_to_catalog(message, locales):
            # Add to all relevant catalogs
            for locale in locales:
                if locale not in localized_catalogs:
                    # Start with a new catalog so we can track what's obsolete:
                    # we'll merge it with existing translations later.
                    # *NOT* setting `locale` kwarg here b/c that will load existing
                    # translations.
                    localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
                localized_catalogs[locale][message.id] = message
            # Every message also goes into the single unlocalized catalog.
            unlocalized_catalog[message.id] = message

        def _handle_field(path, locales, msgid, key, node, parent_node=None):
            # Only string values under non-empty string keys are considered.
            if (not key or not isinstance(msgid, basestring)
                    or not isinstance(key, basestring)):
                return
            if not key.endswith('@'):
                # Untagged, non-empty strings are reported back to the caller
                # instead of being extracted.
                if msgid:
                    untagged_strings.append((path, msgid))
                return
            # Support gettext "extracted comments" on tagged fields:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                # Python 2: encode unicode keys to UTF-8 bytes before they
                # are interpolated into the byte-string format below.
                if isinstance(key, unicode):
                    key = key.encode('utf-8')
                auto_comment = node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            elif isinstance(node, list) and parent_node:
                # For list nodes the comment is looked up on the parent.
                # NOTE(review): assumes parent_node has a dict-style .get —
                # confirm against utils.walk.
                auto_comment = parent_node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)

            message = babel_catalog.Message(msgid,
                                            None,
                                            auto_comments=auto_comments,
                                            locations=[(path, 0)])
            # Empty strings are never added to a catalog.
            if msgid:
                _add_to_catalog(message, locales)

        def _babel_extract(fp, locales, path):
            # Run the Jinja2 babel extractor over a file-like object and add
            # every message found to the catalogs for `locales`.
            try:
                all_parts = extract.extract('jinja2.ext.babel_extract',
                                            fp,
                                            options=options,
                                            comment_tags=comment_tags)
                for parts in all_parts:
                    lineno, msgid, comments, context = parts
                    message = babel_catalog.Message(msgid,
                                                    None,
                                                    auto_comments=comments,
                                                    locations=[(path, lineno)])
                    _add_to_catalog(message, locales)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting body: {}'.format(path))
                raise

        # Extract from collections in /content/:
        # Strings only extracted for relevant locales, determined by locale
        # scope (pod > collection > document > document part)
        last_pod_path = None
        for collection in self.pod.list_collections():
            if utils.fnmatches_paths(collection.blueprint_path, paths):
                text = 'Extracting: {}'.format(collection.blueprint_path)
                self.pod.logger.info(text)
                # Extract from blueprint.
                utils.walk(
                    collection.tagged_fields,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        collection.blueprint_path, collection.locales, msgid,
                        key, node, **kwargs))

            for doc in collection.list_docs(include_hidden=True):
                if not utils.fnmatches_paths(doc.pod_path, paths):
                    continue
                # Log each pod path once, even when consecutive docs share it.
                if doc.pod_path != last_pod_path:
                    self.pod.logger.info('Extracting: {} ({} locale{})'.format(
                        doc.pod_path,
                        len(doc.locales),
                        's' if len(doc.locales) != 1 else '',
                    ))
                    last_pod_path = doc.pod_path
                # If doc.locale is set, this is a doc part: only extract for
                # its own locales (not those of base doc).
                if doc.locale:
                    doc_locales = [doc.locale]
                # If not is set, this is a base doc (1st or only part): extract
                # for all locales declared for this doc
                elif doc.locales:
                    doc_locales = doc.locales
                # Otherwise only include in template (--no-localized)
                else:
                    doc_locales = [None]

                # NOTE(review): this assignment unconditionally overrides the
                # value chosen by the if/elif/else above, making that logic
                # dead code. Confirm which behavior is intended.
                doc_locales = [doc.locale]
                # Extract yaml fields: `foo@: Extract me`
                # ("tagged" = prior to stripping `@` suffix from field names)
                tagged_fields = doc.format.front_matter.data
                utils.walk(
                    tagged_fields,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        doc.pod_path, doc_locales, msgid, key, node, **kwargs))

                # Extract body: {{_('Extract me')}}
                if doc.body:
                    doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                    _babel_extract(doc_body, doc_locales, doc.pod_path)

            # Extract from CSVs for this collection's locales
            for filepath in self.pod.list_dir(collection.pod_path):
                if not utils.fnmatches_paths(filepath, paths):
                    continue
                if filepath.endswith('.csv'):
                    pod_path = os.path.join(collection.pod_path,
                                            filepath.lstrip('/'))
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    rows = self.pod.read_csv(pod_path)
                    # The row index `i` is not used.
                    for i, row in enumerate(rows):
                        for key, msgid in row.iteritems():
                            _handle_field(pod_path, collection.locales, msgid,
                                          key, row)

        # Extract from root of /content/:
        for path in self.pod.list_dir('/content/', recursive=False):
            if not utils.fnmatches_paths(path, paths):
                continue
            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/content/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                utils.walk(
                    self.pod.get_doc(pod_path).format.front_matter.data,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        pod_path, self.pod.list_locales(), msgid, key, node, **
                        kwargs))

        # Extract from /views/:
        # Not discriminating by file extension, because people use all sorts
        # (htm, html, tpl, dtml, jtml, ...)
        if not audit:
            for path in self.pod.list_dir('/views/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                pod_path = os.path.join('/views/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path) as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from /partials/:
        if not audit:
            for path in self.pod.list_dir('/partials/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                # Unlike /views/, partials are filtered by file extension.
                if path.endswith(('.yaml', '.yml', '.html', '.htm')):
                    pod_path = os.path.join('/partials/', path)
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    with self.pod.open_file(pod_path) as f:
                        _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from podspec.yaml:
        if utils.fnmatches_paths('/podspec.yaml', paths):
            self.pod.logger.info('Extracting: /podspec.yaml')
            utils.walk(
                self.pod.get_podspec().get_config(), lambda msgid, key, node,
                **kwargs: _handle_field('/podspec.yaml', self.pod.list_locales(
                ), msgid, key, node, **kwargs))

        # Save it out: behavior depends on --localized and --locale flags
        # If an out path is specified, always collect strings into the one catalog.
        if localized and not out_path:
            # Save each localized catalog
            for locale, new_catalog in localized_catalogs.items():
                # Skip if `locales` defined but doesn't include this locale
                if locales and locale not in locales:
                    continue
                existing_catalog = self.get(locale)
                existing_catalog.update_using_catalog(
                    new_catalog, include_obsolete=include_obsolete)
                # In audit mode the catalog is updated in memory only.
                if audit:
                    continue
                existing_catalog.save(include_header=include_header)
                missing = existing_catalog.list_untranslated()
                num_messages = len(existing_catalog)
                self.pod.logger.info(
                    'Saved: /{path} ({num_translated}/{num_messages})'.format(
                        path=existing_catalog.pod_path,
                        num_translated=num_messages - len(missing),
                        num_messages=num_messages))
            # Returns the freshly extracted per-locale catalogs, not the
            # merged `existing_catalog` objects that were saved.
            return untagged_strings, localized_catalogs.items()
        else:
            # --localized omitted / --no-localized
            template_catalog = self.get_template(self.template_path)
            template_catalog.update_using_catalog(
                unlocalized_catalog, include_obsolete=include_obsolete)
            if not audit:
                template_catalog.save(include_header=include_header)
                text = 'Saved: {} ({} messages)'
                self.pod.logger.info(
                    text.format(template_catalog.pod_path,
                                len(template_catalog)))
            return untagged_strings, [template_catalog]
示例#6
0
    def extract(
        self,
        include_obsolete=False,
        localized=False,
        paths=None,
        include_header=False,
        locales=None,
        use_fuzzy_matching=False,
    ):
        """Extract translatable messages from the pod and write catalogs.

        Messages are collected from tagged fields (keys ending in ``@``) of
        collection docs and the podspec, and from templates under ``/views/``
        and ``/content/`` via the Jinja2 babel extractor.

        Args:
            include_obsolete: When falsy, messages that were not extracted in
                this run are removed from the catalogs being written.
            localized: When truthy, update and save one catalog per locale;
                otherwise rebuild the single template catalog.
            paths: Passed to ``self._should_extract`` to filter pod paths.
            include_header: Forwarded to the catalog ``save`` /
                ``write_template`` calls.
            locales: In localized mode, restricts which locales are written.
                Ignored when falsy.
            use_fuzzy_matching: Forwarded to ``update_using_catalog`` in
                localized mode.

        Returns:
            ``None`` in localized mode; otherwise the return value of
            ``self.write_template``.

        Raises:
            tokenize.TokenError: Re-raised after logging when template
                extraction fails.
        """
        env = self.pod.create_template_env()

        all_locales = set(self.pod.list_locales())
        message_ids_to_messages = {}
        paths_to_messages = collections.defaultdict(set)
        paths_to_locales = collections.defaultdict(set)

        comment_tags = [":"]
        options = {"extensions": ",".join(env.extensions.keys()), "silent": "false"}

        # Extract messages from content files.
        def callback(doc, item, key, unused_node):
            # Adapter for utils.walk: attributes the field to the doc's path.
            _handle_field(doc.pod_path, item, key, unused_node)

        def _handle_field(path, item, key, node):
            # Keys can be non-strings (or None), so check the type before
            # calling endswith on them.
            if (
                not isinstance(key, basestring)
                or not key.endswith("@")
                or not isinstance(item, basestring)
            ):
                return
            # Support gettext "extracted comments" on tagged fields. This is
            # consistent with extracted comments in templates, which follow
            # the format "{#: Extracted comment. #}". An example:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                auto_comment = node.get("{}#".format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            locations = [(path, 0)]
            existing_message = message_ids_to_messages.get(item)
            if existing_message:
                existing_message.locations.extend(locations)
                paths_to_messages[path].add(existing_message)
            else:
                message = catalog.Message(item, None, auto_comments=auto_comments, locations=locations)
                message_ids_to_messages[message.id] = message
                paths_to_messages[path].add(message)

        for collection in self.pod.list_collections():
            text = "Extracting collection: {}".format(collection.pod_path)
            self.pod.logger.info(text)
            for doc in collection.list_docs(include_hidden=True):
                if not self._should_extract(paths, doc.pod_path):
                    continue
                tagged_fields = doc.get_tagged_fields()
                utils.walk(tagged_fields, lambda *args: callback(doc, *args))
                paths_to_locales[doc.pod_path].update(doc.locales)
                all_locales.update(doc.locales)

        # Extract messages from podspec.
        config = self.pod.get_podspec().get_config()
        podspec_path = "/podspec.yaml"
        if self._should_extract(paths, podspec_path):
            self.pod.logger.info("Extracting podspec: {}".format(podspec_path))
            utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

        # Extract messages from content and views.
        pod_files = [os.path.join("/views", path) for path in self.pod.list_dir("/views/")]
        pod_files += [os.path.join("/content", path) for path in self.pod.list_dir("/content/")]
        for pod_path in pod_files:
            if not self._should_extract(paths, pod_path):
                continue
            pod_locales = paths_to_locales.get(pod_path)
            if pod_locales:
                text = "Extracting: {} ({} locales)"
                text = text.format(pod_path, len(pod_locales))
                self.pod.logger.info(text)
            else:
                self.pod.logger.info("Extracting: {}".format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                all_parts = extract.extract(
                    "jinja2.ext.babel_extract", fp, options=options, comment_tags=comment_tags
                )
                for parts in all_parts:
                    lineno, string, comments, context = parts
                    locations = [(pod_path, lineno)]
                    existing_message = message_ids_to_messages.get(string)
                    if existing_message:
                        existing_message.locations.extend(locations)
                        # Associate the already-known message with this path
                        # too, as _handle_field does. Otherwise a locale that
                        # only reaches the message through this file would
                        # not receive it.
                        paths_to_messages[pod_path].add(existing_message)
                    else:
                        message = catalog.Message(
                            string, None, auto_comments=comments, context=context, locations=locations
                        )
                        paths_to_messages[pod_path].add(message)
                        message_ids_to_messages[message.id] = message
            except tokenize.TokenError:
                self.pod.logger.error("Problem extracting: {}".format(pod_path))
                raise
            finally:
                # Always release the file handle, including on errors.
                fp.close()

        # Localized message catalogs.
        if localized:
            for locale in all_locales:
                if locales and locale not in locales:
                    continue
                localized_catalog = self.get(locale)
                if not include_obsolete:
                    localized_catalog.obsolete = babel_util.odict()
                    # Iterate over a copy: entries are deleted in the loop.
                    for message in list(localized_catalog):
                        if message.id not in message_ids_to_messages:
                            localized_catalog.delete(message.id, context=message.context)

                catalog_to_merge = catalog.Catalog()
                for path, message_items in paths_to_messages.iteritems():
                    # Paths with declared locales only contribute to those
                    # locales; paths without any contribute to all of them.
                    locales_with_this_path = paths_to_locales.get(path)
                    if locales_with_this_path and locale not in locales_with_this_path:
                        continue
                    for message in message_items:
                        translation = None
                        existing_message = localized_catalog.get(message.id)
                        if existing_message:
                            translation = existing_message.string
                        catalog_to_merge.add(
                            message.id,
                            translation,
                            locations=message.locations,
                            auto_comments=message.auto_comments,
                            flags=message.flags,
                            user_comments=message.user_comments,
                            context=message.context,
                            lineno=message.lineno,
                            previous_id=message.previous_id,
                        )

                localized_catalog.update_using_catalog(catalog_to_merge, use_fuzzy_matching=use_fuzzy_matching)
                localized_catalog.save(include_header=include_header)
                missing = localized_catalog.list_untranslated()
                num_messages = len(localized_catalog)
                num_translated = num_messages - len(missing)
                text = "Saved: /{path} ({num_translated}/{num_messages})"
                self.pod.logger.info(
                    text.format(
                        path=localized_catalog.pod_path, num_translated=num_translated, num_messages=num_messages
                    )
                )
            return

        # Global (or missing, specified by -o) message catalog.
        template_path = self.template_path
        catalog_obj, _ = self._get_or_create_catalog(template_path)
        if not include_obsolete:
            # Start from an empty template: every existing entry is dropped
            # and only the messages extracted in this run are re-added.
            catalog_obj.obsolete = babel_util.odict()
            for message in list(catalog_obj):
                catalog_obj.delete(message.id, context=message.context)
        for message in message_ids_to_messages.itervalues():
            catalog_obj.add(message.id, None, locations=message.locations, auto_comments=message.auto_comments)
        return self.write_template(
            template_path, catalog_obj, include_obsolete=include_obsolete, include_header=include_header
        )
示例#7
0
    def extract(self):
        """Rebuild the ``messages.pot`` translation template for the pod.

        Messages are extracted from templates under ``/views/`` and
        ``/content/`` whose extension is in ``_TRANSLATABLE_EXTENSIONS``,
        from tagged fields (keys ending in ``@``) of every collection
        document, and from the podspec config. The ``requested`` flag of a
        message already present in the previous template is carried over.

        Returns:
            The catalog object that was written to the template file.

        Raises:
            tokenize.TokenError: Re-raised after logging when template
                extraction fails.
        """
        # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
        template_path = os.path.join(Catalogs.root, 'messages.pot')
        if not self.pod.file_exists(template_path):
            self.pod.create_file(template_path, None)
            existing = False
        else:
            existing_fp = self.pod.open_file(template_path)
            try:
                existing = pofile.read_po(existing_fp)
            finally:
                existing_fp.close()

        def _handle_field(path, item, key, unused_node):
            # Keys can be non-strings, so check the type before endswith.
            if (not isinstance(key, basestring) or not key.endswith('@')
                    or not isinstance(item, basestring)):
                return
            # Use the path this field was actually read from. The podspec is
            # handled by this same function, so the comment must not depend
            # on a document loop variable.
            comments = ['{}:{}'.format(path, key)]
            catalog_obj.add(item,
                            None, [(path, 0)],
                            auto_comments=comments,
                            context=None)

        def callback(doc, item, key, unused_node):
            # Adapter for utils.walk: attributes the field to the doc's path.
            _handle_field(doc.pod_path, item, key, unused_node)

        template = self.pod.open_file(template_path, mode='w')
        try:
            # The template is re-read after being opened for writing, as in
            # the original code. NOTE(review): this presumably yields an
            # empty catalog because mode 'w' truncates — confirm against
            # pod.open_file.
            reread_fp = self.pod.open_file(template_path)
            try:
                catalog_obj = pofile.read_po(reread_fp)
            finally:
                reread_fp.close()

            self.pod.logger.info(
                'Updating translation template: {}'.format(template_path))

            options = {
                'extensions':
                ','.join(self.pod.get_template_env().extensions.keys()),
                'silent': 'false',
            }

            # Extract messages from content and views.
            pod_files = [
                os.path.join('/views', path)
                for path in self.pod.list_dir('/views/')
            ]
            pod_files += [
                os.path.join('/content', path)
                for path in self.pod.list_dir('/content/')
            ]
            for pod_path in pod_files:
                if os.path.splitext(pod_path)[-1] not in _TRANSLATABLE_EXTENSIONS:
                    continue
                self.pod.logger.info('Extracting from: {}'.format(pod_path))
                fp = self.pod.open_file(pod_path)
                try:
                    messages = extract.extract('jinja2.ext.babel_extract',
                                               fp,
                                               options=options)
                    for message in messages:
                        lineno, string, comments, context = message
                        flags = set()
                        if existing and string in existing:
                            existing_message = existing.get(string)
                            if existing_message and 'requested' in existing_message.flags:
                                flags.add('requested')
                        catalog_obj.add(string,
                                        None,
                                        [(pod_path, lineno)],
                                        auto_comments=comments,
                                        context=context,
                                        flags=flags)
                except tokenize.TokenError:
                    self.pod.logger.error(
                        'Problem extracting: {}'.format(pod_path))
                    raise
                finally:
                    fp.close()

            # Extract messages from content files.
            for collection in self.pod.list_collections():
                self.pod.logger.info('Extracting from collection: {}'.format(
                    collection.pod_path))
                for doc in collection.list_documents(include_hidden=True):
                    utils.walk(doc.tagged_fields,
                               lambda *args: callback(doc, *args))

            # Extract messages from podspec.
            config = self.pod.get_podspec().get_config()
            podspec_path = '/podspec.yaml'
            self.pod.logger.info(
                'Extracting from podspec: {}'.format(podspec_path))
            utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

            # Write to PO template.
            self.pod.logger.info(
                'Writing {} messages to translation template.'.format(
                    len(catalog_obj)))
            pofile.write_po(template,
                            catalog_obj,
                            width=80,
                            no_location=True,
                            omit_header=True,
                            sort_output=True,
                            sort_by_file=True)
        finally:
            # Close the template even when extraction fails part-way.
            template.close()
        return catalog_obj
示例#8
0
    def extract(self, include_obsolete=None, localized=None, paths=None,
                include_header=None, locales=None, use_fuzzy_matching=None,
                audit=False):
        include_obsolete, localized, include_header, use_fuzzy_matching, = \
            self.get_extract_config(include_header=include_header,
                                    include_obsolete=include_obsolete, localized=localized,
                                    use_fuzzy_matching=use_fuzzy_matching)

        env = self.pod.get_jinja_env()
        # {
        #    locale1: locale1_catalog,
        #    locale2: locale2_catalog,
        #    ...
        # }
        # This is built up as we extract
        localized_catalogs = {}
        untagged_strings = []
        unlocalized_catalog = catalogs.Catalog()  # for localized=False case

        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        def _add_to_catalog(message, locales):
            # Add to all relevant catalogs
            for locale in locales:
                if locale not in localized_catalogs:
                    # Start with a new catalog so we can track what's obsolete:
                    # we'll merge it with existing translations later.
                    # *NOT* setting `locale` kwarg here b/c that will load existing
                    # translations.
                    localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
                localized_catalogs[locale][message.id] = message
            unlocalized_catalog[message.id] = message

        def _handle_field(path, locales, msgid, key, node, parent_node=None):
            if (not key
                    or not isinstance(msgid, basestring)
                    or not isinstance(key, basestring)):
                return
            if not key.endswith('@'):
                if msgid:
                    untagged_strings.append((path, msgid))
                return
            # Support gettext "extracted comments" on tagged fields:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                auto_comment = node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            elif isinstance(node, list) and parent_node:
                auto_comment = parent_node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)

            message = babel_catalog.Message(
                msgid,
                None,
                auto_comments=auto_comments,
                locations=[(path, 0)])
            if msgid:
                _add_to_catalog(message, locales)

        def _babel_extract(fp, locales, path):
            """Runs the Jinja2 Babel extractor over `fp` and catalogs each
            message it yields for `locales`, located at (`path`, line number).

            A tokenize.TokenError is logged and then re-raised.
            """
            try:
                extracted = extract.extract(
                    'jinja2.ext.babel_extract', fp,
                    options=options, comment_tags=comment_tags)
                # The context yielded by the extractor is not used here.
                for lineno, msgid, comments, context in extracted:
                    location = (path, lineno)
                    _add_to_catalog(
                        babel_catalog.Message(
                            msgid,
                            None,
                            auto_comments=comments,
                            locations=[location]),
                        locales)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting body: {}'.format(path))
                raise

        # Extract from collections in /content/:
        # Strings only extracted for relevant locales, determined by locale
        # scope (pod > collection > document > document part)
        last_pod_path = None
        for collection in self.pod.list_collections():
            text = 'Extracting: {}'.format(collection.blueprint_path)
            self.pod.logger.info(text)
            # Extract from blueprint.
            utils.walk(collection.tagged_fields,
                       lambda msgid, key, node, **kwargs: _handle_field(
                           collection.blueprint_path, collection.locales, msgid, key, node,
                           **kwargs))
            # Extract from docs in collection.
            for doc in collection.docs(include_hidden=True):
                if not self._should_extract_as_babel(paths, doc.pod_path):
                    continue

            for doc in collection.list_docs(include_hidden=True):
                if doc.pod_path != last_pod_path:
                    self.pod.logger.info(
                        'Extracting: {} ({} locale{})'.format(
                            doc.pod_path,
                            len(doc.locales),
                            's' if len(doc.locales) != 1 else '',
                        )
                    )
                    last_pod_path = doc.pod_path
                # If doc.locale is set, this is a doc part: only extract for
                # its own locales (not those of base doc).
                if doc.locale:
                    doc_locales = [doc.locale]
                # If not is set, this is a base doc (1st or only part): extract
                # for all locales declared for this doc
                elif doc.locales:
                    doc_locales = doc.locales
                # Otherwise only include in template (--no-localized)
                else:
                    doc_locales = [None]

                doc_locales = [doc.locale]
                # Extract yaml fields: `foo@: Extract me`
                # ("tagged" = prior to stripping `@` suffix from field names)
                tagged_fields = doc.format.front_matter.data
                utils.walk(tagged_fields,
                           lambda msgid, key, node, **kwargs: _handle_field(
                               doc.pod_path, doc_locales, msgid, key, node, **kwargs))

                # Extract body: {{_('Extract me')}}
                if doc.body:
                    doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                    _babel_extract(doc_body, doc_locales, doc.pod_path)

            # Extract from CSVs for this collection's locales
            for filepath in self.pod.list_dir(collection.pod_path):
                if filepath.endswith('.csv'):
                    pod_path = os.path.join(
                        collection.pod_path, filepath.lstrip('/'))
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    rows = self.pod.read_csv(pod_path)
                    for i, row in enumerate(rows):
                        for key, msgid in row.iteritems():
                            _handle_field(
                                pod_path, collection.locales, msgid, key, row)

        # Extract from root of /content/:
        for path in self.pod.list_dir('/content/', recursive=False):
            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/content/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                utils.walk(
                    self.pod.get_doc(pod_path).format.front_matter.data,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        pod_path, self.pod.list_locales(), msgid, key, node, **kwargs)
                )

        # Extract from /views/:
        # Not discriminating by file extension, because people use all sorts
        # (htm, html, tpl, dtml, jtml, ...)
        if not audit:
            for path in self.pod.list_dir('/views/'):
                if path.startswith('.'):
                    continue
                pod_path = os.path.join('/views/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path) as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from podspec.yaml:
        self.pod.logger.info('Extracting: /podspec.yaml')
        utils.walk(
            self.pod.get_podspec().get_config(),
            lambda msgid, key, node, **kwargs: _handle_field(
                '/podspec.yaml', self.pod.list_locales(), msgid, key, node, **kwargs)
        )

        # Save it out: behavior depends on --localized and --locale flags
        if localized:
            # Save each localized catalog
            for locale, new_catalog in localized_catalogs.items():
                # Skip if `locales` defined but doesn't include this locale
                if locales and locale not in locales:
                    continue
                existing_catalog = self.get(locale)
                existing_catalog.update_using_catalog(
                    new_catalog,
                    include_obsolete=include_obsolete)
                if audit:
                    continue
                existing_catalog.save(include_header=include_header)
                missing = existing_catalog.list_untranslated()
                num_messages = len(existing_catalog)
                self.pod.logger.info(
                    'Saved: /{path} ({num_translated}/{num_messages})'.format(
                        path=existing_catalog.pod_path,
                        num_translated=num_messages - len(missing),
                        num_messages=num_messages)
                )
            return untagged_strings, localized_catalogs.items()
        else:
            # --localized omitted / --no-localized
            template_catalog = self.get_template()
            template_catalog.update_using_catalog(
                unlocalized_catalog,
                include_obsolete=include_obsolete)
            if not audit:
                template_catalog.save(include_header=include_header)
                text = 'Saved: {} ({} messages)'
                self.pod.logger.info(
                    text.format(template_catalog.pod_path,
                                len(template_catalog))
                )
            return untagged_strings, [template_catalog]
示例#9
0
  def extract(self):
    """Extracts translatable messages and rewrites the PO template.

    Messages are collected from:
      1. Files under /views/ whose extension is in _TRANSLATABLE_EXTENSIONS.
      2. String fields whose key ends with '@' in the tagged fields of every
         document (hidden ones included) of every collection.
      3. String fields whose key ends with '@' in the podspec config.

    The result is written to `messages.pot` under `self.root`.

    Returns:
      The catalog holding every extracted message.

    Raises:
      tokenize.TokenError: If the extractor cannot tokenize a view. The
        problem is logged before the error is re-raised.
    """
    catalog_obj = catalog.Catalog()

    # Create the template file if it doesn't exist.
    # TODO(jeremydw): Optimize this.
    template_path = os.path.join(self.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
      self.pod.create_file(template_path, None)

    logging.info('Updating translation template: {}'.format(template_path))

    # Extract messages from views.
    for path in self.pod.list_dir('/views/'):
      pod_path = os.path.join('/views', path)
      if os.path.splitext(pod_path)[-1] not in _TRANSLATABLE_EXTENSIONS:
        continue
      logging.info('Extracting from view: {}'.format(pod_path))
      fp = self.pod.open_file(pod_path)
      try:
        messages = extract.extract('jinja2.ext.babel_extract', fp)
        for lineno, string, comments, context in messages:
          catalog_obj.add(
              string, None, [(pod_path, lineno)], auto_comments=comments,
              context=context)
      except tokenize.TokenError:
        logging.error('Problem extracting: {}'.format(pod_path))
        raise
      finally:
        # Always release the view's file handle, even on failure.
        fp.close()

    # Extract messages from content files.
    def _handle_field(path, item, key, unused_node):
      # Only string values stored under string keys can be tagged. Checking
      # the key type first avoids calling `endswith` on non-string keys
      # (e.g. integer keys in YAML).
      if not isinstance(item, basestring) or not isinstance(key, basestring):
        return
      if key.endswith('@'):
        catalog_obj.add(
            item, None, [(path, 0)], auto_comments=[], context=None)

    for collection in self.pod.list_collections():
      logging.info('Extracting from collection: {}'.format(collection.pod_path))
      for doc in collection.list_documents(include_hidden=True):
        # The lambda is invoked synchronously by the walk, so `doc` always
        # refers to the document currently being processed.
        utils.walk(doc.tagged_fields,
                   lambda *args: _handle_field(doc.pod_path, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    logging.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template. The file is opened for writing only now so that a
    # failed extraction does not leave an emptied template behind.
    logging.info('Writing {} messages to translation template.'.format(len(catalog_obj)))
    template = self.pod.open_file(template_path, mode='w')
    try:
      pofile.write_po(template, catalog_obj, width=80, no_location=True,
                      omit_header=True, sort_output=True, sort_by_file=True)
    finally:
      template.close()
    return catalog_obj
示例#10
0
  def extract(self):
    """Extracts translatable messages into the template catalog.

    Messages are collected from:
      1. Files under /views/ and /content/ whose extension is in
         _TRANSLATABLE_EXTENSIONS, using the Jinja2 Babel extractor.
      2. String fields whose key ends with '@' in the tagged fields of every
         document (hidden ones included) of every collection.
      3. String fields whose key ends with '@' in the podspec config.

    Returns:
      The result of `self.write_template` for the updated catalog.

    Raises:
      tokenize.TokenError: If the extractor cannot tokenize a file. The
        problem is logged before the error is re-raised.
    """
    env = self.pod.create_template_env()
    template_path = self.template_path
    catalog_obj, _ = self._get_or_create_catalog(template_path)

    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }
    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path) for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path) for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
      if os.path.splitext(pod_path)[-1] not in _TRANSLATABLE_EXTENSIONS:
        continue
      self.pod.logger.info('Extracting from: {}'.format(pod_path))
      fp = self.pod.open_file(pod_path)
      try:
        messages = extract.extract('jinja2.ext.babel_extract', fp,
                                   options=options, comment_tags=comment_tags)
        for message in messages:
          self._add_message(catalog_obj, message)
      except tokenize.TokenError:
        self.pod.logger.error('Problem extracting: {}'.format(pod_path))
        raise
      finally:
        # Always release the file handle, even on failure.
        fp.close()

    # Extract messages from content files.
    def _handle_field(path, item, key, node):
      # Only string values stored under string keys can be tagged. The key
      # type is checked first so that non-string keys (e.g. integer keys in
      # YAML) are skipped instead of failing on `endswith`.
      if not isinstance(key, basestring) or not isinstance(item, basestring):
        return
      if not key.endswith('@'):
        return
      # Support gettext "extracted comments" on tagged fields. This is
      # consistent with extracted comments in templates, which follow
      # the format "{#: Extracted comment. #}". An example:
      #   field@: Message.
      #   field@#: Extracted comment for field@.
      auto_comments = []
      if isinstance(node, dict):
        auto_comment = node.get('{}#'.format(key))
        if auto_comment:
          auto_comments.append(auto_comment)
      catalog_obj.add(item, None, auto_comments=auto_comments)

    for collection in self.pod.list_collections():
      self.pod.logger.info('Extracting from collection: {}'.format(collection.pod_path))
      for doc in collection.list_documents(include_hidden=True):
        # The lambda is invoked synchronously by the walk, so `doc` always
        # refers to the document currently being processed. `path` is accepted
        # by the handler but not recorded in the catalog.
        utils.walk(doc.tagged_fields,
                   lambda *args: _handle_field(doc.pod_path, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    return self.write_template(template_path, catalog_obj)
示例#11
0
  def extract(self):
    """Extracts translatable messages and rewrites the PO template.

    Messages are collected from:
      1. Files under /views/ and /content/ whose extension is in
         _TRANSLATABLE_EXTENSIONS, using the Jinja2 Babel extractor. A
         message that was flagged 'requested' in the previous template keeps
         that flag.
      2. String fields whose key ends with '@' in the tagged fields of every
         document (hidden ones included) of every collection.
      3. String fields whose key ends with '@' in the podspec config.

    The result is written to `messages.pot` under `Catalogs.root`.

    Returns:
      The catalog that was written to the template.

    Raises:
      tokenize.TokenError: If the extractor cannot tokenize a file. The
        problem is logged before the error is re-raised.
    """
    # Create the template file if it doesn't exist.
    # TODO(jeremydw): Optimize this.
    template_path = os.path.join(Catalogs.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
      self.pod.create_file(template_path, None)
      existing = False
    else:
      existing_fp = self.pod.open_file(template_path)
      try:
        existing = pofile.read_po(existing_fp)
      finally:
        existing_fp.close()

    template = self.pod.open_file(template_path, mode='w')
    try:
      # NOTE(review): the template was opened for writing just above; if that
      # truncates the file, this parses an empty file and the catalog starts
      # out empty. Confirm that this is the intent.
      catalog_fp = self.pod.open_file(template_path)
      try:
        catalog_obj = pofile.read_po(catalog_fp)
      finally:
        catalog_fp.close()

      self.pod.logger.info('Updating translation template: {}'.format(template_path))

      comment_tags = [
          ':',
      ]

      options = {
          'extensions': ','.join(self.pod.get_template_env().extensions.keys()),
          'silent': 'false',
      }

      # Extract messages from content and views.
      pod_files = [os.path.join('/views', path) for path in self.pod.list_dir('/views/')]
      pod_files += [os.path.join('/content', path) for path in self.pod.list_dir('/content/')]
      for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] not in _TRANSLATABLE_EXTENSIONS:
          continue
        self.pod.logger.info('Extracting from: {}'.format(pod_path))
        fp = self.pod.open_file(pod_path)
        try:
          messages = extract.extract('jinja2.ext.babel_extract', fp,
                                     options=options, comment_tags=comment_tags)
          for lineno, string, comments, context in messages:
            # Carry the 'requested' flag over from the previous template.
            flags = set()
            if existing and string in existing:
              existing_message = existing.get(string)
              if existing_message and 'requested' in existing_message.flags:
                flags.add('requested')
            catalog_obj.add(
                string, None, auto_comments=comments,
                context=context, flags=flags)
        except tokenize.TokenError:
          self.pod.logger.error('Problem extracting: {}'.format(pod_path))
          raise
        finally:
          # Always release the file handle, even on failure.
          fp.close()

      # Extract messages from content files.
      def _handle_field(path, item, key, node):
        # Only string values stored under string keys can be tagged. The key
        # type is checked first so that non-string keys (e.g. integer keys in
        # YAML) are skipped instead of failing on `endswith`.
        if not isinstance(key, basestring) or not isinstance(item, basestring):
          return
        if not key.endswith('@'):
          return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
          auto_comment = node.get('{}#'.format(key))
          if auto_comment:
            auto_comments.append(auto_comment)
        catalog_obj.add(item, None, auto_comments=auto_comments)

      for collection in self.pod.list_collections():
        self.pod.logger.info('Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
          # The lambda is invoked synchronously by the walk, so `doc` always
          # refers to the document currently being processed.
          utils.walk(doc.tagged_fields,
                     lambda *args: _handle_field(doc.pod_path, *args))

      # Extract messages from podspec.
      config = self.pod.get_podspec().get_config()
      podspec_path = '/podspec.yaml'
      self.pod.logger.info('Extracting from podspec: {}'.format(podspec_path))
      utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

      # Write to PO template.
      self.pod.logger.info('Writing {} messages to translation template.'.format(len(catalog_obj)))
      pofile.write_po(template, catalog_obj, width=80,
                      omit_header=True, sort_output=True, sort_by_file=True)
    finally:
      # Close the template handle whether or not extraction succeeded.
      template.close()
    return catalog_obj
示例#12
0
    def extract(self, include_obsolete=False, localized=False, paths=None,
                include_header=False, locales=None, use_fuzzy_matching=False):
        """Extracts translatable messages and updates the message catalogs.

        Messages are gathered from collection blueprints and documents
        (string fields whose key ends with `@`), from the podspec, and from
        files under /views/, /content/ and /data/. Each file is routed to CSV
        extraction, YAML field extraction or the Jinja2 Babel extractor
        according to `self._should_extract_as_csv` and
        `self._should_extract_as_babel`. Messages with an empty id are never
        recorded.

        Args:
          include_obsolete: When false, obsolete entries are dropped from the
            catalogs being updated and messages that were not extracted are
            deleted from them.
          localized: When true, update one catalog per locale instead of the
            global template.
          paths: Passed to `self._should_extract_as_csv` and
            `self._should_extract_as_babel` to select what is extracted.
          include_header: Passed through when saving catalogs and when
            writing the template.
          locales: If given (localized mode only), only catalogs for these
            locales are updated.
          use_fuzzy_matching: Passed through when merging extracted messages
            into each localized catalog.

        Returns:
          None in localized mode; otherwise the result of
          `self.write_template`.

        Raises:
          tokenize.TokenError: If the Babel extractor cannot tokenize a file.
            The problem is logged before the error is re-raised.
        """
        env = self.pod.get_jinja_env()

        all_locales = set(self.pod.list_locales())
        message_ids_to_messages = {}
        paths_to_messages = collections.defaultdict(set)
        paths_to_locales = collections.defaultdict(set)

        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        # Extract from content files.
        def callback(doc, item, key, unused_node):
            # Attribute the field to the pod path of the object being walked
            # (a document or a collection).
            _handle_field(doc.pod_path, item, key, unused_node)

        def _add_existing_message(msgid, locations, auto_comments=None,
                                  context=None, path=None):
            # Records an occurrence of `msgid`: extends the locations of an
            # already-seen message or creates a new one, and associates the
            # message with `path`.
            if not msgid:
                # An empty id designates the catalog header entry in gettext,
                # so it must never be recorded as a translatable message
                # (e.g. `field@: ''` or an empty CSV cell).
                return
            existing_message = message_ids_to_messages.get(msgid)
            auto_comments = [] if auto_comments is None else auto_comments
            if existing_message:
                existing_message.locations.extend(locations)
                paths_to_messages[path].add(existing_message)
            else:
                message = catalog.Message(
                    msgid,
                    None,
                    auto_comments=auto_comments,
                    context=context,
                    locations=locations)
                paths_to_messages[path].add(message)
                message_ids_to_messages[message.id] = message

        def _handle_field(path, item, key, node):
            if (not key
                    or not isinstance(item, basestring)
                    or not isinstance(key, basestring)
                    or not key.endswith('@')):
                return
            # Support gettext "extracted comments" on tagged fields. This is
            # consistent with extracted comments in templates, which follow
            # the format "{#: Extracted comment. #}". An example:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                auto_comment = node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            locations = [(path, 0)]
            _add_existing_message(
                msgid=item,
                auto_comments=auto_comments,
                locations=locations,
                path=path)

        for collection in self.pod.list_collections():
            text = 'Extracting collection: {}'.format(collection.pod_path)
            self.pod.logger.info(text)
            # Extract from blueprint.
            utils.walk(collection.tagged_fields, lambda *args: callback(collection, *args))
            # Extract from docs in collection.
            for doc in collection.docs(include_hidden=True):
                if not self._should_extract_as_babel(paths, doc.pod_path):
                    continue
                tagged_fields = doc.get_tagged_fields()
                # The lambda is invoked synchronously by the walk, so `doc`
                # always refers to the document currently being processed.
                utils.walk(tagged_fields, lambda *args: callback(doc, *args))
                paths_to_locales[doc.pod_path].update(doc.locales)
                all_locales.update(doc.locales)

        # Extract from podspec.
        config = self.pod.get_podspec().get_config()
        podspec_path = '/podspec.yaml'
        if self._should_extract_as_babel(paths, podspec_path):
            self.pod.logger.info('Extracting podspec: {}'.format(podspec_path))
            utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

        # Extract from content and views.
        pod_files = [os.path.join('/views', path)
                     for path in self.pod.list_dir('/views/')]
        pod_files += [os.path.join('/content', path)
                      for path in self.pod.list_dir('/content/')]
        pod_files += [os.path.join('/data', path)
                      for path in self.pod.list_dir('/data/')]
        for pod_path in pod_files:
            if self._should_extract_as_csv(paths, pod_path):
                rows = utils.get_rows_from_csv(self.pod, pod_path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                for row in rows:
                    # The location index is the position of the column within
                    # the row.
                    for i, (key, val) in enumerate(row.iteritems()):
                        if key.endswith('@'):
                            locations = [(pod_path, i)]
                            _add_existing_message(
                                msgid=val,
                                locations=locations,
                                path=pod_path)
            elif self._should_extract_as_babel(paths, pod_path):
                if pod_path.startswith('/data') and pod_path.endswith(('.yaml', '.yml')):
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    content = self.pod.read_file(pod_path)
                    fields = utils.load_yaml(content, pod=self.pod)
                    utils.walk(fields, lambda *args: _handle_field(pod_path, *args))
                    continue

                pod_locales = paths_to_locales.get(pod_path)
                if pod_locales:
                    text = 'Extracting: {} ({} locales)'
                    text = text.format(pod_path, len(pod_locales))
                    self.pod.logger.info(text)
                else:
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                fp = self.pod.open_file(pod_path)
                try:
                    all_parts = extract.extract(
                        'jinja2.ext.babel_extract', fp, options=options,
                        comment_tags=comment_tags)
                    for lineno, string, comments, context in all_parts:
                        locations = [(pod_path, lineno)]
                        _add_existing_message(
                            msgid=string,
                            auto_comments=comments,
                            context=context,
                            locations=locations,
                            path=pod_path)
                except tokenize.TokenError:
                    self.pod.logger.error('Problem extracting: {}'.format(pod_path))
                    raise
                finally:
                    # Always release the file handle, even on failure.
                    fp.close()

        # Localized message catalogs.
        if localized:
            for locale in all_locales:
                if locales and locale not in locales:
                    continue
                localized_catalog = self.get(locale)
                if not include_obsolete:
                    localized_catalog.obsolete = babel_util.odict()
                    # Iterate over a copy because messages are deleted from
                    # the catalog while looping.
                    for message in list(localized_catalog):
                        if message.id not in message_ids_to_messages:
                            localized_catalog.delete(message.id, context=message.context)

                catalog_to_merge = catalog.Catalog()
                for path, message_items in paths_to_messages.iteritems():
                    # Paths with known locales only contribute to catalogs of
                    # those locales; paths without any contribute to all.
                    locales_with_this_path = paths_to_locales.get(path)
                    if locales_with_this_path and locale not in locales_with_this_path:
                        continue
                    for message in message_items:
                        # Keep the translation already present for this id.
                        translation = None
                        existing_message = localized_catalog.get(message.id)
                        if existing_message:
                            translation = existing_message.string
                        catalog_to_merge.add(
                            message.id, translation, locations=message.locations,
                            auto_comments=message.auto_comments, flags=message.flags,
                            user_comments=message.user_comments, context=message.context,
                            lineno=message.lineno, previous_id=message.previous_id)

                localized_catalog.update_using_catalog(
                    catalog_to_merge, use_fuzzy_matching=use_fuzzy_matching)
                localized_catalog.save(include_header=include_header)
                missing = localized_catalog.list_untranslated()
                num_messages = len(localized_catalog)
                num_translated = num_messages - len(missing)
                text = 'Saved: /{path} ({num_translated}/{num_messages})'
                self.pod.logger.info(
                    text.format(path=localized_catalog.pod_path,
                                num_translated=num_translated,
                                num_messages=num_messages))
            return

        # Global (or missing, specified by -o) message catalog.
        template_path = self.template_path
        catalog_obj, _ = self._get_or_create_catalog(template_path)
        if not include_obsolete:
            # Empty the catalog so that it only holds what was just extracted.
            catalog_obj.obsolete = babel_util.odict()
            for message in list(catalog_obj):
                catalog_obj.delete(message.id, context=message.context)
        for message in message_ids_to_messages.itervalues():
            if message.id:
                catalog_obj.add(message.id, None, locations=message.locations,
                                auto_comments=message.auto_comments)
        return self.write_template(
            template_path, catalog_obj, include_obsolete=include_obsolete,
            include_header=include_header)