Exemplo n.º 1
0
    def extract(self,
                include_obsolete=None,
                localized=None,
                paths=None,
                include_header=None,
                locales=None,
                use_fuzzy_matching=None,
                audit=False,
                out_path=None):
        include_obsolete, localized, include_header, use_fuzzy_matching, = \
            self.get_extract_config(include_header=include_header,
                                    include_obsolete=include_obsolete, localized=localized,
                                    use_fuzzy_matching=use_fuzzy_matching)

        env = self.pod.get_jinja_env()
        # {
        #    locale1: locale1_catalog,
        #    locale2: locale2_catalog,
        #    ...
        # }
        # This is built up as we extract
        localized_catalogs = {}
        untagged_strings = []
        unlocalized_catalog = catalogs.Catalog()  # for localized=False case

        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        def _add_to_catalog(message, locales):
            # Add to all relevant catalogs
            for locale in locales:
                if locale not in localized_catalogs:
                    # Start with a new catalog so we can track what's obsolete:
                    # we'll merge it with existing translations later.
                    # *NOT* setting `locale` kwarg here b/c that will load existing
                    # translations.
                    localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
                localized_catalogs[locale][message.id] = message
            unlocalized_catalog[message.id] = message

        def _handle_field(path, locales, msgid, key, node, parent_node=None):
            if (not key or not isinstance(msgid, basestring)
                    or not isinstance(key, basestring)):
                return
            if not key.endswith('@'):
                if msgid:
                    untagged_strings.append((path, msgid))
                return
            # Support gettext "extracted comments" on tagged fields:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                if isinstance(key, unicode):
                    key = key.encode('utf-8')
                auto_comment = node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            elif isinstance(node, list) and parent_node:
                auto_comment = parent_node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)

            message = babel_catalog.Message(msgid,
                                            None,
                                            auto_comments=auto_comments,
                                            locations=[(path, 0)])
            if msgid:
                _add_to_catalog(message, locales)

        def _babel_extract(fp, locales, path):
            try:
                all_parts = extract.extract('jinja2.ext.babel_extract',
                                            fp,
                                            options=options,
                                            comment_tags=comment_tags)
                for parts in all_parts:
                    lineno, msgid, comments, context = parts
                    message = babel_catalog.Message(msgid,
                                                    None,
                                                    auto_comments=comments,
                                                    locations=[(path, lineno)])
                    _add_to_catalog(message, locales)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting body: {}'.format(path))
                raise

        # Extract from collections in /content/:
        # Strings only extracted for relevant locales, determined by locale
        # scope (pod > collection > document > document part)
        last_pod_path = None
        for collection in self.pod.list_collections():
            if utils.fnmatches_paths(collection.blueprint_path, paths):
                text = 'Extracting: {}'.format(collection.blueprint_path)
                self.pod.logger.info(text)
                # Extract from blueprint.
                utils.walk(
                    collection.tagged_fields,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        collection.blueprint_path, collection.locales, msgid,
                        key, node, **kwargs))

            for doc in collection.list_docs(include_hidden=True):
                if not utils.fnmatches_paths(doc.pod_path, paths):
                    continue
                if doc.pod_path != last_pod_path:
                    self.pod.logger.info('Extracting: {} ({} locale{})'.format(
                        doc.pod_path,
                        len(doc.locales),
                        's' if len(doc.locales) != 1 else '',
                    ))
                    last_pod_path = doc.pod_path
                # If doc.locale is set, this is a doc part: only extract for
                # its own locales (not those of base doc).
                if doc.locale:
                    doc_locales = [doc.locale]
                # If not is set, this is a base doc (1st or only part): extract
                # for all locales declared for this doc
                elif doc.locales:
                    doc_locales = doc.locales
                # Otherwise only include in template (--no-localized)
                else:
                    doc_locales = [None]

                doc_locales = [doc.locale]
                # Extract yaml fields: `foo@: Extract me`
                # ("tagged" = prior to stripping `@` suffix from field names)
                tagged_fields = doc.format.front_matter.data
                utils.walk(
                    tagged_fields,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        doc.pod_path, doc_locales, msgid, key, node, **kwargs))

                # Extract body: {{_('Extract me')}}
                if doc.body:
                    doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                    _babel_extract(doc_body, doc_locales, doc.pod_path)

            # Extract from CSVs for this collection's locales
            for filepath in self.pod.list_dir(collection.pod_path):
                if not utils.fnmatches_paths(filepath, paths):
                    continue
                if filepath.endswith('.csv'):
                    pod_path = os.path.join(collection.pod_path,
                                            filepath.lstrip('/'))
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    rows = self.pod.read_csv(pod_path)
                    for i, row in enumerate(rows):
                        for key, msgid in row.iteritems():
                            _handle_field(pod_path, collection.locales, msgid,
                                          key, row)

        # Extract from root of /content/:
        for path in self.pod.list_dir('/content/', recursive=False):
            if not utils.fnmatches_paths(path, paths):
                continue
            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/content/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                utils.walk(
                    self.pod.get_doc(pod_path).format.front_matter.data,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        pod_path, self.pod.list_locales(), msgid, key, node, **
                        kwargs))

        # Extract from /views/:
        # Not discriminating by file extension, because people use all sorts
        # (htm, html, tpl, dtml, jtml, ...)
        if not audit:
            for path in self.pod.list_dir('/views/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                pod_path = os.path.join('/views/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path) as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from /partials/:
        if not audit:
            for path in self.pod.list_dir('/partials/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                if path.endswith(('.yaml', '.yml', '.html', '.htm')):
                    pod_path = os.path.join('/partials/', path)
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    with self.pod.open_file(pod_path) as f:
                        _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from podspec.yaml:
        if utils.fnmatches_paths('/podspec.yaml', paths):
            self.pod.logger.info('Extracting: /podspec.yaml')
            utils.walk(
                self.pod.get_podspec().get_config(), lambda msgid, key, node,
                **kwargs: _handle_field('/podspec.yaml', self.pod.list_locales(
                ), msgid, key, node, **kwargs))

        # Save it out: behavior depends on --localized and --locale flags
        # If an out path is specified, always collect strings into the one catalog.
        if localized and not out_path:
            # Save each localized catalog
            for locale, new_catalog in localized_catalogs.items():
                # Skip if `locales` defined but doesn't include this locale
                if locales and locale not in locales:
                    continue
                existing_catalog = self.get(locale)
                existing_catalog.update_using_catalog(
                    new_catalog, include_obsolete=include_obsolete)
                if audit:
                    continue
                existing_catalog.save(include_header=include_header)
                missing = existing_catalog.list_untranslated()
                num_messages = len(existing_catalog)
                self.pod.logger.info(
                    'Saved: /{path} ({num_translated}/{num_messages})'.format(
                        path=existing_catalog.pod_path,
                        num_translated=num_messages - len(missing),
                        num_messages=num_messages))
            return untagged_strings, localized_catalogs.items()
        else:
            # --localized omitted / --no-localized
            template_catalog = self.get_template(self.template_path)
            template_catalog.update_using_catalog(
                unlocalized_catalog, include_obsolete=include_obsolete)
            if not audit:
                template_catalog.save(include_header=include_header)
                text = 'Saved: {} ({} messages)'
                self.pod.logger.info(
                    text.format(template_catalog.pod_path,
                                len(template_catalog)))
            return untagged_strings, [template_catalog]
Exemplo n.º 2
0
    def extract(self, include_obsolete=None, localized=None, paths=None,
                include_header=None, locales=None, use_fuzzy_matching=None,
                audit=False, out_path=None):
        include_obsolete, localized, include_header, use_fuzzy_matching, = \
            self.get_extract_config(include_header=include_header,
                                    include_obsolete=include_obsolete, localized=localized,
                                    use_fuzzy_matching=use_fuzzy_matching)

        env = self.pod.get_jinja_env()
        # {
        #    locale1: locale1_catalog,
        #    locale2: locale2_catalog,
        #    ...
        # }
        # This is built up as we extract
        localized_catalogs = {}
        untagged_strings = []
        unlocalized_catalog = catalogs.Catalog()  # for localized=False case

        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        def _add_to_catalog(message, locales):
            # Add to all relevant catalogs
            for locale in locales:
                if locale not in localized_catalogs:
                    # Start with a new catalog so we can track what's obsolete:
                    # we'll merge it with existing translations later.
                    # *NOT* setting `locale` kwarg here b/c that will load existing
                    # translations.
                    localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
                localized_catalogs[locale][message.id] = message
            unlocalized_catalog[message.id] = message

        def _handle_field(path, locales, msgid, key, node, parent_node=None):
            if (not key
                    or not isinstance(msgid, basestring)
                    or not isinstance(key, basestring)):
                return
            if not key.endswith('@'):
                if msgid:
                    untagged_strings.append((path, msgid))
                return
            # Support gettext "extracted comments" on tagged fields:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            auto_comments = []
            if isinstance(node, dict):
                if isinstance(key, unicode):
                    key = key.encode('utf-8')
                auto_comment = node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)
            elif isinstance(node, list) and parent_node:
                auto_comment = parent_node.get('{}#'.format(key))
                if auto_comment:
                    auto_comments.append(auto_comment)

            message = babel_catalog.Message(
                msgid,
                None,
                auto_comments=auto_comments,
                locations=[(path, 0)])
            if msgid:
                _add_to_catalog(message, locales)

        def _babel_extract(fp, locales, path):
            try:
                all_parts = extract.extract(
                    'jinja2.ext.babel_extract',
                    fp,
                    options=options,
                    comment_tags=comment_tags)
                for parts in all_parts:
                    lineno, msgid, comments, context = parts
                    message = babel_catalog.Message(
                        msgid,
                        None,
                        auto_comments=comments,
                        locations=[(path, lineno)])
                    _add_to_catalog(message, locales)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting body: {}'.format(path))
                raise

        # Extract from collections in /content/:
        # Strings only extracted for relevant locales, determined by locale
        # scope (pod > collection > document > document part)
        last_pod_path = None
        for collection in self.pod.list_collections():
            if utils.fnmatches_paths(collection.blueprint_path, paths):
                text = 'Extracting: {}'.format(collection.blueprint_path)
                self.pod.logger.info(text)
                # Extract from blueprint.
                utils.walk(collection.tagged_fields,
                           lambda msgid, key, node, **kwargs: _handle_field(
                               collection.blueprint_path, collection.locales, msgid, key, node,
                               **kwargs))

            for doc in collection.list_docs(include_hidden=True):
                if not utils.fnmatches_paths(doc.pod_path, paths):
                    continue
                if doc.pod_path != last_pod_path:
                    self.pod.logger.info(
                        'Extracting: {} ({} locale{})'.format(
                            doc.pod_path,
                            len(doc.locales),
                            's' if len(doc.locales) != 1 else '',
                        )
                    )
                    last_pod_path = doc.pod_path
                # If doc.locale is set, this is a doc part: only extract for
                # its own locales (not those of base doc).
                if doc.locale:
                    doc_locales = [doc.locale]
                # If not is set, this is a base doc (1st or only part): extract
                # for all locales declared for this doc
                elif doc.locales:
                    doc_locales = doc.locales
                # Otherwise only include in template (--no-localized)
                else:
                    doc_locales = [None]

                doc_locales = [doc.locale]
                # Extract yaml fields: `foo@: Extract me`
                # ("tagged" = prior to stripping `@` suffix from field names)
                tagged_fields = doc.format.front_matter.raw_data
                utils.walk(tagged_fields,
                           lambda msgid, key, node, **kwargs: _handle_field(
                               doc.pod_path, doc_locales, msgid, key, node, **kwargs))

                # Extract body: {{_('Extract me')}}
                if doc.body:
                    doc_body = cStringIO.StringIO(doc.body.encode('utf-8'))
                    _babel_extract(doc_body, doc_locales, doc.pod_path)

            # Extract from CSVs for this collection's locales
            for filepath in self.pod.list_dir(collection.pod_path):
                if not utils.fnmatches_paths(filepath, paths):
                    continue
                if filepath.endswith('.csv'):
                    pod_path = os.path.join(
                        collection.pod_path, filepath.lstrip('/'))
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    rows = self.pod.read_csv(pod_path)
                    for i, row in enumerate(rows):
                        for key, msgid in row.iteritems():
                            _handle_field(
                                pod_path, collection.locales, msgid, key, row)

        # Extract from root of /content/:
        for path in self.pod.list_dir('/content/', recursive=False):
            if not utils.fnmatches_paths(path, paths):
                continue
            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/content/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                utils.walk(
                    self.pod.get_doc(pod_path).format.front_matter.raw_data,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        pod_path, self.pod.list_locales(), msgid, key, node, **kwargs)
                )

        # Extract from /views/:
        # Not discriminating by file extension, because people use all sorts
        # (htm, html, tpl, dtml, jtml, ...)
        if not audit:
            for path in self.pod.list_dir('/views/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                pod_path = os.path.join('/views/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path) as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from /partials/:
        if not audit:
            for path in self.pod.list_dir('/partials/'):
                if not utils.fnmatches_paths(path, paths) \
                        or path.startswith('.'):
                    continue
                pod_path = os.path.join('/partials/', path)
                if path.endswith(('.yaml', '.yml')):
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    utils.walk(
                        self.pod.get_doc(pod_path).format.front_matter.raw_data,
                        lambda msgid, key, node, **kwargs: _handle_field(
                            pod_path, self.pod.list_locales(), msgid, key, node, **kwargs)
                    )
                if path.endswith(('.html', '.htm')):
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    with self.pod.open_file(pod_path) as f:
                        _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from podspec.yaml:
        if utils.fnmatches_paths('/podspec.yaml', paths):
            self.pod.logger.info('Extracting: /podspec.yaml')
            utils.walk(
                self.pod.get_podspec().get_config(),
                lambda msgid, key, node, **kwargs: _handle_field(
                    '/podspec.yaml', self.pod.list_locales(), msgid, key, node, **kwargs)
            )

        # Save it out: behavior depends on --localized and --locale flags
        # If an out path is specified, always collect strings into the one catalog.
        if localized and not out_path:
            # Save each localized catalog
            for locale, new_catalog in localized_catalogs.items():
                # Skip if `locales` defined but doesn't include this locale
                if locales and locale not in locales:
                    continue
                existing_catalog = self.get(locale)
                existing_catalog.update_using_catalog(
                    new_catalog,
                    include_obsolete=include_obsolete)
                if audit:
                    continue
                existing_catalog.save(include_header=include_header)
                missing = existing_catalog.list_untranslated()
                num_messages = len(existing_catalog)
                self.pod.logger.info(
                    'Saved: /{path} ({num_translated}/{num_messages})'.format(
                        path=existing_catalog.pod_path,
                        num_translated=num_messages - len(missing),
                        num_messages=num_messages)
                )
            return untagged_strings, localized_catalogs.items()
        else:
            # --localized omitted / --no-localized
            template_catalog = self.get_template(self.template_path)
            template_catalog.update_using_catalog(
                unlocalized_catalog,
                include_obsolete=include_obsolete)
            if not audit:
                template_catalog.save(include_header=include_header)
                text = 'Saved: {} ({} messages)'
                self.pod.logger.info(
                    text.format(template_catalog.pod_path,
                                len(template_catalog))
                )
            return untagged_strings, [template_catalog]