Пример #1
0
class TestTraverse(unittest.TestCase):
    def setUp(self):
        self.ftl_parser = FTLParser()

    def test_simple_values(self):
        ast, _ = self.ftl_parser.parse(ftl('''
            foo = Foo
            bar = Bar
        '''))

        self.assertEqual(
            ast.traverse(lambda x: x).toJSON(),
            ast.toJSON()
        )

    def test_complex_values(self):
        ast, _ = self.ftl_parser.parse(ftl('''
            foo = Foo
            foo = Foo { bar }
                [bar]  AAA { $num ->
                           [one] One
                          *[other] Many { NUMBER($num) }
                       } BBB
        '''))

        self.assertEqual(
            ast.traverse(lambda x: x).toJSON(),
            ast.toJSON()
        )
Пример #2
0
class MergeContext(object):
    """Stateful context for merging translation resources.

    `MergeContext` must be configured with the target language and the
    directory locations of the input data.

    The transformation takes four types of input data:

        - The en-US FTL reference files which will be used as templates for
          message order, comments and sections.

        - The current FTL files for the given language.

        - The legacy (DTD, properties) translation files for the given
          language.  The translations from these files will be transformed
          into FTL and merged into the existing FTL files for this language.

        - A list of `FTL.Entity` objects some of whose nodes are special
          operation nodes: CONCAT, EXTERNAL, LITERAL, LITERAL_FROM, PLURALS,
          PLURALS_FROM, REPLACE, REPLACE_FROM, SOURCE.
    """

    def __init__(self, lang, reference_dir, localization_dir):
        self.ftl_parser = FTLParser()
        self.ftl_serializer = FTLSerializer()

        # An iterable of plural category names relevant to the context's
        # language.  E.g. ('one', 'other') for English.
        self.plural_categories = get_plural_categories(lang)

        # Paths to directories with input data, relative to CWD.
        self.reference_dir = reference_dir
        self.localization_dir = localization_dir

        # Parsed input resources stored by resource path.
        self.reference_resources = {}
        self.localization_resources = {}

        # An iterable of `FTL.Entity` objects some of whose nodes can be the
        # transform operations.
        self.transforms = {}

        # A dict whose keys are `(path, key)` tuples corresponding to target
        # FTL translations, and values are sets of `(path, key)` tuples
        # corresponding to localized entities which will be migrated.
        self.dependencies = {}

    def read_ftl_resource(self, path):
        """Read an FTL resource and parse it into an AST."""
        f = codecs.open(path, 'r', 'utf8')
        try:
            contents = f.read()
        finally:
            f.close()

        ast, errors = self.ftl_parser.parse(contents)

        if len(errors):
            logger = logging.getLogger('migrate')
            for err in errors:
                logger.warn(u'Syntax error in {}: {}'.format(path, err))

        return ast

    def read_legacy_resource(self, path):
        """Read a legacy resource and parse it into a dict."""
        parser = getParser(path)
        parser.readFile(path)
        # Transform the parsed result which is an iterator into a dict.
        return {entity.key: entity.val for entity in parser}

    def add_reference(self, path, realpath=None):
        """Add an FTL AST to this context's reference resources."""
        fullpath = os.path.join(self.reference_dir, realpath or path)
        try:
            ast = self.read_ftl_resource(fullpath)
        except IOError as err:
            logger = logging.getLogger('migrate')
            logger.error(u'Missing reference file: {}'.format(path))
            raise err
        except UnicodeDecodeError as err:
            logger = logging.getLogger('migrate')
            logger.error(u'Error reading file {}: {}'.format(path, err))
            raise err
        else:
            self.reference_resources[path] = ast

    def add_localization(self, path):
        """Add an existing localization resource.

        If it's an FTL resource, add an FTL AST.  Otherwise, it's a legacy
        resource.  Use a compare-locales parser to create a dict of (key,
        string value) tuples.
        """
        fullpath = os.path.join(self.localization_dir, path)
        if fullpath.endswith('.ftl'):
            try:
                ast = self.read_ftl_resource(fullpath)
            except IOError:
                logger = logging.getLogger('migrate')
                logger.warn(u'Missing localization file: {}'.format(path))
            except UnicodeDecodeError as err:
                logger = logging.getLogger('migrate')
                logger.warn(u'Error reading file {}: {}'.format(path, err))
            else:
                self.localization_resources[path] = ast
        else:
            try:
                collection = self.read_legacy_resource(fullpath)
            except IOError:
                logger = logging.getLogger('migrate')
                logger.warn(u'Missing localization file: {}'.format(path))
            else:
                self.localization_resources[path] = collection

    def add_transforms(self, path, transforms):
        """Define transforms for path.

        Each transform is an extended FTL node with `Transform` nodes as some
        values.  Transforms are stored in their lazy AST form until
        `merge_changeset` is called, at which point they are evaluated to real
        FTL nodes with migrated translations.

        Each transform is scanned for `SOURCE` nodes which will be used to
        build the list of dependencies for the transformed message.
        """
        def get_sources(acc, cur):
            if isinstance(cur, SOURCE):
                acc.add((cur.path, cur.key))
            return acc

        for node in transforms:
            # Scan `node` for `SOURCE` nodes and collect the information they
            # store into a set of dependencies.
            dependencies = fold(get_sources, node, set())
            # Set these sources as dependencies for the current transform.
            self.dependencies[(path, node.id.name)] = dependencies

        path_transforms = self.transforms.setdefault(path, [])
        path_transforms += transforms

    def get_source(self, path, key):
        """Get an entity value from the localized source.

        Used by the `SOURCE` transform.
        """
        if path.endswith('.ftl'):
            resource = self.localization_resources[path]
            return get_entity(resource.entities(), key)
        else:
            resource = self.localization_resources[path]
            return resource.get(key, None)

    def merge_changeset(self, changeset=None):
        """Return a generator of FTL ASTs for the changeset.

        The input data must be configured earlier using the `add_*` methods.
        if given, `changeset` must be a set of (path, key) tuples describing
        which legacy translations are to be merged.

        Given `changeset`, return a dict whose keys are resource paths and
        values are `FTL.Resource` instances.  The values will also be used to
        update this context's existing localization resources.
        """

        if changeset is None:
            # Merge all known legacy translations.
            changeset = {
                (path, key)
                for path, strings in self.localization_resources.iteritems()
                for key in strings.iterkeys()
            }

        for path, reference in self.reference_resources.iteritems():
            current = self.localization_resources.get(path, FTL.Resource())
            transforms = self.transforms.get(path, [])

            def in_changeset(ident):
                """Check if entity should be merged.

                If at least one dependency of the entity is in the current
                set of changeset, merge it.
                """
                message_deps = self.dependencies.get((path, ident), None)

                # Don't merge if we don't have a transform for this message.
                if message_deps is None:
                    return False

                # As a special case, if a transform exists but has no
                # dependecies, it's a hardcoded `FTL.Node` which doesn't
                # migrate any existing translation but rather creates a new
                # one.  Merge it.
                if len(message_deps) == 0:
                    return True

                # If the intersection of the dependencies and the current
                # changeset is non-empty, merge this message.
                return message_deps & changeset

            # Merge legacy translations with the existing ones using the
            # reference as a template.
            snapshot = merge_resource(
                self, reference, current, transforms, in_changeset
            )

            # If none of the transforms is in the given changeset, the merged
            # snapshot is identical to the current translation. We compare
            # JSON trees rather then use filtering by `in_changeset` to account
            # for translations removed from `reference`.
            if snapshot.toJSON() == current.toJSON():
                continue

            # Store the merged snapshot on the context so that the next merge
            # already takes it into account as the existing localization.
            self.localization_resources[path] = snapshot

            # The result for this path is a complete `FTL.Resource`.
            yield path, snapshot

    def serialize_changeset(self, changeset):
        """Return a dict of serialized FTLs for the changeset.

        Given `changeset`, return a dict whose keys are resource paths and
        values are serialized FTL snapshots.
        """

        return {
            path: self.ftl_serializer.serialize(snapshot.toJSON())
            for path, snapshot in self.merge_changeset(changeset)
        }