Example #1
    def build(self, outfile_name, infile_names, changed, context):
        # The infiles here are genfiles/extracted_strings/foo.pot.pickle
        # Copy unchanged messages from the existing all.pot, if possible.
        po_entries = collections.OrderedDict()
        if outfile_name in changed or changed == infile_names:
            log.v1('Regenerating %s from scratch (it changed on us!)' %
                   outfile_name)
            changed = infile_names  # everything changed
        else:
            # Extract unchanged messages from the existing all.pot
            existing_all_pot = _read_pofile(self.abspath(outfile_name))
            if existing_all_pot:  # we found an existing file
                log.v2('Loading existing messages')

                # We don't care about deleted files: those that
                # existed in the last call to build() but don't exist
                # now.  (They'll be removed from all.pot by default.)
                # Get rid of them from 'changed' so they don't gum up
                # the code below.
                changed = [f for f in changed if f in infile_names]

                # Elements in infile_names and changed look like
                # 'genfiles/extracted_strings/en/foo.pot.pickle'.  Here
                # we want the versions of infiles/changed that are just
                # 'foo'.  We use the _input_map to get that mapping.
                orig_infiles = set(context['_input_map'][f][0]
                                   for f in infile_names)
                # f might not be in _input_map if it's been deleted.
                orig_changed = set(context['_input_map'][f][0]
                                   for f in changed)
                unchanged = orig_infiles - orig_changed
                for entry in existing_all_pot:
                    # Get rid of occurrences for files that no longer exist.
                    # TODO(csilvers): get rid of comments in the same way.
                    entry.occurrences = [
                        occ for occ in entry.occurrences if occ[0] in unchanged
                    ]
                    # If the msgid still exists at all, let's keep it!
                    if entry.occurrences:
                        po_entries[entry.msgid] = entry
            else:
                changed = infile_names

        log.v2('Extracting new and changed messages')
        for filename in changed:
            input_pot = _read_pofile(self.abspath(filename))
            for poentry in input_pot:
                if poentry.msgid in po_entries:
                    existing_poentry = po_entries[poentry.msgid]
                    _merge_poentry(existing_poentry, poentry)
                else:
                    po_entries[poentry.msgid] = poentry

        log.v2('Writing merged output')
        _write_pofile(po_entries.itervalues(),
                      self.abspath(outfile_name),
                      write_debug_file_to=self.abspath(
                          outfile_name.replace('.pickle',
                                               '.txt_for_debugging')))
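
The _merge_poentry helper used above is not shown in this example.  Below is a
minimal sketch of what such a merge could look like, assuming polib-style
POEntry objects with msgid, occurrences, and comment attributes; the real
helper may merge more fields.

def _merge_poentry(existing_entry, new_entry):
    """Fold new_entry's metadata into existing_entry (illustrative sketch).

    Both arguments are assumed to be polib.POEntry-style objects that
    share the same msgid but were extracted from different files.
    """
    # Union the (filename, linenumber) occurrence pairs, preserving order.
    for occurrence in new_entry.occurrences:
        if occurrence not in existing_entry.occurrences:
            existing_entry.occurrences.append(occurrence)
    # Keep any extracted comment that only the new entry carries.
    if new_entry.comment and new_entry.comment != existing_entry.comment:
        existing_entry.comment = '\n'.join(
            c for c in (existing_entry.comment, new_entry.comment) if c)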
Example #2
    def build(self, outfile_name, infile_names, _, context):
        import topic_icons.icon_util

        with open(self.abspath(infile_names[0]), 'r') as f:
            md5_manifest = json.load(f)

        symlink_map = {}
        symlink_from_dir = os.path.join('genfiles', 'topic-icons', 'icons')
        for infile_name in infile_names[1:]:
            # The infile names are, e.g., 'unit_circle.png.416.png'.
            infile_basename = os.path.basename(infile_name)

            # Gives us, e.g., 'unit_circle.png.416' and '.png', the source
            # icon name with the size configuration still attached, and the
            # final icon format.
            (src_icon_name_and_size,
             icon_format) = os.path.splitext(infile_basename)

            # Strip the '.' from the icon format.
            icon_format = icon_format[1:]

            # Gives us, e.g., 'unit_circle.png' and '.416', the source icon
            # name and the size configuration.
            (src_icon_name, size) = os.path.splitext(src_icon_name_and_size)

            # Strip the '.' from the size configuration.
            size = size[1:]

            # Given 'unit_circle.png.416.png', we want the symlink to be called
            # 'icons/unit_circle.png-123abc-416.png', so that the format is:
            # '{src-icon}-{hash-suffix}-{size}.{format}'.
            symlink_to = self.abspath(infile_name)

            md5sum = md5_manifest[src_icon_name]

            file_name = topic_icons.icon_util.name_for_built_icon(
                src_icon_name, md5sum, size, icon_format)
            symlink_from = self.abspath(
                os.path.join(symlink_from_dir, file_name))

            symlink_util.symlink(symlink_to, symlink_from)

            symlink_map[symlink_to] = symlink_from

        # We can also clean up any unused symlinks in the directory.
        symlink_from_dir_abspath = self.abspath(symlink_from_dir)
        for f in os.listdir(symlink_from_dir_abspath):
            abspath = os.path.join(symlink_from_dir_abspath, f)
            if os.path.islink(abspath) and abspath not in symlink_map.values():
                log.v1('   ... removing obsolete symlink %s', abspath)
                os.unlink(abspath)

        with open(self.abspath(outfile_name), 'w') as f:
            json.dump(symlink_map, f, sort_keys=True)
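
To make the double os.path.splitext in the loop above concrete, here is a
small standalone walkthrough of the 'unit_circle.png.416.png' case from the
comments (not part of the build rule itself):

import os

infile_basename = 'unit_circle.png.416.png'

# First split: 'unit_circle.png.416' and '.png' (format of the built icon).
src_icon_name_and_size, icon_format = os.path.splitext(infile_basename)
icon_format = icon_format[1:]                 # 'png'

# Second split: 'unit_circle.png' and '.416' (source icon and size).
src_icon_name, size = os.path.splitext(src_icon_name_and_size)
size = size[1:]                               # '416'

assert (src_icon_name, size, icon_format) == ('unit_circle.png', '416', 'png')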
Example #3
def _download_from_s3(gitbigfile_module, outfile_abspath, sha):
    s3_fetcher = gitbigfile_module.GitBigfile().transport()
    log.v2('Downloading s3://%s/%s to %s' % (
        s3_fetcher.bucket.name, sha, outfile_abspath + '.tmp'))
    s3_fetcher.get(sha, outfile_abspath + '.tmp')
    # Make sure we don't create the 'real' file until it's fully
    # downloaded.
    try:
        os.unlink(outfile_abspath)
    except (IOError, OSError):
        pass    # probably "file not found"
    try:
        os.rename(outfile_abspath + '.tmp', outfile_abspath)
    except OSError:
        log.v1('Error fetching %s' % outfile_abspath)
        raise
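
The download-to-.tmp-then-rename dance above is a general pattern for never
exposing a partially written file.  A generic sketch of the same idea, with
illustrative names that are not part of the build system:

import os


def atomic_write(path, data):
    """Write data to path without ever exposing a half-written file."""
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as f:
        f.write(data)
    # os.rename is atomic on POSIX when source and destination are on the
    # same filesystem, so readers see either the old file or the new one.
    os.rename(tmp_path, path)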
Example #4
    def build_many(self, outfile_infiles_changed_context):
        from shared.testutil import fake_datetime

        sha_to_files = {}            # for the files we need to get from S3
        for (outfile, infiles, _, context) in outfile_infiles_changed_context:
            assert len(infiles) == 1, infiles
            assert infiles[0].startswith('intl/translations/')

            with open(self.abspath(infiles[0])) as f:
                head = f.read(64).strip()

            # Does the head look like a sha1?  (sha1s are only 40 bytes.)
            # If so, store it for later.  If not, take care of it now.
            if head.strip('0123456789abcdefABCDEF') == '':
                sha_to_files.setdefault(head, []).append(outfile)
            else:
                # Nope, not a sha1.  NOTE: We could also use a hard-link,
                # but that could fail if genfiles is on a different
                # filesystem from the source.  Copying is more expensive
                # but safer.  Symlinks are right out.
                shutil.copyfile(self.abspath(infiles[0]),
                                self.abspath(outfile))

        if not sha_to_files:
            return

        # We could just call 'git bigfile pull' but we purposefully
        # don't so as to leave untouched the file-contents in
        # intl/translations.  This works better with kake, which
        # doesn't like it when input contents change as part of a kake
        # rule.
        self._munge_sys_path()     # so the following import succeeds
        import gitbigfile.command

        # Download all our files from S3 in parallel.  We store these
        # files under a 'permanent' name based on the sha1.  (Later
        # we'll copy these files to outfile_name.)  That way even if
        # you check out a different branch and come back to this one
        # again, you can get the old contents without needing to
        # revisit S3.
        # GitBigfile() (in _download_from_s3) runs 'git' commands in a
        # subprocess, so we need to be in the right repository for that.
        old_cwd = os.getcwd()
        os.chdir(self.abspath('intl/translations'))
        try:
            # This will actually try to download translation files via
            # bigfile.  This requires a real datetime for making the
            # api requests to S3 (S3 complains about weird dates).
            with fake_datetime.suspend_fake_datetime():
                arglists = []
                for (sha, outfiles) in sha_to_files.iteritems():
                    # Typically a given sha will have only one outfile,
                    # but for some shas (an empty po-file, e.g.), many
                    # outfiles may share the same sha!
                    log.v1('Fetching %s from S3' % ' '.join(outfiles))
                    # We just need to put this in a directory we know we
                    # can write to: take one of the outfile dirs arbitrarily.
                    sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
                    arglists.append(
                        (gitbigfile.command, self.abspath(sha_name), sha))
                shared.util.thread.run_many_threads(
                    self._download_from_s3, arglists)
        except RuntimeError as why:
            log.error(why)    # probably misleading, but maybe helpful
            # TODO(csilvers): check whether git-bigfile *is* set up
            # correctly, and give a more precise failure message if so.
            raise compile_rule.CompileFailure(
                "Failed to download translation file for %s from S3. "
                "Make sure you have git-bigfile set up as per the "
                "configs in the khan-dotfiles repo: namely, the "
                "'bigfile' section in .gitconfig.khan, and the "
                "update_credentials() section in setup.sh." % outfile)
        finally:
            os.chdir(old_cwd)

        # Now copy from the sha-name to the actual output filename.
        for (sha, outfiles) in sha_to_files.iteritems():
            sha_name = os.path.join(os.path.dirname(outfiles[0]), sha)
            for outfile in outfiles:
                log.v2('Copying from %s to %s' % (sha_name, outfile))
                try:
                    os.unlink(self.abspath(outfile))
                except OSError:
                    pass     # probably file not found
                os.link(self.abspath(sha_name), self.abspath(outfile))
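
The hex test in build_many above relies on str.strip() with a character set:
strip removes every leading and trailing character that appears in the set,
so a string made up entirely of hex digits strips down to the empty string.
A standalone illustration, with made-up values:

def _looks_like_hex(text):
    # Nothing is left after stripping if (and only if) every character
    # in text is a hex digit.
    return text.strip('0123456789abcdefABCDEF') == ''

assert _looks_like_hex('2fd4e1c67a2d28fced849ee1bb76e7391b93eb12')
assert not _looks_like_hex('msgid "Hello, world!"')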
Example #5
def _maybe_inline_images(compressed_content):
    """For small images, it's more efficient to inline them in the html.

    Most modern browsers support inlining image contents in html:
       css: background-image: url(data:image/png;base64,...)
       html: <img src='data:image/png;base64,...'>
    The advantage of doing this is to avoid an http request.  The
    disadvantages are that the image can't be cached separately from
    the webpage (bad if the web page changes often and the image
    changes never), and the total size is bigger due to the need to
    base64-encode.

    In general, it makes sense to use data uris for small images, for
    some value of 'small', or for (possibly large) images that a) are
    only used on one web page, b) are on html pages that do not change
    very much, and c) are on pages where rendering speed matters (just
    because it's not worth the effort otherwise).

    We also support a manual decision to inline via a text annotation:
    /*! data-uri... */.

    Arguments:
        compressed_content: The content to inline the image-urls in.

    Returns:
        The input content, but with zero, some, or all images
        inlined.
    """
    output = []
    lastpos = 0
    for m in _CSS_IMAGE_RE.finditer(compressed_content):
        image_url = m.group(1)
        always_inline = m.group(2)

        # Find how often the image appears in our packages.  If it
        # only appears once, inlining it is a no-brainer (if it's
        # 'small', anyway).  If it appears twice, we probably don't
        # want to inline -- it's better to use the browser cache.
        # If it appears more than twice, we definitely don't inline.
        try:
            (callers, img_relpath, img_size) = _IMAGE_URL_INFO.get()[image_url]
        except KeyError:
            log.v4('Not inlining image-content of %s: file not found on disk',
                   image_url)
            continue
        url_count = len(callers)
        if (always_inline or (url_count == 1 and img_size <= _MAX_INLINE_SIZE)
                or (url_count == 2 and img_size <= _MAX_INLINE_SIZE_IF_TWICE)):
            log.v1('Inlining image-content of %s', img_relpath)
            with open(ka_root.join(img_relpath), 'rb') as f:
                image_content = f.read()
            output.append(compressed_content[lastpos:m.start(1)])
            output.append(_data_uri_for_file(img_relpath, image_content))
            lastpos = m.end(1)
            if always_inline:  # let's nix the !data-uri comment in the output
                output.append(compressed_content[lastpos:m.start(2)])
                lastpos = m.end(2)
        else:
            log.v4(
                'Not inlining image-content of %s '
                '(url-count %s, img size %s)', img_relpath, url_count,
                img_size)

    # Get the last chunk, and then we're done!
    output.append(compressed_content[lastpos:])
    return ''.join(output)
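
_data_uri_for_file (and _CSS_IMAGE_RE) are defined elsewhere and not shown
here.  Purely as a sketch, a data-URI helper matching the format described in
the docstring might look like the following; the real helper may well differ,
e.g. in how it picks the mime type:

import base64
import mimetypes


def _data_uri_for_file(relpath, image_content):
    """Return a data: URI embedding image_content (illustrative sketch)."""
    mime_type = mimetypes.guess_type(relpath)[0] or 'application/octet-stream'
    encoded = base64.b64encode(image_content)    # a str in Python 2
    return 'data:%s;base64,%s' % (mime_type, encoded)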