Exemplo n.º 1
0
def download_url_to_zip(zipfile, url):
    """Fetch ``url`` and store the result in ``zipfile`` under the URL's basename.

    Every ``https://ka-`` occurrence in the URL is rewritten to
    ``http://ka-`` before fetching.  If the fetch raises, the error is
    logged and nothing is written to the archive.
    """
    plain_url = url.replace("https://ka-", "http://ka-")
    member_name = os.path.basename(plain_url)

    try:
        payload = fetch_file_from_url_or_cache(plain_url)
    except Exception as e:
        # A failed image request must not abort the whole download; report
        # it to the user and skip this one entry.
        logging.error("Error when downloading from URL: %s (%s)" % (plain_url, e))
    else:
        zipfile.writestr(member_name, payload)
Exemplo n.º 2
0
def _make_zip(zipfile, options):
    """Assemble the plugin package inside ``zipfile``.

    Three things are written, in this order:

    1. every file under ``options.plugin.source_dir`` that survives the
       exclude filter, stored under its path relative to the current
       working directory;
    2. every file under ``options.sphinx.builddir``, stored under
       ``<options.plugin.name>/docs/``;
    3. ``planet_explorer/pe_utils.py`` (read from next to this module), with
       the Segments / Sentry placeholder comments replaced by assignments
       when ``options.package`` carries the matching attribute.

    A name is filtered out when it matches any pattern in
    ``options.plugin.excludes`` and is not listed in
    ``options.plugin.skip_exclude``.
    """
    patterns = set(options.plugin.excludes)
    always_keep = options.plugin.skip_exclude
    source_root = options.plugin.source_dir

    def is_excluded(name):
        for pattern in patterns:
            if fnmatch.fnmatch(name, pattern):
                return True
        return False

    def filter_excludes(names):
        if not names:
            return []
        # Slice assignment prunes the caller's own list object, which is
        # what keeps os.walk from descending into excluded directories.
        names[:] = [
            name for name in names
            if not (is_excluded(name) and name not in always_keep)
        ]
        return names

    # 1. plugin sources
    for root, dirs, files in os.walk(source_root):
        for kept in filter_excludes(files):
            arc_dir = os.path.relpath(root, '.')
            zipfile.write(path(root) / kept, path(arc_dir) / kept)
        filter_excludes(dirs)

    # 2. built documentation
    for root, dirs, files in os.walk(options.sphinx.builddir):
        for doc_file in files:
            arc_dir = os.path.join(
                options.plugin.name,
                "docs",
                os.path.relpath(root, options.sphinx.builddir),
            )
            zipfile.write(path(root) / doc_file, path(arc_dir) / doc_file)

    # 3. pe_utils.py with the keys patched in
    utils_source = os.path.join(
        os.path.dirname(__file__), "planet_explorer", "pe_utils.py")
    with open(utils_source) as handle:
        contents = handle.read()

    if not hasattr(options.package, 'segments'):
        print("WARNING: No Segments write key provided.")
    else:
        contents = contents.replace(
            "# [set_segments_write_key]",
            f"os.environ['SEGMENTS_WRITE_KEY'] = '{options.package.segments}'"
        )

    if not hasattr(options.package, 'sentry'):
        print("WARNING: No Sentry DSN write key provided.")
    else:
        contents = contents.replace(
            "# [set_sentry_dsn]",
            f"os.environ['SENTRY_DSN'] = '{options.package.sentry}'")

    zipfile.writestr("planet_explorer/pe_utils.py", contents)
Exemplo n.º 3
0
def rezip_one(target_zip, source_zip, compression):
    """Re-compress the ``data`` member of ``source_zip`` into ``target_zip``.

    The new archive contains a single member named ``data`` written with
    the given ``compression`` method, and inherits the source file's stat
    information (via ``copystat``).

    If the source cannot be opened or read, a message is printed and the
    function returns without touching ``target_zip``.  Errors while writing
    the new archive propagate to the caller.
    """
    try:
        # Both the archive and the member are context-managed so the source
        # file handle is released deterministically.
        with ZipFile(source_zip) as source, source.open('data') as member:
            data = member.read()
    except Exception as e:
        print('skipping zip file', source_zip, 'for error', e)
        return

    # write to intermediate file in case source_dir == target_dir
    staging = target_zip + '.new'
    with ZipFile(staging, 'w', compression=compression) as zf:
        zf.writestr('data', data)

    copystat(source_zip, staging)
    move(staging, target_zip)
Exemplo n.º 4
0
    def create_artifact_bundle(org, release, project=None):
        """Zip the ``artifact_bundle`` fixture directory in memory.

        Every file is stored under its path relative to the fixture
        directory.  Files named ``manifest.json`` are not copied verbatim:
        the output of ``_patch_artifact_manifest`` for ``org``, ``release``
        and ``project`` is stored instead.

        Returns the bytes of the finished deflate-compressed archive.
        """
        import zipfile

        buffer = io.BytesIO()
        fixture_root = get_fixture_path("artifact_bundle")

        # The archive handle gets its own name so the ``zipfile`` module is
        # not shadowed inside the ``with`` body.
        with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
            for dirpath, _, filenames in os.walk(fixture_root):
                for filename in filenames:
                    source = os.path.join(dirpath, filename)
                    arcname = os.path.relpath(source, fixture_root)
                    if filename != "manifest.json":
                        archive.write(source, arcname)
                        continue
                    patched = _patch_artifact_manifest(source, org, release, project)
                    archive.writestr(arcname, patched)

        return buffer.getvalue()
  def image_to_file(self, href, zipfile):
    """Store the image at ``href`` in ``zipfile`` and return its local path.

    Protocol-relative URLs (``//host/...``) are fetched over ``http:``.
    Each distinct URL is downloaded once; later calls return the path
    remembered in ``self._images_cache``.  The image is written to
    ``<username>/img/<basename>`` and the returned path is
    ``img/<basename>``.

    A ``ConnectionError`` is reported on stderr and the placeholder
    ``image-unavailable.png`` is returned; other request errors propagate.
    """
    try:
      if href.startswith("//"):
        href = "http:" + href

      if href in self._images_cache:
        return self._images_cache[href]

      response = requests.get(href, headers=self.headers)
      response.raise_for_status()

      basename = os.path.basename(href)
      zipfile.writestr("%s/img/%s" % (self.username, basename), response.content)
      self._images_cache[href] = "img/%s" % basename
      return self._images_cache[href]

    except requests.exceptions.ConnectionError:
      sys.stderr.write("\n  Ignoring ConnectionError for %s\n" % href)
      return "image-unavailable.png"
Exemplo n.º 6
0
    def render_zip_contents(self, zipfile, story, filename, **kw):
        """Render each chapter of ``story`` and add it to ``zipfile``.

        Chapters are taken in ``order`` order, rendered with
        ``self.chapter_template`` and encoded with ``self.chapter_encoding``.
        Each one is stored as ``<filename>/<NN>_<slug>.<ext>``, where ``NN``
        is the 1-based position zero-padded to the digit count of the
        chapter total and ``ext`` is ``self.chapter_extension``.
        """
        suffix = self.chapter_extension
        ordered = story.chapter_set.order_by('order')
        # Every number gets as many digits as the chapter count has.
        pad = len(str(ordered.count()))

        for position, chapter in enumerate(ordered.iterator(), 1):
            context = {'chapter': chapter, 'story': story}
            rendered = render_to_string(self.chapter_template, context)
            payload = rendered.encode(self.chapter_encoding)

            slug = slugify(chapter.title)
            number = str(position).zfill(pad)
            member = str('%s/%s_%s.%s' % (filename, number, slug, suffix))

            zipfile.writestr(member, payload)
Exemplo n.º 7
0
 def render_zip_contents(self, zipfile, story, filename, **kw):
     """Write one rendered file per chapter of ``story`` into ``zipfile``.

     The archive member for a chapter is named
     ``<filename>/<number>_<slugified title>.<self.chapter_extension>``;
     ``number`` counts from 1 and is left-padded with zeros to the width
     of the total chapter count.  Content is the ``self.chapter_template``
     rendering, encoded using ``self.chapter_encoding``.
     """
     extension = self.chapter_extension
     chapter_qs = story.chapter_set.order_by('order')
     width = len(str(chapter_qs.count()))

     index = 0
     for chapter in chapter_qs.iterator():
         index += 1
         body = render_to_string(
             self.chapter_template,
             dict(chapter=chapter, story=story),
         ).encode(self.chapter_encoding)

         parts = (filename, str(index).rjust(width, '0'),
                  slugify(chapter.title), extension)
         zipfile.writestr(str('%s/%s_%s.%s' % parts), body)
Exemplo n.º 8
0
def add_dirs(zipfile):
    """
    Given a writable zipfile, inject directory entries for
    any directories implied by the presence of children.

    Every missing ancestor is added (``a/b/c.txt`` implies both ``a/b/``
    and ``a/``), and each implied directory is written exactly once even
    when several members share it.  Entries that already exist are left
    alone.  Returns the same ``zipfile`` object.
    """
    names = zipfile.namelist()
    # A set gives O(1) membership tests; it also grows as entries are
    # injected, so a directory shared by several members is written once.
    present = set(names)
    for name in names:
        parent = posixpath.dirname(name)
        # Walk towards the archive root.  The walk ends at "" for relative
        # names, or at an all-slash remainder for absolute ones.
        while parent.strip("/"):
            entry = parent + "/"
            if entry not in present:
                present.add(entry)
                zipfile.writestr(entry, b"")
            parent = posixpath.dirname(parent)
    return zipfile
Exemplo n.º 9
0
def to_zipfile(obj, zipfile, path=''):
    """Recursively export the HDF5 group ``obj`` into ``zipfile``.

    For every key of ``obj``:

    * an ``h5py.Group`` is exported recursively under ``path/key``;
    * a dataset whose ``target`` attribute differs from its own absolute
      path is recorded in the summary as ``{'target': ...}`` only;
    * a dataset with at most one element is stored inline in the summary;
    * any other dataset of dtype kind ``S``/``f``/``i``/``u`` is written to
      ``path/key.dat`` -- as JSON when it has more than two dimensions,
      otherwise as tab-delimited text -- and the summary records its
      target and shape;
    * datasets of any other dtype kind are reported and skipped.

    Finally the summary is written as ``fields.json`` under ``path``.
    """
    # savetxt format per numpy dtype kind; other kinds are not exported.
    formats = {'S': '%s', 'f': '%.8g', 'i': '%d', 'u': '%d'}
    summary = OrderedDict()
    for key in obj.keys():
        val = obj[key]
        new_path = join_path(path, key)
        if isinstance(val, h5py.Group):
            to_zipfile(val, zipfile, new_path)
            continue

        fname = join_path(path, key + '.dat')
        own_path = join_path('/', path, key)
        if 'target' in val.attrs and val.attrs['target'] != own_path:
            # Single pre-formatted argument: prints the same text whether
            # ``print`` is a statement (Python 2) or a function (Python 3).
            print("%s %s" % (val.attrs['target'], own_path))
            summary[key] = OrderedDict([['target', val.attrs['target']]])
            continue

        if numpy.product(val.shape) <= 1:
            summary[key] = val.value.tolist()
            continue

        value = obj[key].value
        if value.dtype.kind not in formats:
            print("unknown type of array:  %s %s" % (fname, value.dtype))
            continue

        fd, fn = tempfile.mkstemp()
        os.close(fd)  # to be opened by name
        try:
            if DEBUG:
                print("%s %s" % (fname, value.dtype.kind))
            if len(value.shape) > 2:
                with open(fn, 'w') as f:
                    simplejson.dump(value.tolist(), f)
            else:
                numpy.savetxt(fn,
                              value,
                              delimiter='\t',
                              fmt=formats[value.dtype.kind])
            zipfile.write(fn, fname)
        finally:
            # Remove the temp file even when serialising or zipping fails.
            os.remove(fn)
        summary[key] = OrderedDict(
            [['target', join_path('/', fname)],
             ['shape', obj[key].shape]])

    zipfile.writestr(os.path.join(path, 'fields.json'),
                     simplejson.dumps(summary, indent='  '))
Exemplo n.º 10
0
def _write_array_member(zipfile, value, arcname, fmt):
    """Serialise ``value`` to a temp file and add it to ``zipfile`` as ``arcname``.

    Arrays with more than two dimensions are dumped as JSON; anything else
    is written as tab-delimited text using the savetxt format ``fmt``.  The
    temp file is removed whether or not writing succeeds.
    """
    fd, tmp_name = tempfile.mkstemp()
    os.close(fd)  # to be opened by name
    try:
        if len(value.shape) > 2:
            with open(tmp_name, 'w') as f:
                simplejson.dump(value.tolist(), f)
        else:
            numpy.savetxt(tmp_name, value, delimiter='\t', fmt=fmt)
        zipfile.write(tmp_name, arcname)
    finally:
        os.remove(tmp_name)


def to_zipfile(obj, zipfile, path=''):
    """Export the HDF5 group ``obj`` (and its sub-groups) into ``zipfile``.

    Sub-groups are handled recursively.  For each dataset one of the
    following is recorded in a per-group summary, which is written last as
    ``fields.json`` under ``path``:

    * ``{'target': ...}`` when the dataset's ``target`` attribute names a
      path other than its own;
    * the value itself when the dataset holds at most one element;
    * ``{'target': '/path/key.dat', 'shape': ...}`` when the dataset has
      dtype kind ``S``/``f``/``i``/``u``; its contents are written to
      ``path/key.dat`` in the archive.

    Datasets of any other dtype kind are reported and left out.
    """
    formats = {
        'S': '%s',
        'f': '%.8g',
        'i': '%d',
        'u': '%d'}
    summary = OrderedDict()
    for key in obj.keys():
        val = obj[key]
        new_path = join_path(path, key)
        if isinstance(val, h5py.Group):
            to_zipfile(val, zipfile, new_path)
        else:
            fname = join_path(path, key+'.dat')
            if 'target' in val.attrs and val.attrs['target'] != join_path('/', path, key):
                # One pre-formatted argument keeps the output identical
                # under both the Python 2 statement and Python 3 function.
                print("%s %s" % (val.attrs['target'], join_path('/', path, key)))
                summary[key] = OrderedDict([['target', val.attrs['target']]])
            elif numpy.product(val.shape) <= 1:
                summary[key] = val.value.tolist()
            else:
                value = obj[key].value
                kind = value.dtype.kind
                if kind in formats:
                    if DEBUG:
                        print("%s %s" % (fname, kind))
                    _write_array_member(zipfile, value, fname, formats[kind])
                    summary[key] = OrderedDict([['target', join_path('/', fname)], ['shape',  obj[key].shape]])
                else:
                    print("unknown type of array:  %s %s" % (fname, value.dtype))
    zipfile.writestr(os.path.join(path, 'fields.json'), simplejson.dumps(summary, indent='  '))
Exemplo n.º 11
0
def _make_zip(zipfile, options):
    """Assemble the plugin package inside ``zipfile``.

    Written to the archive, in this order:

    1. every file under ``options.plugin.source_dir`` that survives the
       exclude filter (a name is dropped when it matches a pattern in
       ``options.plugin.excludes`` and is not in
       ``options.plugin.skip_exclude``), stored under its path relative to
       the current working directory;
    2. ``planet_explorer/pe_analytics.py`` with the Segments / Sentry
       placeholder comments replaced when ``options.package`` has the
       matching attribute (a warning is printed otherwise);
    3. ``planet_explorer/metadata.txt`` with ``general.version`` set to the
       digits and dots of ``options.package.version`` when it is given;
    4. ``planet_explorer/pe_utils.py`` with ``COMMIT_ID`` set to the output
       of ``git rev-parse HEAD``.

    Raises ``subprocess.CalledProcessError`` if the git command fails.
    """
    # Local import: ``SafeConfigParser`` was removed in Python 3.12 and
    # ``ConfigParser`` is the same class under its current name.
    from configparser import ConfigParser

    excludes = set(options.plugin.excludes)
    skips = options.plugin.skip_exclude

    src_dir = options.plugin.source_dir
    package_dir = os.path.join(os.path.dirname(__file__), "planet_explorer")

    def exclude(name):
        return any(fnmatch.fnmatch(name, pattern) for pattern in excludes)

    def filter_excludes(some_files):
        if not some_files:
            return []
        # to prevent descending into dirs, modify the list in place
        some_files[:] = [
            some_f for some_f in some_files
            if not (exclude(some_f) and some_f not in skips)
        ]
        return some_files

    for root, dirs, files in os.walk(src_dir):
        for f in filter_excludes(files):
            relpath = os.path.relpath(root, ".")
            zipfile.write(path(root) / f, path(relpath) / f)
        filter_excludes(dirs)

    analytics_filename = os.path.join(package_dir, "pe_analytics.py")
    with open(analytics_filename) as f:
        txt = f.read()
    if hasattr(options.package, "segments"):
        txt = txt.replace(
            "# [set_segments_write_key]",
            f"os.environ['SEGMENTS_WRITE_KEY'] = '{options.package.segments}'",
        )
    else:
        print("WARNING: No Segments write key provided.")
    if hasattr(options.package, "sentry"):
        txt = txt.replace(
            "# [set_sentry_dsn]",
            f"os.environ['SENTRY_DSN'] = '{options.package.sentry}'",
        )
    else:
        print("WARNING: No Sentry DSN write key provided.")
    zipfile.writestr("planet_explorer/pe_analytics.py", txt)

    metadata_filename = os.path.join(package_dir, "metadata.txt")
    cfg = ConfigParser()
    # Keep option names case-sensitive (the default lower-cases them).
    cfg.optionxform = str
    cfg.read(metadata_filename)
    if hasattr(options.package, "version"):
        version = "".join(c for c in options.package.version
                          if c.isdigit() or c == ".")
        cfg.set("general", "version", version)
    buf = StringIO()
    cfg.write(buf)
    zipfile.writestr("planet_explorer/metadata.txt", buf.getvalue())

    utils_filename = os.path.join(package_dir, "pe_utils.py")
    with open(utils_filename) as f:
        txt = f.read()
    commitid = (subprocess.check_output(["git", "rev-parse",
                                         "HEAD"]).decode("utf-8").strip())
    txt = txt.replace(
        'COMMIT_ID = ""',
        f'COMMIT_ID = "{commitid}"',
    )
    zipfile.writestr("planet_explorer/pe_utils.py", txt)
Exemplo n.º 12
0
  def load_recipe_index(self, zipfile):
    """Download the user's liked-recipe cards and write the index page.

    Pages of up to 100 cards are requested from the Punchfork ``rc`` API
    until a short page is returned; the cards are kept on
    ``self.recipe_cards``.  The index template is then filled in, every
    card's images are stored through ``image_to_file``, its link is
    pointed at the local ``recipe/<name>.html`` page, and the result is
    written to ``<username>/index.html`` inside ``zipfile``.

    ``raise_for_status`` is called on every response, so an HTTP error
    aborts the export.
    """
    page_size = 100

    self._progress("Downloading recipe index:\n")
    start_date = "2013-12-12T00:00:00"
    recipe_cards = self.recipe_cards = []
    while start_date:
      self._progress("\r - %d cards" % len(recipe_cards))

      # Uses self.username like the rest of the class, instead of relying
      # on a module-level ``username`` happening to exist.
      r = requests.get("http://punchfork.com/api/rc",
                       params={ "query": "likes/%s/new" % self.username,
                                "start": start_date,
                                "size": str(page_size),
                                "_": "%0.10f" % random.random() },
                       headers=self.headers)
      r.raise_for_status()

      # ``json`` is read as either an attribute or a method, depending on
      # what the response object exposes.
      json_data = r.json
      if hasattr(json_data, "__call__"):
        json_data = json_data()

      recipe_cards.extend(json_data["cards"])

      # A short page means there is nothing left to fetch.
      if len(json_data["cards"]) < page_size:
        start_date = None
      else:
        start_date = json_data["next"]

    self._progress("\r - Found %d cards.\n" % len(recipe_cards))

    # generate html
    soup = BeautifulSoup(self._index_template, "lxml")
    soup.title.string = self.data["page_title"]
    marquee_title = soup.find(id="marquee-title")
    marquee_title.h1.string = self.data["marquee_title_name"]
    marquee_title.img["src"] = self.image_to_file(self.data["marquee_title_avatar"], zipfile)

    card_count = len(self.recipe_cards)
    recipe_notice = ("%d recipe" if card_count == 1 else "%d recipes") % card_count
    end_marker = soup.find(id="end-marker")
    end_marker.find("div", "notice").string = recipe_notice
    soup.find("div", "meta").string = "This recipe list was archived on %s." % (strftime("%d %B %Y, %H:%M:%S UTC", gmtime()))

    self._progress("Building index page:\n")
    for i, recipe_card in enumerate(self.recipe_cards):
      self._progress("\r - Added %d cards to index." % i)
      card_el = BeautifulSoup(recipe_card, "lxml")

      # Replace the "likes" link with a plain span carrying the same text.
      likes_a = card_el.find("a", "svc")
      likes_span = soup.new_tag("span")
      likes_span["class"] = "metric svc"
      likes_span.string = likes_a.string
      likes_a.replace_with(likes_span)

      for img in card_el.find_all("img"):
        img["src"] = self.image_to_file(img["src"], zipfile)

      # Point the card at the locally archived recipe page.
      a = card_el.find("a")
      a["href"] = "recipe/" + a["href"].split("/")[2] + ".html"
      del a["target"]
      end_marker.insert_before(card_el)
    self._progress("\r - Added %d cards to index.\n" % card_count)

    zipfile.writestr("%s/index.html" % self.username, soup.prettify(formatter="minimal").encode("utf-8"))
Exemplo n.º 13
0
  def load_recipes(self, zipfile):
    """Download every recipe page in ``self.recipe_cards`` into ``zipfile``.

    For each card the recipe page is fetched, scripts, stylesheets and a
    fixed set of site-chrome elements are removed, outbound ``/r?url=``
    links are rewritten to their real destination, a "Back to index" menu
    is added, images are stored through ``image_to_file``, and the page is
    written to ``<username>/recipe/<name>.html``.

    ``raise_for_status`` is called on every response, so an HTTP error
    aborts the export.  A page that fails with ``UnicodeDecodeError``
    while being written is reported on stderr and skipped.
    """
    removed_ids = ("announcement-banner", "action-buttons", "sharing-block", "footer", "fb-root")
    # (tag name, find_all filters, attribute holding the image URL)
    image_attrs = (
      ("img", {}, "src"),
      ("link", {"rel": "apple-touch-icon"}, "href"),
      ("link", {"rel": "shortcut icon"}, "href"),
      ("meta", {"property": "og:image"}, "content"),
    )

    self._progress("Downloading recipes:\n")

    for i, recipe_card in enumerate(self.recipe_cards):
      self._progress("\r - Saved %d recipes." % i)

      recipe_href = BeautifulSoup(recipe_card, "lxml").div.a["href"]
      recipe_name = recipe_href.split("/")[2]

      r = requests.get("http://punchfork.com/recipe/%s" % recipe_name,
                       headers=self.headers)
      r.raise_for_status()

      # replace some parts
      # ``flags=`` must be a keyword: the fourth positional argument of
      # re.sub is ``count``, which would cap the number of replacements.
      t = re.sub("<script(.|\n)+?</script>", "", r.text, flags=re.DOTALL)
      soup = BeautifulSoup(t, "lxml")
      for tag in soup.find_all("script"):
        tag.extract()
      for tag in soup.find_all("link", rel="stylesheet"):
        tag.extract()
      for tag_id in removed_ids:
        tag = soup.find(id=tag_id)
        if tag:
          tag.extract()

      who_likes = soup.find("div", id="who-likes")
      if who_likes:
        for tag in who_likes.find_all("div", "tiny-user-card"):
          tag.extract()
      publisher_card = soup.find("div", id="publisher-card")
      if publisher_card:
        for tag in publisher_card.find_all("a", href=re.compile("^/from/")):
          del tag["href"]

      inner_header = soup.find("div", id="inner-header")
      del inner_header.find("a", "logo")["href"]
      for tag in inner_header.find_all("ul", "dropdown-menu"):
        tag.extract()
      for source_a in soup.find_all("a", href=re.compile(r"^/r\?url=")):
        del source_a["onclick"]
        # parse_qs maps each key to a list; the link needs a single URL.
        source_a["href"] = urlparse.parse_qs(urlparse.urlparse(source_a["href"]).query)["url"][0]

      # Add a "Back to index" menu to the page header.
      ul = soup.new_tag("ul")
      ul["class"] = "left dropdown-menu dark"
      li = soup.new_tag("li")
      li["class"] = "menu dropdown-item"
      a = soup.new_tag("a", href="../index.html")
      a.string = "Back to index"
      li.append(a)
      ul.append(li)
      inner_header.append(ul)

      # Recipe pages live one directory below the image folder.
      for tag_name, filters, attr in image_attrs:
        for tag in soup.find_all(tag_name, **filters):
          tag[attr] = "../" + self.image_to_file(tag[attr], zipfile)

      footer = soup.new_tag("div")
      footer["class"] = "footer"
      soup.body.append(footer)

      soup.head.append(soup.new_tag("link", rel="stylesheet", type="text/css", href="../css/punchfork-81HpuHrf7cX.css"))
      new_script_tag = soup.new_tag("script", type="text/javascript", src="../js/punchfork-export.js")
      new_script_tag.string = '// '
      soup.body.append(new_script_tag)

      try:
        zipfile.writestr("%s/recipe/%s.html" % (self.username, recipe_name), soup.prettify(formatter="minimal").encode("utf-8"))
      except UnicodeDecodeError:
        sys.stderr.write("\n  Skipping broken recipe with multiple encodings for user %s" % self.username)

    self._progress("\r - Saved %d recipes.\n" % len(self.recipe_cards))