Пример #1
0
def css_in_json(s, localize=True, dir='', prefix=''):
    r'''
    Collect the css urls found in a JSON string and, by default, download
    them and point the string at the local copies.

    Every match of ``re_json_css`` in ``s`` contributes its ``url`` group.

    Args:
      s: text to scan.
      localize: when true, the first occurrence of each url in ``s`` is
        replaced by ``prefix + file`` and the resource is fetched through
        ``save_resource``; when false the urls are only collected.
      dir: destination passed through to ``save_resource``.
      prefix: text prepended to the local file name.
        NOTE: if prefix is a dir, remember to replace '/' with '\/'!

    Returns:
      dict with the (possibly rewritten) text under 'source' and the set of
      collected references under 'csses'.
    '''
    csses = set()
    for m_css in re_json_css.finditer(s):
        url = m_css.group('url')
        # The matched text is used verbatim for the in-place replacement;
        # the dejsonified form is only used for the download.
        download_url = dejsonify(url)
        if localize:
            local_file = url_to_file(url)
            local_ref = prefix + local_file
            s = s.replace(url, local_ref, 1)
            save_resource(download_url, dir, local_file)
            csses.add(local_ref)
        else:
            csses.add(url)
    return {'source': s, 'csses': csses}
Пример #2
0
def css_in_json(s, localize=True, dir='', prefix=''):
  r'''
  Collect the css urls found in a JSON string and, by default, download
  them and point the string at the local copies.

  Every match of ``re_json_css`` in ``s`` contributes its ``url`` group.

  Args:
    s: text to scan.
    localize: when true, the first occurrence of each url in ``s`` is
      replaced by ``prefix + file`` and the resource is fetched through
      ``save_resource``; when false the urls are only collected.
    dir: destination passed through to ``save_resource``.
    prefix: text prepended to the local file name.
      NOTE: if prefix is a dir, remember to replace '/' with '\/'!

  Returns:
    dict with the (possibly rewritten) text under 'source' and the set of
    collected references under 'csses'.
  '''
  csses = set()
  for m_css in re_json_css.finditer(s):
    url = m_css.group('url')
    # The matched text is used verbatim for the in-place replacement;
    # the dejsonified form is only used for the download.
    download_url = dejsonify(url)
    if localize:
      local_file = url_to_file(url)
      local_ref = prefix + local_file
      s = s.replace(url, local_ref, 1)
      save_resource(download_url, dir, local_file)
      csses.add(local_ref)
    else:
      csses.add(url)
  return {'source': s, 'csses': csses}
Пример #3
0
def css_in_html(s, localize=True, dir='', prefix=''):
    '''
    Gather the stylesheet urls matched by ``re_html_css`` in ``s``.

    With ``localize`` set, each stylesheet is saved via ``save_resource``
    and the first occurrence of its url in ``s`` is swapped for
    ``prefix + file``; otherwise the urls are only collected.

    Returns a dict holding the resulting text ('source') and the set of
    collected references ('csses').
    '''
    found = set()
    for match in re_html_css.finditer(s):
        # avoid false positives: the matched tag must declare text/css
        if 'type="text/css"' not in match.group(0):
            continue
        href = match.group('url')
        if not localize:
            found.add(href)
            continue
        local_name = url_to_file(href)
        save_resource(href, dir, local_name)
        local_ref = prefix + local_name
        s = s.replace(href, local_ref, 1)
        found.add(local_ref)
    return {'source': s, 'csses': found}
Пример #4
0
def css_in_html(s, localize=True, dir='', prefix=''):
  '''
  Gather the stylesheet urls matched by ``re_html_css`` in ``s``.

  With ``localize`` set, each stylesheet is saved via ``save_resource``
  and the first occurrence of its url in ``s`` is swapped for
  ``prefix + file``; otherwise the urls are only collected.

  Returns a dict holding the resulting text ('source') and the set of
  collected references ('csses').
  '''
  found = set()
  for match in re_html_css.finditer(s):
    # avoid false positives: the matched tag must declare text/css
    if 'type="text/css"' not in match.group(0):
      continue
    href = match.group('url')
    if not localize:
      found.add(href)
      continue
    local_name = url_to_file(href)
    save_resource(href, dir, local_name)
    local_ref = prefix + local_name
    s = s.replace(href, local_ref, 1)
    found.add(local_ref)
  return {'source': s, 'csses': found}
Пример #5
0
def js_in_html(s, localize=True, dir='', prefix=''):
    '''
    Gather the script urls matched by ``re_html_js`` in ``s``; matches
    without a url are ignored.

    With ``localize`` set, the first occurrence of each url in ``s`` is
    swapped for ``prefix + file`` and the script is saved via
    ``save_resource``; otherwise the urls are only collected.

    Returns a dict holding the resulting text ('source') and the set of
    collected references ('javascripts').
    '''
    scripts = set()
    for match in re_html_js.finditer(s):
        src = match.group('url')
        if not src:
            continue
        if not localize:
            scripts.add(src)
            continue
        local_name = url_to_file(src)
        local_ref = prefix + local_name
        s = s.replace(src, local_ref, 1)
        save_resource(src, dir, local_name)
        scripts.add(local_ref)
    return {'source': s, 'javascripts': scripts}
Пример #6
0
def js_in_html(s, localize=True, dir='', prefix=''):
  '''
  Gather the script urls matched by ``re_html_js`` in ``s``; matches
  without a url are ignored.

  With ``localize`` set, the first occurrence of each url in ``s`` is
  swapped for ``prefix + file`` and the script is saved via
  ``save_resource``; otherwise the urls are only collected.

  Returns a dict holding the resulting text ('source') and the set of
  collected references ('javascripts').
  '''
  scripts = set()
  for match in re_html_js.finditer(s):
    src = match.group('url')
    if not src:
      continue
    if not localize:
      scripts.add(src)
      continue
    local_name = url_to_file(src)
    local_ref = prefix + local_name
    s = s.replace(src, local_ref, 1)
    save_resource(src, dir, local_name)
    scripts.add(local_ref)
  return {'source': s, 'javascripts': scripts}
Пример #7
0
def js_in_json(s, localize=True, dir='', prefix=''):
    r'''
    Collect the js urls found in a JSON string and, by default, download
    them and replace the url references by refs to the local files.

    Every match of ``re_json_js`` in ``s`` contributes its ``url`` group.

    Args:
      s: text to scan.
      localize: when true, the first occurrence of each url in ``s`` is
        replaced by ``prefix + file`` and the resource is fetched through
        ``save_resource``; when false the urls are only collected.
      dir: destination passed through to ``save_resource``.
      prefix: text prepended to the local file name.
        NOTE: if prefix is a dir, remember to replace '/' with '\/'!

    Returns:
      dict with the (possibly rewritten) text under 'source' and the set of
      collected references under 'javascripts'.
    '''
    javascripts = set()
    for m_jssource in re_json_js.finditer(s):
        url = m_jssource.group('url')
        # The matched text is used verbatim for the in-place replacement;
        # the dejsonified form is only used for the download.
        download_url = dejsonify(url)
        if localize:
            local_file = url_to_file(url)
            local_ref = prefix + local_file
            s = s.replace(url, local_ref, 1)
            save_resource(download_url, dir, local_file)
            javascripts.add(local_ref)
        else:
            javascripts.add(url)
    return {'source': s, 'javascripts': javascripts}
Пример #8
0
def js_in_json(s, localize=True, dir='', prefix=''):
  r'''
  Collect the js urls found in a JSON string and, by default, download
  them and replace the url references by refs to the local files.

  Every match of ``re_json_js`` in ``s`` contributes its ``url`` group.

  Args:
    s: text to scan.
    localize: when true, the first occurrence of each url in ``s`` is
      replaced by ``prefix + file`` and the resource is fetched through
      ``save_resource``; when false the urls are only collected.
    dir: destination passed through to ``save_resource``.
    prefix: text prepended to the local file name.
      NOTE: if prefix is a dir, remember to replace '/' with '\/'!

  Returns:
    dict with the (possibly rewritten) text under 'source' and the set of
    collected references under 'javascripts'.
  '''
  javascripts = set()
  for m_jssource in re_json_js.finditer(s):
    url = m_jssource.group('url')
    # The matched text is used verbatim for the in-place replacement;
    # the dejsonified form is only used for the download.
    download_url = dejsonify(url)
    if localize:
      local_file = url_to_file(url)
      local_ref = prefix + local_file
      s = s.replace(url, local_ref, 1)
      save_resource(download_url, dir, local_file)
      javascripts.add(local_ref)
    else:
      javascripts.add(url)
  return {'source': s, 'javascripts': javascripts}
Пример #9
0
def img_in_css(s,
               localize=True,
               site='http://static.ak.fbcdn.net',
               dir='',
               prefix=''):
    '''
    Gather the image urls matched by ``re_css_img`` in the stylesheet ``s``.

    When ``localize`` is set and a url begins with '/rsrc.php', the image is
    fetched from ``site + url`` via ``save_resource`` and the first
    occurrence of the url in ``s`` is swapped for ``prefix + file``. Any
    other url is collected unchanged.

    Returns a dict holding the resulting text ('source') and the set of
    collected references ('images').
    '''
    collected = set()
    for match in re_css_img.finditer(s):
        ref = match.group('url')
        local_name = url_to_file(ref)
        if not (localize and ref[:9] == '/rsrc.php'):
            # just form the set, don't need to be or already localized
            collected.add(ref)
            continue
        local_ref = prefix + local_name
        collected.add(local_ref)
        s = s.replace(ref, local_ref, 1)
        save_resource(site + ref, dir, local_name)
    return {'source': s, 'images': collected}
Пример #10
0
def img_in_css(s,
               localize=True,
               site='http://static.ak.fbcdn.net',
               dir='',
               prefix=''):
  '''
  Gather the image urls matched by ``re_css_img`` in the stylesheet ``s``.

  When ``localize`` is set and a url begins with '/rsrc.php', the image is
  fetched from ``site + url`` via ``save_resource`` and the first
  occurrence of the url in ``s`` is swapped for ``prefix + file``. Any
  other url is collected unchanged.

  Returns a dict holding the resulting text ('source') and the set of
  collected references ('images').
  '''
  collected = set()
  for match in re_css_img.finditer(s):
    ref = match.group('url')
    local_name = url_to_file(ref)
    if not (localize and ref[:9] == '/rsrc.php'):
      # just form the set, don't need to be or already localized
      collected.add(ref)
      continue
    local_ref = prefix + local_name
    collected.add(local_ref)
    s = s.replace(ref, local_ref, 1)
    save_resource(site + ref, dir, local_name)
  return {'source': s, 'images': collected}
Пример #11
0
def img_in_html(s,
                localize=True,
                site='http://static.ak.fbcdn.net',
                dir='',
                prefix=''):
    '''
    Collect the image urls matched by ``re_html_img`` in ``s`` and, by
    default, download them and point the markup at the local copies.

    Args:
      s: markup to scan.
      localize: when true, each image is fetched through ``save_resource``
        and the first occurrence of its reference in ``s`` is replaced by
        ``prefix + file``; when false only the urls are collected.
      site: origin prepended to references that start with '/'.
      dir: destination passed through to ``save_resource``.
      prefix: text prepended to the local file name.

    Returns:
      dict with the (possibly rewritten) text under 'source' and the set of
      collected references under 'images'.
    '''
    images = set()
    for m_image in re_html_img.finditer(s):
        src = m_image.group('url')  # the reference exactly as it appears in s
        if not src:
            # Nothing to fetch; also, replacing '' would insert text at the
            # very start of s.
            continue
        # References starting with '/' are resolved against ``site``.
        url = site + src if src.startswith('/') else src
        if localize:
            local_file = url_to_file(url)
            download_url = unquote(url).replace('&amp;', '&')
            save_resource(download_url, dir, local_file)
            images.add(prefix + local_file)
            # Replace the text as written in the markup: the site-qualified
            # url does not occur in s for a site-relative reference, so
            # searching for it would leave the markup untouched.
            s = s.replace(src, prefix + local_file, 1)
        else:  # only getting a list of urls
            images.add(url)
    return {'source': s, 'images': images}
Пример #12
0
def img_in_html(s,
                localize=True,
                site='http://static.ak.fbcdn.net',
                dir='',
                prefix=''):
  '''
  Collect the image urls matched by ``re_html_img`` in ``s`` and, by
  default, download them and point the markup at the local copies.

  Args:
    s: markup to scan.
    localize: when true, each image is fetched through ``save_resource``
      and the first occurrence of its reference in ``s`` is replaced by
      ``prefix + file``; when false only the urls are collected.
    site: origin prepended to references that start with '/'.
    dir: destination passed through to ``save_resource``.
    prefix: text prepended to the local file name.

  Returns:
    dict with the (possibly rewritten) text under 'source' and the set of
    collected references under 'images'.
  '''
  images = set()
  for m_image in re_html_img.finditer(s):
    src = m_image.group('url')  # the reference exactly as it appears in s
    if not src:
      # Nothing to fetch; also, replacing '' would insert text at the
      # very start of s.
      continue
    # References starting with '/' are resolved against ``site``.
    url = site + src if src.startswith('/') else src
    if localize:
      local_file = url_to_file(url)
      download_url = unquote(url).replace('&amp;', '&')
      save_resource(download_url, dir, local_file)
      images.add(prefix + local_file)
      # Replace the text as written in the markup: the site-qualified
      # url does not occur in s for a site-relative reference, so
      # searching for it would leave the markup untouched.
      s = s.replace(src, prefix + local_file, 1)
    else:  # only getting a list of urls
      images.add(url)
  return {'source': s, 'images': images}