def css_in_json(s, localize=True, dir='', prefix=''):
    r'''
    Collect CSS URLs found in a JSON string and, by default, download them.

    Each match of the module-level ``re_json_css`` pattern contributes its
    ``url`` group. When ``localize`` is true, the first remaining occurrence
    of that URL in ``s`` is replaced by ``prefix + <local file name>`` and
    the resource is fetched with ``save_resource`` using ``dejsonify(url)``.

    NOTE: if prefix is a dir, remember to replace '/' with '\/'!

    s        -- JSON text to scan (and rewrite when localizing).
    localize -- when false, only the raw URLs are collected.
    dir      -- forwarded to ``save_resource`` (presumably the target
                directory -- confirm against ``save_resource``).
    prefix   -- string prepended to every local file name.

    Returns a dict with 'source' (the possibly rewritten text) and 'csses'
    (set of ``prefix + file`` when localizing, otherwise the matched URLs).
    '''
    # The docstring is a raw string so that '\/' is not parsed as an
    # (invalid) escape sequence.
    csses = set()
    # finditer scans the string object passed in here; rebinding ``s`` inside
    # the loop does not affect the matches still to come.
    for m_css in re_json_css.finditer(s):
        url = m_css.group('url')
        download_url = dejsonify(url)
        if not localize:
            csses.add(url)
            continue
        # The local name is derived from the URL exactly as it appears in the
        # JSON text; only the download uses the de-jsonified form.
        local_name = url_to_file(url)
        s = s.replace(url, prefix + local_name, 1)
        save_resource(download_url, dir, local_name)
        csses.add(prefix + local_name)
    return {'source': s, 'csses': csses}
def css_in_html(s, localize=True, dir='', prefix=''):
    '''
    Collect stylesheet URLs from HTML/DOM source and, by default, download
    them and point the source at the local copies.

    Matches of the module-level ``re_html_css`` pattern are considered only
    if the matched text contains ``type="text/css"``.

    Returns a dict with 'source' (the possibly rewritten text) and 'csses'
    (set of ``prefix + file`` when localizing, otherwise the matched URLs).
    '''
    found = set()
    for match in re_html_css.finditer(s):
        # avoid false positives: anything not declared as text/css is skipped
        if 'type="text/css"' not in match.group(0):
            continue
        href = match.group('url')
        if not localize:
            found.add(href)
            continue
        local_name = url_to_file(href)
        save_resource(href, dir, local_name)
        local_ref = prefix + local_name
        s = s.replace(href, local_ref, 1)
        found.add(local_ref)
    return {'source': s, 'csses': found}
def js_in_html(s, localize=True, dir='', prefix=''):
    '''
    Collect script URLs from HTML/DOM source and download them by default.
    When localizing, each URL reference in the source is replaced by a
    reference to the local file.

    Matches of the module-level ``re_html_js`` pattern whose ``url`` group
    is empty or missing are ignored.

    Returns a dict with 'source' (the possibly rewritten text) and
    'javascripts' (set of ``prefix + file`` when localizing, otherwise the
    matched URLs).
    '''
    scripts = set()
    for match in re_html_js.finditer(s):
        src = match.group('url')
        if not src:
            # no usable URL in this match
            continue
        if not localize:
            scripts.add(src)
            continue
        local_name = url_to_file(src)
        local_ref = prefix + local_name
        s = s.replace(src, local_ref, 1)
        save_resource(src, dir, local_name)
        scripts.add(local_ref)
    return {'source': s, 'javascripts': scripts}
def js_in_json(s, localize=True, dir='', prefix=''):
    r'''
    Collect JavaScript URLs found in a JSON string and, by default, download
    them. When localizing, each URL reference in the string is replaced by a
    reference to the local file.

    Each match of the module-level ``re_json_js`` pattern contributes its
    ``url`` group. The download uses ``dejsonify(url)``; the replacement and
    the local file name use the URL exactly as it appears in ``s``.

    NOTE: if prefix is a dir, remember to replace '/' with '\/'!

    s        -- JSON text to scan (and rewrite when localizing).
    localize -- when false, only the raw URLs are collected.
    dir      -- forwarded to ``save_resource`` (presumably the target
                directory -- confirm against ``save_resource``).
    prefix   -- string prepended to every local file name.

    Returns a dict with 'source' (the possibly rewritten text) and
    'javascripts' (set of ``prefix + file`` when localizing, otherwise the
    matched URLs).
    '''
    # The docstring is a raw string so that '\/' is not parsed as an
    # (invalid) escape sequence.
    javascripts = set()
    # finditer scans the string object passed in here; rebinding ``s`` inside
    # the loop does not affect the matches still to come.
    for m_jssource in re_json_js.finditer(s):
        url = m_jssource.group('url')
        download_url = dejsonify(url)
        if not localize:
            javascripts.add(url)
            continue
        local_name = url_to_file(url)
        s = s.replace(url, prefix + local_name, 1)
        save_resource(download_url, dir, local_name)
        javascripts.add(prefix + local_name)
    return {'source': s, 'javascripts': javascripts}
def img_in_css(s, localize=True, site='http://static.ak.fbcdn.net', dir='', prefix=''):
    '''
    Collect image URLs referenced from CSS text and download the ones that
    can be localized.

    Only URLs beginning with ``/rsrc.php`` are localized: they are fetched
    from ``site + url`` and their first remaining occurrence in ``s`` is
    replaced by ``prefix + <local file name>``. Every other URL is recorded
    unchanged.

    Returns a dict with 'source' (the possibly rewritten text) and 'images'
    (set of local references and/or untouched URLs).
    '''
    images = set()
    for match in re_css_img.finditer(s):
        src = match.group('url')
        local_name = url_to_file(src)
        if not (localize and src[:9] == '/rsrc.php'):
            # only record the URL: localization is off, or this is not a
            # /rsrc.php path and is left as it is
            images.add(src)
            continue
        local_ref = prefix + local_name
        images.add(local_ref)
        s = s.replace(src, local_ref, 1)
        save_resource(site + src, dir, local_name)
    return {'source': s, 'images': images}
def img_in_html(s, localize=True, site='http://static.ak.fbcdn.net', dir='', prefix=''):
    '''
    Collect image URLs from HTML/DOM source and, by default, download them
    and point the source at the local copies.

    Each match of the module-level ``re_html_img`` pattern contributes its
    ``url`` group. URLs starting with '/' are resolved against ``site``
    before being downloaded or reported.

    s        -- HTML text to scan (and rewrite when localizing).
    localize -- when false, only the (absolute) URLs are collected.
    site     -- prepended to URLs that start with '/'.
    dir      -- forwarded to ``save_resource`` (presumably the target
                directory -- confirm against ``save_resource``).
    prefix   -- string prepended to every local file name.

    Returns a dict with 'source' (the possibly rewritten text) and 'images'
    (set of ``prefix + file`` when localizing, otherwise the absolute URLs).
    '''
    images = set()
    for m_image in re_html_img.finditer(s):
        raw_url = m_image.group('url')
        if not raw_url:
            # An empty or missing URL cannot be downloaded; skip it instead
            # of failing on the leading-character check.
            continue
        if raw_url.startswith('/'):
            url = site + raw_url
        else:
            url = raw_url
        if not localize:
            # only getting a list of urls
            images.add(url)
            continue
        local_name = url_to_file(url)
        # The URL comes from HTML, where '&' is written as '&amp;'; decode it
        # so the request carries real ampersands.
        download_url = unquote(url).replace('&amp;', '&')
        save_resource(download_url, dir, local_name)
        images.add(prefix + local_name)
        # Replace the URL as it is written in the source. Using the
        # site-qualified form here would leave relative URLs untouched.
        s = s.replace(raw_url, prefix + local_name, 1)
    return {'source': s, 'images': images}