Пример #1
0
 def get_links_from_css(self, style_text, item):
     """Build download requests for the URLs referenced by a stylesheet.

     The stylesheet text is parsed with cssutils and every URL reported by
     ``cssutils.getUrls`` is resolved against ``item['response'].url``.
     Occurrences of the original references inside ``item['content']`` are
     replaced by their absolute form, and the result is stored back into
     ``item['content']``.

     Args:
         style_text: CSS source text to scan for URLs.
         item: Mapping with a ``'response'`` entry (an object exposing a
             ``url`` attribute) and a ``'content'`` entry (the text to
             rewrite). ``item['content']`` is reassigned by this method.

     Returns:
         A list of ``Request`` objects, one per distinct absolute URL, in
         order of first appearance in the stylesheet.
     """
     import re
     try:
         from urllib.parse import urljoin
     except ImportError:  # Python 2 keeps urljoin in the urlparse module
         from urlparse import urljoin

     base_url = item['response'].url
     sheet = CSSStyleSheet()
     sheet.cssText = style_text
     item_content = item['content']

     # Map each distinct reference to its absolute form. urljoin handles
     # https pages, root-relative, protocol-relative and already-absolute
     # references, which manual string splitting on 'http://' did not.
     absolute_by_url = {}
     ordered_urls = []
     for url in cssutils.getUrls(sheet):
         if not url or url in absolute_by_url:
             # Empty references cannot be resolved; repeated ones are
             # already recorded.
             continue
         absolute_by_url[url] = urljoin(base_url, url)
         ordered_urls.append(url)

     if ordered_urls:
         # Rewrite in one pass so text inserted by one replacement is never
         # rewritten again by a later one. Longest alternatives come first
         # so a reference that contains a shorter one wins the match.
         longest_first = sorted(ordered_urls, key=len, reverse=True)
         pattern = re.compile('|'.join(re.escape(u) for u in longest_first))
         item_content = pattern.sub(
             lambda match: absolute_by_url[match.group(0)], item_content)

     # Different references may resolve to the same absolute URL; request
     # each target only once.
     requests = []
     requested = set()
     for url in ordered_urls:
         absolute_url = absolute_by_url[url]
         if absolute_url not in requested:
             requested.add(absolute_url)
             requests.append(Request(absolute_url))

     item['content'] = item_content
     return requests
Пример #2
0
 def get_links_from_css(self, style_text, item):
     """Build download requests for the URLs referenced by a stylesheet.

     The stylesheet text is parsed with cssutils and every URL reported by
     ``cssutils.getUrls`` is resolved against ``item['response'].url``.
     Occurrences of the original references inside ``item['content']`` are
     replaced by their absolute form, and the result is stored back into
     ``item['content']``.

     Args:
         style_text: CSS source text to scan for URLs.
         item: Mapping with a ``'response'`` entry (an object exposing a
             ``url`` attribute) and a ``'content'`` entry (the text to
             rewrite). ``item['content']`` is reassigned by this method.

     Returns:
         A list of ``Request`` objects, one per distinct absolute URL, in
         order of first appearance in the stylesheet.
     """
     import re
     try:
         from urllib.parse import urljoin
     except ImportError:  # Python 2 keeps urljoin in the urlparse module
         from urlparse import urljoin

     base_url = item['response'].url
     sheet = CSSStyleSheet()
     sheet.cssText = style_text
     item_content = item['content']

     # Map each distinct reference to its absolute form. urljoin handles
     # https pages, root-relative, protocol-relative and already-absolute
     # references, which manual string splitting on 'http://' did not.
     absolute_by_url = {}
     ordered_urls = []
     for url in cssutils.getUrls(sheet):
         if not url or url in absolute_by_url:
             # Empty references cannot be resolved; repeated ones are
             # already recorded.
             continue
         absolute_by_url[url] = urljoin(base_url, url)
         ordered_urls.append(url)

     if ordered_urls:
         # Rewrite in one pass so text inserted by one replacement is never
         # rewritten again by a later one. Longest alternatives come first
         # so a reference that contains a shorter one wins the match.
         longest_first = sorted(ordered_urls, key=len, reverse=True)
         pattern = re.compile('|'.join(re.escape(u) for u in longest_first))
         item_content = pattern.sub(
             lambda match: absolute_by_url[match.group(0)], item_content)

     # Different references may resolve to the same absolute URL; request
     # each target only once.
     requests = []
     requested = set()
     for url in ordered_urls:
         absolute_url = absolute_by_url[url]
         if absolute_url not in requested:
             requested.add(absolute_url)
             requests.append(Request(absolute_url))

     item['content'] = item_content
     return requests
Пример #3
0
def parse_css_stylesheet(content):
    """Parse a CSS stylesheet, process its rules and record the time spent.

    ``content`` is assigned to a fresh ``CSSStyleSheet``. If that assignment
    raises, the raw text is handed to ``parser.process_content`` with
    ``contexts.CSS_UNKNOWN``. Every rule the sheet holds afterwards is passed
    to ``parse_css_rule``, and the elapsed time (a ``timedelta``) is added to
    ``library.css_us``.

    Args:
        content: CSS source text to parse.
    """
    from datetime import datetime

    started_at = datetime.now()
    stylesheet = CSSStyleSheet()
    try:
        stylesheet.cssText = content
    except Exception:
        # The stylesheet could not be parsed; hand over the raw text instead.
        parser.process_content(content, contexts.CSS_UNKNOWN)

    # This loop also runs after a failed parse, over whatever rules the
    # sheet holds at that point.
    for css_rule in stylesheet.cssRules:
        parse_css_rule(css_rule)

    # Not reached (and so nothing is recorded) if anything above propagates
    # an exception.
    elapsed = datetime.now() - started_at
    library.css_us += elapsed
Пример #4
0
 def get_media_requests(self, item, info):
     """Return one ``Request`` per URL found in the item's CSS content.

     ``item['content']`` is parsed as a stylesheet and each URL reported by
     ``cssutils.getUrls`` is wrapped in a ``Request`` unchanged. ``info`` is
     accepted but not used here.
     """
     stylesheet = CSSStyleSheet()
     stylesheet.cssText = item['content']

     media_requests = []
     for css_url in cssutils.getUrls(stylesheet):
         media_requests.append(Request(css_url))
     return media_requests
Пример #5
0
 def get_media_requests(self, item, info):
     """Return one ``Request`` per URL found in the item's CSS content.

     ``item['content']`` is parsed as a stylesheet and each URL reported by
     ``cssutils.getUrls`` is wrapped in a ``Request`` unchanged. ``info`` is
     accepted but not used here.
     """
     stylesheet = CSSStyleSheet()
     stylesheet.cssText = item['content']

     media_requests = []
     for css_url in cssutils.getUrls(stylesheet):
         media_requests.append(Request(css_url))
     return media_requests