예제 #1
0
    def parse(self, response):
        """Yield one detail-page request for every thumbnail link found.

        Each anchor matched by the archive selector contributes its
        ``href`` (the post page) and the ``src`` of its nested ``img``
        (the cover image).  Both are joined onto ``response.url`` and
        the image URL is passed along in the request's ``meta`` under
        ``front_image_url``; ``self.parse_detail`` is the callback.
        """
        anchors = response.css("#archive .floated-thumb .post-thumb a")
        for anchor in anchors:
            # Missing attributes fall back to the empty string.
            thumb_src = anchor.css("img::attr(src)").extract_first("")
            href = anchor.css("::attr(href)").extract_first("")
            detail_url = basejoin(response.url, href)
            request_meta = {"front_image_url": basejoin(response.url, thumb_src)}
            yield Request(
                url=detail_url,
                meta=request_meta,
                callback=self.parse_detail,
            )
예제 #2
0
    def __call__(self, url, baseURL=None):
        """Fetch the first loadable entry of a multi-value url

        url -- vrml97-style url (multi-value string); a single url
            or a sequence of urls, tried in order
        baseURL -- optional base url; when given, every entry is
            joined onto it before being fetched.
            protofunctions.root(node).baseURI will give you the
            baseURL normally used for the given node.

        returns (successfulURL, filename, open_file, headers) for the
            first entry whose fetch produced both a file and a
            filename; filename is made absolute
        raises IOError if no entry could be loaded

        headers will be None for local files
        """
        log.info("Loading: %s, %s", url, baseURL)
        normalised = as_unicode(url)
        if isinstance(normalised, unicode):
            candidates = [normalised]
        else:
            candidates = [as_unicode(entry) for entry in normalised]
        handle = None
        for candidate in candidates:
            # Resolve the entry against the base url when one is supplied.
            target = basejoin(baseURL, candidate) if baseURL else candidate
            resolvedURL, handle, filename, headers = self.get(target)
            if not (handle is None or filename is None):
                break
        if not (handle and filename):
            raise IOError("""Unable to download url %s""" % candidates)
        return (resolvedURL, os.path.abspath(filename), handle, headers)
예제 #3
0
파일: urllib.py 프로젝트: Pluckyduck/eve
 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
     """Follow the redirect announced in ``headers``.

     The target comes from the 'location' header, falling back to
     'uri'.  When neither is present nothing is done and None is
     returned.  Otherwise the response body is read and discarded,
     ``fp`` is closed, the target is joined onto the original URL and
     the result of ``self.open`` on it is returned.

     Raises IOError when the joined target does not start with
     http://, https:// or ftp://.
     """
     if 'location' in headers:
         newurl = headers['location']
     elif 'uri' in headers:
         newurl = headers['uri']
     else:
         return
     # Read and discard the rest of the body before closing.
     fp.read()
     fp.close()
     # Resolve the target against the original URL (it may be relative).
     newurl = basejoin(self.type + ':' + url, newurl)
     # Security: a server-supplied redirect must not be able to steer
     # the opener to other schemes such as file://.
     if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
         raise IOError(
             'redirect error', errcode,
             errmsg + " - Redirection to url '%s' is not allowed" % newurl,
             headers)
     return self.open(newurl)
예제 #4
0
파일: urllib.py 프로젝트: connoryang/1v1dec
 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
     """Open the URL a redirect response points at.

     Looks for the target in the 'location' header first and the
     'uri' header second; returns None without touching ``fp`` when
     both are missing.  Otherwise the body is read and discarded,
     ``fp`` is closed, and the target (joined onto the original URL)
     is handed to ``self.open``, whose result is returned.

     Raises IOError if the joined target is not an http://, https://
     or ftp:// URL.
     """
     if 'location' in headers:
         newurl = headers['location']
     elif 'uri' in headers:
         newurl = headers['uri']
     else:
         return
     # The body is not needed; read it out and close the response.
     fp.read()
     fp.close()
     # Join with the original URL in case the target is relative.
     newurl = basejoin(self.type + ':' + url, newurl)
     # Only http, https and ftp targets are followed, so a redirect
     # cannot make the opener read e.g. a file:// URL.
     allowed_prefixes = ('http://', 'https://', 'ftp://')
     if not newurl.lower().startswith(allowed_prefixes):
         raise IOError(
             'redirect error', errcode,
             errmsg + " - Redirection to url '%s' is not allowed" % newurl,
             headers)
     return self.open(newurl)
예제 #5
0
 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
     """Resolve and open the target of a redirect response.

     The target is read from the 'location' header, or from 'uri'
     when 'location' is absent.  With neither header present the
     method returns None immediately.  Otherwise the response body
     is read and discarded, ``fp`` is closed, and the target is
     joined with the original URL and opened through ``self.open``.

     Raises IOError when the resulting URL does not begin with
     http://, https:// or ftp://.
     """
     if 'location' in headers:
         newurl = headers['location']
     elif 'uri' in headers:
         newurl = headers['uri']
     else:
         return
     # Discard the remaining body, then release the response.
     fp.read()
     fp.close()
     # In case the server sent a relative URL, join with original:
     newurl = basejoin(self.type + ":" + url, newurl)
     # For security reasons redirects to protocols other than HTTP,
     # HTTPS or FTP are refused.
     if not newurl.lower().startswith(('http://', 'https://', 'ftp://')):
         raise IOError(
             'redirect error', errcode,
             errmsg + " - Redirection to url '%s' is not allowed" % newurl,
             headers)
     return self.open(newurl)
예제 #6
0
 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
     """Follow a redirect, refusing targets outside http/https/ftp.

     The target is taken from the 'location' header, else from 'uri';
     with neither present the method returns None.  Otherwise ``fp``
     is closed, the target is joined onto the original URL, and the
     result of ``self.open`` on it is returned.

     Raises IOError when the joined target does not start with
     http://, https:// or ftp://.
     """
     for header_name in ('location', 'uri'):
         if header_name in headers:
             target = headers[header_name]
             break
     else:
         # No redirect target was supplied.
         return
     fp.close()
     target = basejoin(self.type + ':' + url, target)
     if target.lower().startswith(('http://', 'https://', 'ftp://')):
         return self.open(target)
     raise IOError('redirect error', errcode, errmsg + " - Redirection to url '%s' is not allowed" % target, headers)
예제 #7
0
 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
     """Open the redirect target named by the response headers.

     'location' is preferred over 'uri'; if neither header exists the
     method returns None.  Otherwise ``fp`` is closed and the target,
     joined onto the original URL, is opened with ``self.open``.

     Raises IOError unless the joined target begins with http://,
     https:// or ftp://.
     """
     header_key = next(
         (key for key in ('location', 'uri') if key in headers), None)
     if header_key is None:
         return
     redirect_to = headers[header_key]
     fp.close()
     redirect_to = basejoin(self.type + ':' + url, redirect_to)
     lowered = redirect_to.lower()
     permitted = ('http://', 'https://', 'ftp://')
     if not any(lowered.startswith(prefix) for prefix in permitted):
         detail = " - Redirection to url '%s' is not allowed" % redirect_to
         raise IOError('redirect error', errcode, errmsg + detail, headers)
     return self.open(redirect_to)