Exemplo n.º 1
0
        return
        
    # client-side method
    def csqueryConents(self,csdompagination):
        """Return the pagination DOM selection unchanged (identity hook)."""
        return csdompagination 
        
    def csqueryPagination(self,csdom,pagesPath):
        """Collect pagination link URLs from *csdom*.

        For each CsQuery selector in *pagesPath*: find every <a> element in
        the matched node, optionally rewrite (or veto) its href via the
        configured PageLamda callback, resolve relative hrefs against
        self.config.cfgUrl, and keep those whose Levenshtein similarity to
        self.__url lies strictly between PageSimilarity and 1.

        NOTE(review): `pages` is accumulated but never returned in the
        visible code -- the snippet looks truncated; confirm a
        `return pages` follows.
        """
        pages=[]
        for index,item in enumerate(pagesPath):
            csquery=item
            cspage=csdom.Select(csquery)           
            if cspage:                
                # find all <a> tags
                children=cspage.Find("a")                
                if children.Length>0:
                    for i in range(0,children.Length):
                        cshyper=CsQuery.CQ.Create(children[i])              
                        href=cshyper.Attr("href")
                        text=cshyper.Text()
                        # optional user hook that may rewrite or reject the href
                        pagelamda=self.config.cfgContent.Options.PageLamda
                        if pagelamda:
                            str=pagelamda(cshyper,href,text)  # NOTE(review): shadows the builtin `str`
                            print "this is type for url : %s" % (type(str))
                            
                            if str:
                                href=str
                                print " this is true"
                            else:
                                # callback returned a falsy value: skip this link
                                print "this is false"
                                continue

                        # root-relative href: prepend scheme://host of the configured URL
                        if href and href[0:1]=="/":
                            proto, rest = urllib2.splittype(self.config.cfgUrl)  
                            host, rest = urllib2.splithost(rest)
                            href=proto+"://"+host+href
                        # query-only href: swap in the new query string
                        elif href and href[0:1]=="?":                            
                            proto, rest = urllib2.splittype(self.config.cfgUrl)  
                            host, rest = urllib2.splithost(rest)
                            p=rest.split("?")
                            p[1]=href[1:]
                            href=proto+"://"+host+"?".join(p)
                        # other relative href: replace the last path segment
                        # NOTE(review): raises AttributeError when href is None here -- confirm
                        elif href.find("http")==-1:
                            proto, rest = urllib2.splittype(self.config.cfgUrl)  
                            host, rest = urllib2.splithost(rest)
                            p_rest=rest.split("/")
                            p_rest[len(p_rest)-1]=href
                            href=proto+"://"+host+"/".join(p_rest)
                            
                        # keep links similar -- but not identical -- to the current URL
                        scale=self.config.cfgContent.Options.PageSimilarity          
                        rate=0.0
                        simlilar=StringHelper.LevenshteinDistance(self.__url,href,rate)
                        print "this is simliar :%f " % simlilar[1]
                        if href and simlilar[1]>scale and simlilar[1]<1:
                            pages.append(href) 
Exemplo n.º 2
0
def parse(uri):
    """Parse an M3U/EXTM3U playlist at *uri* into Track objects.

    *uri* may be a URL (anything urllib2.splittype reports a scheme for) or a
    local file path. Raises Exception carrying the offending line number and
    content when a line cannot be parsed.

    NOTE(review): the visible code builds `playlist` but does not return it --
    likely truncated; confirm a `return playlist` follows.
    """
    with closing(
            urllib2.urlopen(uri) if urllib2.splittype(uri)[0] else codecs.
            open(uri, 'r')) as inf:
        # initialize playlist variables before reading file
        playlist = []
        song = Track(None, None, None)

        for line_no, line in enumerate(inf):
            try:
                # strip a possible UTF-8 BOM, then surrounding whitespace
                line = line.strip(codecs.BOM_UTF8).strip()
                if line.startswith('#EXTINF:'):
                    # pull length and title from #EXTINF line
                    length, title = line.split('#EXTINF:')[1].split(',', 1)
                    song = Track(length, title, None)
                elif line.startswith('#'):
                    # comment, #EXTM3U
                    pass
                elif len(line) != 0:
                    # pull song path from all other, non-blank lines
                    song.path = line
                    playlist.append(song)

                    # reset the song variable so it doesn't use the same EXTINF more than once
                    song = Track(None, None, None)
            except Exception, ex:
                raise Exception("Can't parse line %d: %s" % (line_no, line),
                                ex)
Exemplo n.º 3
0
 def url_splits(url):
     """Split *url* into domain labels and path segments.

     Produces two lists: the dot-separated parts of the host and the
     slash-separated parts of the path, with empty entries removed. Errors
     are logged via scrapy's log and swallowed.

     NOTE(review): the visible code does not return the two lists -- likely
     truncated; confirm a `return domain_splits, path_splits` follows.
     """
     domain_splits = []
     path_splits = []
     try:
         if url:
             protocol, rest = urllib2.splittype(url)
             # no scheme: re-add '//' so splithost can locate the authority
             if not protocol:
                 rest = '//' + rest
             host, rest = urllib2.splithost(rest)
             # parse the domain part
             if host:
                 splits = host.split('.')
                 if splits:
                     # walk indices in reverse; remove('') deletes the first
                     # empty entry each time an index holds one
                     index_list = range(len(splits))
                     index_list.reverse()
                     for index in index_list:
                         if not splits[index]:
                             splits.remove('')
                     domain_splits += splits
             # parse the path part
             if rest:
                 rest = urlparse.urlparse(rest)
                 splits = rest.path.split('/')
                 if splits:
                     index_list = range(len(splits))
                     index_list.reverse()
                     for index in index_list:
                         if not splits[index]:
                             splits.remove('')
                     path_splits += splits
     except Exception, e:
         log.msg(traceback.format_exc(), level=log.ERROR)
Exemplo n.º 4
0
def loadUrl():
    """Flask-style handler: resolve the target url (optionally rendered from
    a job template), remember host/referer globals, and fetch it with
    browser-like headers.

    NOTE(review): `host = urllib2.splittype(url)` binds the (scheme, rest)
    TUPLE, not a host string, so `if host != None` is effectively always
    true -- looks like a bug; confirm intent.
    NOTE(review): `res, host = urllib.splithost(rest)` swaps the usual
    names -- splithost returns (host, remainder), so `res` holds the host
    and the global `host` ends up as scheme://host via the next line.
    """
    args = request.args
    url = args.get("url")
    jobTemplateId = args.get("jobTemplateId")
    # drop incoming query args so they don't leak into the forwarded request
    request.args = ImmutableMultiDict()
    data = {}
    if jobTemplateId != None and jobTemplateId != "":
        data = TestService.parseTemplate(jobTemplateId, flag=False)
        # url = data.get("renderUrl")
    global host, cookie, refer, headers
    if (url.startswith("http")):
        host = urllib2.splittype(url)
        refer = url
        if host != None:
            proto, rest = urllib.splittype(url)
            res, host = urllib.splithost(rest)
            host = proto + "://" + res
    else:
        # relative url: resolve against the previously remembered host
        url = host + url
    request.url = url
    send_headers = {
        # 'Host':'www.jb51.net',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Connection': 'keep-alive'
    }
    postRequest = urllib2.Request(url, headers=send_headers)
    response = None
    try:
        response = urllib2.urlopen(postRequest)
    except Exception, e:
        logging.error("error" + str(e))
Exemplo n.º 5
0
    def download_file(self, url):
        """Download the file referenced by an injected URL and store it.

        Extracts the real target from *url*, fetches it with a short timeout,
        and -- for https targets -- also stores the server's SSL certificate.
        Returns the (file_name, file_sha256) pair from store_file(), or
        (None, None) when the download failed with an I/O error.
        """
        injectd_url = self.extract_url(urllib2.unquote(url))
        # Initialize both results up front: the original only set file_name
        # in the error path, so the final return raised UnboundLocalError on
        # file_sha256 after an IOError.
        file_name = file_sha256 = None
        try:
            req = urllib2.Request(injectd_url)
            # Set User-Agent to look more credible
            req.add_unredirected_header('User-Agent', '-')
            # FIXME: We need a timeout on read here
            injected_file = urllib2.urlopen(req, timeout=4).read()
            #  If the file is hosted on a SSL enabled host get the certificate
            if re.match('^https', injectd_url, re.IGNORECASE):
                proto, rest = urllib2.splittype(injectd_url)
                host, rest = urllib2.splithost(rest)
                host, port = urllib2.splitport(host)
                if port is None:
                    port = 443

                cert_file = ssl.get_server_certificate((host, int(port)))
                cert_name = self.store_file(cert_file)

        except IOError as e:
            logger.exception(
                "Failed to fetch injected file, I/O error: {0}".format(e))
            # TODO: We want to handle the case where we can't download
            # the injected file but pretend to be vulnerable.
        else:
            file_name, file_sha256 = self.store_file(injected_file)
        return file_name, file_sha256
Exemplo n.º 6
0
    def download_file(self, url):
        """Download the file referenced by an injected URL and store it.

        Extracts the real target from *url*, fetches it with a short timeout,
        and -- for https targets -- also stores the server's SSL certificate.
        Returns the (file_name, file_sha256) pair from store_file(), or
        (None, None) when the download failed with an I/O error.
        """
        injectd_url = self.extract_url(urllib2.unquote(url))
        # Initialize both results up front: the original only set file_name
        # in the error path, so the final return raised UnboundLocalError on
        # file_sha256 after an IOError.
        file_name = file_sha256 = None
        try:
            req = urllib2.Request(injectd_url)
            # Set User-Agent to look more credible
            req.add_unredirected_header('User-Agent', '-')
            # FIXME: We need a timeout on read here
            injected_file = urllib2.urlopen(req, timeout=4).read()
            #  If the file is hosted on a SSL enabled host get the certificate
            if re.match('^https', injectd_url, re.IGNORECASE):
                proto, rest = urllib2.splittype(injectd_url)
                host, rest = urllib2.splithost(rest)
                host, port = urllib2.splitport(host)
                if port is None:
                    port = 443

                cert_file = ssl.get_server_certificate((host, int(port)))
                cert_name = self.store_file(cert_file)

        except IOError as e:
            logger.exception("Failed to fetch injected file, I/O error: {0}".format(e))
            # TODO: We want to handle the case where we can't download
            # the injected file but pretend to be vulnerable.
        else:
            file_name, file_sha256 = self.store_file(injected_file)
        return file_name, file_sha256
Exemplo n.º 7
0
def _add_proxies():
    """Build a requests-style proxies dict from the global proxy setting.

    Returns {"http": address, "https": address} when a proxy is configured
    (prefixing 'http://' if the setting has no scheme), or None otherwise.
    """
    if sickrage.app.config.proxy_setting:
        sickrage.app.log.debug("Using global proxy: " + sickrage.app.config.proxy_setting)
        scheme, address = urllib2.splittype(sickrage.app.config.proxy_setting)
        # splittype() yields scheme=None when the setting has no scheme; the
        # original indexed a 2-tuple with that str/None value, which raises
        # TypeError -- a conditional expression is the intended logic.
        address = (sickrage.app.config.proxy_setting if scheme
                   else 'http://{}'.format(sickrage.app.config.proxy_setting))
        return {"http": address, "https": address}
Exemplo n.º 8
0
    def _hook(self, request):
        """Finalize a urllib2 request before it is sent.

        Ensures POST bodies carry Content-type (and Content-length unless
        chunked transfer is configured), sets the Host header (using the
        selector's host when going through a proxy), copies in any default
        headers from the parent opener, and returns the request.
        """
        host = request.get_host()
        if not host:
            raise urllib2.URLError('no host given')

        if request.has_data():  # POST
            body = request.get_data()
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type', 'application/x-www-form-urlencoded')
            if not request.has_header('Content-length') and not conf.chunk:
                request.add_unredirected_header('Content-length', '%d' % len(body))

        effective_host = host
        if request.has_proxy():
            # proxied: the Host header must name the target, not the proxy
            _, selector = urllib2.splittype(request.get_selector())
            effective_host, _ = urllib2.splithost(selector)

        if not request.has_header('Host'):
            request.add_unredirected_header('Host', effective_host)

        for hdr_name, hdr_value in self.parent.addheaders:
            hdr_name = hdr_name.capitalize()
            if not request.has_header(hdr_name):
                request.add_unredirected_header(hdr_name, hdr_value)

        return request
Exemplo n.º 9
0
def _add_proxies():
    """Build a requests-style proxies dict from the global proxy setting.

    Returns {"http": address, "https": address} when a proxy is configured
    (prefixing 'http://' if the setting has no scheme), or None otherwise.
    """
    if sickrage.app.config.proxy_setting:
        sickrage.app.log.debug("Using global proxy: " + sickrage.app.config.proxy_setting)
        scheme, address = urllib2.splittype(sickrage.app.config.proxy_setting)
        # splittype() yields scheme=None when the setting has no scheme; the
        # original indexed a 2-tuple with that str/None value, which raises
        # TypeError -- a conditional expression is the intended logic.
        address = (sickrage.app.config.proxy_setting if scheme
                   else 'http://{}'.format(sickrage.app.config.proxy_setting))
        return {"http": address, "https": address}
Exemplo n.º 10
0
def get_host_from_url(url):
    """Convert *url* to its host (domain) component."""
    scheme_and_rest = urllib2.splittype(url)
    host, _ = urllib2.splithost(scheme_and_rest[1])
    return host
def _expand_recipe(content, url=''):
    """Expand recipe *content* into a flat list of target URLs.

    Each line of *content* is expected to look like ``type: target``. Lines
    whose type is in ACCEPTED_RECIPE_TYPES are processed: well-known
    $variables from COOK_VARIABLES are substituted, scheme-less targets are
    quoted and joined onto *url*, ``recipe`` targets are expanded recursively
    via recipe_to_urls(), and everything else is appended as-is.
    """
    urls = []
    for line in content.splitlines():
        line = line.lstrip().rstrip()
        try:
            target_type, target = line.split(':', 1)
        except ValueError:
            continue # blank line in recipe
        if target_type in ACCEPTED_RECIPE_TYPES:
            # Python 2 `unicode`: normalize to UTF-8 bytes before quoting
            if isinstance(target, unicode):
                target = target.encode('utf-8')
            target = target.lstrip().rstrip()
            # translate well-known variables
            for name in COOK_VARIABLES:
                target = target.replace("$"+name, COOK_VARIABLES[name])
            # Check to see if the target is a URL (has a scheme)
            # if not we want to join it to the current url before
            # carrying on.
            scheme, _ = urllib2.splittype(target)
            if not scheme:
                # quote only if not already percent-encoded
                if not '%' in target:
                    target = urllib.quote(target)
                target = urlparse.urljoin(url, target)
            if target_type == 'recipe':
                urls.extend(recipe_to_urls(target))
            else:
                urls.append(target)
    return urls
Exemplo n.º 12
0
    def doQuery(self, query, name):
        """POST an OFX *query* to the site's url over HTTPS and write the
        response body to the file *name*.

        On any failure, prints stage-specific diagnostics (via `errmsg`) and
        sets self.status = False.

        NOTE(review): if HTTPSConnection() itself raises, `h` is never bound
        and the trailing `if h:` raises NameError. NOTE(review): `response`
        holds the raw body string after a successful read, so the
        `response.status` / `response.reason` prints in the error path would
        themselves fail -- confirm.
        """
        # urllib doesn't honor user Content-type, use urllib2
        garbage, path = urllib2.splittype(FieldVal(self.site, "url"))
        host, selector = urllib2.splithost(path)
        response = False
        try:
            errmsg = "** An ERROR occurred attempting HTTPS connection to"
            h = httplib.HTTPSConnection(host, timeout=5)

            errmsg = "** An ERROR occurred sending POST request to"
            p = h.request(
                'POST', selector, query, {
                    "Content-type": "application/x-ofx",
                    "Accept": "*/*, application/x-ofx"
                })

            errmsg = "** An ERROR occurred retrieving POST response from"
            #allow up to 30 secs for the server response (it has to assemble the statement)
            h.sock.settimeout(30)
            response = h.getresponse().read()
            f = file(name, "w")
            f.write(response)
            f.close()
        except Exception as inst:
            self.status = False
            print errmsg, host
            print "   Exception type:", type(inst)
            print "   Exception Val :", inst
            if response:
                print "   HTTPS ResponseCode  :", response.status
                print "   HTTPS ResponseReason:", response.reason

        if h: h.close()
Exemplo n.º 13
0
def get_cookie(url):
    """
    Fetch usable Chrome cookies for *url* from the local cookie store.

    Builds candidate cookie-domain variants for the url's host, reads
    matching rows from Chrome's SQLite cookie database, decrypts values with
    win32crypt, and returns (cookie_str, domain).

    NOTE(review): if sqlite3.connect fails, `conn` is None and the finally
    block's `conn.close()` raises AttributeError; the `return` inside
    `finally` also swallows the CookieException raised above -- confirm this
    is intended.
    :param url:
    :return:
    """

    domain = urllib2.splithost(urllib2.splittype(url)[1])[0]
    # try both exact host and dot-prefixed / parent-domain variants
    domain_list = ['.' + domain, domain]
    if len(domain.split('.')) > 2:
        dot_index = domain.find('.')
        domain_list.append(domain[dot_index:])
        domain_list.append(domain[dot_index + 1:])
    print domain_list
    conn = None
    cookie_str = None
    try:
        conn = sqlite3.connect(r'%s\Google\Chrome\User Data\Default\Cookies' % os.getenv('LOCALAPPDATA'))
        cursor = conn.cursor()
        sql = 'select host_key, name, value, encrypted_value, path from cookies where host_key in (%s)' % ','.join(['"%s"' % x for x in domain_list])
        row_list = cursor.execute(sql).fetchall()
        cookie_list = []
        for host_key, name, value, encrypted_value, path in row_list:
            # prefer the decrypted value; fall back to the plain column
            decrypted_value = win32crypt.CryptUnprotectData(encrypted_value, None, None, None, 0)[1].decode(print_charset) or value
            cookie_list.append(name + '=' + decrypted_value)
        cookie_str = '; '.join(cookie_list)
    except Exception:
        raise CookieException()
    finally:
        conn.close()
        print cookie_str
        return cookie_str, domain
Exemplo n.º 14
0
    def doQuery(self,query,name):
        """POST an OFX *query* to the site's url over HTTPS and write the
        response body to the file *name*.

        On any failure, prints stage-specific diagnostics (via `errmsg`) and
        sets self.status = False.

        NOTE(review): if HTTPSConnection() itself raises, `h` is never bound
        and the trailing `if h:` raises NameError. NOTE(review): `response`
        holds the raw body string after a successful read, so the
        `response.status` / `response.reason` prints in the error path would
        themselves fail -- confirm.
        """
        # urllib doesn't honor user Content-type, use urllib2
        garbage, path = urllib2.splittype(FieldVal(self.site,"url"))
        host, selector = urllib2.splithost(path)
        response=False
        try:
            errmsg= "** An ERROR occurred attempting HTTPS connection to"
            h = httplib.HTTPSConnection(host, timeout=5)

            errmsg= "** An ERROR occurred sending POST request to"
            p = h.request('POST', selector, query, 
                     {"Content-type": "application/x-ofx",
                      "Accept": "*/*, application/x-ofx"}
                     )

            errmsg= "** An ERROR occurred retrieving POST response from"
            #allow up to 30 secs for the server response (it has to assemble the statement)
            h.sock.settimeout(30)      
            response = h.getresponse().read()
            f = file(name,"w")
            f.write(response)
            f.close()
        except Exception as inst:
            self.status = False
            print errmsg, host
            print "   Exception type:", type(inst)
            print "   Exception Val :", inst
            if response:
                print "   HTTPS ResponseCode  :", response.status
                print "   HTTPS ResponseReason:", response.reason

        if h: h.close()
Exemplo n.º 15
0
def file_or_url_context(resource_name):
    """Yield name of file from the given resource (i.e. file or url).

    For URLs the resource is downloaded to a named temporary file (with the
    'Host' header derived from the URL), the temp file's name is yielded, and
    the file is removed afterwards. Local paths are yielded unchanged.
    """
    if is_url(resource_name):
        _, ext = os.path.splitext(resource_name)
        tmp_name = None
        try:
            with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as f:
                # Remember the path before any network I/O so cleanup works
                # even if the download raises. (The original referenced
                # f.name in `finally`, which raised NameError whenever
                # NamedTemporaryFile itself failed before binding `f`.)
                tmp_name = f.name
                proto, rest = urllib2.splittype(resource_name)
                HOST, rest = urllib2.splithost(rest)
                HEADER['Host'] = HOST
                req = urllib2.Request(resource_name, headers=HEADER)
                u = urllib2.urlopen(req, timeout=4)
                f.write(u.read())
            # f must be closed before yielding
            yield tmp_name
        finally:
            if tmp_name is not None:
                os.remove(tmp_name)
    else:
        yield resource_name


# from skimage.io import imread
# fname = "https://imgsa.baidu.com/forum/w%3D580/sign=e960450646086e066aa83f4332097b5a/36844b59252dd42a79cdd89c093b5bb5c8eab874.jpg"
# with file_or_url_context(fname) as f:
#     img = imread(f)
#     print img.shape
Exemplo n.º 16
0
    def getRSSFeed(self, url, post_data=None, items=None):
        """Fetch this provider's RSS feed, routing through any configured proxy.

        Sets/clears the Referer header according to the provider proxy or the
        global PROXY_SETTING, then delegates to RSSFeeds(...).getFeed().
        `items` previously defaulted to a shared mutable list ([]); it now
        defaults to None and is normalized here -- backward-compatible for
        all callers.
        """
        if items is None:
            items = []
        handlers = []

        if self.provider.proxy.isEnabled():
            self.provider.headers.update(
                {'Referer': self.provider.proxy.getProxyURL()})
        elif sickbeard.PROXY_SETTING:
            logger.log("Using proxy for url: " + url, logger.DEBUG)
            scheme, address = urllib2.splittype(sickbeard.PROXY_SETTING)
            # prefix a scheme when the setting lacks one
            address = sickbeard.PROXY_SETTING if scheme else 'http://' + sickbeard.PROXY_SETTING
            handlers = [
                urllib2.ProxyHandler({
                    'http': address,
                    'https': address
                })
            ]
            self.provider.headers.update({'Referer': address})
        elif 'Referer' in self.provider.headers:
            # no proxy configured: drop any stale proxy referer
            self.provider.headers.pop('Referer')

        return RSSFeeds(self.providerID).getFeed(
            self.provider.proxy._buildURL(url),
            post_data,
            self.provider.headers,
            items,
            handlers=handlers)
Exemplo n.º 17
0
    def request(self, method, url, headers=None, params=None, proxies=None, cache=True, verify=False, *args, **kwargs):
        """Issue an HTTP request with SickRage session defaults.

        Adds gzip/deflate and the app user-agent headers, enables certifi
        certificate verification when configured, routes through the global
        proxy (unless a Referer is already set), mounts a DB-backed caching
        adapter when *cache* is true, logs hints for SSL errors, and returns
        the requests.Response object.
        """
        if headers is None: headers = {}
        if params is None: params = {}
        if proxies is None: proxies = {}

        headers['Accept-Encoding'] = 'gzip, deflate'
        headers["User-Agent"] = sickrage.app.user_agent

        # request session ssl verify
        if sickrage.app.config.ssl_verify:
            try:
                verify = certifi.where()
            except Exception:  # narrowed from a bare except; keep best-effort
                pass

        # request session proxies
        if 'Referer' not in headers and sickrage.app.config.proxy_setting:
            sickrage.app.log.debug("Using global proxy: " + sickrage.app.config.proxy_setting)
            scheme, address = urllib2.splittype(sickrage.app.config.proxy_setting)
            # splittype() yields scheme=None when no scheme is present; the
            # original indexed a 2-tuple with that str/None value, which
            # raises TypeError -- a conditional expression is the intent.
            address = (sickrage.app.config.proxy_setting if scheme
                       else 'http://{}'.format(sickrage.app.config.proxy_setting))
            proxies.update({"http": address, "https": address})
            headers.update({'Referer': address})

        # setup caching adapter
        if cache:
            adapter = CacheControlAdapter(DBCache(os.path.abspath(os.path.join(sickrage.app.data_dir, 'sessions.db'))))
            self.mount('http://', adapter)
            self.mount('https://', adapter)

        # get web response
        response = super(WebSession, self).request(
            method,
            url,
            headers=headers,
            params=params,
            verify=verify,
            proxies=proxies,
            hooks={'response': WebHooks.log_url},
            *args, **kwargs
        )

        try:
            # check web response for errors
            response.raise_for_status()
        except requests.exceptions.SSLError as e:
            if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
                sickrage.app.log.info(
                    "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                        e.request.url, ssl.OPENSSL_VERSION))

            if sickrage.app.config.ssl_verify:
                sickrage.app.log.info(
                    "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                        e.request.url))
        except Exception:
            pass

        return response
Exemplo n.º 18
0
    def request(self, method, url, headers=None, params=None, proxies=None, cache=True, verify=False, *args, **kwargs):
        """Issue an HTTP request with srSession defaults.

        Normalizes the url, adds gzip/deflate and a random user agent,
        enables certifi certificate verification when configured, routes
        through the global proxy (unless a Referer is already set), wraps the
        session in a DB-backed CacheControl class when *cache* is true, logs
        hints for SSL errors, and returns the requests.Response object.
        """
        if headers is None: headers = {}
        if params is None: params = {}
        if proxies is None: proxies = {}

        url = self.normalize_url(url)

        headers.update({'Accept-Encoding': 'gzip, deflate'})
        headers.update(random.choice(USER_AGENTS))

        # request session ssl verify
        if sickrage.srCore.srConfig.SSL_VERIFY:
            try:
                verify = certifi.where()
            except Exception:  # narrowed from a bare except; keep best-effort
                pass

        # request session proxies
        if 'Referer' not in headers and sickrage.srCore.srConfig.PROXY_SETTING:
            sickrage.srCore.srLogger.debug("Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
            scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
            # splittype() yields scheme=None when no scheme is present; the
            # original indexed a 2-tuple with that str/None value, which
            # raises TypeError -- a conditional expression is the intent.
            address = (sickrage.srCore.srConfig.PROXY_SETTING if scheme
                       else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING))
            proxies.update({"http": address, "https": address})
            headers.update({'Referer': address})

        # setup session caching
        if cache:
            cache_file = os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))
            self.__class__ = cachecontrol.CacheControl(self,
                                                       cache=DBCache(cache_file),
                                                       heuristic=ExpiresAfter(days=7)).__class__

        # get web response
        response = super(srSession, self).request(method,
                                                  url,
                                                  headers=headers,
                                                  params=params,
                                                  verify=verify,
                                                  proxies=proxies,
                                                  *args, **kwargs)

        try:
            # check web response for errors
            response.raise_for_status()
        except requests.exceptions.SSLError as e:
            if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                        e.request.url, ssl.OPENSSL_VERSION))

            if sickrage.srCore.srConfig.SSL_VERIFY:
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                        e.request.url))
        except Exception:
            pass

        return response
Exemplo n.º 19
0
def get_domain(url):
    """Return the registrable domain of *url*.

    Tries get_tld() first; on failure falls back to stripping whitespace and
    extracting the host with the urllib2 split helpers.
    """
    try:
        return get_tld(url)
    except Exception:  # narrowed from a bare except (which also caught SystemExit/KeyboardInterrupt)
        # Per the original comment's stated intent, drop ALL whitespace first
        # (the original ``"".join(url)`` was a no-op on a str).
        base_url = "".join(url.split())
        protocol, rest = urllib2.splittype(base_url)
        host, rest = urllib2.splithost(rest)
        return host
def get_local_name(url):
    """Derive a local file name from *url*: the final path component, or the
    host when the url has no path."""
    trimmed = re.sub('[\/]+$', '', url.strip())
    remainder = urllib2.splittype(trimmed)[1]
    host, remainder = urllib2.splithost(remainder)
    return host if not remainder else os.path.basename(remainder)
Exemplo n.º 21
0
 def decorator(*args, **kwargs):
     """Redirect plain-http requests to https when secure cookies are on.

     Wraps a Django view (args[0] is the request): if SESSION_COOKIE_SECURE
     is enabled and the request is not already secure, redirect to the same
     URL with the https scheme; otherwise call through to the wrapped view.
     """
     request = args[0]
     enabled_https = getattr(settings, 'SESSION_COOKIE_SECURE', False)
     if enabled_https and not request.is_secure():
         http_url = request.build_absolute_uri(request.get_full_path())
         # keep everything after the scheme, swap in 'https:'
         https_url = 'https:' + urllib2.splittype(http_url)[1]
         return HttpResponseRedirect(https_url)
     return func(*args, **kwargs)
Exemplo n.º 22
0
    def download(self, url, insecure):
        """ Tries to download a file from url.

        Returns the path to the local file.
        """
        # pick a downloader class by URL scheme, defaulting to Downloader
        scheme = urllib2.splittype(url)[0]
        downloader_cls = downloaders.get(scheme, Downloader)
        return downloader_cls(url, self, insecure).execute()
Exemplo n.º 23
0
 def decorator(*args, **kwargs):
     """Redirect plain-http requests to https when secure cookies are on.

     Wraps a Django view (args[0] is the request): if SESSION_COOKIE_SECURE
     is enabled and the request is not already secure, redirect to the same
     URL with the https scheme; otherwise call through to the wrapped view.
     """
     request = args[0]
     enabled_https = getattr(settings, 'SESSION_COOKIE_SECURE', False)
     if enabled_https and not request.is_secure():
         http_url = request.build_absolute_uri(request.get_full_path())
         # keep everything after the scheme, swap in 'https:'
         https_url = 'https:' + urllib2.splittype(http_url)[1]
         return HttpResponseRedirect(https_url)
     return func(*args, **kwargs)
Exemplo n.º 24
0
def get_local_name(url):
    """Derive a local file name from *url*.

    Trailing slashes are stripped; the basename of the path is returned, or
    the host when no path remains.
    """
    cleaned = re.sub('[\/]+$', '', url.strip())
    _, after_scheme = urllib2.splittype(cleaned)
    host, path = urllib2.splithost(after_scheme)
    if not path:
        return host
    return os.path.basename(path)
Exemplo n.º 25
0
    def request(self, method, url, headers=None, params=None, proxies=None, cache=True, verify=False, *args, **kwargs):
        """Issue an HTTP request with srSession defaults.

        Adds gzip/deflate and the core user-agent headers, enables certifi
        certificate verification when configured, routes through the global
        proxy (unless a Referer is already set), mounts a DB-backed caching
        adapter when *cache* is true, logs hints for SSL errors, and returns
        the requests.Response object.
        """
        if headers is None: headers = {}
        if params is None: params = {}
        if proxies is None: proxies = {}

        headers['Accept-Encoding'] = 'gzip, deflate'
        headers["User-Agent"] = sickrage.srCore.USER_AGENT

        # request session ssl verify
        if sickrage.srCore.srConfig.SSL_VERIFY:
            try:
                verify = certifi.where()
            except Exception:  # narrowed from a bare except; keep best-effort
                pass

        # request session proxies
        if 'Referer' not in headers and sickrage.srCore.srConfig.PROXY_SETTING:
            sickrage.srCore.srLogger.debug("Using global proxy: " + sickrage.srCore.srConfig.PROXY_SETTING)
            scheme, address = urllib2.splittype(sickrage.srCore.srConfig.PROXY_SETTING)
            # splittype() yields scheme=None when no scheme is present; the
            # original indexed a 2-tuple with that str/None value, which
            # raises TypeError -- a conditional expression is the intent.
            address = (sickrage.srCore.srConfig.PROXY_SETTING if scheme
                       else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING))
            proxies.update({"http": address, "https": address})
            headers.update({'Referer': address})

        # setup caching adapter
        if cache:
            adapter = CacheControlAdapter(DBCache(os.path.abspath(os.path.join(sickrage.DATA_DIR, 'sessions.db'))))
            self.mount('http://', adapter)
            self.mount('https://', adapter)

        # get web response
        response = super(srSession, self).request(
            method,
            url,
            headers=headers,
            params=params,
            verify=verify,
            proxies=proxies,
            *args, **kwargs
        )

        try:
            # check web response for errors
            response.raise_for_status()
        except requests.exceptions.SSLError as e:
            if ssl.OPENSSL_VERSION_INFO < (1, 0, 1, 5):
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}' You have {}, try upgrading OpenSSL to 1.0.1e+".format(
                        e.request.url, ssl.OPENSSL_VERSION))

            if sickrage.srCore.srConfig.SSL_VERIFY:
                sickrage.srCore.srLogger.info(
                    "SSL Error requesting url: '{}', try disabling cert verification in advanced settings".format(
                        e.request.url))
        except Exception:
            pass

        return response
Exemplo n.º 26
0
    def request(self,
                method,
                url,
                headers=None,
                params=None,
                cache=True,
                raise_exceptions=True,
                *args,
                **kwargs):
        """Issue an HTTP request via the futures-backed srSession.

        Normalizes the url, merges *params*/*headers* into kwargs, adds
        gzip/deflate and a random user agent, enables certifi certificate
        verification when configured, routes through the global proxy (unless
        a Referer is already set), wraps the session with CacheControl when
        *cache* is true, resolves the future, optionally raises for HTTP
        errors, and returns the response.
        """
        url = self.normalize_url(url)
        kwargs.setdefault('params', {}).update(params or {})
        kwargs.setdefault('headers', {}).update(headers or {})

        # if method == 'POST':
        #    self.session.headers.update({"Content-type": "application/x-www-form-urlencoded"})
        kwargs.setdefault('headers',
                          {}).update({'Accept-Encoding': 'gzip, deflate'})
        kwargs.setdefault('headers', {}).update(random.choice(USER_AGENTS))

        # request session ssl verify
        kwargs['verify'] = False
        if sickrage.srCore.srConfig.SSL_VERIFY:
            try:
                kwargs['verify'] = certifi.where()
            except Exception:  # narrowed from a bare except; keep best-effort
                pass

        # request session proxies
        if 'Referer' not in kwargs.get(
                'headers', {}) and sickrage.srCore.srConfig.PROXY_SETTING:
            sickrage.srCore.srLogger.debug(
                "Using global proxy: " +
                sickrage.srCore.srConfig.PROXY_SETTING)
            scheme, address = urllib2.splittype(
                sickrage.srCore.srConfig.PROXY_SETTING)
            # splittype() yields scheme=None when no scheme is present; the
            # original indexed a 2-tuple with that str/None value, which
            # raises TypeError -- a conditional expression is the intent.
            address = (sickrage.srCore.srConfig.PROXY_SETTING if scheme
                       else 'http://{}'.format(sickrage.srCore.srConfig.PROXY_SETTING))
            kwargs.setdefault('proxies', {}).update({
                "http": address,
                "https": address
            })
            kwargs.setdefault('headers', {}).update({'Referer': address})

        # setup session caching
        if cache:
            cache_file = os.path.abspath(
                os.path.join(sickrage.DATA_DIR, 'sessions.db'))
            cachecontrol.CacheControl(self,
                                      cache=DBCache(cache_file),
                                      heuristic=ExpiresAfter(days=7))

        # get result
        response = super(srSession, self).request(method, url, *args,
                                                  **kwargs).result()
        if raise_exceptions:
            response.raise_for_status()

        return response
Exemplo n.º 27
0
def _setUpSession(session=None, headers=None, params=None):
    """
    Returns a session initialized with default cache and parameter settings

    :param session: session object to (re)use
    :param headers: Headers to pass to session
    :param params: optional query parameters (list/dict) attached to every request
    :return: session object
    """

    # request session
    if headers is None:
        headers = {}

    # back the session with a file cache when a cache directory is available
    sessionCache = None
    FileCacheDir = sickrage.srConfig.CACHE_DIR or get_temp_dir()
    if FileCacheDir:
        sessionCache = FileCache(os.path.join(FileCacheDir, 'sessions'), use_dir_lock=True)
    session = cachecontrol.CacheControl(sess=session or requests.Session(), cache=sessionCache, cache_etags=False)

    # request session headers
    session.headers.update(headers)
    session.headers.update({'Accept-Encoding': 'gzip,deflate'})
    session.headers.update(random.choice(USER_AGENTS))

    # request session clear residual referer
    if 'Referer' in session.headers and 'Referer' not in headers:
        session.headers.pop('Referer')

    try:
        # request session ssl verify
        session.verify = False
        if sickrage.srConfig.SSL_VERIFY:
            session.verify = certifi.where()
    except Exception:
        # certifi lookup is best-effort; fall back to verify=False
        pass

    # request session proxies
    if 'Referer' not in session.headers and sickrage.srConfig.PROXY_SETTING:
        sickrage.srLogger.debug("Using global proxy: " + sickrage.srConfig.PROXY_SETTING)
        scheme, address = urllib2.splittype(sickrage.srConfig.PROXY_SETTING)
        # BUGFIX: the original indexed a tuple with `scheme` (a str or None),
        # which raises TypeError.  Use the setting as-is when it already
        # carries a scheme, otherwise default to http:// (matches the other
        # proxy helpers in this file).
        address = sickrage.srConfig.PROXY_SETTING if scheme else 'http://{}'.format(sickrage.srConfig.PROXY_SETTING)
        session.proxies = {
            "http": address,
            "https": address,
        }
        session.headers.update({'Referer': address})

    # drop any residual Content-Type header inherited from a reused session
    if 'Content-Type' in session.headers:
        session.headers.pop('Content-Type')

    # utf-8 encode unicode parameter values before storing them on the session
    if params and isinstance(params, (list, dict)):
        for param in params:
            if isinstance(params[param], unicode):
                params[param] = params[param].encode('utf-8')
        session.params = params

    return session
Exemplo n.º 28
0
    def getRSSFeed(self, url, params=None):
        """Fetch the RSS feed at *url*, routing through the global proxy when one is configured."""
        proxy = sickrage.app.config.proxy_setting
        handlers = []

        if proxy:
            sickrage.app.log.debug("Using global proxy for url: " + url)
            scheme, _ = urllib2.splittype(proxy)
            proxy_url = proxy if scheme else 'http://' + proxy
            handlers.append(urllib2.ProxyHandler({'http': proxy_url, 'https': proxy_url}))

        return getFeed(url, params=params, handlers=handlers)
Exemplo n.º 29
0
    def getRSSFeed(self, url):
        """Fetch the RSS feed at *url*, honouring the globally configured proxy."""
        handlers = []
        proxy_setting = sickrage.srCore.srConfig.PROXY_SETTING

        if proxy_setting:
            sickrage.srCore.srLogger.debug("Using global proxy for url: " + url)
            scheme, _ = urllib2.splittype(proxy_setting)
            address = proxy_setting if scheme else 'http://' + proxy_setting
            handlers = [urllib2.ProxyHandler({'http': address, 'https': address})]

        return getFeed(url, handlers=handlers)
Exemplo n.º 30
0
    def getRSSFeed(self, url, params=None):
        """Return the parsed feed for *url*; requests go through the global proxy if one is set."""
        cfg = sickrage.srCore.srConfig
        handlers = []

        if cfg.PROXY_SETTING:
            sickrage.srCore.srLogger.debug("Using global proxy for url: " + url)
            scheme, _ = urllib2.splittype(cfg.PROXY_SETTING)
            proxy_url = cfg.PROXY_SETTING if scheme else 'http://' + cfg.PROXY_SETTING
            handlers = [urllib2.ProxyHandler({'http': proxy_url, 'https': proxy_url})]

        return getFeed(url, params=params, handlers=handlers)
Exemplo n.º 31
0
 def _setup_server(self, server=None):
     if server:
         host, path = urllib2.splithost(urllib2.splittype(server)[-1])
         if not path:
             path = '/'
         self.client_con = python_webdav.client.Client(host,
                                                       webdav_path=path)
         self.client_con.set_connection('wibble', 'fish')
     else:
         print "I need a server!"
         self.client_con = None
Exemplo n.º 32
0
def parse_protocols(ctx, base_uri=None):
    """ Parse ``protocols`` from a root context.

    If protocols are not provided in root, use baseUri protocol.
    """
    protocols = ctx.get_property_with_schema('protocols', RamlRoot.protocols)
    if protocols is None and base_uri is not None:
        # fall back to the scheme of the base URI
        scheme = urllib2.splittype(base_uri)[0]
        protocols = [scheme]
    if not protocols:
        return protocols
    return [p.upper() for p in protocols]
Exemplo n.º 33
0
 def _setup_server(self, server=None):
     if server:
         host, path = urllib2.splithost(urllib2.splittype(server)[-1])
         if not path:
             path = '/'
         self.client_con = python_webdav.client.Client(host,
                                                       webdav_path=path)
         self.client_con.set_connection('wibble', 'fish')
     else:
         print "I need a server!"
         self.client_con = None
Exemplo n.º 34
0
def url_size(url):
    """Return the size in bytes of the resource at *url* (HTTP only).

    Issues a HEAD request and reads the Content-Length header.

    :param url: an http:// URL
    :return: content length as an int
    """
    import httplib, urllib2
    proto, url = urllib2.splittype(url)
    assert (proto.lower() == 'http')
    host, path = urllib2.splithost(url)
    # http://stackoverflow.com/questions/107405/how-do-you-send-a-head-http-request-in-python
    conn = httplib.HTTPConnection(host)
    try:
        conn.request('HEAD', path)
        res = conn.getresponse()
        # FIXME: Follow any redirects
        return int(res.getheader('content-length'))
    finally:
        # BUGFIX: the original leaked the connection on every call
        conn.close()
Exemplo n.º 35
0
def url_size(url):
	"""Return the size in bytes of the resource at *url* (HTTP only).

	Issues a HEAD request and reads the Content-Length header.
	"""
	import httplib, urllib2
	proto, url = urllib2.splittype(url)
	assert(proto.lower() == 'http')
	host, path = urllib2.splithost(url)
	# http://stackoverflow.com/questions/107405/how-do-you-send-a-head-http-request-in-python
	conn = httplib.HTTPConnection(host)
	try:
		conn.request('HEAD', path)
		res = conn.getresponse()
		# FIXME: Follow any redirects
		return int(res.getheader('content-length'))
	finally:
		# BUGFIX: the original leaked the connection on every call
		conn.close()
Exemplo n.º 36
0
            echo(content)
        except Exception as err:
            pass
        finally:
            return content

    def pageCsContentImage(self,cspage):
        """Rewrite relative <img> sources in *cspage* to absolute URLs built from the configured page URL."""
        # split the configured URL into scheme and remainder, then extract the host
        proto, rest = urllib2.splittype(self.config.cfgUrl)  
        host, rest = urllib2.splithost(rest)
        # NOTE(review): the snippet ends here mid-function in this scrape;
        # the loop rewriting each img src appears to be truncated.
        csimgs=cspage.Find("img")        
Exemplo n.º 37
0
 def lamda(self, csblock):
     """Extract an href from *csblock*, strip escape characters, and absolutise root-relative links."""
     link = csblock[0].Attr("href") or csblock[1]
     if link:
         # drop backslashes and embedded double quotes
         link = link.replace("\\", "").replace("\"", "")
     if link and link.startswith("/"):
         # prefix scheme and host taken from the configured page URL
         scheme, remainder = urllib2.splittype(self.config.cfgUrl)
         host, remainder = urllib2.splithost(remainder)
         link = scheme + "://" + host + link
     return link
Exemplo n.º 38
0
def parse_protocols(ctx, base_uri=None):
    """ Parse ``protocols`` from a root context.

    If protocols are not provided in root, use baseUri protocol.
    """
    result = ctx.get_property_with_schema(
        'protocols', RamlRoot.protocols)
    if result is None and base_uri is not None:
        # no explicit protocols: derive one from the base URI's scheme
        result = [urllib2.splittype(base_uri)[0]]
    return [proto.upper() for proto in result] if result else result
Exemplo n.º 39
0
 def url_split(url):
     """Split *url* into protocol and domain, defaulting to https when no scheme is present.

     NOTE(review): this function assigns protocol/domain/rest but never
     returns them -- the snippet appears truncated; confirm against the
     original source before relying on it.
     """
     protocol = None
     domain = None
     rest = None
     try:
         protocol, rest = urllib2.splittype(url)
         if not protocol:
             # no scheme given: assume https and restore the '//' so that
             # splithost can still find the authority component
             protocol = 'https'
             rest = '//' + rest
         domain, rest = urllib2.splithost(rest)
     except Exception, e:
         # parsing failures are logged, not raised
         log.msg(traceback.format_exc(), level=log.ERROR)
    def _get_pingback_server(self, target):
        " Try to find the target's pingback xmlrpc server address "

        # first try to find the pingback server in the HTTP header
        try:
            # HEAD request to the target; a pingback endpoint, if any, is
            # advertised via the X-Pingback response header
            host, path = urllib2.splithost(urllib2.splittype(target)[1])
            conn = httplib.HTTPConnection(host)
            conn.request('HEAD', path)
            res = conn.getresponse()
            server = dict(res.getheaders()).get('x-pingback')
        except Exception, e:
            raise PingbackClientError(e.message)
        # NOTE(review): `server` is unused beyond this point in this scrape;
        # the snippet appears truncated (the "first try" comment implies a
        # fallback that is not visible here).
    def _get_pingback_server(self, target):
        " Try to find the target's pingback xmlrpc server address "

        # first try to find the pingback server in the HTTP header
        try:
            # issue a HEAD request and read the X-Pingback header, which
            # (when present) names the pingback XML-RPC endpoint
            host, path = urllib2.splithost(urllib2.splittype(target)[1])
            conn = httplib.HTTPConnection(host)
            conn.request('HEAD', path)
            res = conn.getresponse()
            server = dict(res.getheaders()).get('x-pingback')
        except Exception, e:
            # any network/parse failure becomes a client error
            raise PingbackClientError(e.message)
        # NOTE(review): snippet appears truncated here; `server` is never used.
Exemplo n.º 42
0
def RemoteAccess(url, *args, **kwargs):
    """Connect to a remote Subversion server

    :param url: URL to connect to
    :return: RemoteAccess object
    :raises SubversionException: if the URL scheme has no registered handler
    """
    if isinstance(url, bytes):
        url = url.decode("utf-8")
    # renamed from `type` to avoid shadowing the builtin
    (url_type, opaque) = splittype(url)
    if url_type not in url_handlers:
        raise SubversionException("Unknown URL type '%s'" % url_type, ERR_BAD_URL)
    return url_handlers[url_type](url, *args, **kwargs)
Exemplo n.º 43
0
    def getRSSFeed(self, url):
        """Fetch *url* as a feed; use the global proxy (and set Referer) when configured."""
        handlers = []
        proxy = sickbeard.PROXY_SETTING

        if proxy:
            logger.log(u"Using global proxy for url: " + url, logger.DEBUG)
            scheme, _ = urllib2.splittype(proxy)
            proxy_url = proxy if scheme else "http://" + proxy
            handlers.append(urllib2.ProxyHandler({"http": proxy_url, "https": proxy_url}))
            self.provider.headers.update({"Referer": proxy_url})
        elif "Referer" in self.provider.headers:
            # no proxy configured: drop any stale Referer from a previous run
            self.provider.headers.pop("Referer")

        return getFeed(url, request_headers=self.provider.headers, handlers=handlers)
Exemplo n.º 44
0
def get_host(url):
    '''
    Extract the host (domain) portion of *url*.
    :param url: the URL to parse
    :return: the host string; exits the process when no host can be found
    '''
    proto, rest = urllib2.splittype(url)
    res, rest = urllib2.splithost(rest)
    if res:
        return res
    else:
        # NOTE(review): exiting with status 0 signals success despite the
        # failure; a nonzero code (or raising) would be more conventional.
        print "获取host" + url + "失败"
        sys.exit(0)
Exemplo n.º 45
0
def format_and_filter_urls(base_url, url):
    """Expand a relative or root-relative *url* against *base_url* and strip any fragment."""
    # root-relative URL: rebuild it from the base URL's scheme and host
    if url.startswith('/'):
        base_url = "".join(base_url.split())  # remove all whitespace
        scheme, remainder = urllib2.splittype(base_url)
        host, remainder = urllib2.splithost(remainder)
        url = (scheme + "://" + host).rstrip('/') + "/" + url.lstrip('/')

    # relative URL: append it to the base
    # NOTE(review): lstrip('./') strips any leading run of '.' and '/'
    # characters, not the literal './' prefix (e.g. '../x' -> 'x').
    if url.startswith('.') or not url.startswith('http'):
        url = base_url.rstrip('/') + "/" + url.lstrip('./')

    # drop the anchor/fragment part
    return url.split('#')[0]
Exemplo n.º 46
0
def go(url):
    protocol, address=urllib2.splittype(url)
#    print protocol,address
    if protocol == "http":
        global host;
        host,path=urllib2.splithost(address)
#        print host,path;
        content = getPageContent(url);
        soup = BeautifulSoup(content,'html.parser');
        getAllImage(soup);
        getAllHyperlink(soup);
    else :
        print 'URL is not http'
Exemplo n.º 47
0
def go(url):
    protocol, address = urllib2.splittype(url)
    #    print protocol,address
    if protocol == "http":
        global host
        host, path = urllib2.splithost(address)
        #        print host,path;
        content = getPageContent(url)
        soup = BeautifulSoup(content, 'html.parser')
        getAllImage(soup)
        getAllHyperlink(soup)
    else:
        print 'URL is not http'
Exemplo n.º 48
0
    def getRSSFeed(self, url):
        """Fetch *url* as a feed, applying the global proxy and Referer header when set."""
        handlers = []
        proxy_setting = sickrage.PROXY_SETTING

        if proxy_setting:
            sickrage.LOGGER.debug("Using global proxy for url: " + url)
            scheme, _ = urllib2.splittype(proxy_setting)
            address = proxy_setting if scheme else 'http://' + proxy_setting
            handlers = [urllib2.ProxyHandler({'http': address, 'https': address})]
            self.provider.headers.update({'Referer': address})
        elif 'Referer' in self.provider.headers:
            # no proxy: remove any leftover Referer header
            self.provider.headers.pop('Referer')

        return getFeed(url, request_headers=self.provider.headers, handlers=handlers)
Exemplo n.º 49
0
def get_host(url):
    """
    Extract the host (domain) portion of *url*.
    :param url: the URL to parse
    :return: the host string; exits the process when no host can be found
    """
    proto, rest = urllib2.splittype(url)
    res, rest = urllib2.splithost(rest)
    if res:
        return res
    else:
        # NOTE(review): exit status 0 indicates success even though this is
        # the failure path; consider a nonzero code or an exception.
        print "获取host" + url + "失败"
        sys.exit(0)
Exemplo n.º 50
0
 def lamda(self, csblock):
     """Build a mobile article URL from the feed entry in *csblock*; fall back to the raw href."""
     item = csblock[0]
     href = item.Select("url").Text()
     tag = item.Select("tag").Text()
     try:
         name = item.Find("name").Text().encode('utf8')
         when = item.Select("time").Text()
         _, remainder = urllib2.splittype(href)
         # the last path component minus its extension is the article id;
         # rindex raises ValueError when '/' or '.' is missing, which drops
         # us into the fallback below
         leaf = remainder[remainder.rindex("/") + 1:]
         article_id = leaf[:leaf.rindex(".")]
         return "http://3g.ali213.net/gl/m/" + article_id + ".html?d=" + when + "&t=" + tag + "&n=" + name
     except Exception as err:
         return href
Exemplo n.º 51
0
 def __init__(self, url):
     """Decompose *url* into schema, user/password, host, port, path, query and tag attributes."""
     self.url = url
     # scheme (e.g. 'http') and the remainder of the URL
     self.schema, url = urllib2.splittype(url)
     host, path = urllib2.splithost(url)
     # optional 'user:password@' prefix on the host
     userpass, host = urllib2.splituser(host)
     if userpass:
         self.user, self.password = urllib2.splitpasswd(userpass)
     # query string after '?'; may be None
     path, self.querystring = urllib.splitquery(path)
     # '&'-separated query parts, or [] when there is no query string
     self.query = self.querystring and self.querystring.split('&') or []
     #urllib.splitquery(url)
     self.host, self.port = urllib2.splitport(host)
     # fragment after '#'
     path, self.tag = urllib2.splittag(path)
     self.path = path.strip('/')
Exemplo n.º 52
0
def load_json_from_url(*args, **kwargs):
    """Load and return a JSON object obtained from a URL.

    The URL is built by joining the positional arguments with '/';
    keyword arguments become the query string.

    :return: the decoded JSON object
    """
    url = '/'.join(args)
    scheme, url = urllib2.splittype(url)
    # normalise the path: single trailing slash, no duplicate slashes
    url = url.strip('/') + '/'
    while '//' in url:
        url = url.replace('//', '/')
    url_req = '://'.join((scheme, url))
    if kwargs:
        url_req += '?' + urllib.urlencode(kwargs)
    req = urllib2.Request(url_req)
    opener = urllib2.build_opener()
    response = opener.open(req)
    try:
        return json.load(response)
    finally:
        # BUGFIX: close the HTTP response instead of leaking it
        response.close()
Exemplo n.º 53
0
def gain_links(url='http://www.jianshu.com/p/05cfea46e4fd'):
    """Collect all non-javascript links from *url*, absolutise them, and save them to links_list.txt."""
    html_page = urllib2.urlopen(url)
    anchors = BeautifulSoup(html_page).findAll('a')
    hrefs = []
    for anchor in anchors:
        target = anchor.get('href')
        if target and not target.startswith('javascript:'):
            hrefs.append(target)
    scheme, remainder = urllib2.splittype(url)  # split scheme from the rest of the URL
    domain = urllib2.splithost(remainder)[0]  # host part of the URL
    completed = []
    for link in hrefs:
        if link[0] == '/':
            # root-relative: prefix scheme and host
            completed.append(scheme + '://' + domain + link)
        elif link[0] == '#':
            # in-page anchor: append to the page URL
            completed.append(url + link)
        else:
            completed.append(link)
    with open('links_list.txt', 'w') as f:
        f.write('\n'.join(completed))
Exemplo n.º 54
0
def _path(uri):
	"""Convert a file:// URI into a filesystem path encoded with *fsenc*."""
	# gobject hands us a unicode URI whose percent-escapes decode to utf-8
	# byte sequences; forcing it down to ascii bytes first lets python2's
	# unquote leave those raw utf-8 bytes intact
	uri = uri.encode('ascii')
	# XXX python3 note: this hack should not be necessary, and will break
	scheme, path = parse.splittype(uri)
	if scheme != 'file':
		raise ValueError("%r type is not 'file'" % (scheme,))
	# drop the leading '//', percent-decode, then recode for the filesystem
	return parse.unquote(path[2:]).decode('utf-8').encode(fsenc)
Exemplo n.º 55
0
def getHtmlByUrl(url):
    """Fetch *url* (10s timeout) and return its body; record host->port in the global `domains` map.

    Returns None on any fetch error (errors are deliberately swallowed).
    """
    global domains
    try:
           u = urllib2.urlopen(url,timeout = 10.0)
           content = u.read()
           if content !="":
               try:
                  # best-effort: remember the host and port of a successful fetch
                  proto, rest = urllib2.splittype(url)
                  host, rest = urllib2.splithost(rest) 
                  host, port = urllib2.splitport(host)
                  # NOTE(review): splitport returns port=None when absent, so
                  # int(None) raises and is silently swallowed just below
                  domains[host] = int(port)
               except:
                  pass
           return content
    except:
           pass