def do_open(self, http_class, request):
    """Return the response object for the given request.

    Overrides the HTTPHandler method of the same name to return a
    FakeResponse instead of creating any network connections.

    Args:
      http_class: The http protocol being used.
      request: A urllib2.Request object.

    Returns:
      A FakeResponse object.

    Raises:
      Error: if a response map is configured but the request is not in it.
      Exception: the canned exception instance, when one was configured.
    """
    # Store the most recent request object for later inspection.
    self.__class__.request = request
    if self._response_map:
        key = create_response_key(
            request.get_method(), request.get_full_url(), request.headers)
        if str(key) in self._response_map:
            (code, body, response_headers) = self._response_map[str(key)]
            return FakeResponse(code, body, response_headers)
        raise Error('Unknown request %s %s'
                    '\nrequest:%s\nresponse_map:%s' % (
                        request.get_method(), request.get_full_url(),
                        str(key),
                        # BUG FIX: pformat() of a bare iterator prints the
                        # iterator object, not the keys; materialize first.
                        pformat(list(self._response_map.keys()))))
    elif isinstance(self._response, Exception):
        # BUG FIX: the original `raise self` raised the handler object,
        # which is not an exception and fails with TypeError; raise the
        # canned exception instead.
        raise self._response
    else:
        return self._response
def glsrequest(uri, method, data=None):
    '''
    Returns xml node tree as Element instance.

    'uri' may be absolute or relative to _BASEURI
    'method' in ('GET', 'POST', 'PUT')
    'data' can be a string or Element instance

    Raises GlslibException for unsupported methods; re-raises HTTP/URL
    errors after logging.
    '''
    if method not in {'GET', 'POST', 'PUT'}:
        raise GlslibException(MSGUNSUPPORTEDMETHOD % method)
    # Resolve relative URIs against the configured base.
    if not uri.startswith(_BASEURI):
        uri = _BASEURI.rstrip('/') + '/' + uri.lstrip('/')
    request = urllib.request.Request(uri)
    request.add_header("Authorization", "Basic %s" % _AUTHSTR)
    if etree.iselement(data):
        # tostring generates bytestring (as required for data)
        data = etree.tostring(data)
        request.add_header('Content-Type', 'application/xml')
        # BUG FIX: Request.add_data() was removed in Python 3.4; assign the
        # payload to the `data` attribute instead.
        request.data = data
    request.get_method = lambda: method
    msg = '%s %s\n%s\n%s' % (request.get_method(),
                             request.get_full_url(),
                             request.headers,
                             data.decode('utf-8') if data else '')
    logger.debug(msg)
    try:
        r = urllib.request.urlopen(request)
        return etree.XML(r.read())
    except urllib.error.HTTPError as httperr:
        logger.error(httperr.read())
        raise
    except urllib.error.URLError as urlerr:
        logger.error(request.get_full_url())
        raise
def default_open(self, request):
    # Serve GET requests straight from the cache when a cached copy
    # exists; anything else is fetched from the network directly.
    target = request.get_full_url()
    cache_hit = (request.get_method() == "GET"
                 and CachedResponse.ExistsInCache(self.cacheLocation, target))
    if cache_hit:
        return CachedResponse(self.cacheLocation, target, setCacheHeader=True)
    return urllib.request.urlopen(target)
def _send_request(self, url, data=None):
    """Make the rpc.

    Creates a request object from a base url concatenated with the
    additional url information provided by the argument. Adds headers,
    such as content type, length, ..., as well as the OAuth2.0 header.
    For PUT calls, converts the data object to JSON and encodes.

    Arguments:
        url (str): Specific url details to add to the base url for the
            request.
        data (dict): Dictionary with details required for the request.
    """
    base_url = "https://api.tdameritrade.com/v1/"
    url = base_url + url
    request = urllib.request.Request(url)
    request.add_header("Authorization",
                       "Bearer {}".format(self.oauth_hash).encode("utf-8"))
    if data is None:
        self._logger.info("URL: %s", request.get_full_url())
        self._logger.debug("headers: %s", request.headers)
        response = urllib.request.urlopen(request)
    else:
        # BUG FIX: add_header() takes the header name and value as two
        # separate arguments; the original passed a single triple-quoted
        # string, which raises TypeError.
        request.add_header("Content-Type", "application/json; charset=utf-8")
        data = json.dumps(data).encode("utf-8")
        request.add_header("Content-Length", len(data))
        self._logger.info("URL: %s", request.get_full_url())
        self._logger.debug("headers: %s", request.headers)
        self._logger.debug("data: %s", data)
        response = urllib.request.urlopen(request, data=data)
    # BUG FIX: the original parsed the body only for the data-less branch,
    # so write calls logged a stale (or missing) self.message.
    self.message = json.loads(response.read().decode("utf-8"))
    status = response.getcode()
    if status in (200, 201):
        self._logger.info("response: %s", self.message)
    else:
        self._logger.error("response: %s", self.message)
def http_response(self, request, response):
    # Cache GET responses; pass everything else through untouched.
    if request.get_method() != "GET":
        return response
    url = request.get_full_url()
    if 'd-cache' in response.info():
        # Already served from the cache upstream.
        return CachedResponse(self.cacheLocation, url, setCacheHeader=True)
    CachedResponse.StoreInCache(self.cacheLocation, url, response)
    return CachedResponse(self.cacheLocation, url, setCacheHeader=False)
def default_open(self, request):
    '''Respond to the request by first checking if there is a cached
    response otherwise defer to http handler'''
    if request.get_method() != "GET":
        return None  # not cacheable; let the next handler try
    url = request.get_full_url()
    if not CachedResponse.ExistsInCache(self.cacheLocation, url):
        return None  # cache miss; let the next handler try
    return CachedResponse(self.cacheLocation, url, setCacheHeader=True)
def queryApi(urlSuffix, isPretty=False):
    """Query the OpenShift API and return the raw response bytes.

    Args:
        urlSuffix: path fragment appended to OpenShiftQuery.API_URL.
        isPretty: when True, request pretty-printed JSON.

    Re-raises any error from urlopen after logging the failing URL.
    """
    prettyPrintParam = '?pretty=true' if isPretty else ''
    request = urllib.request.Request(
        OpenShiftQuery.API_URL + urlSuffix + prettyPrintParam,
        headers={'Authorization': 'Bearer ' + OpenShiftQuery.getToken(),
                 'Accept': 'application/json'})
    logger.debug('query for: "%s"', request.get_full_url())
    try:
        return urllib.request.urlopen(
            request, cafile=OpenShiftQuery.CERT_FILE_PATH).read()
    # BUG FIX: the bare `except:` also intercepted KeyboardInterrupt and
    # SystemExit; catch Exception, log, and re-raise as before.
    except Exception:
        logger.critical('Cannot query OpenShift API for "%s"',
                        request.get_full_url())
        raise
def oauth(self, step):
    # Two-step OAuth2 flow for the TD Ameritrade API.
    #   step 1: open the interactive authorization page and capture the
    #           one-time authorization code the user pastes back in.
    #   step 2: exchange the code for refresh/access tokens and persist
    #           them to configTDA.ini.
    if step == 1:
        import webbrowser
        authorize_url = "https://auth.tdameritrade.com/auth?response_type=code&redirect_uri=http%3a%2f%2flocalhost%3a8080&client_id=sware%40AMER.OAUTHAP"
        webbrowser.open(authorize_url, new=1, autoraise=True)
        # copy resulting string from after code=. url decode. let that become self._code
        print("type what is after 'code=' in the URI:")
        code = urllib.parse.unquote(input())
        self._code = code
        step = 2  # fall through to the token exchange below
    if step == 2:
        step2url = "https://api.tdameritrade.com/v1/oauth2/token?"
        step2args = {
            'grant_type': 'authorization_code',
            'refresh_token': '',
            'access_type': 'offline',
            'code': self._code,
            'client_id': self._config['tda_api_key'],
            'redirect_uri': 'http://localhost:8080'
        }
        args = urllib.parse.urlencode(step2args).encode("utf-8")
        headers = {"Content-Type": 'application/x-www-form-urlencoded'}
        request = urllib.request.Request(step2url, data=args,
                                         headers=headers, method='POST')
        pprint(request.get_full_url())
        pprint(request.headers)
        try:
            response = urllib.request.urlopen(request)
        except urllib.error.HTTPError as e:
            # Surround the error with markers so it stands out on the console.
            print("v" * 20)
            print("Error occurred fetching {0}".format(
                request.get_full_url()))
            print(e)
            print("^" * 20)
        else:
            html = response.read()
            d = json.loads(html)
            pprint(d)
            # TODO: abstract config saving out. This is duplicate code from self.refreshToken()
            configTDA = configparser.ConfigParser()
            configTDA.read('configTDA.ini')
            if not 'OAUTH' in configTDA.sections():
                configTDA.add_section('OAUTH')
            configTDA.set('OAUTH', 'refresh_token', d['refresh_token'])
            configTDA.set('OAUTH', 'access_token', d['access_token'])
            with open('configTDA.ini', 'w') as configfile:
                configTDA.write(configfile)
def http_request(self, request):
    # Log the outgoing request line and every header, then pass the
    # request along unchanged.
    full_url = request.get_full_url()
    host = request.host
    url_path = full_url[full_url.find(host) + len(host):]
    log_url(self.log, "Requesting: ", full_url, TRACE_LEVEL)
    self.log.log(self.log_level,
                 "%s %s" % (request.get_method(), url_path))
    for name, value in request.header_items():
        self.log.log(self.log_level, " . %s: %s" % (name, value))
    return request
def URLOpen(request):
    """Open *request* and return the response, or None on any failure.

    Dispatches to urllib.request on Python 3 and urllib2 on Python 2.
    """
    try:
        if sys.version_info.major > 2:
            return urllib.request.urlopen(request)
        return urllib2.urlopen(request)
    # BUG FIX: the bare `except:` also swallowed KeyboardInterrupt and
    # SystemExit; Exception still covers every URL/HTTP failure.
    except Exception:
        print('Could not open URL: {0}'.format(request.get_full_url()))
        return None
def http_request(self, request):
    """Dump the full request (URL, request line, headers) to self.httpout.

    Only active in debug runs (python without -O). Returns the request
    unchanged so the handler chain continues.
    """
    if __debug__:
        # BUG FIX: Request.get_host() was removed in Python 3.4; the
        # `host` attribute is the supported spelling.
        host, full_url = request.host, request.get_full_url()
        url_path = full_url[full_url.find(host) + len(host):]
        self.httpout.write("%s\n" % full_url)
        self.httpout.write("\n")
        self.httpout.write("%s %s\n" % (request.get_method(), url_path))
        for header in request.header_items():
            self.httpout.write("%s: %s\n" % header[:])
        self.httpout.write("\n")
    return request
def __getRepositoryItems__(self, item, state='all', page='0', direction='desc'):
    # A repository must be configured before any repo-scoped query.
    if not self.repository:
        raise Exception("Github", "No repository set")
    query = "page=" + page
    if state:
        query += "&state=" + state
    if direction:
        query += "&direction=" + direction
    request = self.__getRequest__(
        "repos/" + self.repository + "/" + item + "?" + query)
    # Cache responses per method+URL so repeated calls hit the network once.
    key = request.get_method() + ":" + request.get_full_url()
    if key not in self.__data_caching__:
        response = urllib.request.urlopen(request)
        self.__data_caching__[key] = {
            "response": response,
            "content": json.loads(response.read().decode("utf-8")),
        }
    entry = self.__data_caching__[key]
    return entry["response"], entry["content"]
def default_open(self, request):
    """Handles GET requests, if the response is cached it returns it.

    Returns None for non-GET requests and cache misses so the next
    handler in the chain gets a chance.
    """
    # BUG FIX: the original used `is not "GET"`, which compares object
    # identity, not equality — unreliable for strings.
    if request.get_method() != "GET":
        return None  # let the next handler try to handle the request
    if exists_in_cache(self.cache_location, request.get_full_url(),
                       self.max_age):
        return CachedResponse(self.cache_location,
                              request.get_full_url(),
                              set_cache_header=True)
    return None
def load_baidu():
    url = 'http://www.baidu.com'
    # Static request headers (the user-agent spoofs a desktop Chrome).
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
    }
    request = urllib.request.Request(url, headers=headers)
    # Headers can also be attached after construction.
    request.add_header('user-name', 'Justin')
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")
    # Inspect the headers that were attached to the request ...
    request_headers = request.headers
    print(request_headers)
    # ... and the fully resolved request URL.
    full_url = request.get_full_url()
    print(full_url)
def _send_data(self, request, data, filename, content_type, schema,
               urlencoded):
    # Upload *data* (or the contents of *filename*) with the proper
    # Content-type, then validate the wrapped response against *schema*.
    self._logger.info(request.get_full_url())
    f = None
    if content_type and urlencoded:
        msg = 'content_type and urlencoded are mutually exclusive'
        raise ValueError(msg)
    if content_type:
        ctype = content_type
    elif urlencoded:
        ctype = 'application/x-www-form-urlencoded'
    else:
        ctype = 'application/octet-stream'
    request.add_header('Content-type', ctype)
    try:
        if filename:
            f = self._send_file(request, filename, urlencoded)
        else:
            if urlencoded:
                data = urllib.parse.quote_plus(data)
            f = urllib.request.urlopen(request, data)
    except urllib.error.HTTPError as e:
        raise Urllib2HTTPError(e)
    f = self._new_response(f)
    self._validate_response(f, schema)
    return f
def test_instagram_oembed_return_values(self, urlopen):
    """Finder returns the oEmbed payload and calls the v9.0 Graph API."""
    urlopen.return_value = self.dummy_response
    result = InstagramOEmbedFinder(
        app_id='123', app_secret='abc').find_embed("https://instagr.am/p/CHeRxmnDSYe/")
    self.assertEqual(
        result,
        {
            'type': 'something',
            'title': 'test_title',
            'author_name': 'test_author',
            'provider_name': 'Instagram',
            'thumbnail_url': 'test_thumbail_url',
            'width': 'test_width',
            'height': 'test_height',
            'html': '<blockquote class="instagram-media">Content</blockquote>'
        })
    # check that a request was made with the expected URL / authentication
    # (FIX: the original duplicated this comment and the call_args fetch.)
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v9.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json"
    )
    self.assertEqual(request.get_header('Authorization'), "Bearer 123|abc")
def login():
    # Log in to qiushibaike.com: install a cookie-aware opener globally,
    # then POST the credentials form.
    url = 'http://www.qiushibaike.com/session.js'
    postData = {
        'login': '******',
        'password': '******',
        'remember_me': 'checked',
        'duration': '-1'
    }
    # Opener that keeps session cookies across requests.
    cj = http.cookiejar.LWPCookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cj),
        urllib.request.HTTPHandler)
    urllib.request.install_opener(opener)
    headers = HttpHeaders.headers
    headers['Accept'] = 'application/json, text/javascript, */*; q=0.01'
    headers['Referer'] = 'http://www.qiushibaike.com/'
    headers['Origin'] = 'http://www.qiushibaike.com'
    headers['Host'] = 'www.qiushibaike.com'
    body = urllib.parse.urlencode(postData).encode("utf-8")
    request = urllib.request.Request(url=url, data=body,
                                     headers=headers, method='POST')
    print(request.get_full_url())
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        print(e.reason, e.code, e.msg)
        print('出现异常,%s已停止运行' % MASTER_NAME)
        return
    text = response.read()
    print(text)
    print(MASTER_NAME + "登录了糗百网页")
def fetchAndParse(request):
    # Fetch, decode, and JSON-parse the response body.
    raw = urllib.request.urlopen(request).read().decode('utf-8')
    payload = json.loads(raw)
    # The API signals failures in-band through a status field.
    if payload['status'] == 'error':
        raise HTTPError(request.get_full_url(), 400, payload['data'],
                        None, None)
    return payload['data']
def _get_parameters_from_request(request): if request.get_method() == 'GET': pieces_str = urlparse(request.get_full_url()).query else: pieces_str = request.data.decode('ascii') return parse_qs(pieces_str)
def load_data():
    url = "https://www.baidu.com"
    # Pool of User-Agent strings to pick from at random per request.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    ]
    # Build the request, then attach a randomly chosen User-Agent.
    request = urllib.request.Request(url)
    request.add_header("User-Agent", random.choice(user_agent_list))
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    # Fully resolved request URL.
    final_url = request.get_full_url()
    print(2, final_url)
    # Headers attached to the request.
    request_headers = request.headers
    print(request_headers)
    # get_header() requires "User-agent" capitalization (first letter
    # upper, the rest lower) or it returns None.
    request_header = request.get_header("User-agent")
    print(request_header)
    with open("02_headers.html", "w", encoding="utf-8") as f:
        f.write(data)
def test_make_request(self):
    """v1 requests have correct URL and no Auth header."""
    request = GetMazaDataAPI1('foo', 'bar').make_request()
    # URL is built from the fixed API base plus both path components.
    self.assertEqual('https://uccs.landscape.canonical.com/api/1/foo/bar',
                     request.get_full_url())
    # v1 requests are unauthenticated.
    self.assertIs(None, request.headers.get('Authorization'))
def test_instagram_oembed_return_values(self, urlopen):
    urlopen.return_value = self.dummy_response
    finder = InstagramOEmbedFinder(app_id="123", app_secret="abc")
    result = finder.find_embed("https://instagr.am/p/CHeRxmnDSYe/")
    expected = {
        "type": "something",
        "title": "test_title",
        "author_name": "test_author",
        "provider_name": "Instagram",
        "thumbnail_url": "test_thumbail_url",
        "width": "test_width",
        "height": "test_height",
        "html": '<blockquote class="instagram-media">Content</blockquote>',
    }
    self.assertEqual(result, expected)
    # check that a request was made with the expected URL / authentication
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v11.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json",
    )
    self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
def get_search_result_modify_ua():
    # Rotate through several User-Agents so the server is less likely to
    # flag us as a crawler.
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1",
        "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0",
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.9.168 Version/11.50",
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    ]
    header = {
        'User-Agent': random.choice(user_agent_list),
        'h_key': 'key',
        'h_val': 'value'
    }
    url = "http://www.baidu.com/s?wd=美女"
    # quote() avoids a UnicodeEncodeError on the non-ASCII query term
    # while leaving printable ASCII characters untouched.
    request_url = urllib.parse.quote(url, safe=string.printable)
    request = urllib.request.Request(request_url, headers=header)
    response = urllib.request.urlopen(request)
    # Response headers, then the page body.
    print(response.headers)
    data = response.read().decode()
    print('完整url = ' + request.get_full_url())
    print(request.headers)
def test_facebook_oembed_return_values(self, urlopen):
    urlopen.return_value = self.dummy_response
    finder = FacebookOEmbedFinder(app_id="123", app_secret="abc")
    result = finder.find_embed("https://fb.watch/ABC123eew/")
    expected = {
        "type": "something",
        "title": "test_title",
        "author_name": "test_author",
        "provider_name": "Facebook",
        "thumbnail_url": None,
        "width": "test_width",
        "height": "test_height",
        "html": '<blockquote class="facebook-media">Content</blockquote>',
    }
    self.assertEqual(result, expected)
    # check that a request was made with the expected URL / authentication
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v11.0/oembed_video?url=https%3A%2F%2Ffb.watch%2FABC123eew%2F&format=json",
    )
    self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
def load_baidu():
    """Fetch baidu.com with a browser User-Agent and save the page."""
    url = "http://www.baidu.com"
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"
    }
    # BUG FIX: the original built a Request with this header and then
    # immediately replaced it with a header-less Request, so the UA was
    # never sent. Keep the header-carrying request.
    request = urllib.request.Request(url, headers=header)
    respons = urllib.request.urlopen(request)
    data = respons.read().decode("utf-8")
    with open("hesders.html", "w", encoding="utf-8") as f:
        f.write(data)
    # get_header() requires "User-agent" capitalization (first letter
    # upper, the rest lower) or it returns None.
    request_header = request.get_header("User-agent")
    print(request_header)
    # Fully resolved request URL.
    final_url = request.get_full_url()
    print(final_url)
def https_open(self, request):
    """
    Send an HTTP request, which can be either GET or POST,
    depending on req.has_data()

    Args:
        request - instance of urllib2.Request

    Raises:
        RuntimeError: when robots.txt forbids fetching the URL.
    """
    # Local import: the `request` parameter shadows the urllib module name.
    import urllib.request as urlrequest
    full_url = request.get_full_url()
    url_parts = parse.urlsplit(full_url)
    robo = None
    if url_parts.netloc in self.robots:
        # Reuse the cached robots.txt parser for this host.
        robo = self.robots[url_parts.netloc]
    else:
        # Getting request url, for checking robots.txt
        host = parse.urlsplit(full_url)[1]
        rurl = parse.urlunparse(("http", host, "/robots.txt", "", ""))
        robo = reppy.cache.RobotsCache()
        robo.fetch(rurl, self.agent_name)
        self.robots[url_parts.netloc] = robo
    # Is url allowed for the crawler in robots.txt?
    if robo.allowed(full_url, self.agent_name):
        # BUG FIX: the original called request.HTTPHandler.https_open —
        # `request` here is the Request instance, not the urllib module,
        # and https requests belong to HTTPSHandler, not HTTPHandler.
        return urlrequest.HTTPSHandler.https_open(self, request)
    raise RuntimeError('Forbidden by robots.txt')
def load_baidu():
    url = "https://www.baidu.com"
    # Example of a ready-made header dict (unused: the dynamic
    # add_header() route below is demonstrated instead).
    header = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
        "ha": "he"
    }
    request = urllib.request.Request(url)
    request.add_header(
        "user-agent",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36")
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")
    # Fully resolved request URL.
    final_url = request.get_full_url()
    print(final_url)
    # get_header() requires the "User-agent" capitalization.
    request_headers = request.get_header("User-agent")
    print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)
def load_baidu():
    url = "http://www.baidu.com"
    # Desktop-Chrome User-Agent so the server treats us as a real browser.
    header = {
        "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    request = urllib.request.Request(url, headers=header)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode('utf-8')
    # Fully resolved request URL.
    final_url = request.get_full_url()
    # get_header() requires the "User-agent" capitalization.
    request_header = request.get_header('User-agent')
    print(request_header)
    with open("02header.html", "w", encoding='utf-8') as f:
        f.write(data)
def load_baidu():
    """Fetch baidu.com with a browser User-Agent and save the page."""
    url = "http://www.baidu.com"
    # BUG FIX: the original called urlopen(url) on the bare URL *before*
    # building the Request, so the dynamically added header was never
    # sent; it also misspelled the header name as "User_Agent".
    request = urllib.request.Request(url)
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
    )
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    with open("data.html", "w", encoding="utf-8") as f:
        f.write(data)
    # Read the header back from the request.
    ret = request.get_header("User-Agent".capitalize())
    # Fully resolved request URL.
    final_url = request.get_full_url()
    print(final_url)
    request_headers = request.headers
def load_baidu():
    url = "https://www.baidu.com"
    mobile_ua = "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"
    # Attach the User-Agent dynamically (instead of via the headers= kwarg).
    request = urllib.request.Request(url)
    request.add_header("User-Agent", mobile_ua)
    final_url = request.get_full_url()
    print(final_url)
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    # First way to read the request headers back: the headers dict.
    request_headers1 = request.headers
    # Second way: single lookup — needs "User-agent" capitalization
    # (first letter upper, the rest lower).
    request_headers2 = request.get_header("User-agent")
    # Text content is written as a string; binary media would use "wb".
    with open("load_baidu.html", "w", encoding="utf-8") as f:
        f.write(data)
def load_baidu():
    url = "https://www.baidu.com"
    desktop_ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    # Create the request with the UA header, then add it again
    # dynamically (demonstrates both ways of setting headers).
    request = urllib.request.Request(url, headers={"User-Agent": desktop_ua})
    request.add_header("User-Agent", desktop_ua)
    # All headers attached to the request.
    request_header = request.headers
    print(request_header)
    # Single header lookup (note the "User-agent" capitalization).
    print(request.get_header("User-agent"))
    # Fully resolved request URL.
    print(request.get_full_url())
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")
    with open("02header.html", "w", encoding="utf-8") as f:
        f.write(data)
def test_endpoint_with_format_param(self, loads, urlopen):
    urlopen.return_value = self.dummy_response
    loads.return_value = {'type': 'video', 'url': 'http://www.example.com'}
    result = OEmbedFinder().find_embed("https://vimeo.com/217403396")
    self.assertEqual(result['type'], 'video')
    # The endpoint (sans query string) must be the Vimeo oEmbed URL.
    endpoint = urlopen.call_args[0][0].get_full_url().split('?')[0]
    self.assertEqual(endpoint, "http://www.vimeo.com/api/oembed.json")
def test_endpoint_with_format_param(self, loads, urlopen):
    urlopen.return_value = self.dummy_response
    loads.return_value = {'type': 'video', 'url': 'http://www.example.com'}
    result = OEmbedFinder().find_embed("https://vimeo.com/217403396")
    self.assertEqual(result['type'], 'video')
    # The endpoint (sans query string) must be the Vimeo oEmbed URL.
    endpoint = urlopen.call_args[0][0].get_full_url().split('?')[0]
    self.assertEqual(endpoint, "https://www.vimeo.com/api/oembed.json")
def get_cached_response(self, request: urllib.request.Request) -> dict:
    """ Serialize request URL to a subpath relative to self.cache_path """
    # The full request URL is the lookup key in the response table.
    url = request.get_full_url()
    cursor = self._cx.cursor()
    cursor.execute("SELECT * FROM response WHERE url = ?", (url,))
    return cursor.fetchone()
def testOnlyConneg(self):
    # see issue #82
    query = "prefix whatever: <http://example.org/blah#> ASK { ?s ?p ?o }"
    # Obviously _returnFormatSetting is not accessible from SPARQLWrapper,
    # so we copy&paste the possible values.
    format_settings = ["format", "output", "results"]
    # With onlyConneg disabled, the return-format URL parameters are sent.
    self.wrapper.setOnlyConneg(False)
    self.wrapper.setQuery(query)
    request = self._get_request(self.wrapper)
    request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
    for returnFormatSetting in format_settings:
        self.assertTrue(
            returnFormatSetting in request_params,
            "URL parameter '%s' was not sent, and it was expected" % returnFormatSetting)
    # ONLY Content Negotiation: none of those parameters may appear.
    self.wrapper.resetQuery()
    self.wrapper.setOnlyConneg(True)
    self.wrapper.setQuery(query)
    request = self._get_request(self.wrapper)
    request_params = dict(parse_qsl(urlparse(request.get_full_url()).query))
    for returnFormatSetting in format_settings:
        self.assertFalse(
            returnFormatSetting in request_params,
            "URL parameter '%s' was sent, and it was not expected (only Content Negotiation)" % returnFormatSetting)
def __init__(self, request):
    # Only URLs present in the canned-response table are servable.
    url = request.get_full_url()
    if url not in canned_response:
        raise Exception("%s not in canned response!" % url)
    self.request = request
    self.code = 200
    self.msg = "OK"
    # Expose a file-like read() over the canned body.
    self.content = io.StringIO(canned_response[url])
    self.read = self.content.read
def test_prepare_curl(self):
    """prepare_curl sets URL and auth header."""
    curl = FakeCurl()
    getter = PycURLGetter(curl)
    request = self.make_request()
    getter.prepare_curl(request)
    # The request URL must be copied into the curl handle verbatim.
    self.assertEqual(request.get_full_url(), curl.options[pycurl.URL])
    # Basic auth header derived from the request credentials.
    expected_headers = ['Authorization: Basic cGV0ZTpwYXNz\n']
    self.assertEqual(expected_headers, curl.options[pycurl.HTTPHEADER])
def http_request(self, request):
    """Attach HTTP Basic credentials to the request if it has none.

    Looks up credentials for the request URL in the credentials manager;
    when both a user and a password are found, adds the Basic auth header
    (unredirected, so it is not leaked on redirects).

    Returns the (possibly modified) request.
    """
    if not request.has_header(self.AUTH_HEADER):
        url = request.get_full_url()
        user, password = self._creds_mgr.get_credentials(url)
        if user is not None and password is not None:
            # BUG FIX: base64.b64encode() requires bytes on Python 3 and
            # returns bytes; encode the pair and decode the result so the
            # header value is a clean str (no "b'...'" artifact).
            creds = base64.b64encode(
                ("%s:%s" % (user, password)).encode("utf-8")).decode("ascii")
            auth = "Basic %s" % creds
            request.add_unredirected_header(self.AUTH_HEADER, auth)
    return request
def _send_request(self, method, path, apiurl, schema, **query):
    # Build, log, and execute the request; wrap HTTP failures in the
    # project's exception type, then validate the response.
    request = self._build_request(method, path, apiurl, **query)
    self._logger.info(request.get_full_url())
    try:
        raw = urllib.request.urlopen(request)
    except urllib.error.HTTPError as e:
        raise Urllib2HTTPError(e)
    response = self._new_response(raw)
    self._validate_response(response, schema)
    return response
def Get(self, request, timeout=None):
    """Accepts an Http request and returns a precanned response.

    Known URLs: the metadata root (returns 'v1/') and the recursive
    instance path (returns the canned instance response). Anything else
    raises HTTPError like a real metadata server would.
    """
    url = request.get_full_url()
    if url == utils.METADATA_URL_PREFIX:
        return 'v1/'
    elif url.startswith(utils.METADATA_V1_URL_PREFIX):
        url = url.replace(utils.METADATA_V1_URL_PREFIX, '')
        if url == 'instance/?recursive=true':
            return self._instance_response
    # BUG FIX: `raise urllib.error.HTTPError` raised the bare class, which
    # fails with TypeError because HTTPError requires constructor
    # arguments; raise a properly constructed 404 instead.
    raise urllib.error.HTTPError(request.get_full_url(), 404, 'Not Found',
                                 None, None)
def http_request(self, request):
    # Enforce robots.txt exclusion before letting an HTTP(S) request
    # through. Non-HTTP schemes, /robots.txt itself, and requests whose
    # origin was a /robots.txt fetch are always allowed.
    # NOTE(review): uses the old get_type/get_selector/get_host Request
    # API (urllib2/mechanize era) — presumably runs on that stack.
    scheme = request.get_type()
    if scheme not in ["http", "https"]:
        # robots exclusion only applies to HTTP
        return request
    if request.get_selector() == "/robots.txt":
        # /robots.txt is always OK to fetch
        return request
    host = request.get_host()
    # robots.txt requests don't need to be allowed by robots.txt :-)
    origin_req = getattr(request, "_origin_req", None)
    if (origin_req is not None and
        origin_req.get_selector() == "/robots.txt" and
        origin_req.get_host() == host
        ):
        return request
    if host != self._host:
        # New host: build a fresh robots.txt parser, fetch and parse the
        # file, and remember the host so repeat requests skip this.
        self.rfp = self.rfp_class()
        try:
            self.rfp.set_opener(self.parent)
        except AttributeError:
            debug("%r instance does not support set_opener" %
                  self.rfp.__class__)
        self.rfp.set_url(scheme + "://" + host + "/robots.txt")
        self.rfp.read()
        self._host = host
    ua = request.get_header("User-agent", "")
    if self.rfp.can_fetch(ua, request.get_full_url()):
        return request
    else:
        # XXX This should really have raised URLError. Too late now...
        msg = "request disallowed by robots.txt"
        raise RobotExclusionError(
            request,
            request.get_full_url(),
            403, msg,
            self.http_response_class(StringIO()),
            StringIO(msg))
def fetch(opener, request):
    # Fetch with a timeout; reject redirected URLs and non-200 codes.
    resp = opener.open(request, timeout=FETCH_TIMEOUT)
    was_redirected = resp.geturl() != request.get_full_url()
    if was_redirected or resp.getcode() != 200:
        raise FetchError('invalid url or invalid code')
    # Cap the body size and decode leniently.
    body = resp.read(FETCH_MAX_LENGTH)
    return body.decode('utf-8', 'replace')
def requestB(opener, url, headers, data, method='POST'):
    # Perform an HTTP request through *opener*, printing request and
    # response details as it goes. Returns (body, status_code); body stays
    # '' when a URLError prevented any response.
    answer = ''
    retcode = None
    additionalInfo = '[None]'
    contentLenght = None  # sic: original author's spelling, kept as-is
    if ImportCookie:
        # Module-level switch: merge stored cookies into the headers.
        headers = processingCookies(headers)
    data = urllib.parse.urlencode(data)
    if method == 'GET':
        # GET: fold the encoded parameters into the query string.
        if data:
            url = url + '?' + data
        data = None
    elif method == 'POST':
        # POST: send the encoded parameters as the request body.
        headers['Content-Length'] = len(data)
        data = data.encode()
    request = urllib.request.Request(url, data, headers)
    try:
        printRequest(request.get_method(), request.get_full_url())
        if data:
            printData(data)
        f = opener.open(request)
        headers = f.getheaders()
        code = f.code
        retcode = code
        answer = f.read()
        # Digest of the body (presumably for change detection elsewhere —
        # the digest itself is not used here).
        m = hashlib.md5()
        m.update(answer)
        # Report the Content-Length header alongside the status code.
        for h in headers:
            if h[0].lower() == 'content-length':
                contentLenght = h[1]
                additionalInfo = '[' + str(h[1]) + ']'
        printAnswer(code, additionalInfo)
        printHeaders(headers, 'Set-Cookie')
    except urllib.error.HTTPError as error:
        # Print the error status with its Content-Length when present.
        for h in error.headers:
            if h.lower() == 'content-length':
                printAnswer(str(error.code), ' [' + str(error.headers[h]) + ']')
            else:
                printAnswer(str(error.code), ' [-1]')
        retcode = error.code
        answer = error.read()
    except urllib.error.URLError as error:
        printAnswer(str(error))
    return answer, retcode
def http_response(self, request, response):
    """Gets a HTTP response, if it was a GET request and the status code
    starts with 2 (200 OK etc) it caches it and returns a CachedResponse
    """
    cacheable = (request.get_method() == "GET"
                 and str(response.code).startswith("2"))
    if not cacheable:
        return response
    url = request.get_full_url()
    if 'x-local-cache' in response.info():
        # Already served from our cache.
        set_cache_header = True
    else:
        # Fresh network response: store it now.
        set_cache_header = store_in_cache(self.cache_location, url, response)
    return CachedResponse(self.cache_location, url,
                          set_cache_header=set_cache_header)
def _SendRawRequest(request, dump_path=None):
    # Issue the request, decode with the server-declared encoding, and
    # optionally dump the raw payload to disk for debugging.
    logging.debug('Sending request to %s with data %s',
                  request.get_full_url(), request.data)
    resp = urllib.request.urlopen(request)
    encoding = resp.info().get('Content-Encoding', 'utf-8')
    decoded_raw = resp.read().decode(encoding)
    logging.debug('urlopen returned \n%s\n', decoded_raw)
    if dump_path:
        # File name: the request path up to '?', with '/' flattened to '_'.
        name = request.selector[1:request.selector.find('?')].replace('/', '_')
        with open(os.path.join(dump_path, name + '.raw'), 'w') as dump:
            dump.write(decoded_raw)
    return decoded_raw
def urlopen(request, data=None):
    # Test double for urllib.request.urlopen: serves canned files from
    # DATAPATH and synthesizes status codes and headers from query
    # parameters ('status', 'header', 'redirect').
    if isinstance(request, str):
        request = urllib.request.Request(request)
    if data is not None and type(data) != bytes:
        raise TypeError('POST data must be bytes')
    # track the last call arguments
    urlopen.request = request
    urlopen.data = data
    url = request.get_full_url()
    parts = urllib.parse.urlparse(url)
    # Only the two fake hosts are reachable from tests.
    if parts.netloc.split(':')[0] not in ['unittest', 'proxy.xri.net']:
        raise urllib.error.URLError('Wrong host: %s' % parts.netloc)
    query = urllib.parse.parse_qs(parts.query)
    if 'redirect' in query:
        # Follow the fake redirect by recursing on the target URL.
        return urlopen(query['redirect'][0])
    try:
        # Map the URL path to a fixture file; empty path means 200.txt.
        path = parts.path.lstrip('/') or '200.txt'
        if parts.netloc == 'proxy.xri.net':
            # XRI fixtures: sanitize '=' and '*' and add the .xri suffix.
            path = path.replace('=', '_').replace('*', '_') + '.xri'
        with open(os.path.join(DATAPATH, path), 'rb') as f:
            body = f.read()
    except FileNotFoundError:
        raise urllib.error.HTTPError(url, 404, '%s not found' % path, {},
                                     io.BytesIO())
    status = int(query.get('status', ['200'])[0])
    if 300 <= status < 400:
        # Redirects must use the 'redirect' query parameter instead.
        raise ValueError('Can\'t return 3xx status', url)
    if 400 <= status:
        raise urllib.error.HTTPError(url, status,
                                     'Requested status: %s' % status, {},
                                     io.BytesIO(body))
    # Default headers, with the content type inferred from the extension.
    headers = {
        'Server': 'Urlopen-Mock',
        'Date': 'Mon, 21 Jul 2014 19:52:42 GMT',
        'Content-type': TYPES.get(os.path.splitext(path)[1], 'text/plain'),
        'Content-length': len(body),
    }
    # Extra 'header=Name: value' query params override/extend the defaults.
    extra_headers = query.get('header', [])
    headers.update(h.split(': ', 1) for h in extra_headers)
    return HTTPResponse(url, status, headers, body)
def statuses_mentions(query=None):
    # Build a signed OAuth1 request for the Twitter mentions timeline.
    base_url = "http://api.twitter.com/1/statuses/mentions.json"
    parameters = generate_base_data()
    query_string = ""
    if query is not None:
        query_string = add_params(query)
        parameters.extend(query)
    request = urllib.request.Request(base_url + query_string)
    # Sign over the base URL and all OAuth parameters.
    signature = myoauth.oauth_sign(request.get_method(), base_url,
                                   parameters, OAUTH_CONSUMER_SECRET,
                                   OAUTH_TOKEN_SECRET)
    header_string = generate_header_string(parameters,
                                           [["oauth_signature", signature]])
    request.add_header("Authorization", header_string)
    print(request.get_header("Authorization"))
    print(request.get_method())
    print(request.get_full_url())
    return urllib.request.urlopen(request)
def _request(self, path, parameters):
    """GET ``path`` with ``parameters`` and parse the reply as XML.

    :param path: Path appended to ``self.url_base``.
    :param parameters: Mapping of query parameters; falsy values dropped.
    :return: The parsed ``xml.etree`` root element of the response body.
    """
    # Keep only parameters whose value is truthy. (The previous comment
    # claimed "not None", but the code also drops '' and 0.) Dict
    # comprehension replaces dict([(k, v) for ...]) — same behaviour.
    parameters = {k: v for k, v in parameters.items() if v}
    query_str = urlencode(parameters)
    request = Request("%s%s?%s" % (self.url_base, path, query_str))
    _log.debug("requesting: %s", request.get_full_url())
    data = None
    try:
        response = self.opener.open(request)
        data = response.read()
        response.close()
    except HTTPError as httperror:
        # Error responses still carry a parsable body.
        data = httperror.read()
        httperror.close()
    #_log.debug("response: %s", data)
    # NOTE(review): str.replace on the raw read assumes a text (str)
    # response; under Python 3 read() returns bytes — confirm at runtime.
    return ET.fromstring(data.strip().replace("…", "..."))
def _request_raw(self, path, parameters, method="GET", request_body=""):
    """Issue an HTTP request and return the stripped raw response body.

    :param path: Path appended to ``self.url_base``.
    :param parameters: POST payload when ``method == "POST"``; otherwise a
        mapping of query parameters (falsy values dropped).
    :param method: "GET" or "POST".
    :param request_body: Body attached to the non-POST request.
    :return: The response body, stripped, with some characters sanitized.
    """
    if method == "POST":
        request = Request("%s%s" % (self.url_base, path), parameters)
    else:
        # Keep only parameters with truthy values (drops None, '' and 0).
        parameters = {k: v for k, v in parameters.items() if v}
        query_str = urlencode(parameters)
        # NOTE(review): passing request_body here attaches a body even on
        # "GET", which makes urllib actually issue a POST — confirm intent.
        request = Request("%s%s?%s" % (self.url_base, path, query_str),
                          request_body)
    _log.debug("requesting (%s): %s", method, request.get_full_url())
    data = None
    try:
        response = self.opener.open(request)
        # NOTE(review): str.replace on read() output assumes a text (str)
        # response; under Python 3 read() returns bytes — confirm.
        data = response.read().replace("…", "...").replace("d'éveil",
                                                           "d éveil")
        response.close()
    except HTTPError as httperror:
        data = httperror.read()
        httperror.close()
    _log.debug("response: %s", data.strip())
    return data.strip()
def _SendRequest(request, dump_path=None):
    """Send ``request`` to the Nessus server and return the reply contents.

    Optionally dumps the raw JSON reply under ``dump_path`` using a file
    name derived from the request selector. Raises ``NessusError`` when
    the reply status is not 'OK'.
    """
    logging.debug('Sending request to %s with data %s',
                  request.get_full_url(), request.data)
    response = urllib.request.urlopen(request)
    # NOTE: Content-Encoding is used here as a decode codec, falling back
    # to utf-8 when the header is absent.
    codec = response.info().get('Content-Encoding', 'utf-8')
    raw_json = response.read().decode(codec)
    logging.debug('urlopen returned \n%s\n', raw_json)
    if dump_path:
        selector = request.selector
        dump_name = (selector[1:selector.find('?')].replace('/', '_')
                     + '.json')
        with open(os.path.join(dump_path, dump_name), 'w') as dump:
            dump.write(raw_json)
    reply = json.loads(raw_json)['reply']
    status = reply.get('status', '')
    if status != 'OK':
        raise NessusError('Status was not OK: %s: %s'
                          % (status, reply['contents']))
    return reply.get('contents', '')
def main(argv=None):
    '''
    Process the command line arguments and create the JSON dump.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # NOTE(review): this function as recovered contains censored spans
    # (marked ``******`` below) where source was lost — it is NOT valid
    # Python until those spans are restored (they appear to have held the
    # interactive-credential prompts and the GitLab/Stash client setup).

    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Transfer all projects/repositories from GitLab to Stash. \
            Note: This script assumes you have your SSH key \
            registered with both GitLab and Stash.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('gitlab_url',
                        help='The full URL to your GitLab instance.')
    parser.add_argument('stash_url',
                        help='The full URL to your Stash instance.')
    parser.add_argument('-p', '--password',
                        help='The password to use to authenticate if token is \
                              not specified. If password and token are both \
                              unspecified, you will be prompted to enter a \
                              password.')
    parser.add_argument('-P', '--page_size',
                        help='When retrieving result from GitLab, how many \
                              results should be included in a given page?.',
                        type=int, default=20)
    parser.add_argument('-s', '--verify_ssl',
                        help='Enable SSL certificate verification',
                        action='store_true')
    parser.add_argument('-S', '--skip_existing',
                        help='Do not update existing repositories and just \
                              skip them.',
                        action='store_true')
    parser.add_argument('-t', '--token',
                        help='The private GitLab API token to use for \
                              authentication. Either this or username and \
                              password must be set.')
    parser.add_argument('-u', '--username',
                        help='The username to use for authentication, if token\
                              is unspecified.')
    parser.add_argument('-v', '--verbose',
                        help='Print more status information. For every ' +
                             'additional time this flag is specified, ' +
                             'output gets more verbose.',
                        default=0, action='count')
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)
    # NOTE(review): max() forces page_size to AT LEAST 100, silently
    # overriding smaller user-requested values (and the documented default
    # of 20) — looks like it should be min(); confirm intent.
    args.page_size = max(100, args.page_size)

    # Convert verbose flag to actually logging level
    log_levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    log_level = log_levels[min(args.verbose, 2)]
    # Make warnings from built-in warnings module get formatted more nicely
    logging.captureWarnings(True)
    logging.basicConfig(format=('%(asctime)s - %(name)s - %(levelname)s - ' +
                                '%(message)s'), level=log_level)

    # Setup authenticated GitLab and Stash instances
    if args.token:
        git = GitLab(args.gitlab_url, token=args.token,
                     verify_ssl=args.verify_ssl)
    else:
        git = None
        if not args.username:
            # NOTE(review): censored span — presumably prompted for and
            # stored args.username here.
            print('Username: '******'').strip()
    if not args.password:
        # NOTE(review): censored span — presumably getpass prompt, the
        # username/password GitLab fallback, Stash client construction and
        # the 'Retrieving...' status print were here.
        args.password = getpass.getpass('Password: '******'Retrieving existing Stash projects...', end="", file=sys.stderr)
    sys.stderr.flush()
    # Snapshot the existing Stash projects for key/name lookups.
    key_set = {proj['key'] for proj in stash.projects}
    stash_project_names = {proj['name'] for proj in stash.projects}
    names_to_keys = {proj['name']: proj['key'] for proj in stash.projects}
    # Personal projects are addressed as '~<user slug>'.
    stash_project_names = stash_project_names.union(
        {'~' + user['slug'] for user in stash.admin.users.list()})
    # Initialize users private repositories
    for user in stash.admin.users.list():
        request = urllib.request.Request(
            args.stash_url + "users/" + user['slug'] + "/repos?start=0&limit=50")
        print(request.get_full_url())
        # NOTE(review): b64encode returns bytes; interpolating it into the
        # header yields "Basic b'...'" — probably needs .decode(); confirm.
        base64string = base64.b64encode(
            (args.username + ":" + args.password).encode('ascii'))
        request.add_header("Authorization", "Basic %s" % base64string)
        request.add_header("Referer", args.stash_url + "users/" + user['slug'])
        request.add_header("Accept",
                           "application/json, text/javascript, */*; q=0.01")
        try:
            # NOTE(review): the urlopen call is commented out, so this try
            # only wraps a print and the except can never fire.
            # result = urllib.request.urlopen(request)
            print("Personnal repository for : " + user['slug'] +
                  " may be not initialized \n")
        except urllib.error.HTTPError:
            print("repository initialized")
    stash_users = {user['slug'] for user in stash.admin.users.list()}
    print('done', file=sys.stderr)
    sys.stderr.flush()

    updated_projects = set()
    repo_to_slugs = {}
    failed_to_clone = set()
    cwd = os.getcwd()
    transfer_count = 0
    skipped_count = 0
    for project in stash_project_names:
        print('detected projetct : %s' % project, file=sys.stderr)
    print('Processing GitLab projects...', file=sys.stderr)
    sys.stderr.flush()
    for project in gen_all_results(git.getprojectsall,
                                   per_page=args.page_size):
        print('\n' + ('=' * 80) + '\n', file=sys.stderr)
        sys.stderr.flush()
        proj_name = project['namespace']['name']
        # Personal namespaces map to Stash personal projects ('~slug').
        if proj_name in stash_users:
            proj_name = "~" + proj_name
        # Create Stash project if it doesn't already exist
        if proj_name not in stash_project_names:
            # Create Stash project key: uppercase initials of the name,
            # falling back to the first two letters.
            key = proj_name
            if key.islower():
                key = key.title()
            key = re.sub(r'[^A-Z]', '', key)
            if len(key) < 2:
                key = re.sub(r'[^A-Za-z]', '', proj_name)[0:2].upper()
            added = False
            suffix = 65  # ord('A'); used to vary the last key character
            while key in key_set:
                if not added:
                    key += 'A'
                else:
                    suffix += 1
                    key = key[:-1] + chr(suffix)
            # NOTE(review): ``added`` is never set to True, so the else
            # branch is unreachable — collisions only ever append 'A's;
            # confirm whether an ``added = True`` was lost.
            key_set.add(key)
            # Actually add the project to Stash
            print('Creating Stash project "%s" with key %s...'
                  % (proj_name, key), end="", file=sys.stderr)
            sys.stderr.flush()
            stash.projects.create(key, proj_name)
            names_to_keys[proj_name] = key
            stash_project_names.add(proj_name)
            print('done', file=sys.stderr)
            sys.stderr.flush()
        else:
            if proj_name[0:1] != "~":
                key = names_to_keys[proj_name]
            else:
                # Personal projects are keyed by the '~slug' name itself.
                key = proj_name
        stash_project = stash.projects[key]
        # Initialize maping from repository names to slugs for later
        if key not in repo_to_slugs:
            repo_to_slugs[key] = {repo['name']: repo['slug']
                                  for repo in stash_project.repos}
        # Create Stash-compatible name for repository
        # Repository names are limited to 128 characters.
        # They must start with a letter or number and may contain spaces,
        # hyphens, underscores and periods
        repo_name = project['name']
        if not repo_name[0].isalnum():
            repo_name = 'A ' + repo_name
        repo_name = re.sub(r'[^A-Za-z0-9 _.-]', ' ', repo_name)
        if len(repo_name) > 128:
            repo_name = repo_name[0:128]
        # Add repository to Stash project if it's not already there
        if repo_name not in repo_to_slugs[key]:
            print('Creating Stash repository "%s" in project "%s"...'
                  % (repo_name, proj_name), end="", file=sys.stderr)
            sys.stderr.flush()
            stash_repo = stash_project.repos.create(repo_name)
            repo_to_slugs[key][repo_name] = stash_repo['slug']
            print('done', file=sys.stderr)
            sys.stderr.flush()
        elif args.skip_existing:
            print('Skipping existing Stash repository "%s" in project "%s"'
                  % (repo_name, proj_name), file=sys.stderr)
            sys.stderr.flush()
            skipped_count += 1
            continue
        else:
            print('Updating existing Stash repository "%s" in project "%s"'
                  % (repo_name, proj_name), file=sys.stderr)
            sys.stderr.flush()
            repo_slug = repo_to_slugs[key][repo_name]
            stash_repo = stash_project.repos[repo_slug].get()
            # Pick the SSH clone URL for pushing.
            # NOTE(review): stash_repo_url is only assigned on this branch
            # and on the ssh link being present; the create-branch below
            # would hit an unbound name when pushing — confirm.
            for clone_link in stash_repo['links']['clone']:
                if clone_link['name'] == 'ssh':
                    stash_repo_url = clone_link['href']
                    break
        with tempfile.TemporaryDirectory() as temp_dir:
            # Clone repository to temporary directory
            print('\nCloning GitLab repository...', file=sys.stderr)
            sys.stderr.flush()
            try:
                subprocess.check_call(['git', 'clone', '--mirror',
                                       project['ssh_url_to_repo'], temp_dir])
            except subprocess.CalledProcessError:
                print('Failed to clone GitLab repository. This usually when ' +
                      'it does not exist.', file=sys.stderr)
                failed_to_clone.add(project['name_with_namespace'])
                skipped_count += 1
                continue
            os.chdir(temp_dir)
            # Check that repository is not empty
            try:
                subprocess.check_call(['git', 'log', '--format=oneline', '-1'],
                                      stdout=subprocess.DEVNULL,
                                      stderr=subprocess.DEVNULL)
            except subprocess.CalledProcessError:
                print('Repository is empty, so skipping push to Stash.',
                      file=sys.stderr)
                skipped_count += 1
            else:
                # Change remote to Stash and push
                print('\nPushing repository to Stash...', file=sys.stderr)
                sys.stderr.flush()
                subprocess.check_call(['git', 'remote', 'set-url', 'origin',
                                       stash_repo_url])
                subprocess.check_call(['git', 'push', '--mirror'])
                transfer_count += 1
            os.chdir(cwd)
        updated_projects.add(proj_name)
    print('\n' + ('=' * 35) + 'SUMMARY' + ('=' * 35), file=sys.stderr)
    print('{} repositories transferred.\n'.format(transfer_count),
          file=sys.stderr)
    print('{} repositories skipped.\n'.format(skipped_count),
          file=sys.stderr)
    print('Projects created/updated:', file=sys.stderr)
    for proj in sorted(updated_projects):
        print('\t' + proj, file=sys.stderr)
    print('Repositories that we could not clone:', file=sys.stderr)
    for repo_name in sorted(failed_to_clone):
        print('\t' + repo_name, file=sys.stderr)
def open(self, request, timeout=None):
    # Test double for an opener's ``open``: verify the stubbed call targets
    # the expected URL, then simulate a transport failure by raising.
    # NOTE(review): ``url`` and ``MyException`` come from an enclosing test
    # scope not visible here — confirm they are defined at the use site.
    assert request.get_full_url() == url
    raise MyException
def request(self, path, parameters=None, data=None, method=None,
            auto_login=True, json_answer=True, filename=None):
    """
    Send a request to the Nuclos server.

    :param path: The path to open. May also be a full URL (starting with
                 "http"), in which case ``parameters`` is ignored.
    :param parameters: A dictionary of parameters to add to the request URL.
    :param data: The data to add. If this is given the request will
                 automatically be a POST request.
    :param method: The HTTP method to use. If not set this will be GET or
                 POST, depending on the data.
    :param auto_login: Try to log in automatically in case of a 401 error.
    :param json_answer: Parse the servers answer as JSON.
    :param filename: A file to save the downloaded data in.
    :return: The answer of the server. None in case of an error.
    :raise: URLError in case of an HTTP error. Returns None instead if the
            'handle_http_errors' option is set.
    """
    if not self.session_id and auto_login:
        self.login()
    url = path
    if not url.startswith("http"):
        url = self._build_url(path, parameters)
    request = urllib.request.Request(url)
    if json_answer:
        request.add_header("Accept", "application/json")
    if data:
        request.data = json.dumps(data).encode("utf-8")
        request.add_header("Content-Type", "application/json")
    if method:
        request.method = method
    if method and request.data and method not in ["POST", "PUT"]:
        logging.warning("Overriding the POST method while sending data!")
    if self.session_id:
        request.add_header("Cookie", "JSESSIONID=" + str(self.session_id))
    logging.debug("Sending {} request to {}.".format(request.get_method(),
                                                     request.get_full_url()))
    if request.data:
        logging.debug("Sending data {}.".format(request.data))
    try:
        result = urllib.request.urlopen(request)
        if filename is not None:
            # Stream the payload straight to disk instead of returning it.
            with open(filename, "wb") as f:
                shutil.copyfileobj(result, f)
            return None
        answer = result.read().decode()
        if answer:
            logging.debug("Received answer {}".format(answer))
        if not json_answer:
            return answer
        try:
            return json.loads(answer)
        except ValueError:
            logging.error("Invalid JSON in '{}'.".format(answer))
            return None
    except urllib.error.HTTPError as e:  # same class as urllib.request.HTTPError
        if e.code == 401 and auto_login:
            logging.info("Unauthorized. Trying to log in again.")
            self.session_id = None
            self.login()
            # BUG FIX: the retry previously dropped ``parameters`` and
            # ``filename``, so a retried request lost its query string and
            # download target.
            return self.request(path, parameters=parameters, data=data,
                                method=method, auto_login=False,
                                json_answer=json_answer, filename=filename)
        elif e.code == 403:
            raise NuclosAuthenticationException()
        else:
            logging.error("HTTP Error {}: {}".format(e.code, e.reason))
            raise NuclosHTTPException(e)
def default_open(self, request):
    """Handler hook: log the outgoing URL (when tracing is enabled) and
    stamp the request with a start time, then decline to handle it.

    Returning None lets the next handler in the opener chain process the
    request; ``start_time`` allows later hooks to compute elapsed time.
    """
    if config.TRACE_API_CALLS:
        # Lazy %-args: the URL is only formatted if INFO logging is enabled.
        logger.info("%s", request.get_full_url())
    request.start_time = time.time()
    return None
def encodeASP(text):
    """base64 encode function for (ASP).NET

    Urlsafe-base64 encodes ``text``, then replaces the trailing '='
    padding with a digit giving the pad count (ASP.NET style). Returns
    the same type (str or bytes) as the input.
    """
    isbytes = True
    if not isinstance(text, bytes):
        text = base64.urlsafe_b64encode(text.encode())
        isbytes = False
    else:
        text = base64.urlsafe_b64encode(text)
    # bytes.count is equivalent to len(re.findall(b'=', ...)) here.
    count = text.count(b'=')
    if count:
        text = text[:-count]
    text = text + str(count).encode()
    return text if isbytes else text.decode()


class SmartRedirectHandler(urllib.request.HTTPRedirectHandler):
    """Redirect handler that logs 301/302 hops and fixes the Host header."""

    def http_error_301(self, req, fp, code, msg, headers):
        self.preProcessingRedirection(req, fp, code, msg, headers)
        result = super(SmartRedirectHandler, self).http_error_301(
            req, fp, code, msg, headers)
        self.postProcessingRedirection(result)
        return result

    def http_error_302(self, req, fp, code, msg, headers):
        self.preProcessingRedirection(req, fp, code, msg, headers)
        result = super(SmartRedirectHandler, self).http_error_302(
            req, fp, code, msg, headers)
        self.postProcessingRedirection(result)
        return result

    def preProcessingRedirection(self, req, fp, code, msg, headers):
        # Point the Host header at the redirect target before following it.
        location = ''
        for i in headers._headers:
            if i[0] == 'Location':
                location = i[1].strip()
        req.add_header('Host', urlparse(location).netloc)
        printAnswer(code, str(msg) + " " + location)
        printHeaders(headers._headers, 'Set-Cookie')

    def postProcessingRedirection(self, result):
        printRequest("GET", result.geturl())


def stringToHexCSV(s):
    """Return ``s`` as space-separated hex byte pairs (e.g. 'ab' -> '61 62').

    BUG FIX: the original used ``s.encode('hex')``, a Python-2-only codec
    that raises LookupError on Python 3; ``bytes.hex()`` is the portable
    replacement.
    """
    hexs = s.encode().hex()
    return ' '.join(hexs[i:i + 2] for i in range(0, len(hexs), 2))


def defaultCreateOpener(withCookieJar=True, withBurpProxy=True):
    """Build a urllib opener with optional cookie jar and Burp proxy.

    Side effect: rebinds the module-global ``cookieJar`` when a jar is
    requested. NOTE(review): returns None when both flags are False —
    confirm callers handle that.
    """
    global cookieJar
    if withCookieJar:
        cookieJar = urllib.request.HTTPCookieProcessor(
            http.cookiejar.CookieJar())
    proxy_handler = None
    if withBurpProxy:
        proxy_handler = urllib.request.ProxyHandler(
            {'https': 'https://127.0.0.1:8080/',
             'http': 'http://127.0.0.1:8080/'})
    ret = None
    if withCookieJar and withBurpProxy:
        ret = urllib.request.build_opener(proxy_handler,
                                          SmartRedirectHandler(), cookieJar)
    elif withCookieJar:
        ret = urllib.request.build_opener(SmartRedirectHandler(), cookieJar)
    elif withBurpProxy:
        ret = urllib.request.build_opener(proxy_handler,
                                          SmartRedirectHandler())
    return ret


def processingCookies(headers):
    """Flatten a dict-valued 'Cookie' header into a single cookie string.

    Returns the (mutated) headers mapping. BUG FIX: when the cookie value
    was already a string the original did a bare ``return`` (None), which
    clobbered the caller's headers dict; now the mapping is returned
    unchanged in that case.
    """
    cookies = headers['Cookie']
    final = ''
    if isinstance(cookies, str):
        return headers
    for c in cookies.keys():
        final += " " + c + "=" + cookies[c] + ";"
    headers['Cookie'] = final
    return headers


createOpener = defaultCreateOpener


def requestC(opener, url, headers, data, method='POST'):
    """Like requestB but returns only the response body."""
    [answer, code] = requestB(opener, url, headers, data, method)
    return answer


def requestB(opener, url, headers, data, method='POST'):
    """Send ``data`` to ``url`` through ``opener``; return (body, status).

    GET appends the urlencoded data to the URL; POST sends it as the
    request body. HTTP errors are reported and their body/status returned;
    URL errors are reported and return ('', None).
    """
    answer = ''
    retcode = None
    additional_info = '[None]'
    if ImportCookie:
        headers = processingCookies(headers)
    data = urllib.parse.urlencode(data)
    if method == 'GET':
        if data:
            url = url + '?' + data
        data = None
    elif method == 'POST':
        headers['Content-Length'] = len(data)
        data = data.encode()
    request = urllib.request.Request(url, data, headers)
    try:
        printRequest(request.get_method(), request.get_full_url())
        if data:
            printData(data)
        f = opener.open(request)
        response_headers = f.getheaders()
        retcode = f.code
        answer = f.read()
        # (Removed an md5 digest of the body that was computed but never
        # used.)
        for name, value in response_headers:
            if name.lower() == 'content-length':
                additional_info = '[' + str(value) + ']'
        printAnswer(retcode, additional_info)
        printHeaders(response_headers, 'Set-Cookie')
    except urllib.error.HTTPError as error:
        # NOTE(review): this reports once per header, printing '[-1]' for
        # every non-Content-Length header — matches the recovered source,
        # but a single post-scan report may have been intended.
        for h in error.headers:
            if h.lower() == 'content-length':
                printAnswer(str(error.code),
                            ' [' + str(error.headers[h]) + ']')
            else:
                printAnswer(str(error.code), ' [-1]')
        retcode = error.code
        answer = error.read()
    except urllib.error.URLError as error:
        printAnswer(str(error))
    return answer, retcode


def parseBurpData(fileName):
    """Parse a raw Burp request file into (url, headers, data, method).

    Reads the request line, the header block and (for POST) the body.
    Urlencoded and XML bodies are parsed; cookies are optionally imported
    as a dict under the 'Cookie' header. Exits the process on unreadable
    files or an undeterminable scheme.
    """
    global Protocol
    url = ''
    host = ''
    data = None
    contentType = 'None'
    headers = {}
    try:
        indata = open(fileName, "r")
    except IOError as error:
        print(str(error))
        sys.exit(1)
    # Request line: "<METHOD> <URI> <HTTP-version>"
    line = indata.readline()
    res = line.partition(' ')
    method = res[0]
    printDebug('method ' + method)
    res = res[2].rpartition(' ')
    uri = res[0]
    printDebug('URI: ' + str(uri))
    if Protocol == None:
        rulo = urlparse(uri)
        printOut('Scheme not given, trying to guess it from burp request.')
        # BUG FIX: was "scheme != 'http' or scheme != 'https'", which is
        # always true and made scheme guessing impossible.
        if rulo.scheme not in ('http', 'https'):
            printOut('** Could not determine the scheme from the HTTP request, please configure one **')
            sys.exit(1)
        else:
            Protocol = rulo.scheme
    # Header block, up to the first blank line.
    line = indata.readline()
    while line.strip():
        res = line.partition(':')
        if res[0] == 'Host':
            host = res[2].strip()
        if res[0] == 'Content-Type':
            contentType = res[2].strip()
        if res[0] == 'Cookie':
            if ImportCookie:
                # Store cookies as a dict under the 'Cookie' key.
                for c in res[2].split(';'):
                    tm = c.strip().partition('=')
                    if res[0] not in headers:
                        headers[res[0]] = {tm[0]: tm[2]}
                    else:
                        headers[res[0]][tm[0]] = tm[2]
                line = indata.readline()
                continue
        headers[res[0]] = res[2].strip()
        line = indata.readline()
    if method == 'POST':
        url = Protocol + '://' + host + uri
        data = indata.read().strip()
        if len(data) == 0:
            data = None
        else:
            if re.match(r'application/x-www-form-urlencoded', contentType):
                data = urllib.parse.parse_qs(data)
                for d in list(data.keys()):
                    if len(data[d]) > 1:
                        printOut("Multiple value for the same field. Odd... taking the first one")
                    data[d] = data[d][0]
            elif contentType == 'text/xml; charset=UTF-8':
                data = parseString(data)
            else:
                printOut('Unknown Content type: ' + str(contentType))
    elif method == 'GET':
        res = uri.rpartition('?')
        uri = res[0]
        # rpartition always returns a 3-tuple, so this branch always runs.
        if len(res) == 3:
            if uri == '':
                # No '?' present: rpartition puts the whole URI in res[2].
                uri = res[2]
            data = urllib.parse.parse_qs(res[2])
            for d in list(data.keys()):
                if len(data[d]) > 1:
                    printOut("Multiple value for the same field. Odd... taking the first one")
                data[d] = data[d][0]
        url = Protocol + '://' + host + uri
    indata.close()
    return url, headers, data, method
def __write_capture(self, request, response):
    """Serialize one request/response pair as a <capture> XML record.

    Writes the XML to ``self.ofhandle`` (UTF-8), rotating the output file
    every ``self.cut_count`` writes. Returns the response body (bytes),
    which is consumed here; an IncompleteRead raised while reading the
    body is saved and re-raised after the capture is flushed.
    """
    ohandle = io.StringIO()
    response_body = b''
    saved_exception = None
    try:
        ohandle.write('<capture>\n')
        ohandle.write('<request>\n')
        method = request.get_method()
        url = request.get_full_url()
        # NOTE(review): ``urlparse`` here is used as a module
        # (urlparse.urlsplit) — presumably aliased at import time; confirm.
        parsed = urlparse.urlsplit(url)
        relative_url = parsed.path
        if parsed.query:
            relative_url += '?' + parsed.query
        if parsed.fragment:
            # TODO: will this ever happen?
            relative_url += '#' + parsed.fragment
        host = None
        request_body = None
        # Older urllib Request objects expose get_host()/has_data();
        # newer ones expose .host/.data attributes.
        if hasattr(request, 'get_host'):
            host = request.get_host()
            # support 3.3
            if request.has_data():
                request_body = request.get_data()
        else:
            host = request.host
            request_body = request.data
        ohandle.write('<method>%s</method>\n' % escape(method))
        ohandle.write('<url>%s</url>\n' % escape(url))
        ohandle.write('<host>%s</host>\n' % escape(host))
        try:
            # ghetto: reach into the response's socket for the peer IP;
            # best-effort only, any failure is ignored.
            addr = response.fp.raw._sock.getpeername()
            if addr:
                ohandle.write('<hostip>%s</hostip>\n' % escape(addr[0]))
        except Exception as error:
            pass
        ohandle.write('<datetime>%s</datetime>\n'
                      % escape(time.asctime(time.gmtime()) + ' GMT'))
        # TODO: can we calculate request time and elapsed?
        request_headers = '%s %s HTTP/1.1\r\n' % (method, relative_url)
        # TODO: is there access to the HTTP version?
        for item in request.header_items():
            # Continuation values are folded with CRLF + tab.
            request_headers += item[0] + ': ' + '\r\n\t'.join(item[1:]) + '\r\n'
        # Base64-encode any header/body text containing non-printable
        # characters so the XML stays well-formed.
        if self.re_nonprintable_str.search(request_headers):
            ohandle.write('<headers encoding="base64">%s</headers>\n'
                          % base64.b64encode(
                              request_headers.encode('utf-8')).decode('ascii'))
        else:
            ohandle.write('<headers>%s</headers>\n' % escape(request_headers))
        if request_body is not None:
            if self.re_nonprintable.search(request_body):
                ohandle.write('<body encoding="base64">%s</body>\n'
                              % base64.b64encode(request_body).decode('ascii'))
            else:
                ohandle.write('<body>%s</body>\n'
                              % escape(request_body.decode('ascii')))
        ohandle.write('</request>\n')
        ohandle.write('<response>\n')
        status = int(response.getcode())
        ohandle.write('<status>%d</status>\n' % status)
        headers = response.info()
        # Bodiless statuses (HEAD, 1xx, 204, 304) are recorded empty.
        if 'HEAD' == method or status < 200 or status in (204, 304,):
            response_body = b''
        else:
            try:
                response_body = response.read()
            except urllib2.IncompleteRead as e:
                # Defer the error until the capture has been written out.
                # NOTE(review): ``urllib2`` under Python 3 is presumably an
                # alias for the compat exception's module — confirm.
                saved_exception = e
        response_headers = 'HTTP/1.1 %d %s\r\n' % (status, response.msg)
        # TODO: is there access to the HTTP version?
        response_headers += headers.as_string()
        content_type = headers.get('Content-Type')
        content_length = headers.get('Content-Length')
        if content_type:
            ohandle.write('<content_type>%s</content_type>\n'
                          % escape(content_type))
        if content_length:
            ohandle.write('<content_length>%d</content_length>\n'
                          % int(content_length))
        if self.re_nonprintable_str.search(response_headers):
            ohandle.write('<headers encoding="base64">%s</headers>\n'
                          % base64.b64encode(
                              response_headers.encode('utf-8')).decode('ascii'))
        else:
            ohandle.write('<headers>%s</headers>\n' % escape(response_headers))
        if response_body:
            if self.re_nonprintable.search(response_body):
                ohandle.write('<body encoding="base64">%s</body>\n'
                              % base64.b64encode(response_body).decode('ascii'))
            else:
                ohandle.write('<body>%s</body>\n'
                              % escape(response_body.decode('ascii')))
        ohandle.write('</response>\n')
        ohandle.write('</capture>\n')
        self.ofhandle.write(ohandle.getvalue().encode('utf-8'))
        ohandle.close()
        self.write_count += 1
        # Rotate the capture file every cut_count records.
        if 0 == (self.write_count % self.cut_count):
            self.close()
            self.open_file()
    except Exception as e:
        # Capture failures must never break the proxied request itself.
        sys.stderr.write('*** unhandled error in RaftCaptureProcessor: %s\n'
                         % (e))
    if saved_exception:
        raise(saved_exception)
    return response_body