예제 #1
0
	def __init__(self, host, timeout = CONFIG.TIME_OUT, proxy = CONFIG.PROXY, user_agents_type = ''):
		"""Set up default request state and a connection object for ``host``.

		Parameters:
			host: target as ``'example.com'``, ``'example.com:8080'`` or a full
				URL such as ``'https://user:pass@example.com:8443'``.
			timeout: socket timeout handed to the connection object.
			proxy: optional proxy address in the same formats as ``host``; when
				set, the connection is opened to the proxy instead of ``host``.
			user_agents_type: ``'bot'`` selects ``Bot_User_Agent``; any other
				value selects ``User_Agent``.

		Raises:
			ValueError: if no hostname can be extracted from the address, or
				the port part is not a valid port number.
		"""
		self.timeout 		= timeout
		self.proxy			= proxy

		self.store_cookie	= False
		self.redirect		= True				# follow Location / meta-refresh redirects
		self.rand_useragent	= True				# pick a random user agent per request

		if user_agents_type == 'bot':
			self.user_agents 	= Bot_User_Agent()
		else:
			self.user_agents 	= User_Agent()

		self.user_agent	= self.user_agents.user_agent

		# NOTE: this is a class attribute of HTTPConnection, so the debug level
		# is changed for every connection in the process, not only this one.
		HTTPConnection.debuglevel 	= 10
		######################################
		self.url 		= ''
		self.method		= 'GET'
		self.headers	= {
				'User-Agent': self.user_agent,
				'Accept-Encoding': 'gzip, deflate',
				'Connection': 'close'
			}
		######################################
		# An empty proxy value means "no proxy", matching the truthiness test
		# used when the request line is built.
		target	= self.proxy if self.proxy else host

		# urlparse only fills netloc when it sees '//', so schemeless input
		# ('example.com', 'example.com:8080', 'httpbin.org') gets it prepended.
		if '://' not in target:
			target	= '//' + target
		parser	= parse.urlparse(target)
		if not parser.hostname:
			raise ValueError('cannot extract a hostname from %r' % (target,))

		https		= parser.scheme == 'https'
		self.host	= parser.hostname
		self.port 	= parser.port
		if self.port is None:
			self.port	= 443 if https else 80

		# Credentials embedded in the address become a Basic auth header.
		_login	= (parser.username or '') + ':' + (parser.password or '')
		if len(_login) > 1:
			token	= b64encode(_login.encode('utf-8')).decode('ascii')
			self.headers['Authorization']	= 'Basic ' + token

		if https:
			self.request 	= HTTPSConnection(self.host, self.port, timeout = self.timeout)
		else:
			self.request 	= HTTPConnection(self.host, self.port, timeout = self.timeout)
예제 #2
0
class HTTP:
	"""Small HTTP/HTTPS client built on ``http.client``.

	One instance wraps one connection object (to the target host, or to the
	proxy when one is configured). ``Request`` is the high-level entry point;
	``init``, ``send`` and ``recv`` are the individual steps it runs.
	"""

	# Upper bound on redirects followed by a single Request() call.
	MAX_REDIRECTS	= 10

	def __init__(self, host, timeout = CONFIG.TIME_OUT, proxy = CONFIG.PROXY, user_agents_type = ''):
		"""Set up default request state and a connection object for ``host``.

		Parameters:
			host: target as ``'example.com'``, ``'example.com:8080'`` or a full
				URL such as ``'https://user:pass@example.com:8443'``.
			timeout: socket timeout handed to the connection object.
			proxy: optional proxy address in the same formats as ``host``; when
				set, the connection is opened to the proxy instead of ``host``.
			user_agents_type: ``'bot'`` selects ``Bot_User_Agent``; any other
				value selects ``User_Agent``.

		Raises:
			ValueError: if no hostname can be extracted from the address, or
				the port part is not a valid port number.
		"""
		self.timeout 		= timeout
		self.proxy			= proxy

		self.store_cookie	= False
		self.redirect		= True				# follow Location / meta-refresh redirects
		self.rand_useragent	= True				# pick a random user agent per request

		if user_agents_type == 'bot':
			self.user_agents 	= Bot_User_Agent()
		else:
			self.user_agents 	= User_Agent()

		self.user_agent	= self.user_agents.user_agent

		# NOTE: this is a class attribute of HTTPConnection, so the debug level
		# is changed for every connection in the process, not only this one.
		HTTPConnection.debuglevel 	= 10
		######################################
		self.url 		= ''
		self.method		= 'GET'
		self.headers	= {
				'User-Agent': self.user_agent,
				'Accept-Encoding': 'gzip, deflate',
				'Connection': 'close'
			}
		######################################
		# An empty proxy value means "no proxy", matching the test in init().
		target	= self.proxy if self.proxy else host

		# urlparse only fills netloc when it sees '//', so schemeless input
		# ('example.com', 'example.com:8080', 'httpbin.org') gets it prepended.
		if '://' not in target:
			target	= '//' + target
		parser	= parse.urlparse(target)
		if not parser.hostname:
			raise ValueError('cannot extract a hostname from %r' % (target,))

		https		= parser.scheme == 'https'
		self.host	= parser.hostname
		self.port 	= parser.port
		if self.port is None:
			self.port	= 443 if https else 80

		# Credentials embedded in the address become a Basic auth header.
		_login	= (parser.username or '') + ':' + (parser.password or '')
		if len(_login) > 1:
			token	= b64encode(_login.encode('utf-8')).decode('ascii')
			self.headers['Authorization']	= 'Basic ' + token

		if https:
			self.request 	= HTTPSConnection(self.host, self.port, timeout = self.timeout)
		else:
			self.request 	= HTTPConnection(self.host, self.port, timeout = self.timeout)

	def init(self, url, method , header = None):
		"""Send the request line and headers for ``url``.

		Parameters:
			url: absolute URL or path. Without a proxy, an absolute http(s)
				URL is reduced to its path and query; with a proxy it is
				sent unchanged.
			method: HTTP method; a falsy value keeps the current ``self.method``.
			header: optional mapping merged into ``self.headers`` (it stays
				there for later requests as well).
		"""
		if method:
			self.method 	= method

		# Direct connections take only the path; proxies need the full URL.
		if not self.proxy and url.startswith(('http://', 'https://')):
			pieces		= url.split('/', 3)
			# 'http://host' has no path part: request the root instead of
			# silently reusing the URL of the previous request.
			self.url 	= '/' + pieces[3] if len(pieces) > 3 else '/'
		else:
			self.url 	= url

		# random user agent
		if self.rand_useragent:
			self.user_agent 	= self.user_agents.getRandomUserAgent()
			self.headers.update({'User-Agent': self.user_agent})

		# form content type is only meaningful for POST
		if self.method == 'POST':
			self.headers.update({'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
		elif 'Content-Type' in self.headers:
			del self.headers['Content-Type']

		# caller-supplied headers win over the defaults
		if header:
			self.headers.update(header)

		# request line
		self.request.putrequest(self.method, self.url)

		# request headers
		for hk, hv in self.headers.items():
			self.request.putheader(hk, hv)

		# blank line terminating the header section (this opens the socket)
		self.request.endheaders()

		# apply the timeout to the now-open socket
		self.request.sock.settimeout(self.timeout)

	def send(self, data =b''):
		"""Send ``data`` as the request body; nothing is sent when it is empty."""
		if data:
			self.request.send(data)

	def _store_cookies(self):
		"""Merge Set-Cookie values of the current response into the Cookie header."""
		received	= self.response.msg.get_all('Set-Cookie') or []
		if not received:
			return

		# Reuse an existing cookie header key whatever its letter case.
		key		= next((k for k in self.headers if k.lower() == 'cookie'), 'Cookie')
		jar		= {}

		for pair in self.headers.get(key, '').split(';'):
			name, sep, value	= pair.strip().partition('=')
			if sep and name:
				jar[name]	= value

		for line in received:
			# Only the leading "name=value" matters; the rest are attributes
			# (Path, Expires, ...) that must not be echoed back.
			name, sep, value	= line.split(';', 1)[0].strip().partition('=')
			if sep and name:
				jar[name]	= value

		if jar:
			self.headers[key]	= '; '.join(k + '=' + v for k, v in jar.items())

	def recv(self, chunk = 0):
		"""Read the response and yield its body (generator).

		Parameters:
			chunk: when > 0 the raw body is yielded in pieces of at most
				``chunk`` bytes, without gzip decoding or redirect detection.
				Otherwise the whole body is read, gunzipped when the response
				says so, checked for a redirect and yielded once.

		When a redirect is detected (and ``self.redirect`` is set),
		``self.refresh`` becomes True, ``self.url`` holds the target and the
		request state is switched to a body-less GET. The connection is closed
		when the generator finishes or is discarded.
		"""
		try:
			### get response header
			self.response	= self.request.getresponse()

			### store cookie
			if self.store_cookie:
				self._store_cookies()

			if chunk > 0:
				# Stop on the first empty read: the response's ``closed`` flag
				# is not a reliable end-of-body indicator.
				while True:
					data	= self.response.read(chunk)
					if not data:
						break
					yield data
			else:
				result 	= self.response.read()
				print(self.response.getheaders())

				### extract gzip body
				encoding	= self.response.getheader('content-encoding') or ''
				if result and encoding.lower() == 'gzip':
					result = GzipFile(fileobj=BytesIO(result)).read()

				### detect redirect
				if self.redirect:
					location	= self.response.getheader('location')
					# Raw literal so \2 is a backreference to the quote
					# character; (?i) replaces lower-casing the body, which
					# would also lower-case the extracted URL.
					refresh		= search(rb'(?i)http-equiv=.refresh(.*?)content=("|\')(.*?)url=(.*?)\2', result[:1000])
					if location or refresh:
						self.refresh 	= True
						if location:
							self.url 	= location
						else:
							self.url 	= refresh.group(4).decode('utf-8', 'replace').strip()
						self.params		= b''
						self.method 	= 'GET'
						self.headers.pop('Content-Type', None)

				### yield all content
				yield result
		finally:
			self.request.close()

	def Request(self, url, method = 'GET', params = b'', header = None):
		"""Perform a request, following redirects, and return the final body.

		Parameters:
			url: absolute URL or path to request.
			method: HTTP method name (upper-cased before use).
			params: request body as ``bytes`` or ``str`` (``str`` is encoded).
			header: optional mapping of extra headers; it is not modified.

		Returns:
			The response body as ``bytes``. At most ``MAX_REDIRECTS`` redirects
			are followed; after that the last body received is returned.

		NOTE: redirects are re-sent over the same connection object, so a
		redirect to another host is still sent to the original host.
		"""
		self.refresh	= False
		if params is None:
			params	= b''
		self.params 	= params.encode() if isinstance(params, str) else params

		# Private copy: neither the caller's dict nor a shared default may
		# accumulate Content-Length across calls.
		header	= dict(header) if header else {}
		method	= (method or 'GET').upper()

		if method == 'POST':
			header['Content-Length']	= len(self.params)
		else:
			# Drop a length left behind in self.headers by an earlier POST.
			self.headers.pop('Content-Length', None)

		self.result	= b''
		for _ in range(self.MAX_REDIRECTS + 1):
			self.init(url, method, header)
			self.send(self.params)
			self.result 	= b''.join(self.recv())
			if not self.refresh:
				break

			# recv() switched the state to a body-less GET and left the
			# target in self.url; resolve it against the URL just requested
			# so relative targets work.
			self.refresh	= False
			url		= parse.urljoin(url, self.url)
			method	= self.method
			header.pop('Content-Length', None)
			self.headers.pop('Content-Length', None)

		return self.result

	# def Request(self, url, method = 'GET', params = None, header = {}, chunk = 0):
	# 	# print("data : %s \nparams : %s" % (url,params))
	# 	if self.rand_useragent:
	# 		self.user_agent 	= self.user_agents.getRandomUserAgent()
	# 		self.headers.update({'User-Agent': self.user_agent})
		
	# 	if params:
	# 		self.headers.update({'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'})
	# 	elif 'Content-Type' in self.headers:
	# 		del self.headers['Content-Type']

	# 	if len(header) > 0:
	# 		self.headers.update(header)

	# 	while True:
	# 		if not self.proxy:
	# 			if url.startswith('http'):
	# 				url = url.split('/', 3)
	# 				if len(url) > 3:
	# 					url 	= '/' + url[3]
	# 		if params:
	# 			self.request.request(method, url, params, headers = self.headers)
	# 		else:
	# 			self.request.request(method, url, headers = self.headers)
			
	# 		self.request.sock.settimeout(self.timeout)
	# 		self.response	= self.request.getresponse()
	# 		######## store cookie ###########
	# 		if self.storecookie and self.response.getheader('set-cookie'):
	# 			cookies = []
	# 			for c in self.response.getheader('set-cookie').split(','):
	# 				cookies.append(c.split(';', 1)[0])

	# 			cookie 	= self.headers['Cookie'] + ';' if 'Cookie' in self.headers else ''
	# 			cookies = cookie + '; '.join(cookies)
				
	# 			result	= {}
	# 			for c in cookies.split(';'):
	# 				tmp	= c.strip().split('=', 1)
	# 				if len(tmp) == 2:
	# 					result[tmp[0]] = tmp[1]
	# 			self.cookie	= '; '.join(k +'='+ v for k,v in result.items())
	# 			self.headers.update({'Cookie': self.cookie})
	# 		###
	# 		if chunk > 0:
	# 			break

	# 		self.result = self.response.read()
	# 		if self.response.getheader('content-encoding') == 'gzip':
	# 			self.result = GzipFile(fileobj=BytesIO(self.result)).read()
	# 		# self.result	= self.result.decode('utf-8', 'replace')
	# 		###
	# 		refresh	= search(b'http-equiv=.refresh(.*?)content=("|\')(.*?)url=(.*?)\2',self.result[:1000].lower())
	# 		if (self.response.getheader('location') or refresh) and self.redirect:
	# 			if self.response.getheader('location'):
	# 				url 	= self.response.getheader('location')
	# 			else:
	# 				url = refresh.group(4)
	# 			params	= None
	# 			method	= 'GET'
	# 			if 'Content-Type' in self.headers:
	# 				del self.headers['Content-Type']
	# 		else:
	# 			break
	# 	#########################
	# 	if chunk > 0:
	# 		while not self.response.closed:
	# 			yield self.response.read(chunk)
	# 		self.request.close()
	# 	else:	
	# 		self.request.close()
	# 		yield self.result