def curl(url, file_ids, log):
    """POST the given file ids to ``url`` and save the response to disk.

    The response body is streamed into ``gdc_curl_download.tar.gz`` in the
    current working directory.  When the transfer finishes with a status
    other than 200, the content written to that file is logged as the error
    text; no exception is raised for a bad status alone.

    Args:
        url: endpoint that receives the POST request.
        file_ids: value sent as the ``ids`` member of the JSON request body.
        log: logger-like object providing ``info``, ``error`` and
            ``exception``.

    Raises:
        Exception: anything raised while configuring or performing the
            transfer is logged through ``log.exception`` and re-raised.
    """
    log.info('\tstarting curl fetch of gdc files')
    params = {'ids': file_ids}
    archive = 'gdc_curl_download.tar.gz'
    c = None
    try:
        with open(archive, 'wb') as f:
            try:
                c = Curl()
                c.setopt(c.URL, url)
                c.setopt(c.WRITEDATA, f)
                c.setopt(c.HTTPHEADER, ["Content-Type: application/json"])
                c.setopt(pycurl.CUSTOMREQUEST, "POST")
                c.setopt(pycurl.POSTFIELDS, json.dumps(params))
                # TODO: set up using a local certificate
                # NOTE(review): certificate and host verification are both
                # switched off here, so the peer is not authenticated.
                c.setopt(pycurl.SSL_VERIFYPEER, 0)
                c.setopt(pycurl.SSL_VERIFYHOST, 0)
                c.perform()
            except Exception:
                log.exception('problem with curl')
                raise
    finally:
        # The output file is already closed (and flushed) at this point, so
        # it can be re-read safely.
        if c is not None:
            try:
                status = int(c.getinfo(pycurl.RESPONSE_CODE))
                if status != 200:
                    # Read as bytes and decode leniently: the file may hold
                    # partial archive data, and a decode failure here must
                    # not hide the original error.
                    with open(archive, 'rb') as e:
                        err = e.read().decode('utf-8', errors='replace')
                    log.error('\tbad status on curl call(%s):\n%s' % (status, err))
            finally:
                c.close()
def _finish(
    self, curl: pycurl.Curl, curl_error: int = None, curl_message: str = None
) -> None:
    """Wrap up an ended transfer and hand its result to the request callback.

    The handle is detached from the multi object and appended to the free
    list.  An ``HTTPResponse`` describing either the failure or the finished
    transfer is then passed to the callback stored in ``curl.info``; any
    exception raised while doing so goes to ``handle_callback_exception``.

    Args:
        curl: handle whose transfer has ended; its ``info`` attribute holds
            the per-request state and is reset to ``None`` here.
        curl_error: curl error code; falsy when the transfer succeeded.
        curl_message: text accompanying ``curl_error``; must not be ``None``
            when ``curl_error`` is truthy.
    """
    request_state = curl.info
    curl.info = None
    self._multi.remove_handle(curl)
    self._free_list.append(curl)
    body = request_state["buffer"]
    if not curl_error:
        error = None  # type: Optional[CurlError]
        code = curl.getinfo(pycurl.HTTP_CODE)
        effective_url = curl.getinfo(pycurl.EFFECTIVE_URL)
        # Rewind so the response consumer reads the body from the start.
        body.seek(0)
    else:
        assert curl_message is not None
        error = CurlError(curl_error, curl_message)
        assert error is not None
        code = error.code
        effective_url = None
        # A failed transfer carries no body.
        body.close()
        body = None
    # the various curl timings are documented at
    # http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html
    time_info = {
        "queue": (
            request_state["curl_start_ioloop_time"]
            - request_state["queue_start_time"]
        ),
        # DNS resolution time
        "namelookup": curl.getinfo(pycurl.NAMELOOKUP_TIME),
        # Connection establishment time
        "connect": curl.getinfo(pycurl.CONNECT_TIME),
        "appconnect": curl.getinfo(pycurl.APPCONNECT_TIME),
        "pretransfer": curl.getinfo(pycurl.PRETRANSFER_TIME),
        "starttransfer": curl.getinfo(pycurl.STARTTRANSFER_TIME),
        # Download speed
        "speed_download": curl.getinfo(pycurl.SPEED_DOWNLOAD),
        "total": curl.getinfo(pycurl.TOTAL_TIME),
        "redirect": curl.getinfo(pycurl.REDIRECT_TIME),
    }
    try:
        response = HTTPResponse(
            request=request_state["request"],
            code=code,
            headers=request_state["headers"],
            buffer=body,
            effective_url=effective_url,
            error=error,
            reason=request_state["headers"].get("X-Http-Reason", None),
            request_time=(
                self.io_loop.time() - request_state["curl_start_ioloop_time"]
            ),
            start_time=request_state["curl_start_time"],
            time_info=time_info,
        )
        request_state["callback"](response)
    except Exception:
        self.handle_callback_exception(request_state["callback"])
class Httpy:
    """Small HTTP helper built around a single shared curl handle.

    The handle is created once in ``__init__`` with peer/host certificate
    verification disabled, a timeout of ``DEFAULT_TIMEOUT``, the proxy
    ``HTTP_PROXY`` and redirect following enabled.  Every request made
    through ``get`` and ``download`` reuses that handle.
    """

    def __init__(self):
        self.curl = Curl()
        # NOTE(review): TLS verification is switched off for all requests.
        self.curl.setopt(self.curl.SSL_VERIFYPEER, 0)
        self.curl.setopt(self.curl.SSL_VERIFYHOST, 0)
        self.curl.setopt(self.curl.TIMEOUT, DEFAULT_TIMEOUT)
        self.curl.setopt(self.curl.PROXY, HTTP_PROXY)
        self.curl.setopt(self.curl.FOLLOWLOCATION, True)

    def get(self, url):
        """Perform a GET request and return the response body as text.

        The body is decoded with the default codec of ``bytes.decode``.
        Errors raised by the transfer or by decoding propagate unchanged.
        """
        with BytesIO() as body:
            self.curl.setopt(self.curl.WRITEFUNCTION, body.write)
            self.curl.setopt(self.curl.URL, url)
            self.curl.perform()
            return body.getvalue().decode()

    def download(self, url):
        """Fetch ``url`` and return the raw response body as bytes.

        A failure whose message contains ``"transfer closed"`` is retried,
        up to three attempts in total.  A non-200 response raises unless the
        body contains the bytes ``404``, in which case the body is returned
        as-is.

        Returns:
            The response body, or ``None`` when every attempt ended with a
            "transfer closed" failure.

        Raises:
            Exception: for any other failure; the message is the original
                error text followed by ``" HTTP<status>"``.
        """
        retries = 3
        while retries:
            try:
                with BytesIO() as body:
                    self.curl.setopt(self.curl.WRITEFUNCTION, body.write)
                    self.curl.setopt(self.curl.URL, url)
                    self.curl.perform()
                    content = body.getvalue()
                status = self.curl.getinfo(self.curl.HTTP_CODE)
                # The body is bytes, so the marker must be bytes as well;
                # comparing against a str raises TypeError on Python 3.
                if status != 200 and b"404" not in content:
                    raise Exception("HTTP" + str(status))
                return content
            except Exception as e:
                if "transfer closed" in str(e):
                    retries -= 1
                    continue
                raise Exception(
                    str(e) + " HTTP"
                    + str(self.curl.getinfo(self.curl.HTTP_CODE))) from e
        # All attempts were used up by "transfer closed" failures.
        return None
def get_login(c: pycurl.Curl, url: str) -> bytes:
    """Request ``url`` through the supplied curl handle and return the body.

    The handle is configured in place and is not closed, so the options set
    here remain on ``c`` after the call.  Response header lines are handed
    to ``header_function``.

    Args:
        c: curl handle to configure and run.
        url: address to request.

    Returns:
        The raw response body collected during the transfer.
    """
    logger.info("get_login() called")

    request_headers = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "DNT: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
    ]
    body = BytesIO()

    # Options are applied in exactly this order.
    options = (
        (c.WRITEFUNCTION, body.write),
        (c.HEADERFUNCTION, header_function),
        (c.BUFFERSIZE, 102400),
        (c.URL, url),
        (c.HTTPHEADER, request_headers),
        (c.USERAGENT, "curl/7.65.1"),
        (c.MAXREDIRS, 50),
        (c.ACCEPT_ENCODING, ""),
        (c.TCP_KEEPALIVE, 1),
        (c.FOLLOWLOCATION, True),
    )
    for option, value in options:
        c.setopt(option, value)

    c.perform()
    logger.info("get_login() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return body.getvalue()
def moodle_admin_login_curl(self):
    """Log in to the deployed site's ``/login/index.php`` and return the page.

    Credentials are POSTed as a urlencoded form, with the password taken
    from ``self.deployment['moodleAdminPassword']``.  Cookies are kept in a
    temporary file for the duration of the request and discarded afterwards.
    Terminates the process with exit status 1 when the final HTTP status is
    not 200.

    Returns:
        The response body decoded as UTF-8.
    """
    fd, path = tempfile.mkstemp()
    # mkstemp hands back an open OS-level descriptor; curl addresses the
    # cookie file by path only, so release the descriptor right away.
    os.close(fd)
    curl = None
    try:
        response = BytesIO()
        url = 'https://' + self.deployment['siteURL'] + '/login/index.php'
        curl = Curl()
        curl.setopt(pycurl.URL, url)
        # NOTE(review): peer certificate verification is disabled.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.setopt(pycurl.POST, True)
        curl.setopt(pycurl.COOKIEJAR, path)
        curl.setopt(pycurl.COOKIEFILE, path)
        post = urllib.parse.urlencode({
            'username': '******',
            'password': self.deployment['moodleAdminPassword']
        })
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
        if status != 200:
            print("*** FAILED: {} ***".format(status))
            sys.exit(1)
        response = response.getvalue().decode('utf-8')
    finally:
        try:
            # Close the handle before deleting the cookie file: libcurl
            # writes the cookie jar when the handle is cleaned up, which
            # would otherwise recreate the file after it was removed.
            if curl is not None:
                curl.close()
        finally:
            os.remove(path)
    return response
def http_perform(curl: pycurl.Curl):
    """Run the transfer on ``curl`` and validate the resulting HTTP status.

    Raises:
        CurlError: when the transfer itself fails; the underlying
            ``pycurl.error`` is attached as the cause.

    The status check is delegated to ``HTTPFamily.check_status``.
    """
    try:
        curl.perform()
    except pycurl.error as transfer_error:
        raise CurlError from transfer_error
    else:
        http_status = curl.getinfo(pycurl.HTTP_CODE)
        HTTPFamily.check_status(http_status)
def http_req_perform(self, curl: pycurl.Curl, writefun: object) -> None:
    """Run the transfer on ``curl`` and translate curl failures.

    Args:
        curl: prepared handle to perform.
        writefun: callable installed as the handle's write function.

    Raises:
        DNSError: the host or the proxy could not be resolved.
        HTTPError: curl reported an HTTP error status.
        ConnError: connecting, sending, receiving or the timeout failed.
        NetError: any other curl failure.
    """
    curl.setopt(pycurl.WRITEFUNCTION, writefun)
    try:
        curl.perform()
    except pycurl.error as exc:
        code, errstr = exc.args[0], exc.args[1]

        resolution_failures = (
            pycurl.E_COULDNT_RESOLVE_PROXY,
            pycurl.E_COULDNT_RESOLVE_HOST,
        )
        connection_failures = (
            pycurl.E_COULDNT_CONNECT,
            pycurl.E_OPERATION_TIMEOUTED,
            pycurl.E_SEND_ERROR,
            pycurl.E_RECV_ERROR,
        )

        if code in resolution_failures:
            raise DNSError(code, errstr)
        if code == pycurl.E_HTTP_RETURNED_ERROR:
            raise HTTPError(curl.getinfo(pycurl.HTTP_CODE), None, code, errstr)
        if code in connection_failures:
            raise ConnError(curl.getinfo(pycurl.OS_ERRNO), code, errstr)
        # Malformed URLs, partial files and every remaining code all map to
        # the generic network error.
        raise NetError(code, errstr)
def post_login(c: pycurl.Curl, url: str, sessionid: str, token: str, username: str, password: str, remember=2678400) -> bytes:
    """Submit the login form to ``url`` and return the response body.

    The password is never sent in clear text: the form carries the SHA-1
    hex digest of ``password + username`` (prefixed with ``$sha1$``) and an
    empty ``password`` field.  The handle is configured in place and is not
    closed.  Response header lines are handed to ``header_function``.

    Args:
        c: curl handle to configure and run.
        url: address the form is posted to.
        sessionid: value sent verbatim as the ``Cookie`` header.
        token: value of the form's ``token`` field.
        username: account name; percent-encoded before being placed in the
            form body.
        password: account password, used only to compute the hash.
        remember: value of the form's ``remember`` field.

    Returns:
        The raw response body collected during the transfer.
    """
    from urllib.parse import quote

    logger.info("post_login() called")

    buffer = BytesIO()

    header = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "Referer: http://81.19.210.136:1005/",
        "Content-Type: application/x-www-form-urlencoded",
        "DNT: 1",
        "Connection: keep-alive",
        f"Cookie: {sessionid}",
        "Upgrade-Insecure-Requests: 1",
    ]

    # The hash is computed over the raw (unencoded) username.
    password_hash = hashlib.sha1(
        bytearray(password, "utf-8") + bytearray(username, "utf-8")).hexdigest()

    # The body is application/x-www-form-urlencoded, so the user-supplied
    # name must be percent-encoded; otherwise characters such as "&", "="
    # or non-ASCII letters corrupt the form.
    # NOTE(review): ``token`` is inserted unchanged - confirm whether it can
    # contain characters that need encoding as well.
    encoded_username = quote(username, safe="")
    postfields = (f"token={token}&password_hash=%24sha1%24{password_hash}"
                  + f"&username={encoded_username}&password=&remember={remember}")
    postfieldsize = len(postfields)

    logger.info("postfieldsize: %s", postfieldsize)
    # NOTE(review): this debug line writes the password hash to the log.
    logger.debug("postfields: %s", postfields)

    c.setopt(c.WRITEFUNCTION, buffer.write)
    c.setopt(c.HEADERFUNCTION, header_function)
    c.setopt(c.BUFFERSIZE, 102400)
    c.setopt(c.URL, url)
    c.setopt(c.POSTFIELDS, postfields)
    c.setopt(c.POSTFIELDSIZE_LARGE, postfieldsize)
    c.setopt(c.HTTPHEADER, header)
    c.setopt(c.USERAGENT, "curl/7.65.1")
    c.setopt(c.MAXREDIRS, 50)
    c.setopt(c.ACCEPT_ENCODING, "")
    c.setopt(c.TCP_KEEPALIVE, 1)
    c.setopt(c.FOLLOWLOCATION, True)

    c.perform()
    logger.info("post_login() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return buffer.getvalue()
def moodle_smoke_test(self):
    """Request the deployed site's front page and require HTTP 200.

    The URL is ``https://`` followed by ``self.deployment['siteURL']``.  The
    response body is discarded.  Terminates the process with exit status 1
    when the status is anything other than 200.
    """
    print("\nMoodle Smoke Test...")
    url = 'https://' + self.deployment['siteURL']
    curl = Curl()
    try:
        curl.setopt(pycurl.URL, url)
        # NOTE(review): peer certificate verification is disabled.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        # Only the status matters; drop the body as it arrives.
        curl.setopt(pycurl.WRITEFUNCTION, lambda x: None)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle on every path, including a failed transfer.
        curl.close()
    if status != 200:
        print("*** DEPLOY FAILED ***")
        print('HTTP Status Code: {}'.format(status))
        sys.exit(1)
    print('(ok: {})'.format(status))
def sendTelegramAlert(self, telegram_chat_id, telegram_bot_token, message):
    """Post ``message`` to a Telegram chat through the bot ``sendMessage`` API.

    A message longer than 4096 characters is not sent; a short notice
    stating the overall size is sent in its place.  The resulting HTTP
    status is passed to ``self.getStatusByTelegramCode``.

    Args:
        telegram_chat_id: value of the ``chat_id`` form field.
        telegram_bot_token: bot token inserted into the API URL.
        message: text to deliver.
    """
    message_length = len(message)
    if message_length > 4096:
        message = (
            "The size of the message in Telegram (4096) has been exceeded. Overall size: "
            + str(message_length)
        )

    handle = Curl()
    endpoint = 'https://api.telegram.org/bot' + str(telegram_bot_token) + '/sendMessage'
    handle.setopt(handle.URL, endpoint)

    form_body = urlencode({'chat_id': telegram_chat_id, 'text': message})
    handle.setopt(handle.POSTFIELDS, form_body)

    # The response text returned by perform_rs() is not used.
    handle.perform_rs()
    status_code = handle.getinfo(HTTP_CODE)
    handle.close()

    self.getStatusByTelegramCode(status_code)
def get_messages(c: pycurl.Curl, url: str, sessionid: str, authcred: str, authtimeout: int) -> bytes:
    """POST ``ajax=1`` to ``url`` and return the response body.

    The handle is configured in place and is not closed.  The three cookie
    arguments are joined with ``"; "`` into a single ``Cookie`` header.
    Response header lines are handed to ``header_function``.

    Args:
        c: curl handle to configure and run.
        url: address to post to.
        sessionid: first component of the ``Cookie`` header.
        authcred: second component of the ``Cookie`` header.
        authtimeout: third component of the ``Cookie`` header.

    Returns:
        The raw response body collected during the transfer.
    """
    logger.info("get_messages() called")

    request_headers = [
        "User-Agent: Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0)",
        "Accept: */*",
        "Accept-Language: en-US,en;q=0.7,fi;q=0.3",
        "Referer: http://81.19.210.136:1005/",
        "Content-Type: application/x-www-form-urlencoded",
        "X-Requested-With: XMLHttpRequest",
        "DNT: 1",
        "Connection: keep-alive",
        f"Cookie: {sessionid}; {authcred}; {authtimeout}",
        "Upgrade-Insecure-Requests: 1",
    ]
    body = BytesIO()

    form_body = "ajax=1"
    form_size = len(form_body)
    logger.info("postfieldsize: %s", form_size)

    # Options are applied in exactly this order.
    options = (
        (c.WRITEFUNCTION, body.write),
        (c.HEADERFUNCTION, header_function),
        (c.BUFFERSIZE, 102400),
        (c.URL, url),
        (c.POSTFIELDS, form_body),
        (c.POSTFIELDSIZE_LARGE, form_size),
        (c.HTTPHEADER, request_headers),
        (c.USERAGENT, "curl/7.65.1"),
        (c.MAXREDIRS, 50),
        (c.ACCEPT_ENCODING, ""),
        (c.TCP_KEEPALIVE, 1),
        (c.FOLLOWLOCATION, True),
    )
    for option, value in options:
        c.setopt(option, value)

    c.perform()
    logger.info("get_messages() HTTP response: %s", c.getinfo(c.HTTP_CODE))
    return body.getvalue()
def __fetch_page(self, url):
    """Fetch ``url`` with the stored credentials and return its text.

    Two requests are made.  The first one only collects cookies into the
    file ``cookiefile``; its body is thrown away.  The second one sends the
    username and password together with those cookies and browser-like
    headers.

    Returns:
        The page content with every run of whitespace collapsed to a single
        space, or ``None`` when the second request's status is not 200.
    """
    useragent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36'
    encoding = 'gzip, deflate, sdch'
    httpheader = [
        'Accept: text/html, application/xhtml+xml, application/xml; q=0.9, image/webp, */*; q=0.8',
        'Accept-Language: it-IT, it; q=0.8, en-US; q=0.6, en; q=0.4',
        'Host: uniparthenope.esse3.cineca.it'
    ]
    cookiefile = 'cookiefile'

    # First pass: collect cookies only.
    page = StringIO()
    c = Curl()
    try:
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.WRITEFUNCTION, page.write)
        c.setopt(c.COOKIEJAR, cookiefile)
        c.setopt(c.URL, url)
        c.perform()
    finally:
        c.close()
        page.close()

    # Second pass: authenticated request reusing the stored cookies.
    page = StringIO()
    c = Curl()
    try:
        c.setopt(c.USERPWD, self.__username + ':' + self.__password)
        c.setopt(c.FOLLOWLOCATION, 1)
        c.setopt(c.WRITEFUNCTION, page.write)
        c.setopt(c.COOKIEFILE, cookiefile)
        c.setopt(c.ENCODING, encoding)
        c.setopt(c.HTTPHEADER, httpheader)
        c.setopt(c.REFERER, url)
        c.setopt(c.USERAGENT, useragent)
        c.setopt(c.URL, url)
        c.perform()
        status = c.getinfo(pycurl.HTTP_CODE)
        page_str = page.getvalue()
    finally:
        # Release the handle and buffer on every path; the early return for
        # a non-200 status used to skip this cleanup.
        c.close()
        page.close()

    if status != 200:
        return None
    return re.sub(r'\s+', " ", page_str)
def http_query(url, timeout=1000):
    """GET ``url`` (following redirects) and return status, headers and body.

    The URL is printed before the request is made.

    Args:
        url: address to request.
        timeout: overall transfer timeout in milliseconds.

    Returns:
        A dict with three keys: ``'header'`` maps header names to their
        stripped values, ``'code'`` is the HTTP status, and ``'body'`` is
        the collected response body.
    """
    # Call form works on both Python 2 and Python 3.
    print(url)
    body_writer = StringIO()
    head_writer = StringIO()
    c = Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.TIMEOUT_MS, timeout)
        # NOTE(review): pycurl delivers bytes to these callbacks on
        # Python 3 - confirm the StringIO in use accepts them.
        c.setopt(pycurl.WRITEFUNCTION, body_writer.write)
        c.setopt(pycurl.HEADERFUNCTION, head_writer.write)
        c.perform()
        code = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle whether or not the transfer succeeded.
        c.close()

    head_writer.seek(0)
    # Skip the first line (the status line) before parsing header fields.
    head_writer.readline()
    headers = {}
    for line in head_writer:
        parts = line.split(':', 1)
        # Lines without a colon (blank separators, later status lines) are
        # ignored.
        if len(parts) == 2:
            headers[parts[0]] = parts[1].strip()

    return {'header': headers, 'code': code, 'body': body_writer.getvalue()}
def http_query(url, timeout=1000):
    """GET ``url`` (following redirects) and return status, headers and body.

    The URL is printed before the request is made.

    Args:
        url: address to request.
        timeout: overall transfer timeout in milliseconds.

    Returns:
        A dict with three keys: ``'header'`` maps header names to their
        stripped values, ``'code'`` is the HTTP status, and ``'body'`` is
        the collected response body.
    """
    # Call form works on both Python 2 and Python 3.
    print(url)
    body_writer = StringIO()
    head_writer = StringIO()
    c = Curl()
    try:
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.FOLLOWLOCATION, 1)
        c.setopt(pycurl.TIMEOUT_MS, timeout)
        # NOTE(review): pycurl delivers bytes to these callbacks on
        # Python 3 - confirm the StringIO in use accepts them.
        c.setopt(pycurl.WRITEFUNCTION, body_writer.write)
        c.setopt(pycurl.HEADERFUNCTION, head_writer.write)
        c.perform()
        code = c.getinfo(pycurl.HTTP_CODE)
    finally:
        # Release the handle whether or not the transfer succeeded.
        c.close()

    head_writer.seek(0)
    # Skip the first line (the status line) before parsing header fields.
    head_writer.readline()
    headers = {}
    for line in head_writer:
        parts = line.split(':', 1)
        # Lines without a colon (blank separators, later status lines) are
        # ignored.
        if len(parts) == 2:
            headers[parts[0]] = parts[1].strip()

    return {'header': headers, 'code': code, 'body': body_writer.getvalue()}
def moodle_admin_login_curl(self):
    """Log in to the deployed site's ``/login/index.php`` and return the page.

    Credentials are POSTed as a urlencoded form, with the password taken
    from ``self.deployment['moodleAdminPassword']``.  Cookies are kept in a
    temporary file for the duration of the request and discarded afterwards.
    Terminates the process with exit status 1 when the final HTTP status is
    not 200.

    Returns:
        The response body decoded as UTF-8.
    """
    fd, path = tempfile.mkstemp()
    # mkstemp hands back an open OS-level descriptor; curl addresses the
    # cookie file by path only, so release the descriptor right away.
    os.close(fd)
    curl = None
    try:
        response = BytesIO()
        url = 'https://' + self.deployment['siteURL'] + '/login/index.php'
        curl = Curl()
        curl.setopt(pycurl.URL, url)
        # NOTE(review): peer certificate verification is disabled.
        curl.setopt(pycurl.SSL_VERIFYPEER, False)
        curl.setopt(pycurl.WRITEFUNCTION, response.write)
        curl.setopt(pycurl.POST, True)
        curl.setopt(pycurl.COOKIEJAR, path)
        curl.setopt(pycurl.COOKIEFILE, path)
        post = urllib.parse.urlencode({'username': '******', 'password': self.deployment['moodleAdminPassword']})
        curl.setopt(pycurl.POSTFIELDS, post)
        curl.setopt(pycurl.FOLLOWLOCATION, True)
        curl.perform()
        status = curl.getinfo(pycurl.HTTP_CODE)
        if status != 200:
            print("*** FAILED: {} ***".format(status))
            sys.exit(1)
        response = response.getvalue().decode('utf-8')
    finally:
        try:
            # Close the handle before deleting the cookie file: libcurl
            # writes the cookie jar when the handle is cleaned up, which
            # would otherwise recreate the file after it was removed.
            if curl is not None:
                curl.close()
        finally:
            os.remove(path)
    return response
def _perform(self, url: str, curl_obj: pycurl.Curl = None, headers: dict = None, postfields: dict = None, skip_auth=False) -> bytes:
    """Run one HTTP request and return the response body.

    Args:
        url: address to request.
        curl_obj: handle to use; a new one is created when omitted.  The
            handle is always closed before this method returns or raises.
        headers: request headers; ``self.BASE_HEADERS`` is used when empty.
        postfields: form fields; when given they are urlencoded and
            installed through ``_set_postfields``.
        skip_auth: when true, do not wait for authentication first.

    Returns:
        The raw response body, or ``b""`` when the transfer itself failed
        with a ``pycurl.error`` (which is logged, not raised).

    Raises:
        HTTPError: the response status is not 200 OK.
    """
    if not skip_auth:
        self._wait_authenticated()

    if not curl_obj:
        curl_obj = pycurl.Curl()

    if postfields:
        postfields = urlencode(postfields)
        _set_postfields(curl_obj, postfields)

    logger.debug("url={url}, headers={headers}", url=url, headers=headers)
    if not headers:
        headers = self.BASE_HEADERS.copy()
    headers = self._headers_to_list(headers)
    logger.debug("prepared headers={h}", h=headers)

    buffer = BytesIO()

    curl_obj.setopt(pycurl.WRITEFUNCTION, buffer.write)
    curl_obj.setopt(pycurl.HEADERFUNCTION, self._header_function)
    curl_obj.setopt(pycurl.BUFFERSIZE, 102400)
    curl_obj.setopt(pycurl.URL, url)
    curl_obj.setopt(pycurl.HTTPHEADER, headers)
    curl_obj.setopt(pycurl.USERAGENT, CURL_USERAGENT)
    curl_obj.setopt(pycurl.MAXREDIRS, 50)
    curl_obj.setopt(pycurl.ACCEPT_ENCODING, "")
    curl_obj.setopt(pycurl.TCP_KEEPALIVE, 1)
    curl_obj.setopt(pycurl.FOLLOWLOCATION, True)
    # NOTE(review): ENCODING looks like the legacy alias of ACCEPT_ENCODING,
    # in which case this value replaces the empty string set above - confirm.
    curl_obj.setopt(pycurl.ENCODING, "gzip, deflate")

    try:
        try:
            curl_obj.perform()
        except pycurl.error as e:
            logger.debug(e, exc_info=True)
            logger.warning(e)
            return b""
        status = curl_obj.getinfo(pycurl.HTTP_CODE)
    finally:
        # The success path has always closed the handle; close it on the
        # failure path too so a failed transfer does not leak it.
        curl_obj.close()
    logger.debug("HTTP status: {s}", s=status)

    if status != HTTPStatus.OK:
        hdrs = None
        try:
            # Keep only the last value recorded for each header.
            hdrs = {k: v[-1] for k, v in self._headers.items()}
        except (IndexError, KeyError):
            pass
        # Log every failing status, including codes without a standard
        # reason phrase.
        phrase = http.client.responses.get(status, "error")
        logger.error("HTTP status error: {s}", s=status)
        raise HTTPError(url=url, msg=phrase, code=status, hdrs=hdrs, fp=None)

    # Server changing maps will trigger sessionid change,
    # keep track of latest sessionid in response headers.
    sessionid = self._find_sessionid()
    if sessionid and self._auth_data:
        self._auth_data.sessionid = sessionid

    return buffer.getvalue()
def _execute(curl: Curl, close_connection: bool) -> int:
    """Run the transfer on ``curl`` and report the HTTP status code.

    Args:
        curl: prepared handle to perform.
        close_connection: when true, the handle is closed after the status
            has been read.

    Returns:
        The value of the handle's ``HTTP_CODE`` info field.
    """
    curl.perform()
    http_status = curl.getinfo(curl.HTTP_CODE)
    if not close_connection:
        return http_status
    curl.close()
    return http_status
def _complete_request(curl: pycurl.Curl, buffer: BytesIO, response: Response):
    """Run the transfer and copy its outcome into ``response``.

    Args:
        curl: prepared handle; it is closed once the response is filled in.
        buffer: buffer whose content is used as the response body.
        response: object that receives ``status`` (the handle's
            ``RESPONSE_CODE``) and ``body`` (the buffer content decoded
            with ``_CHAR_ENCODING``).
    """
    curl.perform()
    response.status = curl.getinfo(curl.RESPONSE_CODE)
    raw_body = buffer.getvalue()
    response.body = raw_body.decode(_CHAR_ENCODING)
    curl.close()
def _finish(
    self, curl: pycurl.Curl, curl_error: int = None, curl_message: str = None
) -> None:
    """Wrap up an ended transfer and hand its result to the request callback.

    The handle is detached from the multi object and appended to the free
    list.  An ``HTTPResponse`` describing either the failure or the finished
    transfer is then passed to the callback stored in ``curl.info``; any
    exception raised while doing so goes to ``handle_callback_exception``.

    Args:
        curl: handle whose transfer has ended; its ``info`` attribute holds
            the per-request state and is reset to ``None`` here.
        curl_error: curl error code; falsy when the transfer succeeded.
        curl_message: text accompanying ``curl_error``; must not be ``None``
            when ``curl_error`` is truthy.
    """
    request_state = curl.info
    curl.info = None
    self._multi.remove_handle(curl)
    self._free_list.append(curl)
    body = request_state["buffer"]
    if not curl_error:
        error = None  # type: Optional[CurlError]
        code = curl.getinfo(pycurl.HTTP_CODE)
        effective_url = curl.getinfo(pycurl.EFFECTIVE_URL)
        # Rewind so the response consumer reads the body from the start.
        body.seek(0)
    else:
        assert curl_message is not None
        error = CurlError(curl_error, curl_message)
        assert error is not None
        code = error.code
        effective_url = None
        # A failed transfer carries no body.
        body.close()
        body = None
    # the various curl timings are documented at
    # http://curl.haxx.se/libcurl/c/curl_easy_getinfo.html
    time_info = {
        "queue": (
            request_state["curl_start_ioloop_time"]
            - request_state["queue_start_time"]
        ),
        "namelookup": curl.getinfo(pycurl.NAMELOOKUP_TIME),
        "connect": curl.getinfo(pycurl.CONNECT_TIME),
        "appconnect": curl.getinfo(pycurl.APPCONNECT_TIME),
        "pretransfer": curl.getinfo(pycurl.PRETRANSFER_TIME),
        "starttransfer": curl.getinfo(pycurl.STARTTRANSFER_TIME),
        "total": curl.getinfo(pycurl.TOTAL_TIME),
        "redirect": curl.getinfo(pycurl.REDIRECT_TIME),
    }
    try:
        response = HTTPResponse(
            request=request_state["request"],
            code=code,
            headers=request_state["headers"],
            buffer=body,
            effective_url=effective_url,
            error=error,
            reason=request_state["headers"].get("X-Http-Reason", None),
            request_time=(
                self.io_loop.time() - request_state["curl_start_ioloop_time"]
            ),
            start_time=request_state["curl_start_time"],
            time_info=time_info,
        )
        request_state["callback"](response)
    except Exception:
        self.handle_callback_exception(request_state["callback"])