def get_notif_queue(self):
    """Fetch the pending notification queue for this service from the internal API.

    :return: decoded JSON payload of the queue endpoint
    :raises Exception: if the request reported any errors
    """
    url = self._url + "/api/internal/notif/queue?service_id=" + str(self._service_id)
    response = Curl('GET', url, headers=self._headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData
def crawling(self):
    """Download ``self.url`` into the file named by ``self.filname``."""
    fetcher = Curl()
    fetcher.set_url(self.url)
    with open(self.filname, "wb") as sink:
        # Stream the response body straight into the open file handle.
        fetcher.set_option(pycurl.WRITEFUNCTION, sink.write)
        fetcher.get()
        fetcher.close()
def crawlCraigz(iterations):
    """Recursively page through Craigslist Dallas room listings, collecting links
    whose text or target mentions one of the tracked ``cities``.

    Pages 120 results at a time via ``get_counter``; stops once the counter
    reaches ``iterations * 120``.
    """
    global cities
    global links
    # BUG FIX: the original used `is not 0`, which compares object identity,
    # not value — implementation-defined for ints and a SyntaxWarning on
    # Python 3.8+. Value comparison is what was intended.
    if get_counter(0) != 0 and iterations * 120 != get_counter(0):
        return
    cl = Curl(base_url="", fakeheaders=[
        'Cookie: cl_b=5GEZ9Y0F6RGXNBZ5tq5GrwngXVs; cl_def_hp=dallas',
    ])
    page = cl.get("http://dallas.craigslist.org/search/roo", {
        's': get_counter(120),
        'search_distance': 13,
        'postal': 75214,
        'min_price': 400,
        'max_price': 600,
        'availabilityMode': 0,
    })
    doc = lhtml.document_fromstring(page)
    for l in doc.iterlinks():
        for c in cities:
            linktext = l[2]
            linktext = linktext[14::]  # strip the scheme/host prefix before matching
            if c in str(l[0].text) or c.lower() in linktext:
                links.append(l[2] + '\n')
                print(l[2])
    return crawlCraigz(iterations)
def test_status_while_still_executing(self, executor):
    """While the underlying process is alive, status() reports it is waiting."""
    self.mock_executor.return_value.isalive.return_value = True
    curl = Curl('www.yahoo.com', self.mock_executor)
    assert curl.status() == 'curl www.yahoo.com is still waiting for a response'
def curl(self, curl_what=CURL_SITE):
    """Interactively ask how many curls to send against this host, then fire
    that many Curl handles, one every two seconds. A blank answer cancels."""
    # TODO prompt for the url, default if none given
    # TODO move all user interaction stuff out into command files.
    count = -1
    while count < 0:
        reply = raw_input(
            "How many curls for %s? (blank line to cancel) " % self.name).strip()
        if reply == "":
            return  # blank line cancels the whole operation
        try:
            count = int(reply)
        except ValueError:
            print("%s is not a number" % reply)
            continue
        if count < 0:
            print("%i is not a valid number" % count)
    print("\tstarting curl on %s..." % self.name)
    for idx in range(count):
        print("%s sending curl %s #%i" % (time.ctime(), curl_what, idx))
        self.traffic_handles.append(
            Curl(curl_what, self.traffic_commands.curl))
        time.sleep(2)
    print("\tdone.")
def get_integration_params(self):
    """Retrieve this integration's parameters from the v3 API.

    :raises Exception: if the request reported any errors
    """
    request_headers = {'content-type': 'application/json'}
    endpoint = self._url + "/api/v3/integrations/" + self._integration_name
    response = Curl('GET', endpoint, headers=request_headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData
def get_channel_by_field_name_and_trackor_id(self, field_name, tid):
    """Read a single field value from one trackor record.

    :param field_name: name of the trackor field to fetch
    :param tid: trackor id
    :raises Exception: if the request reported any errors
    """
    endpoint = self._url + "/api/v3/trackors/" + str(tid) + "?fields=" + field_name
    response = Curl('GET', endpoint, headers=self._headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData[field_name]
def test_status_when_already_done_and_queried(self, executor):
    """Once marked done, status() just hands back the cached result."""
    curl = Curl('www.yahoo.com', self.mock_executor)
    curl.result = 'result'
    curl.done = True
    assert curl.status() == 'result'
def get_zip(self, url, filename):
    """Download ``url`` and write the response body to ``filename``.

    :param url: resource to fetch
    :param filename: local path opened in binary-write mode
    """
    # BUG FIX: the original opened the file without a context manager (leaking
    # the handle on any exception) and set WRITEDATA *after* issuing the GET,
    # so the transfer could complete before the sink was attached.
    with open(filename, "wb") as fp:
        c = Curl()
        c.set_option(c.WRITEDATA, fp)
        c.get(url, )
        c.perform()
        c.close()
def get_users_by_ids(self, user_ids):
    """Fetch user records for the given ids (duplicates removed) via the
    internal users endpoint.

    :raises Exception: if the request reported any errors
    """
    unique_ids = set(user_ids)
    query = ','.join(str(uid) for uid in unique_ids)
    endpoint = self._url + "/api/internal/users?user_ids=" + query
    response = Curl('GET', endpoint, headers=self._headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData
def test_status_with_unknown_exit_code(self, executor):
    """An exit code with no known message is reported as 'unknown' and FAILED."""
    self.mock_executor.return_value.isalive.return_value = False
    self.mock_executor.return_value.exitstatus = 78
    curl = Curl('www.yahoo.com', self.mock_executor)
    outcome = curl.status()
    expected = 'curl www.yahoo.com (78: unknown) FAILED'
    assert curl.done is True
    assert curl.result == expected
    assert outcome == expected
def test_status_with_77_exit_code(self, executor):
    """Exit code 77 maps to the pinned-public-key SSL failure message."""
    self.mock_executor.return_value.isalive.return_value = False
    self.mock_executor.return_value.exitstatus = 77
    curl = Curl('www.yahoo.com', self.mock_executor)
    outcome = curl.status()
    expected = 'curl www.yahoo.com (77: SSL public key does not matched pinned public key) FAILED'
    assert curl.done is True
    assert curl.result == expected
    assert outcome == expected
def test_status_with_1_exit_code(self, executor):
    """Exit code 1 maps to the unsupported-protocol failure message."""
    self.mock_executor.return_value.isalive.return_value = False
    self.mock_executor.return_value.exitstatus = 1
    curl = Curl('www.yahoo.com', self.mock_executor)
    outcome = curl.status()
    expected = 'curl www.yahoo.com (1: Unsupported protocol. This build of curl has no support for this protocol.) FAILED'
    assert curl.done is True
    assert curl.result == expected
    assert outcome == expected
def add(self, log_level, message, description=""):
    """Post one log entry for this integration run, if its level passes the
    configured threshold; entries above the threshold are silently dropped.

    :param log_level: object carrying ``log_level_id`` and ``log_level_name``
    :param message: log message text
    :param description: optional longer description
    :return: decoded JSON response, or None when the entry was filtered out
    :raises Exception: if the request reported any errors
    """
    if log_level.log_level_id > self._ov_log_level.log_level_id:
        return None  # below-threshold entries are not sent (same as original fall-through)
    payload = json.dumps({
        'message': message,
        'description': description,
        'log_level_name': log_level.log_level_name,
    })
    request_headers = {'content-type': 'application/json'}
    endpoint = self._url + "/api/v3/integrations/runs/" + str(self._process_id) + "/logs"
    response = Curl('POST', endpoint, data=payload, headers=request_headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData
def get_phone_number_by_field_name_and_trackor_id(self, field_name, tid):
    """Read a single field value (a phone number) from one trackor record.

    Uses basic ``(username, password)`` auth, unlike the siblings that pass a
    prebuilt auth object.

    :raises Exception: if the request reported any errors
    """
    endpoint = self._url + "/api/v3/trackors/" + str(tid) + "?fields=" + field_name
    response = Curl('GET', endpoint, headers=self._headers,
                    auth=(self._username, self._password))
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData[field_name]
def test_status_with_successful_exit_code(self, executor):
    """Exit code 0 is reported as Success / SUCCEEDED."""
    self.mock_executor.return_value.isalive.return_value = False
    self.mock_executor.return_value.exitstatus = 0
    curl = Curl('www.yahoo.com', self.mock_executor)
    outcome = curl.status()
    expected = 'curl www.yahoo.com (0: Success) SUCCEEDED'
    assert curl.done is True
    assert curl.result == expected
    assert outcome == expected
def saveFile2Local(self, url):
    """Derive a local filename for ``url`` and download the resource into it.

    Does nothing when no filename could be derived.
    """
    self.getFileNameByUrl(url)
    if not self.filename:
        return
    with open(self.filename, "wb") as sink:
        fetcher = Curl()
        fetcher.set_url(url)
        fetcher.set_option(pycurl.WRITEFUNCTION, sink.write)
        fetcher.get()
        fetcher.close()
        Log4Spider.downLog(self, "downloaded a file:[[[", self.filename, "]]]")
def update_specific_trackor(self, trackor_id, data):
    """PUT the mapped fields of ``data`` onto one trackor record.

    :param trackor_id: id of the trackor to update
    :param data: raw record run through the configured field mappings
    :raises Exception: if the request reported any errors
    """
    request_headers = {'content-type': 'application/json'}
    endpoint = 'https://{ov_url}/api/v3/trackors/{trackor_id}'.format(
        ov_url=self._ov_auth.url, trackor_id=trackor_id)
    mapped_fields = self._field_mappings.get_ready_fields_mapping(data)
    bearer = HTTPBearerAuth(self._ov_auth.access_key, self._ov_auth.secret_key)
    response = Curl('PUT', endpoint, headers=request_headers, auth=bearer,
                    data=json.dumps(mapped_fields))
    if response.errors:
        raise Exception(response.errors)
    return response.jsonData
def container_list(self, node_ip, node_port):
    """Return ``{index: summary-dict}`` for every container on a Docker node.

    Each summary holds id, ip, name, cpu/memory limits, state, command and
    start time. Returns the partial result collected so far if any per-
    container detail fetch comes back empty, and an empty dict when the node
    listing itself fails.
    """
    con_data = {}
    list_url = 'http://' + node_ip + ":" + node_port + "/containers/json?all=1"
    summary = Curl(list_url).get_value()
    if not summary:
        return con_data
    short_ids = [entry['Id'][0:12] for entry in summary]
    if not short_ids:
        return con_data
    for index, con_id in enumerate(short_ids, start=1):
        detail = self._container_detail(node_ip, node_port, con_id)
        if len(detail) < 1:
            # Abort on the first failed detail fetch; callers get what we have.
            return con_data
        state_info = detail['State']
        if 'Status' in state_info:
            con_state = str(state_info['Status'])   # docker 1.9 and higher
        elif 'Running' in state_info:
            con_state = str(state_info['Running'])  # docker 1.7
        else:
            con_state = "Exited"
        con_data[index] = {
            'id_num': detail['Id'][0:12],
            'con_ip': detail['NetworkSettings']['IPAddress'],
            'name': detail['Name'],
            'cpuperiod': detail['HostConfig']['CpuPeriod'],
            'cpuquota': detail['HostConfig']['CpuQuota'],
            'memory': detail['HostConfig']['Memory'],
            'state': con_state,
            'cmd': str(detail['Config']['Cmd']),
            'created': detail['State']['StartedAt'],
        }
    return con_data
def node_list(self, node_ip, node_port):
    """Return a summary dict for one Docker node from its ``/info`` endpoint.

    Returns an empty dict (after printing a warning) when the node does not
    answer.
    """
    info_url = 'http://' + node_ip + ":" + node_port + "/info"
    payload = Curl(info_url).get_value()
    node_dict = {}
    if len(payload) < 1:
        print("Can not get node info")
        return node_dict
    node_dict['state'] = '1'  # node answered, mark it alive
    node_dict['cpus'] = payload['NCPU']
    node_dict['mem'] = payload['MemTotal']
    node_dict['name'] = payload['Name']
    node_dict['os_version'] = payload['OperatingSystem']
    node_dict['kernel_version'] = payload['KernelVersion']
    node_dict['images'] = payload['Images']
    node_dict['containers'] = payload['Containers']
    return node_dict
def fetch_url(url, nobody=0, timeout=30, follow_redirect=0, agent=USER_AGENT):
    """Fetch url using curl.

    :param url: URL to fetch
    :param nobody: 1 to request headers only (no body)
    :param timeout: transfer timeout in seconds
    :param follow_redirect: 1 to follow HTTP redirects
    :param agent: User-Agent header value
    :return: ``(curl_handle, buffer)`` on success, ``(None, None)`` on any
        error. ``buffer.getvalue()`` holds the response body as bytes.
    """
    # BUG FIX: pycurl passes *bytes* to WRITEFUNCTION; the old io.StringIO
    # buffer raised TypeError on every write under Python 3, so the bare
    # except always returned (None, None). BytesIO accepts the data.
    t = io.BytesIO()
    c = Curl()
    c.set_option(pycurl.USERAGENT, agent)
    c.set_option(pycurl.URL, url.encode('utf-8'))
    c.set_option(pycurl.NOBODY, nobody)
    c.set_option(pycurl.FOLLOWLOCATION, follow_redirect)
    c.set_option(pycurl.WRITEFUNCTION, t.write)
    c.set_option(pycurl.TIMEOUT, timeout)
    try:
        c.get()
    except Exception:
        # Was a bare `except:`; keep the best-effort contract but stop
        # trapping SystemExit/KeyboardInterrupt. (Unused `attempt` removed.)
        return (None, None)
    return (c, t)
# NOTE(review): this chunk starts mid-method — the `def` line of the handler
# this branch belongs to lies outside the visible source, and the original
# nesting was flattened; indentation below is a best-effort reconstruction.
if self.infos == 5 and 'beliked' not in self.info.keys():
    self.info['beliked'] = int(data)

def handle_endtag(self, tag):
    # Reset the <h3> tracking flag when the heading closes.
    if tag == "h3":
        self.h3 = 0
    # When the tracked <ul> (clearfix) closes, stop collecting and, if a
    # consumer registered an infoHook, hand over the record gathered so far.
    if self.clearfix and tag == "ul":
        self.clearfix = 0
        if hasattr(self, "infoHook"):
            self.infoHook(self.info)

def handle_startendtag(self, tag, attrs):
    # Self-closing tags carry nothing this parser needs.
    pass

@property
def urlList(self):
    # Expose the current URL list via a read-only property.
    return self.current_urlList()

if __name__ == "__main__":
    # Smoke test: fetch one Jianshu profile page and print the parsed info.
    parser = JianShuUserInfo_HtmlParser()
    from curl import Curl
    import pycurl
    c = Curl()
    c.set_url("http://www.jianshu.com/users/d9edcb44e2f2/latest_articles")
    data = c.get()
    #parser.setParseFile("parse.txt")
    parser.setInfoHook(lambda info: print(str(info)))
    parser.feed(data.decode("utf-8"))
    parser.close()
    c.close()
def add_new_attempt(self, notif_queue_rec_id, error_message):
    """Record a failed delivery attempt for one notification-queue record.

    :param notif_queue_rec_id: id of the queue record
    :param error_message: error code/message attached to the attempt
    :raises Exception: if the request reported any errors
    """
    endpoint = (self._url + "/api/internal/notif/queue/"
                + str(notif_queue_rec_id) + "/attempts?error_code=" + error_message)
    response = Curl('POST', endpoint, headers=self._headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
def update_notif_queue_rec_status(self, notif_queue_rec_id, status):
    """Set the status of one notification-queue record.

    :param notif_queue_rec_id: id of the queue record
    :param status: new status value, passed through as a query parameter
    :raises Exception: if the request reported any errors
    """
    endpoint = (self._url + "/api/internal/notif/queue/"
                + str(notif_queue_rec_id) + "/update_status?status=" + status)
    response = Curl('PATCH', endpoint, headers=self._headers, auth=self._auth)
    if response.errors:
        raise Exception(response.errors)
def test_cleanup(self, mocker, executor):
    """cleanup() must force-terminate the underlying executor process."""
    curl = Curl('www.nyan.cat', self.mock_executor)
    curl.cleanup(mocker.stub())
    self.mock_executor.return_value.terminate.assert_called_with(force=True)
def _container_detail(self, node_ip, node_port, container_id):
    """Fetch the full inspect JSON for one container on a Docker node."""
    detail_url = ('http://' + node_ip + ':' + node_port
                  + '/containers/' + container_id + '/json')
    return Curl(detail_url).get_value()
def crawl(self, link, *args):
    """Crawls webpages for information and returns it in a certain format.

    Fetches *link*, runs the module-level ``crawl`` regex over the page, and
    returns ``"<title> | <domain>"`` — or ``""`` when no title matches.
    """
    page = Curl().get(link).strip('\n')
    match = crawl.search(page)
    if not match:
        return ""
    return match.group('title').strip() + ' | ' + link.split('/')[0]
def pycurl_detection(url, ip):
    """Probe *url* against a specific resolved *ip* and collect timing metrics.

    :param url: request URL
    :param ip: IP address (e.g. from dig) substituted for the hostname
    :return: ``(http_code, total_time, size_download, speed_download,
        redirect_count)``; ``(-1, -1, 0, 0, 0)`` on any failure
    """
    c = None  # so the finally-block close is safe if Curl() is never built
    try:
        domain = getDomain(url)
        # BUG FIX: `path` was used in the request line below but its
        # assignment had been commented out, raising NameError on every call.
        path = getPath(url)
        new_url = url.replace(domain, ip)
        header = [
            'GET %s HTTP/1.1' % path,
            'Host: %s' % domain,
            'Accept: */*',
            'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language: zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Accept-Encoding: gzip, deflate',
            'Connection: keep-alive',
            'Cache-Control: no-cache',
            'User-Agent: Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
        ]
        if url.find("baidu.com") > 0:
            header.pop(len(header) - 1)  # drop the UA header for baidu targets
        c = Curl(fakeheaders=header)
        c.get(new_url)
        http_code = c.get_info(pycurl.HTTP_CODE)                    # HTTP status code
        size_download = c.get_info(pycurl.SIZE_DOWNLOAD)            # downloaded bytes
        speed_download = c.get_info(pycurl.SPEED_DOWNLOAD)          # average download speed
        file_time = c.get_info(pycurl.INFO_FILETIME)                # remote document time
        namelookup_time = c.get_info(pycurl.NAMELOOKUP_TIME)        # DNS resolution time
        content_time = c.get_info(pycurl.CONNECT_TIME)              # connect time
        pretransfer_time = c.get_info(
            pycurl.PRETRANSFER_TIME)                                # connect -> ready to transfer
        starttransfer_time = c.get_info(
            pycurl.STARTTRANSFER_TIME)                              # connect -> first byte
        total_time = c.get_info(pycurl.TOTAL_TIME)                  # total transfer time
        redirect_time = c.get_info(pycurl.REDIRECT_TIME)            # time spent on redirects
        redirect_url = c.get_info(pycurl.REDIRECT_URL)              # redirect target url
        redirect_count = c.get_info(pycurl.REDIRECT_COUNT)          # number of redirects
        primary_ip = ''  # pycurl.PRIMARY_IP not available in this build
        primary_port = ''  # pycurl.PRIMARY_PORT
        local_ip = ''  # pycurl.LOCAL_IP
        local_port = ''  # pycurl.LOCAL_PORT
        info = c.info()
        header = c.header()
        # Renamed from `str`, which shadowed the builtin. The large
        # commented-out diagnostics block that followed was dead code and
        # has been removed.
        report = ''' url:%s,ip:%s,size_download:%s,speed_download:%s,file_time:%s,redirect_count:%s, 
 namelookup_time:%s,content_time:%s,pretransfer_time:%s,starttransfer_time:%s,total_time:%s,redirect_time:%s redirect url:%s,count:%s primary ip:%s,port:%s local ip:%s,port:%s info:%s ''' % (
            url, ip, size_download, speed_download, file_time, redirect_count,
            namelookup_time, content_time, pretransfer_time, starttransfer_time,
            total_time, redirect_time, redirect_url, redirect_count,
            primary_ip, primary_port, local_ip, local_port, info)
        print(report)
    except Exception as e:
        msg = "def pycurl_detection(%s,%s) Exception %s" % (url, ip, e.args)
        print(msg)
        logging.exception(msg)
        return -1, -1, 0, 0, 0
    finally:
        # BUG FIX: close only when the handle actually exists; the original
        # raised UnboundLocalError here if Curl() construction itself failed.
        if c is not None:
            c.close()
    return http_code, "%.3f" % total_time, size_download, speed_download, redirect_count
def _container_detail(self, node_ip, node_port, containers_id):
    """Fetch the full inspect JSON for one container on a Docker node."""
    detail_url = 'http://%s:%s/containers/%s/json' % (node_ip, node_port, containers_id)
    return Curl(detail_url).get_value()
def setUp(self):
    """Give every test a fresh Curl instance."""
    self.curl = Curl()