def _preload():
    """Warm the proxy cache for every feed URL known to the database.

    Failures are logged per-URL and never abort the sweep.  Returns an
    empty 204 response tuple for the web framework.
    """
    for feed_url in feed_db().keys():
        try:
            proxy(feed_url)
        except Exception:
            log.warning("Couldn't preload %s", feed_url)
    return ("Done", 204, {})
def hashFile(event, context):
    """S3-trigger Lambda: write <key>.md5 and <key>.sha1 next to an uploaded object.

    :param event:   S3 event record; bucket and key are taken from Records[0].
    :param context: Lambda context, forwarded untouched when proxying.
    :returns: dict with "md5" and "sha1" hex digests, or None for .md5/.sha1 keys.
    :raises: re-raises any S3/hashing failure after logging it.
    """
    bucket = event['Records'][0]['s3']['bucket']['name']
    key = urllib.unquote_plus(event['Records'][0]['s3']['object']['key'].encode('utf8'))
    print("File: %s:%s" % (bucket, key))
    # Keys matching PROXY_PATTERN are handed to another function instead.
    proxyPattern = os.environ.get("PROXY_PATTERN")
    if proxyPattern and re.match(proxyPattern, key):
        return proxy.proxy(event, context, funcName="hashProxy")
    # Don't hash the hash files we write ourselves (would recurse forever).
    if key.endswith(".md5") or key.endswith(".sha1"):
        print("Nothing to do")
        return
    try:
        response = s3.get_object(Bucket=bucket, Key=key)
        md5 = hash_stream(response['Body'], hashlib.md5())
        s3.put_object(Bucket=bucket, Key=key + ".md5", Body=md5)
        # BUG FIX: response['Body'] is a one-shot streaming object; the first
        # hash_stream() pass exhausts it, so the original code computed the
        # SHA1 of an empty stream.  Re-fetch the object for a fresh stream.
        # (assumes hash_stream does not seek/rewind -- TODO confirm)
        response = s3.get_object(Bucket=bucket, Key=key)
        sha1 = hash_stream(response['Body'], hashlib.sha1())
        s3.put_object(Bucket=bucket, Key=key + ".sha1", Body=sha1)
        print("MD5: %s SHA1: %s" % (md5, sha1))
        return {"md5": md5, "sha1": sha1}
    except Exception as e:
        print(e)
        print('Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'.format(key, bucket))
        raise e
def distribute_request(self, http_req_handler):
    """Route an incoming HTTP request to the matching handler by URL rule.

    :param http_req_handler: the BaseHTTPRequestHandler-style object for
        this request (has .path, .command, .wfile, header helpers).
    """
    path = urlparse(http_req_handler.path).path
    handled = False
    # Proxy support: if any configured proxy rule rewrites this URL,
    # forward the request and stop here.
    if C('enable_proxy') and utils.isDict(C('proxy')):
        for reg, target in C('proxy').items():
            target_path = get_proxy_url(http_req_handler.path, reg, target)
            if target_path:
                log.info('[proxy](%s) to (%s)' % (http_req_handler.path, target_path))
                return proxy(target_path, Request(http_req_handler), Response(http_req_handler))
    for h in self.handlers:
        # BUG FIX: the original wrote
        #     A or B and C   ==   A or (B and C)
        # so an 'ALL'-method handler matched every path without its URL
        # pattern ever being consulted.  The pattern must apply in both cases.
        method_ok = 'ALL' == h.get('method') or h.get('method') == http_req_handler.command
        if method_ok and re.findall(h.get('pattern'), path):
            handled = True
            ret = (h.get('handler'))(Request(http_req_handler), Response(http_req_handler))
            if True == ret:
                continue  # handler returned True: fall through to next match
            else:
                break
    # Not handled by any handler: answer 405.
    if not handled:
        log.error('%s is not handled' % path)
        # BUG FIX: send_response() writes the HTTP status line; the original
        # send_header(405, ...) emitted a malformed header instead.  Also
        # 'self.http_req_handler' raised AttributeError -- the handler is
        # the parameter, not an attribute of self.
        http_req_handler.send_response(405, '%s not supported' % path)
        http_req_handler.end_headers()
        http_req_handler.wfile.close()
def requesetGet(self, url):
    """Centralised GET with lazy session/proxy setup, 429 back-off and retry.

    (The misspelled name 'requesetGet' is the public interface and is kept.)

    :param url: URL to fetch through the rotating proxy.
    :return: the final requests.Response, or None if the request raised.
    """
    if not hasattr(self, 'ses'):
        self.ses = requests.session()
    ret = None
    try:
        if not hasattr(self, 'proxy'):
            self.proxy = proxy.proxy()
        retry_count = 1
        while True:
            ret = self.ses.get(url, proxies=self.proxy.getProxy())
            if ret.status_code == 429:
                # Rate-limited: wait a random 1-3s and retry indefinitely.
                wait = random.randint(1, 3)
                time.sleep(wait)
                self.log.info(u'requesetGet, 请求代理超过5个,返回429{0},随机等待{1}秒'.format(url, wait))
                continue
            elif ret.status_code != 200 and retry_count < 3:
                # Other non-200: retry at most twice more.
                wait = random.randint(1, 3)
                time.sleep(wait)
                self.log.info(u'requesetGet, 请求失败,返回code:{2}, url:{0},随机等待{1}秒'.format(url, wait, ret.status_code))
                retry_count += 1
                continue
            else:
                break
    except Exception:
        # BUG FIX: the original bare 'except:' also swallowed
        # KeyboardInterrupt/SystemExit; only real errors should be logged
        # and absorbed (best-effort contract: caller gets None).
        self.log.error(u'代理请求数据异常, url:{0}'.format(url))
        self.log.error(traceback.format_exc())
    self.log.info(u'代理请求数据完成,url:{0}'.format(url))
    return ret
def __init__(self, world_module, port, arg):
    """Wire a world implementation to a local proxy thread and connect.

    :param world_module: module exposing getClass(); the class is
        instantiated with (self, arg).
    :param port: local port the proxy listens on (IPv6 loopback '::1').
    :param arg: opaque argument passed through to the world instance.
    """
    # Encodings for each side of the bridge.
    self.mud_encoding = 'iso-8859-1'
    self.client_encoding = 'utf-8'
    self.world_module = world_module
    self.arg = arg
    self.world = world_module.getClass()(self, self.arg)
    try:
        # proxy() hands back a read fd, a write fd, a stop event and the
        # thread entry point -- presumably a socket<->pipe pump; verify
        # against the proxy module.
        self.socketToPipeR, self.pipeToSocketW, self.stopFlag, runProxy = proxy(
            '::1', port)
        self.pipeToSocketW = os.fdopen(self.pipeToSocketW, 'wb')
        self.proxyThread = threading.Thread(target=runProxy)
        self.proxyThread.start()
        host_port = self.world.getHostPort()
        self.log("Connecting")
        self.telnet = self.connect(*host_port)
        self.log("Connected")
    except:
        # On ANY failure (bare except is deliberate: includes
        # KeyboardInterrupt) stop the proxy thread, let the world clean
        # up, then re-raise to the caller.
        self.log("Shutting down")
        self.stopFlag.set()
        self.world.quit()
        raise
def __init__(self, browser_type='chrome', path=''):
    """Initialise the AliCompany scraper: proxy and its logger.

    NOTE(review): browser_type and path are currently unused; the proxy
    is always created for 'chrome'.
    """
    self.proxy = proxy('chrome')
    self.logger = logging.getLogger('AliCompany')
def __init__(self, browser_type='chrome'):
    """Initialise the PcCompany scraper: logger, mongo, redis, proxy, browser.

    :param browser_type: passed to init_browser; presumably the webdriver
        flavour -- note the proxy below is hard-coded to 'chrome'
        regardless (TODO confirm intended).
    """
    self.logger = logging.getLogger('PcCompany')
    self.init_mongo()
    self.init_redis()
    self.proxy = proxy('chrome')
    self.init_browser(browser_type)
import os
import sys
import logging
import traceback
import datetime

import selenium.webdriver.support.ui as ui
import pymongo

from pic_recognise import PicRecognise
from proxy import proxy

# BUG FIX: the module used reload(sys) and os.path below without ever
# importing os or sys -- both imports added above.
reload(sys)
sys.setdefaultencoding('utf-8')

# Test image files.
pic_vcode_name = os.path.join(os.path.dirname(__file__), 'image', 'vcode.jpg')
pic_ab_name = os.path.join(os.path.dirname(__file__), 'image', 'ab.jpg')

# NOTE(review): this rebinds the module-level name 'proxy' from the
# imported class to an instance -- confirm nothing else needs the class.
proxy = proxy('chrome')


def init_browser(browser_type):
    """Placeholder: browser construction is not implemented yet."""
    pass


def browser_quit(browser):
    """Force-kill the browser's driver process, ignoring any failure.

    :param browser: a selenium webdriver (may be None).
    """
    try:
        if browser:
            pid = browser.service.process.pid
            os.kill(pid, 9)  # SIGKILL the webdriver service process
    except Exception:
        # Best effort -- the process may already be gone.  (Original
        # 'except Exception, e' bound an unused name with Py2-only syntax.)
        pass
    finally:
        # NOTE(review): rebinding the local has no effect on the caller's
        # reference; kept for fidelity with the original.
        browser = None
def __init__(self):
    """Initialise the MBaiduKeyword scraper: logger, Oracle, phantomjs proxy."""
    self.logger = logging.getLogger('MBaiduKeyword')
    self.init_oracle()
    self.proxy = proxy('phantomjs')
from proxy import proxy from sys import argv, exit try: p = proxy(argv[1], int(argv[2]), argv[3], int(argv[4])) except: print 'Use: main.py <Server IP> <Server Port> <Client IP> <Client Port>' exit(1) p.initialize() p.start()
def urls(environ):
    """WSGI dispatch: '/fun/page' goes to the page view, everything else is proxied."""
    path = environ['PATH_INFO']
    if path == '/fun/page':
        return proxy.page(environ)
    return proxy.proxy(environ)
def proxy_run(self):
    """Build a proxy from the stored queue/settings object and start it."""
    from proxy import proxy
    runner = proxy(self._proxy_qs)
    runner.on_run()
def setUp(self):
    """Give each test a fresh target object and a proxy wrapping it."""
    target = X()
    self.object = target
    self.proxy = proxy.proxy(target)
from proxy import proxy, HttpResponse


def func(req):
    """Echo the 's' query argument as the response body; 400 when absent."""
    args = req.args
    if 's' in args:
        return HttpResponse(args['s'])
    return HttpResponse('', code=400)


main = proxy(func)
class auto_brush(): p = proxy.proxy() urls = ['https://www.woquba.cn'] url_weight = [] ips = [] threads = [] thread_num = 10 time_distribution = [ 2, 1, 1, 2, 3, 4, 5, 6, 7, 7, 7, 8, 9, 7, 6, 5, 5, 7, 8, 9, 9, 8, 7, 5 ] total_fb = sum(time_distribution) base_num = 10000 last_hour = 0 last_min = 0 rand = random.randint(-10, 10) last_task_num = int( (base_num / total_fb) * (time_distribution[last_hour] + rand / 10.0)) residue_sec = 3600 residue_task = last_task_num * len(urls) ######################################################################### # # 自动刷量主程序 # ######################################################################### def run(self): self.__init_url_weight__() #self.__update_ips__() while True: #print 'residue task num =',self.residue_task,'residue_sec =',self.residue_sec self.__update_task__() if len(self.ips) <= 0: self.__update_ips__() if self.residue_task > 0: #if len(self.threads) < self.residue_task: self.__add_thread_task__() self.residue_task = self.residue_task - 1 #print "add thread task,residue task is",self.residue_task else: time.sleep(self.residue_sec) if self.residue_sec > self.residue_task: st = self.residue_sec / self.residue_task #print "sleep time is",st,"residue_sec is ",self.residue_sec time.sleep(st) self.residue_sec = self.residue_sec - st ######################################################################### # # 更新任务列表和代理ip列表{} # ######################################################################### def __update_task__(self): now = datetime.datetime.now() hour = now.hour minite = now.minute if hour is not self.last_hour: self.rand = random.randint(-10, 10) self.last_task_num = int( (self.base_num / self.total_fb) * (self.time_distribution[hour] + self.rand / 10.0)) self.last_hour = hour self.residue_sec = 3600 self.residue_task = self.last_task_num * len(self.urls) if ((minite - self.last_min) > 5) | (hour is not self.last_hour): self.__update_ips__() self.last_min = minite 
######################################################################### # # 添加一个线程 # ######################################################################### def __add_thread_task__(self): for t in self.threads: if not t.isAlive(): self.threads.remove(t) #print "thread release,thread num ",t #if len(self.threads)<self.thread_num: index = self.__get_url_index__() t = threading.Thread(target=self.__task__, args=(index, )) self.threads.append(t) #t.setDaemon(True) t.start() ######################################################################### # # 线程处理函数 # ######################################################################### def __task__(self, index): b = browser.SeleniumBrowser() self.url_weight[index] = self.url_weight[index] + 1 flag = False while flag is False: try: proxyIp = random.choice(self.ips) requests.get('http://www.baidu.com', timeout=5, proxies={"http": "http://" + proxyIp}) except: print 'connect failed', proxyIp self.ips.remove(proxyIp) else: flag = True proxyL = proxyIp.split(':') ht = 'http' ip = proxyL[0] port = proxyL[1] self.ips.remove(proxyIp) print 'start task.....proxy is', proxyL, 'residue_sec is', self.residue_sec, 'residue_task is', self.residue_task, 'residue_ips is', len( self.ips) b.open_url_proxy(self.urls[index], ht, ip, port) #b.open_url(self.urls[index]) self.url_weight[index] = self.url_weight[index] - 1 #print "thread start,thread num is",len(self.threads),index ######################################################################### # # 初始化网站列表中的各网站任务分布 # ######################################################################### def __init_url_weight__(self): for i in range(len(self.urls)): # self.url_weight[i]=0 self.url_weight.append(0) ######################################################################### # # 更新任务列表和代理ip列表{} # ######################################################################### def __update_ips__(self): self.ips = self.p.get_proxy_ips() 
######################################################################### # # 根据网站任务分布情况选择一个处理量最低的网站索引 # ######################################################################### def __get_url_index__(self): minNum = self.url_weight[0] index = 0 for i in range(len(self.url_weight)): if minNum > self.url_weight[i]: minNum = self.url_weight[i] index = i return index
def _twitter(id, date=lambda: (date.today() - timedelta(days=10)).strftime("%Y-%m-%d")):
    """Proxy a twitrss.me RSS search for tweets from *id* since *date*.

    The default is a callable so "10 days ago" is computed per call, not
    at import time; inside the lambda, 'date' resolves to the module-level
    datetime.date -- the parameter only shadows it inside this body.
    """
    since = date()
    url = ("https://twitrss.me/twitter_search_to_rss/"
           + "?term=from%3A{id}+since%3A{date}".format(id=id, date=since))
    return proxy(url)
def _medium(id):
    """Proxy the Medium RSS feed for the given user handle."""
    base = "https://medium.com/feed/@"
    return proxy(base + id)
def _feed(url):
    """Pass an arbitrary feed URL straight through the proxy."""
    return proxy(url)
def init():
    """Fetch baidu.com through the proxy and print the body on HTTP 200."""
    ret = proxy.proxy('https://www.baidu.com')
    if ret.status_code == 200:
        print(ret.text)
def __init__(self, browser_type='chrome'):
    """Initialise the MBaiduKeyword scraper: logger, Oracle, proxy.

    :param browser_type: accepted but unused here; the proxy is always
        created for 'chrome' -- TODO confirm intended (a sibling variant
        uses 'phantomjs').
    """
    self.logger = logging.getLogger('MBaiduKeyword')
    self.init_oracle()
    self.proxy = proxy('chrome')