def main():
    """Download the configured video URLs and block until every task settles.

    One VUrlTask is created per entry in ``urls`` and handed to a WorkShop.
    The function then polls once a second, dropping tasks that report
    ``isArchived()`` or ``isError()``, until none are left. Ctrl-C ends the
    wait silently; any other exception is logged. In every case the WorkShop
    is told to stop and joined before returning.
    """
    urls = [
        # 'http://v.youku.com/v_show/id_XNzUyNDE4MTQw.html'
        # 'http://i.youku.com/u/UNTc4NzI3MjY0',
        # 'http://v.youku.com/v_show/id_XNzQ5NDAwMDIw.html?from=y1.1-2.10001-0.1-1',
        # 'http://v.youku.com/v_show/id_XNzUwMTE2MDQw.html?f=22611771',
        # 'http://v.youku.com/v_show/id_XNzQ3MjMxMTYw.html',
        'http://video.sina.com.cn/p/ent/v/m/2014-08-14/102164094039.html'
    ]
    log = util.get_logger()
    bar = ProgressBar()
    ws = WorkShop(tmin=1, tmax=2, log=log)
    dlvs = [VUrlTask(url, 0, 3, './tmp', bar=bar, log=log) for url in urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        # Track unfinished tasks in a separate list: deleting from a list
        # while enumerating it skips the element after each deletion, and
        # would also mutate the list object that was handed to addTasks().
        pending = list(dlvs)
        while pending:
            pending = [dlv for dlv in pending
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except KeyboardInterrupt:
        # A user interrupt is the normal way to abort; fall through to cleanup.
        pass
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
def main():
    """Download the configured video URLs and block until every task settles.

    One VUrlTask is created per entry in ``urls`` and handed to a WorkShop.
    The function then polls once a second, dropping tasks that report
    ``isArchived()`` or ``isError()``, until none are left. Ctrl-C ends the
    wait silently; any other exception is logged. In every case the WorkShop
    is told to stop and joined before returning.
    """
    urls = [
        # 'http://v.youku.com/v_show/id_XNzUyNDE4MTQw.html'
        # 'http://i.youku.com/u/UNTc4NzI3MjY0',
        # 'http://v.youku.com/v_show/id_XNzQ5NDAwMDIw.html?from=y1.1-2.10001-0.1-1',
        # 'http://v.youku.com/v_show/id_XNzUwMTE2MDQw.html?f=22611771',
        # 'http://v.youku.com/v_show/id_XNzQ3MjMxMTYw.html',
        'http://video.sina.com.cn/p/ent/v/m/2014-08-14/102164094039.html'
    ]
    log = util.get_logger()
    bar = ProgressBar()
    ws = WorkShop(tmin=1, tmax=2, log=log)
    dlvs = [VUrlTask(url, 0, 3, './tmp', bar=bar, log=log) for url in urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        # Track unfinished tasks in a separate list: deleting from a list
        # while enumerating it skips the element after each deletion, and
        # would also mutate the list object that was handed to addTasks().
        pending = list(dlvs)
        while pending:
            pending = [dlv for dlv in pending
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except KeyboardInterrupt:
        # A user interrupt is the normal way to abort; fall through to cleanup.
        pass
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
def __init__(self, name="<?thread>", log=None):
    """Set up the bookkeeping for a not-yet-started worker.

    :param name: label stored on the instance as ``_name``.
    :param log: value passed through ``get_logger`` to obtain ``self.log``.

    No thread object exists yet (``_thread`` is None) and both state events
    are left cleared.
    """
    self._name = name
    self._thread = None
    self.log = get_logger(log)

    # Build both flags first, reset them, then publish them on the instance.
    paused_flag = threading.Event()
    paused_flag.clear()
    running_flag = threading.Event()
    running_flag.clear()
    self._is_paused = paused_flag
    self._is_running = running_flag
def main():
    """Run an IPScanner restricted to 'Korea' until the user interrupts it.

    Sets the module-level ``coutries_filter`` to ``{'Korea'}``, starts the
    scanner, prints ``scanner.allip`` once if the scanner reports itself
    available after half a second, then idles until Ctrl-C. If the scanner
    is still alive on the way out it is asked to stop and joined.
    """
    # Name kept as spelled: it is a module-level variable read elsewhere.
    global coutries_filter
    scanner = IPScanner(util.get_logger())
    scanner.info_duration = 5
    coutries_filter = {'Korea'}
    try:
        # Start inside the try block so an interrupt during the warm-up
        # sleep still reaches the cleanup in ``finally``.
        scanner.start()
        _sleep(0.5)
        if scanner.isAvailable():
            # Single-argument print() behaves the same on Python 2 and 3.
            print(scanner.allip)
        while True:
            _sleep(1)
    except KeyboardInterrupt:
        print('stop by user')
    finally:
        if scanner.isAlive():
            scanner.setToStop()
            scanner.join()
def main():
    """Run an IPScanner restricted to 'Korea' until the user interrupts it.

    Sets the module-level ``coutries_filter`` to ``{'Korea'}``, starts the
    scanner, prints ``scanner.allip`` once if the scanner reports itself
    available after half a second, then idles until Ctrl-C. If the scanner
    is still alive on the way out it is asked to stop and joined.
    """
    # Name kept as spelled: it is a module-level variable read elsewhere.
    global coutries_filter
    scanner = IPScanner(util.get_logger())
    scanner.info_duration = 5
    coutries_filter = {'Korea'}
    try:
        # Start inside the try block so an interrupt during the warm-up
        # sleep still reaches the cleanup in ``finally``.
        scanner.start()
        _sleep(0.5)
        if scanner.isAvailable():
            # Single-argument print() behaves the same on Python 2 and 3.
            print(scanner.allip)
        while True:
            _sleep(1)
    except KeyboardInterrupt:
        print('stop by user')
    finally:
        if scanner.isAlive():
            scanner.setToStop()
            scanner.join()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import time
import hashlib
import json
from vavava.httputil import HttpUtil
from vavava.httputil import DownloadStreamHandler
from vavava import util

util.set_default_utf8()
LOG = util.get_logger()
CHARSET = "utf-8"


class Spider:
    """Holds an HttpUtil client preconfigured with player-like request headers."""

    def __init__(self):
        """Create the HTTP client, register its headers and reset all state."""
        client = HttpUtil(charset="utf-8")
        client.header_refer_ = "http://v.ifeng.com/include/ifengLivePlayer_v1.40.4.swf"
        client.header_user_agent_ = r"Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
        # Extra headers, registered in this order.
        extra_headers = (
            ("x-flash-version", "11,5,502,146"),
            ("Accept-Language", "zh-CN"),
            ("Accept", "*/*"),
            ("Proxy-Connection", "Keep-Alive"),
        )
        for header_name, header_value in extra_headers:
            client.add_header(header_name, header_value)
        self.http = client

        # Per-session state; nothing has been fetched yet.
        self.uuid = ""
        self.flv_location = ""
        self.schedule_json = None
        self.channels = {}
        self.down_handle = None
#!/usr/bin/env python # coding=utf-8 import sys import json import StringIO import BaseHTTPServer from time import sleep as _sleep from SimpleHTTPServer import SimpleHTTPRequestHandler from ipscanner import IPScanner from vavava import util ServerClass = BaseHTTPServer.HTTPServer Protocol = "HTTP/1.0" log = util.get_logger() gIpScanner = IPScanner(log=util.get_logger()) class MyRequestHandler(SimpleHTTPRequestHandler): def send_head(self): content_type = 'text/html; charset=utf-8' param = '' req_path = self.path if req_path.find('?') > 0: req_path, param = req_path.split('?') if req_path in ('/'): self.path = '/www/index.html' return SimpleHTTPRequestHandler.send_head(self) elif req_path in ('/curr'): iplist = [[ip.duration, ip.ip, ip.country, ip.timeString] for ip in gIpScanner.currBuff]
def getmatches(string):
    """Extract institution records from a page of HTML.

    :param string: page content to search.
    :returns: whatever ``util.reg_helper`` returns for the pattern; the
        pattern has eight capture groups (name, address, district, phone,
        basic-insurance flag, insurance code, rural-cooperative flag,
        postal code).
    """
    # Raw string: the pattern contains \s, \< and \>, which are not valid
    # string escapes. The compiled pattern is unchanged.
    regstr = r"""机构名称:\s*([^\<]*)[^\<]*\</p>[^\>]+>机构地址:\s*([^\<]*)[^\<]*\</p>[^\>]+>区县:\s*([^\<]*)[^\<]*\</p>[^\>]+>机构电话:\s*([^\<]*)[^\<]*\</p>[^\>]+>基本医保点:\s*([^\<]*)[^\<]*\</p>[^\>]+>医保编码:\s*([^\<]*)[^\<]*\</p>[^\>]+>新农合定点:\s*([^\<]*)[^\<]*\</p>[^\>]+>邮政编码:\s*([^\<]*)[^\<]*\</p>[^\>]+>"""
    return util.reg_helper(string, regstr)


def save(matches, num):
    """Write ``matches`` to ``<num>.txt`` as comma-separated lines.

    :param matches: iterable of 8-item tuples, one per record.
    :param num: integer used to build the output file name.

    A header row is always written. The module-level ``total`` counter is
    incremented once per record written.
    """
    # Declared up front rather than in the middle of the loop body.
    global total
    row_format = "%s,%s,%s,%s,%s,%s,%s,%s,\n"
    with open("%d.txt" % num, "w") as f:
        f.write(row_format % ("机构名称", "机构地址", "区县", "机构电话", "基本医保点", "医保编码", "新农合定点", "邮政编码"))
        for match in matches:
            f.write(row_format % match)
            total += 1


if __name__ == "__main__":
    log = util.get_logger()
    try:
        # One output file per URL, numbered from 0.
        for i, url in enumerate(geturls()):
            content = httputil.http_get(url)
            matches = getmatches(content)
            save(matches, i)
        # Single-argument print() behaves the same on Python 2 and 3.
        print(total)
    except KeyboardInterrupt:
        print('stop by user')
        exit(0)
    except Exception as e:
        log.exception(e)
def mainTest(axel, bar, log):
    """Queue one test download per entry of ``test_urls`` for each n in ``cmd``.

    :param axel: work queue handed to ``fTestFunc``.
    :param bar: progress bar handed to ``fTestFunc``.
    :param log: logger used for the per-task debug line.
    """
    cmd = "1"  # alternatives: '1,2,3,4,5,6' or raw_input('n=')
    for n in cmd.split(','):
        n = int(n)
        for md5, url in test_urls.items():
            fTestFunc(axel, bar, url, md5, n, log)
            # mTestFunc(axel, url, md5, n, log)
            log.debug('add a test work: %s,%s,%d', url, md5, n)


from vavava.threadutil import WorkShop
from vavava.util import get_logger

if __name__ == '__main__':
    log = get_logger()
    bar = ProgressBar()
    axel = WorkShop(tmin=2, tmax=5, log=log)
    try:
        if not axel.serve(timeout=3):
            raise ValueError('server not started')
        mainTest(axel, bar, log)
        while True:
            _sleep(1)
            if axel.allTasksDone():
                # Compare for equality: ``x in ('y')`` is a substring test
                # against the string 'y', so an empty answer (just Enter)
                # used to count as "yes".
                if raw_input('again ??') == 'y':
                    mainTest(axel, bar, log)
                else:
                    break
    except KeyboardInterrupt:
        # Ctrl-C simply ends the polling loop.
        pass
#!/usr/bin/env python # coding=utf-8 import sys import json import StringIO import BaseHTTPServer from time import sleep as _sleep from SimpleHTTPServer import SimpleHTTPRequestHandler from ipscanner import IPScanner from vavava import util ServerClass = BaseHTTPServer.HTTPServer Protocol = "HTTP/1.0" log = util.get_logger() gIpScanner = IPScanner(log=util.get_logger()) class MyRequestHandler(SimpleHTTPRequestHandler): def send_head(self): content_type = 'text/html; charset=utf-8' param = '' req_path = self.path if req_path.find('?') > 0: req_path, param = req_path.split('?') if req_path in ('/'): self.path = '/www/index.html' return SimpleHTTPRequestHandler.send_head(self) elif req_path in ('/curr'): iplist = [[ip.duration, ip.ip, ip.country, ip.timeString] for ip in gIpScanner.currBuff] html = json.dumps({'name': 'curr', 'data': iplist, 'columns': ['duration', 'ip', 'country', 'time']})
def __init__(self, db_path):
    """Prepare the logger, empty caches and the database pool.

    :param db_path: path handed to ``sqliteutil.dbpool``.
    """
    logger = util.get_logger(level=CONFIG.log_level)
    self.log = logger

    # Nothing loaded yet.
    self.categries, self.pool = {}, None

    db_pool = sqliteutil.dbpool(path=db_path, cls=dbutil.DBUrl)
    self.dbpool = db_pool
def __init__(self, path):
    """Load the JSON config at ``path`` and normalise ``http_proxy``.

    :param path: config file path, forwarded to
        ``json_config.SimpleJsonConfig.__init__`` and written to the log.

    After loading, ``self.http_proxy`` always exists; it is None when the
    setting is absent, null or empty.
    """
    json_config.SimpleJsonConfig.__init__(self, path)
    util.get_logger().info("load config file:%s", path)
    # getattr + truthiness covers "missing", None and empty in one test.
    # Calling len() on the value would raise TypeError for a null setting.
    if not getattr(self, "http_proxy", None):
        self.http_proxy = None
# NOTE(review): the next two lines are the tail of a definition that begins
# before this snippet; they are kept untouched.
retrans=True, callback=archive_callback)
    axel.addTask(urltask)


def mainTest(axel, bar, log):
    """Queue one test download per entry of ``test_urls`` for each n in ``cmd``.

    :param axel: work queue handed to ``fTestFunc``.
    :param bar: progress bar handed to ``fTestFunc``.
    :param log: logger used for the per-task debug line.
    """
    cmd = "1" #'1,2,3,4,5,6'# raw_input('n=')
    for n in cmd.split(','):
        n = int(n)
        for md5, url in test_urls.items():
            fTestFunc(axel, bar, url, md5, n, log)
            # mTestFunc(axel, url, md5, n, log)
            log.debug('add a test work: %s,%s,%d', url, md5, n)


from vavava.threadutil import WorkShop
from vavava.util import get_logger

if __name__ == '__main__':
    log = get_logger()
    bar = ProgressBar()
    axel = WorkShop(tmin=2, tmax=5, log=log)
    try:
        # Abort early when the work queue does not come up within 3 seconds.
        if not axel.serve(timeout=3):
            raise ValueError('server not started')
        mainTest(axel, bar, log)
        # Poll once a second; offer another round whenever the queue drains.
        while True:
            _sleep(1)
            if axel.allTasksDone():
                # NOTE(review): ('y') is the string 'y', not a tuple, so this
                # is a substring test and an empty answer also counts as yes.
                if raw_input('again ??') in ('y'):
                    mainTest(axel, bar, log)
                else:
                    break
    except KeyboardInterrupt as e:
        # Ctrl-C simply ends the polling loop.
        pass