Пример #1
0
def stateMachine(config):
	numclients = 1
	maxVal = 1000
	cli = RedisClient()
	print 'Going to start failure scheduler'
	t = threading.Thread(target=failureScheduler, args=(10, config,)) 
	t.start()

	spawnClients(numclients, maxVal, cli)


	(successes, fails) = collateLogFiles(numclients, maxVal)
	log = set(cli.getLog())
	(ackedAndLost, ackedAndPresent, notAckedAndPresent, notAckedAndAbsent) = printStats (log, successes, fails)

	print 'STATISTICS'
	print 'ackedAndLost %s' %(len(ackedAndLost))
	print 'ackedAndPresent %s'%(len(ackedAndPresent))
	print 'notAckedAndPresent %s'%(len(notAckedAndPresent))
	print 'notAckedAndAbsent %s' %(len(notAckedAndAbsent))
	print ackedAndLost
	print sorted(successes)
	print sorted(log)

	t.join()
Пример #2
0
    def extract_article(self):
        """Work out which article the current utterance refers to.

        Resolution order:
          1. an "ordinal" entity, read as a 1-based position in the stored
             article list;
          2. a "topic" entity: either a reference to the current selection
             (the words "it", "this" or "article") or a fuzzy match against
             the stored headlines (score above 75);
          3. the previously selected article, if there is one.

        Returns:
            The resolved article, or None when nothing matched.
        """
        import re  # local import: only this method needs it

        r = RedisClient()
        articles = r.get(self._id + ":articles")
        selected = r.get(self._id + ":selected")

        # The stored list may be missing (None, or the string "None" that the
        # selection check below also guards against); treat both as empty so
        # len() and iteration are safe.
        if not articles or articles == "None":
            articles = []
        has_selection = bool(selected) and selected != "None"

        article = None

        entities = self.outcome["entities"]
        if "ordinal" in entities:
            value = entities["ordinal"][0]["value"]
            # Ordinals are 1-based, so 1..len(articles) inclusive is valid.
            # The previous `value < len(articles)` test rejected the last
            # article and let 0/negative values wrap to the end of the list.
            if 1 <= value <= len(articles):
                article = articles[value - 1]
        if not article and "topic" in entities:
            topic = entities["topic"][0]["value"]
            # Whole-word match only: a plain substring test would read
            # "politics" as the pronoun "it".
            if re.search(r"\b(?:it|this|article)\b", topic):
                # Only hand back a real selection, never the "None" placeholder.
                article = selected if has_selection else None
            else:
                # fuzzy string matching
                headlines = [candidate["headline"] for candidate in articles]
                result = process.extractOne(topic, headlines)
                # extractOne may yield nothing when there are no candidates.
                if result and result[1] > 75:
                    for article_candidate in articles:
                        if article_candidate["headline"] == result[0]:
                            article = article_candidate
                            break
        if not article and has_selection:
            article = selected
        return article
Пример #3
0
 def get_media(self):
     """Reply with the multimedia attached to the article the user means.

     On success the article is stored as the current selection, the state
     becomes "selected", a phrase from Media is placed in self.payload and
     the media itself is queued in self.pushlater before the response is
     finished. Each failure path ends with a single error_response.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response(
             "Sorry, what kind of news would you like to hear about?")
         # Stop here. Previously execution fell through and went on to
         # build a second reply for the same request.
         return

     article = self.extract_article()
     if not article:
         self.error_response(
             "Sorry I don't know what article you are talking about")
         return

     if not article['multimedia']:
         self.error_response(
             "Sorry I don't have images for that article")
         return

     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     have_media = Media(self.name)
     self.payload = {
         "read": have_media.get_phrase()  # usable url for html
     }
     self.pushlater = {
         "type": "media",
         "value": article['multimedia']
     }
     self.finish_response()
Пример #4
0
class CmdLineApp(Cmd):
    def do_list(self, arg, opts=None):
        data = config.list_conns()
        header = ["ID", "Name", "Host", "Port", "DB", "comments"]
        # tabulate.WIDE_CHARS_MODE = True
        # print_table(header,data)
        print tabulate(data, headers=header)

    @options(
        [
            make_option("-p", "--piglatin", action="store_true", help="atinLay"),
            make_option("-s", "--shout", action="store_true", help="N00B EMULATION MODE"),
            make_option("-r", "--repeat", type="int", help="output [n] times"),
        ]
    )
    def do_conn(self, arg, opts=None):
        (host, port, db) = config.get_conn(int(arg))
        self.prompt = "%s:%s > " % (host, str(db))
        self.conn = RedisClient(host, port, db)

    # @options([make_option('-p', '--piglatin', action="store_true", help="atinLay"),
    #           make_option('-s', '--shout', action="store_true", help="N00B EMULATION MODE"),
    #           make_option('-r', '--repeat', type="int", help="output [n] times")
    #           ])
    def do_get(self, arg, opts=None):
        data = self.conn.get(arg)
        txt = highlight(unicode(data, "UTF-8"), JsonLexer(), pygments.formatters.TerminalFormatter(bg="dark"))
        print txt.rstrip("\r\n").decode("raw_unicode_escape")

    def do_keys(self, arg, opts=None):
        print self.conn.keys(arg)
Пример #5
0
	def __init__(self, bookkeepingPath='/Users/vinusebastian/uci/quarter2/IR/project/WEBPAGES_RAW/bookkeeping.json'):
		"""Open the database clients and load the bookkeeping JSON file.

		bookkeepingPath -- location of the bookkeeping JSON file. It defaults
		to the path that used to be hard-coded, so existing no-argument
		callers behave as before.
		"""
		self.dbClient = MongoDbClient()
		self.redisClient = RedisClient()
		with open(bookkeepingPath) as json_data:
			self.bookKeeper = json.load(json_data)
		# These two lines were indented with a space/tab mix, which Python 3
		# rejects (TabError); they belong at method level, after the file
		# has been closed.
		self.stopWords = []
		self.documentFrequencyThreshold = 1000
Пример #6
0
    def extract_article(self):
        """Work out which article the current utterance refers to.

        Resolution order:
          1. an "ordinal" entity, read as a 1-based position in the stored
             article list;
          2. a "topic" entity: either a reference to the current selection
             (the words "it", "this" or "article") or a fuzzy match against
             the stored headlines (score above 75);
          3. the previously selected article, if there is one.

        Returns:
            The resolved article, or None when nothing matched.
        """
        import re  # local import: only this method needs it

        r = RedisClient()
        articles = r.get(self._id + ":articles")
        selected = r.get(self._id + ":selected")

        # The stored list may be missing (None, or the string "None" that the
        # selection check below also guards against); treat both as empty so
        # len() and iteration are safe.
        if not articles or articles == "None":
            articles = []
        has_selection = bool(selected) and selected != "None"

        article = None

        entities = self.outcome["entities"]
        if "ordinal" in entities:
            value = entities["ordinal"][0]["value"]
            # Ordinals are 1-based, so 1..len(articles) inclusive is valid.
            # The previous `value < len(articles)` test rejected the last
            # article and let 0/negative values wrap to the end of the list.
            if 1 <= value <= len(articles):
                article = articles[value - 1]
        if not article and "topic" in entities:
            topic = entities["topic"][0]["value"]
            # Whole-word match only: a plain substring test would read
            # "politics" as the pronoun "it".
            if re.search(r"\b(?:it|this|article)\b", topic):
                # Only hand back a real selection, never the "None" placeholder.
                article = selected if has_selection else None
            else:
                # fuzzy string matching
                headlines = [candidate["headline"] for candidate in articles]
                result = process.extractOne(topic, headlines)
                # extractOne may yield nothing when there are no candidates.
                if result and result[1] > 75:
                    for article_candidate in articles:
                        if article_candidate["headline"] == result[0]:
                            article = article_candidate
                            break
        if not article and has_selection:
            article = selected
        return article
Пример #7
0
    def confirm_action(self):
        """Handle a bare confirmation from the user.

        A confirmation is only accepted while the stored state is "start":
        the stored articles are then passed to respond_get_headlines. In any
        other state an error reply is sent.
        """
        store = RedisClient()
        if store.get(self._id + ":state") == "start":
            stored_articles = store.get(self._id + ":articles")
            self.respond_get_headlines(stored_articles)
        else:
            self.error_response("Sorry, what are you saying okay for?")
Пример #8
0
    def start(self):
        """Reset the stored conversation keys for this id, then request headlines.

        NYTimes.get_headlines is given self.respond_start as its callback.
        """
        store = RedisClient()

        # Clear the state for this new user
        for suffix in (":state", ":articles", ":selected"):
            store.set(self._id + suffix, None)

        NYTimes.get_headlines(self.respond_start)
Пример #9
0
    def confirm_action(self):
        """Handle a bare confirmation from the user.

        A confirmation is only accepted while the stored state is "start":
        the stored articles are then passed to respond_get_headlines. In any
        other state an error reply is sent.
        """
        store = RedisClient()
        if store.get(self._id + ":state") == "start":
            stored_articles = store.get(self._id + ":articles")
            self.respond_get_headlines(stored_articles)
        else:
            self.error_response("Sorry, what are you saying okay for?")
Пример #10
0
	def __init__(self, lowerBound, upperBound):
		"""Set up empty accumulators, the folder listing, the parser and both clients.

		lowerBound / upperBound are stored as given; nothing in this
		constructor interprets them.
		"""
		# Caller-supplied bounds.
		self.lowerBound, self.upperBound = lowerBound, upperBound

		# In-memory accumulators, all empty to begin with.
		self.bigDictionary = dict()
		self.documentLength = dict()
		self.textDictionary = dict()

		# Every entry directly under the raw pages directory.
		self.folders = glob.glob("/Users/vinusebastian/uci/quarter2/IR/project/WEBPAGES_RAW/*")
		self.htmlParser = htmlParser()

		self.dbClient = MongoDbClient()
		self.redisClient = RedisClient()
Пример #11
0
 def post(self):
     """Return the name stored for the id given in the JSON request body.

     Writes {"id", "name", "status": 200} when a name is stored under
     "<id>:name", otherwise {"status": 500}.
     """
     request_json = tornado.escape.json_decode(self.request.body)
     user_id = request_json["id"]
     name = RedisClient().get(user_id + ":name")
     if name:
         reply = {"id": user_id, "name": name, "status": 200}
     else:
         reply = {"status": 500}
     self.write(tornado.escape.json_encode(reply))
Пример #12
0
 def post(self):
     """Store the name sent in the JSON request body under "<id>:name".

     Writes {"id", "name", "status": 200} after storing, or
     {"status": 500} when the body carries no "name".
     """
     request_json = tornado.escape.json_decode(self.request.body)
     user_id = request_json["id"]
     if "name" in request_json:
         user_name = request_json["name"]
         RedisClient().set(user_id + ":name", user_name)
         reply = {"id": user_id, "name": user_name, "status": 200}
     else:
         reply = {"status": 500}
     self.write(tornado.escape.json_encode(reply))
Пример #13
0
 def __init__(self,
              host="localhost",
              port='6379',
              key='Proxy',
              moni_port=9999):
     """Create the Redis client and remember the connection settings.

     host, port and key are forwarded to RedisClient (with password=None);
     moni_port is only stored here.
     """
     redis_options = dict(host=host, port=port, password=None, key=key)
     self.__redis = RedisClient(**redis_options)

     # Kept so other methods of the class can read them back later.
     self.__host = host
     self.__db_port = port
     self.__key = key
     self.__port = moni_port
Пример #14
0
    def respond_get_headlines(self, payload):
        """Turn a list of articles into a spoken reply and store them for follow-ups.

        payload -- sequence of article mappings; "headline", "url" and
                   "snippet" are read from each one.

        An empty or missing payload produces an "API error" reply and
        nothing else. Otherwise the articles are queued in self.pushlater,
        a phrase from FoundHeadlines goes into self.payload, the articles
        are stored in the order returned by NLPParser.parse_headlines, and
        the state becomes "headlines".
        """
        if not payload:
            self.error_response("API error")
            # Without this return the code below went on to iterate the
            # empty/None payload after the error had already been sent.
            return

        headlines = [article["headline"] for article in payload]
        sentence_headlines, topic_headlines, article_order = NLPParser.parse_headlines(
            headlines)
        article_values = [
            {
                "headline": article["headline"],
                "url": article["url"],
                "snippet": article["snippet"]
            }
            for article in payload
        ]
        self.pushlater = {"type": "article", "value": article_values}
        found = FoundHeadlines(sentence_headlines, topic_headlines, self.name)
        self.payload = {"read": found.get_phrase()}

        r = RedisClient()

        # article_order holds indexes into payload. Build a real list so the
        # stored value does not depend on map() returning a list.
        ordered_articles = [payload[index] for index in article_order]

        # State transition
        r.set(self._id + ":selected", None)
        r.set(self._id + ":articles", ordered_articles)
        r.set(self._id + ":state", "headlines")
        self.finish_response()
Пример #15
0
 def __init__(self,
              validation_addr="http://www.baidu.com",
              host="www.baidu.com",
              batch_size=100,
              db_host='localhost',
              port='6379',
              key='Proxy'):
     """Create the Redis client and remember the validation settings.

     db_host, port and key are forwarded to RedisClient (with
     password=None); validation_addr, host and batch_size are only stored.
     """
     redis_options = dict(host=db_host, port=port, password=None, key=key)
     self.__redis = RedisClient(**redis_options)

     # Settings read back by the validation methods of the class.
     self.__addr = validation_addr
     self.__host = host
     self.__batch_size = batch_size
Пример #16
0
 def post(self):
   """Return the name stored for the id given in the JSON request body.

   Writes {"id", "name", "status": 200} when a name is stored under
   "<id>:name", otherwise {"status": 500}.
   """
   request_json = tornado.escape.json_decode(self.request.body)
   user_id = request_json["id"]
   name = RedisClient().get(user_id + ":name")
   if name:
     reply = {"id": user_id, "name": name, "status": 200}
   else:
     reply = {"status": 500}
   self.write(tornado.escape.json_encode(reply))
Пример #17
0
class WeiboCookiesGenerator(object):
    """Generate and store cookies and uids for accounts that have none yet."""

    def __init__(self, website='weibo'):
        """
        Open the three stores used by the generator.

        :param website: site name, passed as the second argument to every RedisClient
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.uid_db = RedisClient('uid', self.website)

    def run(self):
        """
        Log in every account without stored cookies and save its cookies and uid.

        :return: None
        """
        accounts_usernames = self.accounts_db.usernames()
        # Build the lookup once; membership is tested for every account.
        cookies_usernames = set(self.cookies_db.usernames())

        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the account password in clear text.
            print('正在生成Cookies', '账号', username, '密码', password)
            result = self.new_cookies(username, password)
            status = result.get('status')
            if status == 1:
                # Login succeeded.
                cookies = result.get('content')  # cookies is a dict
                uid = result.get('uid')
                print('成功获取到Cookies', cookies, '成功获取到uid', uid)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存Cookies')
                if self.uid_db.set(username, uid):
                    print('成功保存uid')
            elif status == 0:
                # Login failed; 'content' carries the error.
                print(result.get('content'))

        # This used to be a for/else clause. The loop never breaks, so the
        # message was always printed after the loop; a plain statement says
        # the same thing without implying a condition.
        print('所有账号都已经成功获取Cookies')

    def new_cookies(self, username, password):
        """
        Generate cookies for one account.

        :param username: account user name
        :param password: account password
        :return: whatever WeiboCookies(...).main() returns; run() reads
                 'status', 'content' and 'uid' from it
        """
        return WeiboCookies(username, password).main()
Пример #18
0
 def post(self):
   """Store the name sent in the JSON request body under "<id>:name".

   Writes {"id", "name", "status": 200} after storing, or
   {"status": 500} when the body carries no "name".
   """
   request_json = tornado.escape.json_decode(self.request.body)
   user_id = request_json["id"]
   if "name" in request_json:
     user_name = request_json["name"]
     RedisClient().set(user_id + ":name", user_name)
     reply = {"id": user_id, "name": user_name, "status": 200}
   else:
     reply = {"status": 500}
   self.write(tornado.escape.json_encode(reply))
Пример #19
0
 def save_headline(self):
     """Confirm saving the article the user means and mark it as selected.

     In state "start", or when no article can be resolved, a single error
     reply is sent instead.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, I didn't get that.")
         # Stop here. Previously execution fell through and went on to
         # build a second reply for the same request.
         return

     article = self.extract_article()
     if not article:
         self.error_response(
             "I'm sorry. I don't know which article you want me to save.")
         return

     confirmation = SaveConfirmation()
     self.payload = {"read": confirmation.get_phrase()}
     # Do some other stuff to actually save the article somewhere
     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     self.finish_response()
Пример #20
0
class Getter(object):
    """Crawl free proxy IPs from proxy sites and store them in Redis."""

    def __init__(self):
        self.redis_client = RedisClient()
        self.crawler = Crawler()

    def is_full(self):
        """Return True once the stored proxy count has reached FULL_COUNT."""
        stored = self.redis_client.get_proxy_count()
        return stored >= FULL_COUNT

    def run(self):
        """Store every crawled proxy, unless the pool is already full."""
        if self.is_full():
            return
        for proxy in self.crawler.get_crawler_proxy():
            self.redis_client.add(proxy)
Пример #21
0
class Tester(object):
    """Fetch TEST_URL through every stored proxy and update the store accordingly."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        Calls max() on the store when the response status is in
        VALID_STATUS_CODES and decrease() when it is not.

        :param proxy: proxy address as str or UTF-8 encoded bytes
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = "http://" + proxy
                print("正在测试", proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    if response.status in VALID_STATUS_CODES:
                        self.redis.max(proxy)
                        print("代理可用", proxy)
                    else:
                        self.redis.decrease(proxy)
                        print("请求接口相应不合法", proxy)
            except Exception as e:
                # Still best effort: one bad proxy must not stop the batch.
                # `except Exception` (rather than a bare except) lets task
                # cancellation and KeyboardInterrupt propagate, and the
                # failure is now reported instead of vanishing.
                # NOTE(review): a proxy that fails to connect is not scored
                # down here; confirm whether decrease() is wanted.
                print("代理请求失败", proxy, e.args)

    def run(self):
        """
        Test all stored proxies in batches of BATCH_TEST_SIZE.

        :return: None
        """
        print("测试器开始执行")
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches.
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print("测试器发生错误", e.args)
Пример #22
0
 def save_headline(self):
   """Confirm saving the article the user means and mark it as selected.

   In state "start", or when no article can be resolved, a single error
   reply is sent instead.
   """
   r = RedisClient()
   state = r.get(self._id + ":state")
   if state == "start":
     self.error_response("Sorry, I didn't get that.")
     # Stop here. Previously execution fell through and went on to
     # build a second reply for the same request.
     return

   article = self.extract_article()
   if not article:
     self.error_response("I'm sorry. I don't know which article you want me to save.")
     return

   confirmation = SaveConfirmation()
   self.payload = {
     "read": confirmation.get_phrase()
   }
   # Do some other stuff to actually save the article somewhere
   r.set(self._id + ":selected", article)
   r.set(self._id + ":state", "selected")
   self.finish_response()
Пример #23
0
    def respond_get_headlines(self, payload):
        """Turn a list of articles into a spoken reply and store them for follow-ups.

        payload -- sequence of article mappings; "headline", "url" and
                   "snippet" are read from each one.

        An empty or missing payload produces an "API error" reply and
        nothing else. Otherwise the articles are queued in self.pushlater,
        a phrase from FoundHeadlines goes into self.payload, the articles
        are stored in the order returned by NLPParser.parse_headlines, and
        the state becomes "headlines".
        """
        if not payload:
            self.error_response("API error")
            # Without this return the code below went on to iterate the
            # empty/None payload after the error had already been sent.
            return

        headlines = [article["headline"] for article in payload]
        sentence_headlines, topic_headlines, article_order = NLPParser.parse_headlines(headlines)
        article_values = [
            {"headline": article["headline"], "url": article["url"], "snippet": article["snippet"]}
            for article in payload
        ]
        self.pushlater = {
            "type": "article",
            "value": article_values
        }
        found = FoundHeadlines(sentence_headlines, topic_headlines, self.name)
        self.payload = {
            "read": found.get_phrase()
        }

        r = RedisClient()

        # article_order holds indexes into payload. Build a real list so the
        # stored value does not depend on map() returning a list.
        ordered_articles = [payload[index] for index in article_order]

        # State transition
        r.set(self._id + ":selected", None)
        r.set(self._id + ":articles", ordered_articles)
        r.set(self._id + ":state", "headlines")
        self.finish_response()
Пример #24
0
def store_proxies(proxies):
    """Store a proxies mapping in Redis if it passes connect_check and is new.

    The mapping is stored as its JSON text with the value 1. Each outcome
    (new, already stored, failed check) is logged with a local timestamp.
    """
    conn = RedisClient(name='certificate_proxies')
    stamp = str(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())))

    if not connect_check(proxies):
        err_logger.error(
            stamp + ' Can not connect baidu.com -- proxies: ' + str(proxies))
        return

    serialized = json.dumps(proxies)
    if conn.exist(serialized):
        info_logger.info(stamp + ' Already exist proxies: ' + str(serialized))
        return

    conn.set(serialized, 1)
    message = stamp + ' New proxies: ' + str(serialized)
    info_logger.info(message)
    print(message)
Пример #25
0
 def get_summary(self):
     """Read out the snippet of the article the user means and mark it as selected.

     In state "start", or when no article can be resolved, a single error
     reply is sent instead.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, what kind of news would you like to hear about?")
         # Stop here. Previously execution fell through and went on to
         # build a second reply for the same request.
         return

     article = self.extract_article()
     if not article:
         self.error_response("Sorry I don't know what article you are talking about")
         return

     # The leftover debug prints (type(article), `article is None`, ...)
     # that used to sit here were removed.
     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     self.payload = {
         "read": "%s. Would you like me to send the full article to your kindle?" % article["snippet"]
     }
     self.finish_response()
Пример #26
0
class API(object):
    """Publish the Redis connection settings as module globals and start the web app."""

    def __init__(self,
                 host="localhost",
                 port='6379',
                 key='Proxy',
                 moni_port=9999):
        """Create the Redis client and remember the settings.

        host, port and key are forwarded to RedisClient (with
        password=None); moni_port is the port passed to app.run by run().
        """
        redis_options = dict(host=host, port=port, password=None, key=key)
        self.__redis = RedisClient(**redis_options)
        self.__host = host
        self.__db_port = port
        self.__key = key
        self.__port = moni_port

    def __message(self, method):
        """Return the result of the store query named by `method`, or the error text."""
        # Each accepted name is also the name of the store method to call.
        query_names = ("random", "random_max", "all", "all_max", "count")
        if method in query_names:
            return getattr(self.__redis, method)()
        return "Sorry,wrong name or wrong password,try again..."

    def run(self):
        """Publish the settings, then serve the app on all interfaces."""
        self.__set_api()
        app.run(host="0.0.0.0", port=self.__port)

    def __set_api(self):
        """Copy this instance's settings into the module-level globals."""
        global HOST, PORT, KEY, MONI_PORT

        HOST, PORT = self.__host, self.__db_port
        KEY, MONI_PORT = self.__key, self.__port
Пример #27
0
def get_uniform_proxy(name, url):
    """Request proxies from the proxy service until a new, working one is stored.

    name, url -- sent as the JSON body of every request to the service.

    Each candidate outside regions R1/R2 is checked by fetching
    http://www.baidu.com through it. The first one that answers 200 and is
    not yet in the 'certificate_proxies' store is saved (as the JSON text of
    its proxies mapping, with the value 1) and the function returns. Every
    other outcome asks the service for another proxy.

    Returns:
        None.
    """
    proxy_url = 'http://166.188.20.55:3000/api/proxy/web'
    headers = {
        'Content-Type':
        'application/json',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/'
        '537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    ping_url = 'http://www.baidu.com'
    port = '30000'

    # Loop-invariant work, done once instead of on every retry.
    payload = json.dumps({'name': name, 'url': url})
    conn = RedisClient(name='certificate_proxies')

    while True:
        resp = requests.post(url=proxy_url,
                             headers=headers,
                             data=payload).json()
        region = resp['proxy']['region']
        if region in ('R1', 'R2'):
            # print(region, 'err')
            continue

        ip = resp['proxy']['ip']
        proxies = {
            'http': 'http://%s:%s' % (ip, port),
            'https': 'http://%s:%s' % (ip, port),
        }
        try:
            status_code = requests.get(ping_url,
                                       proxies=proxies,
                                       timeout=(3.1, 15)).status_code
        except requests.RequestException:
            # A dead proxy raises instead of returning a status. Treat it
            # like any other failed ping and try the next proxy rather
            # than aborting the search.
            print('connection error...')
            continue

        if status_code != 200:
            print('connection error...')
            continue

        p = json.dumps(proxies)
        if conn.exist(p):
            print('already exist proxies: ', p)
            continue

        conn.set(p, 1)
        # conn.lpush(p)
        print('New proxies: ', proxies)
        break
Пример #28
0
 def get_media(self):
     """Reply with the multimedia attached to the article the user means.

     On success the article is stored as the current selection, the state
     becomes "selected", a phrase from Media is placed in self.payload and
     the media itself is queued in self.pushlater before the response is
     finished. Each failure path ends with a single error_response.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, what kind of news would you like to hear about?")
         # Stop here. Previously execution fell through and went on to
         # build a second reply for the same request.
         return

     article = self.extract_article()
     if not article:
         self.error_response("Sorry I don't know what article you are talking about")
         return

     if not article['multimedia']:
         self.error_response("Sorry I don't have images for that article")
         return

     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     have_media = Media(self.name)
     self.payload = {
         "read": have_media.get_phrase() # usable url for html
     }
     self.pushlater = {"type": "media", "value": article['multimedia']}
     self.finish_response()
Пример #29
0
  def __init__(self, redsmin_prod):
    """Create the Redis-side and endpoint-side connections and link them together.

    redsmin_prod is passed straight to Endpoint; what it selects is not
    visible here. The Redis address is read from the module-level
    config["uri"].
    """
    # Create both connection objects.
    self.redisClient = RedisClient(config["uri"])
    self.endpoint = Endpoint(redsmin_prod)

    # Point each side's handle_read at the method that (judging by its name)
    # forwards the data it reads to the other side.
    self.redisClient.handle_read = self.sendDataFromRedisToEndPoint
    self.endpoint.handle_read = self.sendDataFromEndPointToRedis

    # Start the endpoint handshake only after both read handlers are in place.
    self.endpoint.handshake()
Пример #30
0
def do_response():
    """Answer a proxy-pool query sent by GET (query string) or POST (form).

    A GET without "name" serves index.html. Otherwise name/password are
    checked and, when they match, the query named by "method" (random,
    random_max, all, all_max, count) is run against Redis and returned as
    text. Wrong credentials or an unknown method give the fixed error text.

    Redis is only contacted after the credentials have been accepted, and
    only the requested query is executed. Previously every query ran on
    every request, including the static page and failed logins.
    """
    wrong = 'Sorry,wrong name or wrong password,try again...'

    if request.method == "GET":
        name = request.args.get("name", "")
        password = request.args.get("password", "")
        method = request.args.get("method", "")
        if not name:
            return app.send_static_file("index.html")
    elif request.method == "POST":
        name = request.form['name']
        password = request.form['password']
        method = request.form['method']
    else:
        # Unchanged: any other HTTP verb gets no value from this view.
        return None

    # NOTE(review): credentials are hard-coded and compared in plain text;
    # they should come from configuration.
    if not (name == "yooongchun" and password == "121561"):
        return wrong
    return _proxy_query(method, wrong)


def _proxy_query(method, fallback):
    """Run the single Redis query named by `method` and render it as response text."""
    redis = RedisClient(host=HOST, port=PORT, key=KEY)
    if method in ("random", "random_max", "count"):
        return str(getattr(redis, method)())
    if method in ("all", "all_max"):
        # One entry per line in the browser.
        return "".join(str(ip) + "<br />" for ip in getattr(redis, method)())
    return fallback
Пример #31
0
    def post(self):
        self.pushlater = {}
        self.payload = {}
        self.outcome = tornado.escape.json_decode(self.request.body)
        intent = self.outcome["intent"]
        if ("id" in self.outcome):
            self._id = self.outcome["id"]
        else:
            self.error_response("No id passed in outcome")

        print "DEBUGGING RESPONSE"
        print self.outcome
        print self._id
        f = FirebaseDB()
        if ("user" in self.outcome):
            f.post(self._id, {"type": "user", "value": self.outcome["user"]})
        if ("reporta" in self.outcome):
            f.post(self._id, {
                "type": "reporta",
                "value": self.outcome["reporta"]
            })

        r = RedisClient()
        self.name = r.get(self._id + ":name")

        if (intent == "start"):
            self.start()
        elif (intent == "confirm_action"):
            self.confirm_action()
        elif (intent == "get_headlines"):
            self.get_headlines()
        elif (intent == "get_summary"):
            self.get_summary()
        elif (intent == "get_media"):
            self.get_media()
        elif (intent == "save_headline"):
            self.save_headline()
        else:
            error_messages = Error(self.name)
            self.error_response(error_messages.get_phrase())
Пример #32
0
class Validation(object):
    """Fetch a validation address through every stored proxy and update the store."""

    def __init__(self,
                 validation_addr="http://www.baidu.com",
                 host="www.baidu.com",
                 batch_size=100,
                 db_host='localhost',
                 port='6379',
                 key='Proxy'):
        """Create the Redis client and remember the validation settings.

        db_host, port and key are forwarded to RedisClient (with
        password=None). validation_addr is the URL fetched through each
        proxy, host is handed to UserAgent, and batch_size is the number of
        proxies validated concurrently.
        """
        self.__redis = RedisClient(host=db_host,
                                   port=port,
                                   password=None,
                                   key=key)
        self.__host = host
        self.__addr = validation_addr
        self.__batch_size = batch_size

    async def validation_single(self, proxy):
        '''Validate a single proxy.

        Calls max() on the store when the validation address answers 200
        through the proxy; calls decrease() for any other status or any
        exception.
        '''
        headers = UserAgent(host=self.__host).headers()
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                async with session.get(self.__addr,
                                       proxy=real_proxy,
                                       headers=headers,
                                       timeout=15) as response:
                    if response.status == 200:
                        self.__redis.max(proxy)
                    else:
                        self.__redis.decrease(proxy)
            except Exception:
                self.__redis.decrease(proxy)

    def run(self):
        """Validate every stored proxy, batch_size proxies at a time."""
        try:
            proxies = self.__redis.all()
            loop = asyncio.get_event_loop()
            for i in range(0, len(proxies), self.__batch_size):
                task_proxies = proxies[i:self.__batch_size + i]
                # asyncio.wait() rejects bare coroutines on Python 3.11+.
                # Combined with the silent handler below, that meant no
                # proxy was ever validated. Wrap each coroutine in a Task.
                tasks = [
                    loop.create_task(self.validation_single(proxy))
                    for proxy in task_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
        except Exception as e:
            # Still non-fatal for the caller, but no longer invisible.
            print("validation run failed: {!r}".format(e))
Пример #33
0
class WeiboValidTester(object):
    """Check stored cookies against the site's test URL and delete the ones that fail."""

    def __init__(self, website='weibo'):
        """Open the cookies and uid stores for `website`."""
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.uid_db = RedisClient('uid', self.website)

    def test(self, username, cookies):
        """Validate one account's cookies.

        :param username: account whose cookies are being checked
        :param cookies: JSON text of the cookies as stored
        :return: None

        Cookies that cannot be parsed, or that do not produce a 200 from
        the test URL, are deleted from the cookies store.
        """
        print('正在测试Cookies', '用户名', username)
        try:
            cookies = json.loads(cookies)
        except (TypeError, ValueError):
            # TypeError: the stored value is not text (e.g. None).
            # ValueError: the text is not valid JSON (json.JSONDecodeError
            # is a ValueError). This case used to escape uncaught and
            # abort the whole run.
            print('Cookies不合法', username)
            self.cookies_db.delete(username)
            print('删除Cookies', username)
            return
        try:
            test_url = TEST_URL_MAP[self.website]
            response = requests.get(test_url % self.uid_db.get(username),
                                    cookies=cookies,
                                    timeout=5,
                                    allow_redirects=False)
            if response.status_code == 200:
                print('Cookies有效', username)
            else:
                print(response.status_code, response.headers)
                print('Cookies失效', username)
                self.cookies_db.delete(username)
                print('删除Cookies', username)
        # NOTE(review): which ConnectionError this is (builtin or the one
        # from requests) depends on the file's imports; confirm.
        except ConnectionError as e:
            print('发生异常', e.args)

    def run(self):
        """Test every username/cookies pair in the cookies store."""
        cookies_groups = self.cookies_db.all()
        for username, cookies in cookies_groups.items():
            self.test(username, cookies)
Пример #34
0
class Tester(object):
    """Fetch TEST_URL through stored proxies in batches and update the store."""

    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """Test one proxy (str or UTF-8 bytes) against TEST_URL.

        Calls max() on the store when the response status is in
        VALID_STATUS_CODE; calls decrase() for any other status and for the
        client/timeout errors listed below.
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('测试代理: {}'.format(real_proxy))
                async with session.get(TEST_URL,
                                       proxy=real_proxy,
                                       allow_redirects=False,
                                       timeout=10) as resp:
                    if resp.status in VALID_STATUS_CODE:
                        self.redis.max(proxy)
                        print('代理{} 可用'.format(real_proxy))
                    else:
                        # NOTE(review): "decrase" looks like a typo for
                        # "decrease", but it must match the name actually
                        # defined on RedisClient; confirm there.
                        self.redis.decrase(proxy)
                        print('代理{}的返回状态错误'.format(real_proxy))
            except (ClientError,
                    aiohttp.client_exceptions.ClientConnectorError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrase(proxy)
                print("代理{}请求异常".format(real_proxy))

    def run(self):
        '''
        Main entry point: test every stored proxy, BATCH_TEST_SIZE at a time.
        '''
        print('开始测试')
        try:
            count = self.redis.count()
            print('当前剩余{}个代理'.format(count))
            loop = asyncio.get_event_loop()
            for i in range(0, count, BATCH_TEST_SIZE):
                start = i
                stop = min(i + BATCH_TEST_SIZE, count)
                print('测试测范围{}-{}'.format(start, stop))
                test_proxies = self.redis.batch(start, stop)
                # asyncio.wait() rejects bare coroutines on Python 3.11+,
                # so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                if not tasks:
                    # The store may have shrunk since count() was read.
                    # asyncio.wait() raises on an empty set, which would
                    # abort the remaining batches.
                    continue
                loop.run_until_complete(asyncio.wait(tasks))
                sys.stdout.flush()
                time.sleep(2)
        except Exception as e:
            print('测试异常{}'.format(e.args))
Пример #35
0
class Tester(object):
    """
    Take proxies out of redis, test whether each one works, and adjust
    its priority accordingly.
    """
    def __init__(self, test_url):
        self.redisdb = RedisClient()
        # URL requested through each proxy to decide whether it works
        self.test_url = test_url

    def test_proxy(self, proxy):
        """
        Request ``self.test_url`` through one proxy.

        This method only reports the result; the priority update is done
        by ``run`` from the returned flag.

        :param proxy: proxy address without scheme, as str or bytes
        :return: ``(ok, proxy)`` where ``ok`` is True only for an HTTP 200
            response and False for any other status or any exception
        """
        try:
            if isinstance(proxy, bytes):
                proxy = proxy.decode('utf-8')
            proxies = {
                'http': 'http://' + proxy,
                'https': 'https://' + proxy
            }
            print('正在检测:{}'.format(proxy))
            res = requests.get(self.test_url, proxies=proxies, timeout=10)
            return res.status_code == 200, proxy
        except Exception:
            # Best effort: any failure simply marks the proxy as unusable.
            return False, proxy

    def run(self):
        """
        Test every proxy returned by ``self.redisdb.get_all_proxy()``.

        Proxies are checked 50 at a time, one worker thread per proxy in
        the slice. A failing proxy is passed to ``self.redisdb.decrease``;
        a working one to ``self.redisdb.max``. Any exception is caught and
        its traceback printed.
        """
        print('启动检测模块......')
        try:
            # Fetch all proxies currently stored in redis
            proxies = self.redisdb.get_all_proxy()
            for i in range(0, len(proxies), 50):
                test_proxies = proxies[i:i+50]
                workers = len(test_proxies)
                with futures.ThreadPoolExecutor(workers) as executor:
                    tasks_res = executor.map(self.test_proxy, test_proxies)
                    for res, proxy in tasks_res:
                        if not res:
                            # Proxy unusable: lower its priority
                            self.redisdb.decrease(proxy)
                            print('代理不可用:{}'.format(proxy))
                        else:
                            # Proxy usable: raise its priority to the maximum
                            self.redisdb.max(proxy)
                            print('代理可用:{}'.format(proxy))

        except Exception:
            print(traceback.format_exc())
            print('检测模块出错!!!')
Пример #36
0
class Getter(object):
    """Collects proxies from the crawler and stores them through RedisClient."""
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        '''
        Return True when the stored proxy count has reached
        settings.POOL_UPPER_THRESHOLD.
        '''
        return self.redis.count() >= settings.POOL_UPPER_THRESHOLD

    def run(self):
        '''
        Run every registered crawl function and add each proxy it yields,
        unless the pool is already at the threshold.
        '''
        print('开始抓取')
        if self.is_over_threshold():
            return
        crawler = self.crawler
        for index in range(crawler.__CrawlFuncCount__):
            crawl_func = crawler.__CrawlFunc__[index]
            found = crawler.get_proxies(crawl_func)
            sys.stdout.flush()
            for proxy in found:
                self.redis.add(proxy)
Пример #37
0
class Getter(object):
    """Stores proxies obtained from the crawler through RedisClient."""
    def __init__(self):
        self.redis_client = RedisClient()
        self.crawler = Crawler()

    def is_full(self):
        """
            Return True when the stored proxy count has reached FULL_COUNT.
        """
        return self.redis_client.get_proxy_count() >= FULL_COUNT

    def run(self):
        """
            Add every crawled proxy to redis, unless the pool is full.
        """
        if self.is_full():
            return
        for proxy in self.crawler.get_crawler_proxy():
            self.redis_client.add(proxy)
Пример #38
0
 def get_summary(self):
     """
     Respond with the snippet of the article the user referred to.

     Reads this user's ``:state`` key; in the "start" state an error
     response is sent and nothing else happens. Otherwise the article is
     resolved with ``extract_article``. When one is found it is stored
     under ``:selected``, the state becomes "selected", and the snippet is
     placed in ``self.payload`` before the response is finished. When none
     is found an error response is sent.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response(
             "Sorry, what kind of news would you like to hear about?")
         # Stop here so the request does not go on to produce a second
         # response below.
         return
     article = self.extract_article()
     if not article:
         self.error_response(
             "Sorry I don't know what article you are talking about")
         return
     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     # Debug output; single-argument print(...) behaves the same on
     # Python 2 and Python 3.
     print("**************")
     print(type(article))
     print(article)
     print(article is None)
     self.payload = {
         "read":
         "%s. Would you like me to send the full article to your kindle?"
         % article["snippet"]
     }
     self.finish_response()
Пример #39
0
class Getter():
    """Collects proxies from the crawler and stores them through RedisClient."""
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        Tell whether the proxy pool has reached POOL_UPPER_THRESHOLD.
        :return: True when ``self.redis.count()`` is at or above the limit
        """
        return self.redis.count() >= POOL_UPPER_THRESHOLD

    def run(self):
        """
        Run every registered crawl function and add each proxy it yields,
        unless the pool is already at the threshold.
        """
        print("获取器开始执行")
        if self.is_over_threshold():
            return
        crawler = self.crawler
        for index in range(crawler.__CrawlFuncCount__):
            crawl_func = crawler.__CrawlFunc__[index]
            for proxy in crawler.get_proxies(crawl_func):
                self.redis.add(proxy)
Пример #40
0
    def post(self):
        """
        Handle one POSTed outcome and dispatch on its intent.

        The request body is decoded as JSON into ``self.outcome``. It must
        contain "intent" and "id"; without "id" an error response is sent
        and processing stops. Optional "user" and "reporta" values are
        posted to FirebaseDB. The user's stored name is loaded into
        ``self.name`` and the method named by the intent is called; an
        unknown intent gets an error response.
        """
        self.pushlater = {}
        self.payload = {}
        self.outcome = tornado.escape.json_decode(self.request.body)
        intent = self.outcome["intent"]
        if "id" not in self.outcome:
            self.error_response("No id passed in outcome")
            # Everything below needs self._id, which was never set.
            return
        self._id = self.outcome["id"]

        # Debug output; single-argument print(...) behaves the same on
        # Python 2 and Python 3.
        print("DEBUGGING RESPONSE")
        print(self.outcome)
        print(self._id)
        f = FirebaseDB()
        for field in ("user", "reporta"):
            if field in self.outcome:
                f.post(self._id, {"type": field, "value": self.outcome[field]})

        r = RedisClient()
        self.name = r.get(self._id + ":name")

        # Each supported intent is handled by the method of the same name.
        known_intents = (
            "start",
            "confirm_action",
            "get_headlines",
            "get_summary",
            "get_media",
            "save_headline",
        )
        if intent in known_intents:
            getattr(self, intent)()
        else:
            error_messages = Error(self.name)
            self.error_response(error_messages.get_phrase())
Пример #41
0
    def start(self):
        """
        Reset this user's stored conversation keys and request headlines.

        The ``:state``, ``:articles`` and ``:selected`` keys are set to None,
        then ``NYTimes.get_headlines`` is called with ``self.respond_start``
        as its argument.
        """
        store = RedisClient()

        # Clear the state for this new user
        for suffix in (":state", ":articles", ":selected"):
            store.set(self._id + suffix, None)

        NYTimes.get_headlines(self.respond_start)
Пример #42
0
class Proxy:
  """Forwards data in both directions between a RedisClient and an Endpoint."""
  def __init__(self, redsmin_prod):
    # Create both sockets.
    self.redisClient = RedisClient(config["uri"])
    self.endpoint = Endpoint(redsmin_prod)

    # Install the forwarding callbacks as each socket's read handler.
    self.redisClient.handle_read = self.sendDataFromRedisToEndPoint
    self.endpoint.handle_read = self.sendDataFromEndPointToRedis

    # Start the handshake on the endpoint side.
    self.endpoint.handshake()

  # Forward data from Redsmin to Redis (called when the endpoint socket has something to read)
  def sendDataFromEndPointToRedis(self):
    chunk = self.endpoint.recv(8192)
    # Echo traffic until the endpoint reports the handshake as done.
    if not self.endpoint.handshaken:
      print(chunk)
    self.redisClient.send(chunk)

  # Forward data from Redis to Redsmin (called when the redis socket has something to read)
  def sendDataFromRedisToEndPoint(self):
    chunk = self.redisClient.recv(8192)
    self.endpoint.send(chunk)
Пример #43
0
 def __init__(self, website='weibo'):
     """
     Create the RedisClient handles used for one website.

     :param website: site name; stored on the instance and passed as the
         second argument to each RedisClient
     """
     self.website = website
     # One client per kind of record: 'cookies', 'accounts' and 'uid'.
     self.cookies_db = RedisClient('cookies', self.website)
     self.accounts_db = RedisClient('accounts', self.website)
     self.uid_db = RedisClient('uid', self.website)
Пример #44
0
class Tester(object):
    """Tests the stored proxies against TEST_URL and updates them in redis."""
    def __init__(self):
        self.redis = RedisClient()

    async def test_single_proxy(self, proxy):
        """
        Test a single proxy.

        Calls ``self.redis.max(proxy)`` when the response status is in
        VALID_STSTUS_CODES; calls ``self.redis.decrease(proxy)`` for any
        other status or when one of the handled request errors is raised.

        :param proxy: proxy address without scheme, as str or bytes
        :return: None
        """
        conn = aiohttp.TCPConnector(verify_ssl=False)
        async with aiohttp.ClientSession(connector=conn) as session:
            try:
                if isinstance(proxy, bytes):
                    proxy = proxy.decode('utf-8')
                real_proxy = 'http://' + proxy
                print('正在测试:', proxy)
                async with session.get(TEST_URL, proxy=real_proxy,
                                       timeout=15) as response:
                    # NOTE(review): the constant is spelled "STSTUS";
                    # confirm it matches the name where it is defined.
                    if response.status in VALID_STSTUS_CODES:
                        self.redis.max(proxy)
                        print('代理可用:', proxy)
                    else:
                        self.redis.decrease(proxy)
                        print('请求响应码不合法', proxy)
            # asyncio.TimeoutError is a different class from the builtin
            # TimeoutError before Python 3.11, so both are listed.
            except (ClientError, ClientConnectorError, TimeoutError,
                    asyncio.TimeoutError, AttributeError):
                self.redis.decrease(proxy)
                print('代理请求失败', proxy)

    def run(self):
        """
        Main test entry point.

        Loads all proxies with ``self.redis.all()`` and tests them in
        slices of BATCH_TEST_SIZE on the event loop, sleeping 5 seconds
        after each slice. Any exception is caught and printed.

        :return: None
        """
        print('测试器开始运行')
        try:
            proxies = self.redis.all()
            loop = asyncio.get_event_loop()
            # Test in batches
            for i in range(0, len(proxies), BATCH_TEST_SIZE):
                test_proxies = proxies[i:i + BATCH_TEST_SIZE]
                # asyncio.wait() rejects bare coroutine objects on
                # Python 3.11+, so wrap each one in a Task first.
                tasks = [
                    loop.create_task(self.test_single_proxy(proxy))
                    for proxy in test_proxies
                ]
                loop.run_until_complete(asyncio.wait(tasks))
                time.sleep(5)
        except Exception as e:
            print('测试器发生错误', e.args)
Пример #45
0
    def respond_start(self, articles):
        """
        Build the opening response once the headlines have arrived.

        With no articles an "API error" response is sent and nothing is
        stored. Otherwise, if the user has stored keywords, the articles are
        narrowed with ``NYTimes.check_keywords``. The resulting list is saved
        under ``:articles``, the state becomes "start", and the phrase from
        ``UpdatesOrNoUpdates`` is sent as the payload.

        :param articles: articles passed in by the headline lookup
        """
        # The parameter is ``articles``; the previous check used the
        # undefined name ``article`` and raised NameError on every call.
        if not articles:
            self.error_response("API error")
            return
        r = RedisClient()
        keywords = r.get(self._id + ":keywords")

        if keywords:
            keyword_articles = NYTimes.check_keywords(articles, keywords)
            # NOTE(review): this tests the keywords, not the filtered
            # articles, so it is True whenever keywords has a length above
            # zero - confirm whether keyword_articles was intended.
            hasUpdate = len(keywords) > 0
            articles = keyword_articles
        else:
            hasUpdate = False

        start_state = UpdatesOrNoUpdates(hasUpdate, self.name)
        self.payload = {
            "read": start_state.get_phrase()
        }
        r.set(self._id + ":articles", articles)
        r.set(self._id + ":state", "start")
        self.finish_response()
Пример #46
0
import sys
from RedisClient import RedisClient
import json

def usage():
    """Print the command-line help and exit with status -1."""
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("usage: python test_redis.py command{add|update|get} winid params")
    print("\ttest_redis.py add winid dev")
    print("\ttest_redis.py update winid app state")
    print("\ttest_redis.py get winid")
    # sys.exit is always available; the bare exit() helper is only injected
    # by the site module.
    sys.exit(-1)

# Number of parameters each command takes after the window id.
EXTRA_ARGS = {'add': 1, 'update': 2, 'get': 0}

if len(sys.argv) < 3:
    usage()

command = sys.argv[1]
# An unknown command or a missing parameter shows the usage text instead of
# failing later with an IndexError.
if command not in EXTRA_ARGS or len(sys.argv) < 3 + EXTRA_ARGS[command]:
    usage()

winid = int(sys.argv[2])
rcli = RedisClient()

if command == 'add':
    dev = sys.argv[3]
    rcli.add_window(winid, dev)
    print("add window [%x | %s] succeed!" % (winid, dev))
elif command == 'update':
    app = sys.argv[3]
    state = sys.argv[4]
    rcli.update(winid, app, state)
    print("update window [%x] app=[%s]; state=[%s] succeed!" % (winid, app, state))
else:
    # Only 'get' can reach this branch after the validation above.
    val = rcli.get_window_status(winid)
    # Decode the \uXXXX escapes produced by json.dumps before printing.
    txt = json.dumps(val).decode('unicode-escape')
    print("window id=%x; %s" % (winid, txt))
Пример #47
0
 def do_conn(self, arg, opts=None):
     # Select a connection by number: arg is converted to int and looked up
     # with config.get_conn, which yields a (host, port, db) triple.
     # opts is not used in the lines visible here.
     (host, port, db) = config.get_conn(int(arg))
     # The prompt shows the host and the db value (the port is not shown).
     self.prompt = "%s:%s > " % (host, str(db))
     self.conn = RedisClient(host, port, db)