Пример #1
0
 def get_media(self):
     """Reply with the media attached to the article the user refers to.

     Reads this user's ``:state`` key; when it is "start" the request is
     rejected with a prompt for a news topic.  Otherwise the article returned
     by ``self.extract_article()`` is stored under ``:selected``, the state
     becomes "selected", and the article's ``multimedia`` value is queued in
     ``self.pushlater`` before the response is finished.

     Every error path returns right after ``error_response`` so a rejected
     request never goes on to select an article or emit a second response.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response(
             "Sorry, what kind of news would you like to hear about?")
         return

     article = self.extract_article()
     if not article:
         self.error_response(
             "Sorry I don't know what article you are talking about")
         return
     if not article['multimedia']:
         self.error_response(
             "Sorry I don't have images for that article")
         return

     # State transition: remember which article the user picked.
     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     have_media = Media(self.name)
     self.payload = {
         "read": have_media.get_phrase()  # usable url for html
     }
     self.pushlater = {
         "type": "media",
         "value": article['multimedia']
     }
     self.finish_response()
Пример #2
0
    def start(self):
        """Reset this user's stored keys, then ask NYTimes for headlines.

        The ``:state``, ``:articles`` and ``:selected`` keys are overwritten
        with ``None`` (in that order) and ``self.respond_start`` is handed to
        ``NYTimes.get_headlines``.
        """
        store = RedisClient()

        # A new user starts from a clean slate.
        for suffix in (":state", ":articles", ":selected"):
            store.set(self._id + suffix, None)

        NYTimes.get_headlines(self.respond_start)
Пример #3
0
    def start(self):
        """Clear the per-user Redis entries and request the headlines.

        Writes ``None`` to the user's ``:state``, ``:articles`` and
        ``:selected`` keys, then calls ``NYTimes.get_headlines`` with
        ``self.respond_start``.
        """
        client = RedisClient()

        # Forget whatever was stored for this user before.
        for field in ("state", "articles", "selected"):
            client.set(self._id + ":" + field, None)

        NYTimes.get_headlines(self.respond_start)
Пример #4
0
 def post(self):
     """Store the submitted name under ``<id>:name`` and write a JSON reply.

     The request body is decoded as JSON and must contain "id".  When
     "name" is absent the reply is ``{"status": 500}``; otherwise the name
     is saved through ``RedisClient`` and echoed back with status 200.
     """
     body = tornado.escape.json_decode(self.request.body)
     uid = body["id"]
     if "name" in body:
         name = body["name"]
         RedisClient().set(uid + ":name", name)
         reply = {"id": uid, "name": name, "status": 200}
     else:
         reply = {"status": 500}
     self.write(tornado.escape.json_encode(reply))
Пример #5
0
class WeiboCookiesGenerator(object):
    """Create and store login cookies and uids for the configured accounts."""

    def __init__(self, website='weibo'):
        """
        Set up the three Redis-backed stores used by the generator.

        :param website: site name, passed as the second argument to each
            RedisClient ('cookies', 'accounts' and 'uid' stores)
        """
        self.website = website
        self.cookies_db = RedisClient('cookies', self.website)
        self.accounts_db = RedisClient('accounts', self.website)
        self.uid_db = RedisClient('uid', self.website)

    def run(self):
        """
        Log in every account that has no stored cookies and save the result.

        For each username in the accounts store that is missing from the
        cookies store, ``new_cookies`` is called.  A result with status 1 has
        its 'content' (JSON-encoded) and 'uid' saved; a result with status 0
        has its 'content' printed.  The closing "all accounts succeeded"
        message is printed only when no attempted login came back with a
        status other than 1.

        :return: None
        """
        accounts_usernames = self.accounts_db.usernames()
        # Set membership keeps the per-account check O(1).
        cookies_usernames = set(self.cookies_db.usernames())
        all_succeeded = True

        for username in accounts_usernames:
            if username in cookies_usernames:
                continue
            password = self.accounts_db.get(username)
            # NOTE(review): this prints the account password in clear text.
            print('正在生成Cookies', '账号', username, '密码', password)
            result = self.new_cookies(username, password)
            status = result.get('status')
            # Login succeeded
            if status == 1:
                cookies = result.get('content')  # cookies is a dict
                uid = result.get('uid')
                print('成功获取到Cookies', cookies, '成功获取到uid', uid)
                if self.cookies_db.set(username, json.dumps(cookies)):
                    print('成功保存Cookies')
                if self.uid_db.set(username, uid):
                    print('成功保存uid')
            else:
                all_succeeded = False
                # Login error: report what the login attempt returned
                if status == 0:
                    print(result.get('content'))

        if all_succeeded:
            print('所有账号都已经成功获取Cookies')

    def new_cookies(self, username, password):
        """
        Generate cookies for one account.

        :param username: account user name
        :param password: account password
        :return: whatever ``WeiboCookies(username, password).main()`` returns;
            ``run`` reads its 'status', 'content' and 'uid' keys
        """
        return WeiboCookies(username, password).main()
0
 def post(self):
   """Save the posted user name in Redis and answer with a JSON status.

   Expects a JSON body with "id" and "name".  Without "name" only
   ``{"status": 500}`` is written; with it, the name is stored under
   ``<id>:name`` and id, name and status 200 are written back.
   """
   request = tornado.escape.json_decode(self.request.body)
   user_id = request["id"]

   # Guard clause: nothing to store when the name is missing.
   if "name" not in request:
     self.write(tornado.escape.json_encode({"status": 500}))
     return

   user_name = request["name"]
   redis = RedisClient()
   redis.set(user_id + ":name", user_name)
   response = {"id": user_id, "name": user_name, "status": 200}
   self.write(tornado.escape.json_encode(response))
Пример #7
0
 def save_headline(self):
     """Confirm a save request for the article the user referred to.

     When the user's ``:state`` is "start" the request is rejected.
     Otherwise the article from ``self.extract_article()`` is recorded under
     ``:selected``, the state becomes "selected", and a confirmation phrase
     is sent.  Each error path returns immediately after ``error_response``
     so no second response is produced for the same request.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, I didn't get that.")
         return

     article = self.extract_article()
     if not article:
         self.error_response(
             "I'm sorry. I don't know which article you want me to save.")
         return

     confirmation = SaveConfirmation()
     self.payload = {"read": confirmation.get_phrase()}
     # Do some other stuff to actually save the article somewhere
     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     self.finish_response()
Пример #8
0
 def save_headline(self):
   """Acknowledge the user's request to save an article.

   A user whose ``:state`` is "start" gets an error reply.  Otherwise the
   article returned by ``self.extract_article()`` is written to
   ``:selected``, ``:state`` is set to "selected" and a confirmation
   phrase is returned.  Error paths return right after ``error_response``
   so the handler never continues past a rejected request.
   """
   r = RedisClient()
   state = r.get(self._id + ":state")
   if state == "start":
     self.error_response("Sorry, I didn't get that.")
     return

   article = self.extract_article()
   if not article:
     self.error_response("I'm sorry. I don't know which article you want me to save.")
     return

   confirmation = SaveConfirmation()
   self.payload = {
     "read": confirmation.get_phrase()
   }
   # Do some other stuff to actually save the article somewhere
   r.set(self._id + ":selected", article)
   r.set(self._id + ":state", "selected")
   self.finish_response()
Пример #9
0
    def respond_start(self, articles):
        """Store the fetched articles and send the opening phrase.

        :param articles: articles delivered by the headlines lookup; a falsy
            value is reported as "API error" and nothing is stored.

        When the user has a stored ``:keywords`` value, the articles are
        replaced by the result of ``NYTimes.check_keywords``.  The articles
        are saved under ``:articles`` and the state is set to "start".
        """
        if not articles:
            self.error_response("API error")
            return

        r = RedisClient()
        keywords = r.get(self._id + ":keywords")

        if keywords:
            articles = NYTimes.check_keywords(articles, keywords)
            # NOTE(review): this is always True inside the branch; the check
            # may have been meant for the keyword-filtered articles - confirm.
            has_update = len(keywords) > 0
        else:
            has_update = False

        start_state = UpdatesOrNoUpdates(has_update, self.name)
        self.payload = {"read": start_state.get_phrase()}
        r.set(self._id + ":articles", articles)
        r.set(self._id + ":state", "start")
        self.finish_response()
Пример #10
0
def store_proxies(proxies):
    """Save a proxies mapping in the 'certificate_proxies' store if it is new.

    The mapping is first passed to ``connect_check``; on failure an error is
    logged and nothing is stored.  On success the JSON-encoded mapping is
    used as the key: an unseen key is set to 1, logged and printed, while an
    existing key is only logged.

    :param proxies: proxies mapping; must be JSON-serialisable
    """
    conn = RedisClient(name='certificate_proxies')
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    if not connect_check(proxies):
        err_logger.error(
            stamp + ' Can not connect baidu.com -- proxies: ' + str(proxies))
        return

    serialized = json.dumps(proxies)
    if conn.exist(serialized):
        info_logger.info(stamp + ' Already exist proxies: ' + str(serialized))
        return

    conn.set(serialized, 1)
    message = stamp + ' New proxies: ' + str(serialized)
    info_logger.info(message)
    print(message)
Пример #11
0
 def get_summary(self):
     """Read out the snippet of the article the user refers to.

     A user whose ``:state`` is "start" is asked what kind of news they
     want.  Otherwise the article from ``self.extract_article()`` is stored
     under ``:selected``, the state becomes "selected", and the article's
     "snippet" is returned together with an offer to send the full article.
     Error paths return right after ``error_response`` so the handler does
     not carry on and respond a second time.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, what kind of news would you like to hear about?")
         return

     article = self.extract_article()
     if not article:
         self.error_response("Sorry I don't know what article you are talking about")
         return

     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     # Debug trace of the selected article; single-argument print() calls
     # behave the same under Python 2 and Python 3.
     print("**************")
     print(type(article))
     print(article)
     print(article is None)
     self.payload = {
         "read": "%s. Would you like me to send the full article to your kindle?" % article["snippet"]
     }
     self.finish_response()
Пример #12
0
def get_uniform_proxy(name, url):
    """Keep requesting proxies until a new, reachable one has been stored.

    Each round POSTs ``{'name': name, 'url': url}`` to the proxy API, skips
    proxies whose region is 'R1' or 'R2', and pings ``http://www.baidu.com``
    through the proxy on port 30000.  A proxy that answers 200 and is not yet
    in the 'certificate_proxies' store is saved and ends the loop; anything
    else triggers another round.

    :param name: value sent as 'name' in the API request body
    :param url: value sent as 'url' in the API request body
    :raises requests.RequestException: if the proxy API itself cannot be
        reached (failures of the ping through a proxy are retried instead)
    """
    proxy_url = 'http://166.188.20.55:3000/api/proxy/web'
    headers = {
        'Content-Type':
        'application/json',
        'User-Agent':
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/'
        '537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    body = json.dumps({'name': name, 'url': url})
    ping_url = 'http://www.baidu.com'
    port = '30000'
    timeout = (3.1, 15)
    # One store handle is enough for every round of the loop.
    conn = RedisClient(name='certificate_proxies')

    while True:
        # Without a timeout an unresponsive API would block forever.
        resp = requests.post(url=proxy_url,
                             headers=headers,
                             data=body,
                             timeout=timeout).json()
        region = resp['proxy']['region']
        if region in ('R1', 'R2'):
            continue

        ip = resp['proxy']['ip']
        proxies = {
            'http': 'http://%s:%s' % (ip, port),
            'https': 'http://%s:%s' % (ip, port),
        }
        try:
            status_code = requests.get(ping_url,
                                       proxies=proxies,
                                       timeout=timeout).status_code
        except requests.RequestException:
            # An unreachable proxy is treated like a bad status: try another.
            print('connection error...')
            continue
        if status_code != 200:
            print('connection error...')
            continue

        p = json.dumps(proxies)
        if conn.exist(p):
            print('already exist proxies: ', p)
            continue

        conn.set(p, 1)
        print('New proxies: ', proxies)
        break
Пример #13
0
    def respond_start(self, articles):
        """Persist the fetched articles and reply with the start phrase.

        :param articles: result of the headlines lookup; when falsy an
            "API error" response is sent and the method returns.

        If a ``:keywords`` value is stored for the user, the articles are
        swapped for the result of ``NYTimes.check_keywords``.  The (possibly
        replaced) articles go to ``:articles`` and ``:state`` becomes "start".
        """
        if not articles:
            self.error_response("API error")
            return

        r = RedisClient()
        keywords = r.get(self._id + ":keywords")

        if keywords:
            articles = NYTimes.check_keywords(articles, keywords)
            # NOTE(review): truthy keywords make this always True; possibly
            # the filtered articles were meant to be checked - confirm.
            has_update = len(keywords) > 0
        else:
            has_update = False

        start_state = UpdatesOrNoUpdates(has_update, self.name)
        self.payload = {
            "read": start_state.get_phrase()
        }
        r.set(self._id + ":articles", articles)
        r.set(self._id + ":state", "start")
        self.finish_response()
Пример #14
0
 def get_media(self):
     """Send the multimedia of the article the user is talking about.

     If the user's ``:state`` is "start", an error asking for a news topic
     is returned.  Otherwise ``self.extract_article()`` supplies the article;
     when it has a truthy ``multimedia`` entry the article is stored under
     ``:selected``, the state is set to "selected", and the media is placed
     in ``self.pushlater``.  All error paths return straight after
     ``error_response`` so processing stops at the first failure.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response("Sorry, what kind of news would you like to hear about?")
         return

     article = self.extract_article()
     if not article:
         self.error_response("Sorry I don't know what article you are talking about")
         return
     if not article['multimedia']:
         self.error_response("Sorry I don't have images for that article")
         return

     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     have_media = Media(self.name)
     self.payload = {
         "read": have_media.get_phrase()  # usable url for html
     }
     self.pushlater = {"type": "media", "value": article['multimedia']}
     self.finish_response()
Пример #15
0
    def respond_get_headlines(self, payload):
        """Announce the fetched headlines and store them in display order.

        :param payload: sequence of article dicts with "headline", "url" and
            "snippet" keys; a falsy value is reported as "API error" and
            nothing else happens.

        The headlines are passed to ``NLPParser.parse_headlines``; the
        returned ``article_order`` is used as indices into ``payload`` to
        build the list stored under ``:articles``.
        """
        if not payload:
            self.error_response("API error")
            return

        headlines = [article["headline"] for article in payload]
        sentence_headlines, topic_headlines, article_order = (
            NLPParser.parse_headlines(headlines))
        article_values = [
            {
                "headline": article["headline"],
                "url": article["url"],
                "snippet": article["snippet"],
            }
            for article in payload
        ]
        self.pushlater = {"type": "article", "value": article_values}
        found = FoundHeadlines(sentence_headlines, topic_headlines, self.name)
        self.payload = {"read": found.get_phrase()}

        r = RedisClient()

        # Real lists (not lazy map objects) so the stored value is concrete.
        ordered_articles = [payload[index] for index in article_order]

        # State transition
        r.set(self._id + ":selected", None)
        r.set(self._id + ":articles", ordered_articles)
        r.set(self._id + ":state", "headlines")
        self.finish_response()
Пример #16
0
def download_proxies():
    """Fetch proxies from the xdaili API and store the ones that work.

    Does nothing unless the current second is 0, 24 or 48.  Otherwise it
    requests the proxy list, and for every proxy that answers a ping to
    ``http://www.baidu.com`` with status 200 it stores the JSON-encoded
    proxies mapping in the 'certificate_proxies' store (``set`` plus
    ``lpush``) unless that key already exists.

    A 'RESULT' value that is not a list is logged as an error message.  A
    proxy whose ping raises a request error is logged and skipped so the
    remaining proxies are still checked.
    """
    # Alternative spider id kept for reference:
    # url = 'http://www.xdaili.cn/ipagent/privateProxy/applyStaticProxy?' \
    #       'count=1&spiderId=fd4708592c97444c9f42060c500649ac&returnType=2'

    url = 'http://www.xdaili.cn/ipagent/privateProxy/applyStaticProxy' \
          '?count=1&spiderId=8407fdf0311d4eb7b30f9e39699795e5&returnType=2'
    second = datetime.datetime.now().second
    if second not in (0, 24, 48):
        return

    # presumably keeps a quick repeat call out of the same trigger second
    # - TODO confirm against the caller
    time.sleep(1)
    conn = RedisClient(name='certificate_proxies')
    content = requests.get(url, timeout=(3.1, 15)).json()
    result = content['RESULT']
    if not isinstance(result, list):
        # The API reports problems (e.g. requests made too frequently) as a
        # plain message instead of a proxy list.
        err_logger.error(result)
        return

    ping_url = 'http://www.baidu.com'
    for proxy in result:
        ip = proxy['ip']
        port = proxy['port']
        proxies = {
            'http': 'http://%s:%s' % (ip, port),
            'https': 'http://%s:%s' % (ip, port),
        }
        p = json.dumps(proxies)
        try:
            status_code = requests.get(ping_url,
                                       timeout=(3.1, 15),
                                       proxies=proxies).status_code
        except requests.RequestException as exc:
            err_logger.error('proxy check failed: ' + p + ' -- ' + str(exc))
            continue
        if status_code != 200:
            continue

        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        if conn.exist(p):
            print(now, ' already exist proxies: ', p)
            continue
        conn.set(p, 1)
        conn.lpush(p)
        print(now, ' New proxies: ', p)
Пример #17
0
    def respond_get_headlines(self, payload):
        """Reply with the found headlines and remember them for follow-ups.

        :param payload: sequence of article dicts providing "headline", "url"
            and "snippet"; when falsy, "API error" is sent and the method
            returns without touching any stored state.

        ``NLPParser.parse_headlines`` receives the headline strings; its
        third return value, ``article_order``, indexes into ``payload`` to
        produce the list saved under ``:articles``.
        """
        if not payload:
            self.error_response("API error")
            return

        headlines = [article["headline"] for article in payload]
        sentence_headlines, topic_headlines, article_order = (
            NLPParser.parse_headlines(headlines))
        article_values = [
            {"headline": article["headline"], "url": article["url"], "snippet": article["snippet"]}
            for article in payload
        ]
        self.pushlater = {
            "type": "article",
            "value": article_values
        }
        found = FoundHeadlines(sentence_headlines, topic_headlines, self.name)
        self.payload = {
            "read": found.get_phrase()
        }

        r = RedisClient()

        # Build a concrete list; a lazy map object is not a storable value.
        ordered_articles = [payload[index] for index in article_order]

        # State transition
        r.set(self._id + ":selected", None)
        r.set(self._id + ":articles", ordered_articles)
        r.set(self._id + ":state", "headlines")
        self.finish_response()
Пример #18
0
 def get_summary(self):
     """Reply with the snippet of the article the user asked about.

     When the stored ``:state`` is "start" the user is asked for a news
     topic instead.  Otherwise the article returned by
     ``self.extract_article()`` is saved under ``:selected``, ``:state``
     becomes "selected", and the reply contains the article's "snippet"
     followed by an offer to send the full article.  Each error path returns
     immediately after ``error_response``.
     """
     r = RedisClient()
     state = r.get(self._id + ":state")
     if state == "start":
         self.error_response(
             "Sorry, what kind of news would you like to hear about?")
         return

     article = self.extract_article()
     if not article:
         self.error_response(
             "Sorry I don't know what article you are talking about")
         return

     r.set(self._id + ":selected", article)
     r.set(self._id + ":state", "selected")
     # Debug output for the selected article; written as single-argument
     # print() calls so it runs unchanged on Python 2 and Python 3.
     print("**************")
     print(type(article))
     print(article)
     print(article is None)
     self.payload = {
         "read":
         "%s. Would you like me to send the full article to your kindle?"
         % article["snippet"]
     }
     self.finish_response()