Example #1
 def _item_info_update(items):
     column_id = items[0]
     item_id = str(items[1])
     sq = Sql()
     pr = Proxy()
     if PROXY_CRAWL:
         # Using free proxy pool
         while True:
             proxy_name = pr.get_proxy(0)  # tuple: header, proxy
             name = Crawler.get_name_jd(item_id, proxy_name[0],
                                        proxy_name[1])
             if name:
                 sq.update_item_name(column_id, name)
                 while True:
                     proxy_price = pr.get_proxy(1)  # tuple: header, proxy
                     price = Crawler.get_price_jd(item_id, proxy_price[0],
                                                  proxy_price[1])
                     if price:
                         sq.update_item_price(column_id, price)
                         break
                 break
     else:
         # Using local IP
         name = Crawler.get_name_jd(item_id, pr.get_ua())
         sq.update_item_name(column_id, name)
         price = Crawler.get_price_jd(item_id, pr.get_ua())
         sq.update_item_price(column_id, price)
     return name, price
Example #2
 def _check_item():
     """
     检查本轮需要爬取的商品
     :return: [{column_id, item_id}, ...]
     """
     sq = Sql()
     items = sq.read_all_not_updated_item()
     logging.warning('Items to crawl in this loop: %s', items)
     return items
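Sql is a project-internal wrapper that is not shown on this page. A plausible sqlite3-based sketch of the call Example #2 relies on; the table name, column names, and status flag are assumptions, not the project's actual schema:

 import sqlite3

 class Sql:
     def __init__(self, db_path='items.db'):
         self.conn = sqlite3.connect(db_path)
         self.conn.row_factory = sqlite3.Row

     def read_all_not_updated_item(self):
         # Items whose info has not been refreshed in the current round.
         cur = self.conn.execute(
             'SELECT id AS column_id, item_id FROM items WHERE updated = 0')
         return [dict(row) for row in cur.fetchall()]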
Example #3
 def _send_email():
     sq = Sql()
     # items_alert = {column_id, item_id, user_price, item_price, name, email}
     items_alert = sq.check_item_need_to_remind()
     logging.warning('Emails to send in this loop: %s', items_alert)
     for item_alert in items_alert:  # each item_alert is a dict with the keys listed above
         item_url = 'https://item.jd.com/' + str(item_alert['item_id']) + '.html'
         email_text = ('The item you are monitoring: ' + item_alert['name'] +
                       ', current price: ' + str(item_alert['item_price']) +
                       ', your target price: ' + str(item_alert['user_price']) +
                       '. Buy it now! ' + item_url)
         email_subject = 'An item you are monitoring has dropped in price!'
         try:
             send_email = Mail(email_text, 'Price Monitor', 'Dear user', email_subject, item_alert['email'])
             send_email.send()
             time.sleep(Email_TIME)
         except Exception:
             logging.critical('Failed to send email, skipping this loop: %s', item_alert['email'])
             continue
         sq.update_status(item_alert['column_id'])
         logging.warning('Monitor email sent successfully: %s', item_alert['email'])
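Mail is likewise project-internal. A minimal smtplib-based sketch matching the constructor call above, Mail(text, sender_name, receiver_name, subject, to_addr); the SMTP host, sender address, and credentials are placeholders that would need to be filled in:

 import smtplib
 from email.header import Header
 from email.mime.text import MIMEText
 from email.utils import formataddr

 class Mail:
     def __init__(self, text, sender_name, receiver_name, subject, to_addr):
         self.msg = MIMEText(text, 'plain', 'utf-8')
         self.msg['Subject'] = Header(subject, 'utf-8')
         self.msg['From'] = formataddr((sender_name, 'monitor@example.com'))
         self.msg['To'] = formataddr((receiver_name, to_addr))

     def send(self):
         # Placeholder host and credentials; SMTP_SSL avoids STARTTLS handling.
         with smtplib.SMTP_SSL('smtp.example.com', 465) as server:
             server.login('monitor@example.com', 'password')
             server.send_message(self.msg)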
Example #4
 def _send_email():
     # Send emails one at a time in a loop, to avoid dispatching them all at once.
     sq = Sql()
     items = sq.check_item_need_to_remind()
     logging.warning('Emails to send in this loop: %s', items)
     for item in items:  # email, item_name, item_price, user_price, item_id, column_id
         item_url = 'https://item.jd.com/' + str(item[4]) + '.html'
         email_text = ('The item you are monitoring: ' + item[1] +
                       ', current price: ' + str(item[2]) +
                       ', your target price: ' + str(item[3]) +
                       '. Buy it now! ' + item_url)
         email_subject = 'An item you are monitoring has dropped in price!'
         try:
             send_email = Mail(email_text, 'admin', 'user', email_subject,
                               item[0])
             send_email.send()
             time.sleep(Email_TIME)
         except Exception:
             logging.critical('Failed to send email, skipping this loop: %s',
                              item[0])
             continue
         sq.update_status(item[5])
         logging.warning('Email sent successfully: %s', item[0])
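Example #4 addresses the result row by numeric index, which is what the trailing comment on the for line is documenting. A namedtuple makes the same row self-describing; the field order here is taken from that comment:

 from collections import namedtuple

 Reminder = namedtuple('Reminder', ['email', 'item_name', 'item_price',
                                    'user_price', 'item_id', 'column_id'])

 # row = Reminder(*item) then lets the loop write row.item_id and
 # row.email instead of item[4] and item[0].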
Example #5
    def _item_info_update(self, item):
        # 1. Parameters: takes an item dict; column_id and item_id are read from it.
        # 2. Instantiate the Sql and Proxy classes.
        # 3. Crawl the item info and write it back through the Sql update methods.

        column_id = item['column_id']
        # Extract item_id and normalize it to a string
        item_id = str(item['item_id'])
        sq = Sql()
        pr = Proxy()
        if PROXY_CRAWL == 1:
            # Using free proxy pool
            while True:
                # tuple: (header, proxy); the header is used by the JS crawler
                proxy_info = pr.get_proxy()
                cr = Crawler(proxy_info[1])
                item_info = cr.get_jd_item(item_id)
                if item_info:
                    sq.update_item_name(column_id, item_info['name'])
                    sq.update_item_price(column_id, item_info['price'])
                    sq.update_item_plus_price(column_id, item_info['plus_price'])
                    sq.update_item_subtitle(column_id, item_info['subtitle'])
                    cr = Crawler(proxy_info[1])
                    huihui_info = cr.get_huihui_item(item_id)
                    if huihui_info:
                        sq.update_item_max_price(column_id, huihui_info['max_price'])
                        sq.update_item_min_price(column_id, huihui_info['min_price'])
                    break
        # PROXY_CRAWL is imported at module level via `from`
        elif PROXY_CRAWL == 2:
            # Using zhima proxy
            while True:
                if not self.proxy_info_zhima:
                    self.proxy_info_zhima = pr.get_proxy_zhima()
                logging.info('Zhima proxy: %s', self.proxy_info_zhima[1])
                # tuple: (header, proxy); the header is used by the JS crawler
                cr = Crawler(self.proxy_info_zhima[1])
                item_info = cr.get_jd_item(item_id)
                if not item_info:
                    self.proxy_info_zhima = ()
                    logging.warning('Zhima proxy crawl failed, switching proxy...')
                    time.sleep(5)
                    continue
                else:
                    sq.update_item_name(column_id, item_info['name'])
                    sq.update_item_price(column_id, item_info['price'])
                    sq.update_item_plus_price(column_id, item_info['plus_price'])
                    sq.update_item_subtitle(column_id, item_info['subtitle'])
                    cr = Crawler(self.proxy_info_zhima[1])
                    huihui_info = cr.get_huihui_item(item_id)
                    if huihui_info:
                        sq.update_item_max_price(column_id, huihui_info['max_price'])
                        sq.update_item_min_price(column_id, huihui_info['min_price'])
                    break
        else:
            # Using local IP
            cr = Crawler()
            # item_info: {name, price, plus_price, subtitle}
            item_info = cr.get_jd_item(item_id)
            sq.update_item_name(column_id, item_info['name'])
            sq.update_item_price(column_id, item_info['price'])
            sq.update_item_plus_price(column_id, item_info['plus_price'])
            sq.update_item_subtitle(column_id, item_info['subtitle'])

            cr = Crawler()
            # huihui_info = {max_price, min_price}
            # Query Huihui for the item's historical max and min prices.
            huihui_info = cr.get_huihui_item(item_id)
            if huihui_info:
                sq.update_item_max_price(column_id, huihui_info['max_price'])
                sq.update_item_min_price(column_id, huihui_info['min_price'])

        return item_info
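None of the examples on this page show how these pieces are driven. A plausible outer loop inferred from the 'This loop ...' log messages; the polling interval is an assumption, and `monitor` stands for an instance of the class that owns _item_info_update:

 import time

 def run(monitor, interval=300):
     while True:
         for item in _check_item():           # [{column_id, item_id}, ...]
             monitor._item_info_update(item)  # refresh DB with crawled info
         _send_email()                        # alert users whose target price was met
         time.sleep(interval)                 # wait before the next round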
Example #6
 def _check_item():
     sq = Sql()
     updated_time = UPDATE_TIME
     items = sq.read_all_not_updated_item(updated_time)
     logging.warning('Items to update in this loop: %s', items)
     return items
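Unlike Example #2, this variant passes UPDATE_TIME into the query, so the staleness window is decided in SQL. A sketch of what read_all_not_updated_item(updated_time) might run on top of the Sql class sketched after Example #2; the updated_at column is an assumption:

 def read_all_not_updated_item(self, updated_time):
     # Items whose last refresh is older than `updated_time` seconds.
     cur = self.conn.execute(
         "SELECT id AS column_id, item_id FROM items "
         "WHERE strftime('%s', 'now') - strftime('%s', updated_at) > ?",
         (updated_time,))
     return [dict(row) for row in cur.fetchall()]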
Example #7
 def _item_info_update(self, items):
     column_id = items[0]
     item_id = str(items[1])
     sq = Sql()
     pr = Proxy()
     if PROXY_CRAWL == 1:
         # Using free proxy pool
         while True:
             proxy_info = pr.get_proxy(0)  # tuple: header, proxy
             name = Crawler.get_name_jd(item_id, proxy_info[0],
                                        proxy_info[1])
             if name:
                 sq.update_item_name(column_id, name)
                 while True:
                     proxy_price = pr.get_proxy(1)  # tuple: header, proxy
                     price = Crawler.get_price_jd(item_id, proxy_price[0],
                                                  proxy_price[1])
                     if price:
                         sq.update_item_price(column_id, price)
                         break
                 break
     elif PROXY_CRAWL == 2:
         # Using zhima proxy
         while True:
             if not self.proxy_info_zhima_name:
                 self.proxy_info_zhima_name = pr.get_proxy_zhima()
             logging.info('Name proxy: %s %s', self.proxy_info_zhima_name, items)
             name = Crawler.get_name_jd(item_id,
                                        self.proxy_info_zhima_name[0],
                                        self.proxy_info_zhima_name[1])
             if not name:
                 self.proxy_info_zhima_name = ()
                 time.sleep(20)
                 continue
             else:
                 time.sleep(5)  # Avoid requesting proxies too quickly
                 sq.update_item_name(column_id, name)
                 while True:
                     if not self.proxy_info_zhima_price:
                         self.proxy_info_zhima_price = pr.get_proxy_zhima()
                     logging.info('Price proxy: %s %s',
                                  self.proxy_info_zhima_price, items)
                     price = Crawler.get_price_jd(
                         item_id, self.proxy_info_zhima_price[0],
                         self.proxy_info_zhima_price[1])
                     if not price:
                         self.proxy_info_zhima_price = ()
                         time.sleep(20)
                         continue
                     else:
                         sq.update_item_price(column_id, price)
                         break
                 break
     else:
         # Using local IP
         name = Crawler.get_name_jd(item_id, pr.get_ua())
         sq.update_item_name(column_id, name)
         price = Crawler.get_price_jd(item_id, pr.get_ua())
         sq.update_item_price(column_id, price)
     return name, price
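The zhima branch above caches a paid proxy on self (proxy_info_zhima_name, proxy_info_zhima_price) and only discards it after a failed crawl, since each paid proxy fetch costs quota. The same idea factored into a small helper; the class and method names here are illustrative, not the project's:

 class CachedProxy:
     """Keep one paid (header, proxy) pair alive across calls; fetch a
     replacement only after the current one fails."""

     def __init__(self, fetch_proxy):
         self._fetch = fetch_proxy  # e.g. pr.get_proxy_zhima
         self._current = ()

     def get(self):
         if not self._current:
             self._current = self._fetch()
         return self._current

     def invalidate(self):
         # Call after a failed crawl to force a fresh proxy next time.
         self._current = ()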