Пример #1
0
 def getProxyLs(self):
     """Download the proxy list from the pool service and store it in ``proxyls``.

     A non-OK HTTP status prints a failure message and returns immediately.
     Any other error is printed via ``traceback`` and swallowed; in both the
     success and the swallowed-error case the method then sleeps and hands
     over to ``self.testbythread()`` to validate the stored proxies.
     """
     replace_sql = 'replace into proxyls(PROXY_TYPE,PROXY_ADDR,PROXY_PORT,PROXY_ADD_TIME,PROXY_CHECK_TIME,PROXY_MD5) values (%s,%s,%s,%s,%s,%s)'
     try:
         response = requests.get('http://47.97.7.119:8080/proxypool/proxys/0', timeout=self.timeout)
         db.conn()
         if response.status_code != requests.codes.ok:
             print(u'获取代理失败!')
             return
         # One row per proxy, in the column order of replace_sql; the add
         # time is the current time in milliseconds, the check time is empty.
         rows = [
             [
                 entry['proxyType'],
                 entry['proxyAddress'],
                 str(entry['proxyPort']),
                 round(time.time() * 1000),
                 None,
                 self.md5Encode(entry['proxyAddress']),
             ]
             for entry in json.loads(response.text)['data']
         ]
         try:
             db.executemany(replace_sql, rows)
         except Exception:
             traceback.print_exc()
             db.rollback()
         else:
             db.commit()
             time.sleep(3)
             print(u'爬取完毕')
     except Exception:
         traceback.print_exc()
     time.sleep(2)
     print(u'开始检验代理...')
     self.testbythread()
Пример #2
0
 def complete(self, animeid):
     """Mark the ``anime_home`` row for ``animeid`` as completed (status 2).

     Args:
         animeid: Value matched against ``anime_home.ANIME_ID``.

     Raises:
         Exception: Any error from ``db.execute`` is re-raised after the
             transaction has been rolled back.
     """
     try:
         # Bind the id as a query parameter rather than interpolating it into
         # the SQL text, so quoting/escaping is left to the database driver.
         db.execute(
             'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 2 where a.ANIME_ID = %s',
             (animeid,))
     except Exception:
         db.rollback()
         raise
     else:
         # Commit only on the success path; after a rollback there is
         # nothing left to commit.
         db.commit()
Пример #3
0
 def getData(self, url):
     """Fetch one timetable page and sync its entries into ``anime_timetable``.

     The response body is parsed as JSON. Nothing is written unless
     ``status`` is 200 and ``data.page.list`` is non-empty. Each entry is
     looked up by its ``bid``: entries without a matching row are collected
     and bulk-inserted, entries with a matching row get their play date and
     episode updated.

     Args:
         url: Address handed to ``self.requestbyproxy``.
     """
     content = self.requestbyproxy(url)
     dataJson = json.loads(content.text)
     if dataJson['status'] != 200:
         return
     entries = dataJson['data']['page']['list']
     if not entries:
         return

     exists_sql = 'select 1 from anime_timetable a where a.ANIME_BID = %s limit 1'
     # The values come from a remote response, so they are bound as query
     # parameters instead of being formatted into the SQL text.
     update_sql = (
         'update anime_timetable t set t.ANIME_PLAY_DATE = %s,'
         't.ANIME_PLAY_EPISODE = %s where t.ANIME_BID = %s')
     insert_sql = 'insert into anime_timetable(ANIME_BID,ANIME_ID,ANIME_NAME,ANIME_COVER,ANIME_VERTICAL_COVER,ANIME_PLAY_DATE,ANIME_PLAY_TIME,ANIME_ORIGIN_TIME,ANIME_PLAY_SITE,ANIME_ORIGIN_STATION,ANIME_PLAY_URL,ANIME_PLAY_EPISODE) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

     values_insert = []
     for item in entries:
         anime_bid = int(item['bid'])
         anime_play_date = item['playDate']
         anime_play_episode = item['episode']
         # Row in the column order of insert_sql; ANIME_ID is left empty.
         row = [
             anime_bid, None, item['title'], item['cover'],
             item['verticalCover'], anime_play_date,
             item['playTime'], item['originTime'],
             item['playSite'], item['originStation'],
             item['playUrl'], anime_play_episode,
         ]
         # NOTE(review): relies on db.execute returning the matched row
         # count (0 when the bid is unknown) - confirm against the wrapper.
         if db.execute(exists_sql, (anime_bid,)) == 0:
             values_insert.append(row)
             continue
         try:
             db.execute(
                 update_sql,
                 (anime_play_date, anime_play_episode, anime_bid))
         except Exception:
             # Best effort: log the failed update and move on to the next entry.
             db.rollback()
             traceback.print_exc()

     try:
         if values_insert:
             db.executemany(insert_sql, values_insert)
     except Exception:
         db.rollback()
         traceback.print_exc()
     else:
         # Commits the pending updates as well as the bulk insert.
         db.commit()
     finally:
         time.sleep(1)
Пример #4
0
 def repair(self):
     """Reconcile the download-status flags stored in ``anime_home``.

     Every ``anime_home`` row that has a matching ``anime_info`` row is set
     to status 2; afterwards all rows still at status 1 are reset to 0.

     Raises:
         Exception: Any database error is re-raised after the transaction
             has been rolled back.
     """
     try:
         db.execute(
             'SELECT h.* FROM anime_home h LEFT JOIN anime_info i on (h.ANIME_ID = i.ANIME_ID) WHERE i.ANIME_ID IS NOT NULL',
             None)
         records = db.fetchall()
         # `or ()` tolerates a falsy result such as None or an empty tuple.
         for record in records or ():
             # NOTE(review): assumes the first selected column of anime_home
             # is its ID - confirm against the table definition.
             db.execute(
                 'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 2 WHERE a.ID = %s',
                 (record[0],))
         db.execute(
             'UPDATE anime_home h SET h.ANIME_INFO_DOWNLOAD_STATUS = 0 WHERE h.ANIME_INFO_DOWNLOAD_STATUS =1',
             None)
     except Exception:
         db.rollback()
         raise
     else:
         # Commit only when every statement succeeded.
         db.commit()
Пример #5
0
 def pop(self):
     """Move every ``anime_home`` row with status 0 onto ``self.queue``.

     The rows are selected first, then all status-0 rows are flipped to
     status 1, and the change is committed before the selected rows are
     queued.

     Returns:
         ``self.queue`` once the selected rows have been put on it, or
         ``None`` when no row had status 0.

     Raises:
         Exception: Any database error is re-raised after the transaction
             has been rolled back.
     """
     try:
         db.execute(
             'SELECT a.ID,a.ANIME_LINE,a.ANIME_INFO_DOWNLOAD_STATUS FROM anime_home a WHERE a.ANIME_INFO_DOWNLOAD_STATUS = 0',
             None)
         records = db.fetchall()
         db.execute(
             'UPDATE anime_home a SET a.ANIME_INFO_DOWNLOAD_STATUS = 1 WHERE a.ANIME_INFO_DOWNLOAD_STATUS = 0',
             None)
     except Exception:
         db.rollback()
         raise
     else:
         # Commit only on success; a rolled-back transaction has nothing to commit.
         db.commit()

     if not records:
         return None
     for record in records:
         self.queue.put(record)
     return self.queue
Пример #6
0
 def getxiciProxy(self,i):
     """Scrape one listing page from xicidaili and insert its proxies into ``proxyls``.

     Args:
         i: Page number; appended to the listing URL and also passed through
             to ``self.request``.
     """
     print(u'获取第',i,'页数据')
     html=self.request('http://www.xicidaili.com/nn/'+str(i),i)
     all_proxy=self.xpathResolve(html,"//table[@id='ip_list']/tr[@class][position()>1]//td[6]|//table[@id='ip_list']/tr[@class][position()>1]//td[2]|//table[@id='ip_list']/tr[@class][position()>1]//td[3]")
     # The matched cells come back as one flat sequence; regroup them three
     # at a time so that each group describes a single proxy.
     # NOTE(review): presumably the cells arrive in document order
     # (td[2], td[3], td[6]) per table row - confirm against xpathResolve.
     cell_groups = [
         all_proxy[start:start + 3]
         for start in range(0, len(all_proxy), 3)
     ]
     # Row layout follows the insert columns: type, address, port,
     # add time in milliseconds, check time (empty), md5 of the address.
     proxyLs = [
         [
             cells[2].text.lower(),
             cells[0].text,
             str(cells[1].text),
             round(time.time()*1000),
             None,
             self.md5Encode(cells[0].text),
         ]
         for cells in cell_groups
     ]
     # `with` guarantees the lock is released even when rollback() or
     # commit() raises; the manual acquire/release pair left it held then.
     with self.lock:
         try:
             db.executemany('insert into proxyls(PROXY_TYPE,PROXY_ADDR,PROXY_PORT,PROXY_ADD_TIME,PROXY_CHECK_TIME,PROXY_MD5) values (%s,%s,%s,%s,%s,%s)',proxyLs)
         except Exception:
             traceback.print_exc()
             db.rollback()
         else:
             db.commit()
Пример #7
0
 def checkproxy2(self):
     """Validate every proxy in ``self.proxyLs`` one after another.

     For each record a GET request to Baidu is issued with the record's
     proxy settings. An OK response calls ``self.writeproxy`` with the
     record's first field; any other status or any exception calls
     ``self.delproxy`` instead. The database is committed after every record.
     """
     # time.clock() was removed in Python 3.8; perf_counter() is the
     # documented replacement for measuring elapsed time.
     start = time.perf_counter()
     print(start)
     for result in self.proxyLs:
         try:
             # Built inside the try so a malformed record is handled like a
             # failed request instead of aborting the whole run.
             proxy_url = 'http://'+result[2]+':'+result[3]
             # NOTE(review): the proxies mapping is keyed by result[1]; since
             # the target URL is https, a record keyed 'http' would not
             # route this request through the proxy - confirm the intent.
             content = requests.get('https://www.baidu.com/', proxies={result[1]: proxy_url}, timeout=5)
             print(u'验证'+result[1]+':'+proxy_url)
         except Exception:
             traceback.print_exc()
             print('false')
             self.delproxy(result[0])
         else:
             if content.status_code == requests.codes.ok:
                 self.writeproxy(result[0])
             else:
                 self.delproxy(result[0])
         finally:
             # The original also had a bare `db.close` here (no call), which
             # did nothing; the connection deliberately stays open because
             # later iterations still need it.
             db.commit()
     print(str(time.perf_counter()-start) + "秒")
     print(u'验证完毕')
Пример #8
0
 def testbythread(self):
     """Validate all rows of ``proxyls`` using eight worker threads.

     Every row is put on ``self.queue``, eight threads running
     ``self.checkproxy`` are started one second apart, and the method waits
     for the threads and the queue before committing and closing ``db``.
     The elapsed time is printed at the end.
     """
     db.execute('select * from proxyls',None)
     proxyLs = db.fetchall()
     # time.clock() was removed in Python 3.8; perf_counter() is the
     # documented replacement for measuring elapsed time.
     start = time.perf_counter()
     for p in proxyLs:
         self.queue.put(p)
     threads=[]
     for _ in range(8):
         thread = threading.Thread(target=self.checkproxy)
         thread.start()  # start the worker
         threads.append(thread)
         time.sleep(1)
     # Wait for every worker thread to finish.
     for thread in threads:
         thread.join()
     # Wait until every queued item has been marked as done.
     self.queue.join()
     db.commit()
     db.close()
     print(str(time.perf_counter()-start) + "秒")
     print(u'验证完毕')
Пример #9
0
 def testdb(self):
     """Delete the row with ID 73 from ``proxy``, then commit and close ``db``."""
     target_id = 73
     statement = 'delete from proxy where ID = %s' % (target_id)
     db.execute(statement, None)
     db.commit()
     db.close()