def run(self):
    """Fetch queued movie-detail URLs and push the parsed result to the content queue.

    Loops until ``self.NOT_EXIST`` becomes truthy, which happens as soon as
    the worker finds its queue empty. Each URL is requested once per
    iteration:

    * non-200 response: the URL is put back on the queue and the worker
      sleeps 20 seconds before continuing;
    * 200 response: the page text is handed to
      ``dytt_Lastest.getMoiveInforms`` and the result is put on
      ``TaskQueue.getContentQueue()``, followed by a 5 second pause.

    Any exception raised while handling a URL is printed and the URL is
    dropped (it is not re-queued).
    """
    # Local import keeps this block self-contained; assumes self.queue is a
    # standard-library queue.Queue (it is used through that API here).
    from queue import Empty

    while not self.NOT_EXIST:
        # A non-blocking get replaces the former empty()/get() pair, which
        # could block forever if another worker took the last item between
        # the two calls.
        try:
            url = self.queue.get_nowait()
        except Empty:
            # Queue drained: record the stop flag on the instance (the old
            # code assigned a throwaway local) and leave the loop.
            self.NOT_EXIST = 1
            break

        try:
            response = requests.get(
                url,
                headers=RequestModel.getHeaders(),
                proxies=RequestModel.getProxies(),
                timeout=3,
            )
            print('Top 子线程 ' + str(self.id) + ' 请求【 ' + url + ' 】的结果: ' + str(response.status_code))

            # The site's pages must be decoded as GBK, otherwise the text
            # comes out garbled.
            response.encoding = 'GBK'

            if response.status_code != 200:
                # Retry later: hand the URL back to the queue and back off.
                self.queue.put(url)
                time.sleep(20)
            else:
                temp = dytt_Lastest.getMoiveInforms(url, response.text)
                TaskQueue.getContentQueue().put(temp)
                time.sleep(5)
        except Exception as e:
            # Best effort: report the failure and move on to the next URL.
            print(e)
        finally:
            # Exactly one task_done() per successful get(), so the queue's
            # unfinished-task accounting (and any join()) stays correct.
            self.queue.task_done()
def insertData():
    """Drain the content queue into the ``lastest_moive`` table of ``./dytt.db``.

    Creates the table first when ``sqlite_master`` has no entry for it.
    Every queued item is converted with ``Utils.dirToList`` and inserted
    through a parameterized statement; each row is committed individually
    and a progress line is printed.

    Exceptions from sqlite3 (or from the queue/helpers) propagate to the
    caller, but the database connection is always closed first.
    """
    DBName = 'dytt.db'
    SelectSql = 'Select * from sqlite_master where type = "table" and name="lastest_moive";'
    CreateTableSql = '''
        Create Table lastest_moive (
            'm_id' INTEGER PRIMARY KEY,
            'm_type' varchar(100),
            'm_trans_name' varchar(200),
            'm_name' varchar(100),
            'm_decade' varchar(30),
            'm_conutry' varchar(30),
            'm_level' varchar(100),
            'm_language' varchar(30),
            'm_subtitles' varchar(100),
            'm_publish' varchar(30),
            'm_IMDB_socre' varchar(50),
            'm_douban_score' varchar(50),
            'm_format' varchar(20),
            'm_resolution' varchar(20),
            'm_size' varchar(10),
            'm_duration' varchar(10),
            'm_director' varchar(50),
            'm_actors' varchar(1000),
            'm_placard' varchar(200),
            'm_screenshot' varchar(200),
            'm_ftpurl' varchar(200),
            'm_dytt8_url' varchar(200)
        );
    '''
    InsertSql = '''
        Insert into lastest_moive(m_type, m_trans_name, m_name, m_decade, m_conutry,
            m_level, m_language, m_subtitles, m_publish, m_IMDB_socre,
            m_douban_score, m_format, m_resolution, m_size, m_duration,
            m_director, m_actors, m_placard, m_screenshot, m_ftpurl, m_dytt8_url)
        values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
    '''

    # Second positional argument of sqlite3.connect is the lock timeout (s).
    db = sqlite3.connect('./' + DBName, 10)
    try:
        cursor = db.cursor()

        if not cursor.execute(SelectSql).fetchone():
            cursor.execute(CreateTableSql)
            db.commit()
            print('==== 创建表成功 ====')
        else:
            print('==== 创建表失败, 表已经存在 ====')

        count = 1
        while not TaskQueue.isContentQueueEmpty():
            item = TaskQueue.getContentQueue().get()
            cursor.execute(InsertSql, Utils.dirToList(item))
            # Commit per row so already-inserted rows survive a later failure.
            db.commit()
            print('插入第 ' + str(count) + ' 条数据成功')
            count = count + 1
    finally:
        # Previously the connection leaked whenever a statement raised.
        db.close()
def _run_worker_stage(thread_cls, task_queue, is_drained, grace_seconds=60):
    """Start THREAD_SUM workers on *task_queue* and wait for the stage to end.

    Waits until ``is_drained()`` reports true, sleeping between checks, and
    then joins every worker for at most *grace_seconds* so that requests
    still in flight can deliver their results.

    Assumes *thread_cls* is a ``threading.Thread`` subclass constructed as
    ``thread_cls(queue, index)`` -- TODO confirm against the worker classes.
    """
    import time

    workers = []
    for i in range(THREAD_SUM):
        worker = thread_cls(task_queue, i)
        worker.start()
        workers.append(worker)

    # Sleep between polls instead of spinning in a tight loop.
    while not is_drained():
        time.sleep(0.5)

    # An empty queue only means every item was *taken*; a worker may still be
    # processing its last one. The bounded join lets that work finish without
    # hanging forever on a stuck worker.
    for worker in workers:
        worker.join(grace_seconds)


def startSpider():
    """Run the whole crawl: list pages, then detail pages, then persist.

    1. Ask ``dytt_Lastest`` how many list pages exist and queue their URLs.
    2. Run ``FloorWorkThread`` workers over the floor queue.
    3. Run ``TopWorkThread`` workers over the middle queue.
    4. Call ``insertData()`` to store whatever reached the content queue.

    Any exception raised along the way is printed after an ``error:`` line
    and not re-raised.
    """
    try:
        # Number of pages in the "latest movies" section.
        LASTEST_MOIVE_TOTAL_SUM = dytt_Lastest.getMaxsize()
        print('【最新电影】一共 ' + str(LASTEST_MOIVE_TOTAL_SUM) + ' 有个页面')

        dyttlastest = dytt_Lastest(LASTEST_MOIVE_TOTAL_SUM)
        floorlist = dyttlastest.getPageUrlList()

        floorQueue = TaskQueue.getFloorQueue()
        for item in floorlist:
            # NOTE(review): if this is a queue.Queue, the second positional
            # argument is `block`, not a timeout -- kept as in the original.
            floorQueue.put(item, 3)

        _run_worker_stage(FloorWorkThread, floorQueue, TaskQueue.isFloorQueueEmpty)
        _run_worker_stage(TopWorkThread, TaskQueue.getMiddleQueue(), TaskQueue.isMiddleQueueEmpty)

        insertData()
    except Exception as e:
        print('error:')
        print(e)
def run(self):
    """Fetch queued list-page URLs and forward the movie-page URLs they contain.

    Loops until ``self.NOT_EXIST`` becomes truthy, which happens as soon as
    the worker finds its queue empty. Each URL is requested once per
    iteration:

    * non-200 response: the URL is put back on the queue and the worker
      sleeps 20 seconds before continuing;
    * 200 response: ``dytt_Lastest.getMoivePageUrlList`` extracts the
      movie-page paths from the page text, each is prefixed with
      ``self.host`` and passed to ``TaskQueue.putToMiddleQueue``, followed
      by a 3 second pause.

    Any exception raised while handling a URL is printed and the URL is
    dropped (it is not re-queued).
    """
    # Local import keeps this block self-contained; assumes self.queue is a
    # standard-library queue.Queue (it is used through that API here).
    from queue import Empty

    while not self.NOT_EXIST:
        # A non-blocking get replaces the former empty()/get() pair, which
        # could block forever if another worker took the last item between
        # the two calls.
        try:
            url = self.queue.get_nowait()
        except Empty:
            # Queue drained: record the stop flag on the instance (the old
            # code assigned a throwaway local) and leave the loop.
            self.NOT_EXIST = 1
            break

        try:
            response = requests.get(
                url,
                headers=RequestModel.getHeaders(),
                proxies=RequestModel.getProxies(),
                timeout=3,
            )
            print('Floor 子线程 ' + str(self.id) + ' 请求【 ' + url + ' 】的结果: ' + str(response.status_code))

            # The site's pages must be decoded as GBK, otherwise the text
            # comes out garbled.
            response.encoding = 'GBK'

            if response.status_code != 200:
                # Retry later: hand the URL back to the queue and back off.
                self.queue.put(url)
                time.sleep(20)
            else:
                moivePageUrlList = dytt_Lastest.getMoivePageUrlList(response.text)
                for item in moivePageUrlList:
                    each = self.host + item
                    TaskQueue.putToMiddleQueue(each)
                time.sleep(3)
        except Exception as e:
            # Best effort: report the failure and move on to the next URL.
            print(e)
        finally:
            # Exactly one task_done() per successful get(), so the queue's
            # unfinished-task accounting (and any join()) stays correct.
            self.queue.task_done()
def SaveMysql():
    """Insert ONE item from the content queue into MySQL table ``lastest_moive``.

    Connects to the ``dytt`` database on 127.0.0.1:3306, creates the table
    when ``table_exists`` does not return 1 for it, then takes a single item
    from ``TaskQueue.getContentQueue()``, converts it with
    ``Utils.dirToList`` and inserts it with a parameterized statement.

    NOTE(review): the drain loop over the content queue was disabled in the
    original code, so each call stores exactly one item. The queue is
    presumably a ``queue.Queue``, in which case ``get()`` blocks while the
    queue is empty -- confirm against the callers before re-enabling a loop.

    Exceptions propagate to the caller; the connection is always closed.
    """
    import pymysql

    CreateTableSql = """
        Create Table lastest_moive(
            m_id int not null auto_increment,
            m_type varchar(100),
            m_trans_name varchar(200),
            m_name varchar(100),
            m_decade varchar(30),
            m_conutry varchar(30),
            m_level varchar(100),
            m_language varchar(30),
            m_subtitles varchar(100),
            m_publish varchar(30),
            m_IMDB_socre varchar(50),
            m_douban_score varchar(50),
            m_format varchar(20),
            m_resolution varchar(20),
            m_size varchar(10),
            m_duration varchar(10),
            m_director varchar(50),
            m_actors varchar(1000),
            m_placard varchar(200),
            m_screenshot varchar(200),
            m_ftpurl varchar(200),
            m_dytt8_url varchar(200),
            primary key (m_id)
        );
    """
    InsertSql = '''
        Insert into lastest_moive(m_type, m_trans_name, m_name, m_decade, m_conutry,
            m_level, m_language, m_subtitles, m_publish, m_IMDB_socre,
            m_douban_score, m_format, m_resolution, m_size, m_duration,
            m_director, m_actors, m_placard, m_screenshot, m_ftpurl, m_dytt8_url)
        values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''
    table_name = 'lastest_moive'

    conn = pymysql.connect(host='127.0.0.1', port=3306, user='******', password="******",
                           database='dytt', charset="utf8")
    try:
        cursor = conn.cursor()

        if table_exists(cursor, table_name) != 1:
            cursor.execute(CreateTableSql)
            conn.commit()
            print('==== 创建表成功 ====')

        count = 1
        item = TaskQueue.getContentQueue().get()
        # Values are passed separately so the driver escapes them.
        cursor.execute(InsertSql, Utils.dirToList(item))
        conn.commit()
        print('插入第 ' + str(count) + ' 条数据成功')
    finally:
        # Previously the connection leaked whenever a statement raised.
        conn.close()
def insertData():
    """Drain the content queue into MySQL table ``lastest_movie``.

    Connects to the ``movie`` database on 127.0.0.1, creates the table when
    ``table_exists`` reports it missing, then inserts every queued item
    (converted with ``Utils.dirToList``) and commits after each row.

    On any error the message is printed after a ``mysql error:`` line, the
    connection is closed, and the work is retried on a fresh connection. An
    item whose insert failed has already been taken from the queue and is
    not retried. Retrying is iterative (the original recursed without
    bound); if connecting or table setup fails several times in a row the
    last exception is re-raised instead of looping forever.
    """
    tableName = 'lastest_movie'
    CreateTableSql = '''
        Create Table lastest_movie (
            `m_id` int unsigned auto_increment PRIMARY KEY,
            `m_type` varchar(100),
            `m_trans_name` varchar(200),
            `m_name` varchar(100),
            `m_decade` varchar(30),
            `m_conutry` varchar(30),
            `m_level` varchar(100),
            `m_language` varchar(30),
            `m_subtitles` varchar(100),
            `m_publish` varchar(80),
            `m_IMDB_socre` varchar(50),
            `m_douban_score` varchar(50),
            `m_format` varchar(20),
            `m_resolution` varchar(20),
            `m_size` varchar(10),
            `m_duration` varchar(10),
            `m_director` varchar(50),
            `m_actors` varchar(1000),
            `m_placard` varchar(200),
            `m_screenshot` varchar(200),
            `m_ftpurl` varchar(200),
            `m_dytt8_url` varchar(200)
        );
    '''
    # Bare %s placeholders: the driver quotes and escapes each value. The
    # original interpolated values into the SQL text itself, which broke on
    # any value containing a quote and allowed SQL injection.
    InsertSql = '''
        Insert into lastest_movie(m_type, m_trans_name, m_name, m_decade, m_conutry,
            m_level, m_language, m_subtitles, m_publish, m_IMDB_socre,
            m_douban_score, m_format, m_resolution, m_size, m_duration,
            m_director, m_actors, m_placard, m_screenshot, m_ftpurl, m_dytt8_url)
        values(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''

    max_setup_failures = 3   # consecutive connect/create-table failures tolerated
    setup_failures = 0
    count = 1

    while True:
        db = None
        setup_done = False
        try:
            # NOTE(review): credentials are hard-coded; consider moving them
            # to configuration.
            db = MySQLdb.connect("127.0.0.1", "root", "we3613040", "movie", charset='utf8')
            cursor = db.cursor()

            if not table_exists(cursor, tableName):
                cursor.execute(CreateTableSql)
                db.commit()
                print('==== 创建表成功 ====')
            else:
                print('==== 创建表失败, 表已经存在 ====')

            setup_done = True
            setup_failures = 0

            while not TaskQueue.isContentQueueEmpty():
                item = TaskQueue.getContentQueue().get()
                cursor.execute(InsertSql, Utils.dirToList(item))
                db.commit()
                print('插入第 ' + str(count) + ' 条数据成功')
                count = count + 1

            # Queue drained without error: done.
            return
        except Exception as e:
            print('mysql error:')
            print(e)
            if not setup_done:
                # No queue item was consumed, so retrying makes no progress
                # by itself; give up after a few attempts in a row.
                setup_failures += 1
                if setup_failures >= max_setup_failures:
                    raise
        finally:
            # db stays None when connect() itself failed; the original
            # handler raised NameError in that case.
            if db is not None:
                try:
                    db.close()
                except Exception as close_error:
                    print(close_error)