def parse(self, src, acinfolist):
    """Build submission rows from scraped link records and store the new ones.

    Every record in ``src`` is read positionally: ``record[0]`` is a
    site-relative path and ``record[1]`` is the raw title text. The first
    five characters of the path select the submission type; the remainder
    is the submission id. Records with an unrecognised prefix, or records
    that raise while being processed, are skipped.

    Args:
        src: Iterable of indexable records (``record[0]`` path,
            ``record[1]`` title). Records are not modified.
        acinfolist: List of already-known submission ids as strings. Ids of
            newly accepted rows are appended to it in place.

    Returns:
        The same ``acinfolist`` object that was passed in.
    """
    # Path prefix -> (text put in front of the stored id, submission type).
    # Bangumi ids differ from the others, so a minus sign is prepended to
    # tell them apart.
    kinds = {
        '/v/ac': ('', '视频'),   # video
        '/a/ac': ('', '文章'),   # article
        '/v/ab': ('-', '番剧'),  # bangumi
    }

    # Largest numeric id seen; used to query more submissions. E.g. if the
    # largest id on the front page is ac190000, comments for ac188900 up to
    # ac190000 are fetched as well.
    # NOTE(review): bangumi ids also raise max_id, as in the original —
    # confirm that is intended.
    max_id = 0
    rows = []

    for data in src:
        try:
            # Holds the data scraped for one submission.
            row = ACcommentsInfoPO()

            path = data[0]
            kind = kinds.get(path[0:5])
            if kind is None:
                continue
            id_prefix, type_name = kind
            id_text = path[5:]

            row.set_id(id_prefix + id_text)
            row.set_type(type_name)
            row.set_url(ACFUN + path)

            # A non-numeric id raises here and the record is skipped.
            numeric_id = int(id_text)
            if max_id < numeric_id:
                max_id = numeric_id

            # Drop the first seven characters of the raw title. The slice is
            # kept in a local so the caller's record is left untouched;
            # this also lets immutable (tuple) records be parsed.
            title = data[1][7:]
            row.set_title(title)
            row.set_check_time(str(datetime.datetime.now()))
        except Exception:
            # Deliberate best-effort: one malformed record must not abort
            # the whole batch.
            continue

        row_id = str(row.get_id())
        if row_id not in acinfolist:
            rows.append(row)
            acinfolist.append(row_id)

    # Per the original comment, this starts the random comment crawl.
    if rows:
        self.create_more(rows, max_id, acinfolist)

    # Submission info is stored in its own table.
    # NOTE(review): the original source had lost its indentation; this call
    # is taken to run unconditionally (also when ``rows`` is empty) — confirm.
    self.ac_comments.db_proc.ACCommentsInfo.insert(rows)

    return acinfolist