def query(self, sql):
    """Run a read-only SQL statement and return all rows, or None on error."""
    cursor = self.get_cursor()
    try:
        cursor.execute(sql)
        result = cursor.fetchall()
    except Exception as e:
        log.error("mysql query error: %s", e)
        return None
    finally:
        # Always release the cursor, whether the query succeeded or not.
        cursor.close()
    return result
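# A minimal usage sketch for `query` (the `db` instance and the table/column
# names below are assumptions for illustration, not part of this module):
#
#     rows = db.query("SELECT post_id, title FROM third_post LIMIT 10")
#     if rows is not None:  # query() returns None when the statement fails
#         for row in rows:
#             print(row)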
def executemany(self, sql, params=None):
    """Run a parameterized statement once per row in `params` and commit.

    Returns the number of affected rows, or 0 on error.
    """
    cursor = self.get_cursor()
    try:
        cursor.executemany(sql, params)
        self.conn.commit()
        affected_rows = cursor.rowcount
    except Exception as e:
        log.error("mysql executemany error: %s", e)
        # Roll back so a failed batch does not leave an open transaction.
        self.conn.rollback()
        return 0
    finally:
        cursor.close()
    return affected_rows
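# Usage sketch for `executemany`, batching a parameterized INSERT (the table
# and column names are hypothetical):
#
#     sql = "INSERT INTO third_post (post_id, title) VALUES (%s, %s)"
#     rows = [("p1", "first post"), ("p2", "second post")]
#     affected = db.executemany(sql, rows)  # returns 0 if the batch failed
#     log.info("inserted %d rows", affected)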
def _craw(self, url, param=None, *args):
    res = requests.post(url, json.dumps(param), headers=header)
    if res.status_code == 200:
        like_total = args[0]  # minimum like count a post needs to be kept
        # Juejin response
        body_json = res.json()
        log.debug("juejin response: %s", body_json)
        if body_json['data'] is None:
            log.error("failed to crawl juejin: %s", body_json['errors'])
            return
        article_list = body_json['data']['articleFeed']['items']['edges']
        res_list = []
        for arti_col in article_list:
            arti = arti_col['node']
            data = third_post_db.find_by_pt_id(arti['id'], self.third_id)
            # Skip posts already stored; keep only those above the like threshold.
            if data is None and arti['likeCount'] > like_total:
                post = ThirdPost(self.third_id, self.third_name, 0)
                tags = [t['title'] for t in arti['tags']]
                post.tags = ",".join(tags)
                # Fields: article id, title, tags, author, like count,
                # comment count, redirect url, creation time.
                post.post_id = arti['id']
                post.title = arti['title']
                post.author = arti['user']['username']
                post.content = arti['content']
                post.like_num = arti['likeCount']
                post.comment_num = arti['commentsCount']
                post.redirect_url = arti['originalUrl']
                post.creatime = arrow.get(
                    arti['createdAt']).format('YYYY-MM-DD HH:mm:ss')
                res_list.append(post)
        log.info("[%s] crawled -> %s, %d records", self.third_name, url, len(res_list))
        self.batch_insert(res_list)
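# Usage sketch for `_craw` (the endpoint URL and payload shape are assumptions
# inferred from the parsing above, not confirmed API details; the trailing
# positional argument is the like-count threshold read via args[0]):
#
#     payload = {"operationName": "", "query": "", "variables": {}}  # hypothetical body
#     crawler._craw("https://web-api.juejin.im/query", payload, 30)  # keep posts with > 30 likes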