def _store_user(cur, conn, redis_conn, table_name, redis_key, user_info):
    """Insert one user's profile row and mark that user as crawled in Redis.

    The Redis marker is written only after the MySQL commit, so a failed
    insert leaves the user eligible for a later retry. Exceptions are not
    handled here; they propagate to the caller.

    Args:
        cur: Open database cursor used to execute the INSERT.
        conn: Database connection owning ``cur``; committed after the insert.
        redis_conn: Redis client holding the "already crawled" markers.
        table_name: Name of the table the row is inserted into.
        redis_key: Redis key that marks this user as crawled.
        user_info: Mapping of profile fields; must contain ``'user_unique'``.
    """
    # NOTE(review): the statement is executed without bound parameters, so
    # prepare_insert_sql presumably interpolates the scraped values itself;
    # confirm that it escapes them properly.
    insert_sql = prepare_insert_sql(table_name, user_info)
    res = cur.execute(insert_sql)
    conn.commit()  # the row only reaches the database once committed
    redis_conn.set(redis_key, 1)  # crawl marker, prevents re-crawling
    print(user_info['user_unique'] + ' ------ ' + str(res))


def user_spider(user_url):
    """Crawl the user at ``user_url`` and all of that user's followers.

    Each user whose Redis marker is missing has their profile inserted into
    the ``user_info`` table and is then marked as crawled. Errors raised
    while storing a user are printed and skipped so that one bad record does
    not abort the crawl; errors from connecting, from Redis lookups or from
    resolving a user's identifier propagate to the caller.

    Args:
        user_url: Profile URL handed to ``User`` to identify the start user.

    Returns:
        None.
    """
    database_name = 'wjw_zhihu'
    table_name = 'user_info'

    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration.
    conn = pymysql.connect(host='localhost', user='******', passwd='root', port=3306)
    try:
        cur = conn.cursor()
        try:
            conn.select_db(database_name)
            # Set the connection encoding, otherwise inserted text is garbled.
            cur.execute('set names utf8')

            # Redis records the user_unique of every user already crawled.
            redis_conn = redis.Redis(host='127.0.0.1', port=6379, db=0)

            user = User(user_url)
            user_key = get_user_redis_key(user.get_user_unique())
            if redis_conn.get(user_key) is None:
                # Fetching the start user's profile is deliberately outside
                # the try: a failure here propagates to the caller.
                user_info = user.get_user_info()
                try:
                    _store_user(cur, conn, redis_conn, table_name, user_key, user_info)
                except Exception as e:
                    # Best effort: report the failure and carry on.
                    print(str(e))

            # NOTE(review): the followee result is never consumed; the call
            # is kept only in case it has side effects. Confirm and remove.
            user.get_followees()

            # Walk everyone who follows this user.
            for count, follower in enumerate(user.get_followers(), start=1):
                if count % 10 == 0:
                    # Flush Redis to disk every 10 followers (blocks while
                    # saving), then pause briefly.
                    redis_conn.save()
                    time.sleep(0.3)

                follower_key = get_user_redis_key(follower.get_user_unique())
                if redis_conn.get(follower_key) is None:
                    try:
                        _store_user(
                            cur, conn, redis_conn, table_name,
                            follower_key, follower.get_user_info(),
                        )
                    except Exception as e:
                        # Best effort: report the failure and move on to the
                        # next follower.
                        print(str(e))
        finally:
            cur.close()
    finally:
        # Always release the database connection, even if the crawl fails.
        conn.close()