def cache_thread_member():
    """Reload thread and member data from the database into the cache.

    (Original: 从数据库载入数据(thread,member).)
    """
    # The two cache tables are handled identically: (table name, model).
    refresh_plan = (("forum_thread", Thread), ("common_member", Member))
    # Drop any stale cached copies first.
    for table_name, _ in refresh_plan:
        CacheService.cache_data_delete_model(table_name)
    # Re-import each table that is not currently marked as cached.
    for table_name, model in refresh_plan:
        if CacheService.cache_table_dict.get(table_name, False):
            continue
        entities = robot_session.query(model).all()
        CacheService.cache_data_import_model(entities, table_name)
def spread_match_files(limit=5):
    """Scan attachment records already imported and post them to the forum.

    (Original: 对结果入库的数据扫描, 并文件上传.)

    :parameter limit: number of records to scan per call
    """
    pending = robot_session.query(Attachment).filter(
        Attachment.status == 1).order_by(Attachment.id).limit(limit).all()

    if not pending:
        # Nothing to post right now: stay quiet for five minutes.
        time.sleep(5 * 60)
        return

    def _lookup_author(real_name):
        """Resolve a real name (pinyin) to a forum (uid, username) pair."""
        parts = USER_MAP_CONFIG.get(real_name).split("|")
        return int(parts[0]), parts[2]

    for record in pending:
        # Build subject, message, author and forum id for this attachment;
        # subject and message are both the file name without extension.
        base_name = os.path.basename(record.file_name)
        title = os.path.splitext(base_name)[0]
        poster = _lookup_author(record.author)
        board_id = record.plate
        post_info.info("=" * 80)
        post_info.info("正在发帖:%s" % base_name)
        tid, pid, aid = spread_info(title, title, poster, board_id,
                                    file_name=base_name,
                                    attachment=record.key_name)
        if not (tid and pid):
            post_info.info("发帖失败: Error.")
            continue
        try:
            # Mark the attachment as posted and persist an audit record.
            record.status = 2
            audit = Thread(tid, pid, board_id, aid, record.id)
            robot_session.add(record)
            robot_session.add(audit)
            robot_session.commit()
            post_info.info("发帖成功: OK.")
        except Exception as ex:
            robot_session.rollback()
            post_info.exception(ex)
        finally:
            robot_session.close()
def fix_member_miss_status():
    """Backfill missing status rows for auto-registered members.

    (Original: 修复自动注册的用户缺失状态数据.)

    Reads every robot-registered member, generates one fake status record
    per member, and inserts the corresponding CommonMemberStatus rows into
    the forum database in a single commit.
    """
    print("=" * 80)
    try:
        print("Info: Work is now being prepared.")
        # Auto-registered members.
        robot_member_entities = robot_session.query(Member).all()
        if not robot_member_entities:
            print("Info: No Data.")
            return
        gen_data_count = len(robot_member_entities)
        # Generate the complementary self-registration data.
        member_status_data = FakeMemberStatus().generate(gen_data_count)
        # Materialize once (generate() may yield lazily) since we index
        # into it below; list() replaces the former identity comprehension.
        member_status_list = list(member_status_data)
        print("Info: member_entities_total = %s." % gen_data_count)
        member_status_entities = []
        for index, member_entity in enumerate(robot_member_entities):
            # Coarse integer-percent progress indicator.
            print(
                "Info: %s %s%%." %
                (index,
                 str(int(float(index) / float(gen_data_count) * 100))))
            status_data = member_status_list[index]
            # NOTE(review): the dunder keyword names mirror the model's
            # column naming convention — confirm against CommonMemberStatus.
            member_status = CommonMemberStatus(
                __uid=member_entity.dz_uid,
                __regip=status_data['reg_ip'],
                __lastip=status_data['last_ip'],
                __lastvisit=int(time.time()),
                __lastactivity=int(time.time()))
            member_status_entities.append(member_status)
        forum_session.add_all(member_status_entities)
        forum_session.commit()
    except Exception as ex:
        print(ex)
        traceback.print_exc()
        forum_session.rollback()
    else:
        print("Info: Well Done.")
    finally:
        forum_session.close()
        print("All Work Have Finished.")
        print("=" * 80)
def init_redis_data(kind="md5sum"):
    """Initialize redis lookup data from the attachment table.

    (Original: 初始化redis的数据.)

    :parameter kind: which index to rebuild — "md5sum" maps
        md5sum -> attachment id, "unique" maps key_name -> attachment id.
    """
    attachment_entities = robot_session.query(
        Attachment, Attachment.id, Attachment.md5sum,
        Attachment.key_name).all()
    # Hoist the case-folded operation name: the original recomputed
    # kind.lower() in every branch and on every loop iteration.
    op = kind.lower()
    # Clear the existing data for the selected index.
    if op == "md5sum":
        redis_md5sum.flush_db()
    elif op == "unique":
        redis_unique.flush_db()
    # Rebuild the index, skipping rows whose key column is empty.
    for entity in attachment_entities:
        if op == "md5sum" and entity.md5sum:
            redis_md5sum.set(entity.md5sum, entity.id)
        elif op == "unique" and entity.key_name:
            redis_unique.set(entity.key_name, entity.id)
def update_name_files(limit=20):
    """Assign a unique key_name to imported attachments that lack one.

    (Original: 更新导入库的索引文件.)

    :parameter limit: maximum number of rows handled per call
    :returns: True when there was nothing left to process, False otherwise
    """
    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.key_name == "",
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()
    result = False
    if attachment_entities:
        for attachment in attachment_entities:
            suffix = Utils.get_info_by_path(attachment.file_name)[2]
            # Generate a unique identifier; re-roll on the (unlikely)
            # collision with a value already in the cache.
            while True:
                # BUG FIX: uuid4().get_hex() was a deprecated Python 2
                # accessor removed in Python 3; the .hex attribute
                # yields the identical value on both versions.
                key_name = ''.join((uuid.uuid4().hex, suffix))
                # NOTE(review): init_redis_data() stores key_name in
                # redis_unique, not redis_md5sum — confirm which cache
                # is authoritative for key_name collision checks.
                existing = redis_md5sum.get(key_name)
                if not existing:
                    break
            attachment.key_name = key_name
            # Record the new key in the cache for later comparisons.
            redis_md5sum.set(key_name, 1)
        try:
            robot_session.add_all(attachment_entities)
            robot_session.commit()
        except Exception as ex:
            print(ex)
            robot_session.rollback()
        else:
            print("OK")
        finally:
            robot_session.close()
    else:
        result = True
    return result
def upload_match_files(limit=5, loops=True):
    """Scan imported attachment records and upload their files.

    (Original: 对结果入库的数据扫描, 并文件上传.)

    :parameter limit: number of records fetched per scan
    :parameter loops: whether to rescan the seek directory after this pass
    """
    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()
    if attachment_entities:
        # map(map_handler, attachment_entities)
        for attachment in attachment_entities:
            # Per-record failure flag; a failure triggers an audible alarm
            # below and the loop moves on to the next record.
            errors = False
            upload_info.info("=" * 80)
            upload_info.info("正在上传:%s" % attachment.file_name)
            try:
                # Upload the file to Qiniu cloud storage.
                ret, info = put_up_datum(key=attachment.key_name,
                                         kind="file",
                                         file_path=attachment.file_name,
                                         progress_handler=progress_handler)
            except Exception as ex:
                errors = True
                upload_info.exception(ex)
            else:
                upload_info.info(ret)
                upload_info.info(info)
                # The upload is treated as successful only when the
                # returned key matches the one we requested.
                if ret and ret["key"] == attachment.key_name:
                    try:
                        # Update the record's post-upload state.
                        attachment = attachment.after_upload_action("")
                        robot_session.add(attachment)
                        robot_session.commit()
                    except Exception as ex:
                        errors = True
                        robot_session.rollback()
                        upload_info.exception(ex)
                        # NOTE(review): upload_error.log(msg) — the stdlib
                        # Logger.log signature needs a level first; confirm
                        # upload_error is a custom wrapper.
                        upload_error.log(
                            upload_only_log %
                            (attachment.upload_datetime, attachment.id))
                    else:
                        # Move successfully handled files out of the way.
                        file_name_list = [attachment.file_name]
                        try:
                            fileFinished.batch_move(file_name_list)
                        except Exception as ex:
                            errors = True
                            upload_info.exception(ex)
                    finally:
                        robot_session.close()
            # On any failure: play an alarm and skip to the next record.
            if errors:
                media_instance.play()
                continue
    else:
        # No pending records: stay quiet for five minutes.
        time.sleep(5 * 60)
    if loops:
        # Rescan the watch directory for newly arrived files.
        search_match_files(SEEK_DIRECTORY)
def scat_content_to_user():
    """Redistribute auto-posted threads from one user to a pool of users.

    (Original: 分发自动发帖数据到部分用户.)

    Threads posted by `username` that match the auto-post log inside
    `datetime_range` are split into `split_groups` shares: one share stays
    with the original author, the rest are reassigned to random members
    whose uid lies inside `thread_range`.
    """
    print("=" * 80)
    print("Info: Work is now being prepared.")
    split_groups = 5
    username = '******'
    # Candidate authors have uid in [56, 200].
    thread_range = (56, 200)
    # Only auto-posts created inside this window are redistributed.
    datetime_range = ('2015-11-20 00:00:00', '2015-11-21 23:00:00')
    try:
        # All forum threads owned by the source user.
        thread_entities = forum_session.query(ForumThread).filter(
            ForumThread.__author == username).all()
        if not thread_entities:
            print("Info: No Data.")
            return
        # Pool of (uid, username) pairs threads may be reassigned to.
        author_entities = robot_session.query(Member).filter(
            Member.dz_uid.between(thread_range[0], thread_range[1])).all()
        author_list = [(author.dz_uid, author.username)
                       for author in author_entities]
        # Thread/post ids logged by the auto-posting robot in the window.
        thread_logs = robot_session.query(
            Thread.thread_id, Thread.post_id).filter(
                Thread.create_datetime.between(
                    datetime_range[0], datetime_range[1])).all()
        post_ids = [entry.post_id for entry in thread_logs]
        thread_ids = [entry.thread_id for entry in thread_logs]
        unit_entities = []
        threads_total = len(thread_entities)
        thread_normal, thread_moved, thread_retain = 0, 0, 0
        print("Info: threads_total = %s." % threads_total)
        print("=" * 80)
        for index, thread_entity in enumerate(thread_entities):
            # Coarse integer-percent progress indicator.
            print("Info: %s %s%%." %
                  (index,
                   str(int(float(index) / float(threads_total) * 100))))
            # Only redistribute auto-posted threads; skip threads the user
            # genuinely posted on the site.
            if thread_entity.__tid not in thread_ids:
                thread_normal += 1
                continue
            # Split into `split_groups` shares; keep one share (every
            # split_groups-th thread) for the original author.
            if index % split_groups == 0:
                thread_retain += 1
                continue
            # Reassign the thread to a random member.
            # BUG FIX: the original four assignments below ended with a
            # stray trailing comma, so each attribute received a 1-tuple
            # (e.g. (uid,)) instead of the scalar value.
            author = random.choice(author_list)
            thread_entity.__author = author[1]
            thread_entity.__authorid = author[0]
            # Reassign the thread's first post as well.
            # NOTE(review): the thread entity is mutated before this
            # check; if the post lookup fails, the session may still
            # flush the thread change on commit — confirm intent.
            post_entity = forum_session.query(ForumPost).filter(
                ForumPost.__tid == thread_entity.__tid).first()
            if not post_entity or post_entity.__pid not in post_ids:
                continue
            post_entity.__author = author[1]
            post_entity.__authorid = author[0]
            unit_entities.append(thread_entity)
            unit_entities.append(post_entity)
        # Entities were appended in (thread, post) pairs; floor division
        # keeps the count an int on Python 3.
        thread_moved = len(unit_entities) // 2
        forum_session.add_all(unit_entities)
        forum_session.commit()
    except Exception as ex:
        print(ex)
        traceback.print_exc()
        forum_session.rollback()
    else:
        print("=" * 80)
        print("Info: About User(%s) In %s Info:" % (username, datetime_range))
        print("Info: 论坛正常帖数 %(d)s 分发出贴数 %(d)s 保留的帖数" % {"d": " " * 20})
        print(
            "Info: thread_normal = %d thread_moved = %d thread_retain = %d."
            % (thread_normal, thread_moved, thread_retain))
        print("Info: Well Done.")
    finally:
        forum_session.close()
        print("All Work Have Finished.")
        print("=" * 80)