def search_match_files(directory):
    """Recursively scan a directory and index its files into the database.

    Sub-directories are descended into. When ``ENABLE_FOLDER_RULE`` is set,
    only folders whose lower-cased name is a key of ``PLATE_MAP_CONFIG`` or
    ``USER_MAP_CONFIG`` are followed. When ``SKIP_README_FILE`` is set, files
    whose lower-cased name appears in ``IGNORE_FILE_LIST`` are skipped.

    A file whose MD5 is already present in ``redis_md5sum`` is stored as a
    ``Surplus`` record and logged as skipped. Any other file is stored as an
    ``Attachment`` row, committed, and its MD5 is cached with the new row id.

    :parameter directory: directory to scan
    """
    # Both lookups are loop-invariant, so build them once per directory.
    known_folders = set()
    if ENABLE_FOLDER_RULE:
        known_folders.update(PLATE_MAP_CONFIG.keys())
        known_folders.update(USER_MAP_CONFIG.keys())
    ignored_names = set()
    if SKIP_README_FILE:
        ignored_names.update(x.lower() for x in IGNORE_FILE_LIST)

    for entry in os.listdir(directory):
        sub_path = os.path.join(directory, entry)

        if os.path.isdir(sub_path):
            # Skip folders that map to neither a plate nor an author.
            if ENABLE_FOLDER_RULE:
                folder_name = os.path.basename(sub_path).lower()
                if folder_name not in known_folders:
                    continue
            search_match_files(sub_path)
            continue

        # Skip files on the ignore list (empty unless SKIP_README_FILE).
        if os.path.basename(sub_path).lower() in ignored_names:
            continue

        # Resolve plate and author from the path; defaults are
        # plate=0, author=''.
        if ENABLE_FOLDER_RULE:
            author, plate = Utils.get_info_by_path(sub_path)[:2]
            plate = PLATE_MAP_CONFIG.get(plate)
        else:
            author, plate = '', 0

        # A known MD5 means a duplicate: record it and move on.
        md5sum = Utils.md5sum(sub_path)
        fid = redis_md5sum.get(md5sum)
        if fid:
            Surplus(sub_path, plate=plate, author=author,
                    md5sum=md5sum, fid=fid).__save(robot_session)
            record_info.info("skipping: %s ==> %s" % (author, sub_path))
            continue

        record_info.info("indexing: %s ==> %s" % (author, sub_path))
        suffix = Utils.get_info_by_path(sub_path)[2]
        # ``UUID.hex`` works on Python 2 and 3; ``get_hex()`` is Python 2 only.
        key_name = ''.join((uuid.uuid4().hex, suffix))
        entity = Attachment(sub_path, key_name, plate=plate,
                            author=author, md5sum=md5sum)
        robot_session.add(entity)
        robot_session.commit()
        redis_md5sum.set(entity.md5sum, entity.id)
def spread_match_files(limit=5):
    """Post a forum thread for each stored attachment with ``status == 1``.

    Up to ``limit`` ``Attachment`` rows are fetched, ordered by id. For each
    one the file's base name (without extension) is used as both subject and
    message of the post made through ``spread_info``. On success the row's
    status is set to 2 and a ``Thread`` record is stored. When no rows are
    found, the function sleeps for five minutes.

    :parameter limit: maximum number of rows to process per call
    """

    def author_uid_and_name(real_name):
        """Map an author key to the forum account (account id, account name).

        :parameter real_name: key looked up in ``USER_MAP_CONFIG``
        """
        fields = USER_MAP_CONFIG.get(real_name).split("|")
        return int(fields[0]), fields[2]

    pending = (robot_session.query(Attachment)
               .filter(Attachment.status == 1)
               .order_by(Attachment.id)
               .limit(limit)
               .all())

    if not pending:
        # Nothing to do: stay quiet for five minutes.
        time.sleep(5 * 60)
        return

    for attachment in pending:
        # Build the thread, post and attachment parameters.
        file_base_name = os.path.basename(attachment.file_name)
        title = os.path.splitext(file_base_name)[0]
        author = author_uid_and_name(attachment.author)
        fid = attachment.plate

        post_info.info("=" * 80)
        post_info.info("正在发帖:%s" % file_base_name)
        tid, pid, aid = spread_info(title, title, author, fid,
                                    file_name=file_base_name,
                                    attachment=attachment.key_name)

        if not (tid and pid):
            post_info.info("发帖失败: Error.")
            continue

        try:
            # Mark the row as posted and keep a record of the new thread.
            attachment.status = 2
            thread_record = Thread(tid, pid, fid, aid, attachment.id)
            robot_session.add(attachment)
            robot_session.add(thread_record)
            robot_session.commit()
            post_info.info("发帖成功: OK.")
        except Exception as ex:
            robot_session.rollback()
            post_info.exception(ex)
        finally:
            robot_session.close()
def map_handler(_attachment):
    """Upload one attachment to Qiniu; intended for use with ``map``.

    A fresh key name is generated from a random UUID plus the file suffix.
    If the upload reports that key back, the row returned by
    ``_attachment.after_upload_action("")`` is committed and the file is
    moved away via ``fileFinished.batch_move``. Failures are logged and
    never re-raised.

    :parameter _attachment: attachment row describing the file to upload
    """
    _suffix = Utils.get_info_by_path(_attachment.file_name)[2]
    # ``UUID.hex`` works on Python 2 and 3; ``get_hex()`` is Python 2 only.
    _key_name = ''.join((uuid.uuid4().hex, _suffix))

    upload_info.info("=" * 80)
    upload_info.info("正在上传:%s" % _attachment.file_name)

    try:
        # Upload the file to Qiniu.
        _ret, _info = put_up_datum(key=_key_name, kind="file",
                                   file_path=_attachment.file_name,
                                   progress_handler=progress_handler)
    except Exception as ex:
        upload_info.exception(ex)
        return

    upload_info.info(_ret)
    upload_info.info(_info)
    if not (_ret and _ret["key"] == _key_name):
        return

    try:
        # Persist the post-upload state of the row.
        attachment = _attachment.after_upload_action("")
        robot_session.add(attachment)
        robot_session.commit()
    except Exception as ex:
        robot_session.rollback()
        upload_info.exception(ex)
        # Uploaded but not recorded: write it to the dedicated error log.
        upload_error.info(
            upload_only_log % (_attachment.upload_datetime, _attachment.id))
    else:
        # Move the successfully uploaded file away.
        file_name_list = [attachment.file_name]
        try:
            fileFinished.batch_move(file_name_list)
        except Exception as ex:
            upload_info.exception(ex)
    finally:
        robot_session.close()
def fake_post(gen_data_count=1):
    """Generate fake replies to existing threads.

    For every entity produced by ``FakePost().generate`` a reply is posted
    through ``spread_post``. On success the thread's last-poster fields are
    updated in ``forum_session`` and a ``Post`` record is stored in
    ``robot_session``. If recording fails after the reply was posted, the
    reply is written to ``faker_post_error``.

    :parameter gen_data_count: number of fake replies to generate
    """
    for entity in FakePost().generate(gen_data_count):
        uid, tid, fid = entity["uid"], entity["tid"], entity["fid"]
        username, message = entity["username"], entity["message"]

        faker_post_info.info("=" * 80)
        faker_post_info.info("message = %s" % message)
        faker_post_info.info("(%s)正在回帖(%s)" % (username, tid))

        pid = spread_post(uid, tid, fid, username, message)
        if not pid:
            faker_post_info.info("回帖失败: Error.")
            continue

        try:
            # Update the thread's last-reply information.
            forum_thread = forum_session.query(ForumThread).filter(
                ForumThread.__tid == tid).first()
            forum_thread.__lastposter = username
            forum_thread.__lastpost = int(time.time())
            forum_session.add(forum_thread)
            forum_session.commit()

            post = Post(uid, tid, pid, fid)
            robot_session.add(post)
            robot_session.commit()
        except Exception as ex:
            robot_session.rollback()
            # The forum session takes part in this try block too; without a
            # rollback a failed flush/commit would leave it unusable for the
            # following iterations.
            forum_session.rollback()
            faker_post_info.exception(ex)
            faker_post_info.info("回帖成功但记录失败: OK.")
            time_now = datetime.datetime.now().strftime("%Y-%m-%d %X")
            faker_post_error.info(faker_post_only % (uid, tid, pid, time_now))
        else:
            faker_post_info.info("回帖成功: OK.")
        finally:
            robot_session.close()
def update_name_files(limit=20):
    """Assign a unique key name to indexed attachments that lack one.

    Fetches up to ``limit`` ``Attachment`` rows with an empty ``key_name``
    and ``status == 0``, gives each a ``<uuid hex><suffix>`` key name that is
    not yet present in ``redis_md5sum``, and commits the batch.

    :parameter limit: maximum number of rows handled per call
    :return: ``True`` when no matching rows were found, ``False`` otherwise
        (including when the commit failed)
    """
    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.key_name == "",
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()

    if not attachment_entities:
        return True

    for attachment in attachment_entities:
        suffix = Utils.get_info_by_path(attachment.file_name)[2]
        # Draw identifiers until one is found that the cache has not seen.
        while True:
            # ``UUID.hex`` works on Python 2 and 3; ``get_hex()`` is
            # Python 2 only.
            key_name = ''.join((uuid.uuid4().hex, suffix))
            if not redis_md5sum.get(key_name):
                break
        attachment.key_name = key_name
        # Remember the key so later draws can be compared against it.
        redis_md5sum.set(key_name, 1)

    try:
        robot_session.add_all(attachment_entities)
        robot_session.commit()
    except Exception as ex:
        print(ex)
        robot_session.rollback()
    else:
        print("OK")
    finally:
        robot_session.close()

    return False
def upload_match_files(limit=5, loops=True):
    """Upload indexed attachments (``status == 0``) to Qiniu.

    Up to ``limit`` rows are fetched, ordered by id. Each file is uploaded
    under its stored ``key_name``; when the upload reports that key back,
    the row returned by ``after_upload_action("")`` is committed and the
    file is moved away via ``fileFinished.batch_move``. Any failure for a
    row is logged and triggers ``media_instance.play()`` as an alert before
    the next row is processed.

    When no rows are found the function sleeps for five minutes and then,
    if ``loops`` is true, rescans ``SEEK_DIRECTORY``.

    :parameter limit: maximum number of rows fetched per call
    :parameter loops: whether to rescan the directory once no data is left
    """
    attachment_entities = robot_session.query(Attachment).filter(
        Attachment.status == 0).order_by(Attachment.id).limit(limit).all()

    if not attachment_entities:
        # No data: stay quiet for five minutes, then optionally rescan.
        time.sleep(5 * 60)
        if loops:
            search_match_files(SEEK_DIRECTORY)
        return

    for attachment in attachment_entities:
        errors = False
        upload_info.info("=" * 80)
        upload_info.info("正在上传:%s" % attachment.file_name)

        try:
            # Upload the file to Qiniu.
            ret, info = put_up_datum(key=attachment.key_name, kind="file",
                                     file_path=attachment.file_name,
                                     progress_handler=progress_handler)
        except Exception as ex:
            errors = True
            upload_info.exception(ex)
        else:
            upload_info.info(ret)
            upload_info.info(info)
            if ret and ret["key"] == attachment.key_name:
                try:
                    # Persist the post-upload state of the row.
                    attachment = attachment.after_upload_action("")
                    robot_session.add(attachment)
                    robot_session.commit()
                except Exception as ex:
                    errors = True
                    robot_session.rollback()
                    upload_info.exception(ex)
                    # ``Logger.log`` needs a level as its first argument;
                    # use ``info`` like ``map_handler`` does for this record.
                    upload_error.info(
                        upload_only_log % (attachment.upload_datetime,
                                           attachment.id))
                else:
                    # Move the successfully uploaded file away.
                    file_name_list = [attachment.file_name]
                    try:
                        fileFinished.batch_move(file_name_list)
                    except Exception as ex:
                        errors = True
                        upload_info.exception(ex)
                finally:
                    robot_session.close()

        # On any error, sound the alert and carry on with the next row.
        if errors:
            media_instance.play()
def fake_member(gen_data_count=1):
    """Create fake forum accounts.

    Keep ``gen_data_count`` small so registrations do not jump in bursts at
    a single point in time.

    For each generated entity a ``CommonMember``, a ``CenterMember`` and a
    ``CommonMemberStatus`` row are written through ``forum_session``, and a
    ``Member`` row (holding the plain password) through ``robot_session``.
    On success the member is also handed to
    ``CacheService.cache_data_insert_model``. On failure both sessions are
    rolled back; both sessions are closed after every entity.

    :parameter gen_data_count: number of accounts to generate
    """
    member_status_list = list(FakeMemberStatus().generate(gen_data_count))

    for index, entity in enumerate(FakeMember().generate(gen_data_count)):
        username = entity["username"].lower()

        # Build a 6-20 character password by sampling characters from the
        # generated password plus the assist number.
        length = random.randint(6, 20)
        alphabet = ''.join(
            (entity["password"], str(entity["assist_number"])))
        password = ''.join(random.choice(alphabet) for _ in range(length))

        # Real password hash as stored by the user centre (md5 + salt).
        salt = "".join(
            random.choice(string.ascii_lowercase + string.digits)
            for _ in range(6))
        hash_password = Utils.dz_uc_md5(password, salt)

        # Placeholder hash for the member table: md5 of a random 10-digit
        # number (lower bound is 10 ** 9, not 10 * 9).
        fake_password = Utils.md5(
            str(random.randint(10 ** 9, 10 ** 10 - 1)))

        faker_user_info.info("=" * 80)
        faker_user_info.info("正在注册账户:%s" % username)

        try:
            common_member = CommonMember(__groupid=10,
                                         __username=username,
                                         __password=fake_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()))
            forum_session.add(common_member)
            # Flush so the new uid is available for the dependent rows.
            forum_session.flush()
            uid = common_member.__uid

            center_member = CenterMember(__salt=salt,
                                         __username=username,
                                         __password=hash_password,
                                         __email=entity["email"],
                                         __regdate=int(time.time()),
                                         __uid=uid)
            forum_session.add(center_member)

            status_data = member_status_list[index]
            member_status = CommonMemberStatus(
                __uid=uid,
                __regip=status_data['reg_ip'],
                __lastip=status_data['last_ip'],
                __lastvisit=int(time.time()),
                __lastactivity=int(time.time()))
            forum_session.add(member_status)
            forum_session.commit()

            member = Member(username, password, entity["email"], uid)
            robot_session.add(member)
            robot_session.commit()
        except Exception as ex:
            faker_user_info.exception(ex)
            faker_user_info.info("注册账户失败: Error.")
            forum_session.rollback()
            robot_session.rollback()
        else:
            faker_user_info.info("注册账户成功: OK.")
            CacheService.cache_data_insert_model("common_member", member)
        finally:
            forum_session.close()
            robot_session.close()