def FileWithLink():
    """Copy a Markdown table of file names, links and sizes to the clipboard.

    Two lookup tables are loaded through ``MyDef``: one whose values are
    file paths (presumably keyed by MD5 -- confirm against
    ``MyDef.ReadDictC``) and one mapping a file path to a share link.
    Every path becomes a ``[name](link) | size`` row.  Rows are sorted,
    prefixed with a two-line table header, and rows that have no link are
    reduced to plain ``name | size`` text.  The result is joined with
    CRLF and handed to ``MyDef.WriteClip``; the elapsed time reported by
    ``MyDef.RunTime`` is printed last.
    """
    import time, os, MyDef

    start_time = time.time()  # starting timestamp

    # ======================== input section ========================
    path_table_file = '/Users/alicewish/Dropbox/漫画图源MD5表.csv'
    path_table = MyDef.ReadDictC(path_table_file, True)
    link_table_file = '/Users/alicewish/Dropbox/漫画图源度盘地址表.csv'
    link_table = MyDef.ReadDictB(link_table_file)

    rows = []
    for table_key in path_table:
        source_path = path_table[table_key]
        size_text = sizeof_fmt(os.path.getsize(source_path))
        base_name = os.path.split(source_path)[1]
        # A missing link yields the empty "[name]()" form, which is
        # flattened to plain text after sorting.
        link_target = link_table[source_path] if source_path in link_table else ""
        row = "".join(("[", base_name, "](", link_target, ") | ", size_text))
        print(base_name)
        rows.append(row)

    table_lines = ['文件名 | 大小', '--- | ---'] + sorted(rows)
    # Drop the leading "[" and the empty "]()" from rows without a link.
    table_lines = [
        entry[1:].replace("]()", "") if "]()" in entry else entry
        for entry in table_lines
    ]

    # ================ write to clipboard ================
    MyDef.WriteClip('\r\n'.join(table_lines))
    print(MyDef.RunTime(start_time))
def _emphasis_levels(markdown_line, plain_length):
    """Return ``(mark_counts, levels)`` describing the ``*`` markup of a line.

    ``mark_counts[p]`` is the number of ``*`` characters that appear directly
    before plain-text position ``p`` (index ``plain_length`` holds trailing
    marks).  ``levels[p]`` is the running emphasis value assigned to the
    plain-text character at ``p``; 0 means no emphasis.
    """
    mark_counts = [0] * (plain_length + 1)
    position = 0
    for char in markdown_line:
        if char == '*':
            mark_counts[position] += 1
        else:
            position += 1

    levels = [0] * plain_length
    start = 0
    before = 0  # emphasis value of the character preceding the current run
    for seg in markdown_line.split("*"):
        if seg == '':
            continue
        end = start + len(seg)
        if start > 0:
            before = levels[start - 1]
        marks = mark_counts[start]
        if before == 0:
            level = marks
        elif (before, marks) in ((1, 1), (2, 2)) or before == 3:
            # Marks that close (part of) the emphasis currently open.
            level = before - marks
        elif (before, marks) in ((1, 2), (2, 1)):
            # Marks that open additional emphasis on top of the current one.
            level = before + marks
        else:
            level = None  # unrecognised combination: leave the run at 0
        if level is not None:
            levels[start:end] = [level] * (end - start)
        start = end
    return mark_counts, levels


def _cut_allowed_flags(plain_line, words):
    """Return a 0/1 list; 1 at index ``p`` means a cut is allowed after ``p``.

    Word ends reported by the tokenizer are allowed first, then cuts that
    would separate punctuation or certain particles from their neighbours
    are removed again.
    """
    flags = [0] * len(plain_line)
    word_end = 0
    for word in words:
        word_end += len(word)
        flags[word_end - 1] = 1

    for i, char in enumerate(plain_line):
        if char in ',.?!,。…?!”·-》>:】【]、':  # no cut right before these
            # At i == 0 there is no preceding position; indexing i - 1 would
            # wrap around and clear the end-of-line boundary instead.
            if i > 0:
                flags[i - 1] = 0
        elif char in '“《<【[':  # no cut right after these
            flags[i] = 0
        elif char in '上中下内出完的地得了吗吧着个就前世里嘛图们来呗':
            # A stand-alone particle: forbid the cut before it.
            if i > 0 and flags[i - 1] == 1 and flags[i] == 1:
                flags[i - 1] = 0
        elif char in '太每帮跟另':
            # A stand-alone word of this set: forbid the cut after it.  The
            # start of the line counts as a boundary before index 0.
            if (i == 0 or flags[i - 1] == 1) and flags[i] == 1:
                flags[i] = 0
    return flags


def Processing():
    """Split the lines of a Markdown file using known cut schemes.

    The file ``/Users/alicewish/Downloads/my.md`` is read line by line.
    Empty lines become a ``'\\n|\\n'`` placeholder.  Two-digit lines, the
    first eight lines and lines longer than two characters that start with
    ``*`` or ``[`` are copied through with ``\\[`` / ``\\]`` unescaped.
    Every other line is tokenized with jieba and matched against the
    schemes returned by ``Training()``.  Each scheme is a string whose first
    two characters are the zero-padded line length and whose text from
    index 7 on is a ``-`` separated list of segment lengths.  The first
    scheme whose every segment ends on an allowed cut position is applied,
    and each segment is emitted as its own output line with ``*`` emphasis
    marks re-inserted.  Lines with no fitting scheme are emitted unchanged
    and counted as pending.

    The result is joined with CRLF and passed to ``MyDef.WriteClip``;
    counters and the elapsed time from ``MyDef.RunTime`` are printed.
    """
    import time, jieba, re, MyDef

    start_time = time.time()  # starting timestamp
    scenario_list_full = Training()

    # ====================== processing section ======================
    dict_file_path = '/Users/alicewish/我的坚果云/userdict.txt'  # custom dictionary path
    cut_right_count = 0
    cut_wrong_count = 0

    # ======================== input section ========================
    input_file_path = "/Users/alicewish/Downloads/my.md"
    # Read the whole input; the context manager closes the handle.
    with open(input_file_path, 'r') as input_file:
        read_text = input_file.read()
    text_readline = read_text.replace("\nclass", "class").replace("...", "…").splitlines()

    output_readline = []  # output lines
    jieba.load_userdict(dict_file_path)

    for a, raw_line in enumerate(text_readline):
        source_line = re.sub(r'<span.*</span>', '', raw_line)  # strip span tags
        source_line = source_line.replace('……', '…')
        # Escaped asterisks are swapped for the placeholder 'の' so they are
        # not counted as emphasis marks.
        # NOTE(review): a literal 'の' in the text would come back as '*'.
        markdown_line = source_line.replace("\\*", "の").replace(
            "\\[", "[").replace("\\]", "]")
        print(markdown_line)
        print(markdown_line.split("*"))
        plain_line = markdown_line.replace("*", "").replace("の", "*")  # restore literal *
        print(plain_line)

        line_mark_count_list, line_formmat_list = _emphasis_levels(
            markdown_line, len(plain_line))
        print(line_mark_count_list)
        print(line_formmat_list)
        print(plain_line)
        print(len(plain_line))

        if plain_line == "":  # empty line
            output_readline.append('\n|\n')
            continue

        is_page_number = len(plain_line) == 2 and re.match(r'[0-9][0-9]', plain_line)
        is_header = a < 8
        is_note = len(plain_line) > 2 and plain_line[0] in ('*', '[')
        if is_page_number or is_header or is_note:  # no cutting needed
            output_readline.append(
                source_line.replace("\\[", "[").replace("\\]", "]"))
            continue

        # ================ jieba tokenization ================
        string_list = list(jieba.cut(plain_line))  # default (accurate) mode
        print(string_list)

        # The allowed cut positions depend only on the line and its tokens,
        # so they are computed once rather than per candidate scheme.
        line_can_cut_list = _cut_allowed_flags(plain_line, string_list)

        # Candidate schemes span from the first to the last entry whose
        # length prefix matches this line.
        length_key = str(len(plain_line)).zfill(2)
        matching = [
            index for index, scenario in enumerate(scenario_list_full)
            if scenario[0:2] == length_key
        ]

        # ================ try the schemes in order ================
        cut_right = False
        current_cut_list = []
        if matching:  # no matching scheme leaves the line uncut
            for current_i in range(matching[0], matching[-1] + 1):
                current_cut_list = scenario_list_full[current_i][7:].split("-")
                print(line_can_cut_list)
                print(current_cut_list)
                boundary = 0
                cut_right = True
                for length_text in current_cut_list:
                    previous = boundary
                    boundary += int(length_text)
                    print(line_can_cut_list[boundary - 1])
                    print(plain_line[previous:boundary])
                    if line_can_cut_list[boundary - 1] == 0:
                        cut_right = False
                print(cut_right)
                if cut_right:
                    break

        if cut_right:  # a scheme fits
            cut_right_count += 1
            boundary = 0
            for length_text in current_cut_list:
                previous = boundary
                boundary += int(length_text)
                output_line = plain_line[previous:boundary]
                output_line_format_list = line_formmat_list[previous:boundary]
                # Pad with level 0 on both sides so emphasis still open at a
                # segment edge is closed / reopened there.
                padded = [0] + output_line_format_list + [0]
                mark_counts = [
                    abs(padded[b + 1] - padded[b])
                    for b in range(len(padded) - 1)
                ]
                print(mark_counts)
                pieces = [
                    '*' * count + char
                    for count, char in zip(mark_counts, output_line)
                ]
                pieces.append('*' * mark_counts[-1])
                output_readline.append(''.join(pieces))
                print("格式", output_line_format_list)
        else:  # no scheme fits
            output_readline.append(source_line)
            cut_wrong_count += 1

    print('切对', cut_right_count)
    print('待切', cut_wrong_count)
    # ================ write to clipboard ================
    text = '\r\n'.join(output_readline)
    MyDef.WriteClip(text)
    print(MyDef.RunTime(start_time))
# Tail of the script: write the collected lines to a CSV file, copy the
# Markdown lines to the clipboard, and report the elapsed time.
# NOTE(review): the "- 1" presumably excludes a header row in
# output_readline -- confirm where the list is built.
all_count = len(output_readline) - 1
print(all_count)
text = '\r\n'.join(output_readline)
# print(text)
output_file_name = '0 Day Week文件地址-墨问非名制作-' + now_date + '(' + str(
    all_count) + ').csv'
output_file_path = os.path.join(dropbox_path, output_file_name)
# The context manager closes the file even if the write fails.
with open(output_file_path, 'w') as f:
    f.write(text)
# ================ write to clipboard ================
markdown_text = '\r\n'.join(markdown_readline)
MyDef.WriteClip(markdown_text)
# ================ elapsed time ================
run_time = time.time() - start_time
if run_time < 60:  # seconds, two decimals
    print("耗时:{:.2f}秒".format(run_time))
elif run_time < 3600:  # minutes + seconds
    # Truncate to whole seconds before splitting; formatting the float
    # remainder with {:.0f} rounds and could print "60秒".
    print("耗时:{:.0f}分{:.0f}秒".format(int(run_time) // 60, int(run_time) % 60))
else:  # hours, minutes, seconds
    print("耗时:{:.0f}时{:.0f}分{:.0f}秒".format(int(run_time) // 3600,
                                           int(run_time) % 3600 // 60,
                                           int(run_time) % 60))