def get_dirfiles_into_list_luhn(file_dir, replace_dir):
    """Index the files found under ``file_dir`` by a truncated file name.

    The key for each file is ``tools.get_name(path)`` with its first eight
    characters removed.

    * When ``replace_dir`` is the empty string, each key maps to the
      original path itself (a single value, not a list); if several files
      share a key, the last one listed wins.
    * Otherwise each key maps to a list of strings of the form
      ``replace_dir + "/" + <name> + ".txt"``, one per matching file.

    NOTE(review): assumes ``tools.get_filelist`` fills the list passed as its
    second argument in place -- confirm against the ``tools`` module.
    """
    found_paths = []
    tools.get_filelist(file_dir, found_paths)

    result = {}
    for path in found_paths:
        base_name = tools.get_name(path)
        key = base_name[8:]

        if replace_dir == "":
            # Plain assignment on purpose: the value is the path, not a list.
            result[key] = path
            continue

        replaced = str(replace_dir + "/" + base_name + ".txt")
        result.setdefault(key, []).append(replaced)
    return result
def replace_words_by_num(whole_words, file_dir, save_dir):
    """Rewrite selected files, replacing each known word by its mapped value.

    Args:
        whole_words: mapping from word to the value written in its place
            (converted with ``str``). Words missing from the mapping are
            dropped from the output.
        file_dir: directory handed to ``tools.get_filelist`` together with a
            predicate that is true for entries containing ``"all"``.
        save_dir: output directory. If it already exists it is removed with
            ``shutil.rmtree`` before anything is written.

    For every input file one output file ``<name>.txt`` is written inside
    ``save_dir``, where ``<name>`` is ``tools.get_name(path)``. Each input
    line becomes one line of space-separated values. Because the accumulated
    text is stripped after every line, a line that yields no values does not
    produce an empty output line (a file whose lines all yield nothing is
    written as a single newline).

    NOTE(review): assumes ``tools.get_filelist`` fills the given list in
    place and uses the predicate to keep matching entries -- confirm against
    the ``tools`` module.
    """
    if os.path.lexists(save_dir):
        shutil.rmtree(save_dir)

    def contains_all(entry):
        return "all" in entry

    source_files = []
    tools.get_filelist(file_dir, source_files, contains_all)

    content = {}
    for path in source_files:
        text = ""
        for line in tools.read_lines(path):
            codes = [
                str(whole_words[word])
                for word in line.split(" ")
                if word and word in whole_words
            ]
            # Strip the whole accumulated text (not just this line) so that
            # lines without any mapped word leave no blank line behind.
            text = (text + " ".join(codes)).strip() + "\n"
        content[tools.get_name(path)] = text

    for name, text in content.items():
        # os.path.join inserts the separator only when save_dir lacks one, so
        # files always land inside save_dir rather than next to it.
        tools.write(os.path.join(save_dir, name + ".txt"), text)
def read_file(self, dir):
    """Read every file listed for ``dir`` and index its sentences.

    Files are processed in sorted order of the entries collected by
    ``tools.get_filelist``; each is read as UTF-8 text and split with
    ``self.seperate_sentences``.

    Returns:
        A ``(data, reverse_data)`` tuple where ``data`` maps each listed
        file entry to its sentences, and ``reverse_data`` maps each sentence
        to a list of ``tools.get_name(entry)`` values, one per occurrence
        (so a sentence repeated in one file contributes repeated names).

    NOTE(review): assumes ``tools.get_filelist`` fills the given list in
    place -- confirm against the ``tools`` module.
    """
    collected = []
    tools.get_filelist(dir, collected)

    data, reverse_data = {}, {}
    for path in sorted(collected):
        with open(path, mode="r", encoding="utf-8") as handle:
            text = handle.read()

        sentences = self.seperate_sentences(text)
        data[path] = sentences

        for sentence in sentences:
            reverse_data.setdefault(sentence, []).append(tools.get_name(path))
    return data, reverse_data
def result_process(file_dir, save_dir):
    """Segment every file under ``file_dir`` with jieba and save the result.

    If ``save_dir`` already exists it is removed with ``shutil.rmtree``
    first. For each file collected by ``tools.get_filelist``, every line is
    passed through ``jieba.cut`` and its tokens are joined with single
    spaces; the resulting lines are handed to ``tools.write_list`` with the
    path ``save_dir + "/" + <name> + ".txt"``, where ``<name>`` is
    ``tools.get_name(path)``.

    NOTE(review): assumes ``tools.get_filelist`` fills the given list in
    place -- confirm against the ``tools`` module.
    """
    if os.path.lexists(save_dir):
        shutil.rmtree(save_dir)

    source_paths = []
    tools.get_filelist(file_dir, source_paths)

    for path in source_paths:
        lines = tools.read_lines(path)
        name = tools.get_name(path)
        segmented = [" ".join(jieba.cut(line)) for line in lines]
        tools.write_list(save_dir + "/" + name + ".txt", segmented)