def first(stmt, search_set):
    """Collect the symbols that can start the sequence ``stmt``.

    Behaviour by case:

    * ``stmt`` is empty: ``search_set`` is returned as-is (same object).
    * The leading symbol is in ``terminal_set``: a one-element set holding
      that symbol is returned.
    * The leading symbol is in ``non_terminal_set``: ``first_set(symbol)``
      is returned directly when it does not contain ``''``. When it does,
      ``''`` is dropped from a copy and the result computed for the
      remaining symbols is merged in.
    * Anything else: ``None``.

    ``terminal_set``, ``non_terminal_set`` and ``first_set`` are defined
    outside this function. ``''`` presumably marks the empty (epsilon)
    derivation -- confirm against the grammar representation.
    """
    if len(stmt) == 0:
        return search_set

    head = stmt[0]
    if head in terminal_set:
        return {head}
    if head not in non_terminal_set:
        # Unknown symbol: there is no branch for it, so the result is None.
        return None

    head_first = first_set(head)
    if '' not in head_first:
        return head_first

    # The leading symbol may contribute nothing, so the rest of the
    # sequence contributes too. Work on a new set so the value handed
    # back by first_set() is never mutated.
    without_empty = head_first - {''}
    return without_empty.union(first(stmt[1:], search_set))
def first_for_stat(left, statement):
    """Return the set of symbols that can begin ``statement``.

    Behaviour by case:

    * ``statement`` is empty: ``follow(left)`` is returned.
    * The leading symbol is in ``terminal_set``: a one-element set holding
      that symbol is returned.
    * The leading symbol is in ``non_terminal_set``: ``first(symbol)`` is
      returned directly when it does not contain ``''``. When it does, the
      result for the remaining symbols (still relative to ``left``) is
      merged in and ``''`` is removed from the merged set.
    * Anything else: ``None``.

    ``terminal_set``, ``non_terminal_set``, ``first`` and ``follow`` are
    defined outside this function; they are only read here.

    NOTE(review): ``first`` is called with a single symbol. Confirm it
    resolves to a one-argument helper in this module.
    """
    if len(statement) == 0:
        return follow(left)

    word = statement[0]
    if word in terminal_set:
        return {word}
    if word not in non_terminal_set:
        # Unknown symbol: there is no branch for it, so the result is None.
        return None

    word_first = first(word)
    if '' not in word_first:
        return word_first

    # union() builds a new set, so removing '' below does not touch the
    # set returned by first().
    merged = word_first.union(first_for_stat(left, statement[1:]))
    merged.remove('')
    return merged
# Rename a ".dat" file to ".txt", keeping its base name.
# `filename` and `path_1` are supplied by code outside this fragment.
portion = os.path.splitext(filename)
if portion[1] == ".dat":
    # Recombine the base name with the new extension.
    newname = portion[0] + ".txt"
    # NOTE(review): plain concatenation assumes path_1 already ends with a
    # path separator -- confirm where path_1 is defined.
    filenamedir = path_1 + filename
    newnamedir = path_1 + newname
    os.rename(filenamedir, newnamedir)

# Extract the text from the crawler output and strip punctuation from it.
# `punctuation` comes from outside this fragment (presumably
# string.punctuation -- confirm); add_punc extends it with CJK punctuation
# and decorative symbols.
# NOTE(review): add_punc appears to contain the CJK numeral "一", possibly
# meant as a dash -- confirm it should be stripped from the text.
add_punc = ',。、【】“”:;()《》‘’{}<>!?/=~★◥◣█◢ ▌▅▇█◤●!▃▃▁▂▃?一→¥▲一⑦()、%^>℃:.”“^-——=&#@$❤*+▆.'
all_punc = punctuation + add_punc
# Build the lookup set once so each character test is O(1).
banned_chars = frozenset(all_punc)

with open('demo/newsVec_train.csv', encoding='utf-8') as rf, \
        open(r'demo/newsVec_train_process.csv', 'w', newline='', encoding='utf-8') as wf:
    reader = csv.reader(rf)
    writer = csv.writer(wf)
    for row in reader:
        # Only the third column is processed. Characters are tested against
        # all_punc (standard plus extra marks) so both groups are stripped
        # in a single pass.
        cleaned = ''.join(ch for ch in row[2] if ch not in banned_chars)
        sec = [cleaned]
        print(sec)
        writer.writerow(sec)