# Collect numbered diagnosis entries from EMR text files, normalise them and
# keep those that closely match a known term list, then save the result.
#
# NOTE(review): `ds`, `ds_cs`, `emrtxts`, `re`, `os` and `EMRdef` are not
# defined in this fragment; they are assumed to be set up earlier in the script.
for line in ds:
    ds_cs.append(line.replace('\n', ''))
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture everything after the ordinal.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without extension
    # errors="ignore" so undecodable bytes in the Chinese text do not abort
    # the read; the context manager closes the handle after each file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the ordinal, or '' when the line is not a list item.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right/left/both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Removing a laterality word can leave a doubled character behind.
            out = re.sub(r'肺肺', '肺', out)
            # Remove parenthesised remarks (ASCII or full-width brackets).
            # The previous pattern '(.*?)' used the parentheses as a regex
            # group, matched only empty strings and therefore removed nothing.
            out = re.sub(r'[(\uFF08].*?[)\uFF09]', '', out)
            # Drop risk-grade qualifiers.
            out = re.sub(r'很高危|极高危', '', out)
            # Remove slashes and question marks (ASCII or full-width).  The
            # previous pattern r'/?|?' did not compile ("nothing to repeat").
            out = re.sub(r'[/?\uFF1F]', '', out)
            # One entry is appended per reference term scoring above 0.8.
            # NOTE(review): EMRdef.SBS is presumably a similarity score - confirm.
            for known in ds_cs:
                if EMRdef.SBS(out, known) > 0.8:
                    output.append(out)

# NOTE(review): EMRdef.delre presumably removes duplicates - confirm.
output = EMRdef.delre(output)
# `output1` was passed to text_save without ever being defined (NameError).
# NOTE(review): confirm text_save accepts a newline-joined string here.
output1 = '\n'.join(output)
EMRdef.text_save(r'D:\python\EMR\jbml.txt', output1)
#-*- coding: UTF-8 -*-
# Segment terms by administration route, dose and dosage form.
#
# Reads two term lists (one term per line) and saves every term of the first
# list that also appears in the second list.
import os
import re
import string
from collections import Counter

import EMRdef

# Punctuation-stripping regex.
# NOTE(review): not referenced anywhere in the visible part of this script.
pattern = r',|;|\*|`|\[|\]|<|>|\?|"|\{|\}|!|@|#|\$|%|\^|&|=|,|。|:|;|‘|’|\+|\-|【|】| \)|\( |(|)|·|!|、|…'

# Dictionary of dose + dosage form + administration route.
# Context managers make sure both input files are closed after reading.
with open(r'C:\Users\Administrator\Desktop\quanbu.txt', 'r', errors="ignore") as c:
    crl = c.readlines()
# Dictionary of doses only.
with open(r'C:\Users\Administrator\Desktop\tc.txt', 'r', errors="ignore") as b:
    brl = b.readlines()

# Count the dose terms once so each lookup is O(1) instead of rescanning the
# whole second list for every line of the first list.
dose_counts = Counter(bl.replace('\n', '') for bl in brl)

test_out = []
for cl in crl:
    cl = cl.replace('\n', '')
    # One copy per identical dose-dictionary line, which is exactly what the
    # pairwise comparison produced (a missing term counts as 0).
    test_out.extend([cl] * dose_counts[cl])

# NOTE(review): EMRdef.delre presumably removes duplicates - confirm.
adult_a = EMRdef.delre(test_out)
EMRdef.text_save(r'C:\Users\Administrator\Desktop\output.txt', adult_a)
# Collect numbered diagnosis entries from EMR text files, normalise them and
# keep those that closely match a known term list, then save the result.
#
# NOTE(review): `ds`, `emrtxts`, `re`, `os` and `EMRdef` are not defined in
# this fragment; they are assumed to be set up earlier in the script.
ds_cs = []
for line in ds:
    ds_cs.append(line.replace('\n', ''))
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture everything after the ordinal.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without extension
    # errors="ignore" so undecodable bytes in the Chinese text do not abort
    # the read; the context manager closes the handle after each file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the ordinal, or '' when the line is not a list item.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right/left/both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Removing a laterality word can leave a doubled character behind.
            out = re.sub(r'肺肺', '肺', out)
            # Remove parenthesised remarks (ASCII or full-width brackets).
            # The previous pattern '(.*?)' used the parentheses as a regex
            # group, matched only empty strings and therefore removed nothing.
            out = re.sub(r'[(\uFF08].*?[)\uFF09]', '', out)
            # Drop risk-grade qualifiers.
            out = re.sub(r'很高危|极高危', '', out)
            # One entry is appended per reference term scoring above 0.8.
            # NOTE(review): EMRdef.SBS is presumably a similarity score - confirm.
            for known in ds_cs:
                if EMRdef.SBS(out, known) > 0.8:
                    output.append(out)

# NOTE(review): EMRdef.delre presumably removes duplicates - confirm.
output = EMRdef.delre(output)
output1 = '\n'.join(output)
# Save the text built above; the previous call passed `adult_c`, which is
# never assigned in this script, while `output1` was computed and left unused.
EMRdef.text_save(r'D:\python\EMR\jbml.txt', output1)
# Collect numbered entries from EMR text files, normalise them and keep those
# that closely match a known term list, then save the result.
#
# NOTE(review): `ds`, `emrtxts`, `re`, `os` and `EMRdef` are not defined in
# this fragment; they are assumed to be set up earlier in the script.
ds_cs = []
for line in ds:
    ds_cs.append(line.replace('\n', ''))
ryzd = []
output = []

# Numbered list item such as "1、xxx": capture everything after the ordinal.
# Compiled once instead of once per input file.
pattern = r'\s*\d+、+\s?(.*)'
c = re.compile(pattern)

for emrtxt in emrtxts:
    emrpath = os.path.basename(emrtxt)
    emrpath = os.path.splitext(emrpath)[0]  # file name without extension
    # errors="ignore" so undecodable bytes in the Chinese text do not abort
    # the read; the context manager closes the handle after each file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Text after the ordinal, or '' when the line is not a list item.
            out = ''.join(c.findall(line.strip()))
            # Drop laterality words (right/left/both sides).
            out = re.sub(r'右侧|两侧|双侧|左侧|右|左|双', '', out)
            # Removing a laterality word can leave a doubled character behind.
            out = re.sub(r'肺肺', '肺', out)
            # Remove parenthesised remarks (ASCII or full-width brackets).
            # The previous pattern '(.*?)' used the parentheses as a regex
            # group, matched only empty strings and therefore removed nothing.
            out = re.sub(r'[(\uFF08].*?[)\uFF09]', '', out)
            # Drop risk-grade qualifiers.
            out = re.sub(r'很高危|极高危', '', out)
            # One entry is appended per reference term scoring above 0.8.
            # NOTE(review): EMRdef.SBS is presumably a similarity score - confirm.
            for known in ds_cs:
                if EMRdef.SBS(out, known) > 0.8:
                    output.append(out)

# NOTE(review): EMRdef.delre presumably removes duplicates - confirm.
output = EMRdef.delre(output)
output1 = '\n'.join(output)
# Save the text built above; the previous call passed `adult_c`, which is
# never assigned in this script, while `output1` was computed and left unused.
EMRdef.text_save(r'D:\python\EMR\967yw.txt', output1)
# Extract items from every EMR text file in a directory, strip punctuation
# and whitespace from them, and save the collected list.
import codecs
import math
import os
import os.path
import re
import shutil
import sys
import time

import EMRdef

# List of .txt files to process.
# NOTE(review): EMRdef.txttq presumably returns file paths - confirm.
emrtxts = EMRdef.txttq(r'D:\DeepLearning ER\EHRzlgc4')
a_out = []

# Punctuation to strip, as one character class.  The previous alternation
# contained a bare "(|)" capturing group: re.split then yielded None for every
# real punctuation match (so "".join raised TypeError), and the alternatives
# listed after the group could never match.  The trailing \uFFxx escapes are
# the full-width forms of , ; < > ? ! : ( ).
pattern = r'[,;\'`\[\]<>?"{}!@#$%^&=。:‘’【】()·*/…\uFF0C\uFF1B\uFF1C\uFF1E\uFF1F\uFF01\uFF1A\uFF08\uFF09]'

for emrtxt in emrtxts:
    # Record ID = file-name text before the first underscore.
    # NOTE(review): not used further in the visible part of this script.
    emr_id = "".join(re.findall(r'(^.+?)\_', os.path.basename(emrtxt)))
    # errors="ignore" so undecodable bytes in the Chinese text do not abort
    # the read; the context manager closes the handle after each file.
    with open(emrtxt, 'r', errors="ignore") as f:
        for line in f:
            # Remove spaces, dots and multiplication signs before extraction.
            line = re.sub(r'[ .×]', '', line)
            # NOTE(review): tq_bnum is opaque here; its result is joined into
            # one string, so it is assumed to return an iterable of strings.
            a = EMRdef.tq_bnum(line)
            a_end = "".join(a)
            a_end = re.sub(pattern, '', a_end)  # strip punctuation
            a_end = a_end.replace(' ', '')      # strip any remaining spaces
            a_out.append(a_end)

# NOTE(review): EMRdef.delre presumably removes duplicates - confirm.
adult_a = EMRdef.delre(a_out)
EMRdef.text_save(r'D:\python\EMR\hyxm.txt', adult_a)