示例#1
0
def __split_MSH_by_PID(filename):
	"""Split a single-MSH HL7 file into one file per PID segment.

	Assumes:
		- ONE MSH per file
		- utf8 encoding
		- first non-empty line must be MSH line
		- next line must be PID line

	IOW, what's created by __split_hl7_file_by_MSH()

	Every output file starts with a copy of the MSH line, followed by
	one PID line and all lines up to (but excluding) the next PID line.
	Lines are whitespace-stripped and terminated with HL7_EOL, empty
	lines are dropped.

	Returns:
		list of the names of the files written, in input order

	Raises:
		ValueError: if the first non-empty line is not an MSH line, if a
			second MSH line is found, or if the first non-empty line
			after the MSH line is not a PID line
	"""
	_log.debug('splitting single-MSH file [%s] into single-PID files', filename)

	MSH_line = None
	PID_file = None
	PID_fnames = []
	idx = 0
	# "with" / "finally" make sure neither the input file nor the currently
	# open output file is leaked when validation raises half-way through
	try:
		with io.open(filename, mode = 'rt', encoding = 'utf8') as MSH_in:
			for line in MSH_in:
				line = line.strip()
				# ignore empty
				if line == '':
					continue

				# first non-empty line must be MSH
				if MSH_line is None:
					if not line.startswith('MSH|'):
						raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
					MSH_line = line + HL7_EOL
					continue
				if line.startswith('MSH|'):
					raise ValueError('HL7 single-MSH file <%s> contains more than one MSH line' % filename)

				if line.startswith('PID|'):
					# start new file if line is PID
					if PID_file is not None:
						PID_file.close()
						PID_file = None
					idx += 1
					out_fname = gmTools.get_unique_filename(prefix = '%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
					_log.debug('writing message for PID %s to [%s]', idx, out_fname)
					PID_fnames.append(out_fname)
					PID_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
					# each PID file is a complete message, hence repeat the header
					PID_file.write(MSH_line)
				elif PID_file is None:
					# no output file yet -> this is the first non-empty line
					# after MSH, which must be PID
					raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)

				# write line (including the PID line itself) to current file
				PID_file.write(line + HL7_EOL)
	finally:
		if PID_file is not None:
			PID_file.close()

	return PID_fnames
示例#2
0
文件: gmHL7.py 项目: ncqgm/gnumed
def __split_MSH_by_PID(filename):
	"""Split a single-MSH HL7 file into one file per PID segment.

	Assumes:
		- ONE MSH per file
		- utf8 encoding
		- first non-empty line must be MSH line
		- next line must be PID line

	IOW, what's created by __split_hl7_file_by_MSH()

	Every output file starts with a copy of the MSH line, followed by
	one PID line and all lines up to (but excluding) the next PID line.
	Lines are whitespace-stripped and terminated with HL7_EOL, empty
	lines are dropped.

	Returns:
		list of the names of the files written, in input order

	Raises:
		ValueError: if the first non-empty line is not an MSH line, if a
			second MSH line is found, or if the first non-empty line
			after the MSH line is not a PID line
	"""
	_log.debug('splitting single-MSH file [%s] into single-PID files', filename)

	MSH_line = None
	PID_file = None
	PID_fnames = []
	idx = 0
	# "with" / "finally" make sure neither the input file nor the currently
	# open output file is leaked when validation raises half-way through
	try:
		with io.open(filename, mode = 'rt', encoding = 'utf8') as MSH_in:
			for line in MSH_in:
				line = line.strip()
				# ignore empty
				if line == '':
					continue

				# first non-empty line must be MSH
				if MSH_line is None:
					if not line.startswith('MSH|'):
						raise ValueError('HL7 MSH file <%s> does not start with "MSH" line' % filename)
					MSH_line = line + HL7_EOL
					continue
				if line.startswith('MSH|'):
					raise ValueError('HL7 single-MSH file <%s> contains more than one MSH line' % filename)

				if line.startswith('PID|'):
					# start new file if line is PID
					if PID_file is not None:
						PID_file.close()
						PID_file = None
					idx += 1
					out_fname = gmTools.get_unique_filename(prefix = '%s-PID_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
					_log.debug('writing message for PID %s to [%s]', idx, out_fname)
					PID_fnames.append(out_fname)
					PID_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
					# each PID file is a complete message, hence repeat the header
					PID_file.write(MSH_line)
				elif PID_file is None:
					# no output file yet -> this is the first non-empty line
					# after MSH, which must be PID
					raise ValueError('HL7 MSH file <%s> does not have "PID" line follow "MSH" line' % filename)

				# write line (including the PID line itself) to current file
				PID_file.write(line + HL7_EOL)
	finally:
		if PID_file is not None:
			PID_file.close()

	return PID_fnames
示例#3
0
文件: gmHL7.py 项目: ncqgm/gnumed
def __split_hl7_file_by_MSH(filename, encoding='utf8'):
	"""Split an HL7 file into one file per MSH segment.

	Leading empty / FHS / BHS lines are skipped, the first line after
	those must be an MSH line. Every MSH line starts a new output file
	which receives the MSH line and all following lines up to the next
	MSH line. BTS / FTS lines are dropped. Lines are whitespace-stripped
	and terminated with HL7_EOL. Output is always written as utf8.

	Args:
		filename: name of the HL7 file to split
		encoding: encoding of the input file

	Returns:
		list of the names of the files written, in input order

	Raises:
		ValueError: if the first relevant line is not an MSH line
	"""
	_log.debug('splitting [%s] into single-MSH files', filename)

	idx = 0
	first_line = True
	MSH_file = None
	MSH_fnames = []
	# "with" / "finally" make sure neither the input file nor the currently
	# open output file is leaked when an exception is raised half-way through
	try:
		with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
			for line in hl7_in:
				line = line.strip()
				# first line must be MSH
				if first_line:
					# ignore empty / FHS / BHS lines
					if line == '':
						continue
					if line.startswith('FHS|'):
						_log.debug('ignoring FHS')
						continue
					if line.startswith('BHS|'):
						_log.debug('ignoring BHS')
						continue
					if not line.startswith('MSH|'):
						raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
					first_line = False
				# start new file
				if line.startswith('MSH|'):
					if MSH_file is not None:
						MSH_file.close()
						MSH_file = None
					idx += 1
					out_fname = gmTools.get_unique_filename(prefix = '%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
					_log.debug('writing message %s to [%s]', idx, out_fname)
					MSH_fnames.append(out_fname)
					MSH_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
				# ignore BTS / FTS lines
				if line.startswith('BTS|'):
					_log.debug('ignoring BTS')
					continue
				if line.startswith('FTS|'):
					_log.debug('ignoring FTS')
					continue
				# else write line to new file
				MSH_file.write(line + HL7_EOL)
	finally:
		if MSH_file is not None:
			MSH_file.close()

	return MSH_fnames
示例#4
0
def __split_hl7_file_by_MSH(filename, encoding='utf8'):
	"""Split an HL7 file into one file per MSH segment.

	Leading empty / FHS / BHS lines are skipped, the first line after
	those must be an MSH line. Every MSH line starts a new output file
	which receives the MSH line and all following lines up to the next
	MSH line. BTS / FTS lines are dropped. Lines are whitespace-stripped
	and terminated with HL7_EOL. Output is always written as utf8.

	Args:
		filename: name of the HL7 file to split
		encoding: encoding of the input file

	Returns:
		list of the names of the files written, in input order

	Raises:
		ValueError: if the first relevant line is not an MSH line
	"""
	_log.debug('splitting [%s] into single-MSH files', filename)

	idx = 0
	first_line = True
	MSH_file = None
	MSH_fnames = []
	# "with" / "finally" make sure neither the input file nor the currently
	# open output file is leaked when an exception is raised half-way through
	try:
		with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
			for line in hl7_in:
				line = line.strip()
				# first line must be MSH
				if first_line:
					# ignore empty / FHS / BHS lines
					if line == '':
						continue
					if line.startswith('FHS|'):
						_log.debug('ignoring FHS')
						continue
					if line.startswith('BHS|'):
						_log.debug('ignoring BHS')
						continue
					if not line.startswith('MSH|'):
						raise ValueError('HL7 file <%s> does not start with "MSH" line' % filename)
					first_line = False
				# start new file
				if line.startswith('MSH|'):
					if MSH_file is not None:
						MSH_file.close()
						MSH_file = None
					idx += 1
					out_fname = gmTools.get_unique_filename(prefix = '%s-MSH_%s-' % (gmTools.fname_stem(filename), idx), suffix = 'hl7')
					_log.debug('writing message %s to [%s]', idx, out_fname)
					MSH_fnames.append(out_fname)
					MSH_file = io.open(out_fname, mode = 'wt', encoding = 'utf8', newline = '')
				# ignore BTS / FTS lines
				if line.startswith('BTS|'):
					_log.debug('ignoring BTS')
					continue
				if line.startswith('FTS|'):
					_log.debug('ignoring FTS')
					continue
				# else write line to new file
				MSH_file.write(line + HL7_EOL)
	finally:
		if MSH_file is not None:
			MSH_file.close()

	return MSH_fnames
示例#5
0
文件: gmHL7.py 项目: ncqgm/gnumed
def extract_HL7_from_XML_CDATA(filename, xml_path, target_dir=None):
	"""Unwrap HL7 data from the text of XML nodes into an HL7 file.

	The text of all nodes matching <xml_path> is concatenated, as is,
	into a uniquely named work file (utf8). That file is copied into
	<target_dir> and the XML file is moved into a "done" directory.

	Args:
		filename: name of the XML file to read
		xml_path: path handed to ElementTree.findall() for locating
			the nodes holding HL7 data
		target_dir: where to put the HL7 file; if None, "HL7" below the
			directory of <filename> is used and "done" is created next
			to it, otherwise "done" is created below <target_dir>

	Returns:
		name of the HL7 file inside <target_dir>, or None if the
		environment cannot be set up, the XML cannot be parsed, or no
		node matches <xml_path>
	"""
	_log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		work_filename = gmTools.get_unique_filename(prefix = 'gm-x2h-%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
		if target_dir is None:
			target_dir = os.path.join(orig_dir, 'HL7')
			done_dir = os.path.join(orig_dir, 'done')
		else:
			done_dir = os.path.join(target_dir, 'done')
		_log.debug('target dir: %s', target_dir)
		gmTools.mkdir(target_dir)
		gmTools.mkdir(done_dir)
	except Exception:
		_log.exception('cannot setup unwrapping environment')
		return None

	hl7_xml = pyxml.ElementTree()
	try:
		hl7_xml.parse(filename)
	except pyxml.ParseError:
		_log.exception('cannot parse [%s]', filename)
		return None
	nodes = hl7_xml.findall(xml_path)
	if not nodes:
		_log.debug('no nodes found for data extraction')
		return None

	_log.debug('unwrapping HL7 from XML into [%s]', work_filename)
	# newline='': no translation of line endings on output
	with io.open(work_filename, mode = 'wt', encoding = 'utf8', newline = '') as hl7_file:
		for node in nodes:
			# an element without any text content has .text set to None
			if node.text is None:
				_log.debug('ignoring <%s> node without text', xml_path)
				continue
			hl7_file.write(node.text + '')		# trick to make node.text unicode

	target_fname = os.path.join(target_dir, os.path.split(work_filename)[1])
	shutil.copy(work_filename, target_dir)
	shutil.move(filename, done_dir)

	return target_fname
示例#6
0
def extract_HL7_from_XML_CDATA(filename, xml_path, target_dir=None):
	"""Unwrap HL7 data from the text of XML nodes into an HL7 file.

	The text of all nodes matching <xml_path> is concatenated, as is,
	into a uniquely named work file (utf8). That file is copied into
	<target_dir> and the XML file is moved into a "done" directory.

	Args:
		filename: name of the XML file to read
		xml_path: path handed to ElementTree.findall() for locating
			the nodes holding HL7 data
		target_dir: where to put the HL7 file; if None, "HL7" below the
			directory of <filename> is used and "done" is created next
			to it, otherwise "done" is created below <target_dir>

	Returns:
		name of the HL7 file inside <target_dir>, or None if the
		environment cannot be set up, the XML cannot be parsed, or no
		node matches <xml_path>
	"""
	_log.debug('extracting HL7 from CDATA of <%s> nodes in XML file [%s]', xml_path, filename)

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		work_filename = gmTools.get_unique_filename(prefix = 'gm-x2h-%s-' % gmTools.fname_stem(filename), suffix = '.hl7')
		if target_dir is None:
			target_dir = os.path.join(orig_dir, 'HL7')
			done_dir = os.path.join(orig_dir, 'done')
		else:
			done_dir = os.path.join(target_dir, 'done')
		_log.debug('target dir: %s', target_dir)
		gmTools.mkdir(target_dir)
		gmTools.mkdir(done_dir)
	except Exception:
		_log.exception('cannot setup unwrapping environment')
		return None

	hl7_xml = pyxml.ElementTree()
	try:
		hl7_xml.parse(filename)
	except pyxml.ParseError:
		_log.exception('cannot parse [%s]', filename)
		return None
	nodes = hl7_xml.findall(xml_path)
	if not nodes:
		_log.debug('no nodes found for data extraction')
		return None

	_log.debug('unwrapping HL7 from XML into [%s]', work_filename)
	# newline='': no translation of line endings on output
	with io.open(work_filename, mode = 'wt', encoding = 'utf8', newline = '') as hl7_file:
		for node in nodes:
			# an element without any text content has .text set to None
			if node.text is None:
				_log.debug('ignoring <%s> node without text', xml_path)
				continue
			hl7_file.write(node.text + '')		# trick to make node.text unicode

	target_fname = os.path.join(target_dir, os.path.split(work_filename)[1])
	shutil.copy(work_filename, target_dir)
	shutil.move(filename, done_dir)

	return target_fname
示例#7
0
文件: gmHL7.py 项目: sk/gnumed
def __fix_malformed_hl7_file(filename, encoding='utf8'):
	"""Normalize a possibly malformed HL7 file in three passes.

	Each pass reads the result of the previous one and writes a new,
	uniquely named utf8 file:

	1) drop empty lines, unwrap wrapped segments (lines not starting
	   with a known segment marker are appended to the previous line),
	   separate segments by HL7_EOL
	2) pad segments with '|' up to the field count listed for their
	   type in HL7_segment2field_count
	3) merge consecutive FT-type OBX segments which only differ in
	   set id, sub id, and value into one segment, joining the values
	   with the HL7 line break escape

	Args:
		filename: name of the HL7 file to fix
		encoding: encoding of the input file

	Returns:
		name of the file written by the third pass
	"""
	_log.debug(u'fixing HL7 file [%s]', filename)

	# HL7 formatted-text line break escape: \.br\
	# (spelled with escaped backslashes, '\.' is an invalid escape sequence)
	hl7_brk = u'\\.br\\'

	def _obx_identity(fields):
		# what identifies an OBX for merging: everything but set id, sub id, value
		identity = list(fields)
		identity[OBX_field__set_id] = u''
		identity[OBX_field__subid] = u''
		identity[OBX_field__value] = u''
		return u'|'.join(identity)

	# first pass:
	# - remove empty lines
	# - normalize line endings
	# - unwrap wrapped segments
	out1_fname = gmTools.get_unique_filename (
		prefix = u'gm_fix1-%s-' % gmTools.fname_stem(filename),
		suffix = u'.hl7'
	)
	with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
		with io.open(out1_fname, mode = 'wt', encoding = 'utf8') as hl7_out:
			is_first_line = True
			for line in hl7_in:
				# skip empty line
				if line.strip() == u'':
					continue

				# starts with known segment ?
				# (slice rather than index: a 3-character last line must not raise)
				segment = line[:3]
				if (segment in HL7_SEGMENTS) and (line[3:4] == u'|'):
					if not is_first_line:
						hl7_out.write(HL7_EOL)
					else:
						is_first_line = False

				hl7_out.write(line.rstrip(u'\r\n'))

	# second pass:
	# - normalize # of fields per line
	out2_fname = gmTools.get_unique_filename (
		prefix = u'gm_fix2-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	with io.open(out1_fname, mode = 'rt', encoding = 'utf8') as hl7_in:
		with io.open(out2_fname, mode = 'wt', encoding = 'utf8') as hl7_out:
			for line in hl7_in:
				line = line.strip(HL7_EOL)
				seg_type = line[:3]						# assumption: field separator = '|'
				field_count = line.count(u'|') + 1		# assumption: no '|' in data ...
				try:
					required_fields = HL7_segment2field_count[seg_type]
				except KeyError:
					# unknown segment type: leave as is
					required_fields = field_count
				missing_fields_count = required_fields - field_count
				if missing_fields_count > 0:
					line += (u'|' * missing_fields_count)
				hl7_out.write(line + HL7_EOL)

	# third pass:
	# - unsplit same-name, same-time, text-type OBX segments
	out3_fname = gmTools.get_unique_filename (
		prefix = u'gm_fix3-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	with io.open(out2_fname, mode = 'rt', encoding = 'utf8') as hl7_in:
		with io.open(out3_fname, mode = 'wt', encoding = 'utf8') as hl7_out:
			# the FT type OBX currently being accumulated, not yet written
			prev_identity = None
			prev_fields = None
			for line in hl7_in:
				if not line.startswith(u'OBX|'):
					# flush pending OBX, then pass the line through untouched
					if prev_fields is not None:
						hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)
					hl7_out.write(line)
					prev_identity = None
					prev_fields = None
					continue
				line = line.strip(HL7_EOL)
				curr_fields = line.split(u'|')
				# non-FT OBX: written as is
				# NOTE(review): like the original this drops a pending FT OBX
				# rather than flushing it - confirm whether that is intended
				if curr_fields[OBX_field__datatype] != u'FT':
					hl7_out.write(line + HL7_EOL)
					prev_identity = None
					prev_fields = None
					continue
				curr_identity = _obx_identity(curr_fields)
				# first FT type OBX
				if prev_fields is None:
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# non-first FT type OBX
				if curr_identity != prev_identity:
					# write out previous line
					hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)
					# keep current fields, since it may start a "repeat FT type OBX block"
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# same identity: append value, separated by exactly one
				# line break escape unless either side already brings one
				if not (
					prev_fields[OBX_field__value].endswith(hl7_brk)
						or
					curr_fields[OBX_field__value].startswith(hl7_brk)
				):
					prev_fields[OBX_field__value] += hl7_brk
				prev_fields[OBX_field__value] += curr_fields[OBX_field__value]

			if prev_fields is not None:
				hl7_out.write(u'|'.join(prev_fields) + HL7_EOL)

	return out3_fname
示例#8
0
文件: gmHL7.py 项目: sk/gnumed
def stage_single_PID_hl7_file(filename, source=None, encoding='utf8'):
	"""Stage a single-PID HL7 file as unmatched incoming data.

	- input must be single-MSH / single-PID / normalized HL7

	- imports into clin.incoming_data_unmatched

	- needs write permissions in dir_of(filename)
	- moves files which were successfully staged, and the staging log,
	  into dir_of(filename)/done/
	- moves files which failed to stage, and the staging log, into
	  dir_of(filename)/failed/

	Args:
		filename: name of the HL7 file to stage
		source: optional name of the data source, becomes part of the
			type of the incoming data row
		encoding: currently unused, the file is always read as utf8

	Returns:
		True if the file was staged, False if the environment could
		not be set up or staging the file failed
	"""
	local_log_name = gmTools.get_unique_filename (
		prefix = gmTools.fname_stem(filename) + '-',
		suffix = '.stage.log'
	)
	# written as utf8 because it is read back as utf8 further down
	local_logger = logging.FileHandler(local_log_name, encoding = 'utf8')
	local_logger.setLevel(logging.DEBUG)
	root_logger = logging.getLogger('')
	root_logger.addHandler(local_logger)

	def _detach_local_log():
		# removeHandler() alone leaves the log file open, which leaks the
		# handle and can prevent moving the file; close() is safe to repeat
		root_logger.removeHandler(local_logger)
		local_logger.close()

	_log.info(u'staging [%s] as unmatched incoming HL7%s', filename, gmTools.coalesce(source, u'', u' (%s)'))
	_log.debug(u'log file: %s', local_log_name)

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		done_dir = os.path.join(orig_dir, u'done')
		gmTools.mkdir(done_dir)
		error_dir = os.path.join(orig_dir, u'failed')
		gmTools.mkdir(error_dir)
	except Exception:
		_log.exception('cannot setup staging environment')
		_detach_local_log()
		return False

	# stage
	try:
		inc = create_incoming_data(u'HL7%s' % gmTools.coalesce(source, u'', u' (%s)'), filename)
		if inc is None:
			_log.error(u'cannot stage PID file: %s', filename)
			_detach_local_log()
			shutil.move(filename, error_dir)
			shutil.move(local_log_name, error_dir)
			return False
		inc.update_data_from_file(fname = filename)
	except Exception:
		_log.exception(u'error staging PID file')
		_detach_local_log()
		shutil.move(filename, error_dir)
		shutil.move(local_log_name, error_dir)
		return False

	# set additional data
	# "finally" detaches the log handler even if something in here raises,
	# otherwise it would stay attached to the root logger for good
	try:
		with io.open(filename, mode = 'rt', encoding = 'utf8') as MSH_file:
			raw_hl7 = MSH_file.read(1024 * 1024 * 5)	# 5 MB max
		shutil.move(filename, done_dir)
		inc['comment'] = format_hl7_message (
			message = raw_hl7,
			skip_empty_fields = True,
			eol = u'\n'
		)
		HL7 = pyhl7.parse(raw_hl7)
		del raw_hl7
		# append what has been logged so far to the comment
		inc['comment'] += u'\n'
		inc['comment'] += (u'-' * 80)
		inc['comment'] += u'\n\n'
		with io.open(local_log_name, mode = 'rt', encoding = 'utf8') as log:
			inc['comment'] += log.read()
		# best effort: failing to extract demographics does not fail staging
		try:
			inc['lastnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__lastname)
			inc['firstnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__firstname)
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__middlename)
			if val is not None:
				inc['firstnames'] += u' '
				inc['firstnames'] += val
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__dob)
			if val is not None:
				tmp = time.strptime(val, '%Y%m%d')
				inc['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__gender)
			if val is not None:
				inc['gender'] = val
			inc['external_data_id'] = filename
			# fields not (yet) set from the HL7 data:
			#u'fk_patient_candidates',
			#	u'request_id',						# request ID as found in <data>
			#	u'postcode',
			#	u'other_info',						# other identifying info in .data
			#	u'requestor',						# Requestor of data (e.g. who ordered test results) if available in source data.
			#	u'fk_identity_disambiguated',
			#	u'comment',							# a free text comment on this row, eg. why is it here, error logs etc
			#	u'fk_provider_disambiguated'		# The provider the data is relevant to.
		except Exception:
			_log.exception(u'cannot add more data')
		inc.save()

		_log.info(u'successfully staged')
	finally:
		_detach_local_log()
	shutil.move(local_log_name, done_dir)
	return True
示例#9
0
文件: gmHL7.py 项目: sk/gnumed
def split_hl7_file(filename, target_dir=None, encoding='utf8'):
	"""Multi-step processing of HL7 files.

	- input can be multi-MSH / multi-PID / partially malformed HL7
	- tries to fix oddities
	- splits by MSH
	- splits by PID into <target_dir>

	- needs write permissions in dir_of(filename)
	- moves HL7 files which were successfully split up into dir_of(filename)/done/
	- on failure moves the log of the attempt into dir_of(filename)/failed/

	Args:
		filename: name of the HL7 file to split
		target_dir: where to put the single-PID files, defaults to
			dir_of(filename)/PID/
		encoding: encoding of the input file

	Returns:
		(True, list_of_PID_files) on success, (False, None) on failure
	"""
	local_log_name = gmTools.get_unique_filename (
		prefix = gmTools.fname_stem(filename) + '-',
		suffix = '.split.log'
	)
	local_logger = logging.FileHandler(local_log_name)
	local_logger.setLevel(logging.DEBUG)
	root_logger = logging.getLogger('')
	root_logger.addHandler(local_logger)

	def _detach_local_log():
		# removeHandler() alone leaves the log file open, which leaks the
		# handle and can prevent moving the file; close() is safe to repeat
		root_logger.removeHandler(local_logger)
		local_logger.close()

	_log.info('splitting HL7 file: %s', filename)
	_log.debug('log file: %s', local_log_name)

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		done_dir = os.path.join(orig_dir, u'done')
		gmTools.mkdir(done_dir)
		error_dir = os.path.join(orig_dir, u'failed')
		gmTools.mkdir(error_dir)
		work_filename = gmTools.get_unique_filename(prefix = gmTools.fname_stem(filename) + '-', suffix = '.hl7')
		if target_dir is None:
			target_dir = os.path.join(orig_dir, u'PID')
		_log.debug('target dir: %s', target_dir)
		gmTools.mkdir(target_dir)
	except Exception:
		_log.exception('cannot setup splitting environment')
		_detach_local_log()
		return False, None

	# split
	target_names = []
	try:
		# work on a copy, the original is only moved once splitting succeeded
		shutil.copy(filename, work_filename)
		fixed_filename = __fix_malformed_hl7_file(work_filename, encoding = encoding)
		MSH_fnames = __split_hl7_file_by_MSH(fixed_filename, encoding)
		PID_fnames = []
		for MSH_fname in MSH_fnames:
			PID_fnames.extend(__split_MSH_by_PID(MSH_fname))
		for PID_fname in PID_fnames:
			shutil.move(PID_fname, target_dir)
			target_names.append(os.path.join(target_dir, os.path.split(PID_fname)[1]))
	except Exception:
		_log.exception('cannot split HL7 file')
		# all or nothing: remove (best effort) what was already delivered
		for target_name in target_names:
			try:
				os.remove(target_name)
			except OSError:
				pass
		_detach_local_log()
		shutil.move(local_log_name, error_dir)
		return False, None

	_log.info('successfully split')
	_detach_local_log()
	try:
		shutil.move(filename, done_dir)
		shutil.move(local_log_name, done_dir)
	except shutil.Error:
		_log.exception('cannot move hl7 file or log file to holding area')
	return True, target_names
示例#10
0
文件: gmHL7.py 项目: ncqgm/gnumed
def __fix_malformed_hl7_file(filename, encoding='utf8'):
	"""Normalize a possibly malformed HL7 file in three passes.

	Each pass reads the result of the previous one and writes a new,
	uniquely named utf8 file with HL7_EOL-terminated segments:

	1) drop empty lines, unwrap wrapped segments (lines not starting
	   with a known segment marker are appended, after a blank, to the
	   previous line)
	2) pad segments with '|' up to the field count listed for their
	   type in HL7_segment2field_count, empty fields consisting of
	   HL7_BRK / whitespace only, strip leading and trailing HL7_BRK
	   from all other fields
	3) merge consecutive FT-type OBX segments which only differ in
	   set id, sub id, and value into one segment, joining the values
	   with HL7_BRK

	Args:
		filename: name of the HL7 file to fix
		encoding: encoding of the input file

	Returns:
		name of the file written by the third pass
	"""
	_log.debug('fixing HL7 file [%s]', filename)

	def _obx_identity(fields):
		# what identifies an OBX for merging: everything but set id, sub id, value
		identity = list(fields)
		identity[OBX_field__set_id] = ''
		identity[OBX_field__subid] = ''
		identity[OBX_field__value] = ''
		return '|'.join(identity)

	# first pass:
	# - remove empty lines
	# - normalize line endings
	# - unwrap wrapped segments (based on the assumption that segments are wrapped until a line starts with a known segment marker)
	out1_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix1-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# in: universal newlines, translate any type of EOL to \n
	with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
		# out: newline='' -> no translation of EOL at all
		with io.open(out1_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			is_first_line = True
			for line in hl7_in:
				# skip empty line
				if line.strip() == '':
					continue
				# starts with known segment ?
				# (slice rather than index: a 3-character last line must not raise)
				segment = line[:3]
				if (segment in HL7_SEGMENTS) and (line[3:4] == '|'):
					if not is_first_line:
						hl7_out.write(HL7_EOL)
					else:
						is_first_line = False
				else:
					# continuation of the previous segment
					hl7_out.write(' ')
				hl7_out.write(line.rstrip())
			# terminate the last segment
			hl7_out.write(HL7_EOL)

	# second pass:
	# - normalize # of fields per line
	# - remove '\.br.\'-only fields ;-)
	out2_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix2-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# we can now _expect_ lines to end in HL7_EOL, anything else is an error
	with io.open(out1_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL) as hl7_in:
		with io.open(out2_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			for line in hl7_in:
				line = line.strip()
				seg_type = line[:3]						# assumption: field separator = '|'
				field_count = line.count('|') + 1		# assumption: no '|' in data ...
				try:
					required_fields = HL7_segment2field_count[seg_type]
				except KeyError:
					# unknown segment type: do not pad
					required_fields = field_count
				missing_fields_count = required_fields - field_count
				if missing_fields_count > 0:
					line += ('|' * missing_fields_count)
				cleaned_fields = []
				for field in line.split('|'):
					if field.replace(HL7_BRK, '').strip() == '':
						cleaned_fields.append('')
						continue
					cleaned = gmTools.strip_prefix(field, HL7_BRK, remove_repeats = True, remove_whitespace = True)
					cleaned = gmTools.strip_suffix(cleaned, HL7_BRK, remove_repeats = True, remove_whitespace = True)
					cleaned_fields.append(cleaned)
				hl7_out.write('|'.join(cleaned_fields) + HL7_EOL)

	# third pass:
	# - unsplit same-name, same-time, text-type OBX segments
	out3_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix3-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# we can now _expect_ lines to end in HL7_EOL, anything else is an error
	with io.open(out2_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL) as hl7_in:
		with io.open(out3_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			# the FT type OBX currently being accumulated, not yet written
			prev_identity = None
			prev_fields = None
			for line in hl7_in:
				line = line.strip()
				if not line.startswith('OBX|'):
					# flush pending OBX, then pass the line through
					if prev_fields is not None:
						hl7_out.write('|'.join(prev_fields) + HL7_EOL)
					hl7_out.write(line + HL7_EOL)
					prev_identity = None
					prev_fields = None
					continue
				curr_fields = line.split('|')
				# non-FT OBX: written as is
				# NOTE(review): like the original this drops a pending FT OBX
				# rather than flushing it - confirm whether that is intended
				if curr_fields[OBX_field__datatype] != 'FT':
					hl7_out.write(line + HL7_EOL)
					prev_identity = None
					prev_fields = None
					continue
				curr_identity = _obx_identity(curr_fields)
				# first FT type OBX
				if prev_fields is None:
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# non-first FT type OBX
				if curr_identity != prev_identity:
					# write out previous line
					hl7_out.write('|'.join(prev_fields) + HL7_EOL)
					# keep current fields, since it may start a "repeat FT type OBX block"
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# same identity: append value, separated by exactly one
				# HL7_BRK unless either side already brings one
				if not (
					prev_fields[OBX_field__value].endswith(HL7_BRK)
						or
					curr_fields[OBX_field__value].startswith(HL7_BRK)
				):
					prev_fields[OBX_field__value] += HL7_BRK
				prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
			if prev_fields is not None:
				hl7_out.write('|'.join(prev_fields) + HL7_EOL)

	return out3_fname
示例#11
0
def __fix_malformed_hl7_file(filename, encoding='utf8'):
	"""Normalize a possibly malformed HL7 file in three passes.

	Each pass reads the result of the previous one and writes a new,
	uniquely named utf8 file with HL7_EOL-terminated segments:

	1) drop empty lines, unwrap wrapped segments (lines not starting
	   with a known segment marker are appended, after a blank, to the
	   previous line)
	2) pad segments with '|' up to the field count listed for their
	   type in HL7_segment2field_count, empty fields consisting of
	   HL7_BRK / whitespace only, strip leading and trailing HL7_BRK
	   from all other fields
	3) merge consecutive FT-type OBX segments which only differ in
	   set id, sub id, and value into one segment, joining the values
	   with HL7_BRK

	Args:
		filename: name of the HL7 file to fix
		encoding: encoding of the input file

	Returns:
		name of the file written by the third pass
	"""
	_log.debug('fixing HL7 file [%s]', filename)

	def _obx_identity(fields):
		# what identifies an OBX for merging: everything but set id, sub id, value
		identity = list(fields)
		identity[OBX_field__set_id] = ''
		identity[OBX_field__subid] = ''
		identity[OBX_field__value] = ''
		return '|'.join(identity)

	# first pass:
	# - remove empty lines
	# - normalize line endings
	# - unwrap wrapped segments (based on the assumption that segments are wrapped until a line starts with a known segment marker)
	out1_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix1-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# in: universal newlines, translate any type of EOL to \n
	with io.open(filename, mode = 'rt', encoding = encoding) as hl7_in:
		# out: newline='' -> no translation of EOL at all
		with io.open(out1_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			is_first_line = True
			for line in hl7_in:
				# skip empty line
				if line.strip() == '':
					continue
				# starts with known segment ?
				# (slice rather than index: a 3-character last line must not raise)
				segment = line[:3]
				if (segment in HL7_SEGMENTS) and (line[3:4] == '|'):
					if not is_first_line:
						hl7_out.write(HL7_EOL)
					else:
						is_first_line = False
				else:
					# continuation of the previous segment
					hl7_out.write(' ')
				hl7_out.write(line.rstrip())
			# terminate the last segment
			hl7_out.write(HL7_EOL)

	# second pass:
	# - normalize # of fields per line
	# - remove '\.br.\'-only fields ;-)
	out2_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix2-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# we can now _expect_ lines to end in HL7_EOL, anything else is an error
	with io.open(out1_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL) as hl7_in:
		with io.open(out2_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			for line in hl7_in:
				line = line.strip()
				seg_type = line[:3]						# assumption: field separator = '|'
				field_count = line.count('|') + 1		# assumption: no '|' in data ...
				try:
					required_fields = HL7_segment2field_count[seg_type]
				except KeyError:
					# unknown segment type: do not pad
					required_fields = field_count
				missing_fields_count = required_fields - field_count
				if missing_fields_count > 0:
					line += ('|' * missing_fields_count)
				cleaned_fields = []
				for field in line.split('|'):
					if field.replace(HL7_BRK, '').strip() == '':
						cleaned_fields.append('')
						continue
					cleaned = gmTools.strip_prefix(field, HL7_BRK, remove_repeats = True, remove_whitespace = True)
					cleaned = gmTools.strip_suffix(cleaned, HL7_BRK, remove_repeats = True, remove_whitespace = True)
					cleaned_fields.append(cleaned)
				hl7_out.write('|'.join(cleaned_fields) + HL7_EOL)

	# third pass:
	# - unsplit same-name, same-time, text-type OBX segments
	out3_fname = gmTools.get_unique_filename (
		prefix = 'gm_fix3-%s-' % gmTools.fname_stem(filename),
		suffix = '.hl7'
	)
	# we can now _expect_ lines to end in HL7_EOL, anything else is an error
	with io.open(out2_fname, mode = 'rt', encoding = 'utf8', newline = HL7_EOL) as hl7_in:
		with io.open(out3_fname, mode = 'wt', encoding = 'utf8', newline = '') as hl7_out:
			# the FT type OBX currently being accumulated, not yet written
			prev_identity = None
			prev_fields = None
			for line in hl7_in:
				line = line.strip()
				if not line.startswith('OBX|'):
					# flush pending OBX, then pass the line through
					if prev_fields is not None:
						hl7_out.write('|'.join(prev_fields) + HL7_EOL)
					hl7_out.write(line + HL7_EOL)
					prev_identity = None
					prev_fields = None
					continue
				curr_fields = line.split('|')
				# non-FT OBX: written as is
				# NOTE(review): like the original this drops a pending FT OBX
				# rather than flushing it - confirm whether that is intended
				if curr_fields[OBX_field__datatype] != 'FT':
					hl7_out.write(line + HL7_EOL)
					prev_identity = None
					prev_fields = None
					continue
				curr_identity = _obx_identity(curr_fields)
				# first FT type OBX
				if prev_fields is None:
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# non-first FT type OBX
				if curr_identity != prev_identity:
					# write out previous line
					hl7_out.write('|'.join(prev_fields) + HL7_EOL)
					# keep current fields, since it may start a "repeat FT type OBX block"
					prev_fields = curr_fields
					prev_identity = curr_identity
					continue
				# same identity: append value, separated by exactly one
				# HL7_BRK unless either side already brings one
				if not (
					prev_fields[OBX_field__value].endswith(HL7_BRK)
						or
					curr_fields[OBX_field__value].startswith(HL7_BRK)
				):
					prev_fields[OBX_field__value] += HL7_BRK
				prev_fields[OBX_field__value] += curr_fields[OBX_field__value]
			if prev_fields is not None:
				hl7_out.write('|'.join(prev_fields) + HL7_EOL)

	return out3_fname
示例#12
0
def stage_single_PID_hl7_file(filename, source=None, encoding='utf8'):
	"""Stage a single-PID HL7 file as unmatched incoming data.

	- input must be single-MSH / single-PID / normalized HL7

	- imports into clin.incoming_data_unmatched

	- needs write permissions in dir_of(filename)
	- moves files which were successfully staged, and the staging log,
	  into dir_of(filename)/done/
	- moves files which failed to stage, and the staging log, into
	  dir_of(filename)/failed/

	Args:
		filename: name of the HL7 file to stage
		source: optional name of the data source, becomes part of the
			type of the incoming data row
		encoding: currently unused, the file is always read as utf8

	Returns:
		True if the file was staged, False if the environment could
		not be set up or staging the file failed
	"""
	local_log_name = gmTools.get_unique_filename (
		prefix = gmTools.fname_stem(filename) + '-',
		suffix = '.stage.log'
	)
	# written as utf8 because it is read back as utf8 further down
	local_logger = logging.FileHandler(local_log_name, encoding = 'utf8')
	local_logger.setLevel(logging.DEBUG)
	root_logger = logging.getLogger('')
	root_logger.addHandler(local_logger)

	def _detach_local_log():
		# removeHandler() alone leaves the log file open, which leaks the
		# handle and can prevent moving the file; close() is safe to repeat
		root_logger.removeHandler(local_logger)
		local_logger.close()

	_log.info('staging [%s] as unmatched incoming HL7%s', filename, gmTools.coalesce(source, '', ' (%s)'))
	_log.debug('log file: %s', local_log_name)

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		done_dir = os.path.join(orig_dir, 'done')
		gmTools.mkdir(done_dir)
		error_dir = os.path.join(orig_dir, 'failed')
		gmTools.mkdir(error_dir)
	except Exception:
		_log.exception('cannot setup staging environment')
		_detach_local_log()
		return False

	# stage
	try:
		incoming = gmIncomingData.create_incoming_data('HL7%s' % gmTools.coalesce(source, '', ' (%s)'), filename)
		if incoming is None:
			_log.error('cannot stage PID file: %s', filename)
			_detach_local_log()
			shutil.move(filename, error_dir)
			shutil.move(local_log_name, error_dir)
			return False
		incoming.update_data_from_file(fname = filename)
	except Exception:
		_log.exception('error staging PID file')
		_detach_local_log()
		shutil.move(filename, error_dir)
		shutil.move(local_log_name, error_dir)
		return False

	# set additional data
	# "finally" detaches the log handler even if something in here raises,
	# otherwise it would stay attached to the root logger for good
	try:
		# newline='': keep the line endings exactly as stored in the file
		with io.open(filename, mode = 'rt', encoding = 'utf8', newline = '') as MSH_file:
			raw_hl7 = MSH_file.read(1024 * 1024 * 5)	# 5 MB max
		shutil.move(filename, done_dir)
		incoming['comment'] = format_hl7_message (
			message = raw_hl7,
			skip_empty_fields = True,
			eol = '\n'
		)
		HL7 = pyhl7.parse(raw_hl7)
		del raw_hl7
		# append what has been logged so far to the comment
		incoming['comment'] += '\n'
		incoming['comment'] += ('-' * 80)
		incoming['comment'] += '\n\n'
		with io.open(local_log_name, mode = 'rt', encoding = 'utf8') as log:
			incoming['comment'] += log.read()
		# best effort: failing to extract demographics does not fail staging
		try:
			incoming['lastnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__lastname)
			incoming['firstnames'] = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__firstname)
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__name, component_num = PID_component__middlename)
			if val is not None:
				incoming['firstnames'] += ' '
				incoming['firstnames'] += val
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__dob)
			if val is not None:
				tmp = time.strptime(val, '%Y%m%d')
				incoming['dob'] = pyDT.datetime(tmp.tm_year, tmp.tm_mon, tmp.tm_mday, tzinfo = gmDateTime.gmCurrentLocalTimezone)
			val = HL7.extract_field('PID', segment_num = 1, field_num = PID_field__gender)
			if val is not None:
				incoming['gender'] = val
			incoming['external_data_id'] = filename
			# fields not (yet) set from the HL7 data:
			#u'fk_patient_candidates',
			#	u'request_id',						# request ID as found in <data>
			#	u'postcode',
			#	u'other_info',						# other identifying info in .data
			#	u'requestor',						# Requestor of data (e.g. who ordered test results) if available in source data.
			#	u'fk_identity_disambiguated',
			#	u'comment',							# a free text comment on this row, eg. why is it here, error logs etc
			#	u'fk_provider_disambiguated'		# The provider the data is relevant to.
		except Exception:
			_log.exception('cannot add more data')
		incoming.save()

		_log.info('successfully staged')
	finally:
		_detach_local_log()
	shutil.move(local_log_name, done_dir)
	return True
示例#13
0
def split_hl7_file(filename, target_dir=None, encoding='utf8'):
	"""Multi-step processing of HL7 files.

	- input can be multi-MSH / multi-PID / partially malformed HL7
	- tries to fix oddities
	- splits by MSH
	- splits by PID into <target_dir>

	- needs write permissions in dir_of(filename)
	- moves HL7 files which were successfully split up into dir_of(filename)/done/
	- moves the per-run log file into dir_of(filename)/done/ on success
	  and into dir_of(filename)/failed/ when splitting fails

	Args:
		filename: the HL7 file to split
		target_dir: directory receiving the per-PID files, defaults to dir_of(filename)/PID/
		encoding: handed on to the fixing and the split-by-MSH steps

	Returns:
		(True, list_of_PID_files) on success, (False, None) on failure
	"""
	local_log_name = gmTools.get_unique_filename (
		prefix = gmTools.fname_stem(filename) + '-',
		suffix = '.split.log'
	)
	local_logger = logging.FileHandler(local_log_name)
	local_logger.setLevel(logging.DEBUG)
	root_logger = logging.getLogger('')
	root_logger.addHandler(local_logger)
	_log.info('splitting HL7 file: %s', filename)
	_log.debug('log file: %s', local_log_name)

	def _stop_local_logging():
		# detach AND close: a FileHandler which is merely removed keeps its
		# file open, which leaks the handle and can make moving the log
		# file fail on platforms that lock open files
		root_logger.removeHandler(local_logger)
		local_logger.close()

	# sanity checks/setup
	try:
		# fail early if the input cannot be opened
		open(filename).close()
		orig_dir = os.path.split(filename)[0]
		done_dir = os.path.join(orig_dir, 'done')
		gmTools.mkdir(done_dir)
		error_dir = os.path.join(orig_dir, 'failed')
		gmTools.mkdir(error_dir)
		work_filename = gmTools.get_unique_filename(prefix = gmTools.fname_stem(filename) + '-', suffix = '.hl7')
		if target_dir is None:
			target_dir = os.path.join(orig_dir, 'PID')
		_log.debug('target dir: %s', target_dir)
		gmTools.mkdir(target_dir)
	except Exception:
		_log.exception('cannot setup splitting environment')
		_stop_local_logging()
		return False, None

	# split
	target_names = []
	try:
		# work on a copy so the original stays untouched until success
		shutil.copy(filename, work_filename)
		fixed_filename = __fix_malformed_hl7_file(work_filename, encoding = encoding)
		MSH_fnames = __split_hl7_file_by_MSH(fixed_filename, encoding)
		PID_fnames = []
		for MSH_fname in MSH_fnames:
			PID_fnames.extend(__split_MSH_by_PID(MSH_fname))
		for PID_fname in PID_fnames:
			shutil.move(PID_fname, target_dir)
			target_names.append(os.path.join(target_dir, os.path.split(PID_fname)[1]))
	except Exception:
		_log.exception('cannot split HL7 file')
		# roll back: remove whatever already made it into the target dir
		for target_name in target_names:
			try:
				os.remove(target_name)
			except Exception:
				# best effort only, but leave a trace in the log
				_log.exception('cannot remove [%s]', target_name)
		_stop_local_logging()
		shutil.move(local_log_name, error_dir)
		return False, None

	_log.info('successfully split')
	_stop_local_logging()
	try:
		shutil.move(filename, done_dir)
		shutil.move(local_log_name, done_dir)
	except shutil.Error:
		_log.exception('cannot move hl7 file or log file to holding area')
	return True, target_names
示例#14
0
文件: gmMimeLib.py 项目: ncqgm/gnumed
def convert_latex_to_pdf(filename: str = None,
                         verbose: bool = False,
                         is_sandboxed: bool = False) -> str:
    """Run pdflatex over a LaTeX source file.

    Args:
        filename: the LaTeX source file to compile
        verbose: not forwarded here, external processes are always
            run with verbose=True
        is_sandboxed: if True, the directory of <filename> is used as
            the sandbox as-is; if False, a new sandbox directory is
            created and <filename> is copied into it before compiling

    Returns:
        The compiled source's path with its extension replaced by
        ".pdf" (the name is derived, its existence is not checked),
        or None on failure.
    """
    global __LaTeX_version_checked
    global __pdflatex_executable
    if not __LaTeX_version_checked:
        # the flag is raised before probing, so detection runs only once
        __LaTeX_version_checked = True
        found, __pdflatex_executable = gmShellAPI.detect_external_binary(
            binary='pdflatex')
        if not found:
            _log.error('pdflatex not found')
            return None

        cmd_line = [__pdflatex_executable, '-version']
        version_ok, _ret_code, _stdout = gmShellAPI.run_process(
            cmd_line=cmd_line, encoding='utf8', verbose=True)
        if not version_ok:
            _log.error('[%s] failed, LaTeX not usable', cmd_line)
            return None

    # work out where to compile and which copy of the source to use
    if is_sandboxed:
        tex_file = filename
        sandbox_dir = os.path.dirname(tex_file)
    else:
        sandbox_dir = gmTools.mk_sandbox_dir(
            prefix=gmTools.fname_stem(filename) + '_')
        shutil.copy(filename, sandbox_dir)
        tex_file = os.path.join(sandbox_dir, os.path.basename(filename))
    _log.debug('LaTeX sandbox directory: [%s]', sandbox_dir)

    final_pass = [
        __pdflatex_executable,
        '-recorder',
        '-interaction=nonstopmode',
        "-output-directory=%s" % sandbox_dir,
    ]
    draft_pass = final_pass + ['-draftmode']
    # LaTeX can need up to three runs to get cross references et al right
    for pass_cmd in (draft_pass, draft_pass, final_pass):
        compiled, _ret_code, _stdout = gmShellAPI.run_process(
            cmd_line=pass_cmd + [tex_file],
            acceptable_return_codes=[0],
            encoding='utf8',
            verbose=True,
        )
        if compiled:
            continue

        _log.error(
            'problem running pdflatex, cannot generate form output, trying diagnostics'
        )
        # (candidate binaries, extra options, message if none is found)
        diagnostics = (
            (['lacheck', 'miktex-lacheck.exe'], [], 'lacheck not found'),
            (['chktex', 'ChkTeX.exe'], ['--verbosity=2', '--headererr'],
             'chcktex not found'),
        )
        for candidates, options, missing_msg in diagnostics:
            found, checker = gmShellAPI.find_first_binary(binaries=candidates)
            if not found:
                _log.debug(missing_msg)
                continue
            # checker results are not evaluated here
            gmShellAPI.run_process(cmd_line=[checker] + options + [tex_file],
                                   encoding='utf8',
                                   verbose=True)
        return None

    return os.path.splitext(tex_file)[0] + '.pdf'