def merge_status(*args):
    """Merge two binary entries and return a FHIR CarePlan.status.

    Each positional argument is unpacked as an ``(inact, val)`` pair and the
    first pair that yields a status decides the result:

    * either entry is empty (per ``is_empty``) -> ``"unknown"``
    * ``inact == 1`` -> ``"revoked"``
    * ``val == 1`` -> ``"active"``
    * ``val == 0`` -> ``"on-hold"``

    Returns ``None`` when no pair yields a status (no arguments, or values
    other than 0/1).
    """
    for inact, val in args:
        if is_empty(val) or is_empty(inact):
            return "unknown"
        if inact == 1:
            return "revoked"
        if val == 1:
            return "active"
        if val == 0:
            # The FHIR status code is hyphenated; "on hold" is not a valid code.
            return "on-hold"
    return None
def clean_time(raw_input):  # noqa: C901
    """Extract the time of day from ``raw_input`` as ``HH:MM:SS``.

    Non-string input is converted with ``str`` first. Empty input (per
    ``utils.is_empty``) gives ``""``; input that matches none of the known
    formats gives ``"00:00:00"``.
    """
    text = raw_input if isinstance(raw_input, str) else str(raw_input)
    if utils.is_empty(text):
        return ""

    known_formats = (
        "%H:%M:%S",
        "%H:%M:%S.%f",
        "%Y%m%d%H%M%S",
        "%Y%m%d%H%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S%z",
        "%a, %d %b %Y %H:%M:%S %Z",
        "%H:%M:%S%z",
        "%H::%M::%S",
        "%H %M %S",
        "%H%M%S",
        "%H%M",
    )

    parsed = None
    # Every format is tried and there is deliberately no early exit: when
    # several formats accept the input, the last one in the list wins.
    for fmt in known_formats:
        try:
            parsed = datetime.datetime.strptime(text, fmt).time()
        except ValueError:
            continue

    return parsed.strftime("%H:%M:%S") if parsed else "00:00:00"
def merge_insee(value1, value2):
    """Custom script (x, y) -> x if x is not empty else y, with whitespace stripped."""
    chosen = value2 if utils.is_empty(value1) else value1
    return chosen.strip()
def if_valid_func(value):
    """Apply ``callback`` only when ``process(value)`` is not empty.

    ``process`` and ``callback`` are not parameters; they are resolved from
    the enclosing scope. When ``callback`` is not callable it is returned
    as-is. An empty processed value gives ``""``.
    """
    if utils.is_empty(process(value)):
        return ""
    return callback(value) if callable(callback) else callback
def map_intent(raw_input):
    """Map (0,1,NULL) to (plan, order, proposal).

    Returns the ``intent.PROPOSAL`` value for empty input, the
    ``intent.PLAN`` / ``intent.ORDER`` value for 0 / 1, and ``None`` for
    anything else.
    """
    # The emptiness check must look at the argument, not the builtin `input`.
    if is_empty(raw_input):
        return intent.PROPOSAL.value
    mapping = {0: intent.PLAN.value, 1: intent.ORDER.value}
    return mapping.get(raw_input)
def map_permission(raw_input):
    """Map UMLS codes (Yes, No) to (permit, deny).

    Returns ``None`` for empty input and for codes that are not mapped.
    """
    if utils.is_empty(raw_input):
        return None
    umls_to_authorization = {
        "UMLS:C1298907": Authorization.PERMIT.value,
        "UMLS:C1298908": Authorization.DENY.value,
    }
    return umls_to_authorization.get(raw_input)
def clean_codes(raw_input):
    """Remove terminology system from code ("HL7:male") to ("male").

    The prefix is only removed when it is listed in ``codeTypes``; otherwise
    the input is returned unchanged. Empty input gives ``None``.
    """
    if utils.is_empty(raw_input):
        return None

    # Groups: (1) system prefix, (2) colon with optional spaces, (3) the code.
    # NOTE(review): the `A-z` range also covers the punctuation characters
    # that sit between "Z" and "a" in ASCII.
    parts = re.match(r"([A-z0-9\-]*)( *: *)(.*)", raw_input)
    if parts and parts.group(1) in codeTypes:
        return parts.group(3)
    return raw_input
def clean_instant(raw_input):  # noqa: C901
    """Convert a date/time value to an ISO 8601 timestamp with a UTC offset.

    Non-string input is converted with ``str`` first.

    * Empty input (per ``utils.is_empty``) gives ``""``.
    * Values without timezone information (``YYYY``, ``YYYY-MM``, ``YYYYMM``,
      ``YYYY-MM-DD``, ``YYYYMMDD``, ``YYYYMMDDHHMM``, ``YYYY-MM-DD H:M:S``,
      ``YYYY-MM-DDTH:M:S``) are stamped with UTC+2.
    * ``YYYY-MM-DDTH:M:S`` followed by a UTC offset keeps that offset.
    * RFC 1123 dates ending in ``GMT`` are stamped with UTC+0.
    * Anything else is returned unchanged.
    """
    if not isinstance(raw_input, str):
        raw_input = str(raw_input)
    if utils.is_empty(raw_input):
        return ""

    # By default, we set the timezone to UTC+2 (Paris)... Until we expand worldwide!
    default_tz = datetime.timezone(datetime.timedelta(hours=2))
    naive_formats = (
        "%Y",
        "%Y-%m",
        "%Y%m",
        "%Y-%m-%d",
        "%Y%m%d",
        "%Y%m%d%H%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
    )
    # The first matching format wins, so the order above matters.
    for fmt in naive_formats:
        try:
            parsed = datetime.datetime.strptime(raw_input, fmt)
        except ValueError:
            continue
        return parsed.replace(tzinfo=default_tz).isoformat()

    # Handle YYYY-MM-DDTH:M:S+zz:zz and YYYY-MM-DDTH:M:S-zz:zz.
    # `%z` consumes the sign itself, so no literal "+" / "-" may precede it
    # (a literal sign would require a doubled sign in the input).
    try:
        return datetime.datetime.strptime(
            raw_input, "%Y-%m-%dT%H:%M:%S%z").isoformat()
    except ValueError:
        pass

    # Handle RFC 1123 format
    try:
        parsed = datetime.datetime.strptime(
            raw_input, "%a, %d %b %Y %H:%M:%S GMT")
    except ValueError:
        # Nothing recognised the value: hand it back untouched.
        return raw_input
    gmt = datetime.timezone(datetime.timedelta(hours=0))
    return parsed.replace(tzinfo=gmt).isoformat()
def clean_date(raw_input):
    """Convert date to ISO format.

    Only ``YYYYMMDD`` input is converted (to the ISO form of the parsed
    datetime); any other non-empty value is returned unchanged and empty
    input gives ``""``.
    """
    if utils.is_empty(raw_input):
        return ""

    # Handle YYYYMMDD
    try:
        parsed = datetime.datetime.strptime(raw_input, "%Y%m%d")
    except ValueError:
        return raw_input
    return parsed.isoformat()
def map_gender(raw_input):
    """Map gender from (M,F) or (HL7:M, HL7:F) to (male,female).

    Empty input gives ``None``; any other unmapped value gives the
    ``Gender.UNKNOWN`` value.
    """
    if utils.is_empty(raw_input):
        return None

    male = Gender.MALE.value
    female = Gender.FEMALE.value
    known = {"M": male, "F": female, "HL7:M": male, "HL7:F": female}
    return known.get(raw_input, Gender.UNKNOWN.value)
def clean_date(raw_input):  # noqa: C901
    """Reduce a date or date-time value to an ISO ``YYYY-MM-DD`` date.

    Non-string input is converted with ``str`` first. Empty input (per
    ``utils.is_empty``) gives ``""``. Partial dates are completed by the
    parser (``"2015"`` becomes ``"2015-01-01"``). Input that cannot be parsed
    at all is returned unchanged.
    """
    if not isinstance(raw_input, str):
        raw_input = str(raw_input)
    if utils.is_empty(raw_input):
        return ""

    date = None

    # Correct format: the value already is a FHIR date / dateTime, so the
    # date is its first ten characters.
    pattern = re.compile(
        r"([0-9]([0-9]([0-9][1-9]|[1-9]0)|[1-9]00)|[1-9]000)"
        r"(-(0[1-9]|1[0-2])(-(0[1-9]|[1-2][0-9]|3[0-1])(T([01][0-9]|2[0-3])"
        r":[0-5][0-9]:([0-5][0-9]|60)(\.[0-9]+)?(Z|(\+|-)((0[0-9]|1[0-3]):"
        r"[0-5][0-9]|14:00)))?)?)?")
    # re.fullmatch takes (pattern, string); calling the compiled pattern's
    # own method makes it impossible to swap the two.
    full_match = pattern.fullmatch(raw_input)
    if full_match is not None:
        try:
            date = datetime.datetime.strptime(
                full_match.group(0)[0:10], "%Y-%m-%d")
        except ValueError:
            # Partial date (YYYY, YYYY-MM) or a day the calendar rejects:
            # leave it to the formats below.
            pass

    formats = (
        "%Y%m%d%H%M",
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%S%z",
        "%a, %d %b %Y %H:%M:%S %Z",
        "%Y-%m-%d",
        "%Y%m%d",
        "%Y-%m",
        "%Y%m",
        "%Y",
    )
    # Every format is tried and there is deliberately no early exit: when
    # several formats accept the input, the last one in the list wins.
    for fmt in formats:
        try:
            date = datetime.datetime.strptime(raw_input, fmt)
        except ValueError:
            pass

    if date is None:
        return raw_input

    # We only want the date
    return date.isoformat().split("T")[0]
def clean_quantity(raw_input):
    """
    [deprecated: river parses types automatically]
    Drop input that does not conform to the FHIR quantity (decimal) type.

    Floats and ints are returned as-is, decimal strings are converted to
    float, empty input gives None, and anything else is logged and replaced
    by None.
    """
    if isinstance(raw_input, (float, int)):
        return raw_input
    if utils.is_empty(raw_input):
        return None

    decimal = re.search(
        r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?", raw_input)
    # Only accept the value when the decimal literal spans the whole input.
    if decimal and decimal.group(0) == raw_input:
        return float(raw_input)

    logging.warning(
        "The quantity cleaning-script has removed input {}, not of decimal type"
        .format(raw_input))
    return None
def map_marital_status(code):
    """Map MIMIC marital status.

    Unmapped codes give the ``MaritalStatus.Unknown`` value; a warning is
    logged for them unless the code is empty.
    """
    status = MaritalStatus
    known = {
        "MARRIED": status.Married.value,
        "SINGLE": status.Unmarried.value,
        "WIDOWED": status.Widowed.value,
        "SEPARATED": status.LegallySeparated.value,
        "DIVORCED": status.Divorced.value,
        "UNKNOWN": status.Unknown.value,
    }
    if code in known:
        return known[code]

    if not utils.is_empty(code):
        logging.warning("In {}, args {} not recognised".format(
            "marital_status", code))
    return status.Unknown.value
def map_2_true(raw_input):
    """Map the code 2 to True; everything else (including empty) to None."""
    if utils.is_empty(raw_input):
        return None
    if raw_input != 2:
        return None
    return True
def merge_insee(value1, value2):
    """Return ``value1`` unless it is empty, else ``value2``; whitespace stripped."""
    chosen = value2 if utils.is_empty(value1) else value1
    return chosen.strip()
def test_is_empty():
    """``utils.is_empty`` accepts None, "NaN" and blank strings, not real text."""
    for blank in (None, "NaN", "", " "):
        assert utils.is_empty(blank)
    assert not utils.is_empty(" holà ")
def select_first_not_empty(*args):
    """Merging script which selects the first input that is not empty.

    Returns ``None`` when every input is empty (or none is given).
    """
    return next((arg for arg in args if not is_empty(arg)), None)
def code_to_empty(raw_input):
    """Return None when entry is empty or one of the codes "-1" / "(sans)"."""
    if utils.is_empty(raw_input):
        return None
    if raw_input == "-1":
        return None
    if raw_input == "(sans)":
        return None
    return raw_input
def clean_dateTime(raw_input):  # noqa: C901
    """Convert a date/time value to a FHIR dateTime string.

    Non-string input is converted with ``str`` first.

    * Empty input (per ``utils.is_empty``) gives ``""``.
    * Input that already is a valid FHIR dateTime is returned unchanged.
    * ``YYYY`` -> ``YYYY``; ``YYYY-MM`` / ``YYYYMM`` -> ``YYYY-MM``;
      ``YYYY-MM-DD`` / ``YYYYMMDD`` -> ``YYYY-MM-DD``.
    * ``YYYYMMDDHHMM``, ``YYYY-MM-DD H:M:S`` and ``YYYY-MM-DDTH:M:S`` carry no
      timezone and are stamped with UTC+2.
    * ``YYYY-MM-DDTH:M:S`` followed by a UTC offset keeps that offset.
    * RFC 1123 dates ending in ``GMT`` are stamped with UTC+0.
    * Anything else is returned unchanged.
    """
    if not isinstance(raw_input, str):
        raw_input = str(raw_input)
    if utils.is_empty(raw_input):
        return ""

    # Correct format: nothing to clean.
    pattern = re.compile(
        r"([0-9]([0-9]([0-9][1-9]|[1-9]0)|[1-9]00)|[1-9]000)"
        r"(-(0[1-9]|1[0-2])(-(0[1-9]|[1-2][0-9]|3[0-1])(T([01][0-9]|2[0-3])"
        r":[0-5][0-9]:([0-5][0-9]|60)(\.[0-9]+)?(Z|(\+|-)((0[0-9]|1[0-3]):"
        r"[0-5][0-9]|14:00)))?)?)?")
    if pattern.fullmatch(raw_input) is not None:
        return raw_input

    # By default, we set the timezone to UTC+2 (Paris)... Until we expand worldwide!
    default_tz = datetime.timezone(datetime.timedelta(hours=2))
    gmt = datetime.timezone(datetime.timedelta(hours=0))

    # (strptime format, timezone to attach or None,
    #  number of leading ISO characters to keep or None for all of them)
    conversions = (
        ("%Y", None, 4),
        ("%Y-%m", None, 7),
        ("%Y%m", None, 7),
        ("%Y-%m-%d", None, 10),
        ("%Y%m%d", None, 10),
        ("%Y%m%d%H%M", default_tz, None),
        ("%Y-%m-%d %H:%M:%S", default_tz, None),
        ("%Y-%m-%dT%H:%M:%S", default_tz, None),
        # `%z` consumes the sign itself, so no literal "+" / "-" may precede
        # it (a literal sign would require a doubled sign in the input).
        ("%Y-%m-%dT%H:%M:%S%z", None, None),
        # RFC 1123
        ("%a, %d %b %Y %H:%M:%S GMT", gmt, None),
    )
    # Stop at the first format that parses. strptime accepts non-padded
    # fields, so letting later compact formats run as well would let them
    # re-read e.g. "20151231" as 2015-01-02 03:01 and override the date.
    for fmt, tz, length in conversions:
        try:
            parsed = datetime.datetime.strptime(raw_input, fmt)
        except ValueError:
            continue
        if tz is not None:
            parsed = parsed.replace(tzinfo=tz)
        return parsed.isoformat()[:length]

    # Nothing recognised the value: hand it back untouched.
    return raw_input
def zero_to_empty(raw_input):
    """Return None when entry is empty or the string "0"; else return it unchanged."""
    if utils.is_empty(raw_input):
        return None
    if raw_input == "0":
        return None
    return raw_input