def fix_sort_slash(med, words, medDict):
    """Put the '/'-separated components of a medication name in sorted order.

    When ``med`` contains no '/', the inputs are handed to ``_format_return``
    unchanged.  Otherwise ``med`` is split on '/', the pieces are passed
    through ``clean_words``, sorted, and re-joined with '/'.

    Args:
        med: Medication name string.
        words: Current word list for ``med``; only used when there is no '/'.
        medDict: Lookup mapping, forwarded to ``_format_return``.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    if "/" in med:
        ordered = sorted(clean_words(med.split('/')))
        return _format_return('/'.join(ordered), ordered, medDict)
    return _format_return(med, words, medDict)
def try_with(med, words, medDict):
    """Retry the lookup with every '/' in ``med`` spelled out as ' with '.

    The rewritten name is split on single spaces, passed through
    ``clean_words`` and re-joined with spaces.  It is used only when that
    exact string is a key of ``medDict``; in every other case the original
    ``med`` and ``words`` are returned.

    Args:
        med: Medication name string.
        words: Current word list for ``med``.
        medDict: Lookup mapping keyed by medication name.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    if "/" not in med:
        return _format_return(med, words, medDict)

    candidateWords = clean_words(med.replace("/", " with ").split(' '))
    candidate = ' '.join(candidateWords)
    if candidate in medDict:
        return _format_return(candidate, candidateWords, medDict)
    return _format_return(med, words, medDict)
def fix_dash(med, words, medDict):
    """Normalise a medication name that contains '-'.

    Dashes are replaced by spaces and the result is split on single spaces
    and passed through ``clean_words``.  Two candidates are then considered:

    1. the cleaned words sorted and joined with '/', used if that string is
       a key of ``medDict``;
    2. otherwise the cleaned words in their original order joined by spaces.

    A name without '-' is handed to ``_format_return`` unchanged.

    Args:
        med: Medication name string.
        words: Current word list for ``med``; only used when there is no '-'.
        medDict: Lookup mapping keyed by medication name.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    if "-" in med:
        spacedWords = clean_words(med.replace('-', ' ').split(' '))
        orderedWords = sorted(spacedWords)
        slashKey = '/'.join(orderedWords)
        if slashKey in medDict:
            return _format_return(slashKey, orderedWords, medDict)
        return _format_return(' '.join(spacedWords), spacedWords, medDict)
    return _format_return(med, words, medDict)
def slash_subclass(med, words, medDict):
    """Collapse a '/'-combination whose components all share one category.

    ``med`` is split on '/' and the pieces are passed through
    ``clean_words``.  If every piece is a key of ``medDict`` and all of them
    map to equal values, the first piece is used as the medication name.
    Otherwise ``med`` is returned unchanged.

    Args:
        med: Medication name string.
        words: Current word list for ``med``.
        medDict: Lookup mapping from medication name to its category value.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    if "/" in med:
        slashWords = clean_words(med.split('/'))
        # An empty component list (e.g. med == "/") would satisfy both
        # all() checks vacuously and then fail on slashWords[0]; treat it as
        # "no match" instead of raising IndexError.
        if slashWords and all(word in medDict for word in slashWords):
            # Build a real list: the categories are indexed below, which a
            # lazy map() object would not support.
            categories = [medDict[word] for word in slashWords]
            first = categories[0]
            if all(category == first for category in categories):
                # NOTE(review): the original word list is forwarded here,
                # not slashWords -- kept as in the original code.
                return _format_return(slashWords[0], words, medDict)
    return _format_return(med, words, medDict)
def check_eyes(med, words, medDict):
    """Strip eye-preparation wording from a medication name.

    * If ``med`` contains 'ophthalmic', the work is delegated to
      ``_remove_word`` with that word.
    * If ``med`` contains 'eye drop', the tokens 'eye', 'drop' and 'drops'
      are removed from ``words`` (duplicates collapsed, first-occurrence
      order kept), the remainder is passed through ``clean_words``, and
      tokens rejected by ``not_decimal`` are dropped.
    * Otherwise the inputs are handed to ``_format_return`` unchanged.

    Args:
        med: Medication name string.
        words: Current word list for ``med``.
        medDict: Lookup mapping, forwarded to the helpers.

    Returns:
        Whatever ``_format_return`` (or ``_remove_word``) produces;
        ``get_med_approx`` unpacks it as ``(med, words, medType)``.
    """
    if 'ophthalmic' in med:
        return _remove_word(words, 'ophthalmic', medDict)

    if 'eye drop' in med:
        remaining = set(words).difference(set(['eye', 'drops', 'drop']))
        # The set loses ordering; restore the order of first appearance.
        remaining = sorted(remaining, key=words.index)
        medWords = clean_words(remaining)
        # Filter the *cleaned* words.  The previous code filtered the
        # uncleaned list, silently discarding the clean_words() result.
        # A list is built explicitly because the result is consumed twice
        # (join + _format_return).
        medWords = [word for word in medWords if not_decimal(word)]
        return _format_return(' '.join(medWords), medWords, medDict)

    return _format_return(med, words, medDict)
def fix_words(med, medWords, medDict):
    """Remove formulation/route noise words from a medication's word list.

    Steps, in order:

    1. delete ';' and apostrophes from every word;
    2. drop every word found in the ``badWords`` set, collapsing duplicates
       and keeping the order of first appearance;
    3. delete '%' from the surviving words;
    4. run each word through ``_check_num_slash`` and the whole list through
       ``clean_words``.

    The medication name returned is the surviving words joined by spaces;
    the incoming ``med`` string itself is not used.

    Args:
        med: Medication name string (unused here).
        medWords: Word list for the medication.
        medDict: Lookup mapping, forwarded to ``_format_return``.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    stripped = [w.replace(';', '').replace('\'', '') for w in medWords]

    badWords = set([
        'inj.', 'inj', 'injection', 'cream', 'hcl', 'oral', 'concentrate',
        'ointment', 'liquid', 'suspension', "patch", 'chewable', '.', 'neb',
        'flush', 'protocol', 'initiated', 'patient', 'uses',
        'intra-muscular', 'intramuscular', 'transdermal', 'valerate',
        'sublingual', 'syrup', 'extract', 'capsule', 'cap', 'dissolve',
        'tablet', 'tab', 'mdi', 'soln', 'solution', 'infusion', '%',
        'sustained', 'elixir', 'powder', 'inhaler', 'hfa', 'suppository',
        'topical', 'disintegrating', 'delayed', 'jelly', 'gel',
        'monohydrate', 'macrocrystals', 'macrocrystal', 'otic', 'immediate',
        'releas', 'release', 'pca', 'h2o', 'human', 'sliding', 'scale',
        'scal', 'bolus', 'take', 'home', 'pack', 'coated', 'parenteral',
        'vaginal', "#", 'supp', 'supplements', 'er'
    ])

    # Keep the first occurrence of each word that is not a noise word.
    kept = []
    seen = set()
    for word in stripped:
        if word in badWords or word in seen:
            continue
        seen.add(word)
        kept.append(word)

    kept = [word.replace('%', '') for word in kept]
    kept = [_check_num_slash(word) for word in kept]
    kept = clean_words(kept)
    return _format_return(' '.join(kept), kept, medDict)
def get_med_approx(med, medDict):
    """Find the closest ``medDict`` entry for a free-text medication name.

    The search proceeds in stages and returns at the first success:

    1. each candidate from ``try_splits(med)`` is looked up directly;
    2. a fixed pipeline of clean-up functions is applied in order, each one
       receiving and returning ``(med, words, medType)``; the pipeline stops
       as soon as a step yields a non-None ``medType`` or leaves no words;
    3. if ``med`` contains '/' and every cleaned component is a key of
       ``medDict``, the components' category collections are merged;
    4. ``check_rxnorm`` is tried on the original and then the cleaned name.

    Args:
        med: Free-text medication name.
        medDict: Lookup mapping from medication name to its category value.

    Returns:
        A ``(name, medType)`` pair.  ``medType`` is None when the pipeline
        ran out of words, and may be None when the final ``check_rxnorm``
        call finds nothing.
    """
    med = med.replace(' - ', '-')

    # search by splitting either on : or ()
    candidates = try_splits(med)
    hits = [candidate in medDict for candidate in candidates]
    if any(hits):
        match = candidates[hits.index(True)]
        return match, medDict[match]

    tmpMed = med.replace("w/", "/ ")
    words = tmpMed.split(' ')

    # Order matters: each step works on the output of the previous one.
    pipeline = [
        remove_parens, quick_match, fix_synonyms, fix_words, fix_vitamin,
        quick_match, try_with, truncate_for, check_strength, fix_sort_slash,
        try_no_slash, slash_subclass, fix_dash, check_eyes, fuzzy_match,
        quick_lookup, remove_extra, quick_match
    ]
    for step in pipeline:
        tmpMed, words, medType = step(tmpMed, words, medDict)
        if medType is not None:
            # Single-argument parenthesised form: prints the same text
            # whether print is a statement or a function.
            print("Med:" + str(med) + " = " + str(tmpMed))
            return tmpMed, medType
        if len(words) == 0:
            return med, None

    if "/" in med:
        slashWords = clean_words(med.split('/'))
        known = [word in medDict for word in slashWords]
        if all(known):
            groups = [medDict[word] for word in slashWords]
            merged = set(itertools.chain.from_iterable(groups))
            print("Concatenated together:" + med)
            return ' '.join(slashWords), list(merged)

    medType = check_rxnorm(med, medDict)
    if medType is not None:
        return med, medType
    return tmpMed, check_rxnorm(tmpMed, medDict)
def _remove_word(words, txt, medDict):
    """Drop ``txt`` and a few filler tokens from ``words`` and re-format.

    The tokens 'hr', 'ns', '/' and ``txt`` itself are removed.  Each
    remaining word is passed through ``decimal_and_pattern(word, txt)`` and
    the list through ``clean_words``.  The medication name returned is the
    surviving words joined by spaces.

    Args:
        words: Word list for the medication.
        txt: The word to remove.
        medDict: Lookup mapping, forwarded to ``_format_return``.

    Returns:
        Whatever ``_format_return`` produces; ``get_med_approx`` unpacks it
        as ``(med, words, medType)``.
    """
    skip = set(['hr', 'ns', txt, '/'])
    kept = [word for word in words if word not in skip]
    kept = [decimal_and_pattern(word, txt) for word in kept]
    kept = clean_words(kept)
    return _format_return(' '.join(kept), kept, medDict)