def removeGerund(ehrChild):
    """Drop the words of a composite name that are tagged as gerunds.

    The name is split into words with ``Utilities.listFromCapitals``
    (presumably on capital letters -- confirm against that helper). When the
    split yields more than one word, the words are tagged with
    ``nltk.pos_tag`` and every word whose tag contains "VBG" is removed; the
    remaining words are concatenated without a separator.

    Returns the original ``ehrChild`` unchanged when it splits into at most
    one word, otherwise the re-joined string (which may be empty if every
    word was tagged "VBG").
    """
    words = Utilities.listFromCapitals(ehrChild)

    # A name made of a single word is never altered, whatever its tag.
    if len(words) <= 1:
        return ehrChild

    keptWords = []
    for word, partOfSpeech in nltk.pos_tag(words):
        if "VBG" in partOfSpeech:
            continue
        keptWords.append(word)

    return "".join(keptWords)
def compositeStringSimilarity(ehrClassField, fhirClassField, comparisonMethod, comparisonMethodArgs=(), highestResult=True, removeStopwords=True):
    """Score the similarity of two (possibly composite) field names.

    The two full names are compared first; an exact score of 1 is returned
    straight away. Otherwise both names are split into words with
    ``Utilities.listFromCapitals`` and every EHR word is compared with every
    FHIR word using ``comparisonMethod``.

    Parameters:
        ehrClassField: the EHR-side name.
        fhirClassField: the FHIR-side name.
        comparisonMethod: callable ``(a, b, *comparisonMethodArgs)`` returning
            a numeric similarity.
        comparisonMethodArgs: extra positional arguments forwarded to
            ``comparisonMethod``.
        highestResult: when true, prefer the single best word-to-word score
            over the average (subject to the length check described below).
        removeStopwords: when true, EHR words found in NLTK's English
            stopword list are discarded before comparison.

    Returns:
        1 if the full names already score 1. Otherwise the best word-to-word
        score, provided ``highestResult`` is set and the EHR word that
        produced it is longer than
        ``TranslationConstants.LENGTH_TO_IGNORE_IN_COMPOSITE_HIGHEST``.
        Otherwise the sum of each EHR word's best score divided by the larger
        of the two word counts (0.0 when neither side has any words).
    """
    if comparisonMethod(ehrClassField, fhirClassField, *comparisonMethodArgs) == 1:
        return 1

    ehrWords = Utilities.listFromCapitals(ehrClassField)
    fhirWords = Utilities.listFromCapitals(fhirClassField)

    if removeStopwords:
        # Load the stopword list once instead of once per word; a set also
        # makes each membership test constant time.
        englishStopwords = set(stopwords.words('english'))
        ehrWords = [word for word in ehrWords if word.lower() not in englishStopwords]

    highestSimilarity = 0
    highestSimilarityWord = ""
    totalSimilarity = 0

    for ehrWord in ehrWords:
        highestSimilarityForEHRWord = 0

        for fhirWord in fhirWords:
            similarity = comparisonMethod(ehrWord, fhirWord, *comparisonMethodArgs)

            # Track the best pairing overall, and which EHR word produced it.
            if similarity > highestSimilarity:
                highestSimilarity = similarity
                highestSimilarityWord = ehrWord

            if similarity > highestSimilarityForEHRWord:
                highestSimilarityForEHRWord = similarity

        # Each EHR word contributes only its best match to the average.
        totalSimilarity += highestSimilarityForEHRWord

    if highestResult and len(highestSimilarityWord) > TranslationConstants.LENGTH_TO_IGNORE_IN_COMPOSITE_HIGHEST:
        return highestSimilarity

    wordCount = max(float(len(ehrWords)), float(len(fhirWords)))

    # Nothing to average over (both names produced no words): report no
    # similarity rather than dividing by zero.
    if wordCount == 0:
        return 0.0

    return old_div(totalSimilarity, wordCount)
def getFHIRElements(root, classesToChildren, children=True, parents=True, recurse=True, selectiveRecurse=(), visited=None, addParentName=False, addStemmed=False, attributeTypeOverAttributeName=False, resolveFHIRReferences=False, otherFHIRClasses=None):
    """Collect the attributes of a FHIR model class into ``classesToChildren``.

    Attributes are read from ``root.elementProperties(root())``; entries also
    reported by any parent class are removed. Each remaining entry is a tuple
    in which index 0 is used as the attribute name and index 2 as the
    attribute type. Entries are handed to
    ``TranslationUtilities.processAttribute`` together with
    ``classesToChildren`` (presumably that is where the mapping is filled
    in -- confirm against that helper).

    Parameters:
        root: class to inspect, or a string that evaluates to one.
        classesToChildren: dict updated in place; ``root`` is added with an
            empty set if missing.
        children: process attributes whose type has no callable
            ``elementProperties`` (or whose own elements intersect
            ``TranslationConstants.FIELDS_THAT_INDICATE_RESOURCE_CAN_HOLD_ANY_DATA``).
        parents: process attributes whose type has a callable
            ``elementProperties``.
        recurse: descend into attribute types that have ``elementProperties``.
        selectiveRecurse: when non-empty, only recurse if the name of
            ``root`` or of the attribute type is listed here.
        visited: attribute names already recursed into; updated in place.
            A new list is created when omitted.
        addParentName: also process each child under the name
            ``attributeName + root.__name__``.
        addStemmed: with ``addParentName``, also process each child under
            ``attributeName`` plus the last word of the root's name.
        attributeTypeOverAttributeName: forwarded to ``processAttribute``.
        resolveFHIRReferences: expand attributes typed ``FHIRReference`` into
            one entry per referenced class found in ``otherFHIRClasses``.
        otherFHIRClasses: candidate classes for reference resolution.

    Returns:
        ``classesToChildren``, or an empty list when ``root`` is excluded,
        is a test/exception class, or has no ``elementProperties`` method.
    """
    # A default list in the signature would be shared by every call and
    # would leak visited names between unrelated top-level invocations.
    if visited is None:
        visited = []

    # ``root`` may still be a string at this point, so fall back to the
    # string itself when it has no ``__name__``.
    rootName = getattr(root, "__name__", root)
    if any(excludedMatch in rootName for excludedMatch in TranslationConstants.EXCLUDED_FHIR_CLASS_TYPES):
        return []

    # Convert string to class, if not class.
    # NOTE: eval() executes arbitrary code -- only pass trusted class names.
    if not inspect.isclass(root):
        root = eval(root)
    rootName = str(root.__name__)

    rootLineage = inspect.getmro(root)

    # Ignore test classes.
    if unittest.TestCase in rootLineage or Exception in rootLineage:
        return []

    # Don't examine classes that don't use the 'elementProperties' approach to list attributes.
    if not callable(getattr(root, "elementProperties", None)):
        return []

    if root not in classesToChildren:
        classesToChildren[root] = set()

    # Attributes of this class and parents.
    attributes = root.elementProperties(root())

    # List of parents (first element in tuple is this class).
    for parent in rootLineage[1:]:
        if not callable(getattr(parent, "elementProperties", None)):
            continue

        # Don't reinclude attributes that are already included in a parent.
        # The parent's attributes are fetched once, not once per item.
        parentAttributes = parent.elementProperties(parent())
        attributes = [item for item in attributes if item not in parentAttributes]

    # If the type of an attribute is simply 'FHIRReference' we aim to resolve the scope of this
    # reference by adding duplicate attributes for each potential reference type.
    if resolveFHIRReferences:
        # Name -> class lookup; the first class wins for a repeated name.
        fhirClassesByName = {}
        for fhirClass in (otherFHIRClasses or ()):
            fhirClassesByName.setdefault(fhirClass.__name__, fhirClass)

        sourceLines = None
        newAttributes = []

        for attributeContainer in attributes:
            if "FHIRReference" not in attributeContainer[2].__name__:
                newAttributes.append(attributeContainer)
                continue

            # Read the class source lazily, and only once per root.
            if sourceLines is None:
                sourceLines = inspect.getsource(root).split("\n")

            # enumerate() gives the real position of the line, even if an
            # identical line appears earlier in the source.
            for lineNumber, sourceLine in enumerate(sourceLines):
                if "self." + attributeContainer[0] not in sourceLine:
                    continue

                # If the list of possible references happens not to be two lines later, try three lines later.
                if "FHIRReference" not in sourceLines[lineNumber + 2]:
                    referenceLine = sourceLines[lineNumber + 3]
                else:
                    referenceLine = sourceLines[lineNumber + 2]

                # The referenced type names are the fourth backtick-delimited
                # field of that line, separated by commas.
                for possibleFHIRReference in referenceLine.split("`")[3].split(","):
                    referencedClass = fhirClassesByName.get(possibleFHIRReference.strip())
                    if referencedClass is None:
                        continue

                    # The resolved class is inserted at index 2, so it becomes
                    # the type seen by the code below. The container is
                    # rebound, so later insertions build on this tuple.
                    attributeContainerAsList = list(attributeContainer)
                    attributeContainerAsList.insert(2, referencedClass)
                    attributeContainer = tuple(attributeContainerAsList)
                    newAttributes.append(attributeContainer)

        attributes = newAttributes

    # For all attributes of this class (minus attributes of parent, which are typically generic).
    for attributeContainer in attributes:
        attributeName = attributeContainer[0]
        attributeType = attributeContainer[2]
        attribute = getattr(attributeType, "elementProperties", None)

        if any(excludedMatch in attributeName for excludedMatch in TranslationConstants.EXCLUDED_FHIR_CLASS_TYPES):
            continue

        if children:
            elementsOfChildren = TranslationUtilities.getFHIRElements(attributeType, {}, True, False, False)

            # If a parent child (linked to another FHIR resource) has a suitable field that can hold
            # data (e.g. value, text), then it doesn't matter if it's a parent, as it can effectively
            # just act as a named container, so it might as well be a child (effective leaf node).
            # NOTE(review): set() of a dict yields its keys -- confirm these are comparable with
            # the entries of FIELDS_THAT_INDICATE_RESOURCE_CAN_HOLD_ANY_DATA.
            if not callable(attribute) or not set(TranslationConstants.FIELDS_THAT_INDICATE_RESOURCE_CAN_HOLD_ANY_DATA).isdisjoint(set(elementsOfChildren)):
                TranslationUtilities.processAttribute(root, attributeTypeOverAttributeName, resolveFHIRReferences, classesToChildren, attributeContainer, attributeName)

                # Add an additional pseudoelement with the parent name appended, which aims to
                # represent the context given by the parent (e.g. child: given (not enough on its
                # own) parent: HumanName full: givenHumanName (better representation of what is
                # stored)).
                if addParentName:
                    TranslationUtilities.processAttribute(root, attributeTypeOverAttributeName, resolveFHIRReferences, classesToChildren, attributeContainer, attributeName + rootName)

                    separatedParentName = Utilities.listFromCapitals(rootName)

                    # To reflect the fact that certain classes are better represented by removing
                    # the first word (e.g. HumanName -> Name).
                    if addStemmed and len(separatedParentName) > 1:
                        TranslationUtilities.processAttribute(root, attributeTypeOverAttributeName, resolveFHIRReferences, classesToChildren, attributeContainer, attributeName + separatedParentName[-1])

        if parents and callable(attribute):
            TranslationUtilities.processAttribute(root, attributeTypeOverAttributeName, resolveFHIRReferences, classesToChildren, attributeContainer, attributeName)

        # Don't expand from within FHIRReference, as it has a recursive reference to identifier (also
        # doesn't appear to be captured correctly by the parser, e.g. organisation from Patient).
        # Extension classes appear in every class so don't show anything unique.
        # Don't follow links to types that are of the root class itself.
        if (
            recurse
            and (
                len(selectiveRecurse) == 0
                or rootName in selectiveRecurse
                or str(attributeType.__name__) in selectiveRecurse
            )
            and callable(attribute)
            and "FHIRReference" not in rootName
            and "Extension" not in str(attributeType)
            and attributeType != root
            and attributeName not in visited
        ):
            visited.append(attributeName)
            TranslationUtilities.getFHIRElements(attributeType, classesToChildren, children, parents, recurse, selectiveRecurse, visited, addParentName, addStemmed, attributeTypeOverAttributeName, resolveFHIRReferences, otherFHIRClasses)

    return classesToChildren