def calculateAverageConceptVectorDistance(self, concepts, filterName='DSM'):
    """
    Compute maximum and average pairwise cosine distance between the
    vectors of the concepts selected by a filter.

    With filterName 'ALL' the computation is run separately for the
    'DSM', 'DSM+1' and 'MED' filters and the resulting features are merged;
    no features prefixed with 'ALL' are produced.

    For any other filter, the concepts returned by getSubsetOfConcepts are
    de-duplicated, the part of each entry before the first ';' is vectorized,
    and the cosine distance is taken over every ordered pair of the
    resulting vectors. Each concept is also paired with itself, and both
    (a, b) and (b, a) are counted. Concepts that cannot be vectorized and
    pairs whose distance cannot be computed or is NaN are skipped.

    @param concepts: The concepts to filter and compare.
    @param filterName: Name of the concept filter, or 'ALL'.
    @return: A dict holding '<filterName>maxConceptVectorDist' (0 when no
             distance could be computed) and, only if at least one distance
             was computed, '<filterName>avgConceptVectorDist'; both rounded
             to three decimals.
    """
    feats = dict()

    if filterName == 'ALL':
        for singleFilter in ('DSM', 'DSM+1', 'MED'):
            feats.update(self.calculateAverageConceptVectorDistance(concepts, singleFilter))
        return feats

    subset = list(set(self.getSubsetOfConcepts(concepts, filterName)))

    # Vectorize every concept once up front instead of twice per pair.
    vectors = []
    for cui in subset:
        try:
            concept = cui.split(';')[0]
            vectors.append(
                Resources.getConceptVectors_dsm().vectorize(concept, remove_oov=True))
        except Exception:
            # Best effort: a concept without a usable vector contributes
            # nothing to the distance features.
            continue

    maxDist = 0
    dists = []
    for wVector in vectors:
        for wVector2 in vectors:
            try:
                cosDist = spatial.distance.cosine(wVector, wVector2)
                isUsable = not math.isnan(cosDist)
            except Exception:
                # Best effort: skip pairs that cannot be compared.
                continue
            if not isUsable:
                continue
            dists.append(cosDist)
            if cosDist > maxDist:
                maxDist = cosDist

    feats[filterName+'maxConceptVectorDist'] = round(maxDist, 3)
    if dists:
        feats[filterName+'avgConceptVectorDist'] = round(np.mean(dists), 3)
    return feats
def getVectorizedConcepts(self, concepts):
    """
    Turn a list of Concept Unique Identifiers (CUIs) into concept vectors.

    Delegates to the DSM concept-vector resource, calling its vectorize
    method on the whole list with remove_oov=False.

    @param concepts: A list of CUIs.
    @return: The result of the resource's vectorize call; as documented
             for this method, a list of vectors of identical size, each
             one representing a concept.
    """
    conceptVectors = Resources.getConceptVectors_dsm()
    return conceptVectors.vectorize(concepts, remove_oov=False)