def hungarianAssignment(cases, controls, numberOfControlsPerCase):
    """Select controls for the cases with a Munkres (Hungarian) assignment.

    A kinship matrix is built with one row per control.  Each row holds the
    control's kinship value towards every case (0 when the control has no
    entry for that case), repeated ``numberOfControlsPerCase`` times so that
    every case owns that many columns.  Kinship values are turned into costs
    with ``convFactor - kinship * convFactor`` and the assignment is solved
    on the resulting cost matrix.

    Parameters:
        cases: sequence of case objects, used as keys into
            ``control.relatedTo``.
        controls: sequence of control objects exposing ``relatedTo`` (a
            mapping case -> kinship value) and ``id``.
        numberOfControlsPerCase: how many column copies each case gets.

    Returns:
        list of ``id`` values of the controls whose assigned cell has a cost
        below ``convFactor`` (i.e. a kinship value above zero).

    Side effects:
        Prints the summed kinship of the assignment and the value of
        ``kicScore(cases, selectedControls)``.
    """
    convFactor = 100000.00

    # dict.get with a default replaces the Python-2-only has_key() lookup;
    # list repetition replaces the xrange() loop that extended the row.
    kinshipRows = []
    for control in controls:
        perCase = [control.relatedTo.get(case, 0) for case in cases]
        kinshipRows.append(perCase * numberOfControlsPerCase)

    # Munkres minimises cost, so high kinship must map to low cost.
    costMatrix = munkres.make_cost_matrix(
        kinshipRows, lambda cost: convFactor - cost * convFactor
    )
    indexes = Munkres().compute(costMatrix)

    selectedControls = []
    total = 0
    for row, column in indexes:
        value = costMatrix[row][column]
        total += value
        # A cost equal to convFactor corresponds to a kinship of zero:
        # such a pairing carries no relatedness and is not selected.
        if value < convFactor:
            # NOTE(review): rows map one-to-one onto controls, so the modulo
            # looks like a no-op; kept as in the original.
            selectedControls.append(controls[row % len(controls)])

    # Undo the cost transform to report the summed kinship of the assignment.
    print("\tassignment kic score: %f" % (float(len(indexes) * convFactor - total) / convFactor))
    print("\tall kic score: %f" % (kicScore(cases, selectedControls)))
    return [person.id for person in selectedControls]
selectedControls = hungarianAssignment(cases, controls, numberOfControlsPerCase) else: print("Have too few good controls - using all of them") selectedControls = goodControls #if we don't have enough good controls take all related controls and add #min resid controls if (len(selectedControls) < neededControls): print( "Number of selected controls (%d) < number of needed controls (%d) - adding min resid controls" % ((len(selectedControls), neededControls))) for resid, person in sorted(controlsResidMap.iteritems()): if ((not manyControls) or (person in goodControls)): if not (person in selectedControls): selectedControls.append(person) if (len(selectedControls) >= neededControls): break #print selected controls print("\nFinal Control Selection (KIC = %f):" % kicScore(cases, selectedControls)) for person in selectedControls: print(person.id) #print("Final KIC score: %d" %kicScore(cases, selectedControls)) print([person.id for person in cases]) print([person.id for person in selectedControls]) return (selectedControls) def hungarianAssignment(cases, controls, numberOfControlsPerCase):
def main(
    numberOfControlsPerCase=2,
    KICoutFilepath=currDir + "/KIC_out",
    caseFilepath=currDir + "/cases",
    contFilepath=currDir + "/controls",
):
    """Load cases, controls and kinship data, then select controls.

    Parameters:
        numberOfControlsPerCase: number of controls wanted per case.
        KICoutFilepath: comma-separated kinship file; fields 1 and 2 are
            person ids and field 3 is the kinship value.
        caseFilepath: tab-separated case file with one header line; the
            case id is read from field 1.
        contFilepath: tab-separated control file with one header line; the
            control id is read from field 1 and the residual from the last
            field (its absolute value is used).

    Returns:
        When at least as many controls are needed as are available: a list
        of the ids of all controls.  Otherwise: the list of selected control
        objects.  NOTE(review): the two return paths yield different element
        types (ids vs. objects); kept as-is because callers are not visible
        here.

    Side effects:
        Appends to / updates the module-level ``people``, ``cases``,
        ``controls`` and ``controlsResidMap`` and prints progress output.
    """
    print(caseFilepath)
    print(numberOfControlsPerCase)

    # load cases
    with open(caseFilepath) as caseFile:
        next(caseFile, None)  # skip the header line; tolerate an empty file
        for caseData in caseFile:
            caseData = caseData.strip().split("\t")
            caseID = caseData[1]
            case = Person(caseID)
            people[caseID] = case
            cases.append(case)

    neededControls = len(cases) * numberOfControlsPerCase

    # load controls
    # controlsResidMap is keyed by residual, so controls sharing a residual
    # overwrite each other there.  The (residual, control) pairs are kept in
    # a list as well so that no control is lost from the fallback ranking.
    residRanked = []
    with open(contFilepath) as contFile:
        next(contFile, None)  # skip the header line; tolerate an empty file
        for contData in contFile:
            contData = contData.strip().split("\t")
            contID = contData[1]
            contResid = math.fabs(float(contData[-1]))
            cont = Person(contID)
            people[contID] = cont
            controls.append(cont)
            controlsResidMap[contResid] = cont
            residRanked.append((contResid, cont))

    if neededControls >= len(controls):
        print("Warning need more controls than available - returning all")
        return [person.id for person in controls]

    print("%d cases and %d potential controls" % (len(cases), len(controls)))

    # load KIC info
    with open(KICoutFilepath) as KICoutFile:
        for line in KICoutFile:
            lineData = line.strip().split(",")
            currPersonID = lineData[1]
            if currPersonID not in people:
                continue
            relativeID = lineData[2]
            if relativeID not in people:
                continue
            kc = float(lineData[3])
            if kc > 0:
                people[currPersonID].putKinship(people[relativeID], kc)

    # histogram: number of relatives -> number of cases with that many
    caseRelCount = dict()
    for case in cases:
        caseNumRel = case.numRel()
        caseRelCount[caseNumRel] = caseRelCount.get(caseNumRel, 0) + 1

    print("Cases to Number of Related Controls: " + str(caseRelCount))
    # Default to 0: without it a missing key yields None and "%d" raises.
    print("Number of zero-matched cases: %d" % (caseRelCount.get(0, 0)))

    # a "good" control is related to at least one case
    goodControls = [
        person
        for person in controls
        if any(relative in cases for relative in person.relatedTo)
    ]
    print("Number of good controls: %d" % (len(goodControls)))

    manyControls = len(goodControls) > neededControls
    if manyControls:
        print(
            "Have too many good controls - need to trim down number of controls "
            "by modified Hungarian Assignment"
        )
        assigned = hungarianAssignment(cases, controls, numberOfControlsPerCase)
        # hungarianAssignment may hand back ids; map them to the loaded
        # objects because the code below reads ``person.id`` and compares
        # against control objects.  Entries that are not ids pass through.
        selectedControls = [people.get(entry, entry) for entry in assigned]
    else:
        print("Have too few good controls - using all of them")
        # copy so that the appends below do not also grow goodControls
        selectedControls = list(goodControls)

    # if we don't have enough good controls take all related controls and add
    # min resid controls
    if len(selectedControls) < neededControls:
        print(
            "Number of selected controls (%d) < number of needed controls (%d) - adding min resid controls"
            % (len(selectedControls), neededControls)
        )
        # Sort on the residual only; ties keep their file order.
        for _resid, person in sorted(residRanked, key=lambda pair: pair[0]):
            if manyControls and person not in goodControls:
                continue
            if person in selectedControls:
                continue
            selectedControls.append(person)
            if len(selectedControls) >= neededControls:
                break

    # print selected controls
    print("\nFinal Control Selection (KIC = %f):" % kicScore(cases, selectedControls))
    for person in selectedControls:
        print(person.id)

    print([person.id for person in cases])
    print([person.id for person in selectedControls])

    return selectedControls