def CalcMCCF1(pred=None, truth=None, probCutoff=0.5, contactCutoff=8.0):
    """Compute MCC / F1 statistics of a predicted contact map against native distances.

    A pair is a *predicted* contact when ``pred > probCutoff`` and a *true*
    contact when ``0 < truth < contactCutoff``; non-positive distances are
    therefore counted as non-contacts.

    The statistics are computed three times, over the entries (i, j) of the
    matrix with ``j - i >= 24``, ``j - i >= 12`` and ``j - i >= 6`` (in that
    order; the ranges are nested, not disjoint).

    Args:
        pred: 2-D numpy array of predicted contact probabilities.
        truth: 2-D numpy array of native distances, same shape as ``pred``.
            The matrix does not have to be square (e.g., an interfacial
            contact matrix).
        probCutoff: probability above which a pair is predicted as a contact.
        contactCutoff: distance below which a pair is a native contact.

    Returns:
        A 1-D numpy array with 24 entries, 8 per range, laid out as
        ``[MCC, TP, FP, TN, FN, F1, precision, recall]``. Callers index this
        array positionally, so the order must not change.

    Notes:
        If ``pred`` or ``truth`` is None, a message is printed and the
        process exits with status -1. A shape mismatch raises AssertionError.
    """
    if pred is None:
        print('please provide a predicted contact matrix')
        exit(-1)

    if truth is None:
        print('please provide a true distance matrix')
        exit(-1)

    assert pred.shape == truth.shape

    ## in case the matrix is not square, e.g., interfacial contact matrix
    numRows = pred.shape[0]
    numCols = pred.shape[1]

    pred_binary = (pred > probCutoff)
    truth_binary = (0 < truth) & (truth < contactCutoff)

    ## encode every pair as 2 * predicted + native, i.e.,
    ## 0 = TN, 1 = FN, 2 = FP, 3 = TP
    pred_truth = pred_binary * 2 + truth_binary

    metrics = []
    for minSeparation in (24, 12, 6):
        mask = np.triu_indices(numRows, minSeparation, m=numCols)
        res = pred_truth[mask]

        ## the codes lie in 0..3, so minlength=4 yields exactly four bins
        count = np.bincount(res, minlength=4)
        assert (res.shape[0] == np.sum(count))

        TN, FN, FP, TP = count

        MCC = Metrics.MCC(TP, FP, TN, FN)
        F1, precision, recall = Metrics.F1(TP, FP, TN, FN)

        metrics.extend([MCC, TP, FP, TN, FN, F1, precision, recall])

    return np.array(metrics)
with open(nativeFile, 'rb') as fh: nativeInfo = cPickle.load(fh) truth = nativeInfo['atomDistMatrix']['CbCb'] truths[protein] = truth for prob in np.arange(5, 60, 1): #print "prob=", prob accs = [] for protein in content: acc = CalcMCCF1(pred=preds[protein], truth=truths[protein], probCutoff=prob / 100.) accs.append(acc) avgacc = np.average(accs, axis=0) resultStr = 'per-target avgMCCF1 at cutoff=' + str( prob) + ': ' + str_display(avgacc) print resultStr lrMCC = Metrics.MCC(avgacc[1], avgacc[2], avgacc[3], avgacc[4]) lrF1, lrprecision, lrrecall = Metrics.F1(avgacc[1], avgacc[2], avgacc[3], avgacc[4]) mrMCC = Metrics.MCC(avgacc[9], avgacc[10], avgacc[11], avgacc[12]) mrF1, mrprecision, mrrecall = Metrics.F1(avgacc[9], avgacc[10], avgacc[11], avgacc[12]) print 'per-pair avgMCCF1 at cutoff=' + str(prob) + ': ' + str_display([ lrMCC, lrF1, lrprecision, lrrecall, mrMCC, mrF1, mrprecision, mrrecall ])