acc = np.mean(accs)  # accs is computed above this excerpt; reported below as CV accuracy
                print "CV accuracy %0.4f (std %0.4f)" % \
                    (acc, np.std(accs))
                d['cv_acc'].append(acc)

                # 5-fold cross-validated ROC AUC of the same ensemble on (X, Y).
                aucs = sklearn.cross_validation.cross_val_score(
                    ensemble, X, Y, cv = 5, scoring='roc_auc')
                auc = np.mean(aucs)
                print "CV AUC %0.4f (std %0.4f)" % \
                    (auc, np.std(aucs))
                d['cv_auc'].append(auc)

                # Refit on all of (X, Y) before predicting on the two peptide
                # sets below.
                ensemble.fit(X, Y)

                # Mean prediction over cancer_peptides, reported as accuracy.
                # NOTE(review): this presumes predict() returns 0/1 labels and
                # that every cancer peptide is a true positive -- confirm.
                X_pos_test = vectorizer.transform(cancer_peptides)
                Y_pos_pred = ensemble.predict(X_pos_test)
                pos_acc = np.mean(Y_pos_pred)
                print "Tumor antigen accuracy %0.4f" % (pos_acc,)
                d['pos_acc'].append(pos_acc)

                # One minus the mean prediction over
                # non_immunogenic_hiv_peptides; under the same 0/1 assumption
                # this is the fraction of that set predicted negative.
                X_neg_test = vectorizer.transform(
                    non_immunogenic_hiv_peptides)
                Y_neg_pred = ensemble.predict(X_neg_test)
                neg_acc = 1.0 - np.mean(Y_neg_pred)
                print "Non-immunogenic accuracy %0.4f" % (neg_acc,)
                d['neg_acc'].append(neg_acc)

                # Precision over the two evaluation sets combined: the summed
                # predictions on the cancer peptides divided by the summed
                # predictions on both sets.
                # NOTE(review): the denominator is zero when nothing in either
                # set is predicted positive -- confirm how that case should be
                # handled.
                n_pos_pred = np.sum(Y_pos_pred)
                n_neg_pred = np.sum(Y_neg_pred)
                precision = n_pos_pred / float(n_pos_pred + n_neg_pred)
                # Recall is the same quantity as pos_acc computed above.
                recall = pos_acc
Exemplo n.º 2
0
                (np.mean(accs), np.std(accs))
            # Record the mean of accs (computed above this excerpt) for this
            # iteration.
            d['acc'].append(np.mean(accs))

            # 5-fold cross-validated ROC AUC of the same ensemble on (X, Y).
            aucs = sklearn.cross_validation.cross_val_score(
                ensemble, X, Y, cv = 5, scoring='roc_auc')
            print "CV AUC %0.4f (std %0.4f)" % \
                (np.mean(aucs), np.std(aucs))
            d['auc'].append(np.mean(aucs))

            # Refit on all of (X, Y) before predicting on cancer_peptides.
            ensemble.fit(X, Y)

            # Disabled evaluation on self_peptides, kept for reference:
            #X_self = vectorizer.transform(self_peptides)
            #Y_pred = ensemble.predict(X_self)
            #print "Self epitope accuracy %0.4f" % \
            #    (1.0 - np.mean(Y_pred))
            # Mean prediction over cancer_peptides, stored as recall.
            # NOTE(review): this presumes predict() returns 0/1 labels and
            # that every cancer peptide is a true positive -- confirm.
            X_test = vectorizer.transform(cancer_peptides)
            Y_pred = ensemble.predict(X_test)
            recall = np.mean(Y_pred)
            print "Tumor antigen accuracy %0.4f" % (recall,)
            d['recall'].append(recall)
            print "---"
            print
            # Ranking score: unweighted average of mean CV AUC and recall.
            combined = (np.mean(aucs) + recall) / 2.0
            d['combined'].append(combined)



# Turn the metric lists accumulated in d into a table and print it ranked by
# the 'combined' score, highest first.
df = pd.DataFrame(d)
# DataFrame.sort was deprecated in pandas 0.17 and removed in 0.20 in favour
# of sort_values; fall back to the old name only when pandas predates it, so
# the report no longer fails after all the training has already run.
sort_frame = df.sort_values if hasattr(df, 'sort_values') else df.sort
print(sort_frame('combined', ascending=False))