def make_SVM_1(sw, dssp, x, dataset):
    """Train an SVC on the first ``x`` records of a dataset file.

    Each record is read as: one header line whose second '#'-separated
    field is the number of primary-sequence lines, then that many
    primary-sequence lines, then a single secondary-structure line.

    Args:
        sw: Forwarded to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- TODO confirm).
        dssp: Forwarded to ``inOutFunctions.prepare_input_forX`` as the
            structure selector (presumably a DSSP class character --
            TODO confirm).
        x: Number of records to read from the start of the file.
        dataset: Path of the dataset file to read.

    Returns:
        The ``svm.SVC(C=2.5, gamma=0.05)`` instance after ``fit`` has been
        called on the collected inputs and outputs.

    Raises:
        IndexError: If a header line contains no '#'. This includes reading
            past the end of the file, where ``readline`` returns ''.
        ValueError: If the header's count field is not an integer.
    """
    inputs_train = []
    outputs_train = []

    # The context manager closes the file even when parsing or feature
    # preparation raises part-way through.
    with open(dataset, 'r') as f:
        for _record in range(x):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
            inputs_train.extend(ins)
            outputs_train.extend(outs)

    clf = svm.SVC(C=2.5, gamma=0.05)
    clf.fit(inputs_train, outputs_train)
    return clf
def test_SMV_1(sw, dssp, w, clfx, z, dataset='rs126.fa'):
    """Evaluate a classifier on records ``z`` .. ``z + w - 1`` of a file.

    The first ``z`` records are read and discarded; the following ``w``
    records are predicted and scored. For every scored record the true
    secondary structure, the prediction and a blank separator are printed.

    Each record is read as: one header line whose second '#'-separated
    field is the number of primary-sequence lines, then that many
    primary-sequence lines, then a single secondary-structure line.

    Args:
        sw: Forwarded to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- TODO confirm).
        dssp: Structure label; predicted class 1 is rendered as this value
            and class 0 as 'X'.
        w: Number of records to score; also the divisor of the returned
            averages.
        clfx: Fitted classifier exposing ``predict``.
        z: Number of leading records to skip.
        dataset: Path of the file to read. Defaults to 'rs126.fa', the
            path that used to be hard-coded here.

    Returns:
        Tuple ``(cq / w, cqp / w)``: the summed ``calcQ`` and ``calcQpred``
        scores, each divided by ``w``.

    Raises:
        ZeroDivisionError: If ``w`` is 0.
        IndexError: If a header line contains no '#'. This includes reading
            past the end of the file, where ``readline`` returns ''.
    """
    cq = 0
    cqp = 0

    # The context manager guarantees the handle is released; previously the
    # file was opened and never closed.
    with open(dataset, 'r') as f:
        for i in range(z + w):
            desc = f.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [f.readline().strip() for _line in range(primlen)]
            sec = f.readline().strip()

            # Records before index z are only consumed to advance the file.
            if i < z:
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
            pred = inOutFunctions.display_result(
                clfx.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})

            # Parenthesised single-argument prints behave the same under the
            # Python 2 print statement and the Python 3 print function.
            print(sec)
            print(pred)
            print("\n")

            cq += measurePrediction.calcQ(pred, sec, dssp)
            cqp += measurePrediction.calcQpred(pred, sec, dssp)

    return (cq / w, cqp / w)
def make_SVM_2(sw, dataset, groups, without, struct):
    """Train an SVC on every protein group except the one at ``without``.

    Args:
        sw: Forwarded to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record providing the
            'sec' and 'prim' entries.
        groups: Indexable collection of protein-code groups, addressed by
            the integer positions ``0 .. len(groups) - 1``.
        without: Index of the group to leave out of training.
        struct: Forwarded to ``inOutFunctions.prepare_input_forX`` as the
            structure selector.

    Returns:
        The ``svm.SVC(C=1.5, gamma=0.1)`` instance after ``fit`` has been
        called on the collected inputs and outputs.
    """
    # Gather the codes of all training proteins first. Groups are fetched
    # by index (not by iterating ``groups``) to keep the original access
    # pattern.
    training_codes = []
    for group_index in range(len(groups)):
        if group_index != without:
            training_codes.extend(groups[group_index])

    features = []
    labels = []
    for code in training_codes:
        secondary = dataset[code]['sec']
        primary = dataset[code]['prim']
        merged = inOutFunctions.merge_sequences(primary)
        ins, outs = inOutFunctions.prepare_input_forX(secondary, merged, sw, struct)
        features.extend(ins)
        labels.extend(outs)

    model = svm.SVC(C=1.5, gamma=0.1)
    model.fit(features, labels)
    return model
def test_SMV_2(clf, sw, dataset, groups, without, dssp, group_size=18):
    """Score a classifier on the held-out protein group ``groups[without]``.

    Args:
        clf: Fitted classifier exposing ``predict``.
        sw: Forwarded to ``inOutFunctions.prepare_input_forX``
            (presumably the sliding-window size -- TODO confirm).
        dataset: Mapping from protein code to a record providing the
            'sec' and 'prim' entries.
        groups: Indexable collection of protein-code groups.
        without: Index of the group to evaluate on.
        dssp: Structure label; predicted class 1 is rendered as this value
            and class 0 as 'X'.
        group_size: Divisor used to average the Q, Qpred and C scores.
            Defaults to 18, the value that used to be hard-coded here, so
            existing callers get unchanged results. Pass
            ``len(groups[without])`` to obtain a true per-protein mean
            when groups are not all of size 18.

    Returns:
        Tuple ``(cq / group_size, cqp / group_size, cc / group_size,
        sov / n_sov)`` where ``cq``, ``cqp``, ``cc`` are the summed
        ``calcQ``, ``calcQpred`` and ``calcC`` scores, ``sov`` is the sum
        of the non-``None`` ``calcSOV`` results and ``n_sov`` is how many
        of those there were.

    Raises:
        ZeroDivisionError: If ``calcSOV`` returned ``None`` for every
            protein (or the group is empty), or if ``group_size`` is 0.
    """
    # NOTE(review): under Python 2 the divisions below floor if the
    # measurePrediction functions return ints -- confirm they return floats.
    cq = 0
    cqp = 0
    cc = 0
    sov = 0
    sov_count = 0

    for code in groups[without]:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']
        primx = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
        pred = inOutFunctions.display_result(
            clf.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})

        cq += measurePrediction.calcQ(pred, sec, dssp)
        cqp += measurePrediction.calcQpred(pred, sec, dssp)
        cc += measurePrediction.calcC(pred, sec, dssp)

        # calcSOV may return None; such proteins are left out of both the
        # SOV sum and its divisor.
        sovx = measurePrediction.calcSOV(pred, sec, dssp)
        if sovx is not None:
            sov += sovx
            sov_count += 1

    return (cq / group_size, cqp / group_size, cc / group_size, sov / sov_count)