def make_NN(sw, dataset, groups, without):
    """Build a network and train it on every group except ``without``.

    sw       -- forwarded to create_ann and inOutFunctions.convert_inputNN
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection indexable by 0..len(groups)-1; each entry yields
                protein codes that are keys of ``dataset``.
    without  -- index of the group that is left out of training.

    Returns the value train_ann produces for the collected samples.
    """
    # The network is created before any data is touched, as in the
    # original statement order.
    network = create_ann(2, sw, len(codes.aac))

    # Protein codes of all groups but the excluded one, in group order.
    training_codes = [
        code
        for index in range(len(groups))
        if index != without
        for code in groups[index]
    ]

    samples = []
    targets = []
    for code in training_codes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']
        merged = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.convert_inputNN(sec, merged, sw)
        samples.extend(ins)
        targets.extend(outs)

    return train_ann(network, samples, targets)
def make_NN2(ann, sw, dataset, groups, without):
    """Train a second network on the predictions ``ann`` makes for one group.

    ann      -- already trained object exposing predict().
    sw       -- forwarded to create_ann and the inOutFunctions converters
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection of groups of protein codes.
    without  -- index of the group whose proteins are used here.

    Returns the value train_ann produces for the second network.
    """
    # NOTE(review): the second network is trained on groups[without] itself,
    # i.e. the group make_NN leaves out -- confirm this is intended.
    second_net = create_ann(2, sw, 3)

    samples, targets = [], []
    for code in list(groups[without]):
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']
        merged = inOutFunctions.merge_sequences(prim)

        # First pass: encode the protein and run it through the given network.
        first_ins, _ = inOutFunctions.convert_inputNN(sec, merged, sw)
        predicted = ann.predict(np.array(first_ins, np.float32))

        # Second pass: the predictions become the input of the new network.
        ins, outs = inOutFunctions.convert_inputNN2(sec, predicted, sw)
        samples.extend(ins)
        targets.extend(outs)

    return train_ann(second_net, samples, targets)
def make_SVM_1(sw, dssp, x, dataset):
    """Fit an SVC on the first ``x`` records of the file ``dataset``.

    Each record is read as: one header line whose second '#'-separated
    field is an integer N, then N lines collected into ``prim``, then one
    line used as ``sec``.

    sw       -- forwarded to inOutFunctions.prepare_input_forX
                (presumably the sliding-window size; confirm in inOutFunctions).
    dssp     -- forwarded to inOutFunctions.prepare_input_forX
                (presumably the structure class to separate; confirm).
    x        -- number of records to read from the start of the file.
    dataset  -- path of the file to read.

    Returns the fitted svm.SVC(C=2.5, gamma=0.05) instance.

    Raises IndexError / ValueError when a header line has no '#' field or
    the field is not an integer (e.g. the file holds fewer than x records).
    """
    inputs_train = []
    outputs_train = []

    # The context manager closes the file even when parsing or feature
    # conversion raises; the previous explicit close() was skipped then.
    with open(dataset, 'r') as handle:
        for _ in range(x):
            desc = handle.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [handle.readline().strip() for _ in range(primlen)]
            sec = handle.readline().strip()

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
            inputs_train.extend(ins)
            outputs_train.extend(outs)

    clf = svm.SVC(C=2.5, gamma=0.05)
    clf.fit(inputs_train, outputs_train)
    return clf
def test_NN(ann, sw, dataset, groups, without):
    """Score ``ann`` on the proteins of ``groups[without]``.

    For each protein the network output is reduced row by row with
    winner(), converted with inOutFunctions.display_result using
    codes.alphabeth, and compared to the stored 'sec' string through
    measurePrediction.

    ann      -- trained object exposing predict().
    sw       -- forwarded to inOutFunctions.convert_inputNN
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection of groups of protein codes.
    without  -- index of the group to evaluate.

    Returns a 10-tuple
    (q3, qh, qh_pred, qe, qe_pred, qc, qc_pred, sov_h, sov_e, sov_c).
    The first seven are sums divided by the number of evaluated proteins.
    Each SOV value is divided by 0.01 plus the number of proteins for which
    calcSOV returned a value, so it is 0 when no protein had one.
    """
    protein_codes = list(groups[without])

    # Average over the proteins actually scored instead of the previously
    # hard-coded 126/7; fall back to 1 so an empty group still yields
    # zeros, as it did before.
    z = len(protein_codes) or 1

    q3 = 0
    qh = qhp = 0
    qe = qep = 0
    qc = qcp = 0
    # The 0.01 seed keeps the SOV divisions defined when nothing is counted.
    sovh, zh = 0, 0.01
    sove, ze = 0, 0.01
    sovc, zc = 0, 0.01

    for code in protein_codes:
        sec = dataset[code]['sec']
        prim = inOutFunctions.merge_sequences(dataset[code]['prim'])
        ins, outs = inOutFunctions.convert_inputNN(sec, prim, sw)

        raw = ann.predict(np.array(ins, np.float32))
        # A distinct loop name avoids rebinding the protein loop variable.
        labels = [winner(row) for row in raw]
        pred = inOutFunctions.display_result(labels, codes.alphabeth)

        q3 += measurePrediction.calcQ3(pred, sec)
        qh += measurePrediction.calcQ(pred, sec, 'H')
        qhp += measurePrediction.calcQpred(pred, sec, 'H')
        qe += measurePrediction.calcQ(pred, sec, 'E')
        qep += measurePrediction.calcQpred(pred, sec, 'E')
        qc += measurePrediction.calcQ(pred, sec, 'C')
        qcp += measurePrediction.calcQpred(pred, sec, 'C')

        _sovh = measurePrediction.calcSOV(pred, sec, 'H')
        _sove = measurePrediction.calcSOV(pred, sec, 'E')
        _sovc = measurePrediction.calcSOV(pred, sec, 'C')
        # calcSOV may return None; such proteins are left out of the mean.
        if _sovh is not None:
            sovh += _sovh
            zh += 1
        if _sove is not None:
            sove += _sove
            ze += 1
        if _sovc is not None:
            sovc += _sovc
            zc += 1

    return (q3/z, qh/z, qhp/z, qe/z, qep/z, qc/z, qcp/z,
            sovh/zh, sove/ze, sovc/zc)
def test_SMV_1(sw, dssp, w, clfx, z, filename='rs126.fa'):
    """Evaluate ``clfx`` on records z .. z+w-1 of ``filename``.

    Each record is read as: one header line whose second '#'-separated
    field is an integer N, then N lines collected into ``prim``, then one
    line used as ``sec``. The first ``z`` records are read and skipped.
    For every evaluated record the expected and predicted strings are
    printed, followed by a blank separator.

    sw        -- forwarded to inOutFunctions.prepare_input_forX
                 (presumably the sliding-window size; confirm).
    dssp      -- label used for class 1 in the decoded prediction and passed
                 to the measurePrediction calls.
    w         -- number of records to evaluate.
    clfx      -- fitted classifier exposing predict().
    z         -- number of leading records to skip.
    filename  -- path of the file to read; defaults to the previously
                 hard-coded 'rs126.fa'.

    Returns (sum of calcQ / w, sum of calcQpred / w).

    Raises ZeroDivisionError when ``w`` is 0.
    """
    cq = 0
    cqp = 0

    # The file used to be opened and never closed; the context manager
    # releases it on every exit path.
    with open(filename, 'r') as handle:
        for i in range(z + w):
            desc = handle.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [handle.readline().strip() for _ in range(primlen)]
            sec = handle.readline().strip()

            if i < z:
                # Record belongs to the skipped prefix; it only had to be
                # consumed to advance the file position.
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
            pred = inOutFunctions.display_result(
                clfx.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})

            # Single-argument parenthesised print behaves the same under
            # the Python 2 print statement.
            print(sec)
            print(pred)
            print("\n")

            cq += measurePrediction.calcQ(pred, sec, dssp)
            cqp += measurePrediction.calcQpred(pred, sec, dssp)

    return (cq/w, cqp/w)
def make_SVM_3X(sw, dataset, groups, without):
    """Fit an SVC on every group except ``without``.

    sw       -- forwarded to inOutFunctions.convert_inputX
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection indexable by 0..len(groups)-1; each entry yields
                protein codes that are keys of ``dataset``.
    without  -- index of the group that is left out of training.

    Returns the fitted svm.SVC(C=1.5, gamma=0.1) instance.
    """
    # Protein codes of all groups but the excluded one, in group order.
    training_codes = [
        code
        for index in range(len(groups))
        if index != without
        for code in groups[index]
    ]

    samples, targets = [], []
    for code in training_codes:
        sec = dataset[code]['sec']
        # NOTE(review): only the last two entries of 'prim' are used for
        # training, while test_SVM_3X merges the full list -- confirm the
        # mismatch is intended.
        prim = dataset[code]['prim'][-2:]
        merged = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.convert_inputX(sec, merged, sw)
        samples.extend(ins)
        targets.extend(outs)

    classifier = svm.SVC(C=1.5, gamma=0.1)
    classifier.fit(samples, targets)
    return classifier
def test_SVM_3(clf, z, x, sw, filename):
    """Score ``clf`` on records x .. x+z-1 of ``filename``.

    Each record is read as: one header line whose second '#'-separated
    field is an integer N, then N lines collected into ``prim``, then one
    line used as ``sec``. The first ``x`` records are read and skipped.

    clf       -- fitted classifier exposing predict().
    z         -- number of records to evaluate (also the averaging divisor).
    x         -- number of leading records to skip.
    sw        -- forwarded to inOutFunctions.convert_inputX
                 (presumably the sliding-window size; confirm).
    filename  -- path of the file to read.

    Returns a 10-tuple
    (q3, qh, qh_pred, qe, qe_pred, qc, qc_pred, sov_h, sov_e, sov_c).
    The first seven are sums divided by ``z``. Each SOV value is divided by
    0.01 plus the number of records for which calcSOV returned a value.

    Raises ZeroDivisionError when ``z`` is 0.
    """
    q3 = 0
    qh = qhp = 0
    qe = qep = 0
    qc = qcp = 0
    # The 0.01 seed keeps the SOV divisions defined when nothing is counted.
    sovh, zh = 0, 0.01
    sove, ze = 0, 0.01
    sovc, zc = 0, 0.01

    # The context manager closes the file even when a record fails to
    # parse or score; the previous explicit close() was skipped then.
    with open(filename, 'r') as handle:
        for i in range(x + z):
            desc = handle.readline().strip()
            primlen = int(desc.split('#')[1])
            prim = [handle.readline().strip() for _ in range(primlen)]
            sec = handle.readline().strip()

            if i < x:
                # Skipped prefix: the record only had to be consumed.
                continue

            primx = inOutFunctions.merge_sequences(prim)
            ins, outs = inOutFunctions.convert_inputX(sec, primx, sw)
            pred = inOutFunctions.display_result(
                clf.predict(np.array(ins, np.float32)), codes.alphabeth)

            q3 += measurePrediction.calcQ3(pred, sec)
            qh += measurePrediction.calcQ(pred, sec, 'H')
            qhp += measurePrediction.calcQpred(pred, sec, 'H')
            qe += measurePrediction.calcQ(pred, sec, 'E')
            qep += measurePrediction.calcQpred(pred, sec, 'E')
            qc += measurePrediction.calcQ(pred, sec, 'C')
            qcp += measurePrediction.calcQpred(pred, sec, 'C')

            _sovh = measurePrediction.calcSOV(pred, sec, 'H')
            _sove = measurePrediction.calcSOV(pred, sec, 'E')
            _sovc = measurePrediction.calcSOV(pred, sec, 'C')
            # calcSOV may return None; such records are left out of the mean.
            if _sovh is not None:
                sovh += _sovh
                zh += 1
            if _sove is not None:
                sove += _sove
                ze += 1
            if _sovc is not None:
                sovc += _sovc
                zc += 1

    return (q3/z, qh/z, qhp/z, qe/z, qep/z, qc/z, qcp/z,
            sovh/zh, sove/ze, sovc/zc)
def test_SVM_3X(clf, sw, dataset, groups, without):
    """Score ``clf`` on the proteins of ``groups[without]``.

    clf      -- fitted classifier exposing predict().
    sw       -- forwarded to inOutFunctions.convert_inputX
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection of groups of protein codes.
    without  -- index of the group to evaluate.

    Returns a 10-tuple
    (q3, qh, qh_pred, qe, qe_pred, qc, qc_pred, sov_h, sov_e, sov_c).
    The first seven are sums divided by len(groups[without]). Each SOV
    value is divided by 0.01 plus the number of proteins for which calcSOV
    returned a value.
    """
    states = ('H', 'E', 'C')

    q3_total = 0
    q_total = dict((state, 0) for state in states)
    qpred_total = dict((state, 0) for state in states)
    sov_total = dict((state, 0) for state in states)
    # Seeded with 0.01 so the final SOV divisions never hit zero.
    sov_count = dict((state, 0.01) for state in states)

    for code in list(groups[without]):
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']
        merged = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.convert_inputX(sec, merged, sw)
        predicted = clf.predict(np.array(ins, np.float32))
        pred = inOutFunctions.display_result(predicted, codes.alphabeth)

        q3_total += measurePrediction.calcQ3(pred, sec)
        for state in states:
            q_total[state] += measurePrediction.calcQ(pred, sec, state)
            qpred_total[state] += measurePrediction.calcQpred(pred, sec, state)

        for state in states:
            value = measurePrediction.calcSOV(pred, sec, state)
            # calcSOV may return None; such proteins are not counted.
            if value is not None:
                sov_total[state] += value
                sov_count[state] += 1

    z = len(groups[without])
    return (
        q3_total/z,
        q_total['H']/z, qpred_total['H']/z,
        q_total['E']/z, qpred_total['E']/z,
        q_total['C']/z, qpred_total['C']/z,
        sov_total['H']/sov_count['H'],
        sov_total['E']/sov_count['E'],
        sov_total['C']/sov_count['C'],
    )
def test_SMV_2(clf, sw, dataset, groups, without, dssp):
    """Score the one-class classifier ``clf`` on ``groups[without]``.

    clf      -- fitted classifier exposing predict(); its outputs are
                decoded with {0: 'X', 1: dssp}.
    sw       -- forwarded to inOutFunctions.prepare_input_forX
                (presumably the sliding-window size; confirm in inOutFunctions).
    dataset  -- mapping from protein code to a record with 'sec' and 'prim'.
    groups   -- collection of groups of protein codes.
    without  -- index of the group to evaluate.
    dssp     -- label of the class being scored.

    Returns (q, q_pred, c, sov): sums of calcQ, calcQpred and calcC divided
    by the number of evaluated proteins, and the mean of the calcSOV values
    that were not None (0.0 when there were none).
    """
    protein_codes = list(groups[without])

    # Average over the proteins actually scored instead of the previously
    # hard-coded 18; fall back to 1 so an empty group yields zeros.
    q = len(protein_codes) or 1

    cq = 0
    cqp = 0
    cc = 0
    sov = 0
    qq = 0

    for code in protein_codes:
        sec = dataset[code]['sec']
        prim = dataset[code]['prim']
        primx = inOutFunctions.merge_sequences(prim)
        ins, outs = inOutFunctions.prepare_input_forX(sec, primx, sw, dssp)
        pred = inOutFunctions.display_result(
            clf.predict(np.array(ins, np.float32)), {0: 'X', 1: dssp})

        cq += measurePrediction.calcQ(pred, sec, dssp)
        cqp += measurePrediction.calcQpred(pred, sec, dssp)
        cc += measurePrediction.calcC(pred, sec, dssp)

        sovx = measurePrediction.calcSOV(pred, sec, dssp)
        # calcSOV may return None; such proteins are left out of the mean.
        if sovx is not None:
            sov += sovx
            qq += 1

    # Without this guard sov/qq raised ZeroDivisionError whenever calcSOV
    # never returned a value; report 0.0, which is what the sibling
    # test_* functions yield in that situation.
    mean_sov = sov/qq if qq else 0.0
    return (cq/q, cqp/q, cc/q, mean_sov)