def predict(path, weightPath1, weightPath2, method, limit=500,
            predictionPath="predictions.txt"):
    """Scan protein sequences with the trained network and save the hits.

    Both weight arrays are loaded with ``np.load``.  When ``method`` is
    ``'fasta'`` (case-insensitive), every protein read from ``path`` is split
    into candidate sequences, each candidate is encoded and passed through
    ``forward``, and the inverse-transformed output is compared with
    ``limit``.  Each candidate whose value is at or below ``limit`` is
    recorded as a ``[protein number, position]`` pair, both one-indexed.

    The pairs are written to ``predictionPath`` as tab-separated integers and
    the number of hits is printed.  For any other ``method`` no input is read,
    so an empty file is written and a count of zero is printed.

    Args:
        path: Input file handed to ``fileUtils.readFasta``.
        weightPath1: Path of the first weight array (``np.load`` format).
        weightPath2: Path of the second weight array (``np.load`` format).
        method: Input format name; only ``'fasta'`` is handled.
        limit: Upper bound (inclusive) on the inverse-transformed network
            output for a candidate to be reported.  Defaults to 500.
        predictionPath: File the predictions are written to.  Defaults to
            ``"predictions.txt"`` in the current working directory.

    Returns:
        None.
    """
    # load
    weight1 = np.load(weightPath1)
    weight2 = np.load(weightPath2)

    predictions = []

    if method.lower() == 'fasta':
        proteins = fileUtils.readFasta(path)

        # Count from one: the reported protein number and position are
        # one-indexed, while the underlying containers are zero-indexed.
        for proteinNumber, protein in enumerate(proteins, start=1):
            sequences = sequenceUtils.openReadingFrames(protein)

            for position, sequence in enumerate(sequences, start=1):
                # convert the sequence to its binary input encoding
                inputLayer = sequenceUtils.createInputLayer(sequence)

                # forward pass; element 1 of the result is used as the output
                # layer (presumably element 0 is the hidden layer -- TODO
                # confirm against forward())
                outputLayer = forward(inputLayer, weight1, weight2)[1]

                # presumably maps the log-scaled output back to the scale
                # that `limit` is expressed in -- TODO confirm
                outputLayer = logTransform.invTransform(outputLayer)

                if outputLayer <= limit:
                    predictions.append([proteinNumber, position])

    np.savetxt(predictionPath, np.array(predictions), fmt='%d',
               delimiter='\t')

    print("There is {} predicted epitopes.".format(len(predictions)))
from forward import forward

# Fix the random seed so that results are reproducible between runs.
np.random.seed(1234)

# Thresholds: `limit` is the cut-off applied to the affinity columns below;
# `syfLimit` is defined here for use elsewhere in the script.
limit = 500
syfLimit = 21

# Protein names; a name's one-indexed position in this array is used as its
# numeric identifier further down.
names = np.array(["gag", "pol", "vif", "vpr", "tat", "rev", "vpu", "env", "nef"])

# MHC epitopes: keep only the sequences whose affinity is at or below `limit`.
mhcSequences, mhcAffinities = fileUtils.readHLA("data/mhcSequences.txt")
mhcEpitopes = mhcSequences[mhcAffinities <= limit]

# Complete HIV coding sequences.
hivProteins = fileUtils.readFasta("data/hivCodingSequences.txt")

# SMMPMBEC: read columns 0-3 of the CSV, one call per column, in order.
smm0, smm1, smm2, smm3 = (
    fileUtils.readColumn("data/smmpmbec.csv", column, True)
    for column in range(4)
)

# Rows whose column-3 value is at or below `limit`.
index = smm3 <= limit

# Three parallel arrays for the selected rows: column 2, a zero-filled
# placeholder for the protein number, and column 1.
smm = [smm2[index], np.repeat(0, sum(index)), smm1[index]]

# Replace names with numbers (one-indexed position of the name in `names`).
selectedNames = smm0[index]
for number, name in enumerate(names, start=1):
    smm[1][selectedNames == name] = number